extraction and generation engine enhanced for doc, code, image
This commit is contained in:
parent
f6540d6b5c
commit
909ee9528f
51 changed files with 2137 additions and 7955 deletions
|
|
@ -14,10 +14,6 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata,
|
||||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
from modules.shared.jsonUtils import (
|
from modules.shared.jsonUtils import (
|
||||||
extractJsonString,
|
|
||||||
repairBrokenJson,
|
|
||||||
extractSectionsFromDocument,
|
|
||||||
buildContinuationContext,
|
|
||||||
parseJsonWithModel
|
parseJsonWithModel
|
||||||
)
|
)
|
||||||
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||||
|
|
@ -209,7 +205,7 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
processingMode=ProcessingModeEnum.BASIC
|
processingMode=ProcessingModeEnum.BASIC
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _callAiWithLooping(
|
async def callAiWithLooping(
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
options: AiCallOptions,
|
options: AiCallOptions,
|
||||||
|
|
@ -218,11 +214,12 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
promptArgs: Optional[Dict[str, Any]] = None,
|
promptArgs: Optional[Dict[str, Any]] = None,
|
||||||
operationId: Optional[str] = None,
|
operationId: Optional[str] = None,
|
||||||
userPrompt: Optional[str] = None,
|
userPrompt: Optional[str] = None,
|
||||||
contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
|
contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
|
||||||
|
useCaseId: Optional[str] = None # REQUIRED: Explicit use case ID for generic looping system
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Delegate to AiCallLooper."""
|
"""Public method: Delegate to AiCallLooper for AI calls with looping support."""
|
||||||
return await self.aiCallLooper.callAiWithLooping(
|
return await self.aiCallLooper.callAiWithLooping(
|
||||||
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts
|
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _defineKpisFromPrompt(
|
async def _defineKpisFromPrompt(
|
||||||
|
|
@ -341,49 +338,21 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
prompt: str,
|
prompt: str,
|
||||||
options: AiCallOptions,
|
options: AiCallOptions,
|
||||||
title: Optional[str],
|
title: Optional[str],
|
||||||
aiOperationId: str
|
parentOperationId: Optional[str]
|
||||||
) -> AiResponse:
|
) -> AiResponse:
|
||||||
"""Handle IMAGE_GENERATE operation type."""
|
"""Handle IMAGE_GENERATE operation type using image generation path."""
|
||||||
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
|
from modules.services.serviceGeneration.paths.imagePath import ImageGenerationPath
|
||||||
|
|
||||||
request = AiCallRequest(
|
imagePath = ImageGenerationPath(self.services)
|
||||||
prompt=prompt,
|
|
||||||
context="",
|
|
||||||
options=options
|
|
||||||
)
|
|
||||||
|
|
||||||
response = await self.callAi(request)
|
# Extract format from options
|
||||||
|
format = options.resultFormat or "png"
|
||||||
|
|
||||||
if not response.content:
|
return await imagePath.generateImages(
|
||||||
errorMsg = f"No image data returned: {response.content}"
|
userPrompt=prompt,
|
||||||
logger.error(f"Error in AI image generation: {errorMsg}")
|
format=format,
|
||||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
title=title,
|
||||||
raise ValueError(errorMsg)
|
parentOperationId=parentOperationId
|
||||||
|
|
||||||
imageDoc = DocumentData(
|
|
||||||
documentName="generated_image.png",
|
|
||||||
documentData=response.content,
|
|
||||||
mimeType="image/png"
|
|
||||||
)
|
|
||||||
|
|
||||||
metadata = AiResponseMetadata(
|
|
||||||
title=title or "Generated Image",
|
|
||||||
operationType=options.operationType.value
|
|
||||||
)
|
|
||||||
|
|
||||||
self.services.chat.storeWorkflowStat(
|
|
||||||
self.services.workflow,
|
|
||||||
response,
|
|
||||||
"ai.generate.image"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated")
|
|
||||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
|
||||||
|
|
||||||
return AiResponse(
|
|
||||||
content=response.content,
|
|
||||||
metadata=metadata,
|
|
||||||
documents=[imageDoc]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _handleWebOperation(
|
async def _handleWebOperation(
|
||||||
|
|
@ -441,54 +410,54 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
return intent
|
return intent
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def _clarifyDocumentIntents(
|
async def clarifyDocumentIntents(
|
||||||
self,
|
self,
|
||||||
documents: List[ChatDocument],
|
documents: List[ChatDocument],
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
actionParameters: Dict[str, Any],
|
actionParameters: Dict[str, Any],
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> List[DocumentIntent]:
|
) -> List[DocumentIntent]:
|
||||||
"""Delegate to DocumentIntentAnalyzer."""
|
"""Public method: Delegate to DocumentIntentAnalyzer."""
|
||||||
return await self.intentAnalyzer.clarifyDocumentIntents(
|
return await self.intentAnalyzer.clarifyDocumentIntents(
|
||||||
documents, userPrompt, actionParameters, parentOperationId
|
documents, userPrompt, actionParameters, parentOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _extractAndPrepareContent(
|
async def extractAndPrepareContent(
|
||||||
self,
|
self,
|
||||||
documents: List[ChatDocument],
|
documents: List[ChatDocument],
|
||||||
documentIntents: List[DocumentIntent],
|
documentIntents: List[DocumentIntent],
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> List[ContentPart]:
|
) -> List[ContentPart]:
|
||||||
"""Delegate to ContentExtractor."""
|
"""Public method: Delegate to ContentExtractor."""
|
||||||
return await self.contentExtractor.extractAndPrepareContent(
|
return await self.contentExtractor.extractAndPrepareContent(
|
||||||
documents, documentIntents, parentOperationId, self._getIntentForDocument
|
documents, documentIntents, parentOperationId, self._getIntentForDocument
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _generateStructure(
|
async def generateStructure(
|
||||||
self,
|
self,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
contentParts: List[ContentPart],
|
contentParts: List[ContentPart],
|
||||||
outputFormat: str,
|
outputFormat: str,
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Delegate to StructureGenerator."""
|
"""Public method: Delegate to StructureGenerator."""
|
||||||
return await self.structureGenerator.generateStructure(
|
return await self.structureGenerator.generateStructure(
|
||||||
userPrompt, contentParts, outputFormat, parentOperationId
|
userPrompt, contentParts, outputFormat, parentOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _fillStructure(
|
async def fillStructure(
|
||||||
self,
|
self,
|
||||||
structure: Dict[str, Any],
|
structure: Dict[str, Any],
|
||||||
contentParts: List[ContentPart],
|
contentParts: List[ContentPart],
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Delegate to StructureFiller."""
|
"""Public method: Delegate to StructureFiller."""
|
||||||
return await self.structureFiller.fillStructure(
|
return await self.structureFiller.fillStructure(
|
||||||
structure, contentParts, userPrompt, parentOperationId
|
structure, contentParts, userPrompt, parentOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _renderResult(
|
async def renderResult(
|
||||||
self,
|
self,
|
||||||
filledStructure: Dict[str, Any],
|
filledStructure: Dict[str, Any],
|
||||||
outputFormat: str,
|
outputFormat: str,
|
||||||
|
|
@ -577,13 +546,14 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
documentIntents: Optional[List[DocumentIntent]] = None,
|
documentIntents: Optional[List[DocumentIntent]] = None,
|
||||||
outputFormat: Optional[str] = None,
|
outputFormat: Optional[str] = None,
|
||||||
title: Optional[str] = None,
|
title: Optional[str] = None,
|
||||||
parentOperationId: Optional[str] = None
|
parentOperationId: Optional[str] = None,
|
||||||
|
generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection)
|
||||||
) -> AiResponse:
|
) -> AiResponse:
|
||||||
"""
|
"""
|
||||||
Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions.
|
Unified AI content generation with explicit intent requirement.
|
||||||
|
|
||||||
Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch.
|
All AI-Actions (ai.process, ai.generateDocument, etc.) route through here.
|
||||||
Sie unterscheiden sich nur in Parametern, nicht in Logik.
|
They differ only in parameters, not in logic.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt: The main prompt for the AI call
|
prompt: The main prompt for the AI call
|
||||||
|
|
@ -594,6 +564,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
|
outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
|
||||||
title: Optional title for generated documents
|
title: Optional title for generated documents
|
||||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||||
|
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
|
||||||
|
NO auto-detection - actions must explicitly specify intent.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
AiResponse with content, metadata, and optional documents
|
AiResponse with content, metadata, and optional documents
|
||||||
|
|
@ -625,111 +597,73 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
|
|
||||||
# Route zu Operation-spezifischen Handlern
|
# Route zu Operation-spezifischen Handlern
|
||||||
if opType == OperationTypeEnum.IMAGE_GENERATE:
|
if opType == OperationTypeEnum.IMAGE_GENERATE:
|
||||||
return await self._handleImageGeneration(prompt, options, title, aiOperationId)
|
# Image generation - route to image path
|
||||||
|
return await self._handleImageGeneration(prompt, options, title, parentOperationId)
|
||||||
|
|
||||||
if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
|
if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
|
||||||
return await self._handleWebOperation(prompt, options, opType, aiOperationId)
|
return await self._handleWebOperation(prompt, options, opType, aiOperationId)
|
||||||
|
|
||||||
# Dokument-Generierungs-Pfad
|
# Data generation - REQUIRES explicit generationIntent
|
||||||
options.compressPrompt = False
|
if opType == OperationTypeEnum.DATA_GENERATE:
|
||||||
options.compressContext = False
|
if not generationIntent:
|
||||||
|
errorMsg = (
|
||||||
# Schritt 5A: Kläre Dokument-Intents
|
"generationIntent is required for DATA_GENERATE operation. "
|
||||||
documents = []
|
"Actions must explicitly specify 'document' or 'code' intent. "
|
||||||
if documentList:
|
"No auto-detection - use qualified actions (ai.generateDocument, ai.generateCode)."
|
||||||
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
)
|
||||||
|
logger.error(errorMsg)
|
||||||
if not documentIntents and documents:
|
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||||
documentIntents = await self._clarifyDocumentIntents(
|
raise ValueError(errorMsg)
|
||||||
documents,
|
|
||||||
prompt,
|
# Route based on explicit intent (no auto-detection, no fallback)
|
||||||
{"outputFormat": outputFormat},
|
if generationIntent == "code":
|
||||||
aiOperationId
|
# Route to code generation path
|
||||||
)
|
return await self._handleCodeGeneration(
|
||||||
|
prompt=prompt,
|
||||||
# Schritt 5B: Extrahiere und bereite Content vor
|
options=options,
|
||||||
if documents:
|
contentParts=contentParts,
|
||||||
preparedContentParts = await self._extractAndPrepareContent(
|
outputFormat=outputFormat,
|
||||||
documents,
|
title=title,
|
||||||
documentIntents or [],
|
parentOperationId=parentOperationId
|
||||||
aiOperationId
|
)
|
||||||
)
|
else:
|
||||||
|
# Route to document generation path (existing behavior)
|
||||||
# Merge mit bereitgestellten contentParts (falls vorhanden)
|
return await self._handleDocumentGeneration(
|
||||||
if contentParts:
|
prompt=prompt,
|
||||||
# Prüfe auf pre-extracted Content
|
options=options,
|
||||||
for part in contentParts:
|
documentList=documentList,
|
||||||
if part.metadata.get("skipExtraction", False):
|
documentIntents=documentIntents,
|
||||||
# Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig
|
contentParts=contentParts,
|
||||||
part.metadata.setdefault("contentFormat", "extracted")
|
outputFormat=outputFormat,
|
||||||
part.metadata.setdefault("isPreExtracted", True)
|
title=title,
|
||||||
preparedContentParts.extend(contentParts)
|
parentOperationId=parentOperationId
|
||||||
|
|
||||||
contentParts = preparedContentParts
|
|
||||||
|
|
||||||
# Schritt 5C: Generiere Struktur
|
|
||||||
structure = await self._generateStructure(
|
|
||||||
prompt,
|
|
||||||
contentParts or [],
|
|
||||||
outputFormat,
|
|
||||||
aiOperationId
|
|
||||||
)
|
|
||||||
|
|
||||||
# Schritt 5D: Fülle Struktur
|
|
||||||
# Language will be extracted from services (user intention analysis) in fillStructure
|
|
||||||
filledStructure = await self._fillStructure(
|
|
||||||
structure,
|
|
||||||
contentParts or [],
|
|
||||||
prompt,
|
|
||||||
aiOperationId
|
|
||||||
)
|
|
||||||
|
|
||||||
# Schritt 5E: Rendere Resultat
|
|
||||||
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
|
||||||
renderedDocuments = await self._renderResult(
|
|
||||||
filledStructure,
|
|
||||||
outputFormat,
|
|
||||||
title or "Generated Document",
|
|
||||||
prompt,
|
|
||||||
aiOperationId
|
|
||||||
)
|
|
||||||
|
|
||||||
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
|
||||||
documentDataList = []
|
|
||||||
for renderedDoc in renderedDocuments:
|
|
||||||
try:
|
|
||||||
# Erstelle DocumentData für jedes gerenderte Dokument
|
|
||||||
docDataObj = DocumentData(
|
|
||||||
documentName=renderedDoc.filename,
|
|
||||||
documentData=renderedDoc.documentData,
|
|
||||||
mimeType=renderedDoc.mimeType,
|
|
||||||
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
|
|
||||||
)
|
)
|
||||||
documentDataList.append(docDataObj)
|
|
||||||
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
|
|
||||||
|
|
||||||
if not documentDataList:
|
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
|
||||||
raise ValueError("No documents were rendered")
|
if opType == OperationTypeEnum.DATA_EXTRACT:
|
||||||
|
return await self._handleDataExtraction(
|
||||||
|
prompt=prompt,
|
||||||
|
options=options,
|
||||||
|
documentList=documentList,
|
||||||
|
documentIntents=documentIntents,
|
||||||
|
contentParts=contentParts,
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
title=title,
|
||||||
|
parentOperationId=parentOperationId
|
||||||
|
)
|
||||||
|
|
||||||
metadata = AiResponseMetadata(
|
# Other operation types (DATA_ANALYSE, etc.) - existing logic
|
||||||
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
|
# Fallback to document generation for backward compatibility (should not happen)
|
||||||
operationType=opType.value
|
logger.warning(f"Unhandled operation type: {opType}, falling back to document generation")
|
||||||
)
|
return await self._handleDocumentGeneration(
|
||||||
|
prompt=prompt,
|
||||||
# Debug-Log (harmonisiert)
|
options=options,
|
||||||
self.services.utils.writeDebugFile(
|
documentList=documentList,
|
||||||
json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
|
documentIntents=documentIntents,
|
||||||
"document_generation_response"
|
contentParts=contentParts,
|
||||||
)
|
outputFormat=outputFormat,
|
||||||
|
title=title,
|
||||||
self.services.chat.progressLogFinish(aiOperationId, True)
|
parentOperationId=parentOperationId
|
||||||
|
|
||||||
return AiResponse(
|
|
||||||
content=json.dumps(filledStructure),
|
|
||||||
metadata=metadata,
|
|
||||||
documents=documentDataList
|
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -737,6 +671,166 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
async def _handleDataExtraction(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
options: AiCallOptions,
|
||||||
|
documentList: Optional[Any],
|
||||||
|
documentIntents: Optional[List[DocumentIntent]],
|
||||||
|
contentParts: Optional[List[ContentPart]],
|
||||||
|
outputFormat: str,
|
||||||
|
title: str,
|
||||||
|
parentOperationId: Optional[str]
|
||||||
|
) -> AiResponse:
|
||||||
|
"""
|
||||||
|
Handle DATA_EXTRACT: Extract content from documents (no AI), then process with AI.
|
||||||
|
This is the original flow: extract all documents first, then process contentParts with AI.
|
||||||
|
"""
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Create operation ID
|
||||||
|
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||||
|
extractOperationId = f"data_extract_{workflowId}_{int(time.time())}"
|
||||||
|
|
||||||
|
# Start progress tracking
|
||||||
|
self.services.chat.progressLogStart(
|
||||||
|
extractOperationId,
|
||||||
|
"Data Extraction",
|
||||||
|
"Extraction",
|
||||||
|
f"Format: {outputFormat}",
|
||||||
|
parentOperationId=parentOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Get documents from documentList
|
||||||
|
documents = []
|
||||||
|
if documentList:
|
||||||
|
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||||
|
|
||||||
|
# Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
|
||||||
|
if not documentIntents and documents:
|
||||||
|
documentIntents = await self.clarifyDocumentIntents(
|
||||||
|
documents,
|
||||||
|
prompt,
|
||||||
|
{"outputFormat": outputFormat},
|
||||||
|
extractOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Step 3: Extract and prepare content (NO AI - pure extraction) - REQUIRED for all documents
|
||||||
|
if documents:
|
||||||
|
preparedContentParts = await self.extractAndPrepareContent(
|
||||||
|
documents,
|
||||||
|
documentIntents or [],
|
||||||
|
extractOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Merge with provided contentParts (if any)
|
||||||
|
if contentParts:
|
||||||
|
for part in contentParts:
|
||||||
|
if part.metadata.get("skipExtraction", False):
|
||||||
|
part.metadata.setdefault("contentFormat", "extracted")
|
||||||
|
part.metadata.setdefault("isPreExtracted", True)
|
||||||
|
preparedContentParts.extend(contentParts)
|
||||||
|
|
||||||
|
contentParts = preparedContentParts
|
||||||
|
|
||||||
|
# Step 4: Process extracted contentParts with AI (simple text processing, no structure generation)
|
||||||
|
if not contentParts:
|
||||||
|
raise ValueError("No content extracted from documents")
|
||||||
|
|
||||||
|
# Use simple AI call to process extracted content
|
||||||
|
# Prepare content for AI processing
|
||||||
|
contentText = "\n\n".join([
|
||||||
|
f"[Document: {part.metadata.get('documentName', 'Unknown')}]\n{part.data}"
|
||||||
|
for part in contentParts
|
||||||
|
if part.data
|
||||||
|
])
|
||||||
|
|
||||||
|
# Call AI with extracted content
|
||||||
|
aiRequest = AiCallRequest(
|
||||||
|
prompt=f"{prompt}\n\nExtracted Content:\n{contentText}",
|
||||||
|
context="",
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
|
||||||
|
aiResponse = await self.callAi(aiRequest)
|
||||||
|
|
||||||
|
# Create response document
|
||||||
|
resultDocument = DocumentData(
|
||||||
|
documentName=f"{title or 'extracted_data'}.{outputFormat}",
|
||||||
|
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
|
||||||
|
mimeType=f"text/{outputFormat}" if outputFormat in ["txt", "json", "csv"] else "application/octet-stream"
|
||||||
|
)
|
||||||
|
|
||||||
|
metadata = AiResponseMetadata(
|
||||||
|
title=title or "Extracted Data",
|
||||||
|
operationType=OperationTypeEnum.DATA_EXTRACT.value
|
||||||
|
)
|
||||||
|
|
||||||
|
self.services.chat.progressLogFinish(extractOperationId, True)
|
||||||
|
|
||||||
|
return AiResponse(
|
||||||
|
content=aiResponse.content if isinstance(aiResponse.content, str) else aiResponse.content.decode('utf-8', errors='replace'),
|
||||||
|
metadata=metadata,
|
||||||
|
documents=[resultDocument]
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in data extraction: {str(e)}")
|
||||||
|
self.services.chat.progressLogFinish(extractOperationId, False)
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def _handleCodeGeneration(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
options: AiCallOptions,
|
||||||
|
contentParts: Optional[List[ContentPart]],
|
||||||
|
outputFormat: str,
|
||||||
|
title: str,
|
||||||
|
parentOperationId: Optional[str]
|
||||||
|
) -> AiResponse:
|
||||||
|
"""Handle code generation using code generation path."""
|
||||||
|
from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath
|
||||||
|
|
||||||
|
codePath = CodeGenerationPath(self.services)
|
||||||
|
return await codePath.generateCode(
|
||||||
|
userPrompt=prompt,
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
contentParts=contentParts,
|
||||||
|
title=title or "Generated Code",
|
||||||
|
parentOperationId=parentOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _handleDocumentGeneration(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
options: AiCallOptions,
|
||||||
|
documentList: Optional[Any],
|
||||||
|
documentIntents: Optional[List[DocumentIntent]],
|
||||||
|
contentParts: Optional[List[ContentPart]],
|
||||||
|
outputFormat: str,
|
||||||
|
title: str,
|
||||||
|
parentOperationId: Optional[str]
|
||||||
|
) -> AiResponse:
|
||||||
|
"""Handle document generation using document generation path."""
|
||||||
|
from modules.services.serviceGeneration.paths.documentPath import DocumentGenerationPath
|
||||||
|
|
||||||
|
# Set compression options for document generation
|
||||||
|
options.compressPrompt = False
|
||||||
|
options.compressContext = False
|
||||||
|
|
||||||
|
documentPath = DocumentGenerationPath(self.services)
|
||||||
|
return await documentPath.generateDocument(
|
||||||
|
userPrompt=prompt,
|
||||||
|
documentList=documentList,
|
||||||
|
documentIntents=documentIntents,
|
||||||
|
contentParts=contentParts,
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
title=title or "Generated Document",
|
||||||
|
parentOperationId=parentOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _determineDocumentName(
|
def _determineDocumentName(
|
||||||
self,
|
self,
|
||||||
filledStructure: Dict[str, Any],
|
filledStructure: Dict[str, Any],
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
|
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
|
||||||
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||||
|
from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -28,6 +29,7 @@ class AiCallLooper:
|
||||||
self.services = services
|
self.services = services
|
||||||
self.aiService = aiService
|
self.aiService = aiService
|
||||||
self.responseParser = responseParser
|
self.responseParser = responseParser
|
||||||
|
self.useCaseRegistry = LoopingUseCaseRegistry() # Initialize use case registry
|
||||||
|
|
||||||
async def callAiWithLooping(
|
async def callAiWithLooping(
|
||||||
self,
|
self,
|
||||||
|
|
@ -38,7 +40,8 @@ class AiCallLooper:
|
||||||
promptArgs: Optional[Dict[str, Any]] = None,
|
promptArgs: Optional[Dict[str, Any]] = None,
|
||||||
operationId: Optional[str] = None,
|
operationId: Optional[str] = None,
|
||||||
userPrompt: Optional[str] = None,
|
userPrompt: Optional[str] = None,
|
||||||
contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
|
contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
|
||||||
|
useCaseId: str = None # REQUIRED: Explicit use case ID - no auto-detection, no fallback
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Shared core function for AI calls with repair-based looping system.
|
Shared core function for AI calls with repair-based looping system.
|
||||||
|
|
@ -53,10 +56,31 @@ class AiCallLooper:
|
||||||
operationId: Optional operation ID for progress tracking
|
operationId: Optional operation ID for progress tracking
|
||||||
userPrompt: Optional user prompt for KPI definition
|
userPrompt: Optional user prompt for KPI definition
|
||||||
contentParts: Optional content parts for first iteration
|
contentParts: Optional content parts for first iteration
|
||||||
|
useCaseId: REQUIRED: Explicit use case ID - no auto-detection, no fallback
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Complete AI response after all iterations
|
Complete AI response after all iterations
|
||||||
"""
|
"""
|
||||||
|
# REQUIRED: useCaseId must be provided - no auto-detection, no fallback
|
||||||
|
if not useCaseId:
|
||||||
|
errorMsg = (
|
||||||
|
"useCaseId is REQUIRED for callAiWithLooping. "
|
||||||
|
"No auto-detection - must explicitly specify use case ID. "
|
||||||
|
f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
|
||||||
|
)
|
||||||
|
logger.error(errorMsg)
|
||||||
|
raise ValueError(errorMsg)
|
||||||
|
|
||||||
|
# Validate use case exists
|
||||||
|
useCase = self.useCaseRegistry.get(useCaseId)
|
||||||
|
if not useCase:
|
||||||
|
errorMsg = (
|
||||||
|
f"Use case '{useCaseId}' not found in registry. "
|
||||||
|
f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
|
||||||
|
)
|
||||||
|
logger.error(errorMsg)
|
||||||
|
raise ValueError(errorMsg)
|
||||||
|
|
||||||
maxIterations = 50 # Prevent infinite loops
|
maxIterations = 50 # Prevent infinite loops
|
||||||
iteration = 0
|
iteration = 0
|
||||||
allSections = [] # Accumulate all sections across iterations
|
allSections = [] # Accumulate all sections across iterations
|
||||||
|
|
@ -199,36 +223,31 @@ class AiCallLooper:
|
||||||
# Store raw response for continuation (even if broken)
|
# Store raw response for continuation (even if broken)
|
||||||
lastRawResponse = result
|
lastRawResponse = result
|
||||||
|
|
||||||
# Check if this is section content generation (has "elements" not "sections")
|
# Parse JSON for use case handling
|
||||||
# Section content generation returns JSON with "elements" array, not document structure with "sections"
|
parsedJsonForUseCase = None
|
||||||
isSectionContentGeneration = False
|
extractedJsonForUseCase = None
|
||||||
parsedJsonForSection = None
|
|
||||||
extractedJsonForSection = None
|
|
||||||
try:
|
try:
|
||||||
extractedJsonForSection = extractJsonString(result)
|
extractedJsonForUseCase = extractJsonString(result)
|
||||||
parsedJson, parseError, _ = tryParseJson(extractedJsonForSection)
|
parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase)
|
||||||
if parseError is None and parsedJson:
|
if parseError is None and parsedJson:
|
||||||
parsedJsonForSection = parsedJson
|
parsedJsonForUseCase = parsedJson
|
||||||
# Check if JSON has "elements" (section content) or "sections" (document structure)
|
|
||||||
if isinstance(parsedJson, dict):
|
|
||||||
if "elements" in parsedJson:
|
|
||||||
isSectionContentGeneration = True
|
|
||||||
elif isinstance(parsedJson, list) and len(parsedJson) > 0:
|
|
||||||
# Check if it's a list of elements (section content format)
|
|
||||||
if isinstance(parsedJson[0], dict) and "type" in parsedJson[0]:
|
|
||||||
isSectionContentGeneration = True
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if isSectionContentGeneration:
|
# Handle use cases that return JSON directly (no section extraction needed)
|
||||||
# This is section content generation - return the JSON directly
|
directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"]
|
||||||
# No need to extract sections, just return the complete JSON string
|
if useCaseId in directReturnUseCases:
|
||||||
logger.info(f"Iteration {iteration}: Section content generation detected (elements found), returning JSON directly")
|
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly")
|
||||||
if iterationOperationId:
|
if iterationOperationId:
|
||||||
self.services.chat.progressLogFinish(iterationOperationId, True)
|
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||||
# Note: Debug files (_prompt and _response) are already written above for iteration 1
|
|
||||||
# No need to write _final_result as it's redundant with _response
|
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
|
||||||
final_json = json.dumps(parsedJsonForSection, indent=2, ensure_ascii=False) if parsedJsonForSection else (extractedJsonForSection or result)
|
|
||||||
|
# Write final result for chapter structure and code structure (section_content skips it)
|
||||||
|
if useCaseId in ["chapter_structure", "code_structure"]:
|
||||||
|
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
|
||||||
|
|
||||||
return final_json
|
return final_json
|
||||||
|
|
||||||
# Extract sections from response (handles both valid and broken JSON)
|
# Extract sections from response (handles both valid and broken JSON)
|
||||||
|
|
|
||||||
231
modules/services/serviceAi/subLoopingUseCases.py
Normal file
231
modules/services/serviceAi/subLoopingUseCases.py
Normal file
|
|
@ -0,0 +1,231 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""
|
||||||
|
Generic Looping Use Case System
|
||||||
|
|
||||||
|
Provides parametrized looping infrastructure supporting different JSON formats and use cases.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, Any, List, Optional, Callable
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LoopingUseCase:
    """Configuration for a specific looping use case.

    A looping use case describes one JSON response format the AI may return
    (e.g. section elements, chapter structures, code files) and the optional
    callbacks used to build prompts, accumulate partial fragments, and build
    the final result when a response is continued across multiple calls.
    Every callback field defaults to None, which (per the default
    registrations below) means the engine's default behavior is used.
    """

    # Identification
    useCaseId: str  # One of: "section_content", "chapter_structure", "document_structure", "code_structure", "code_content", "image_batch"

    # JSON Format Detection
    jsonTemplate: Dict[str, Any]  # Expected JSON structure template
    detectionKeys: List[str]  # Top-level keys checked for format detection (e.g., ["elements"], ["chapters"], ["files"])
    detectionPath: str  # Dotted path checked as fallback (e.g., "documents[0].chapters", "files[0].content"); "" disables the path check

    # Prompt Building
    initialPromptBuilder: Optional[Callable] = None  # Function to build the initial prompt (None -> default builder)
    continuationPromptBuilder: Optional[Callable] = None  # Function to build the continuation prompt (None -> default builder)

    # Accumulation & Merging
    accumulator: Optional[Callable] = None  # Function to accumulate fragments (None -> default / direct return)
    merger: Optional[Callable] = None  # Function to merge accumulated data (None -> default)

    # Continuation Context
    continuationContextBuilder: Optional[Callable] = None  # Builds continuation context for this format (None -> default)

    # Result Building
    resultBuilder: Optional[Callable] = None  # Builds final result from accumulated data (None -> return JSON directly)

    # Metadata
    supportsAccumulation: bool = True  # Whether this use case supports fragment accumulation
    requiresExtraction: bool = False  # Whether this use case requires extraction (like sections)
class LoopingUseCaseRegistry:
    """Registry of all looping use cases.

    Holds LoopingUseCase configurations keyed by use case ID and can detect
    which registered format a parsed JSON response matches. Detection walks
    the registry in insertion order and returns the FIRST match, so the
    registration order in _registerDefaultUseCases() is significant.
    """

    def __init__(self):
        """Create the registry and populate it with the default use cases."""
        self.useCases: Dict[str, LoopingUseCase] = {}
        self._registerDefaultUseCases()

    def register(self, useCase: LoopingUseCase):
        """Register a use case, replacing any existing entry with the same ID."""
        self.useCases[useCase.useCaseId] = useCase
        logger.debug(f"Registered looping use case: {useCase.useCaseId}")

    def get(self, useCaseId: str) -> Optional[LoopingUseCase]:
        """Return the use case registered under useCaseId, or None."""
        return self.useCases.get(useCaseId)

    def detectUseCase(self, parsedJson: Dict[str, Any]) -> Optional[str]:
        """Detect which use case matches the JSON structure.

        Returns the ID of the first registered use case whose format matches
        parsedJson, or None when no format matches.
        """
        for useCaseId, useCase in self.useCases.items():
            if self._matchesFormat(parsedJson, useCase):
                return useCaseId
        return None

    def _matchesFormat(self, payload: Dict[str, Any], useCase: LoopingUseCase) -> bool:
        """Check whether payload matches the use case's expected JSON format.

        Matches when any of the use case's detectionKeys is present at the
        top level, or (as a fallback) when the use case's detectionPath can
        be fully traversed inside payload.

        Note: parameter renamed from `json` to `payload` to avoid shadowing
        the stdlib module name.
        """
        # Fast path: a declared top-level key is enough for a match.
        for key in useCase.detectionKeys:
            if key in payload:
                return True

        # Fallback: traverse the dotted detection path using plain dictionary
        # traversal (no jsonpath_ng dependency needed).
        if useCase.detectionPath:
            try:
                return self._traverseDetectionPath(payload, useCase.detectionPath)
            except Exception as e:
                # Malformed paths or unexpected value types simply mean "no match".
                logger.debug(f"Path matching failed for {useCase.useCaseId}: {e}")

        return False

    @staticmethod
    def _traverseDetectionPath(payload: Dict[str, Any], detectionPath: str) -> bool:
        """Return True if a dotted path like "documents[0].chapters" resolves in payload.

        Supports single array indices in the form "key[index]". Any missing
        key, out-of-range index, or type mismatch yields False.
        """
        current: Any = payload
        for part in detectionPath.split("."):
            if "[" in part and "]" in part:
                # Array access like "documents[0]".
                key = part.split("[")[0]
                index = int(part.split("[")[1].split("]")[0])
                if not (isinstance(current, dict) and key in current):
                    return False
                value = current[key]
                if not (isinstance(value, list) and 0 <= index < len(value)):
                    return False
                current = value[index]
            else:
                # Regular dictionary key access.
                if not (isinstance(current, dict) and part in current):
                    return False
                current = current[part]
        # The whole path resolved, so the format matches.
        return True

    def _registerDefaultUseCases(self):
        """Register the six built-in looping use cases.

        NOTE(review): detection is order-dependent — e.g. "code_structure"
        matches any payload with a top-level "files" key, which also covers
        "code_content" payloads; confirm the intended precedence.
        """

        # Use Case 1: Section Content Generation
        # Returns JSON with an "elements" array directly; JSON is returned as-is.
        self.register(LoopingUseCase(
            useCaseId="section_content",
            jsonTemplate={"elements": []},
            detectionKeys=["elements"],
            detectionPath="",
            initialPromptBuilder=None,  # Will use default prompt builder
            continuationPromptBuilder=None,  # Will use default continuation builder
            accumulator=None,  # Direct return, no accumulation
            merger=None,
            continuationContextBuilder=None,  # Will use default continuation context
            resultBuilder=None,  # Return JSON directly
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        # Use Case 2: Chapter Structure Generation
        # Returns JSON with "documents[0].chapters" structure.
        self.register(LoopingUseCase(
            useCaseId="chapter_structure",
            jsonTemplate={"documents": [{"chapters": []}]},
            detectionKeys=["chapters"],
            detectionPath="documents[0].chapters",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Direct return, no accumulation
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,  # Return JSON directly
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        # Use Case 3: Document Structure Generation
        # Returns "documents[0].sections" structure; requires extraction and accumulation.
        self.register(LoopingUseCase(
            useCaseId="document_structure",
            jsonTemplate={"documents": [{"sections": []}]},
            detectionKeys=["sections"],
            detectionPath="documents[0].sections",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Will use default accumulator
            merger=None,  # Will use default merger
            continuationContextBuilder=None,
            resultBuilder=None,  # Will use default result builder
            supportsAccumulation=True,
            requiresExtraction=True
        ))

        # Use Case 4: Code Structure Generation
        self.register(LoopingUseCase(
            useCaseId="code_structure",
            jsonTemplate={
                "metadata": {
                    "language": "",
                    "projectType": "single_file|multi_file",
                    "projectName": ""
                },
                "files": [
                    {
                        "id": "",
                        "filename": "",
                        "fileType": "",
                        "dependencies": [],
                        "imports": [],
                        "functions": [],
                        "classes": []
                    }
                ]
            },
            detectionKeys=["files"],
            detectionPath="files",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Direct return
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        # Use Case 5: Code Content Generation
        self.register(LoopingUseCase(
            useCaseId="code_content",
            jsonTemplate={"files": [{"content": "", "functions": []}]},
            detectionKeys=["content", "functions"],
            detectionPath="files[0].content",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Will use default accumulator
            merger=None,  # Will use default merger
            continuationContextBuilder=None,
            resultBuilder=None,  # Will use default result builder
            supportsAccumulation=True,
            requiresExtraction=False
        ))

        # Use Case 6: Image Batch Generation
        self.register(LoopingUseCase(
            useCaseId="image_batch",
            jsonTemplate={"images": []},
            detectionKeys=["images"],
            detectionPath="images",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Direct return
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        logger.info(f"Registered {len(self.useCases)} default looping use cases")
|
||||||
|
|
||||||
|
|
@ -23,11 +23,20 @@ logger = logging.getLogger(__name__)
|
||||||
class StructureFiller:
|
class StructureFiller:
|
||||||
"""Handles filling document structure with content."""
|
"""Handles filling document structure with content."""
|
||||||
|
|
||||||
|
# Default concurrency limit for parallel generation (chapters/sections)
|
||||||
|
DEFAULT_MAX_CONCURRENT_GENERATION = 16
|
||||||
|
|
||||||
def __init__(self, services, aiService):
|
def __init__(self, services, aiService):
|
||||||
"""Initialize StructureFiller with service center and AI service access."""
|
"""Initialize StructureFiller with service center and AI service access."""
|
||||||
self.services = services
|
self.services = services
|
||||||
self.aiService = aiService
|
self.aiService = aiService
|
||||||
|
|
||||||
|
def _getMaxConcurrentGeneration(self, options: Optional[AiCallOptions] = None) -> int:
    """Resolve the concurrency cap for parallel chapter/section generation.

    An explicit ``maxConcurrentGeneration`` attribute on *options* wins;
    otherwise the class default (DEFAULT_MAX_CONCURRENT_GENERATION) applies.
    """
    if not options:
        return self.DEFAULT_MAX_CONCURRENT_GENERATION
    # getattr with a fallback replicates the hasattr() check in one step.
    return getattr(options, 'maxConcurrentGeneration', self.DEFAULT_MAX_CONCURRENT_GENERATION)
|
||||||
|
|
||||||
def _getUserLanguage(self) -> str:
|
def _getUserLanguage(self) -> str:
|
||||||
"""Get user language for document generation"""
|
"""Get user language for document generation"""
|
||||||
try:
|
try:
|
||||||
|
|
@ -101,14 +110,19 @@ class StructureFiller:
|
||||||
try:
|
try:
|
||||||
filledStructure = copy.deepcopy(structure)
|
filledStructure = copy.deepcopy(structure)
|
||||||
|
|
||||||
|
# Get options from AI service if available (for concurrency control)
|
||||||
|
# Default concurrency limit (16) will be used if options is None
|
||||||
|
options = None
|
||||||
|
# Note: Options can be passed via fillStructure if needed in the future
|
||||||
|
|
||||||
# Phase 5D.1: Sections-Struktur für jedes Chapter generieren
|
# Phase 5D.1: Sections-Struktur für jedes Chapter generieren
|
||||||
filledStructure = await self._generateChapterSectionsStructure(
|
filledStructure = await self._generateChapterSectionsStructure(
|
||||||
filledStructure, contentParts, userPrompt, fillOperationId, language
|
filledStructure, contentParts, userPrompt, fillOperationId, language, options
|
||||||
)
|
)
|
||||||
|
|
||||||
# Phase 5D.2: Sections mit ContentParts füllen
|
# Phase 5D.2: Sections mit ContentParts füllen
|
||||||
filledStructure = await self._fillChapterSections(
|
filledStructure = await self._fillChapterSections(
|
||||||
filledStructure, contentParts, userPrompt, fillOperationId, language
|
filledStructure, contentParts, userPrompt, fillOperationId, language, options
|
||||||
)
|
)
|
||||||
|
|
||||||
# Flattening: Chapters zu Sections konvertieren
|
# Flattening: Chapters zu Sections konvertieren
|
||||||
|
|
@ -243,7 +257,8 @@ class StructureFiller:
|
||||||
contentParts: List[ContentPart],
|
contentParts: List[ContentPart],
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
parentOperationId: str,
|
parentOperationId: str,
|
||||||
language: str
|
language: str,
|
||||||
|
options: Optional[AiCallOptions] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content) in parallel.
|
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content) in parallel.
|
||||||
|
|
@ -252,6 +267,10 @@ class StructureFiller:
|
||||||
# Count total chapters for progress tracking
|
# Count total chapters for progress tracking
|
||||||
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
||||||
|
|
||||||
|
# Get concurrency limit
|
||||||
|
maxConcurrent = self._getMaxConcurrentGeneration(options)
|
||||||
|
semaphore = asyncio.Semaphore(maxConcurrent)
|
||||||
|
|
||||||
# Collect all chapters with their indices for parallel processing
|
# Collect all chapters with their indices for parallel processing
|
||||||
chapterTasks = []
|
chapterTasks = []
|
||||||
chapterIndex = 0
|
chapterIndex = 0
|
||||||
|
|
@ -266,25 +285,31 @@ class StructureFiller:
|
||||||
contentPartIds = chapter.get("contentPartIds", [])
|
contentPartIds = chapter.get("contentPartIds", [])
|
||||||
contentPartInstructions = chapter.get("contentPartInstructions", {})
|
contentPartInstructions = chapter.get("contentPartInstructions", {})
|
||||||
|
|
||||||
# Create task for parallel processing
|
# Create task for parallel processing with semaphore
|
||||||
task = self._generateSingleChapterSectionsStructure(
|
async def processChapterWithSemaphore(chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions):
|
||||||
chapter=chapter,
|
async with semaphore:
|
||||||
chapterIndex=chapterIndex,
|
return await self._generateSingleChapterSectionsStructure(
|
||||||
chapterId=chapterId,
|
chapter=chapter,
|
||||||
chapterLevel=chapterLevel,
|
chapterIndex=chapterIndex,
|
||||||
chapterTitle=chapterTitle,
|
chapterId=chapterId,
|
||||||
generationHint=generationHint,
|
chapterLevel=chapterLevel,
|
||||||
contentPartIds=contentPartIds,
|
chapterTitle=chapterTitle,
|
||||||
contentPartInstructions=contentPartInstructions,
|
generationHint=generationHint,
|
||||||
contentParts=contentParts,
|
contentPartIds=contentPartIds,
|
||||||
userPrompt=userPrompt,
|
contentPartInstructions=contentPartInstructions,
|
||||||
language=language,
|
contentParts=contentParts,
|
||||||
parentOperationId=parentOperationId,
|
userPrompt=userPrompt,
|
||||||
totalChapters=totalChapters
|
language=language,
|
||||||
|
parentOperationId=parentOperationId,
|
||||||
|
totalChapters=totalChapters
|
||||||
|
)
|
||||||
|
|
||||||
|
task = processChapterWithSemaphore(
|
||||||
|
chapter, chapterIndex, chapterId, chapterLevel, chapterTitle, generationHint, contentPartIds, contentPartInstructions
|
||||||
)
|
)
|
||||||
chapterTasks.append((chapterIndex, chapter, task))
|
chapterTasks.append((chapterIndex, chapter, task))
|
||||||
|
|
||||||
# Execute all chapter tasks in parallel
|
# Execute all chapter tasks in parallel with concurrency control
|
||||||
if chapterTasks:
|
if chapterTasks:
|
||||||
# Create list of tasks (without indices for gather)
|
# Create list of tasks (without indices for gather)
|
||||||
tasks = [task for _, _, task in chapterTasks]
|
tasks = [task for _, _, task in chapterTasks]
|
||||||
|
|
@ -386,11 +411,25 @@ class StructureFiller:
|
||||||
if generatedElements:
|
if generatedElements:
|
||||||
elements.extend(generatedElements)
|
elements.extend(generatedElements)
|
||||||
else:
|
else:
|
||||||
# Fallback: Try to parse JSON response directly
|
# Fallback: Try to parse JSON response directly with repair logic
|
||||||
try:
|
try:
|
||||||
fallbackElements = json.loads(
|
from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
|
||||||
self.services.utils.jsonExtractString(aiResponse.content)
|
|
||||||
)
|
# Use tryParseJson which handles extraction and basic parsing
|
||||||
|
fallbackElements, parseError, cleanedStr = tryParseJson(aiResponse.content)
|
||||||
|
|
||||||
|
# If parsing failed, try repair
|
||||||
|
if parseError and isinstance(aiResponse.content, str):
|
||||||
|
logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}")
|
||||||
|
repairedJson = repairBrokenJson(aiResponse.content)
|
||||||
|
if repairedJson:
|
||||||
|
fallbackElements = repairedJson
|
||||||
|
parseError = None
|
||||||
|
logger.info(f"Successfully repaired JSON for section {sectionId}")
|
||||||
|
|
||||||
|
if parseError:
|
||||||
|
raise parseError
|
||||||
|
|
||||||
if isinstance(fallbackElements, list):
|
if isinstance(fallbackElements, list):
|
||||||
elements.extend(fallbackElements)
|
elements.extend(fallbackElements)
|
||||||
elif isinstance(fallbackElements, dict) and "elements" in fallbackElements:
|
elif isinstance(fallbackElements, dict) and "elements" in fallbackElements:
|
||||||
|
|
@ -621,7 +660,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
processingMode=ProcessingModeEnum.DETAILED
|
processingMode=ProcessingModeEnum.DETAILED
|
||||||
)
|
)
|
||||||
|
|
||||||
aiResponseJson = await self.aiService._callAiWithLooping(
|
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||||
prompt=generationPrompt,
|
prompt=generationPrompt,
|
||||||
options=options,
|
options=options,
|
||||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||||
|
|
@ -638,11 +677,28 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
},
|
},
|
||||||
operationId=sectionOperationId,
|
operationId=sectionOperationId,
|
||||||
userPrompt=userPrompt,
|
userPrompt=userPrompt,
|
||||||
contentParts=extractedParts
|
contentParts=extractedParts,
|
||||||
|
useCaseId="section_content" # REQUIRED: Explicit use case ID
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsedResponse = json.loads(self.services.utils.jsonExtractString(aiResponseJson))
|
# Use tryParseJson which handles extraction and basic parsing
|
||||||
|
from modules.shared.jsonUtils import tryParseJson, repairBrokenJson
|
||||||
|
|
||||||
|
parsedResponse, parseError, cleanedStr = tryParseJson(aiResponseJson)
|
||||||
|
|
||||||
|
# If parsing failed, try repair
|
||||||
|
if parseError and isinstance(aiResponseJson, str):
|
||||||
|
logger.warning(f"Initial JSON parse failed for section {sectionId}, attempting repair: {str(parseError)}")
|
||||||
|
repairedJson = repairBrokenJson(aiResponseJson)
|
||||||
|
if repairedJson:
|
||||||
|
parsedResponse = repairedJson
|
||||||
|
parseError = None
|
||||||
|
logger.info(f"Successfully repaired JSON for section {sectionId}")
|
||||||
|
|
||||||
|
if parseError:
|
||||||
|
raise parseError
|
||||||
|
|
||||||
if isinstance(parsedResponse, list):
|
if isinstance(parsedResponse, list):
|
||||||
generatedElements = parsedResponse
|
generatedElements = parsedResponse
|
||||||
elif isinstance(parsedResponse, dict):
|
elif isinstance(parsedResponse, dict):
|
||||||
|
|
@ -824,7 +880,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
processingMode=ProcessingModeEnum.DETAILED
|
processingMode=ProcessingModeEnum.DETAILED
|
||||||
)
|
)
|
||||||
|
|
||||||
aiResponseJson = await self.aiService._callAiWithLooping(
|
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||||
prompt=generationPrompt,
|
prompt=generationPrompt,
|
||||||
options=options,
|
options=options,
|
||||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||||
|
|
@ -841,7 +897,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
},
|
},
|
||||||
operationId=sectionOperationId,
|
operationId=sectionOperationId,
|
||||||
userPrompt=userPrompt,
|
userPrompt=userPrompt,
|
||||||
contentParts=[]
|
contentParts=[],
|
||||||
|
useCaseId="section_content" # REQUIRED: Explicit use case ID
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -1060,7 +1117,7 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
processingMode=ProcessingModeEnum.DETAILED
|
processingMode=ProcessingModeEnum.DETAILED
|
||||||
)
|
)
|
||||||
|
|
||||||
aiResponseJson = await self.aiService._callAiWithLooping(
|
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||||
prompt=generationPrompt,
|
prompt=generationPrompt,
|
||||||
options=options,
|
options=options,
|
||||||
debugPrefix=f"{chapterId}_section_{sectionId}",
|
debugPrefix=f"{chapterId}_section_{sectionId}",
|
||||||
|
|
@ -1077,7 +1134,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
},
|
},
|
||||||
operationId=sectionOperationId,
|
operationId=sectionOperationId,
|
||||||
userPrompt=userPrompt,
|
userPrompt=userPrompt,
|
||||||
contentParts=[part]
|
contentParts=[part],
|
||||||
|
useCaseId="section_content" # REQUIRED: Explicit use case ID
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -1200,7 +1258,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
contentParts: List[ContentPart],
|
contentParts: List[ContentPart],
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
parentOperationId: str,
|
parentOperationId: str,
|
||||||
language: str
|
language: str,
|
||||||
|
options: Optional[AiCallOptions] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Phase 5D.2: Füllt Sections mit ContentParts.
|
Phase 5D.2: Füllt Sections mit ContentParts.
|
||||||
|
|
@ -1217,6 +1276,10 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", []))
|
||||||
fillOperationId = parentOperationId
|
fillOperationId = parentOperationId
|
||||||
|
|
||||||
|
# Get concurrency limit for sections
|
||||||
|
maxConcurrent = self._getMaxConcurrentGeneration(options)
|
||||||
|
sectionSemaphore = asyncio.Semaphore(maxConcurrent)
|
||||||
|
|
||||||
# Helper function to calculate overall progress
|
# Helper function to calculate overall progress
|
||||||
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
|
def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections):
|
||||||
"""Calculate overall progress: 0.0 to 1.0"""
|
"""Calculate overall progress: 0.0 to 1.0"""
|
||||||
|
|
@ -1251,28 +1314,34 @@ The JSON should be a fragment that can be merged with the previous response."""
|
||||||
parentOperationId=fillOperationId
|
parentOperationId=fillOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process sections within chapter in parallel
|
# Process sections within chapter in parallel with concurrency control
|
||||||
sectionTasks = []
|
sectionTasks = []
|
||||||
for sectionIndex, section in enumerate(sections):
|
for sectionIndex, section in enumerate(sections):
|
||||||
# Create task for parallel processing
|
# Create task wrapper with semaphore for parallel processing
|
||||||
task = self._processSingleSection(
|
async def processSectionWithSemaphore(section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress):
|
||||||
section=section,
|
async with sectionSemaphore:
|
||||||
sectionIndex=sectionIndex,
|
return await self._processSingleSection(
|
||||||
totalSections=totalSections,
|
section=section,
|
||||||
chapterIndex=chapterIndex,
|
sectionIndex=sectionIndex,
|
||||||
totalChapters=totalChapters,
|
totalSections=totalSections,
|
||||||
chapterId=chapterId,
|
chapterIndex=chapterIndex,
|
||||||
chapterOperationId=chapterOperationId,
|
totalChapters=totalChapters,
|
||||||
fillOperationId=fillOperationId,
|
chapterId=chapterId,
|
||||||
contentParts=contentParts,
|
chapterOperationId=chapterOperationId,
|
||||||
userPrompt=userPrompt,
|
fillOperationId=fillOperationId,
|
||||||
all_sections_list=all_sections_list,
|
contentParts=contentParts,
|
||||||
language=language,
|
userPrompt=userPrompt,
|
||||||
calculateOverallProgress=calculateOverallProgress
|
all_sections_list=all_sections_list,
|
||||||
|
language=language,
|
||||||
|
calculateOverallProgress=calculateOverallProgress
|
||||||
|
)
|
||||||
|
|
||||||
|
task = processSectionWithSemaphore(
|
||||||
|
section, sectionIndex, totalSections, chapterIndex, totalChapters, chapterId, chapterOperationId, fillOperationId, contentParts, userPrompt, all_sections_list, language, calculateOverallProgress
|
||||||
)
|
)
|
||||||
sectionTasks.append((sectionIndex, section, task))
|
sectionTasks.append((sectionIndex, section, task))
|
||||||
|
|
||||||
# Execute all section tasks in parallel
|
# Execute all section tasks in parallel with concurrency control
|
||||||
if sectionTasks:
|
if sectionTasks:
|
||||||
# Create list of tasks (without indices for gather)
|
# Create list of tasks (without indices for gather)
|
||||||
tasks = [task for _, _, task in sectionTasks]
|
tasks = [task for _, _, task in sectionTasks]
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,10 @@ Handles document structure generation, including:
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List, Optional
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -82,28 +83,89 @@ class StructureGenerator:
|
||||||
outputFormat=outputFormat
|
outputFormat=outputFormat
|
||||||
)
|
)
|
||||||
|
|
||||||
# AI-Call für Chapter-Struktur-Generierung
|
# AI-Call für Chapter-Struktur-Generierung mit Looping-Unterstützung
|
||||||
# Note: Debug logging is handled by callAiPlanning
|
# Use _callAiWithLooping instead of callAiPlanning to support continuation if response is cut
|
||||||
aiResponse = await self.aiService.callAiPlanning(
|
options = AiCallOptions(
|
||||||
prompt=structurePrompt,
|
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||||
debugType="chapter_structure_generation"
|
priority=PriorityEnum.QUALITY,
|
||||||
|
processingMode=ProcessingModeEnum.DETAILED,
|
||||||
|
compressPrompt=False,
|
||||||
|
compressContext=False,
|
||||||
|
resultFormat="json"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse Struktur
|
# Create prompt builder for continuation support
|
||||||
# Use tryParseJson which handles malformed JSON and unterminated strings
|
async def buildChapterStructurePromptWithContinuation(
|
||||||
extractedJson = self.services.utils.jsonExtractString(aiResponse)
|
continuationContext: Optional[Dict[str, Any]] = None,
|
||||||
|
**kwargs
|
||||||
|
) -> str:
|
||||||
|
"""Build chapter structure prompt with optional continuation context."""
|
||||||
|
basePrompt = self._buildChapterStructurePrompt(
|
||||||
|
userPrompt=userPrompt,
|
||||||
|
contentParts=contentParts,
|
||||||
|
outputFormat=outputFormat
|
||||||
|
)
|
||||||
|
|
||||||
|
if continuationContext:
|
||||||
|
# Add continuation instructions
|
||||||
|
deliveredSummary = continuationContext.get("delivered_summary", "")
|
||||||
|
elementBeforeCutoff = continuationContext.get("element_before_cutoff", "")
|
||||||
|
cutOffElement = continuationContext.get("cut_off_element", "")
|
||||||
|
|
||||||
|
continuationText = f"{deliveredSummary}\n\n"
|
||||||
|
continuationText += "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n"
|
||||||
|
|
||||||
|
if elementBeforeCutoff:
|
||||||
|
continuationText += "# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n"
|
||||||
|
continuationText += f"{elementBeforeCutoff}\n\n"
|
||||||
|
|
||||||
|
if cutOffElement:
|
||||||
|
continuationText += "# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n"
|
||||||
|
continuationText += f"{cutOffElement}\n\n"
|
||||||
|
|
||||||
|
continuationText += "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n"
|
||||||
|
continuationText += "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n"
|
||||||
|
continuationText += "Start directly with the next chapter that should follow.\n\n"
|
||||||
|
|
||||||
|
return f"""{basePrompt}
|
||||||
|
|
||||||
|
{continuationText}
|
||||||
|
|
||||||
|
Continue generating the remaining chapters now.
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
return basePrompt
|
||||||
|
|
||||||
|
# Call AI with looping support
|
||||||
|
aiResponseJson = await self.aiService.callAiWithLooping(
|
||||||
|
prompt=structurePrompt,
|
||||||
|
options=options,
|
||||||
|
debugPrefix="chapter_structure_generation",
|
||||||
|
promptBuilder=buildChapterStructurePromptWithContinuation,
|
||||||
|
promptArgs={
|
||||||
|
"userPrompt": userPrompt,
|
||||||
|
"outputFormat": outputFormat,
|
||||||
|
"services": self.services
|
||||||
|
},
|
||||||
|
useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
|
||||||
|
operationId=structureOperationId,
|
||||||
|
userPrompt=userPrompt,
|
||||||
|
contentParts=contentParts
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse the complete JSON response (looping system already handles completion)
|
||||||
|
extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
|
||||||
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
|
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
|
||||||
|
|
||||||
if parseError is not None:
|
if parseError is not None:
|
||||||
# Try to repair broken JSON (handles unterminated strings, incomplete structures, etc.)
|
# Even with looping, try repair as fallback
|
||||||
logger.warning(f"Initial JSON parsing failed: {str(parseError)}. Attempting repair...")
|
logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
|
||||||
from modules.shared import jsonUtils
|
from modules.shared import jsonUtils
|
||||||
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
|
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
|
||||||
if repairedJson:
|
if repairedJson:
|
||||||
# Try parsing repaired JSON
|
|
||||||
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
|
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
|
||||||
if parseError is None:
|
if parseError is None:
|
||||||
logger.info("Successfully repaired and parsed JSON structure")
|
logger.info("Successfully repaired and parsed JSON structure after looping")
|
||||||
structure = parsedJson
|
structure = parsedJson
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
|
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
|
||||||
|
|
|
||||||
|
|
@ -1254,9 +1254,10 @@ class ExtractionService:
|
||||||
aiObjects, # Pass interface for AI calls
|
aiObjects, # Pass interface for AI calls
|
||||||
progressCallback=None
|
progressCallback=None
|
||||||
) -> AiCallResponse:
|
) -> AiCallResponse:
|
||||||
"""Process content parts with model-aware chunking and AI calls.
|
"""Process content parts with model-aware chunking and AI calls in parallel.
|
||||||
|
|
||||||
Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
|
Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
|
||||||
|
Uses parallel processing similar to section generation for better performance.
|
||||||
"""
|
"""
|
||||||
prompt = request.prompt
|
prompt = request.prompt
|
||||||
options = request.options
|
options = request.options
|
||||||
|
|
@ -1269,13 +1270,65 @@ class ExtractionService:
|
||||||
if not failoverModelList:
|
if not failoverModelList:
|
||||||
return self._createErrorResponse("No suitable models found", 0, 0)
|
return self._createErrorResponse("No suitable models found", 0, 0)
|
||||||
|
|
||||||
# Process each content part
|
totalParts = len(contentParts)
|
||||||
|
if totalParts == 0:
|
||||||
|
return self._createErrorResponse("No content parts to process", 0, 0)
|
||||||
|
|
||||||
|
# Thread-safe counter for progress tracking
|
||||||
|
completedCount = [0] # Use list to allow modification in nested function
|
||||||
|
|
||||||
|
# Process parts in parallel with concurrency control
|
||||||
|
maxConcurrent = 5
|
||||||
|
if options and hasattr(options, 'maxConcurrentParts'):
|
||||||
|
maxConcurrent = options.maxConcurrentParts
|
||||||
|
|
||||||
|
semaphore = asyncio.Semaphore(maxConcurrent)
|
||||||
|
|
||||||
|
async def processSinglePart(contentPart, partIndex: int) -> AiCallResponse:
|
||||||
|
"""Process a single content part with progress logging."""
|
||||||
|
async with semaphore:
|
||||||
|
partLabel = contentPart.label or f"Part {partIndex+1}"
|
||||||
|
partType = contentPart.typeGroup or "unknown"
|
||||||
|
|
||||||
|
# Log start of processing
|
||||||
|
if progressCallback:
|
||||||
|
progressCallback(0.1 + (partIndex / totalParts) * 0.8, f"Processing {partLabel} ({partType}) - {partIndex+1}/{totalParts}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Process the part
|
||||||
|
partResult = await self.processContentPartWithFallback(
|
||||||
|
contentPart, prompt, options, failoverModelList, aiObjects, None # Don't pass progressCallback to avoid double logging
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update completed count and log progress
|
||||||
|
completedCount[0] += 1
|
||||||
|
if progressCallback:
|
||||||
|
progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Completed {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
|
||||||
|
|
||||||
|
return partResult
|
||||||
|
except Exception as e:
|
||||||
|
# Update completed count even on error
|
||||||
|
completedCount[0] += 1
|
||||||
|
logger.error(f"Error processing part {partIndex+1} ({partLabel}): {str(e)}")
|
||||||
|
if progressCallback:
|
||||||
|
progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Error processing {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
|
||||||
|
# Return error response
|
||||||
|
return self._createErrorResponse(f"Error processing part: {str(e)}", 0, 0)
|
||||||
|
|
||||||
|
# Create tasks for all parts
|
||||||
|
tasks = [processSinglePart(contentPart, i) for i, contentPart in enumerate(contentParts)]
|
||||||
|
|
||||||
|
# Execute all tasks in parallel with error handling
|
||||||
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
|
# Process results and handle exceptions
|
||||||
allResults = []
|
allResults = []
|
||||||
for contentPart in contentParts:
|
for i, result in enumerate(results):
|
||||||
partResult = await self.processContentPartWithFallback(
|
if isinstance(result, Exception):
|
||||||
contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
|
logger.error(f"Exception processing part {i+1}: {str(result)}")
|
||||||
)
|
allResults.append(self._createErrorResponse(f"Exception: {str(result)}", 0, 0))
|
||||||
allResults.append(partResult)
|
elif result is not None:
|
||||||
|
allResults.append(result)
|
||||||
|
|
||||||
# Merge all results using unified mergePartResults
|
# Merge all results using unified mergePartResults
|
||||||
mergedContent = self.mergePartResults(allResults)
|
mergedContent = self.mergePartResults(allResults)
|
||||||
|
|
|
||||||
584
modules/services/serviceGeneration/paths/codePath.py
Normal file
584
modules/services/serviceGeneration/paths/codePath.py
Normal file
|
|
@ -0,0 +1,584 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""
|
||||||
|
Code Generation Path
|
||||||
|
|
||||||
|
Handles code generation with multi-file project support, dependency handling,
|
||||||
|
and proper cross-file references.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CodeGenerationPath:
    """Code generation path.

    Orchestrates multi-phase code generation: structure planning,
    dependency-ordered per-file content generation, and post-formatting.
    All AI calls and progress reporting go through the injected
    ``services`` container (``.ai``, ``.chat``, ``.workflow`` are used).
    """

    def __init__(self, services):
        # Service container; the generation methods below use
        # services.ai (AI calls), services.chat (progress logging)
        # and services.workflow (operation IDs).
        self.services = services
|
||||||
|
async def generateCode(
    self,
    userPrompt: str,
    outputFormat: str = None,
    contentParts: Optional[List[ContentPart]] = None,
    title: str = "Generated Code",
    parentOperationId: Optional[str] = None
) -> AiResponse:
    """
    Generate code files with multi-file project support.

    Runs three phases under a single progress operation:
      1. structure planning (_generateCodeStructure),
      2. dependency-ordered content generation (_generateCodeContent),
      3. formatting/cleanup (_formatAndValidateCode),
    then wraps each resulting file dict in a DocumentData entry.

    Returns: AiResponse with code files as documents.

    Raises: re-raises any exception from the generation phases after
    marking the progress operation as failed.
    """
    # Create operation ID (unique per workflow + timestamp)
    workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
    codeOperationId = f"code_gen_{workflowId}_{int(time.time())}"

    # Start progress tracking
    self.services.chat.progressLogStart(
        codeOperationId,
        "Code Generation",
        "Code Generation",
        f"Format: {outputFormat or 'txt'}",
        parentOperationId=parentOperationId
    )

    try:
        # Detect language and project type from prompt or outputFormat.
        # NOTE(review): projectType is computed but never read below —
        # the structure prompt re-derives it; confirm this is intended.
        language, projectType = self._detectLanguageAndProjectType(userPrompt, outputFormat)

        # Phase 1: Code structure generation (with looping)
        self.services.chat.progressLogUpdate(codeOperationId, 0.2, "Generating code structure")
        codeStructure = await self._generateCodeStructure(
            userPrompt=userPrompt,
            language=language,
            outputFormat=outputFormat,
            contentParts=contentParts
        )

        # Phase 2: Code content generation (with dependency handling)
        self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
        codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)

        # Phase 3: Code formatting & validation
        self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
        formattedFiles = await self._formatAndValidateCode(codeFiles)

        # Convert to unified document format
        documents = []
        for file in formattedFiles:
            mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
            content = file.get("content", "")
            # DocumentData carries bytes; encode str content as UTF-8.
            if isinstance(content, str):
                contentBytes = content.encode('utf-8')
            else:
                contentBytes = content

            documents.append(DocumentData(
                documentName=file.get("filename", "generated.txt"),
                documentData=contentBytes,
                mimeType=mimeType,
                sourceJson=file  # keep the original file dict for traceability
            ))

        metadata = AiResponseMetadata(
            title=title,
            operationType=OperationTypeEnum.DATA_GENERATE.value
        )

        self.services.chat.progressLogFinish(codeOperationId, True)

        return AiResponse(
            documents=documents,
            content=None,
            metadata=metadata
        )

    except Exception as e:
        # Mark the progress operation failed, then propagate to the caller.
        logger.error(f"Error in code generation: {str(e)}")
        self.services.chat.progressLogFinish(codeOperationId, False)
        raise
|
||||||
|
def _detectLanguageAndProjectType(self, userPrompt: str, outputFormat: Optional[str]) -> tuple:
|
||||||
|
"""Detect programming language and project type from prompt or format."""
|
||||||
|
promptLower = userPrompt.lower()
|
||||||
|
|
||||||
|
# Detect language
|
||||||
|
language = None
|
||||||
|
if outputFormat:
|
||||||
|
if outputFormat == "py":
|
||||||
|
language = "python"
|
||||||
|
elif outputFormat in ["js", "ts"]:
|
||||||
|
language = outputFormat
|
||||||
|
elif outputFormat == "html":
|
||||||
|
language = "html"
|
||||||
|
|
||||||
|
if not language:
|
||||||
|
if "python" in promptLower or ".py" in promptLower:
|
||||||
|
language = "python"
|
||||||
|
elif "javascript" in promptLower or ".js" in promptLower:
|
||||||
|
language = "javascript"
|
||||||
|
elif "typescript" in promptLower or ".ts" in promptLower:
|
||||||
|
language = "typescript"
|
||||||
|
elif "html" in promptLower:
|
||||||
|
language = "html"
|
||||||
|
else:
|
||||||
|
language = "python" # Default
|
||||||
|
|
||||||
|
# Detect project type
|
||||||
|
projectType = "single_file"
|
||||||
|
if "multi" in promptLower or "multiple files" in promptLower or "project" in promptLower:
|
||||||
|
projectType = "multi_file"
|
||||||
|
|
||||||
|
return language, projectType
|
||||||
|
|
||||||
|
async def _generateCodeStructure(
    self,
    userPrompt: str,
    language: str,
    outputFormat: Optional[str],
    contentParts: Optional[List[ContentPart]]
) -> Dict[str, Any]:
    """Generate code structure using looping system.

    Asks the AI for a JSON project plan of the form
    {"metadata": {...}, "files": [...]} and returns it parsed as a dict.

    Raises: json.JSONDecodeError if the looped AI response is not valid JSON.

    NOTE(review): outputFormat is accepted but not used in this method —
    confirm whether it should influence the prompt.
    """

    # Build structure generation prompt
    structurePrompt = f"""Analyze the following code generation request and create a project structure.

Request: {userPrompt}

Language: {language}

Create a JSON structure with:
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
2. files: Array of file structures, each with:
- id: Unique identifier
- filename: File name (e.g., "main.py", "utils.py")
- fileType: File extension (e.g., "py", "js")
- dependencies: List of file IDs this file depends on (for multi-file projects)
- imports: List of import statements (for dependency extraction)
- functions: Array of function signatures {{"name": "...", "signature": "..."}}
- classes: Array of class definitions {{"name": "...", "signature": "..."}}

For single-file projects, return one file. For multi-file projects, break down into logical modules.

Return ONLY valid JSON in this format:
{{
"metadata": {{
"language": "{language}",
"projectType": "single_file",
"projectName": "generated-project"
}},
"files": [
{{
"id": "file_1",
"filename": "main.py",
"fileType": "py",
"dependencies": [],
"imports": [],
"functions": [],
"classes": []
}}
]
}}
"""

    # Use generic looping system with code_structure use case
    options = AiCallOptions(
        operationType=OperationTypeEnum.DATA_GENERATE,
        resultFormat="json"
    )

    structureJson = await self.services.ai.callAiWithLooping(
        prompt=structurePrompt,
        options=options,
        useCaseId="code_structure",
        debugPrefix="code_structure_generation",
        contentParts=contentParts
    )

    # The looping call returns a complete JSON string; parse it directly.
    parsed = json.loads(structureJson)
    return parsed
|
||||||
|
async def _generateCodeContent(
    self,
    codeStructure: Dict[str, Any],
    parentOperationId: str
) -> List[Dict[str, Any]]:
    """Generate code content for each file with dependency handling.

    Files are generated sequentially in topological dependency order so
    each file's prompt can reference the exports of the files generated
    before it; package-manager manifests (requirements.txt/package.json)
    are generated first and prepended to the returned list.

    Raises: ValueError if the structure contains no files.
    """
    files = codeStructure.get("files", [])
    metadata = codeStructure.get("metadata", {})

    if not files:
        raise ValueError("No files found in code structure")

    # Step 1: Resolve dependency order
    orderedFiles = self._resolveDependencyOrder(files)

    # Step 2: Generate dependency files first (requirements.txt, package.json, etc.)
    dependencyFiles = await self._generateDependencyFiles(metadata, orderedFiles)

    # Step 3: Generate code files in dependency order (not fully parallel)
    codeFiles = []
    generatedFileContext = {}  # Track what's been generated for cross-file references

    for idx, fileStructure in enumerate(orderedFiles):
        # Update progress (maps the per-file loop onto the 0.5-0.9 range)
        progress = 0.5 + (0.4 * (idx / len(orderedFiles)))
        self.services.chat.progressLogUpdate(
            parentOperationId,
            progress,
            f"Generating {fileStructure.get('filename', 'file')}"
        )

        # Provide context about already-generated files for proper imports
        fileContext = self._buildFileContext(generatedFileContext, fileStructure)

        # Generate this file with context
        fileContent = await self._generateSingleFileContent(
            fileStructure,
            fileContext=fileContext,
            allFilesStructure=orderedFiles,
            metadata=metadata
        )

        codeFiles.append(fileContent)

        # Update context with generated file info (consumed by later files)
        generatedFileContext[fileStructure["id"]] = {
            "filename": fileContent.get("filename", fileStructure.get("filename")),
            "functions": fileContent.get("functions", []),
            "classes": fileContent.get("classes", []),
            "exports": fileContent.get("exports", [])
        }

    # Combine dependency files and code files
    return dependencyFiles + codeFiles
|
||||||
|
def _resolveDependencyOrder(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
|
"""Resolve file generation order based on dependencies using topological sort."""
|
||||||
|
# Build dependency graph
|
||||||
|
fileMap = {f["id"]: f for f in files}
|
||||||
|
dependencies = {}
|
||||||
|
|
||||||
|
for file in files:
|
||||||
|
fileId = file["id"]
|
||||||
|
deps = file.get("dependencies", []) # List of file IDs this file depends on
|
||||||
|
dependencies[fileId] = deps
|
||||||
|
|
||||||
|
# Topological sort
|
||||||
|
ordered = []
|
||||||
|
visited = set()
|
||||||
|
tempMark = set()
|
||||||
|
|
||||||
|
def visit(fileId: str):
|
||||||
|
if fileId in tempMark:
|
||||||
|
# Circular dependency detected - break it
|
||||||
|
logger.warning(f"Circular dependency detected involving {fileId}")
|
||||||
|
return
|
||||||
|
if fileId in visited:
|
||||||
|
return
|
||||||
|
|
||||||
|
tempMark.add(fileId)
|
||||||
|
for depId in dependencies.get(fileId, []):
|
||||||
|
if depId in fileMap:
|
||||||
|
visit(depId)
|
||||||
|
tempMark.remove(fileId)
|
||||||
|
visited.add(fileId)
|
||||||
|
ordered.append(fileMap[fileId])
|
||||||
|
|
||||||
|
for file in files:
|
||||||
|
if file["id"] not in visited:
|
||||||
|
visit(file["id"])
|
||||||
|
|
||||||
|
return ordered
|
||||||
|
|
||||||
|
async def _generateDependencyFiles(
    self,
    metadata: Dict[str, Any],
    files: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Build package-manager manifest files for the detected language.

    Python projects get a requirements.txt, JavaScript/TypeScript projects
    a package.json; other languages produce no manifest. Returns a
    (possibly empty) list of file dicts in the same shape as code files.
    """
    language = metadata.get("language", "").lower()
    manifests: List[Dict[str, Any]] = []

    if language in ("python", "py"):
        requirements = await self._generateRequirementsTxt(files)
        if requirements:
            manifests.append({
                "filename": "requirements.txt",
                "content": requirements,
                "fileType": "txt",
                "id": "requirements_txt"
            })
    elif language in ("javascript", "typescript", "js", "ts"):
        packageManifest = await self._generatePackageJson(files, metadata)
        if packageManifest:
            manifests.append({
                "filename": "package.json",
                "content": json.dumps(packageManifest, indent=2),
                "fileType": "json",
                "id": "package_json"
            })

    return manifests
|
||||||
|
async def _generateRequirementsTxt(
|
||||||
|
self,
|
||||||
|
files: List[Dict[str, Any]]
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""Generate requirements.txt content from Python imports."""
|
||||||
|
pythonPackages = set()
|
||||||
|
|
||||||
|
for file in files:
|
||||||
|
imports = file.get("imports", [])
|
||||||
|
if isinstance(imports, list):
|
||||||
|
for imp in imports:
|
||||||
|
if isinstance(imp, str):
|
||||||
|
# Extract package name from import
|
||||||
|
# Handle: "from flask import", "import flask", "from flask import Flask"
|
||||||
|
imp = imp.strip()
|
||||||
|
if "import" in imp:
|
||||||
|
if "from" in imp:
|
||||||
|
# "from package import ..."
|
||||||
|
parts = imp.split("from")
|
||||||
|
if len(parts) > 1:
|
||||||
|
package = parts[1].split("import")[0].strip()
|
||||||
|
if package and not package.startswith("."):
|
||||||
|
pythonPackages.add(package.split(".")[0]) # Get root package
|
||||||
|
else:
|
||||||
|
# "import package" or "import package.module"
|
||||||
|
parts = imp.split("import")
|
||||||
|
if len(parts) > 1:
|
||||||
|
package = parts[1].strip().split(".")[0].strip()
|
||||||
|
if package and not package.startswith("."):
|
||||||
|
pythonPackages.add(package)
|
||||||
|
|
||||||
|
if pythonPackages:
|
||||||
|
return "\n".join(sorted(pythonPackages))
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _generatePackageJson(
|
||||||
|
self,
|
||||||
|
files: List[Dict[str, Any]],
|
||||||
|
metadata: Dict[str, Any]
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Generate package.json content from JavaScript/TypeScript imports."""
|
||||||
|
npmPackages = {}
|
||||||
|
|
||||||
|
for file in files:
|
||||||
|
imports = file.get("imports", [])
|
||||||
|
if isinstance(imports, list):
|
||||||
|
for imp in imports:
|
||||||
|
if isinstance(imp, str):
|
||||||
|
# Extract npm package from import
|
||||||
|
# Handle: "import express from 'express'", "const express = require('express')"
|
||||||
|
imp = imp.strip()
|
||||||
|
if "from" in imp:
|
||||||
|
# ES6 import: "import ... from 'package'"
|
||||||
|
parts = imp.split("from")
|
||||||
|
if len(parts) > 1:
|
||||||
|
package = parts[1].strip().strip("'\"")
|
||||||
|
if package and not package.startswith(".") and not package.startswith("/"):
|
||||||
|
npmPackages[package] = "*"
|
||||||
|
elif "require" in imp:
|
||||||
|
# CommonJS: "require('package')"
|
||||||
|
match = re.search(r"require\(['\"]([^'\"]+)['\"]\)", imp)
|
||||||
|
if match:
|
||||||
|
package = match.group(1)
|
||||||
|
if not package.startswith(".") and not package.startswith("/"):
|
||||||
|
npmPackages[package] = "*"
|
||||||
|
|
||||||
|
if npmPackages:
|
||||||
|
return {
|
||||||
|
"name": metadata.get("projectName", "generated-project"),
|
||||||
|
"version": "1.0.0",
|
||||||
|
"dependencies": npmPackages
|
||||||
|
}
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _buildFileContext(
|
||||||
|
self,
|
||||||
|
generatedFileContext: Dict[str, Dict[str, Any]],
|
||||||
|
currentFile: Dict[str, Any]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Build context about other files for proper imports/references."""
|
||||||
|
context = {
|
||||||
|
"availableFiles": [],
|
||||||
|
"availableFunctions": {},
|
||||||
|
"availableClasses": {}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add info about already-generated files
|
||||||
|
for fileId, fileInfo in generatedFileContext.items():
|
||||||
|
context["availableFiles"].append({
|
||||||
|
"id": fileId,
|
||||||
|
"filename": fileInfo["filename"],
|
||||||
|
"functions": fileInfo.get("functions", []),
|
||||||
|
"classes": fileInfo.get("classes", []),
|
||||||
|
"exports": fileInfo.get("exports", [])
|
||||||
|
})
|
||||||
|
|
||||||
|
# Build function/class maps for easy lookup
|
||||||
|
for func in fileInfo.get("functions", []):
|
||||||
|
funcName = func.get("name", "")
|
||||||
|
if funcName:
|
||||||
|
context["availableFunctions"][funcName] = {
|
||||||
|
"file": fileInfo["filename"],
|
||||||
|
"signature": func.get("signature", "")
|
||||||
|
}
|
||||||
|
|
||||||
|
for cls in fileInfo.get("classes", []):
|
||||||
|
className = cls.get("name", "")
|
||||||
|
if className:
|
||||||
|
context["availableClasses"][className] = {
|
||||||
|
"file": fileInfo["filename"]
|
||||||
|
}
|
||||||
|
|
||||||
|
return context
|
||||||
|
|
||||||
|
async def _generateSingleFileContent(
    self,
    fileStructure: Dict[str, Any],
    fileContext: Dict[str, Any] = None,
    allFilesStructure: List[Dict[str, Any]] = None,
    metadata: Dict[str, Any] = None
) -> Dict[str, Any]:
    """Generate code content for a single file with context about other files.

    Builds a generation prompt from the file's planned structure plus the
    exports of already-generated files (so cross-file imports resolve),
    calls the looping AI system, and returns a normalized file dict:
    {filename, content, fileType, functions, classes, id}.

    Raises: json.JSONDecodeError if the AI response is not valid JSON.

    NOTE(review): allFilesStructure is currently unused — confirm whether
    the full project plan should be included in the prompt.
    """

    # Pull the planned attributes for this file (with safe defaults)
    filename = fileStructure.get("filename", "generated.py")
    fileType = fileStructure.get("fileType", "py")
    dependencies = fileStructure.get("dependencies", [])
    functions = fileStructure.get("functions", [])
    classes = fileStructure.get("classes", [])

    # Describe already-generated files so the model emits correct imports
    contextInfo = ""
    if fileContext and fileContext.get("availableFiles"):
        contextInfo = "\n\nAvailable files and their exports:\n"
        for fileInfo in fileContext["availableFiles"]:
            contextInfo += f"- {fileInfo['filename']}: "
            funcs = [f.get("name", "") for f in fileInfo.get("functions", [])]
            cls = [c.get("name", "") for c in fileInfo.get("classes", [])]
            exports = []
            if funcs:
                exports.extend(funcs)
            if cls:
                exports.extend(cls)
            if exports:
                contextInfo += ", ".join(exports)
            contextInfo += "\n"

    # BUGFIX: the prompt previously contained a literal "(unknown)" where
    # the target filename belongs; interpolate the computed {filename} so
    # the model knows which file it is generating.
    contentPrompt = f"""Generate complete, executable code for the file: {filename}

File Type: {fileType}
Language: {metadata.get('language', 'python') if metadata else 'python'}

Required functions:
{json.dumps(functions, indent=2) if functions else 'None specified'}

Required classes:
{json.dumps(classes, indent=2) if classes else 'None specified'}

Dependencies on other files: {', '.join(dependencies) if dependencies else 'None'}
{contextInfo}

Generate complete, production-ready code with:
1. Proper imports (including imports from other files in the project if dependencies exist)
2. All required functions and classes
3. Error handling
4. Documentation/docstrings
5. Type hints where appropriate

Return ONLY valid JSON in this format:
{{
"files": [
{{
"filename": "{filename}",
"content": "// Complete code here",
"functions": {json.dumps(functions, indent=2) if functions else '[]'},
"classes": {json.dumps(classes, indent=2) if classes else '[]'}
}}
]
}}
"""

    # Use generic looping system with code_content use case
    options = AiCallOptions(
        operationType=OperationTypeEnum.DATA_GENERATE,
        resultFormat="json"
    )

    contentJson = await self.services.ai.callAiWithLooping(
        prompt=contentPrompt,
        options=options,
        useCaseId="code_content",
        debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
    )

    parsed = json.loads(contentJson)

    # Extract file content and metadata from the expected "files" array
    files = parsed.get("files", [])
    if files and len(files) > 0:
        fileData = files[0]
        return {
            "filename": fileData.get("filename", filename),
            "content": fileData.get("content", ""),
            "fileType": fileType,
            "functions": fileData.get("functions", functions),
            "classes": fileData.get("classes", classes),
            "id": fileStructure.get("id")
        }

    # Fallback if the model returned a flat object instead of a files array
    return {
        "filename": filename,
        "content": parsed.get("content", ""),
        "fileType": fileType,
        "functions": functions,
        "classes": classes,
        "id": fileStructure.get("id")
    }
|
||||||
|
async def _formatAndValidateCode(self, codeFiles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
|
"""Format and validate generated code files."""
|
||||||
|
# For now, just return files as-is
|
||||||
|
# TODO: Add code formatting (black, prettier, etc.) and validation
|
||||||
|
formatted = []
|
||||||
|
for file in codeFiles:
|
||||||
|
content = file.get("content", "")
|
||||||
|
# Basic cleanup: remove markdown code fences if present
|
||||||
|
if isinstance(content, str):
|
||||||
|
content = re.sub(r'^```[\w]*\n', '', content, flags=re.MULTILINE)
|
||||||
|
content = re.sub(r'\n```$', '', content, flags=re.MULTILINE)
|
||||||
|
file["content"] = content.strip()
|
||||||
|
formatted.append(file)
|
||||||
|
return formatted
|
||||||
|
|
||||||
|
def _getMimeType(self, fileType: str) -> str:
|
||||||
|
"""Get MIME type for file type."""
|
||||||
|
mimeTypes = {
|
||||||
|
"py": "text/x-python",
|
||||||
|
"js": "application/javascript",
|
||||||
|
"ts": "application/typescript",
|
||||||
|
"html": "text/html",
|
||||||
|
"css": "text/css",
|
||||||
|
"json": "application/json",
|
||||||
|
"txt": "text/plain",
|
||||||
|
"md": "text/markdown",
|
||||||
|
"java": "text/x-java-source",
|
||||||
|
"cpp": "text/x-c++src",
|
||||||
|
"c": "text/x-csrc"
|
||||||
|
}
|
||||||
|
return mimeTypes.get(fileType.lower(), "text/plain")
|
||||||
258
modules/services/serviceGeneration/paths/documentPath.py
Normal file
258
modules/services/serviceGeneration/paths/documentPath.py
Normal file
|
|
@ -0,0 +1,258 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""
|
||||||
|
Document Generation Path
|
||||||
|
|
||||||
|
Handles document generation using existing chapter/section model.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||||
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentGenerationPath:
|
||||||
|
"""Document generation path (existing functionality, refactored)."""
|
||||||
|
|
||||||
|
def __init__(self, services):
    # Service container used throughout generateDocument
    # (.chat progress logging, .ai extraction/intent calls, .workflow id).
    self.services = services
|
|
||||||
|
async def generateDocument(
|
||||||
|
self,
|
||||||
|
userPrompt: str,
|
||||||
|
documentList: Optional[Any] = None, # DocumentReferenceList
|
||||||
|
documentIntents: Optional[List[DocumentIntent]] = None,
|
||||||
|
contentParts: Optional[List[ContentPart]] = None,
|
||||||
|
outputFormat: str = "txt",
|
||||||
|
title: Optional[str] = None,
|
||||||
|
parentOperationId: Optional[str] = None
|
||||||
|
) -> AiResponse:
|
||||||
|
"""
|
||||||
|
Generate document using existing chapter/section model.
|
||||||
|
|
||||||
|
Returns: AiResponse with documents list
|
||||||
|
"""
|
||||||
|
# Create operation ID
|
||||||
|
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||||
|
docOperationId = f"doc_gen_{workflowId}_{int(time.time())}"
|
||||||
|
|
||||||
|
# Start progress tracking
|
||||||
|
self.services.chat.progressLogStart(
|
||||||
|
docOperationId,
|
||||||
|
"Document Generation",
|
||||||
|
"Document Generation",
|
||||||
|
f"Format: {outputFormat}",
|
||||||
|
parentOperationId=parentOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Schritt 5A: Kläre Dokument-Intents
|
||||||
|
documents = []
|
||||||
|
if documentList:
|
||||||
|
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||||
|
|
||||||
|
if not documentIntents and documents:
|
||||||
|
documentIntents = await self.services.ai.clarifyDocumentIntents(
|
||||||
|
documents,
|
||||||
|
userPrompt,
|
||||||
|
{"outputFormat": outputFormat},
|
||||||
|
docOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Schritt 5B: Extrahiere und bereite Content vor
|
||||||
|
if documents:
|
||||||
|
preparedContentParts = await self.services.ai.extractAndPrepareContent(
|
||||||
|
documents,
|
||||||
|
documentIntents or [],
|
||||||
|
docOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Merge mit bereitgestellten contentParts (falls vorhanden)
|
||||||
|
if contentParts:
|
||||||
|
# Prüfe auf pre-extracted Content
|
||||||
|
for part in contentParts:
|
||||||
|
if part.metadata.get("skipExtraction", False):
|
||||||
|
# Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig
|
||||||
|
part.metadata.setdefault("contentFormat", "extracted")
|
||||||
|
part.metadata.setdefault("isPreExtracted", True)
|
||||||
|
preparedContentParts.extend(contentParts)
|
||||||
|
|
||||||
|
contentParts = preparedContentParts
|
||||||
|
|
||||||
|
# Schritt 5B.5: Process contentParts with AI extraction (if provided)
|
||||||
|
# This extracts text from images, processes content, and updates contentParts with extracted data
|
||||||
|
# This matches the original flow: extract content first (no AI), then process with AI
|
||||||
|
if contentParts:
|
||||||
|
# Filter out binary/other parts that shouldn't be processed
|
||||||
|
processableParts = []
|
||||||
|
skippedParts = []
|
||||||
|
for p in contentParts:
|
||||||
|
if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
|
||||||
|
processableParts.append(p)
|
||||||
|
else:
|
||||||
|
skippedParts.append(p)
|
||||||
|
|
||||||
|
if skippedParts:
|
||||||
|
logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
|
||||||
|
|
||||||
|
if processableParts:
|
||||||
|
# Count images for progress update
|
||||||
|
imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
|
||||||
|
if imageCount > 0:
|
||||||
|
self.services.chat.progressLogUpdate(docOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
|
||||||
|
|
||||||
|
# Build proper extraction prompt using buildExtractionPrompt
|
||||||
|
# This creates a focused extraction prompt, not the user's generation prompt
|
||||||
|
from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||||
|
|
||||||
|
# Determine renderer for format-specific guidelines
|
||||||
|
renderer = None
|
||||||
|
if outputFormat:
|
||||||
|
try:
|
||||||
|
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
|
generationService = GenerationService(self.services)
|
||||||
|
renderer = generationService.getRendererForFormat(outputFormat)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
|
||||||
|
|
||||||
|
extractionPrompt = await buildExtractionPrompt(
|
||||||
|
outputFormat=outputFormat or "txt",
|
||||||
|
userPrompt=userPrompt, # User's prompt as context for what to extract
|
||||||
|
title=title or "Document",
|
||||||
|
aiService=self.services.ai if hasattr(self.services.ai, 'aiObjects') and self.services.ai.aiObjects else None,
|
||||||
|
services=self.services,
|
||||||
|
renderer=renderer
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
|
||||||
|
|
||||||
|
# Update progress - starting extraction
|
||||||
|
self.services.chat.progressLogUpdate(docOperationId, 0.26, f"Starting AI extraction from {len(processableParts)} content parts")
|
||||||
|
|
||||||
|
# Use DATA_EXTRACT operation type for extraction
|
||||||
|
extractionOptions = AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction
|
||||||
|
compressPrompt=False,
|
||||||
|
compressContext=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create progress callback for per-part progress updates
|
||||||
|
def extractionProgressCallback(progress: float, message: str):
|
||||||
|
"""Progress callback for extraction - updates parent operation."""
|
||||||
|
# Map progress from 0.0-1.0 to 0.26-0.35 range (extraction phase)
|
||||||
|
mappedProgress = 0.26 + (progress * 0.09) # 0.26 to 0.35
|
||||||
|
self.services.chat.progressLogUpdate(docOperationId, mappedProgress, message)
|
||||||
|
|
||||||
|
extractionRequest = AiCallRequest(
|
||||||
|
prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt
|
||||||
|
context="",
|
||||||
|
options=extractionOptions,
|
||||||
|
contentParts=processableParts
|
||||||
|
)
|
||||||
|
|
||||||
|
# Write debug file for extraction prompt (all parts)
|
||||||
|
self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
|
||||||
|
|
||||||
|
# Call AI to extract content from contentParts (with progress callback)
|
||||||
|
extractionResponse = await self.services.ai.callAi(extractionRequest, progressCallback=extractionProgressCallback)
|
||||||
|
|
||||||
|
# Update progress - extraction completed
|
||||||
|
self.services.chat.progressLogUpdate(docOperationId, 0.35, f"Completed AI extraction from {len(processableParts)} content parts")
|
||||||
|
|
||||||
|
# Write debug file for extraction response
|
||||||
|
if extractionResponse.content:
|
||||||
|
self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
|
||||||
|
else:
|
||||||
|
self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
|
||||||
|
logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
|
||||||
|
|
||||||
|
# Update contentParts with extracted content (matching original flow)
|
||||||
|
if extractionResponse.errorCount == 0 and extractionResponse.content:
|
||||||
|
# The extracted content is already merged - update the first processable part with it
|
||||||
|
# This matches the original behavior where extracted text was used for generation
|
||||||
|
if processableParts:
|
||||||
|
# Store extracted content in metadata for use in structure generation
|
||||||
|
processableParts[0].metadata["extractedContent"] = extractionResponse.content
|
||||||
|
logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars)")
|
||||||
|
else:
|
||||||
|
# Extraction failed - log warning but continue
|
||||||
|
logger.warning(f"Content extraction failed, continuing with original contentParts")
|
||||||
|
|
||||||
|
# Schritt 5C: Generiere Struktur
|
||||||
|
structure = await self.services.ai.generateStructure(
|
||||||
|
userPrompt,
|
||||||
|
contentParts or [],
|
||||||
|
outputFormat,
|
||||||
|
docOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Schritt 5D: Fülle Struktur
|
||||||
|
# Language will be extracted from services (user intention analysis) in fillStructure
|
||||||
|
filledStructure = await self.services.ai.fillStructure(
|
||||||
|
structure,
|
||||||
|
contentParts or [],
|
||||||
|
userPrompt,
|
||||||
|
docOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Schritt 5E: Rendere Resultat
|
||||||
|
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
||||||
|
renderedDocuments = await self.services.ai.renderResult(
|
||||||
|
filledStructure,
|
||||||
|
outputFormat,
|
||||||
|
title or "Generated Document",
|
||||||
|
userPrompt,
|
||||||
|
docOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
||||||
|
documentDataList = []
|
||||||
|
for renderedDoc in renderedDocuments:
|
||||||
|
try:
|
||||||
|
# Erstelle DocumentData für jedes gerenderte Dokument
|
||||||
|
docDataObj = DocumentData(
|
||||||
|
documentName=renderedDoc.filename,
|
||||||
|
documentData=renderedDoc.documentData,
|
||||||
|
mimeType=renderedDoc.mimeType,
|
||||||
|
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
|
||||||
|
)
|
||||||
|
documentDataList.append(docDataObj)
|
||||||
|
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
|
||||||
|
|
||||||
|
if not documentDataList:
|
||||||
|
raise ValueError("No documents were rendered")
|
||||||
|
|
||||||
|
metadata = AiResponseMetadata(
|
||||||
|
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
|
||||||
|
operationType=OperationTypeEnum.DATA_GENERATE.value
|
||||||
|
)
|
||||||
|
|
||||||
|
# Debug-Log (harmonisiert)
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
|
||||||
|
"document_generation_response"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.services.chat.progressLogFinish(docOperationId, True)
|
||||||
|
|
||||||
|
return AiResponse(
|
||||||
|
content=json.dumps(filledStructure),
|
||||||
|
metadata=metadata,
|
||||||
|
documents=documentDataList
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in document generation: {str(e)}")
|
||||||
|
self.services.chat.progressLogFinish(docOperationId, False)
|
||||||
|
raise
|
||||||
|
|
||||||
132
modules/services/serviceGeneration/paths/imagePath.py
Normal file
132
modules/services/serviceGeneration/paths/imagePath.py
Normal file
|
|
@ -0,0 +1,132 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
"""
|
||||||
|
Image Generation Path
|
||||||
|
|
||||||
|
Handles image generation with support for single and batch generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import List, Optional
|
||||||
|
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallRequest
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ImageGenerationPath:
    """Image generation path.

    Generates an image via the AI service's IMAGE_GENERATE operation and
    wraps the result as a DocumentData attachment on an AiResponse.
    """

    # File extensions whose MIME subtype differs from the extension itself.
    # Without this map, format="jpg" produced the invalid "image/jpg"
    # (the registered type is "image/jpeg"); same for tif and svg.
    _MIME_SUBTYPES = {
        "jpg": "jpeg",
        "tif": "tiff",
        "svg": "svg+xml",
    }

    def __init__(self, services):
        # Service container providing ai, chat and workflow accessors.
        self.services = services

    async def generateImages(
        self,
        userPrompt: str,
        count: int = 1,
        style: Optional[str] = None,
        format: str = "png",
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """
        Generate image files.

        Args:
            userPrompt: Natural-language description of the desired image.
            count: Requested number of images. NOTE(review): currently ignored —
                exactly one image is generated per call; TODO confirm whether
                batch generation should loop here.
            style: Optional style hint appended to the prompt.
            format: Output file extension (png, jpg, ...); also used as resultFormat.
            title: Optional title for the response metadata.
            parentOperationId: Optional parent operation for progress nesting.

        Returns: AiResponse with image files as documents

        Raises:
            ValueError: If the AI call returns no image content.
        """
        # Create operation ID (workflow-scoped when a workflow is active)
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        imageOperationId = f"image_gen_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            imageOperationId,
            "Image Generation",
            "Image Generation",
            f"Format: {format}",
            parentOperationId=parentOperationId
        )

        try:
            self.services.chat.progressLogUpdate(imageOperationId, 0.4, "Calling AI for image generation")

            # Build prompt with style if provided
            imagePrompt = userPrompt
            if style:
                imagePrompt = f"{userPrompt}\n\nStyle: {style}"

            # Use IMAGE_GENERATE operation
            options = AiCallOptions(
                operationType=OperationTypeEnum.IMAGE_GENERATE,
                resultFormat=format
            )

            request = AiCallRequest(
                prompt=imagePrompt,
                context="",
                options=options
            )

            response = await self.services.ai.callAi(request)

            if not response.content:
                # Fixed: previous message interpolated response.content, which is
                # known to be empty/falsy here and added no information. The
                # except handler below closes the progress log, so no explicit
                # progressLogFinish here (avoids a double-finish).
                errorMsg = "No image data returned by AI"
                logger.error(f"Error in AI image generation: {errorMsg}")
                raise ValueError(errorMsg)

            # Handle response content (could be base64 string or bytes)
            imageData = response.content
            if isinstance(imageData, str):
                # Assume base64 encoded string; fall back to raw UTF-8 bytes.
                # NOTE(review): b64decode without validate=True accepts many
                # non-base64 strings — confirm upstream always sends base64.
                import base64
                try:
                    imageData = base64.b64decode(imageData)
                except Exception:
                    imageData = imageData.encode('utf-8')
            elif not isinstance(imageData, bytes):
                imageData = bytes(imageData)

            # Create document with a correct MIME subtype (jpg -> jpeg, ...)
            normalizedFormat = format.lower().lstrip('.')
            mimeSubtype = self._MIME_SUBTYPES.get(normalizedFormat, normalizedFormat)
            imageDoc = DocumentData(
                documentName=f"generated_image.{format}",
                documentData=imageData,
                mimeType=f"image/{mimeSubtype}"
            )

            metadata = AiResponseMetadata(
                title=title or "Generated Image",
                operationType=OperationTypeEnum.IMAGE_GENERATE.value
            )

            # Record usage statistics for the workflow
            self.services.chat.storeWorkflowStat(
                self.services.workflow,
                response,
                "ai.generate.image"
            )

            self.services.chat.progressLogUpdate(imageOperationId, 0.9, "Image generated")
            self.services.chat.progressLogFinish(imageOperationId, True)

            # Create content string describing the image generation
            import json
            contentJson = json.dumps({
                "type": "image",
                "format": format,
                "prompt": userPrompt,
                "filename": imageDoc.documentName
            }, ensure_ascii=False)

            return AiResponse(
                content=contentJson,  # JSON string describing the image generation
                metadata=metadata,
                documents=[imageDoc]
            )

        except Exception as e:
            logger.error(f"Error in image generation: {str(e)}")
            self.services.chat.progressLogFinish(imageOperationId, False)
            raise
|
||||||
|
|
||||||
|
|
@ -1,742 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
AI processing method module.
|
|
||||||
Handles direct AI calls for any type of task.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import time
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List, Optional
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
|
||||||
from modules.datamodels.datamodelWorkflow import ExtractContentParameters
|
|
||||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class MethodAi(MethodBase):
|
|
||||||
"""AI processing methods."""
|
|
||||||
|
|
||||||
def __init__(self, services):
    """Set up the AI method group on top of the shared method base."""
    super().__init__(services)
    # Identity metadata used by the method registry.
    self.description = "AI processing methods"
    self.name = "ai"
|
|
||||||
|
|
||||||
def _format_timestamp_for_filename(self) -> str:
    """Return the current UTC time as YYYYMMDD-hhmmss, suitable for filenames."""
    moment = datetime.now(UTC)
    return f"{moment:%Y%m%d-%H%M%S}"
|
|
||||||
|
|
||||||
|
|
||||||
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    # Defined before the try so the error handler can tell whether progress
    # tracking was ever started (previously a bare except hid the NameError).
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {parameters.get('resultType', 'txt')}",
            parentOperationId=parentOperationId
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        from modules.datamodels.datamodelDocref import DocumentReferenceList

        documentListParam = parameters.get("documentList")
        # Normalize the parameter to a DocumentReferenceList regardless of
        # whether the caller passed None, a single string, a list, or the
        # model itself; anything else degrades to an empty list with a log.
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        resultType = parameters.get("resultType", "txt")

        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Phase 7.3: Extract content first if documents provided, then use contentParts
        # Check if contentParts are already provided (preferred path)
        contentParts: Optional[List[ContentPart]] = None
        if "contentParts" in parameters:
            contentParts = parameters.get("contentParts")
            if contentParts and not isinstance(contentParts, list):
                # Try to extract from ContentExtracted if it's an ActionDocument
                if hasattr(contentParts, 'parts'):
                    contentParts = contentParts.parts
                else:
                    logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                    contentParts = None

        # If contentParts not provided but documentList is, extract content first
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            # Get ChatDocuments
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Prepare extraction options (use defaults if not provided)
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                # Extract content using extraction service with hierarchical progress logging
                # Pass operationId for per-document progress tracking
                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

                # Combine all ContentParts from all extracted results
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)

                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Build options with only resultFormat - let the service layer analyze
        # the prompt and determine all other model parameters.
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        # Use unified callAiContent method with contentParts (extraction is now separate)
        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        # Extract documents from AiResponse (ActionDocument is imported at
        # module level; the former in-function re-import was redundant).
        if aiResponse.documents:
            # Loop-invariant base metadata; copied per document so the
            # ActionDocuments do not alias one shared dict.
            baseMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": True,
                "documentCount": len(aiResponse.documents)
            }
            final_documents = [
                ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata=dict(baseMetadata)
                )
                for doc in aiResponse.documents
            ]
        else:
            # Text response - create document from content
            extension = output_extension.lstrip('.')
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=extension,
                action_name="result"
            )
            validationMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": False,
                "contentType": "text"
            }
            action_document = ActionDocument(
                documentName=meaningful_name,
                documentData=aiResponse.content,
                mimeType=output_mime_type,
                validationMetadata=validationMetadata
            )
            final_documents = [action_document]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        logger.error(f"Error in AI processing: {str(e)}")

        # Complete progress tracking with failure (best effort; only if the
        # operation was actually started).
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                pass  # Don't fail on progress logging errors

        return ActionResult.isFailure(
            error=str(e)
        )
|
|
||||||
|
|
||||||
|
|
||||||
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    # Defined before the try so the error handler can tell whether progress
    # tracking was ever started (previously a bare except hid the NameError).
    operationId = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parentOperationId
        )

        # Call webcrawl service - service handles all AI intention analysis and processing
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        # Get meaningful filename from research result (generated by intent analyzer).
        # The AI-suggested name is cleaned and validated; anything that fails
        # validation falls through to the single generic-name fallback below
        # (previously duplicated in two branches).
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        if suggestedFilename:
            import re
            # Clean and validate filename
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Ensure it doesn't already have extension
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: should be reasonable length and contain only safe characters
            if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
                meaningfulName = f"{cleaned}.json"
        if not meaningfulName:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        # ActionDocument is imported at module level; the former in-function
        # re-import was redundant.
        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json"
        }
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        logger.error(f"Error in web research: {str(e)}")
        # Best effort: close the progress log only if it was actually opened.
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                pass
        return ActionResult.isFailure(error=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Document Transformation Wrappers
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Summarize one or more documents, extracting key points and main ideas.
    - Input requirements: documentList (required); optional summaryLength, focus.
    - Output format: Text document with summary (default: txt, can be overridden with resultType).

    Parameters:
    - documentList (list, required): Document reference(s) to summarize.
    - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
    - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
    - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
    """
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # "or" guards against an explicit summaryLength=None, which previously
    # crashed on .lower(); str() tolerates non-string values too.
    summaryLength = parameters.get("summaryLength") or "medium"
    focus = parameters.get("focus")
    resultType = parameters.get("resultType", "txt")

    # Map the requested length to a concrete instruction; unknown values
    # fall back to the medium form.
    lengthInstructions = {
        "brief": "Create a brief summary (2-3 paragraphs)",
        "medium": "Create a medium-length summary (comprehensive but concise)",
        "detailed": "Create a detailed summary covering all major points"
    }
    lengthInstruction = lengthInstructions.get(str(summaryLength).lower(), lengthInstructions["medium"])

    aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
    if focus:
        aiPrompt += f" Focus specifically on: {focus}."
    aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."

    # Delegate the heavy lifting to the universal process action.
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
||||||
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Translate document(s) into a target language via the universal AI
    processing action (``self.process``).

    Expected keys in ``parameters``:
    - documentList (list, required): Document reference(s) to translate.
    - targetLanguage (str, required): Target language code or name (e.g. "de", "German").
    - sourceLanguage (str, optional): Source language; auto-detected by the AI when absent.
    - preserveFormatting (bool, optional): Keep the original layout. Default: True.
    - resultType (str, optional): Output extension; when omitted the input format is kept.

    Returns:
        The ActionResult produced by ``self.process``, or a failure result
        when a required parameter is missing.
    """
    docs = parameters.get("documentList", [])
    if not docs:
        return ActionResult.isFailure(error="documentList is required")

    language = parameters.get("targetLanguage")
    if not language:
        return ActionResult.isFailure(error="targetLanguage is required")

    origin = parameters.get("sourceLanguage")
    keepLayout = parameters.get("preserveFormatting", True)
    outputExt = parameters.get("resultType")

    # Assemble the AI instruction from fixed fragments; the fragment text is
    # exactly what the downstream prompt consumer expects.
    fragments = [f"Translate the provided document(s) to {language}."]
    if origin:
        fragments.append(f" The source language is {origin}.")
    fragments.append(
        " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
        if keepLayout
        else " Focus on accurate translation of content."
    )
    fragments.append(" Maintain the same document structure, headings, and organization.")

    request: Dict[str, Any] = {"aiPrompt": "".join(fragments), "documentList": docs}
    # Only forward resultType when explicitly requested, so the default
    # "same format as input" behaviour of process() is preserved.
    if outputExt:
        request["resultType"] = outputExt

    return await self.process(request)
|
|
||||||
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    # Validate required parameters before doing any document I/O.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (remove leading dot if present), e.g. ".CSV" -> "csv".
    normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
    normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()

    # Resolve the references into a DocumentReferenceList, accepting an
    # existing list object, a plain list of strings, or a single reference.
    from modules.datamodels.datamodelDocref import DocumentReferenceList
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # Fast path: if the (single) input is standardized JSON, render it
    # directly without an AI call; any failure falls through to the AI path.
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            import json
            doc = chatDocuments[0]
            # ChatDocument doesn't carry the payload - load it via fileId.
            docBytes = self.services.chat.getFileData(doc.fileId)
            if not docBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            # Decode bytes to string (assumes UTF-8 encoded JSON).
            docData = docBytes.decode('utf-8')

            # Try to parse as JSON.
            # NOTE(review): after decode() docData is always a str, so the
            # dict/None branches below are defensive leftovers and should be
            # unreachable.
            if isinstance(docData, str):
                jsonData = json.loads(docData)
            elif isinstance(docData, dict):
                jsonData = docData
            else:
                jsonData = None

            # "Standardized" JSON is recognized by a top-level "documents"
            # or "sections" key.
            if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)):
                # Use direct rendering - no AI call needed!
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)

                # Ensure the payload is wrapped in a "documents" array.
                if "documents" not in jsonData:
                    jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}

                # Title from top-level metadata, falling back to the file name.
                # NOTE(review): when the wrapping above ran, top-level
                # "metadata" no longer exists and the fallback always fires -
                # confirm this is intended.
                title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document")

                # CSV-specific render options; columnsPerRow may be None here,
                # which _applyCsvOptions treats as "no re-chunking".
                renderOptions = {}
                if normalizedOutputFormat == "csv":
                    renderOptions["delimiter"] = parameters.get("delimiter", ",")
                    renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
                    renderOptions["includeHeader"] = parameters.get("includeHeader", True)

                rendered_content, mime_type = await generationService.renderReport(
                    jsonData, normalizedOutputFormat, title, None, None
                )

                # Post-process the rendered CSV with the requested options.
                if normalizedOutputFormat == "csv" and renderOptions:
                    rendered_content = self._applyCsvOptions(rendered_content, renderOptions)

                from modules.datamodels.datamodelChat import ActionDocument
                validationMetadata = {
                    "actionType": "ai.convert",
                    "inputFormat": normalizedInputFormat,
                    "outputFormat": normalizedOutputFormat,
                    "hasSourceJson": True,
                    "conversionType": "direct_rendering"
                }
                # Output name = input base name + new extension.
                actionDoc = ActionDocument(
                    documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' in doc.documentName else doc.documentName}.{normalizedOutputFormat}",
                    documentData=rendered_content,
                    mimeType=mime_type,
                    sourceJson=jsonData,  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                )

                return ActionResult.isSuccess(documents=[actionDoc])

        except Exception as e:
            logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
            # Fall through to AI-based conversion

    # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
    columnsPerRow = parameters.get("columnsPerRow")
    delimiter = parameters.get("delimiter", ",")
    includeHeader = parameters.get("includeHeader", True)
    language = parameters.get("language", "en")

    aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."

    if normalizedOutputFormat == "csv":
        aiPrompt += f" Use '{delimiter}' as the delimiter character."
        if columnsPerRow:
            aiPrompt += f" Format the output with {columnsPerRow} columns per row."
        if not includeHeader:
            aiPrompt += " Do not include a header row."
        else:
            aiPrompt += " Include a header row with column names."

    if language and language != "en":
        aiPrompt += f" Use language: {language}."

    aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })
|
|
||||||
|
|
||||||
def _applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
    """
    Re-shape already-rendered CSV text according to formatting options.

    Supported keys in ``options``:
    - delimiter (str): Output delimiter; the input is always parsed as
      comma-separated. Default: ",".
    - columnsPerRow (int): Re-chunk every row into fixed-width rows,
      padding short chunks with "".
    - includeHeader (bool): When False, the first row is dropped. Default: True.

    Returns:
        The re-written CSV text (with csv.writer's default line endings),
        or ``csvContent`` unchanged when no option requires processing.
    """
    delimiter = options.get("delimiter", ",")
    columnsPerRow = options.get("columnsPerRow")
    includeHeader = options.get("includeHeader", True)

    # Fast path: all options at their default meaning -> nothing to do.
    if delimiter == "," and columnsPerRow is None and includeHeader:
        return csvContent

    import csv
    import io

    # Parse with the comma default, then re-emit with the requested delimiter.
    rows = list(csv.reader(io.StringIO(csvContent)))

    if not includeHeader and rows:
        del rows[0]  # drop the header row

    if columnsPerRow:
        reshaped = []
        for row in rows:
            # Break each row into fixed-width chunks, padding the last one.
            for start in range(0, len(row), columnsPerRow):
                piece = row[start:start + columnsPerRow]
                piece.extend([""] * (columnsPerRow - len(piece)))
                reshaped.append(piece)
        rows = reshaped

    sink = io.StringIO()
    csv.writer(sink, delimiter=delimiter).writerows(rows)
    return sink.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Convert document(s) to a target format via the universal AI processing
    action (``self.process``).

    Expected keys in ``parameters``:
    - documentList (list, required): Document reference(s) to convert.
    - targetFormat (str, required): Target extension (docx, pdf, xlsx, csv, txt, html, json, md, ...).
    - preserveStructure (bool, optional): Keep headings/tables/layout. Default: True.

    Returns:
        The ActionResult produced by ``self.process``, or a failure result
        when a required parameter is missing.
    """
    docs = parameters.get("documentList", [])
    if not docs:
        return ActionResult.isFailure(error="documentList is required")

    requestedFormat = parameters.get("targetFormat")
    if not requestedFormat:
        return ActionResult.isFailure(error="targetFormat is required")

    keepStructure = parameters.get("preserveStructure", True)

    # Accept ".docx" / "DOCX"-style inputs by trimming and lower-casing.
    extension = requestedFormat.strip().lstrip('.').lower()

    pieces = [f"Convert the provided document(s) to {extension.upper()} format."]
    if keepStructure:
        pieces.append(" Preserve all document structure including headings, tables, formatting, lists, and layout.")
    pieces.append(" Ensure the converted document maintains the same content and information as the original.")

    return await self.process({
        "aiPrompt": "".join(pieces),
        "documentList": docs,
        "resultType": extension
    })
|
|
||||||
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Extract structured data from document(s) via the universal AI processing
    action (``self.process``).

    Expected keys in ``parameters``:
    - documentList (list, required): Document reference(s) to read.
    - dataStructure (str, optional): "flat", "nested", or "list". Default: nested.
    - fields (list, optional): Specific field names to extract.
    - resultType (str, optional): Output format (json, csv, xlsx, ...). Default: json.

    Returns:
        The ActionResult produced by ``self.process``, or a failure result
        when documentList is missing.
    """
    docs = parameters.get("documentList", [])
    if not docs:
        return ActionResult.isFailure(error="documentList is required")

    shape = parameters.get("dataStructure", "nested")
    wantedFields = parameters.get("fields", [])
    outputExt = parameters.get("resultType", "json")

    prompt = "Extract structured data from the provided document(s)."
    if wantedFields:
        prompt += f" Extract the following specific fields: {', '.join(wantedFields)}."
    else:
        prompt += " Extract all relevant data including names, dates, amounts, entities, and key information."

    # Map the requested structure to its prompt hint; unknown values fall
    # back to the nested default (matching the case-insensitive lookup).
    shapeHints = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record."
    }
    prompt += f" {shapeHints.get(shape.lower(), shapeHints['nested'])}"
    prompt += " Ensure all extracted data is accurate and complete."

    return await self.process({
        "aiPrompt": prompt,
        "documentList": docs,
        "resultType": outputExt
    })
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Content Generation Wrapper
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Generate a document from a textual description via the universal AI
    processing action (``self.process``).

    Expected keys in ``parameters``:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template/reference documents used as a guide.
    - documentType (str, optional): letter, memo, proposal, contract, ...
    - resultType (str, optional): Output format (docx, pdf, txt, md, ...). Default: docx.

    Returns:
        The ActionResult produced by ``self.process``, or a failure result
        when the prompt is missing.
    """
    description = parameters.get("prompt")
    if not description:
        return ActionResult.isFailure(error="prompt is required")

    references = parameters.get("documentList", [])
    kind = parameters.get("documentType")
    outputExt = parameters.get("resultType", "docx")

    instruction = f"Generate a document based on the following requirements: {description}"
    if kind:
        instruction += f" Document type: {kind}."
    if references:
        instruction += " Use the provided template/reference documents as a guide for structure, format, and style."
    instruction += " Create a professional, well-structured document with appropriate formatting and organization."

    request: Dict[str, Any] = {"aiPrompt": instruction, "resultType": outputExt}
    # Only attach documentList when references were actually supplied.
    if references:
        request["documentList"] = references

    return await self.process(request)
|
|
||||||
|
|
@ -10,6 +10,7 @@ from .summarizeDocument import summarizeDocument
|
||||||
from .translateDocument import translateDocument
|
from .translateDocument import translateDocument
|
||||||
from .convertDocument import convertDocument
|
from .convertDocument import convertDocument
|
||||||
from .generateDocument import generateDocument
|
from .generateDocument import generateDocument
|
||||||
|
from .generateCode import generateCode
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'process',
|
'process',
|
||||||
|
|
@ -18,5 +19,6 @@ __all__ = [
|
||||||
'translateDocument',
|
'translateDocument',
|
||||||
'convertDocument',
|
'convertDocument',
|
||||||
'generateDocument',
|
'generateDocument',
|
||||||
|
'generateCode',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,31 +1,13 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Convert Document action for AI operations.
|
|
||||||
Converts documents between different formats (PDF→Word, Excel→CSV, etc.).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
|
|
||||||
- Input requirements: documentList (required); targetFormat (required).
|
|
||||||
- Output format: Document in target format.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- documentList (list, required): Document reference(s) to convert.
|
|
||||||
- targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
|
|
||||||
- preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
|
|
||||||
"""
|
|
||||||
documentList = parameters.get("documentList", [])
|
documentList = parameters.get("documentList", [])
|
||||||
if not documentList:
|
if not documentList:
|
||||||
return ActionResult.isFailure(error="documentList is required")
|
return ActionResult.isFailure(error="documentList is required")
|
||||||
|
|
|
||||||
135
modules/workflows/methods/methodAi/actions/generateCode.py
Normal file
135
modules/workflows/methods/methodAi/actions/generateCode.py
Normal file
|
|
@ -0,0 +1,135 @@
|
||||||
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
|
# All rights reserved.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Dict, Any, Optional, List
|
||||||
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||||
|
from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Ordered (extension, prompt-markers) pairs used to auto-detect the output
# format from the prompt text; order matters because the first match wins
# (same precedence as the original if/elif chain).
_CODE_FORMAT_MARKERS = [
    ("html", (".html", "html file")),
    ("js", (".js", "javascript")),
    ("py", (".py", "python")),
    ("ts", (".ts", "typescript")),
    ("java", (".java",)),
    ("cpp", (".cpp", ".c++")),
]

# Mime types for the recognized code extensions; anything else is text/plain.
_CODE_MIME_TYPES = {
    "html": "text/html",
    "js": "application/javascript",
    "py": "text/x-python",
    "ts": "application/typescript",
    "java": "text/x-java-source",
    "cpp": "text/x-c++src",
}


def _detectCodeResultType(prompt: str) -> str:
    """Guess the output file extension from hints in the prompt (default: txt)."""
    promptLower = prompt.lower()
    for extension, markers in _CODE_FORMAT_MARKERS:
        if any(marker in promptLower for marker in markers):
            return extension
    return "txt"  # Default


def _buildCodeDocument(aiResponse, resultType: str):
    """Wrap raw AiResponse content in an ActionDocument (name + mime type)."""
    docName = f"code.{resultType}"
    metadata = aiResponse.metadata
    if metadata and metadata.filename:
        docName = metadata.filename
    elif metadata and metadata.title:
        # Sanitize the title into a filesystem-safe name and ensure it
        # carries the detected extension.
        import re
        sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", metadata.title)
        sanitized = re.sub(r"_+", "_", sanitized).strip("_")
        if sanitized:
            if not sanitized.lower().endswith(f".{resultType}"):
                docName = f"{sanitized}.{resultType}"
            else:
                docName = sanitized

    content = aiResponse.content
    return ActionDocument(
        documentName=docName,
        documentData=content.encode('utf-8') if isinstance(content, str) else content,
        mimeType=_CODE_MIME_TYPES.get(resultType, "text/plain")
    )


async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Generate source code via the AI service using an explicit "code" intent.

    Parameters (keys of ``parameters``):
    - prompt (str, required): Description of the code to generate.
    - documentList (list | str, optional): Reference documents used as context.
    - resultType (str, optional): Output file extension; auto-detected from the
      prompt when omitted (html, js, py, ts, java, cpp), falling back to txt.
    - parentOperationId (str, optional): Parent operation ID for progress tracking.

    Returns:
        ActionResult whose documents contain the generated code file(s), or a
        failure result when the prompt is missing or the AI call raises.
    """
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    resultType = parameters.get("resultType")

    # Auto-detect format from prompt if not provided.
    if not resultType:
        resultType = _detectCodeResultType(prompt)

    parentOperationId = parameters.get('parentOperationId')
    # NOTE(review): the original also computed workflowId and an operationId
    # (f"code_gen_{workflowId}_{int(time.time())}") that were never used;
    # removed here as dead code.

    try:
        # Convert documentList to DocumentReferenceList if needed.
        docRefList = None
        if documentList:
            from modules.datamodels.datamodelDocref import DocumentReferenceList

            if isinstance(documentList, DocumentReferenceList):
                docRefList = documentList
            elif isinstance(documentList, str):
                docRefList = DocumentReferenceList.from_string_list([documentList])
            elif isinstance(documentList, list):
                docRefList = DocumentReferenceList.from_string_list(documentList)
            else:
                # Unknown container type: fall back to an empty reference list.
                docRefList = DocumentReferenceList(references=[])

        title = "Generated Code"

        # Call the AI service with an explicit code intent.
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            priority=PriorityEnum.BALANCED,
            processingMode=ProcessingModeEnum.DETAILED
        )

        aiResponse: AiResponse = await self.services.ai.callAiContent(
            prompt=prompt,
            options=options,
            documentList=docRefList,
            outputFormat=resultType,
            title=title,
            parentOperationId=parentOperationId,
            generationIntent="code"  # Explicit intent, skips detection
        )

        # Convert the AiResponse documents into ActionDocuments.
        documents = []
        if aiResponse.documents:
            for docData in aiResponse.documents:
                documents.append(ActionDocument(
                    documentName=docData.documentName,
                    documentData=docData.documentData,
                    mimeType=docData.mimeType,
                    sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None
                ))

        # If no documents but raw content exists, wrap the content in a document.
        if not documents and aiResponse.content:
            documents.append(_buildCodeDocument(aiResponse, resultType))

        return ActionResult.isSuccess(documents=documents)

    except Exception as e:
        logger.error(f"Error in code generation: {str(e)}")
        return ActionResult.isFailure(error=str(e))
|
||||||
|
|
||||||
|
|
@ -1,15 +1,9 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Generate Document action for AI operations.
|
|
||||||
Wrapper around AI service callAiContent method.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||||
|
|
@ -17,23 +11,7 @@ from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
|
|
||||||
- Input requirements: prompt or description (required); optional documentList (for templates/references).
|
|
||||||
- Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- prompt (str, required): Description of the document to generate.
|
|
||||||
- documentList (list, optional): Template documents or reference documents to use as a guide.
|
|
||||||
- documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
|
|
||||||
- resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
|
|
||||||
- maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
|
|
||||||
- parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
|
|
||||||
- progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
|
|
||||||
"""
|
|
||||||
prompt = parameters.get("prompt")
|
prompt = parameters.get("prompt")
|
||||||
if not prompt:
|
if not prompt:
|
||||||
return ActionResult.isFailure(error="prompt is required")
|
return ActionResult.isFailure(error="prompt is required")
|
||||||
|
|
@ -97,7 +75,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
|
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
|
||||||
outputFormat=resultType,
|
outputFormat=resultType,
|
||||||
title=title,
|
title=title,
|
||||||
parentOperationId=parentOperationId
|
parentOperationId=parentOperationId,
|
||||||
|
generationIntent="document" # NEW: Explicit intent, skips detection
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert AiResponse to ActionResult
|
# Convert AiResponse to ActionResult
|
||||||
|
|
|
||||||
|
|
@ -1,36 +1,17 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Process action for AI operations.
|
|
||||||
Universal AI document processing action.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any, List, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions
|
from modules.datamodels.datamodelAi import AiCallOptions
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
|
|
||||||
- Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
|
|
||||||
- Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
|
|
||||||
- Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- aiPrompt (str, required): Instruction for the AI describing what processing to perform.
|
|
||||||
- documentList (list, optional): Document reference(s) in any format to use as input/context.
|
|
||||||
- resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||||
|
|
@ -88,7 +69,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
||||||
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
||||||
|
|
||||||
# Check if contentParts are already provided (from context.extractContent or other sources)
|
# Phase 7.3: Extract content first if documents provided, then use contentParts
|
||||||
|
# Check if contentParts are already provided (preferred path)
|
||||||
contentParts: Optional[List[ContentPart]] = None
|
contentParts: Optional[List[ContentPart]] = None
|
||||||
if "contentParts" in parameters:
|
if "contentParts" in parameters:
|
||||||
contentParts = parameters.get("contentParts")
|
contentParts = parameters.get("contentParts")
|
||||||
|
|
@ -100,42 +82,96 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
|
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
|
||||||
contentParts = None
|
contentParts = None
|
||||||
|
|
||||||
|
# If contentParts not provided but documentList is, extract content first
|
||||||
|
if not contentParts and documentList.references:
|
||||||
|
self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
|
||||||
|
|
||||||
|
# Get ChatDocuments
|
||||||
|
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||||
|
if not chatDocuments:
|
||||||
|
logger.warning("No documents found in documentList")
|
||||||
|
else:
|
||||||
|
logger.info(f"Extracting content from {len(chatDocuments)} documents")
|
||||||
|
|
||||||
|
# Prepare extraction options (use defaults if not provided)
|
||||||
|
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||||
|
extractionOptions = parameters.get("extractionOptions")
|
||||||
|
if not extractionOptions:
|
||||||
|
extractionOptions = ExtractionOptions(
|
||||||
|
prompt="Extract all content from the document",
|
||||||
|
mergeStrategy=MergeStrategy(
|
||||||
|
mergeType="concatenate",
|
||||||
|
groupBy="typeGroup",
|
||||||
|
orderBy="id"
|
||||||
|
),
|
||||||
|
processDocumentsIndividually=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract content using extraction service
|
||||||
|
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
|
||||||
|
|
||||||
|
# Combine all ContentParts from all extracted results
|
||||||
|
contentParts = []
|
||||||
|
for extracted in extractedResults:
|
||||||
|
if extracted.parts:
|
||||||
|
contentParts.extend(extracted.parts)
|
||||||
|
|
||||||
|
logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
|
||||||
|
|
||||||
# Update progress - preparing AI call
|
# Update progress - preparing AI call
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
|
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
|
||||||
|
|
||||||
# Build options
|
# Detect image generation from resultType
|
||||||
|
imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
|
||||||
|
isImageGeneration = normalized_result_type in imageFormats
|
||||||
|
|
||||||
|
# Build options with correct operationType
|
||||||
output_format = output_extension.replace('.', '') or 'txt'
|
output_format = output_extension.replace('.', '') or 'txt'
|
||||||
|
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||||
options = AiCallOptions(
|
options = AiCallOptions(
|
||||||
resultFormat=output_format
|
resultFormat=output_format,
|
||||||
|
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Get generationIntent from parameters
|
||||||
|
generationIntent = parameters.get("generationIntent")
|
||||||
|
|
||||||
|
# For DATA_GENERATE, generationIntent is REQUIRED
|
||||||
|
# If not provided, default to "document" for document formats (xlsx, docx, pdf, txt, html, etc.)
|
||||||
|
# This is format-based defaulting, not prompt-based auto-detection
|
||||||
|
if options.operationType == OperationTypeEnum.DATA_GENERATE and not generationIntent:
|
||||||
|
# Document formats (default to document generation)
|
||||||
|
documentFormats = ["xlsx", "docx", "pdf", "txt", "md", "html", "csv", "xml", "json", "pptx"]
|
||||||
|
# Code formats (should use ai.generateCode instead, but default to code if ai.process is used)
|
||||||
|
codeFormats = ["py", "js", "ts", "java", "cpp", "c", "go", "rs", "rb", "php", "swift", "kt"]
|
||||||
|
|
||||||
|
if normalized_result_type in documentFormats:
|
||||||
|
generationIntent = "document"
|
||||||
|
logger.info(f"Defaulting generationIntent to 'document' for resultType '{normalized_result_type}'")
|
||||||
|
elif normalized_result_type in codeFormats:
|
||||||
|
generationIntent = "code"
|
||||||
|
logger.info(f"Defaulting generationIntent to 'code' for resultType '{normalized_result_type}'")
|
||||||
|
else:
|
||||||
|
# Unknown format - default to document (most common use case)
|
||||||
|
generationIntent = "document"
|
||||||
|
logger.warning(
|
||||||
|
f"Unknown resultType '{normalized_result_type}', defaulting generationIntent to 'document'. "
|
||||||
|
f"For code generation, use ai.generateCode action or explicitly pass generationIntent='code'."
|
||||||
|
)
|
||||||
|
|
||||||
# Update progress - calling AI
|
# Update progress - calling AI
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
|
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
|
||||||
|
|
||||||
# Use unified callAiContent method
|
# Use unified callAiContent method with contentParts (extraction is now separate)
|
||||||
# If contentParts provided (pre-extracted), use them directly
|
# ContentParts are already extracted above (or None if no documents)
|
||||||
# Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
|
aiResponse = await self.services.ai.callAiContent(
|
||||||
# Note: ContentExtracted documents (from context.extractContent) are now handled
|
prompt=aiPrompt,
|
||||||
# automatically in _extractAndPrepareContent() (Phase 5B)
|
options=options,
|
||||||
if contentParts:
|
contentParts=contentParts, # Already extracted (or None if no documents)
|
||||||
# Pre-extracted ContentParts - use them directly
|
outputFormat=output_format,
|
||||||
aiResponse = await self.services.ai.callAiContent(
|
parentOperationId=operationId,
|
||||||
prompt=aiPrompt,
|
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
|
||||||
options=options,
|
)
|
||||||
contentParts=contentParts, # Pre-extracted ContentParts
|
|
||||||
outputFormat=output_format,
|
|
||||||
parentOperationId=operationId
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Pass documentList - callAiContent handles Phases 5A-5E internally
|
|
||||||
# This includes automatic detection of ContentExtracted documents
|
|
||||||
aiResponse = await self.services.ai.callAiContent(
|
|
||||||
prompt=aiPrompt,
|
|
||||||
options=options,
|
|
||||||
documentList=documentList, # callAiContent macht Phasen 5A-5E
|
|
||||||
outputFormat=output_format,
|
|
||||||
parentOperationId=operationId
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update progress - processing result
|
# Update progress - processing result
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
|
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
|
||||||
|
|
|
||||||
|
|
@ -1,32 +1,13 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Summarize Document action for AI operations.
|
|
||||||
Summarizes one or more documents, extracting key points and main ideas.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Summarize one or more documents, extracting key points and main ideas.
|
|
||||||
- Input requirements: documentList (required); optional summaryLength, focus.
|
|
||||||
- Output format: Text document with summary (default: txt, can be overridden with resultType).
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- documentList (list, required): Document reference(s) to summarize.
|
|
||||||
- summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
|
|
||||||
- focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
|
|
||||||
- resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
|
|
||||||
"""
|
|
||||||
documentList = parameters.get("documentList", [])
|
documentList = parameters.get("documentList", [])
|
||||||
if not documentList:
|
if not documentList:
|
||||||
return ActionResult.isFailure(error="documentList is required")
|
return ActionResult.isFailure(error="documentList is required")
|
||||||
|
|
@ -50,6 +31,7 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
return await self.process({
|
return await self.process({
|
||||||
"aiPrompt": aiPrompt,
|
"aiPrompt": aiPrompt,
|
||||||
"documentList": documentList,
|
"documentList": documentList,
|
||||||
"resultType": resultType
|
"resultType": resultType,
|
||||||
|
"generationIntent": "document" # NEW: Explicit intent
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,13 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Translate Document action for AI operations.
|
|
||||||
Translates documents to a target language while preserving formatting and structure.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Translate documents to a target language while preserving formatting and structure.
|
|
||||||
- Input requirements: documentList (required); targetLanguage (required).
|
|
||||||
- Output format: Translated document in same format as input (default) or specified resultType.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- documentList (list, required): Document reference(s) to translate.
|
|
||||||
- targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
|
|
||||||
- sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
|
|
||||||
- preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
|
|
||||||
- resultType (str, optional): Output file extension. If not specified, uses same format as input.
|
|
||||||
"""
|
|
||||||
documentList = parameters.get("documentList", [])
|
documentList = parameters.get("documentList", [])
|
||||||
if not documentList:
|
if not documentList:
|
||||||
return ActionResult.isFailure(error="documentList is required")
|
return ActionResult.isFailure(error="documentList is required")
|
||||||
|
|
@ -51,7 +31,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
|
||||||
processParams = {
|
processParams = {
|
||||||
"aiPrompt": aiPrompt,
|
"aiPrompt": aiPrompt,
|
||||||
"documentList": documentList
|
"documentList": documentList,
|
||||||
|
"generationIntent": "document" # NEW: Explicit intent
|
||||||
}
|
}
|
||||||
if resultType:
|
if resultType:
|
||||||
processParams["resultType"] = resultType
|
processParams["resultType"] = resultType
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,15 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Web Research action for AI operations.
|
|
||||||
Web research with two-step process: search for URLs, then crawl content.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Web research with two-step process: search for URLs, then crawl content.
|
|
||||||
- Input requirements: prompt (required); optional list(url), country, language, researchDepth.
|
|
||||||
- Output format: JSON with research results including URLs and content.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- prompt (str, required): Natural language research instruction.
|
|
||||||
- urlList (list, optional): Specific URLs to crawl, if needed.
|
|
||||||
- country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
|
|
||||||
- language (str, optional): Language code (lowercase, e.g., de, en, fr).
|
|
||||||
- researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
prompt = parameters.get("prompt")
|
prompt = parameters.get("prompt")
|
||||||
if not prompt:
|
if not prompt:
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ from .actions.summarizeDocument import summarizeDocument
|
||||||
from .actions.translateDocument import translateDocument
|
from .actions.translateDocument import translateDocument
|
||||||
from .actions.convertDocument import convertDocument
|
from .actions.convertDocument import convertDocument
|
||||||
from .actions.generateDocument import generateDocument
|
from .actions.generateDocument import generateDocument
|
||||||
|
from .actions.generateCode import generateCode
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -59,6 +60,14 @@ class MethodAi(MethodBase):
|
||||||
required=False,
|
required=False,
|
||||||
default="txt",
|
default="txt",
|
||||||
description="Output file extension. All output documents will use this format"
|
description="Output file extension. All output documents will use this format"
|
||||||
|
),
|
||||||
|
"generationIntent": WorkflowActionParameter(
|
||||||
|
name="generationIntent",
|
||||||
|
type="str",
|
||||||
|
frontendType=FrontendType.SELECT,
|
||||||
|
frontendOptions=["document", "code", "image"],
|
||||||
|
required=False,
|
||||||
|
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). For DATA_GENERATE operations, if not provided, defaults based on resultType: document formats (xlsx, docx, pdf, etc.) → \"document\", code formats (py, js, ts, etc.) → \"code\". For IMAGE_GENERATE operations, this parameter is ignored. Best practice: Use qualified actions (ai.generateDocument, ai.generateCode) instead of ai.process."
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
execute=process.__get__(self, self.__class__)
|
execute=process.__get__(self, self.__class__)
|
||||||
|
|
@ -256,6 +265,35 @@ class MethodAi(MethodBase):
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
execute=generateDocument.__get__(self, self.__class__)
|
execute=generateDocument.__get__(self, self.__class__)
|
||||||
|
),
|
||||||
|
"generateCode": WorkflowActionDefinition(
|
||||||
|
actionId="ai.generateCode",
|
||||||
|
description="Generate code files - explicitly sets intent to 'code'",
|
||||||
|
parameters={
|
||||||
|
"prompt": WorkflowActionParameter(
|
||||||
|
name="prompt",
|
||||||
|
type="str",
|
||||||
|
frontendType=FrontendType.TEXTAREA,
|
||||||
|
required=True,
|
||||||
|
description="Description of code to generate"
|
||||||
|
),
|
||||||
|
"documentList": WorkflowActionParameter(
|
||||||
|
name="documentList",
|
||||||
|
type="List[str]",
|
||||||
|
frontendType=FrontendType.DOCUMENT_REFERENCE,
|
||||||
|
required=False,
|
||||||
|
description="Reference documents"
|
||||||
|
),
|
||||||
|
"resultType": WorkflowActionParameter(
|
||||||
|
name="resultType",
|
||||||
|
type="str",
|
||||||
|
frontendType=FrontendType.SELECT,
|
||||||
|
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
|
||||||
|
required=False,
|
||||||
|
description="Output format (html, js, py, etc.). Default: based on prompt"
|
||||||
|
)
|
||||||
|
},
|
||||||
|
execute=generateCode.__get__(self, self.__class__)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -269,6 +307,7 @@ class MethodAi(MethodBase):
|
||||||
self.translateDocument = translateDocument.__get__(self, self.__class__)
|
self.translateDocument = translateDocument.__get__(self, self.__class__)
|
||||||
self.convertDocument = convertDocument.__get__(self, self.__class__)
|
self.convertDocument = convertDocument.__get__(self, self.__class__)
|
||||||
self.generateDocument = generateDocument.__get__(self, self.__class__)
|
self.generateDocument = generateDocument.__get__(self, self.__class__)
|
||||||
|
self.generateCode = generateCode.__get__(self, self.__class__)
|
||||||
|
|
||||||
def _format_timestamp_for_filename(self) -> str:
|
def _format_timestamp_for_filename(self) -> str:
|
||||||
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
||||||
|
|
|
||||||
|
|
@ -1,460 +0,0 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
|
||||||
# All rights reserved.
|
|
||||||
"""
|
|
||||||
Context and workflow information method module.
|
|
||||||
Handles workflow context queries and document indexing.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import aiohttp
|
|
||||||
from typing import Dict, Any, List
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
||||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class MethodContext(MethodBase):
|
|
||||||
"""Context and workflow information methods."""
|
|
||||||
|
|
||||||
def __init__(self, services):
|
|
||||||
super().__init__(services)
|
|
||||||
self.name = "context"
|
|
||||||
self.description = "Context and workflow information methods"
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
|
|
||||||
- Input requirements: No input documents required. Optional resultType parameter.
|
|
||||||
- Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- resultType (str, optional): Output format (json, txt, md). Default: json.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
workflow = self.services.workflow
|
|
||||||
if not workflow:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="No workflow available"
|
|
||||||
)
|
|
||||||
|
|
||||||
resultType = parameters.get("resultType", "json").lower().strip().lstrip('.')
|
|
||||||
|
|
||||||
# Get available documents index from chat service
|
|
||||||
documentsIndex = self.services.chat.getAvailableDocuments(workflow)
|
|
||||||
|
|
||||||
if not documentsIndex or documentsIndex == "No documents available" or documentsIndex == "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.":
|
|
||||||
# Return empty index structure
|
|
||||||
if resultType == "json":
|
|
||||||
indexData = {
|
|
||||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
|
||||||
"totalDocuments": 0,
|
|
||||||
"rounds": [],
|
|
||||||
"documentReferences": []
|
|
||||||
}
|
|
||||||
indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
|
|
||||||
else:
|
|
||||||
indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
|
|
||||||
else:
|
|
||||||
# Parse the document index string to extract structured information
|
|
||||||
indexData = self._parseDocumentIndex(documentsIndex, workflow)
|
|
||||||
|
|
||||||
if resultType == "json":
|
|
||||||
indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
|
|
||||||
elif resultType == "md":
|
|
||||||
indexContent = self._formatAsMarkdown(indexData)
|
|
||||||
else: # txt
|
|
||||||
indexContent = self._formatAsText(indexData, documentsIndex)
|
|
||||||
|
|
||||||
# Generate meaningful filename
|
|
||||||
workflowContext = self.services.chat.getWorkflowContext()
|
|
||||||
filename = self._generateMeaningfulFileName(
|
|
||||||
"document_index",
|
|
||||||
resultType if resultType in ["json", "txt", "md"] else "json",
|
|
||||||
workflowContext,
|
|
||||||
"getDocumentIndex"
|
|
||||||
)
|
|
||||||
|
|
||||||
validationMetadata = {
|
|
||||||
"actionType": "context.getDocumentIndex",
|
|
||||||
"resultType": resultType,
|
|
||||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
|
||||||
"totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create ActionDocument
|
|
||||||
document = ActionDocument(
|
|
||||||
documentName=filename,
|
|
||||||
documentData=indexContent,
|
|
||||||
mimeType="application/json" if resultType == "json" else "text/plain",
|
|
||||||
validationMetadata=validationMetadata
|
|
||||||
)
|
|
||||||
|
|
||||||
return ActionResult.isSuccess(documents=[document])
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating document index: {str(e)}")
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error=f"Failed to generate document index: {str(e)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
|
|
||||||
"""Parse the document index string into structured data."""
|
|
||||||
try:
|
|
||||||
indexData = {
|
|
||||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
|
||||||
"generatedAt": datetime.now(UTC).isoformat(),
|
|
||||||
"totalDocuments": 0,
|
|
||||||
"rounds": [],
|
|
||||||
"documentReferences": []
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract document references from the index string
|
|
||||||
lines = documentsIndex.split('\n')
|
|
||||||
currentRound = None
|
|
||||||
currentDocList = None
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for round headers
|
|
||||||
if "Current round documents:" in line:
|
|
||||||
currentRound = "current"
|
|
||||||
continue
|
|
||||||
elif "Past rounds documents:" in line:
|
|
||||||
currentRound = "past"
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for document list references (docList:...)
|
|
||||||
if line.startswith("- docList:"):
|
|
||||||
docListRef = line.replace("- docList:", "").strip()
|
|
||||||
currentDocList = {
|
|
||||||
"reference": docListRef,
|
|
||||||
"round": currentRound,
|
|
||||||
"documents": []
|
|
||||||
}
|
|
||||||
indexData["rounds"].append(currentDocList)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for individual document references (docItem:...)
|
|
||||||
if line.startswith(" - docItem:") or line.startswith("- docItem:"):
|
|
||||||
docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
|
|
||||||
indexData["documentReferences"].append({
|
|
||||||
"reference": docItemRef,
|
|
||||||
"round": currentRound,
|
|
||||||
"docList": currentDocList["reference"] if currentDocList else None
|
|
||||||
})
|
|
||||||
indexData["totalDocuments"] += 1
|
|
||||||
if currentDocList:
|
|
||||||
currentDocList["documents"].append(docItemRef)
|
|
||||||
|
|
||||||
return indexData
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error parsing document index: {str(e)}")
|
|
||||||
return {
|
|
||||||
"workflowId": getattr(workflow, 'id', 'unknown'),
|
|
||||||
"error": f"Failed to parse document index: {str(e)}",
|
|
||||||
"rawIndex": documentsIndex
|
|
||||||
}
|
|
||||||
|
|
||||||
def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
|
|
||||||
"""Format document index as Markdown."""
|
|
||||||
try:
|
|
||||||
md = f"# Document Index\n\n"
|
|
||||||
md += f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n"
|
|
||||||
md += f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n"
|
|
||||||
md += f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n"
|
|
||||||
|
|
||||||
if indexData.get('rounds'):
|
|
||||||
md += "## Documents by Round\n\n"
|
|
||||||
for roundInfo in indexData['rounds']:
|
|
||||||
roundLabel = roundInfo.get('round', 'unknown').title()
|
|
||||||
md += f"### {roundLabel} Round\n\n"
|
|
||||||
md += f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n"
|
|
||||||
if roundInfo.get('documents'):
|
|
||||||
md += "**Documents:**\n\n"
|
|
||||||
for docRef in roundInfo['documents']:
|
|
||||||
md += f"- `{docRef}`\n"
|
|
||||||
md += "\n"
|
|
||||||
|
|
||||||
if indexData.get('documentReferences'):
|
|
||||||
md += "## All Document References\n\n"
|
|
||||||
for docRef in indexData['documentReferences']:
|
|
||||||
md += f"- `{docRef.get('reference', 'unknown')}`\n"
|
|
||||||
|
|
||||||
return md
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error formatting as Markdown: {str(e)}")
|
|
||||||
return f"# Document Index\n\nError formatting index: {str(e)}\n"
|
|
||||||
|
|
||||||
def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
|
|
||||||
"""Format document index as plain text."""
|
|
||||||
try:
|
|
||||||
text = "Document Index\n"
|
|
||||||
text += "=" * 50 + "\n\n"
|
|
||||||
text += f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
|
|
||||||
text += f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
|
|
||||||
text += f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
|
|
||||||
|
|
||||||
# Include the raw formatted index for readability
|
|
||||||
text += rawIndex
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error formatting as text: {str(e)}")
|
|
||||||
return f"Document Index\n\nError formatting index: {str(e)}\n\nRaw index:\n{rawIndex}\n"
|
|
||||||
|
|
||||||
@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Extract content from documents (separate from AI calls).

    This action performs pure content extraction without AI processing.
    The extracted ContentParts can then be used by subsequent AI processing actions.

    Parameters:
    - documentList (list, required): Document reference(s) to extract content from.
    - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).

    Returns:
    - ActionResult with ActionDocument containing ContentExtracted objects
    - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
    """
    # Defined before the try block so the except handler can reference it
    # safely even when the failure happens before it is assigned.
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"context_extract_{workflowId}_{int(time.time())}"

        # Extract documentList from parameters dict
        from modules.datamodels.datamodelDocref import DocumentReferenceList
        documentListParam = parameters.get("documentList")
        if not documentListParam:
            return ActionResult.isFailure(error="documentList is required")

        # Convert to DocumentReferenceList if needed (accepts an existing
        # DocumentReferenceList, a single reference string, or a list of refs)
        if isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")

        # Start progress tracking (nested under the parent operation, if any)
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Extracting content from documents",
            "Content Extraction",
            f"Documents: {len(documentList.references)}",
            parentOperationId=parentOperationId
        )

        # Get ChatDocuments from documentList
        self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
        chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)

        if not chatDocuments:
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="No documents found in documentList")

        logger.info(f"Extracting content from {len(chatDocuments)} documents")

        # Prepare extraction options
        self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
        extractionOptionsParam = parameters.get("extractionOptions")

        # Convert dict to ExtractionOptions object if needed; any other
        # (or missing) value falls through to the defaults below.
        extractionOptions = None
        if extractionOptionsParam:
            if isinstance(extractionOptionsParam, dict):
                extractionOptions = ExtractionOptions(**extractionOptionsParam)
            elif isinstance(extractionOptionsParam, ExtractionOptions):
                extractionOptions = extractionOptionsParam

        # If extractionOptions not provided (or of an invalid type), create defaults
        if not extractionOptions:
            # Default extraction options for pure content extraction (no AI processing)
            extractionOptions = ExtractionOptions(
                prompt="Extract all content from the document",
                mergeStrategy=MergeStrategy(
                    mergeType="concatenate",
                    groupBy="typeGroup",
                    orderBy="id"
                ),
                processDocumentsIndividually=True
            )

        # Call extraction service with hierarchical progress logging
        self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
        # Pass operationId for hierarchical per-document progress logging
        extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

        # Build ActionDocuments from ContentExtracted results
        self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
        actionDocuments = []
        # Map extracted results back to original documents by index (results are in same order)
        for i, extracted in enumerate(extractedResults):
            # Get original document name if available
            originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
            if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
                # Use original filename (extension stripped) with "_extracted_" suffix
                baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
                documentName = f"{baseName}_extracted_{extracted.id}.json"
            else:
                # Fallback to generic name with index
                documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"

            # Store ContentExtracted object in ActionDocument.documentData
            validationMetadata = {
                "actionType": "context.extractContent",
                "documentIndex": i,
                "extractedId": extracted.id,
                "partCount": len(extracted.parts) if extracted.parts else 0,
                "originalFileName": originalDoc.fileName if originalDoc and hasattr(originalDoc, 'fileName') else None
            }
            actionDoc = ActionDocument(
                documentName=documentName,
                documentData=extracted,  # ContentExtracted object
                mimeType="application/json",
                validationMetadata=validationMetadata
            )
            actionDocuments.append(actionDoc)

        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=actionDocuments)

    except Exception as e:
        logger.error(f"Error in content extraction: {str(e)}")

        # Complete progress tracking with failure. Guarded: operationId may
        # still be None if the failure happened before it was assigned
        # (previously this raised NameError and masked the real error).
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                pass  # Don't fail on progress logging errors

        return ActionResult.isFailure(error=str(e))
|
|
||||||
|
|
||||||
@action
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Trigger preprocessing server at customer tenant to update database with configuration.

    This action makes a POST request to the preprocessing server endpoint with the provided
    configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.

    Parameters:
    - endpoint (str, required): The full URL endpoint for the preprocessing server API.
    - configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed. An empty dict is accepted.
    - authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.

    Returns:
    - ActionResult with ActionDocument containing "ok" on success, or error message on failure.
    """
    try:
        endpoint = parameters.get("endpoint")
        if not endpoint:
            return ActionResult.isFailure(error="endpoint parameter is required")

        # Use an explicit None check: an empty dict ({}) is a legitimate
        # configuration payload and must not be rejected as "missing"
        # (the previous truthiness check wrongly failed on it).
        configJsonParam = parameters.get("configJson")
        if configJsonParam is None:
            return ActionResult.isFailure(error="configJson parameter is required")

        authSecretConfigKey = parameters.get("authSecretConfigKey")
        if not authSecretConfigKey:
            return ActionResult.isFailure(error="authSecretConfigKey parameter is required")

        # Handle configJson as either dict or JSON string
        if isinstance(configJsonParam, str):
            try:
                configJson = json.loads(configJsonParam)
            except json.JSONDecodeError as e:
                return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
        elif isinstance(configJsonParam, dict):
            configJson = configJsonParam
        else:
            return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(configJsonParam)}")

        # Get authorization secret from APP_CONFIG using the provided config key
        authSecret = APP_CONFIG.get(authSecretConfigKey)
        if not authSecret:
            errorMsg = f"{authSecretConfigKey} not found in APP_CONFIG"
            logger.error(errorMsg)
            return ActionResult.isFailure(error=errorMsg)

        # Prepare headers with authorization (default headers as in original function)
        headers = {
            "X-PP-API-Key": authSecret,
            "Content-Type": "application/json"
        }

        # Make POST request (60s total timeout for the whole request)
        timeout = aiohttp.ClientTimeout(total=60)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(
                endpoint,
                headers=headers,
                json=configJson
            ) as response:
                if response.status in [200, 201]:
                    responseText = await response.text()
                    logger.info(f"Preprocessing server trigger successful: {response.status}")
                    logger.debug(f"Response: {responseText}")

                    # Generate meaningful filename
                    workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
                    filename = self._generateMeaningfulFileName(
                        "preprocessing_result",
                        "txt",
                        workflowContext,
                        "triggerPreprocessingServer"
                    )

                    # Create validation metadata
                    validationMetadata = self._createValidationMetadata(
                        "triggerPreprocessingServer",
                        endpoint=endpoint,
                        statusCode=response.status,
                        responseText=responseText
                    )

                    # Return success with "ok" document
                    document = ActionDocument(
                        documentName=filename,
                        documentData="ok",
                        mimeType="text/plain",
                        validationMetadata=validationMetadata
                    )

                    return ActionResult.isSuccess(documents=[document])
                else:
                    errorText = await response.text()
                    errorMsg = f"Preprocessing server trigger failed: {response.status} - {errorText}"
                    logger.error(errorMsg)
                    return ActionResult.isFailure(error=errorMsg)

    except Exception as e:
        errorMsg = f"Error triggering preprocessing server: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
|
|
||||||
|
|
||||||
|
|
@ -1,49 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Extract Content action for Context operations.
|
|
||||||
Extracts content from documents (separate from AI calls).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart
|
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Extract raw content parts from documents without AI processing.
|
|
||||||
|
|
||||||
This action performs pure content extraction WITHOUT AI/OCR processing.
|
|
||||||
It returns ContentParts with different typeGroups:
|
|
||||||
- "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
|
|
||||||
- "image": Images as base64-encoded data (NOT converted to text, no OCR)
|
|
||||||
- "table": Tables as structured data
|
|
||||||
- "structure": Structured content (JSON, etc.)
|
|
||||||
- "container": Container elements (PDF pages, etc.)
|
|
||||||
|
|
||||||
IMPORTANT:
|
|
||||||
- Images are returned as base64 data, NOT as extracted text
|
|
||||||
- No OCR is performed - images are preserved as visual elements
|
|
||||||
- Text extraction only works for text-based formats (not images)
|
|
||||||
- The extracted ContentParts can then be used by subsequent AI processing actions
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- documentList (list, required): Document reference(s) to extract content from.
|
|
||||||
- extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing ContentExtracted objects
|
|
||||||
- ContentExtracted.parts contains List[ContentPart] with various typeGroups
|
|
||||||
- Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||||
|
|
|
||||||
|
|
@ -1,30 +1,14 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Get Document Index action for Context operations.
|
|
||||||
Generates a comprehensive index of all documents available in the current workflow.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
|
|
||||||
- Input requirements: No input documents required. Optional resultType parameter.
|
|
||||||
- Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- resultType (str, optional): Output format (json, txt, md). Default: json.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
workflow = self.services.workflow
|
workflow = self.services.workflow
|
||||||
if not workflow:
|
if not workflow:
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Neutralize Data action for Context operations.
|
|
||||||
Neutralizes extracted content data from ContentExtracted documents.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
||||||
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
|
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Neutralize data from ContentExtracted documents.
|
|
||||||
|
|
||||||
This action takes documents containing ContentExtracted objects (from extractContent)
|
|
||||||
and neutralizes the text data in ContentPart.data fields.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- documentList (list, required): Document reference(s) containing ContentExtracted objects.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing neutralized ContentExtracted objects
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
|
||||||
|
|
|
||||||
|
|
@ -1,37 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Trigger Preprocessing Server action for Context operations.
|
|
||||||
Triggers preprocessing server at customer tenant to update database with configuration.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Trigger preprocessing server at customer tenant to update database with configuration.
|
|
||||||
|
|
||||||
This action makes a POST request to the preprocessing server endpoint with the provided
|
|
||||||
configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- endpoint (str, required): The full URL endpoint for the preprocessing server API.
|
|
||||||
- configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
|
|
||||||
- authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing "ok" on success, or error message on failure.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
endpoint = parameters.get("endpoint")
|
endpoint = parameters.get("endpoint")
|
||||||
if not endpoint:
|
if not endpoint:
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,37 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Connect JIRA action for JIRA operations.
|
|
||||||
Connects to JIRA instance and creates ticket interface.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Connect to JIRA instance and create ticket interface.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- apiUsername (str, required): JIRA API username/email
|
|
||||||
- apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
|
|
||||||
- apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
|
|
||||||
- projectCode (str, required): JIRA project code (e.g., "DCS")
|
|
||||||
- issueType (str, required): JIRA issue type (e.g., "Task")
|
|
||||||
- taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing connection ID
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
apiUsername = parameters.get("apiUsername")
|
apiUsername = parameters.get("apiUsername")
|
||||||
if not apiUsername:
|
if not apiUsername:
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,6 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Create CSV Content action for JIRA operations.
|
|
||||||
Creates CSV content with custom headers.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
|
|
@ -14,25 +9,11 @@ import csv as csv_module
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Create CSV content with custom headers.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
|
|
||||||
- headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
|
|
||||||
- columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
|
|
||||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing CSV content as bytes
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
dataParam = parameters.get("data")
|
dataParam = parameters.get("data")
|
||||||
if not dataParam:
|
if not dataParam:
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,6 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Create Excel Content action for JIRA operations.
|
|
||||||
Creates Excel content with custom headers.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
|
|
@ -14,25 +9,11 @@ import csv as csv_module
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Create Excel content with custom headers.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
|
|
||||||
- headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
|
|
||||||
- columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
|
|
||||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing Excel content as bytes
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
dataParam = parameters.get("data")
|
dataParam = parameters.get("data")
|
||||||
if not dataParam:
|
if not dataParam:
|
||||||
|
|
|
||||||
|
|
@ -1,31 +1,14 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Export Tickets As JSON action for JIRA operations.
|
|
||||||
Exports tickets from JIRA as JSON list.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Export tickets from JIRA as JSON list.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionId (str, required): Connection ID from connectJira action result
|
|
||||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing list of tickets as JSON
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionIdParam = parameters.get("connectionId")
|
connectionIdParam = parameters.get("connectionId")
|
||||||
if not connectionIdParam:
|
if not connectionIdParam:
|
||||||
|
|
|
||||||
|
|
@ -1,32 +1,14 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Import Tickets From JSON action for JIRA operations.
|
|
||||||
Imports ticket data from JSON back to JIRA.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Import ticket data from JSON back to JIRA.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionId (str, required): Connection ID from connectJira action result
|
|
||||||
- ticketData (str, required): Document reference containing ticket data as JSON
|
|
||||||
- taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing import result with counts
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionIdParam = parameters.get("connectionId")
|
connectionIdParam = parameters.get("connectionId")
|
||||||
if not connectionIdParam:
|
if not connectionIdParam:
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,14 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Merge Ticket Data action for JIRA operations.
|
|
||||||
Merges JIRA export data with existing SharePoint data.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Merge JIRA export data with existing SharePoint data.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- jiraData (str, required): Document reference containing JIRA ticket data as JSON array
|
|
||||||
- existingData (str, required): Document reference containing existing SharePoint data as JSON array
|
|
||||||
- taskSyncDefinition (str or dict, required): Field mapping definition
|
|
||||||
- idField (str, optional): Field name to use as ID for merging (default: "ID")
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing merged data and merge details
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
jiraDataParam = parameters.get("jiraData")
|
jiraDataParam = parameters.get("jiraData")
|
||||||
if not jiraDataParam:
|
if not jiraDataParam:
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Parse CSV Content action for JIRA operations.
|
|
||||||
Parses CSV content with custom headers.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import io
|
import io
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Parse CSV content with custom headers.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- csvContent (str, required): Document reference containing CSV file content as bytes
|
|
||||||
- skipRows (int, optional): Number of header rows to skip (default: 2)
|
|
||||||
- hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing parsed data and headers as JSON
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
csvContentParam = parameters.get("csvContent")
|
csvContentParam = parameters.get("csvContent")
|
||||||
if not csvContentParam:
|
if not csvContentParam:
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Parse Excel Content action for JIRA operations.
|
|
||||||
Parses Excel content with custom headers.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Parse Excel content with custom headers.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- excelContent (str, required): Document reference containing Excel file content as bytes
|
|
||||||
- skipRows (int, optional): Number of header rows to skip (default: 3)
|
|
||||||
- hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing parsed data and headers as JSON
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
excelContentParam = parameters.get("excelContent")
|
excelContentParam = parameters.get("excelContent")
|
||||||
if not excelContentParam:
|
if not excelContentParam:
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,39 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Compose And Draft Email With Context action for Outlook operations.
|
|
||||||
Composes email content using AI from context and optional documents, then creates a draft.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
import requests
|
import requests
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Compose email content using AI from context and optional documents, then create a draft.
|
|
||||||
- Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
|
|
||||||
- Output format: JSON confirmation with AI-generated draft metadata.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- to (list, required): Recipient email addresses.
|
|
||||||
- context (str, required): Detailled context for composing the email.
|
|
||||||
- documentList (list, optional): Document references for context/attachments.
|
|
||||||
- cc (list, optional): CC recipients.
|
|
||||||
- bcc (list, optional): BCC recipients.
|
|
||||||
- emailStyle (str, optional): formal | casual | business. Default: business.
|
|
||||||
- maxLength (int, optional): Maximum length for generated content. Default: 1000.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
to = parameters.get("to")
|
to = parameters.get("to")
|
||||||
|
|
|
||||||
|
|
@ -1,36 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Read Emails action for Outlook operations.
|
|
||||||
Reads emails and metadata from a mailbox folder.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Read emails and metadata from a mailbox folder.
|
|
||||||
- Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
|
|
||||||
- Output format: JSON with emails and metadata.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- folder (str, optional): Folder to read from. Default: Inbox.
|
|
||||||
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
|
|
||||||
- filter (str, optional): Sender, query operators, or subject text.
|
|
||||||
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,15 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Search Emails action for Outlook operations.
|
|
||||||
Searches emails by query and returns matching items with metadata.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Search emails by query and return matching items with metadata.
|
|
||||||
- Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
|
|
||||||
- Output format: JSON with search results and metadata.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- query (str, required): Search expression.
|
|
||||||
- folder (str, optional): Folder scope or All. Default: All.
|
|
||||||
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
|
|
||||||
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
query = parameters.get("query")
|
query = parameters.get("query")
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Send Draft Email action for Outlook operations.
|
|
||||||
Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
|
|
||||||
- Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
|
|
||||||
- Output format: JSON confirmation with sent mail metadata for all emails.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,36 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Analyze Folder Usage action for SharePoint operations.
|
|
||||||
Analyzes usage intensity of folders and files in SharePoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Analyze usage intensity of folders and files in SharePoint.
|
|
||||||
- Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
|
|
||||||
- Output format: JSON with usage analytics grouped by time intervals.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- documentList (list, required): Document list reference(s) containing findDocumentPath result.
|
|
||||||
- startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
|
|
||||||
- endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
|
|
||||||
- interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,14 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Copy File action for SharePoint operations.
|
|
||||||
Copies file within SharePoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Copy file within SharePoint.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
|
|
||||||
- sourceFolder (str, required): Source folder path relative to site root
|
|
||||||
- sourceFile (str, required): Source file name
|
|
||||||
- destFolder (str, required): Destination folder path relative to site root
|
|
||||||
- destFile (str, required): Destination file name
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing copy result
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Download File By Path action for SharePoint operations.
|
|
||||||
Downloads file from SharePoint by exact file path.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
import os
|
import os
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Download file from SharePoint by exact file path.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
|
|
||||||
- filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing file content as base64-encoded bytes
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Find Document Path action for SharePoint operations.
|
|
||||||
Finds documents and folders by name/path across SharePoint sites.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Find documents and folders by name/path across sites.
|
|
||||||
- Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
|
|
||||||
- Output format: JSON with found items and paths.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- site (str, optional): Site hint.
|
|
||||||
- searchQuery (str, required): Search terms or path.
|
|
||||||
- maxResults (int, optional): Maximum items to return. Default: 1000.
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
|
|
@ -1,32 +1,14 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Find Site By URL action for SharePoint operations.
|
|
||||||
Finds SharePoint site by hostname and site path.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Find SharePoint site by hostname and site path.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
|
|
||||||
- sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
List Documents action for SharePoint operations.
|
|
||||||
Lists documents and folders in SharePoint paths across sites.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: List documents and folders in SharePoint paths across sites.
|
|
||||||
- Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
|
|
||||||
- Output format: JSON with folder items and metadata.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- documentList (list, required): Document list reference(s) containing findDocumentPath result.
|
|
||||||
- includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
|
|
@ -1,44 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Read Documents action for SharePoint operations.
|
|
||||||
Reads documents from SharePoint and extracts content/metadata.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Read documents from SharePoint and extract content/metadata.
|
|
||||||
- Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
|
|
||||||
- Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
|
|
||||||
- Binary files (PDFs, etc.) are Base64-encoded in documentData.
|
|
||||||
- Text files are stored as plain text in documentData.
|
|
||||||
- Returns ActionResult with documents list for template processing.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- documentList (list, optional): Document list reference(s) containing findDocumentPath result.
|
|
||||||
- pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
|
|
||||||
- includeMetadata (bool, optional): Include metadata. Default: True.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
|
|
||||||
- documentName: File name
|
|
||||||
- documentData: Base64-encoded content (binary files) or plain text (text files)
|
|
||||||
- mimeType: MIME type (e.g., application/pdf, text/plain)
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,16 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Upload Document action for SharePoint operations.
|
|
||||||
Uploads documents to SharePoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
GENERAL:
|
|
||||||
- Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
|
|
||||||
- Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
|
|
||||||
- Output format: JSON with upload status and file info.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
|
|
||||||
- pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
|
|
||||||
"""
|
|
||||||
operationId = None
|
operationId = None
|
||||||
try:
|
try:
|
||||||
# Init progress logger
|
# Init progress logger
|
||||||
|
|
|
||||||
|
|
@ -1,35 +1,15 @@
|
||||||
# Copyright (c) 2025 Patrick Motsch
|
# Copyright (c) 2025 Patrick Motsch
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
||||||
"""
|
|
||||||
Upload File action for SharePoint operations.
|
|
||||||
Uploads raw file content (bytes) to SharePoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.workflows.methods.methodBase import action
|
|
||||||
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@action
|
|
||||||
async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
|
||||||
Upload raw file content (bytes) to SharePoint.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
|
||||||
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
|
|
||||||
- folderPath (str, required): Folder path relative to site root
|
|
||||||
- fileName (str, required): File name
|
|
||||||
- content (str, required): Document reference containing file content as base64-encoded bytes
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
- ActionResult with ActionDocument containing upload result
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ class ContentValidator:
|
||||||
self.services = services
|
self.services = services
|
||||||
self.learningEngine = learningEngine
|
self.learningEngine = learningEngine
|
||||||
|
|
||||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
|
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
|
||||||
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -34,8 +34,9 @@ class ContentValidator:
|
||||||
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
||||||
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
|
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
|
||||||
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
|
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
|
||||||
|
context: Optional context object to access all documents delivered in the current round
|
||||||
"""
|
"""
|
||||||
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
|
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory, context)
|
||||||
|
|
||||||
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
|
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
|
||||||
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
|
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
|
||||||
|
|
@ -533,7 +534,7 @@ class ContentValidator:
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
|
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
|
||||||
"""AI-based comprehensive validation - generic approach"""
|
"""AI-based comprehensive validation - generic approach"""
|
||||||
try:
|
try:
|
||||||
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
||||||
|
|
@ -636,9 +637,46 @@ class ContentValidator:
|
||||||
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
|
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
|
||||||
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
|
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
|
||||||
|
|
||||||
|
# Build document index context (all documents delivered in current round)
|
||||||
|
documentIndexContext = ""
|
||||||
|
if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
|
||||||
|
try:
|
||||||
|
documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
|
||||||
|
if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
|
||||||
|
# Extract only "Current round documents" section if present
|
||||||
|
lines = documentIndex.split('\n')
|
||||||
|
currentRoundSection = []
|
||||||
|
inCurrentRound = False
|
||||||
|
for line in lines:
|
||||||
|
if "Current round documents:" in line:
|
||||||
|
inCurrentRound = True
|
||||||
|
currentRoundSection.append(line)
|
||||||
|
elif inCurrentRound:
|
||||||
|
if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
|
||||||
|
currentRoundSection.append(line)
|
||||||
|
elif line.strip() == "":
|
||||||
|
# Empty line is okay, continue
|
||||||
|
continue
|
||||||
|
elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
|
||||||
|
# End of current round section
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Still in current round section
|
||||||
|
currentRoundSection.append(line)
|
||||||
|
|
||||||
|
if currentRoundSection:
|
||||||
|
documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
|
||||||
|
documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error extracting document index for validation: {str(e)}")
|
||||||
|
# Continue without document index - not critical
|
||||||
|
|
||||||
|
# Transform criteria that require data access into metadata-only checks
|
||||||
|
transformedCriteria = self._transformCriteriaForMetadataOnly(successCriteria)
|
||||||
|
|
||||||
# Format success criteria for display with index numbers
|
# Format success criteria for display with index numbers
|
||||||
if successCriteria:
|
if transformedCriteria:
|
||||||
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
|
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(transformedCriteria)])
|
||||||
else:
|
else:
|
||||||
criteriaDisplay = "[]"
|
criteriaDisplay = "[]"
|
||||||
|
|
||||||
|
|
@ -647,7 +685,7 @@ class ContentValidator:
|
||||||
=== TASK INFORMATION ===
|
=== TASK INFORMATION ===
|
||||||
{objectiveLabel}: '{objectiveText}'
|
{objectiveLabel}: '{objectiveText}'
|
||||||
EXPECTED DATA TYPE: {dataType}
|
EXPECTED DATA TYPE: {dataType}
|
||||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}
|
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}{documentIndexContext}
|
||||||
|
|
||||||
=== VALIDATION INSTRUCTIONS ===
|
=== VALIDATION INSTRUCTIONS ===
|
||||||
|
|
||||||
|
|
@ -661,6 +699,7 @@ VALIDATION RULES:
|
||||||
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
|
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
|
||||||
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
|
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
|
||||||
7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
|
7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
|
||||||
|
8. DATA-LEVEL CRITERIA TRANSFORMATION: Criteria mentioning accuracy percentages (e.g., "95% accuracy"), completeness percentages (e.g., "98% completeness"), or "all X extracted" have been transformed to metadata-only checks. For accuracy/completeness: Check if contentPartIds reference all source documents and if structure metadata shows expected data types (tables, lists, etc.) exist. For "all X extracted": Check if contentPartIds reference all source documents mentioned in ACTION HISTORY or document index. NEVER attempt to verify accuracy/completeness by comparing actual data values - only use metadata indicators.
|
||||||
|
|
||||||
VALIDATION STEPS:
|
VALIDATION STEPS:
|
||||||
- Check ACTION HISTORY for process-oriented criteria
|
- Check ACTION HISTORY for process-oriented criteria
|
||||||
|
|
@ -812,6 +851,52 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
||||||
logger.error(f"AI validation failed: {str(e)}")
|
logger.error(f"AI validation failed: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def _transformCriteriaForMetadataOnly(self, criteria: List[str]) -> List[str]:
|
||||||
|
"""
|
||||||
|
Transform criteria that require data access into metadata-only checks.
|
||||||
|
|
||||||
|
Preserves original criterion intent while converting data-level checks to metadata checks.
|
||||||
|
Examples:
|
||||||
|
- "95% accuracy" → "[METADATA ONLY] Data structure indicates extraction completed (check contentPartIds reference all source documents)"
|
||||||
|
- "98% completeness" → "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
|
||||||
|
- "all transactions extracted" → "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
|
||||||
|
"""
|
||||||
|
if not criteria:
|
||||||
|
return []
|
||||||
|
|
||||||
|
transformed = []
|
||||||
|
for criterion in criteria:
|
||||||
|
original = criterion.strip()
|
||||||
|
transformed_criterion = original
|
||||||
|
|
||||||
|
# Pattern: accuracy percentage (e.g., "95% accuracy", "accuracy meets or exceeds 95% threshold")
|
||||||
|
if re.search(r'\d+%?\s*accuracy|accuracy.*\d+%', original, re.IGNORECASE):
|
||||||
|
# Extract the main subject (e.g., "transactions", "data", etc.)
|
||||||
|
subject_match = re.search(r'(transactions?|data|items?|records?|entries?)', original, re.IGNORECASE)
|
||||||
|
subject = subject_match.group(1).lower() if subject_match else "data"
|
||||||
|
|
||||||
|
transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference all source documents and jsonStructure shows expected {subject} structure exists (tables/lists with rowCount/itemCount > 0). Cannot verify actual {subject} accuracy values from metadata."
|
||||||
|
|
||||||
|
# Pattern: completeness percentage or "all X extracted" (e.g., "98% completeness", "all transactions extracted")
|
||||||
|
elif re.search(r'\d+%?\s*completeness|completeness.*\d+%|all\s+.*extracted|extract.*all', original, re.IGNORECASE):
|
||||||
|
# Extract the main subject
|
||||||
|
subject_match = re.search(r'(transactions?|data|items?|records?|entries?|statements?|documents?)', original, re.IGNORECASE)
|
||||||
|
subject = subject_match.group(1).lower() if subject_match else "items"
|
||||||
|
|
||||||
|
transformed_criterion = f"[METADATA ONLY] {original}: Verify that contentPartIds reference all source documents mentioned in ACTION HISTORY/document index, and jsonStructure shows {subject} structure exists (check rowCount/itemCount in tables/lists). Cannot verify actual {subject} count from metadata."
|
||||||
|
|
||||||
|
# Pattern: "no missing data" or "no incorrect data"
|
||||||
|
elif re.search(r'no\s+missing|no\s+incorrect|no\s+errors?', original, re.IGNORECASE):
|
||||||
|
transformed_criterion = f"[METADATA ONLY] {original}: Check that jsonStructure.content_type shows expected data types present (tables, lists, etc.) and contentPreview.looksLikeRenderedContent=true. Cannot verify actual data values from metadata."
|
||||||
|
|
||||||
|
# Pattern: data accuracy without percentage (e.g., "data is accurate", "accurate data")
|
||||||
|
elif re.search(r'data.*accurate|accurate.*data', original, re.IGNORECASE) and '%' not in original:
|
||||||
|
transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference source documents and jsonStructure shows expected data structure exists. Cannot verify actual data accuracy values from metadata."
|
||||||
|
|
||||||
|
transformed.append(transformed_criterion)
|
||||||
|
|
||||||
|
return transformed
|
||||||
|
|
||||||
def _createFailedValidationResult(self, errorMessage: str) -> Dict[str, Any]:
|
def _createFailedValidationResult(self, errorMessage: str) -> Dict[str, Any]:
|
||||||
"""Create a standardized failed validation result"""
|
"""Create a standardized failed validation result"""
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -158,7 +158,7 @@ class DynamicMode(BaseMode):
|
||||||
actionName = selection.get('action', 'unknown')
|
actionName = selection.get('action', 'unknown')
|
||||||
actionParameters = selection.get('parameters', {})
|
actionParameters = selection.get('parameters', {})
|
||||||
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
|
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
|
||||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory)
|
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory, context)
|
||||||
observation.contentValidation = validationResult
|
observation.contentValidation = validationResult
|
||||||
quality_score = validationResult.get('qualityScore', 0.0)
|
quality_score = validationResult.get('qualityScore', 0.0)
|
||||||
if quality_score is None:
|
if quality_score is None:
|
||||||
|
|
@ -194,6 +194,31 @@ class DynamicMode(BaseMode):
|
||||||
if decision: # Only append if decision is not None
|
if decision: # Only append if decision is not None
|
||||||
context.previousReviewResult.append(decision)
|
context.previousReviewResult.append(decision)
|
||||||
|
|
||||||
|
# Send ChatLog message if userMessage is present in refinement response
|
||||||
|
if decision and decision.userMessage:
|
||||||
|
try:
|
||||||
|
currentRound = getattr(workflow, 'currentRound', 0)
|
||||||
|
currentTask = getattr(workflow, 'currentTask', 0)
|
||||||
|
|
||||||
|
messageData = {
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"role": "assistant",
|
||||||
|
"message": decision.userMessage,
|
||||||
|
"status": "refinement",
|
||||||
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
|
"publishedAt": self.services.utils.timestampGetUtc(),
|
||||||
|
"documentsLabel": None,
|
||||||
|
"documents": [],
|
||||||
|
"roundNumber": currentRound,
|
||||||
|
"taskNumber": currentTask,
|
||||||
|
"actionNumber": step
|
||||||
|
}
|
||||||
|
|
||||||
|
self.services.chat.storeMessageWithDocuments(workflow, messageData, [])
|
||||||
|
logger.info(f"Sent refinement userMessage to UI: {decision.userMessage[:100]}...")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to send refinement userMessage to UI: {str(e)}")
|
||||||
|
|
||||||
# Store next action guidance from decision for use in next iteration
|
# Store next action guidance from decision for use in next iteration
|
||||||
if decision and decision.status == "continue" and decision.nextAction:
|
if decision and decision.status == "continue" and decision.nextAction:
|
||||||
# Set nextActionGuidance directly (now defined in TaskContext model)
|
# Set nextActionGuidance directly (now defined in TaskContext model)
|
||||||
|
|
|
||||||
|
|
@ -413,12 +413,11 @@ class DocumentGenerationFormatsTester10:
|
||||||
async def testAllFormats(self) -> Dict[str, Any]:
|
async def testAllFormats(self) -> Dict[str, Any]:
|
||||||
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
|
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
|
||||||
print("\n" + "="*80)
|
print("\n" + "="*80)
|
||||||
print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
|
print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
|
||||||
print("="*80)
|
print("="*80)
|
||||||
|
|
||||||
# Only test HTML format
|
# Test all document formats
|
||||||
formats = ["html"]
|
formats = ["docx", "xlsx", "pptx", "pdf", "html"]
|
||||||
# formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
|
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
for format in formats:
|
for format in formats:
|
||||||
|
|
@ -471,7 +470,7 @@ class DocumentGenerationFormatsTester10:
|
||||||
async def runTest(self):
|
async def runTest(self):
|
||||||
"""Run the complete test."""
|
"""Run the complete test."""
|
||||||
print("\n" + "="*80)
|
print("\n" + "="*80)
|
||||||
print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
|
print("DOCUMENT GENERATION FORMATS TEST 10 - ALL FORMATS")
|
||||||
print("="*80)
|
print("="*80)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue