From c44fc92568b1ebd1c3891447206c5d47f448e1c7 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Fri, 31 Oct 2025 00:05:39 +0100 Subject: [PATCH] refactored whole codebase for camelCase part 1 of 2 --- analyze_naming_violations.py | 242 ++++ app.py | 62 +- modules/connectors/connectorDbJson.py | 4 +- modules/connectors/connectorDbPostgre.py | 6 +- modules/connectors/connectorTicketsClickup.py | 6 +- modules/connectors/connectorTicketsJira.py | 6 +- modules/connectors/connectorVoiceGoogle.py | 100 +- modules/datamodels/datamodelChat.py | 64 +- modules/datamodels/datamodelDocument.py | 6 +- modules/datamodels/datamodelFiles.py | 12 +- modules/datamodels/datamodelJson.py | 90 ++ modules/datamodels/datamodelNeutralizer.py | 6 +- modules/datamodels/datamodelSecurity.py | 10 +- modules/datamodels/datamodelTickets.py | 6 +- modules/datamodels/datamodelUam.py | 16 +- modules/datamodels/datamodelUtils.py | 4 +- modules/datamodels/datamodelVoice.py | 10 +- .../mainNeutralizePlayground.py | 56 +- modules/features/syncDelta/mainSyncDelta.py | 58 +- modules/interfaces/interfaceAiObjects.py | 2 - modules/interfaces/interfaceDbAppObjects.py | 20 +- modules/interfaces/interfaceDbChatObjects.py | 138 ++- .../interfaces/interfaceDbComponentObjects.py | 12 +- modules/interfaces/interfaceTicketObjects.py | 4 +- modules/interfaces/interfaceVoiceObjects.py | 6 +- modules/routes/routeDataConnections.py | 8 +- modules/routes/routeDataNeutralization.py | 18 +- modules/routes/routeDataUsers.py | 12 +- modules/routes/routeSecurityGoogle.py | 28 +- modules/routes/routeSecurityLocal.py | 24 +- modules/routes/routeSecurityMsft.py | 46 +- modules/routes/routeVoiceGoogle.py | 238 ++-- modules/security/jwtService.py | 6 +- modules/security/tokenManager.py | 176 +-- modules/security/tokenRefreshMiddleware.py | 6 +- modules/security/tokenRefreshService.py | 36 +- modules/services/__init__.py | 16 +- modules/services/serviceAi/mainServiceAi.py | 792 ++++++++++-- modules/services/serviceAi/subCoreAi.py | 
687 ----------- .../serviceAi/subDocumentGeneration.py | 500 -------- .../serviceAi/subDocumentProcessing.py | 1094 +---------------- .../services/serviceAi/subSharedAiUtils.py | 165 --- .../mainServiceExtraction.py | 23 +- .../services/serviceExtraction/subMerger.py | 178 +-- .../services/serviceExtraction/subPipeline.py | 4 +- .../subPromptBuilderExtraction.py | 2 +- .../mainServiceGeneration.py | 82 +- .../serviceGeneration/renderers/registry.py | 96 +- .../renderers/rendererBaseTemplate.py | 320 ++--- .../renderers/rendererCsv.py | 164 +-- .../renderers/rendererDocx.py | 132 +- .../renderers/rendererHtml.py | 240 ++-- .../renderers/rendererImage.py | 202 +-- .../renderers/rendererJson.py | 22 +- .../renderers/rendererMarkdown.py | 148 +-- .../renderers/rendererPdf.py | 124 +- .../renderers/rendererPptx.py | 122 +- .../renderers/rendererText.py | 168 +-- .../renderers/rendererXlsx.py | 390 +++--- .../serviceGeneration/subJsonSchema.py | 113 +- .../subPromptBuilderGeneration.py | 165 +-- .../mainServiceNeutralization.py | 14 +- .../serviceNeutralization/subParseString.py | 58 +- .../serviceNeutralization/subPatterns.py | 14 +- .../serviceNeutralization/subProcessBinary.py | 14 +- .../serviceNeutralization/subProcessCommon.py | 20 +- .../serviceNeutralization/subProcessList.py | 128 +- .../serviceNeutralization/subProcessText.py | 36 +- .../mainServiceSharepoint.py | 234 ++-- .../services/serviceUtils/mainServiceUtils.py | 73 +- modules/shared/attributeUtils.py | 32 +- modules/shared/auditLogger.py | 92 +- modules/shared/configuration.py | 216 ++-- modules/shared/debugLogger.py | 4 +- modules/shared/jsonUtils.py | 144 +-- modules/shared/timezoneUtils.py | 10 +- .../adaptive/adaptiveLearningEngine.py | 60 +- .../processing/adaptive/contentValidator.py | 4 +- .../processing/adaptive/intentAnalyzer.py | 2 +- .../processing/modes/modeActionplan.py | 4 +- .../workflows/processing/modes/modeReact.py | 15 +- modules/workflows/workflowManager.py | 2 +- 
naming_violations_report.csv | 107 ++ ...ocumentsWithContinuation_usage_analysis.md | 184 +++ tool_security_encrypt_all_env_files.py | 4 +- tool_security_encrypt_config_value.py | 10 +- 86 files changed, 3969 insertions(+), 5005 deletions(-) create mode 100644 analyze_naming_violations.py create mode 100644 modules/datamodels/datamodelJson.py delete mode 100644 modules/services/serviceAi/subCoreAi.py delete mode 100644 modules/services/serviceAi/subDocumentGeneration.py delete mode 100644 modules/services/serviceAi/subSharedAiUtils.py create mode 100644 naming_violations_report.csv create mode 100644 processDocumentsWithContinuation_usage_analysis.md diff --git a/analyze_naming_violations.py b/analyze_naming_violations.py new file mode 100644 index 00000000..a4f9b30f --- /dev/null +++ b/analyze_naming_violations.py @@ -0,0 +1,242 @@ +""" +Script to analyze codebase for snake_case naming violations that should be camelStyle. +Excludes routes (decorated endpoint functions) and JSON field names. +""" +import ast +import os +import re +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Tuple +import csv + +# Patterns to exclude (external library interfaces, etc.) 
+EXCLUDE_PATTERNS = [ + r'@.*\.(get|post|put|delete|patch|options|head)', # FastAPI route decorators + r'self\.(db|db_|model|orm)', # Database ORM attributes + r'\.(objects|query|filter|get|all)', # ORM methods + r'(request|response|response_model|status_code)', # FastAPI params + r'(snake_case|kebab-case)', # String literals +] + +# External library attribute patterns (should not be changed) +EXTERNAL_LIB_ATTRIBUTES = { + 'pydantic', 'fastapi', 'sqlalchemy', 'psycopg', 'requests', + 'aiohttp', 'azure', 'google', 'openai', 'anthropic', 'reportlab', + 'docx', 'pptx', 'openpyxl', 'json', 'logging', 'datetime', 'typing' +} + +def isRouteFile(filePath: str) -> bool: + """Check if file is a route file""" + return 'routes' in filePath or 'route' in os.path.basename(filePath).lower() + +def shouldExcludeName(name: str, context: str = "") -> bool: + """Check if a name should be excluded from analysis""" + # Skip if it's a builtin or external library attribute + if name.startswith('__') and name.endswith('__'): + return True + + # Skip if context suggests external library usage + for pattern in EXCLUDE_PATTERNS: + if re.search(pattern, context, re.IGNORECASE): + return True + + return False + +def isSnakeCase(name: str) -> bool: + """Check if a name is snake_case""" + if not name or name.startswith('_'): + return False + # Check if contains underscore and is not all caps + return '_' in name and not name.isupper() + +def analyzeFile(filePath: str) -> Dict[str, List[str]]: + """Analyze a Python file for naming violations""" + violations = { + 'functions': [], + 'parameters': [], + 'variables': [] + } + + try: + with open(filePath, 'r', encoding='utf-8') as f: + content = f.read() + tree = ast.parse(content, filename=filePath) + except (SyntaxError, UnicodeDecodeError): + return violations + + # Track current context + currentClass = None + inRouteDecorator = False + + class NamingAnalyzer(ast.NodeVisitor): + def __init__(self): + self.violations = violations + 
self.currentClass = None + self.inRouteDecorator = False + self.functionDefs = [] + + def visit_FunctionDef(self, node): + # Check if this is a route endpoint (has FastAPI decorator) + isRouteEndpoint = False + for decorator in node.decorator_list: + if isinstance(decorator, ast.Attribute): + if decorator.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']: + isRouteEndpoint = True + break + elif isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Attribute): + if decorator.func.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']: + isRouteEndpoint = True + break + + # Skip route endpoint function names + # But we still need to check their parameters and variables + funcName = node.name + if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName): + self.violations['functions'].append(f"{funcName} (line {node.lineno})") + + # Analyze parameters + for arg in node.args.args: + if arg.arg != 'self' and arg.arg != 'cls': + paramName = arg.arg + if isSnakeCase(paramName) and not shouldExcludeName(paramName): + self.violations['parameters'].append(f"{paramName} in {funcName} (line {node.lineno})") + + # Analyze function body for local variables + for stmt in node.body: + self.visit(stmt) + + def visit_ClassDef(self, node): + oldClass = self.currentClass + self.currentClass = node.name + self.generic_visit(node) + self.currentClass = oldClass + + def visit_Assign(self, node): + for target in node.targets: + if isinstance(target, ast.Name): + varName = target.id + # Skip constants (ALL_CAPS), builtins, and private (_xxx) + if varName.isupper() or varName.startswith('_'): + continue + # Local variables should be camelStyle + if isSnakeCase(varName) and not shouldExcludeName(varName): + self.violations['variables'].append(f"{varName} (line {node.lineno})") + + def visit_For(self, node): + if isinstance(node.target, ast.Name): + varName = node.target.id + if isSnakeCase(varName) and not 
shouldExcludeName(varName): + self.violations['variables'].append(f"{varName} (line {node.lineno})") + self.generic_visit(node) + + def visit_With(self, node): + if node.items: + for item in node.items: + if item.optional_vars: + if isinstance(item.optional_vars, ast.Name): + varName = item.optional_vars.id + if isSnakeCase(varName) and not shouldExcludeName(varName): + self.violations['variables'].append(f"{varName} (line {node.lineno})") + self.generic_visit(node) + + analyzer = NamingAnalyzer() + analyzer.visit(tree) + + return violations + +def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]: + """Analyze entire codebase""" + results = defaultdict(lambda: { + 'functions': 0, + 'parameters': 0, + 'variables': 0, + 'details': { + 'functions': [], + 'parameters': [], + 'variables': [] + } + }) + + # Handle both absolute and relative paths + rootPath = Path(rootDir) + if not rootPath.exists(): + # Try relative to current directory + rootPath = Path('.').resolve() / rootDir + if not rootPath.exists(): + # Try just current directory if we're already in gateway + rootPath = Path('.') + + # Find all Python files + for pyFile in rootPath.rglob('*.py'): + # Skip route files for function name analysis (but analyze their internals) + filePath = str(pyFile.relative_to(rootPath)) + + # Skip test files and special scripts + if 'test' in filePath.lower() or 'tool_' in filePath or '__pycache__' in filePath: + continue + + violations = analyzeFile(str(pyFile)) + + # Check if there are any violations + totalViolations = len(violations['functions']) + len(violations['parameters']) + len(violations['variables']) + if totalViolations > 0: + moduleName = filePath.replace('\\', '/') + results[moduleName]['functions'] = len(violations['functions']) + results[moduleName]['parameters'] = len(violations['parameters']) + results[moduleName]['variables'] = len(violations['variables']) + results[moduleName]['details'] = violations + + return results + +def 
generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'): + """Generate CSV report""" + with open(outputFile, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total']) + + # Sort by total violations + sortedResults = sorted( + results.items(), + key=lambda x: x[1]['functions'] + x[1]['parameters'] + x[1]['variables'], + reverse=True + ) + + rowsWritten = 0 + for module, stats in sortedResults: + total = stats['functions'] + stats['parameters'] + stats['variables'] + if total > 0: + writer.writerow([ + module, + stats['functions'], + stats['parameters'], + stats['variables'], + total + ]) + rowsWritten += 1 + + if rowsWritten == 0: + print("WARNING: No rows written to CSV despite finding violations!") + + print(f"CSV report generated: {outputFile}") + print(f"Total modules analyzed: {len(results)}") + + # Print summary + totalFuncs = sum(r['functions'] for r in results.values()) + totalParams = sum(r['parameters'] for r in results.values()) + totalVars = sum(r['variables'] for r in results.values()) + print(f"\nSummary:") + print(f" Function names: {totalFuncs}") + print(f" Parameter names: {totalParams}") + print(f" Variable names: {totalVars}") + print(f" Total violations: {totalFuncs + totalParams + totalVars}") + +if __name__ == '__main__': + print("Analyzing codebase for naming violations...") + results = analyzeCodebase('gateway') + + # Write CSV to gateway directory + outputPath = Path('gateway') / 'naming_violations_report.csv' + generateCSV(results, str(outputPath)) + diff --git a/app.py b/app.py index e91a7892..d5254f9c 100644 --- a/app.py +++ b/app.py @@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler): """ def __init__( - self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs + self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs ): - self.log_dir = log_dir - 
self.filename_prefix = filename_prefix - self.current_date = None - self.current_file = None + self.logDir = logDir + self.filenamePrefix = filenamePrefix + self.currentDate = None + self.currentFile = None # Initialize with today's file - self._update_file_if_needed() + self._updateFileIfNeeded() # Call parent constructor with current file super().__init__( - self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs + self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs ) - def _update_file_if_needed(self): + def _updateFileIfNeeded(self): """Update the log file if the date has changed""" today = datetime.now().strftime("%Y%m%d") - if self.current_date != today: - self.current_date = today - new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log") + if self.currentDate != today: + self.currentDate = today + newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log") - if self.current_file != new_file: - self.current_file = new_file + if self.currentFile != newFile: + self.currentFile = newFile return True return False def emit(self, record): """Emit a log record, switching files if date has changed""" # Check if we need to switch to a new file - if self._update_file_if_needed(): + if self._updateFileIfNeeded(): # Close current file and open new one if self.stream: self.stream.close() self.stream = None # Update the baseFilename for the parent class - self.baseFilename = self.current_file + self.baseFilename = self.currentFile # Reopen the stream if not self.delay: self.stream = self._open() @@ -200,10 +200,10 @@ def initLogging(): backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5)) fileHandler = DailyRotatingFileHandler( - log_dir=logDir, - filename_prefix="log_app", - max_bytes=rotationSize, - backup_count=backupCount, + logDir=logDir, + filenamePrefix="log_app", + maxBytes=rotationSize, + backupCount=backupCount, encoding="utf-8", ) fileHandler.setFormatter(fileFormatter) @@ -252,7 
+252,7 @@ def initLogging(): ) -def make_sqlalchemy_db_url() -> str: +def makeSqlalchemyDbUrl() -> str: host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost") port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432") db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway") @@ -299,17 +299,17 @@ app = FastAPI( # Configure OpenAPI security scheme for Swagger UI # This adds the "Authorize" button to the /docs page -security_scheme = HTTPBearer() +securityScheme = HTTPBearer() app.openapi_schema = None # Reset schema to regenerate with security -def custom_openapi(): +def customOpenapi(): if app.openapi_schema: return app.openapi_schema from fastapi.openapi.utils import get_openapi - openapi_schema = get_openapi( + openapiSchema = get_openapi( title=app.title, version="1.0.0", description=app.description, @@ -317,7 +317,7 @@ def custom_openapi(): ) # Add security scheme definition - openapi_schema["components"]["securitySchemes"] = { + openapiSchema["components"]["securitySchemes"] = { "BearerAuth": { "type": "http", "scheme": "bearer", @@ -328,20 +328,20 @@ def custom_openapi(): # Apply security globally to all endpoints # Individual endpoints can override this if needed - openapi_schema["security"] = [{"BearerAuth": []}] + openapiSchema["security"] = [{"BearerAuth": []}] - app.openapi_schema = openapi_schema + app.openapi_schema = openapiSchema return app.openapi_schema -app.openapi = custom_openapi +app.openapi = customOpenapi # Parse CORS origins from environment variable -def get_allowed_origins(): - origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") +def getAllowedOrigins(): + originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") # Split by comma and strip whitespace - origins = [origin.strip() for origin in origins_str.split(",")] + origins = [origin.strip() for origin in originsStr.split(",")] logger.info(f"CORS allowed origins: {origins}") return origins @@ -349,7 +349,7 @@ def get_allowed_origins(): # CORS 
configuration using environment variables app.add_middleware( CORSMiddleware, - allow_origins=get_allowed_origins(), + allow_origins=getAllowedOrigins(), allow_credentials=True, allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], allow_headers=["*"], diff --git a/modules/connectors/connectorDbJson.py b/modules/connectors/connectorDbJson.py index 999814db..9ad73e8c 100644 --- a/modules/connectors/connectorDbJson.py +++ b/modules/connectors/connectorDbJson.py @@ -7,7 +7,7 @@ from pydantic import BaseModel import threading import time -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -232,7 +232,7 @@ class DatabaseConnector: raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})") # Add metadata - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() if "_createdAt" not in record: record["_createdAt"] = currentTime record["_createdBy"] = self.userId diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py index ef937a7c..58d17b66 100644 --- a/modules/connectors/connectorDbPostgre.py +++ b/modules/connectors/connectorDbPostgre.py @@ -6,7 +6,7 @@ import uuid from pydantic import BaseModel, Field import threading -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.shared.configuration import APP_CONFIG logger = logging.getLogger(__name__) @@ -287,7 +287,7 @@ class DatabaseConnector: INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt") VALUES (%s, %s, %s) """, - (table_name, initial_id, get_utc_timestamp()), + (table_name, initial_id, getUtcTimestamp()), ) self.connection.commit() @@ -611,7 +611,7 @@ class DatabaseConnector: raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}") # Add metadata - currentTime = get_utc_timestamp() + currentTime = 
getUtcTimestamp() if "_createdAt" not in record: record["_createdAt"] = currentTime record["_createdBy"] = self.userId diff --git a/modules/connectors/connectorTicketsClickup.py b/modules/connectors/connectorTicketsClickup.py index 7d92f54a..6d22c93b 100644 --- a/modules/connectors/connectorTicketsClickup.py +++ b/modules/connectors/connectorTicketsClickup.py @@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase): "Content-Type": "application/json", } - async def read_attributes(self) -> list[TicketFieldAttribute]: + async def readAttributes(self) -> list[TicketFieldAttribute]: """Fetch field attributes. Uses list custom fields if listId provided; else basic fields.""" attributes: list[TicketFieldAttribute] = [] try: @@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase): logger.error(f"ClickUp read_attributes error: {e}") return attributes - async def read_tasks(self, *, limit: int = 0) -> list[dict]: + async def readTasks(self, *, limit: int = 0) -> list[dict]: """Read tasks from ClickUp, always returning full task records. If list_id is set, read from that list; otherwise read from team. """ @@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase): logger.error(f"ClickUp read_tasks error: {e}") return tasks - async def write_tasks(self, tasklist: list[dict]) -> None: + async def writeTasks(self, tasklist: list[dict]) -> None: """Update tasks in ClickUp. 
Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}""" try: async with aiohttp.ClientSession() as session: diff --git a/modules/connectors/connectorTicketsJira.py b/modules/connectors/connectorTicketsJira.py index eb665036..27fbc676 100644 --- a/modules/connectors/connectorTicketsJira.py +++ b/modules/connectors/connectorTicketsJira.py @@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase): self.ticketType = ticketType - async def read_attributes(self) -> list[TicketFieldAttribute]: + async def readAttributes(self) -> list[TicketFieldAttribute]: """ Read field attributes from Jira by querying for a single issue and extracting the field mappings. @@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase): logger.error(f"Error while calling fields API: {str(e)}") return [] - async def read_tasks(self, *, limit: int = 0) -> list[dict]: + async def readTasks(self, *, limit: int = 0) -> list[dict]: """ Read tasks from Jira with pagination support. @@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase): logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}") raise - async def write_tasks(self, tasklist: list[dict]) -> None: + async def writeTasks(self, tasklist: list[dict]) -> None: """ Write/update tasks to Jira. diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index 2bf4aff3..5cb43f35 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -26,18 +26,18 @@ class ConnectorGoogleSpeech: """ try: # Get JSON key from config.ini - api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") + apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") - if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE": + if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE": raise ValueError("Google Speech API key not configured. 
Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key") # Parse the JSON key and set up authentication try: - credentials_info = json.loads(api_key) + credentialsInfo = json.loads(apiKey) # Create credentials object directly (no file needed!) from google.oauth2 import service_account - credentials = service_account.Credentials.from_service_account_info(credentials_info) + credentials = service_account.Credentials.from_service_account_info(credentialsInfo) logger.info("✅ Using Google Speech credentials from config.ini") @@ -55,8 +55,8 @@ class ConnectorGoogleSpeech: logger.error(f"❌ Failed to initialize Google Cloud clients: {e}") raise - async def speech_to_text(self, audio_content: bytes, language: str = "de-DE", - sample_rate: int = None, channels: int = None) -> Dict: + async def speech_to_text(self, audioContent: bytes, language: str = "de-DE", + sampleRate: int = None, channels: int = None) -> Dict: """ Convert speech to text using Google Cloud Speech-to-Text API. 
@@ -71,8 +71,8 @@ class ConnectorGoogleSpeech: """ try: # Auto-detect audio format if not provided - if sample_rate is None or channels is None: - validation = self.validate_audio_format(audio_content) + if sampleRate is None or channels is None: + validation = self.validate_audio_format(audioContent) if not validation["valid"]: return { "success": False, @@ -80,59 +80,59 @@ class ConnectorGoogleSpeech: "confidence": 0.0, "error": f"Invalid audio format: {validation.get('error', 'Unknown error')}" } - sample_rate = validation["sample_rate"] + sampleRate = validation["sample_rate"] channels = validation["channels"] - audio_format = validation["format"] - logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch") + audioFormat = validation["format"] + logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch") logger.info(f"Processing audio with Google Cloud Speech-to-Text") - logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch") + logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch") # Configure audio settings - audio = speech.RecognitionAudio(content=audio_content) + audio = speech.RecognitionAudio(content=audioContent) # Determine encoding based on detected format # Google Cloud Speech API has specific requirements for different formats - if audio_format == "webm_opus": + if audioFormat == "webm_opus": # For WEBM OPUS, we need to ensure proper format encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS # WEBM_OPUS requires specific sample rate handling - must match header - if sample_rate != 48000: - logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000") - sample_rate = 48000 + if sampleRate != 48000: + logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000") + sampleRate = 48000 # For WEBM_OPUS, don't specify sample_rate_hertz in config # Google Cloud will read it from the WEBM header - 
use_sample_rate = False - elif audio_format == "linear16": + useSampleRate = False + elif audioFormat == "linear16": # For LINEAR16 format (PCM) encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 # Ensure sample rate is reasonable - if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]: - logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000") - sample_rate = 16000 - use_sample_rate = True - elif audio_format == "mp3": + if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]: + logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000") + sampleRate = 16000 + useSampleRate = True + elif audioFormat == "mp3": # For MP3 format encoding = speech.RecognitionConfig.AudioEncoding.MP3 - use_sample_rate = True - elif audio_format == "flac": + useSampleRate = True + elif audioFormat == "flac": # For FLAC format encoding = speech.RecognitionConfig.AudioEncoding.FLAC - use_sample_rate = True - elif audio_format == "wav": + useSampleRate = True + elif audioFormat == "wav": # For WAV format encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 - use_sample_rate = True + useSampleRate = True else: # For unknown formats, try LINEAR16 as fallback encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 - sample_rate = 16000 # Use standard sample rate + sampleRate = 16000 # Use standard sample rate channels = 1 # Use mono - use_sample_rate = True - logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz") + useSampleRate = True + logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz") # Build config based on format requirements - config_params = { + configParams = { "encoding": encoding, "audio_channel_count": channels, "language_code": language, @@ -145,13 +145,13 @@ class ConnectorGoogleSpeech: } # Only add sample_rate_hertz if needed (not for WEBM_OPUS) - if use_sample_rate: - config_params["sample_rate_hertz"] = sample_rate - 
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}") + if useSampleRate: + configParams["sample_rate_hertz"] = sampleRate + logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}") else: logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}") - config = speech.RecognitionConfig(**config_params) + config = speech.RecognitionConfig(**configParams) # Perform speech recognition logger.info("Sending audio to Google Cloud Speech-to-Text...") @@ -162,12 +162,12 @@ class ConnectorGoogleSpeech: response = self.speech_client.recognize(config=config, audio=audio) logger.debug(f"Google Cloud response: {response}") - except Exception as api_error: - logger.error(f"Google Cloud API error: {api_error}") + except Exception as apiError: + logger.error(f"Google Cloud API error: {apiError}") # Try with different encoding as fallback if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16: logger.info("Trying fallback with LINEAR16 encoding...") - fallback_config = speech.RecognitionConfig( + fallbackConfig = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=16000, # Use standard sample rate audio_channel_count=1, @@ -177,13 +177,13 @@ class ConnectorGoogleSpeech: ) try: - response = self.speech_client.recognize(config=fallback_config, audio=audio) + response = self.speech_client.recognize(config=fallbackConfig, audio=audio) logger.debug(f"Google Cloud fallback response: {response}") - except Exception as fallback_error: - logger.error(f"Google Cloud fallback error: {fallback_error}") - raise api_error + except Exception as fallbackError: + logger.error(f"Google Cloud fallback error: {fallbackError}") + raise apiError else: - raise api_error + raise apiError # Process results if response.results: @@ -234,18 +234,18 
@@ class ConnectorGoogleSpeech: if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16: # For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts - if audio_format != "webm_opus": + if audioFormat != "webm_opus": # Try LINEAR16 with detected sample rate for non-WEBM formats fallback_configs.append({ "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, - "sample_rate": sample_rate, + "sample_rate": sampleRate, "channels": channels, "use_sample_rate": True, - "description": f"LINEAR16 with {sample_rate}Hz" + "description": f"LINEAR16 with {sampleRate}Hz" }) # For WEBM_OPUS, only try compatible sample rates or skip sample rate specification - if audio_format == "webm_opus": + if audioFormat == "webm_opus": # Try WEBM_OPUS without sample rate specification (let Google read from header) fallback_configs.append({ "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS, @@ -273,7 +273,7 @@ class ConnectorGoogleSpeech: else: # For other formats, try standard sample rates for std_rate in [16000, 8000, 22050, 44100]: - if std_rate != sample_rate: + if std_rate != sampleRate: fallback_configs.append({ "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, "sample_rate": std_rate, diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py index 93f8bf62..3c03e64b 100644 --- a/modules/datamodels/datamodelChat.py +++ b/modules/datamodels/datamodelChat.py @@ -3,8 +3,8 @@ from typing import List, Dict, Any, Optional from enum import Enum from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp import uuid @@ -26,7 +26,7 @@ class ChatStat(BaseModel): priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation") -register_model_labels( 
+registerModelLabels( "ChatStat", {"en": "Chat Statistics", "fr": "Statistiques de chat"}, { @@ -51,7 +51,7 @@ class ChatLog(BaseModel): message: str = Field(description="Log message") type: str = Field(description="Log type (info, warning, error, etc.)") timestamp: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="When the log entry was created (UTC timestamp in seconds)", ) status: Optional[str] = Field(None, description="Status of the log entry") @@ -63,7 +63,7 @@ class ChatLog(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatLog", {"en": "Chat Log", "fr": "Journal de chat"}, { @@ -96,7 +96,7 @@ class ChatDocument(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatDocument", {"en": "Chat Document", "fr": "Document de chat"}, { @@ -133,7 +133,7 @@ class ContentMetadata(BaseModel): base64Encoded: bool = Field(description="Whether the data is base64 encoded") -register_model_labels( +registerModelLabels( "ContentMetadata", {"en": "Content Metadata", "fr": "Métadonnées du contenu"}, { @@ -157,7 +157,7 @@ class ContentItem(BaseModel): metadata: ContentMetadata = Field(description="Content metadata") -register_model_labels( +registerModelLabels( "ContentItem", {"en": "Content Item", "fr": "Élément de contenu"}, { @@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatContentExtracted", {"en": "Extracted Content", "fr": "Contenu extrait"}, { @@ -209,7 +209,7 @@ class ChatMessage(BaseModel): description="Sequence number of the message (set automatically)" ) publishedAt: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="When the message was published (UTC timestamp in seconds)", ) success: Optional[bool] = Field( @@ -235,7 +235,7 @@ class ChatMessage(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatMessage", {"en": "Chat Message", "fr": "Message de chat"}, { @@ -331,14 
+331,14 @@ class ChatWorkflow(BaseModel): frontend_required=False, ) lastActivity: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="Timestamp of last activity (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False, ) startedAt: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="When the workflow started (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, @@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatWorkflow", {"en": "Chat Workflow", "fr": "Flux de travail de chat"}, { @@ -426,7 +426,7 @@ class UserInputRequest(BaseModel): userLanguage: str = Field(default="en", description="User's preferred language") -register_model_labels( +registerModelLabels( "UserInputRequest", {"en": "User Input Request", "fr": "Demande de saisie utilisateur"}, { @@ -445,7 +445,7 @@ class ActionDocument(BaseModel): mimeType: str = Field(description="MIME type of the document") -register_model_labels( +registerModelLabels( "ActionDocument", {"en": "Action Document", "fr": "Document d'action"}, { @@ -485,7 +485,7 @@ class ActionResult(BaseModel): return cls(success=False, documents=documents or [], error=error) -register_model_labels( +registerModelLabels( "ActionResult", {"en": "Action Result", "fr": "Résultat de l'action"}, { @@ -504,7 +504,7 @@ class ActionSelection(BaseModel): ) -register_model_labels( +registerModelLabels( "ActionSelection", {"en": "Action Selection", "fr": "Sélection d'action"}, { @@ -520,7 +520,7 @@ class ActionParameters(BaseModel): ) -register_model_labels( +registerModelLabels( "ActionParameters", {"en": "Action Parameters", "fr": "Paramètres d'action"}, { @@ -535,7 +535,7 @@ class ObservationPreview(BaseModel): snippet: str = Field(description="Short snippet or summary") -register_model_labels( +registerModelLabels( 
"ObservationPreview", {"en": "Observation Preview", "fr": "Aperçu d'observation"}, { @@ -558,7 +558,7 @@ class Observation(BaseModel): ) -register_model_labels( +registerModelLabels( "Observation", {"en": "Observation", "fr": "Observation"}, { @@ -579,7 +579,7 @@ class TaskStatus(str, Enum): CANCELLED = "cancelled" -register_model_labels( +registerModelLabels( "TaskStatus", {"en": "Task Status", "fr": "Statut de la tâche"}, { @@ -599,7 +599,7 @@ class DocumentExchange(BaseModel): ) -register_model_labels( +registerModelLabels( "DocumentExchange", {"en": "Document Exchange", "fr": "Échange de documents"}, { @@ -650,7 +650,7 @@ class ActionItem(BaseModel): self.error = error_message -register_model_labels( +registerModelLabels( "ActionItem", {"en": "Task Action", "fr": "Action de tâche"}, { @@ -683,7 +683,7 @@ class TaskResult(BaseModel): error: Optional[str] = Field(None, description="Error message if task failed") -register_model_labels( +registerModelLabels( "TaskResult", {"en": "Task Result", "fr": "Résultat de tâche"}, { @@ -728,7 +728,7 @@ class TaskItem(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskItem", {"en": "Task", "fr": "Tâche"}, { @@ -758,7 +758,7 @@ class TaskStep(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskStep", {"en": "Task Step", "fr": "Étape de tâche"}, { @@ -805,7 +805,7 @@ class TaskHandover(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskHandover", {"en": "Task Handover", "fr": "Transfert de tâche"}, { @@ -879,7 +879,7 @@ class ReviewResult(BaseModel): ) -register_model_labels( +registerModelLabels( "ReviewResult", {"en": "Review Result", "fr": "Résultat de l'évaluation"}, { @@ -904,7 +904,7 @@ class TaskPlan(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskPlan", {"en": "Task Plan", "fr": "Plan de tâches"}, { @@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel): ) -register_model_labels( +registerModelLabels( "PromptPlaceholder", {"en": "Prompt Placeholder", "fr": 
"Espace réservé d'invite"}, { @@ -943,7 +943,7 @@ class PromptBundle(BaseModel): placeholders: List[PromptPlaceholder] = Field(default_factory=list) -register_model_labels( +registerModelLabels( "PromptBundle", {"en": "Prompt Bundle", "fr": "Lot d'invite"}, { diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py index a437b6f1..33472130 100644 --- a/modules/datamodels/datamodelDocument.py +++ b/modules/datamodels/datamodelDocument.py @@ -81,11 +81,11 @@ class StructuredDocument(BaseModel): summary: Optional[str] = Field(default=None, description="Document summary") tags: List[str] = Field(default_factory=list, description="Document tags") - def get_sections_by_type(self, content_type: str) -> List[DocumentSection]: + def getSectionsByType(self, content_type: str) -> List[DocumentSection]: """Get all sections of a specific content type.""" return [section for section in self.sections if section.content_type == content_type] - def get_all_tables(self) -> List[TableData]: + def getAllTables(self) -> List[TableData]: """Get all table data from the document.""" tables = [] for section in self.sections: @@ -94,7 +94,7 @@ class StructuredDocument(BaseModel): tables.append(element) return tables - def get_all_lists(self) -> List[BulletList]: + def getAllLists(self) -> List[BulletList]: """Get all lists from the document.""" lists = [] for section in self.sections: diff --git a/modules/datamodels/datamodelFiles.py b/modules/datamodels/datamodelFiles.py index 6dcb3a0e..e1f802b7 100644 --- a/modules/datamodels/datamodelFiles.py +++ b/modules/datamodels/datamodelFiles.py @@ -2,8 +2,8 @@ from typing import Dict, Any, Optional, Union from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp import uuid import base64 
@@ -15,9 +15,9 @@ class FileItem(BaseModel): mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False) - creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "FileItem", {"en": "File Item", "fr": "Élément de fichier"}, { @@ -45,7 +45,7 @@ class FilePreview(BaseModel): if isinstance(data.get("content"), bytes): data["content"] = base64.b64encode(data["content"]).decode("utf-8") return data -register_model_labels( +registerModelLabels( "FilePreview", {"en": "File Preview", "fr": "Aperçu du fichier"}, { @@ -62,7 +62,7 @@ class FileData(BaseModel): id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") data: str = Field(description="File data content") base64Encoded: bool = Field(description="Whether the data is base64 encoded") -register_model_labels( +registerModelLabels( "FileData", {"en": "File Data", "fr": "Données de fichier"}, { diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py new file mode 100644 index 00000000..0af89375 --- /dev/null +++ b/modules/datamodels/datamodelJson.py @@ -0,0 +1,90 @@ +""" +Unified JSON document schema and helpers used by both generation prompts and renderers. 
+ +This defines a single canonical template and the supported section types. +""" + +from typing import List + +# Canonical list of supported section types across the system +supportedSectionTypes: List[str] = [ + "table", + "bullet_list", + "heading", + "paragraph", + "code_block", + "image", +] + +# Canonical JSON template used for AI generation (documents array + sections) +# Rendering pipelines can select the first document and read its sections. +jsonTemplateDocument: str = """{ + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": [ + { + "id": "doc_1", + "title": "{{DOCUMENT_TITLE}}", + "filename": "document.json", + "sections": [ + { + "id": "section_heading_example", + "content_type": "heading", + "elements": [ + {"level": 1, "text": "Heading Text"} + ], + "order": 0 + }, + { + "id": "section_paragraph_example", + "content_type": "paragraph", + "elements": [ + {"text": "Paragraph text content"} + ], + "order": 0 + }, + { + "id": "section_bullet_list_example", + "content_type": "bullet_list", + "elements": [ + { + "items": ["Item 1", "Item 2"] + } + ], + "order": 0 + }, + { + "id": "section_table_example", + "content_type": "table", + "elements": [ + { + "headers": ["Column 1", "Column 2"], + "rows": [ + ["Row 1 Col 1", "Row 1 Col 2"], + ["Row 2 Col 1", "Row 2 Col 2"] + ], + "caption": "Table caption" + } + ], + "order": 0 + }, + { + "id": "section_code_example", + "content_type": "code_block", + "elements": [ + { + "code": "function example() { return true; }", + "language": "javascript" + } + ], + "order": 0 + } + ] + } + ] +}""" + + diff --git a/modules/datamodels/datamodelNeutralizer.py b/modules/datamodels/datamodelNeutralizer.py index 93f751e5..60894dff 100644 --- a/modules/datamodels/datamodelNeutralizer.py +++ b/modules/datamodels/datamodelNeutralizer.py @@ -3,7 +3,7 @@ import uuid from typing import Optional from pydantic import BaseModel, Field -from 
modules.shared.attributeUtils import register_model_labels +from modules.shared.attributeUtils import registerModelLabels class DataNeutraliserConfig(BaseModel): @@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel): namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False) sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False) sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False) -register_model_labels( +registerModelLabels( "DataNeutraliserConfig", {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"}, { @@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel): originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True) fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True) -register_model_labels( +registerModelLabels( "DataNeutralizerAttributes", {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"}, { diff --git a/modules/datamodels/datamodelSecurity.py b/modules/datamodels/datamodelSecurity.py index cb629a01..42b9a1ad 100644 --- a/modules/datamodels/datamodelSecurity.py +++ b/modules/datamodels/datamodelSecurity.py @@ -2,8 +2,8 @@ from typing import Optional from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from 
modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp from .datamodelUam import AuthAuthority from enum import Enum import uuid @@ -51,7 +51,7 @@ class Token(BaseModel): use_enum_values = True -register_model_labels( +registerModelLabels( "Token", {"en": "Token", "fr": "Jeton"}, { @@ -95,7 +95,7 @@ class AuthEvent(BaseModel): frontend_required=True, ) timestamp: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="Unix timestamp when the event occurred", frontend_type="datetime", frontend_readonly=True, @@ -131,7 +131,7 @@ class AuthEvent(BaseModel): ) -register_model_labels( +registerModelLabels( "AuthEvent", {"en": "Authentication Event", "fr": "Événement d'authentification"}, { diff --git a/modules/datamodels/datamodelTickets.py b/modules/datamodels/datamodelTickets.py index 40478bc6..5d800b15 100644 --- a/modules/datamodels/datamodelTickets.py +++ b/modules/datamodels/datamodelTickets.py @@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel): class TicketBase(ABC): @abstractmethod - async def read_attributes(self) -> list[TicketFieldAttribute]: ... + async def readAttributes(self) -> list[TicketFieldAttribute]: ... @abstractmethod - async def read_tasks(self, *, limit: int = 0) -> list[dict]: ... + async def readTasks(self, *, limit: int = 0) -> list[dict]: ... @abstractmethod - async def write_tasks(self, tasklist: list[dict]) -> None: ... + async def writeTasks(self, tasklist: list[dict]) -> None: ... 
diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py index 747bfc27..0bf71fa9 100644 --- a/modules/datamodels/datamodelUam.py +++ b/modules/datamodels/datamodelUam.py @@ -4,8 +4,8 @@ import uuid from typing import Optional from enum import Enum from pydantic import BaseModel, Field, EmailStr -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp class AuthAuthority(str, Enum): @@ -34,7 +34,7 @@ class Mandate(BaseModel): {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, ]) enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) -register_model_labels( +registerModelLabels( "Mandate", {"en": "Mandate", "fr": "Mandat"}, { @@ -62,8 +62,8 @@ class UserConnection(BaseModel): {"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}}, {"value": "pending", "label": {"en": "Pending", "fr": "En attente"}}, ]) - connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", 
frontend_readonly=True, frontend_required=False) expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[ {"value": "active", "label": {"en": "Active", "fr": "Actif"}}, @@ -71,7 +71,7 @@ class UserConnection(BaseModel): {"value": "none", "label": {"en": "None", "fr": "Aucun"}}, ]) tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "UserConnection", {"en": "User Connection", "fr": "Connexion utilisateur"}, { @@ -113,7 +113,7 @@ class User(BaseModel): {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}}, ]) mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "User", {"en": "User", "fr": "Utilisateur"}, { @@ -131,7 +131,7 @@ register_model_labels( class UserInDB(User): hashedPassword: Optional[str] = Field(None, description="Hash of the user password") -register_model_labels( +registerModelLabels( "UserInDB", {"en": "User Access", "fr": "Accès de l'utilisateur"}, {"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}}, diff --git a/modules/datamodels/datamodelUtils.py b/modules/datamodels/datamodelUtils.py index c928cd47..67a42534 100644 --- a/modules/datamodels/datamodelUtils.py +++ b/modules/datamodels/datamodelUtils.py @@ -1,7 +1,7 @@ """Utility datamodels: Prompt.""" from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels 
+from modules.shared.attributeUtils import registerModelLabels import uuid @@ -10,7 +10,7 @@ class Prompt(BaseModel): mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True) name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True) -register_model_labels( +registerModelLabels( "Prompt", {"en": "Prompt", "fr": "Invite"}, { diff --git a/modules/datamodels/datamodelVoice.py b/modules/datamodels/datamodelVoice.py index 8be43b58..6ecdd857 100644 --- a/modules/datamodels/datamodelVoice.py +++ b/modules/datamodels/datamodelVoice.py @@ -1,8 +1,8 @@ """Voice settings datamodel.""" from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp import uuid @@ -15,11 +15,11 @@ class VoiceSettings(BaseModel): ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True) translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False) - creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - lastModified: float = Field(default_factory=get_utc_timestamp, 
description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "VoiceSettings", {"en": "Voice Settings", "fr": "Paramètres vocaux"}, { diff --git a/modules/features/neutralizePlayground/mainNeutralizePlayground.py b/modules/features/neutralizePlayground/mainNeutralizePlayground.py index 52d6a7ce..df486c8f 100644 --- a/modules/features/neutralizePlayground/mainNeutralizePlayground.py +++ b/modules/features/neutralizePlayground/mainNeutralizePlayground.py @@ -43,10 +43,6 @@ class NeutralizationPlayground: 'errors': errors, } - async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]: - from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService - processor = SharepointProcessor(self.currentUser, self.services) - return await processor.processSharepointFiles(sourcePath, targetPath) # Cleanup attributes def cleanAttributes(self, fileId: str) -> bool: @@ -77,49 +73,51 @@ class NeutralizationPlayground: } # Additional methods needed by the route - def get_config(self) -> Optional[DataNeutraliserConfig]: + def getConfig(self) -> Optional[DataNeutraliserConfig]: """Get neutralization configuration""" return self.services.neutralization.getConfig() - def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig: + def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig: """Save neutralization configuration""" - 
return self.services.neutralization.saveConfig(config_data) + return self.services.neutralization.saveConfig(configData) - def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]: + def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]: """Neutralize text content""" return self.services.neutralization.processText(text) - def resolve_text(self, text: str) -> str: + def resolveText(self, text: str) -> str: """Resolve UIDs in neutralized text back to original text""" return self.services.neutralization.resolveText(text) - def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]: + def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]: """Get neutralization attributes, optionally filtered by file ID""" try: - all_attributes = self.services.neutralization.getAttributes() - if file_id: - return [attr for attr in all_attributes if attr.fileId == file_id] - return all_attributes + allAttributes = self.services.neutralization.getAttributes() + if fileId: + return [attr for attr in allAttributes if attr.fileId == fileId] + return allAttributes except Exception as e: logger.error(f"Error getting attributes: {str(e)}") return [] - async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]: + async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]: """Process files from SharePoint source path and store neutralized files in target path""" - return await self.processSharepointFiles(source_path, target_path) + from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService + processor = SharepointProcessor(self.currentUser, self.services) + return await processor.processSharepointFiles(sourcePath, targetPath) - def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]: + def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]: """Process multiple 
files for neutralization""" - file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')] - return self.processFiles(file_ids) + fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')] + return self.processFiles(fileIds) - def get_processing_stats(self) -> Dict[str, Any]: + def getProcessingStats(self) -> Dict[str, Any]: """Get neutralization processing statistics""" return self.getStats() - def cleanup_file_attributes(self, file_id: str) -> bool: + def cleanupFileAttributes(self, fileId: str) -> bool: """Clean up neutralization attributes for a specific file""" - return self.cleanAttributes(file_id) + return self.cleanAttributes(fileId) # Internal SharePoint helper module separated to keep feature logic tidy @@ -208,7 +206,7 @@ class SharepointProcessor: siteUrl, _ = self._parseSharepointPath(sharepointPath) if not siteUrl: return False - siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl) + siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl) return siteInfo is not None except Exception: return False @@ -219,17 +217,17 @@ class SharepointProcessor: targetSite, targetFolder = self._parseSharepointPath(targetPath) if not sourceSite or not targetSite: return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']} - sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite) + sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite) if not sourceSiteInfo: return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']} - targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite) + targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite) if not targetSiteInfo: return {'success': False, 'message': f'Target site not found: 
{targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']} logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}") - files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder) + files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder) if not files: logger.warning(f"No files found in folder '{sourceFolder}', trying root folder") - files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '') + files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '') if files: folders = [f for f in files if f.get('type') == 'folder'] folderNames = [f.get('name') for f in folders] @@ -251,7 +249,7 @@ class SharepointProcessor: async def _processSingle(fileInfo: Dict[str, Any]): try: - fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id']) + fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id']) if not fileContent: return {'error': f"Failed to download file: {fileInfo['name']}"} try: @@ -260,7 +258,7 @@ class SharepointProcessor: textContent = fileContent.decode('latin-1') result = self.services.neutralization.processText(textContent) neutralizedFilename = f"neutralized_{fileInfo['name']}" - uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8')) + uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8')) if 'error' in uploadResult: return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"} return { diff --git a/modules/features/syncDelta/mainSyncDelta.py b/modules/features/syncDelta/mainSyncDelta.py index fa8c1f93..3fc9e7af 100644 --- 
a/modules/features/syncDelta/mainSyncDelta.py +++ b/modules/features/syncDelta/mainSyncDelta.py @@ -204,9 +204,9 @@ class ManagerSyncDelta: logger.info( f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}" ) - resolved = await self.services.sharepoint.find_site_by_url( + resolved = await self.services.sharepoint.findSiteByUrl( hostname=self.SHAREPOINT_HOSTNAME, - site_path=self.SHAREPOINT_SITE_PATH + sitePath=self.SHAREPOINT_SITE_PATH ) if not resolved: @@ -223,9 +223,9 @@ class ManagerSyncDelta: # Test site access by listing root of the drive logger.info("Testing site access using resolved site ID...") - test_result = await self.services.sharepoint.list_folder_contents( - site_id=self.targetSite["id"], - folder_path="" + test_result = await self.services.sharepoint.listFolderContents( + siteId=self.targetSite["id"], + folderPath="" ) if test_result is not None: @@ -293,8 +293,8 @@ class ManagerSyncDelta: existing_headers = {"header1": "Header 1", "header2": "Header 2"} try: file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}" - excel_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + excel_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) existing_data, existing_headers = self.parseExcelContent(excel_content) except Exception: @@ -307,16 +307,16 @@ class ManagerSyncDelta: await self.backupSharepointFile(filename=sync_file_name) excel_bytes = self.createExcelContent(merged_data, existing_headers) - await self.services.sharepoint.upload_file( - site_id=self.targetSite['id'], - folder_path=self.SHAREPOINT_MAIN_FOLDER, - file_name=sync_file_name, + await self.services.sharepoint.uploadFile( + siteId=self.targetSite['id'], + folderPath=self.SHAREPOINT_MAIN_FOLDER, + fileName=sync_file_name, content=excel_bytes, ) # Import back to tickets try: - excel_content = await 
self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + excel_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) excel_rows, _ = self.parseExcelContent(excel_content) self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets") @@ -333,8 +333,8 @@ class ManagerSyncDelta: existing_data: list[dict] = [] try: file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}" - csv_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + csv_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) csv_lines = csv_content.decode('utf-8').split('\n') if len(csv_lines) >= 2: @@ -348,16 +348,16 @@ class ManagerSyncDelta: await self.backupSharepointFile(filename=sync_file_name) merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data) csv_bytes = self.createCsvContent(merged_data, existing_headers) - await self.services.sharepoint.upload_file( - site_id=self.targetSite['id'], - folder_path=self.SHAREPOINT_MAIN_FOLDER, - file_name=sync_file_name, + await self.services.sharepoint.uploadFile( + siteId=self.targetSite['id'], + folderPath=self.SHAREPOINT_MAIN_FOLDER, + fileName=sync_file_name, content=csv_bytes, ) # Import from CSV try: - csv_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + csv_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python') csv_rows = df.to_dict('records') @@ -388,12 +388,12 @@ class ManagerSyncDelta: try: timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S") backup_filename = 
f"backup_{timestamp}_{filename}" - await self.services.sharepoint.copy_file_async( - site_id=self.targetSite['id'], - source_folder=self.SHAREPOINT_MAIN_FOLDER, - source_file=filename, - dest_folder=self.SHAREPOINT_BACKUP_FOLDER, - dest_file=backup_filename, + await self.services.sharepoint.copyFileAsync( + siteId=self.targetSite['id'], + sourceFolder=self.SHAREPOINT_MAIN_FOLDER, + sourceFile=filename, + destFolder=self.SHAREPOINT_BACKUP_FOLDER, + destFile=backup_filename, ) self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}") return True @@ -679,7 +679,7 @@ class ManagerSyncDelta: connectorType=connectorType, connectorParams=connectorParams, ) - attributes = await ticket_interface.connector_ticket.read_attributes() + attributes = await ticket_interface.connector_ticket.readAttributes() if not attributes: logger.warning("No ticket attributes returned; nothing to write.") return False @@ -713,7 +713,7 @@ class ManagerSyncDelta: connectorType=connectorType, connectorParams=connectorParams, ) - tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit) + tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit) if not tickets: logger.warning("No tickets returned; nothing to write.") return False diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index e58fa1ef..c6b0e62c 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -54,8 +54,6 @@ class AiObjects: # No need to manually create connectors - they're auto-discovered return cls() - - def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str: """Select the best model using dynamic model selection system.""" # Get available models from the dynamic registry diff --git a/modules/interfaces/interfaceDbAppObjects.py b/modules/interfaces/interfaceDbAppObjects.py index e37c98f8..8da37568 100644 --- 
a/modules/interfaces/interfaceDbAppObjects.py +++ b/modules/interfaces/interfaceDbAppObjects.py @@ -10,7 +10,7 @@ import uuid from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.interfaces.interfaceDbAppAccess import AppAccess from modules.datamodels.datamodelUam import ( User, @@ -604,8 +604,8 @@ class AppObjects: externalUsername=externalUsername, externalEmail=externalEmail, status=status, - connectedAt=get_utc_timestamp(), - lastChecked=get_utc_timestamp(), + connectedAt=getUtcTimestamp(), + lastChecked=getUtcTimestamp(), expiresAt=None, # Optional field, set to None by default ) @@ -755,7 +755,7 @@ class AppObjects: if not token.id: token.id = str(uuid.uuid4()) if not token.createdAt: - token.createdAt = get_utc_timestamp() + token.createdAt = getUtcTimestamp() # If replace_existing is True, delete old access tokens for this user and authority first if replace_existing: @@ -822,7 +822,7 @@ class AppObjects: if not token.id: token.id = str(uuid.uuid4()) if not token.createdAt: - token.createdAt = get_utc_timestamp() + token.createdAt = getUtcTimestamp() # Convert to dict and ensure all fields are properly set token_dict = token.model_dump() @@ -932,7 +932,7 @@ class AppObjects: return True tokenUpdate = { "status": TokenStatus.REVOKED, - "revokedAt": get_utc_timestamp(), + "revokedAt": getUtcTimestamp(), "revokedBy": revokedBy, "reason": reason or "revoked", } @@ -970,7 +970,7 @@ class AppObjects: t["id"], { "status": TokenStatus.REVOKED, - "revokedAt": get_utc_timestamp(), + "revokedAt": getUtcTimestamp(), "revokedBy": revokedBy, "reason": reason or "session logout", }, @@ -1008,7 +1008,7 @@ class AppObjects: t["id"], { "status": TokenStatus.REVOKED, - "revokedAt": get_utc_timestamp(), + "revokedAt": getUtcTimestamp(), "revokedBy": revokedBy, "reason": reason or 
"admin revoke", }, @@ -1022,7 +1022,7 @@ class AppObjects: def cleanupExpiredTokens(self) -> int: """Clean up expired tokens for all connections, returns count of cleaned tokens""" try: - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() cleaned_count = 0 # Get all tokens @@ -1100,7 +1100,7 @@ class AppObjects: # Update existing config update_data = existing_config.model_dump() update_data.update(config_data) - update_data["updatedAt"] = get_utc_timestamp() + update_data["updatedAt"] = getUtcTimestamp() updated_config = DataNeutraliserConfig(**update_data) self.db.recordModify( diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py index 92dcccf6..deea239a 100644 --- a/modules/interfaces/interfaceDbChatObjects.py +++ b/modules/interfaces/interfaceDbChatObjects.py @@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User # DYNAMIC PART: Connectors to the Interface from modules.connectors.connectorDbPostgre import DatabaseConnector -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp # Basic Configurations from modules.shared.configuration import APP_CONFIG @@ -66,56 +66,56 @@ class ChatObjects: # ===== Generic Utility Methods ===== - def _is_object_field(self, field_type) -> bool: + def _isObjectField(self, fieldType) -> bool: """Check if a field type represents a complex object (not a simple type).""" # Simple scalar types - if field_type in (str, int, float, bool, type(None)): + if fieldType in (str, int, float, bool, type(None)): return False # Everything else is an object return True - def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: + def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: """Separate simple fields from object fields based on Pydantic model structure.""" - simple_fields = {} - object_fields 
= {} + simpleFields = {} + objectFields = {} # Get field information from the Pydantic model - model_fields = model_class.model_fields + modelFields = model_class.model_fields - for field_name, value in data.items(): + for fieldName, value in data.items(): # Check if this field should be stored as JSONB in the database - if field_name in model_fields: - field_info = model_fields[field_name] + if fieldName in modelFields: + fieldInfo = modelFields[fieldName] # Pydantic v2 only - field_type = field_info.annotation + fieldType = fieldInfo.annotation # Always route relational/object fields to object_fields for separate handling - if field_name in ['documents', 'stats']: - object_fields[field_name] = value + if fieldName in ['documents', 'stats']: + objectFields[fieldName] = value continue # Check if this is a JSONB field (Dict, List, or complex types) - if (field_type == dict or - field_type == list or - (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or - field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']): + if (fieldType == dict or + fieldType == list or + (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or + fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']): # Store as JSONB - include in simple_fields for database storage - simple_fields[field_name] = value + simpleFields[fieldName] = value elif isinstance(value, (str, int, float, bool, type(None))): # Simple scalar types - simple_fields[field_name] = value + simpleFields[fieldName] = value else: # Complex objects that should be filtered out - object_fields[field_name] = value + objectFields[fieldName] = value else: # Field not in model - treat as scalar if simple, otherwise filter out if isinstance(value, (str, int, float, bool, type(None))): - simple_fields[field_name] = value + simpleFields[fieldName] = value else: - object_fields[field_name] = value + objectFields[fieldName] = value - return 
simple_fields, object_fields + return simpleFields, objectFields def _initializeServices(self): pass @@ -240,8 +240,8 @@ class ChatObjects: currentAction=workflow.get("currentAction", 0), totalTasks=workflow.get("totalTasks", 0), totalActions=workflow.get("totalActions", 0), - lastActivity=workflow.get("lastActivity", get_utc_timestamp()), - startedAt=workflow.get("startedAt", get_utc_timestamp()), + lastActivity=workflow.get("lastActivity", getUtcTimestamp()), + startedAt=workflow.get("startedAt", getUtcTimestamp()), logs=logs, messages=messages, stats=stats, @@ -257,7 +257,7 @@ class ChatObjects: raise PermissionError("No permission to create workflows") # Set timestamp if not present - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() if "startedAt" not in workflowData: workflowData["startedAt"] = currentTime @@ -265,10 +265,10 @@ class ChatObjects: workflowData["lastActivity"] = currentTime # Use generic field separation based on ChatWorkflow model - simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) + simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData) # Create workflow in database - created = self.db.recordCreate(ChatWorkflow, simple_fields) + created = self.db.recordCreate(ChatWorkflow, simpleFields) # Convert to ChatWorkflow model (empty related data for new workflow) @@ -302,13 +302,13 @@ class ChatObjects: raise PermissionError(f"No permission to update workflow {workflowId}") # Use generic field separation based on ChatWorkflow model - simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) + simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData) # Set update time for main workflow - simple_fields["lastActivity"] = get_utc_timestamp() + simpleFields["lastActivity"] = getUtcTimestamp() # Update main workflow in database - updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields) + updated = 
self.db.recordModify(ChatWorkflow, workflowId, simpleFields) # Removed cascade writes for logs/messages/stats during workflow update. # CUD for child entities must be executed via dedicated service methods. @@ -423,7 +423,7 @@ class ChatObjects: role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), - publishedAt=msg.get("publishedAt", get_utc_timestamp()), + publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), @@ -490,20 +490,30 @@ class ChatObjects: messageData["actionNumber"] = workflow.currentAction # Use generic field separation based on ChatMessage model - simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) + simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData) # Handle documents separately - they will be stored in normalized documents table - documents_to_create = object_fields.get("documents", []) + documents_to_create = objectFields.get("documents", []) # Create message in normalized table using only simple fields - createdMessage = self.db.recordCreate(ChatMessage, simple_fields) + createdMessage = self.db.recordCreate(ChatMessage, simpleFields) # Create documents in normalized documents table created_documents = [] for doc_data in documents_to_create: - # Use the document data directly - doc_dict = doc_data + # Normalize to plain dict before assignment + if isinstance(doc_data, ChatDocument): + doc_dict = doc_data.model_dump() + elif isinstance(doc_data, dict): + doc_dict = dict(doc_data) + else: + # Attempt to coerce to ChatDocument then dump + try: + doc_dict = ChatDocument(**doc_data).model_dump() + except Exception: + logger.error("Invalid document data type for message creation") + continue doc_dict["messageId"] = createdMessage["id"] created_doc = self.createDocument(doc_dict) @@ -522,8 +532,8 @@ class ChatObjects: 
role=createdMessage.get("role", "assistant"), status=createdMessage.get("status", "step"), sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number - publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()), - stats=object_fields.get("stats"), # Use stats from object_fields + publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()), + stats=objectFields.get("stats"), # Use stats from objectFields roundNumber=createdMessage.get("roundNumber"), taskNumber=createdMessage.get("taskNumber"), actionNumber=createdMessage.get("actionNumber"), @@ -588,31 +598,41 @@ class ChatObjects: raise PermissionError(f"No permission to modify workflow {workflowId}") # Use generic field separation based on ChatMessage model - simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) + simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData) # Ensure required fields present for key in ["role", "agentName"]: - if key not in simple_fields and key not in existingMessage: - simple_fields[key] = "assistant" if key == "role" else "" + if key not in simpleFields and key not in existingMessage: + simpleFields[key] = "assistant" if key == "role" else "" # Ensure ID is in the dataset - if 'id' not in simple_fields: - simple_fields['id'] = messageId + if 'id' not in simpleFields: + simpleFields['id'] = messageId # Convert createdAt to startedAt if needed - if "createdAt" in simple_fields and "startedAt" not in simple_fields: - simple_fields["startedAt"] = simple_fields["createdAt"] - del simple_fields["createdAt"] + if "createdAt" in simpleFields and "startedAt" not in simpleFields: + simpleFields["startedAt"] = simpleFields["createdAt"] + del simpleFields["createdAt"] # Update the message with simple fields only - updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields) + updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields) # Handle object field updates 
(documents, stats) inline - if 'documents' in object_fields: - documents_data = object_fields['documents'] + if 'documents' in objectFields: + documents_data = objectFields['documents'] try: for doc_data in documents_data: - doc_dict = doc_data + # Normalize to dict before mutation + if isinstance(doc_data, ChatDocument): + doc_dict = doc_data.model_dump() + elif isinstance(doc_data, dict): + doc_dict = dict(doc_data) + else: + try: + doc_dict = ChatDocument(**doc_data).model_dump() + except Exception: + logger.error("Invalid document data type for message update") + continue doc_dict["messageId"] = messageId self.createDocument(doc_dict) except Exception as e: @@ -732,11 +752,9 @@ class ChatObjects: def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument: """Creates a document for a message in normalized table.""" try: - # Validate document data + # Validate and normalize document data to dict document = ChatDocument(**documentData) - - # Create document in normalized table - created = self.db.recordCreate(ChatDocument, document) + created = self.db.recordCreate(ChatDocument, document.model_dump()) return ChatDocument(**created) @@ -785,7 +803,7 @@ class ChatObjects: # Make sure required fields are present if "timestamp" not in logData: - logData["timestamp"] = get_utc_timestamp() + logData["timestamp"] = getUtcTimestamp() # Add status information if not present if "status" not in logData and "type" in logData: @@ -882,7 +900,7 @@ class ChatObjects: messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) for msg in messages: # Apply timestamp filtering in Python - msg_timestamp = msg.get("publishedAt", get_utc_timestamp()) + msg_timestamp = msg.get("publishedAt", getUtcTimestamp()) if afterTimestamp is not None and msg_timestamp <= afterTimestamp: continue @@ -900,7 +918,7 @@ class ChatObjects: role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), - 
publishedAt=msg.get("publishedAt", get_utc_timestamp()), + publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), @@ -923,7 +941,7 @@ class ChatObjects: logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) for log in logs: # Apply timestamp filtering in Python - log_timestamp = log.get("timestamp", get_utc_timestamp()) + log_timestamp = log.get("timestamp", getUtcTimestamp()) if afterTimestamp is not None and log_timestamp <= afterTimestamp: continue @@ -938,7 +956,7 @@ class ChatObjects: stats = self.getStats(workflowId) for stat in stats: # Apply timestamp filtering in Python - stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp() + stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp() if afterTimestamp is not None and stat_timestamp <= afterTimestamp: continue diff --git a/modules/interfaces/interfaceDbComponentObjects.py b/modules/interfaces/interfaceDbComponentObjects.py index d2a74e69..20e7aae1 100644 --- a/modules/interfaces/interfaceDbComponentObjects.py +++ b/modules/interfaces/interfaceDbComponentObjects.py @@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt from modules.datamodels.datamodelVoice import VoiceSettings from modules.datamodels.datamodelUam import User, Mandate from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -466,7 +466,7 @@ class ComponentObjects: # Ensure proper values, use defaults for invalid data creationDate = file.get("creationDate") if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0: - creationDate = get_utc_timestamp() + creationDate = getUtcTimestamp() fileName = file.get("fileName") if not fileName or fileName == "None": @@ -503,7 
+503,7 @@ class ComponentObjects: # Get creation date from record or use current time creationDate = file.get("creationDate") if not creationDate: - creationDate = get_utc_timestamp() + creationDate = getUtcTimestamp() return FileItem( id=file.get("id"), @@ -881,9 +881,9 @@ class ComponentObjects: # Ensure timestamps are set for validation settings_data = filteredSettings[0] if not settings_data.get("creationDate"): - settings_data["creationDate"] = get_utc_timestamp() + settings_data["creationDate"] = getUtcTimestamp() if not settings_data.get("lastModified"): - settings_data["lastModified"] = get_utc_timestamp() + settings_data["lastModified"] = getUtcTimestamp() return VoiceSettings(**settings_data) @@ -931,7 +931,7 @@ class ComponentObjects: raise ValueError(f"Voice settings not found for user {userId}") # Update lastModified timestamp - updateData["lastModified"] = get_utc_timestamp() + updateData["lastModified"] = getUtcTimestamp() # Update voice settings record success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index b7a090fa..8c76d455 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -31,7 +31,7 @@ class TicketInterface: self.task_sync_definition = task_sync_definition async def exportTicketsAsList(self) -> list[dict]: - tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0) + tickets: list[dict] = await self.connector_ticket.readTasks(limit=0) transformed_tasks = self._transformTicketRecords(tickets, includePut=True) # Return plain dictionaries filtered by presence of ID rows: list[dict] = [] @@ -57,7 +57,7 @@ class TicketInterface: if fields: updates.append({"ID": task_id, "fields": fields}) if updates: - await self.connector_ticket.write_tasks(updates) + await self.connector_ticket.writeTasks(updates) def _transformTicketRecords( self, tasks: 
list[dict], includePut: bool = False diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py index a1d0b96c..2bb1b729 100644 --- a/modules/interfaces/interfaceVoiceObjects.py +++ b/modules/interfaces/interfaceVoiceObjects.py @@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech from modules.datamodels.datamodelVoice import VoiceSettings from modules.datamodels.datamodelUam import User -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -269,7 +269,7 @@ class VoiceObjects: logger.info(f"Creating voice settings: {settingsData}") # Add timestamps - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() settingsData["creationDate"] = currentTime settingsData["lastModified"] = currentTime @@ -298,7 +298,7 @@ class VoiceObjects: logger.info(f"Updating voice settings for user {userId}: {settingsData}") # Add last modified timestamp - settingsData["lastModified"] = get_utc_timestamp() + settingsData["lastModified"] = getUtcTimestamp() # Create updated VoiceSettings object voiceSettings = VoiceSettings(**settingsData) diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index 1c49fa13..eec8d140 100644 --- a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority, from modules.datamodels.datamodelSecurity import Token from modules.security.auth import getCurrentUser, limiter from modules.interfaces.interfaceDbAppObjects import getInterface -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp # Configure logger logger = logging.getLogger(__name__) @@ -64,7 +64,7 @@ def 
get_token_status_for_connection(interface, connection_id: str) -> tuple[str, if not expires_at: return "none", None - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() # Add 5 minute buffer for proactive refresh buffer_time = 5 * 60 # 5 minutes in seconds @@ -247,7 +247,7 @@ async def update_connection( setattr(connection, field, value) # Update lastChecked timestamp using UTC timestamp - connection.lastChecked = get_utc_timestamp() + connection.lastChecked = getUtcTimestamp() # Update connection - models now handle timestamp serialization automatically interface.db.recordModify(UserConnection, connectionId, connection.model_dump()) @@ -382,7 +382,7 @@ async def disconnect_service( # Update connection status connection.status = ConnectionStatus.INACTIVE - connection.lastChecked = get_utc_timestamp() + connection.lastChecked = getUtcTimestamp() # Update connection record - models now handle timestamp serialization automatically interface.db.recordModify(UserConnection, connectionId, connection.model_dump()) diff --git a/modules/routes/routeDataNeutralization.py b/modules/routes/routeDataNeutralization.py index 61e8c25d..07d6c025 100644 --- a/modules/routes/routeDataNeutralization.py +++ b/modules/routes/routeDataNeutralization.py @@ -35,7 +35,7 @@ async def get_neutralization_config( """Get data neutralization configuration""" try: service = NeutralizationPlayground(currentUser) - config = service.get_config() + config = service.getConfig() if not config: # Return default config instead of 404 @@ -69,7 +69,7 @@ async def save_neutralization_config( """Save or update data neutralization configuration""" try: service = NeutralizationPlayground(currentUser) - config = service.save_config(config_data) + config = service.saveConfig(config_data) return config @@ -99,7 +99,7 @@ async def neutralize_text( ) service = NeutralizationPlayground(currentUser) - result = service.neutralize_text(text, file_id) + result = service.neutralizeText(text, file_id) 
return result @@ -130,7 +130,7 @@ async def resolve_text( ) service = NeutralizationPlayground(currentUser) - resolved_text = service.resolve_text(text) + resolved_text = service.resolveText(text) return {"resolved_text": resolved_text} @@ -153,7 +153,7 @@ async def get_neutralization_attributes( """Get neutralization attributes, optionally filtered by file ID""" try: service = NeutralizationPlayground(currentUser) - attributes = service.get_attributes(fileId) + attributes = service.getAttributes(fileId) return attributes @@ -183,7 +183,7 @@ async def process_sharepoint_files( ) service = NeutralizationPlayground(currentUser) - result = await service.process_sharepoint_files(source_path, target_path) + result = await service.processSharepointFiles(source_path, target_path) return result @@ -212,7 +212,7 @@ async def batch_process_files( ) service = NeutralizationPlayground(currentUser) - result = service.batch_neutralize_files(files_data) + result = service.batchNeutralizeFiles(files_data) return result @@ -234,7 +234,7 @@ async def get_neutralization_stats( """Get neutralization processing statistics""" try: service = NeutralizationPlayground(currentUser) - stats = service.get_processing_stats() + stats = service.getProcessingStats() return stats @@ -255,7 +255,7 @@ async def cleanup_file_attributes( """Clean up neutralization attributes for a specific file""" try: service = NeutralizationPlayground(currentUser) - success = service.cleanup_file_attributes(fileId) + success = service.cleanupFileAttributes(fileId) if success: return {"message": f"Successfully cleaned up attributes for file {fileId}"} diff --git a/modules/routes/routeDataUsers.py b/modules/routes/routeDataUsers.py index 7a0ea735..7b7e627f 100644 --- a/modules/routes/routeDataUsers.py +++ b/modules/routes/routeDataUsers.py @@ -181,9 +181,9 @@ async def reset_user_password( # Log password reset try: from modules.shared.auditLogger import audit_logger - audit_logger.log_security_event( - 
user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logSecurityEvent( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="password_reset", details=f"Reset password for user {userId}" ) @@ -257,9 +257,9 @@ async def change_password( # Log password change try: from modules.shared.auditLogger import audit_logger - audit_logger.log_security_event( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logSecurityEvent( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="password_change", details="User changed their own password" ) diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py index 8177da8d..fbd9a445 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.security.auth import getCurrentUser, limiter -from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp +from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp # Configure logger logger = logging.getLogger(__name__) @@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenRefresh=token_response.get("refresh_token", ""), tokenType="bearer", expiresAt=jwt_expires_at.timestamp(), - createdAt=get_utc_timestamp() + createdAt=getUtcTimestamp() ) # Save access token (no connectionId) @@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse logger.info(f"Updating connection {connection_id} for user {user.username}") # Update connection with external service details connection.status = ConnectionStatus.ACTIVE - 
connection.lastChecked = get_utc_timestamp() - connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0) + connection.lastChecked = getUtcTimestamp() + connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0) connection.externalId = user_info.get("id") connection.externalUsername = user_info.get("email") connection.externalEmail = user_info.get("email") @@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=token_response["access_token"], tokenRefresh=token_response.get("refresh_token", ""), tokenType=token_response.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), - createdAt=get_utc_timestamp() + expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)), + createdAt=getUtcTimestamp() ) interface.saveConnectionToken(token) @@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse id: '{connection.id}', status: 'connected', type: 'google', - lastChecked: {get_utc_timestamp()}, - expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))} + lastChecked: {getUtcTimestamp()}, + expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))} }} }}, '*'); // Wait for message to be sent before closing @@ -592,11 +592,11 @@ async def logout( # Log successful logout try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logUserAccess( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="logout", - success_info="google_auth_logout" + successInfo="google_auth_logout" ) except Exception: # Don't fail if audit logging fails @@ -726,12 +726,12 @@ async def refresh_token( # Update the connection status and timing google_connection.expiresAt = float(current_token.expiresAt) if 
current_token.expiresAt else google_connection.expiresAt - google_connection.lastChecked = get_utc_timestamp() + google_connection.lastChecked = getUtcTimestamp() google_connection.status = ConnectionStatus.ACTIVE appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump()) # Calculate time until expiration - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0 return { diff --git a/modules/routes/routeSecurityLocal.py b/modules/routes/routeSecurityLocal.py index d705d5c6..0a2fff71 100644 --- a/modules/routes/routeSecurityLocal.py +++ b/modules/routes/routeSecurityLocal.py @@ -131,11 +131,11 @@ async def login( # Log successful login try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(user.id), - mandate_id=str(user.mandateId), + audit_logger.logUserAccess( + userId=str(user.id), + mandateId=str(user.mandateId), action="login", - success_info="local_auth_success" + successInfo="local_auth_success" ) except Exception: # Don't fail if audit logging fails @@ -159,11 +159,11 @@ async def login( # Log failed login attempt try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id="unknown", - mandate_id="unknown", + audit_logger.logUserAccess( + userId="unknown", + mandateId="unknown", action="login", - success_info=f"failed: {error_msg}" + successInfo=f"failed: {error_msg}" ) except Exception: # Don't fail if audit logging fails @@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen # Log successful logout try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logUserAccess( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="logout", - 
success_info=f"revoked_tokens: {revoked}" + successInfo=f"revoked_tokens: {revoked}" ) except Exception: # Don't fail if audit logging fails diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index 6c16c37e..30c5d33e 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu from modules.datamodels.datamodelSecurity import Token from modules.security.auth import getCurrentUser, limiter from modules.security.jwtService import createAccessToken -from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp +from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp # Configure logger logger = logging.getLogger(__name__) @@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=token_response["access_token"], tokenRefresh=token_response.get("refresh_token", ""), tokenType=token_response.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), - createdAt=get_utc_timestamp() + expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)), + createdAt=getUtcTimestamp() ) # Save access token (no connectionId) @@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=jwt_token, tokenType="bearer", expiresAt=jwt_expires_at.timestamp(), - createdAt=get_utc_timestamp() + createdAt=getUtcTimestamp() ) # Save JWT access token @@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse logger.info(f"Updating connection {connection_id} for user {user.username}") # Update connection with external service details connection.status = ConnectionStatus.ACTIVE - connection.lastChecked = get_utc_timestamp() - connection.expiresAt = get_utc_timestamp() + 
token_response.get("expires_in", 0) + connection.lastChecked = getUtcTimestamp() + connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0) connection.externalId = user_info.get("id") connection.externalUsername = user_info.get("userPrincipalName") connection.externalEmail = user_info.get("mail") @@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=token_response["access_token"], tokenRefresh=token_response.get("refresh_token", ""), tokenType=token_response.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), - createdAt=get_utc_timestamp() + expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)), + createdAt=getUtcTimestamp() ) @@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse id: '{connection.id}', status: 'connected', type: 'msft', - lastChecked: {get_utc_timestamp()}, - expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))} + lastChecked: {getUtcTimestamp()}, + expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))} }} }}, '*'); // Wait for message to be sent before closing @@ -467,11 +467,11 @@ async def logout( # Log successful logout try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logUserAccess( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="logout", - success_info="microsoft_auth_logout" + successInfo="microsoft_auth_logout" ) except Exception: # Don't fail if audit logging fails @@ -575,27 +575,27 @@ async def refresh_token( from modules.security.tokenManager import TokenManager token_manager = TokenManager() - refreshed_token = token_manager.refresh_token(current_token) - if refreshed_token: + refreshedToken = token_manager.refreshToken(current_token) + if 
refreshedToken: # Save the new connection token (which will automatically replace old ones) - appInterface.saveConnectionToken(refreshed_token) + appInterface.saveConnectionToken(refreshedToken) # Update the connection's expiration time - msft_connection.expiresAt = float(refreshed_token.expiresAt) - msft_connection.lastChecked = get_utc_timestamp() + msft_connection.expiresAt = float(refreshedToken.expiresAt) + msft_connection.lastChecked = getUtcTimestamp() msft_connection.status = ConnectionStatus.ACTIVE # Save updated connection appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump()) # Calculate time until expiration - current_time = get_utc_timestamp() - expires_in = int(refreshed_token.expiresAt - current_time) + current_time = getUtcTimestamp() + expiresIn = int(refreshedToken.expiresAt - current_time) return { "message": "Token refreshed successfully", - "expires_at": refreshed_token.expiresAt, - "expires_in_seconds": expires_in + "expires_at": refreshedToken.expiresAt, + "expires_in_seconds": expiresIn } else: raise HTTPException( diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py index 17c6e73d..0e1b009f 100644 --- a/modules/routes/routeVoiceGoogle.py +++ b/modules/routes/routeVoiceGoogle.py @@ -18,26 +18,26 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/voice-google", tags=["Voice Google"]) # Store active WebSocket connections -active_connections: Dict[str, WebSocket] = {} +activeConnections: Dict[str, WebSocket] = {} class ConnectionManager: def __init__(self): - self.active_connections: List[WebSocket] = [] + self.activeConnections: List[WebSocket] = [] - async def connect(self, websocket: WebSocket, connection_id: str): + async def connect(self, websocket: WebSocket, connectionId: str): await websocket.accept() - self.active_connections.append(websocket) - active_connections[connection_id] = websocket - logger.info(f"WebSocket connected: {connection_id}") + 
self.activeConnections.append(websocket) + activeConnections[connectionId] = websocket + logger.info(f"WebSocket connected: {connectionId}") - def disconnect(self, websocket: WebSocket, connection_id: str): - if websocket in self.active_connections: - self.active_connections.remove(websocket) - if connection_id in active_connections: - del active_connections[connection_id] - logger.info(f"WebSocket disconnected: {connection_id}") + def disconnect(self, websocket: WebSocket, connectionId: str): + if websocket in self.activeConnections: + self.activeConnections.remove(websocket) + if connectionId in activeConnections: + del activeConnections[connectionId] + logger.info(f"WebSocket disconnected: {connectionId}") - async def send_personal_message(self, message: dict, websocket: WebSocket): + async def sendPersonalMessage(self, message: dict, websocket: WebSocket): try: await websocket.send_text(json.dumps(message)) except Exception as e: @@ -45,10 +45,10 @@ class ConnectionManager: manager = ConnectionManager() -def get_voice_interface(current_user: User) -> VoiceObjects: +def _getVoiceInterface(currentUser: User) -> VoiceObjects: """Get voice interface instance with user context.""" try: - return getVoiceInterface(current_user) + return getVoiceInterface(currentUser) except Exception as e: logger.error(f"Failed to initialize voice interface: {e}") raise HTTPException( @@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects: @router.post("/speech-to-text") async def speech_to_text( - audio_file: UploadFile = File(...), + audioFile: UploadFile = File(...), language: str = Form("de-DE"), - current_user: User = Depends(getCurrentUser) + currentUser: User = Depends(getCurrentUser) ): """Convert speech to text using Google Cloud Speech-to-Text API.""" try: - logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}") + logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}") # Read audio file - 
audio_content = await audio_file.read() - logger.info(f"📊 Audio file size: {len(audio_content)} bytes") + audioContent = await audioFile.read() + logger.info(f"📊 Audio file size: {len(audioContent)} bytes") # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Validate audio format - validation = voice_interface.validateAudioFormat(audio_content) + validation = voiceInterface.validateAudioFormat(audioContent) if not validation["valid"]: raise HTTPException( @@ -83,8 +83,8 @@ async def speech_to_text( ) # Perform speech recognition - result = await voice_interface.speechToText( - audioContent=audio_content, + result = await voiceInterface.speechToText( + audioContent=audioContent, language=language ) @@ -95,7 +95,7 @@ async def speech_to_text( "confidence": result["confidence"], "language": result["language"], "audio_info": { - "size": len(audio_content), + "size": len(audioContent), "format": validation["format"], "estimated_duration": validation.get("estimated_duration", 0) } @@ -118,13 +118,13 @@ async def speech_to_text( @router.post("/translate") async def translate_text( text: str = Form(...), - source_language: str = Form("de"), - target_language: str = Form("en"), - current_user: User = Depends(getCurrentUser) + sourceLanguage: str = Form("de"), + targetLanguage: str = Form("en"), + currentUser: User = Depends(getCurrentUser) ): """Translate text using Google Cloud Translation API.""" try: - logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})") + logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})") if not text.strip(): raise HTTPException( @@ -133,13 +133,13 @@ async def translate_text( ) # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Perform translation - result = await voice_interface.translateText( + result = await 
voiceInterface.translateText( text=text, - sourceLanguage=source_language, - targetLanguage=target_language + sourceLanguage=sourceLanguage, + targetLanguage=targetLanguage ) if result["success"]: @@ -167,21 +167,21 @@ async def translate_text( @router.post("/realtime-interpreter") async def realtime_interpreter( - audio_file: UploadFile = File(...), - from_language: str = Form("de-DE"), - to_language: str = Form("en-US"), - connection_id: str = Form(None), - current_user: User = Depends(getCurrentUser) + audioFile: UploadFile = File(...), + fromLanguage: str = Form("de-DE"), + toLanguage: str = Form("en-US"), + connectionId: str = Form(None), + currentUser: User = Depends(getCurrentUser) ): """Real-time interpreter: speech to translated text using Google Cloud APIs.""" try: - logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}") - logger.info(f" From: {from_language} -> To: {to_language}") - logger.info(f" MIME type: {audio_file.content_type}") + logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}") + logger.info(f" From: {fromLanguage} -> To: {toLanguage}") + logger.info(f" MIME type: {audioFile.content_type}") # Read audio file - audio_content = await audio_file.read() - logger.info(f"📊 Audio file size: {len(audio_content)} bytes") + audioContent = await audioFile.read() + logger.info(f"📊 Audio file size: {len(audioContent)} bytes") # Save audio file for debugging with correct extension # file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav" @@ -192,10 +192,10 @@ async def realtime_interpreter( # logger.info(f"💾 Saved audio file for debugging: {debug_filename}") # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Validate audio format - validation = voice_interface.validateAudioFormat(audio_content) + validation = voiceInterface.validateAudioFormat(audioContent) if not validation["valid"]: raise HTTPException( @@ -204,10 +204,10 @@ 
async def realtime_interpreter( ) # Perform complete pipeline: Speech-to-Text + Translation - result = await voice_interface.speechToTranslatedText( - audioContent=audio_content, - fromLanguage=from_language, - toLanguage=to_language + result = await voiceInterface.speechToTranslatedText( + audioContent=audioContent, + fromLanguage=fromLanguage, + toLanguage=toLanguage ) if result["success"]: @@ -223,7 +223,7 @@ async def realtime_interpreter( "source_language": result["source_language"], "target_language": result["target_language"], "audio_info": { - "size": len(audio_content), + "size": len(audioContent), "format": validation["format"], "estimated_duration": validation.get("estimated_duration", 0) } @@ -249,7 +249,7 @@ async def text_to_speech( text: str = Form(...), language: str = Form("de-DE"), voice: str = Form(None), - current_user: User = Depends(getCurrentUser) + currentUser: User = Depends(getCurrentUser) ): """Convert text to speech using Google Cloud Text-to-Speech.""" try: @@ -261,8 +261,8 @@ async def text_to_speech( detail="Empty text provided for text-to-speech" ) - voice_interface = get_voice_interface(current_user) - result = await voice_interface.textToSpeech( + voiceInterface = _getVoiceInterface(currentUser) + result = await voiceInterface.textToSpeech( text=text, languageCode=language, voiceName=voice @@ -294,13 +294,13 @@ async def text_to_speech( ) @router.get("/languages") -async def get_available_languages(current_user: User = Depends(getCurrentUser)): +async def get_available_languages(currentUser: User = Depends(getCurrentUser)): """Get available languages from Google Cloud Text-to-Speech.""" try: logger.info("🌐 Getting available languages from Google Cloud TTS") - voice_interface = get_voice_interface(current_user) - result = await voice_interface.getAvailableLanguages() + voiceInterface = _getVoiceInterface(currentUser) + result = await voiceInterface.getAvailableLanguages() if result["success"]: return { @@ -324,21 +324,21 @@ async 
def get_available_languages(current_user: User = Depends(getCurrentUser)): @router.get("/voices") async def get_available_voices( - language_code: Optional[str] = None, - current_user: User = Depends(getCurrentUser) + languageCode: Optional[str] = None, + currentUser: User = Depends(getCurrentUser) ): """Get available voices from Google Cloud Text-to-Speech.""" try: - logger.info(f"🎤 Getting available voices, language filter: {language_code}") + logger.info(f"🎤 Getting available voices, language filter: {languageCode}") - voice_interface = get_voice_interface(current_user) - result = await voice_interface.getAvailableVoices(languageCode=language_code) + voiceInterface = _getVoiceInterface(currentUser) + result = await voiceInterface.getAvailableVoices(languageCode=languageCode) if result["success"]: return { "success": True, "voices": result["voices"], - "language_filter": language_code + "language_filter": languageCode } else: raise HTTPException( @@ -356,11 +356,11 @@ async def get_available_voices( ) @router.get("/health") -async def health_check(current_user: User = Depends(getCurrentUser)): +async def health_check(currentUser: User = Depends(getCurrentUser)): """Health check for Google Cloud voice services.""" try: - voice_interface = get_voice_interface(current_user) - test_result = await voice_interface.healthCheck() + voiceInterface = _getVoiceInterface(currentUser) + test_result = await voiceInterface.healthCheck() return test_result @@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)): } @router.get("/settings") -async def get_voice_settings(current_user: User = Depends(getCurrentUser)): +async def get_voice_settings(currentUser: User = Depends(getCurrentUser)): """Get voice settings for the current user.""" try: - logger.info(f"Getting voice settings for user: {current_user.id}") + logger.info(f"Getting voice settings for user: {currentUser.id}") # Get voice interface - voice_interface = 
get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Get or create voice settings for the user - voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id) + voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id) if voice_settings: # Return user settings @@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)): @router.post("/settings") async def save_voice_settings( settings: Dict[str, Any] = Body(...), - current_user: User = Depends(getCurrentUser) + currentUser: User = Depends(getCurrentUser) ): """Save voice settings for the current user.""" try: - logger.info(f"Saving voice settings for user: {current_user.id}") + logger.info(f"Saving voice settings for user: {currentUser.id}") logger.info(f"Settings: {settings}") # Validate required settings - required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"] - for field in required_fields: + requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"] + for field in requiredFields: if field not in settings: raise HTTPException( status_code=400, @@ -448,23 +448,23 @@ async def save_voice_settings( settings["targetLanguage"] = "en-US" # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Check if settings already exist for this user - existing_settings = voice_interface.getVoiceSettings(current_user.id) + existing_settings = voiceInterface.getVoiceSettings(currentUser.id) if existing_settings: # Update existing settings - logger.info(f"Updating existing voice settings for user {current_user.id}") - updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings) - logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}") + logger.info(f"Updating existing voice settings for user {currentUser.id}") + updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings) + 
logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}") else: # Create new settings - logger.info(f"Creating new voice settings for user {current_user.id}") + logger.info(f"Creating new voice settings for user {currentUser.id}") # Add userId to settings - settings["userId"] = current_user.id - created_settings = voice_interface.createVoiceSettings(settings) - logger.info(f"Voice settings created for user {current_user.id}: {created_settings}") + settings["userId"] = currentUser.id + created_settings = voiceInterface.createVoiceSettings(settings) + logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}") return { "success": True, @@ -486,25 +486,25 @@ async def save_voice_settings( @router.websocket("/ws/realtime-interpreter") async def websocket_realtime_interpreter( websocket: WebSocket, - user_id: str = "default", - from_language: str = "de-DE", - to_language: str = "en-US" + userId: str = "default", + fromLanguage: str = "de-DE", + toLanguage: str = "en-US" ): """WebSocket endpoint for real-time voice interpretation""" - connection_id = f"realtime_{user_id}_{from_language}_{to_language}" + connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}" try: - await manager.connect(websocket, connection_id) + await manager.connect(websocket, connectionId) # Send connection confirmation - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "connected", - "connection_id": connection_id, + "connection_id": connectionId, "message": "Connected to real-time interpreter" }, websocket) # Initialize voice interface - voice_interface = get_voice_interface(User(id=user_id)) + voiceInterface = _getVoiceInterface(User(id=userId)) while True: # Receive message from client @@ -515,7 +515,7 @@ async def websocket_realtime_interpreter( # Process audio chunk try: # Decode base64 audio data - audio_data = base64.b64decode(message["data"]) + audioData = base64.b64decode(message["data"]) # For 
now, just acknowledge receipt # In a full implementation, this would: @@ -524,9 +524,9 @@ async def websocket_realtime_interpreter( # 3. Send partial results back # 4. Handle translation - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "audio_received", - "chunk_size": len(audio_data), + "chunk_size": len(audioData), "timestamp": message.get("timestamp") }, websocket) @@ -539,7 +539,7 @@ async def websocket_realtime_interpreter( elif message["type"] == "ping": # Respond to ping - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) @@ -548,32 +548,32 @@ async def websocket_realtime_interpreter( logger.warning(f"Unknown message type: {message['type']}") except WebSocketDisconnect: - manager.disconnect(websocket, connection_id) - logger.info(f"Client disconnected: {connection_id}") + manager.disconnect(websocket, connectionId) + logger.info(f"Client disconnected: {connectionId}") except Exception as e: logger.error(f"WebSocket error: {e}") - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) @router.websocket("/ws/speech-to-text") async def websocket_speech_to_text( websocket: WebSocket, - user_id: str = "default", + userId: str = "default", language: str = "de-DE" ): """WebSocket endpoint for real-time speech-to-text""" - connection_id = f"stt_{user_id}_{language}" + connectionId = f"stt_{userId}_{language}" try: - await manager.connect(websocket, connection_id) + await manager.connect(websocket, connectionId) - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "connected", - "connection_id": connection_id, + "connection_id": connectionId, "message": "Connected to speech-to-text" }, websocket) # Initialize voice interface - voice_interface = get_voice_interface(User(id=user_id)) + voiceInterface = _getVoiceInterface(User(id=userId)) while True: data = await 
websocket.receive_text() @@ -581,12 +581,12 @@ async def websocket_speech_to_text( if message["type"] == "audio_chunk": try: - audio_data = base64.b64decode(message["data"]) + audioData = base64.b64decode(message["data"]) # Process audio chunk # This would integrate with Google Cloud Speech-to-Text streaming API - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "transcription_result", "text": "Audio chunk received", # Placeholder "confidence": 0.95, @@ -595,39 +595,39 @@ async def websocket_speech_to_text( except Exception as e: logger.error(f"Error processing audio: {e}") - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "error", "error": f"Failed to process audio: {str(e)}" }, websocket) elif message["type"] == "ping": - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) except WebSocketDisconnect: - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) except Exception as e: logger.error(f"WebSocket error: {e}") - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) @router.websocket("/ws/text-to-speech") async def websocket_text_to_speech( websocket: WebSocket, - user_id: str = "default", + userId: str = "default", language: str = "de-DE", voice: str = "de-DE-Wavenet-A" ): """WebSocket endpoint for real-time text-to-speech""" - connection_id = f"tts_{user_id}_{language}_{voice}" + connectionId = f"tts_{userId}_{language}_{voice}" try: - await manager.connect(websocket, connection_id) + await manager.connect(websocket, connectionId) - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "connected", - "connection_id": connection_id, + "connection_id": connectionId, "message": "Connected to text-to-speech" }, websocket) @@ -643,7 +643,7 @@ async def websocket_text_to_speech( # This would integrate 
with Google Cloud Text-to-Speech API # For now, send a placeholder response - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "audio_data", "audio": "base64_encoded_audio_here", # Placeholder "format": "mp3" @@ -651,19 +651,19 @@ async def websocket_text_to_speech( except Exception as e: logger.error(f"Error processing text-to-speech: {e}") - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "error", "error": f"Failed to process text: {str(e)}" }, websocket) elif message["type"] == "ping": - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) except WebSocketDisconnect: - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) except Exception as e: logger.error(f"WebSocket error: {e}") - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) diff --git a/modules/security/jwtService.py b/modules/security/jwtService.py index 87e226c7..ab5a9392 100644 --- a/modules/security/jwtService.py +++ b/modules/security/jwtService.py @@ -9,7 +9,7 @@ from fastapi import Response from jose import jwt from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_now +from modules.shared.timezoneUtils import getUtcNow # Config SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET") @@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T import uuid toEncode["jti"] = str(uuid.uuid4()) - expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) + expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) toEncode.update({"exp": expire}) encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM) return encodedJwt, expire @@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> 
Tuple[str, "datetime"]: toEncode["jti"] = str(uuid.uuid4()) toEncode["type"] = "refresh" - expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) + expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) toEncode.update({"exp": expire}) encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM) return encodedJwt, expire diff --git a/modules/security/tokenManager.py b/modules/security/tokenManager.py index 07c5c2a9..42c4a7cf 100644 --- a/modules/security/tokenManager.py +++ b/modules/security/tokenManager.py @@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable from modules.datamodels.datamodelSecurity import Token from modules.datamodels.datamodelUam import AuthAuthority from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp logger = logging.getLogger(__name__) @@ -27,54 +27,54 @@ class TokenManager: self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID") self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET") - def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]: + def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]: """Refresh Microsoft OAuth token using refresh token""" try: - logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}") - logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}") + logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}") + logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}") if not self.msft_client_id or not self.msft_client_secret: 
logger.error("Microsoft OAuth configuration not found") return None # Microsoft token refresh endpoint - token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token" - logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}") + tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token" + logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}") # Prepare refresh request data = { "client_id": self.msft_client_id, "client_secret": self.msft_client_secret, "grant_type": "refresh_token", - "refresh_token": refresh_token, + "refresh_token": refreshToken, "scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read" } - logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})") + logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})") # Make refresh request with httpx.Client(timeout=30.0) as client: - logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint") - response = client.post(token_url, data=data) - logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}") + logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint") + response = client.post(tokenUrl, data=data) + logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}") if response.status_code == 200: - token_data = response.json() - logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token") + tokenData = response.json() + logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token") # Create new token - new_token = Token( - userId=user_id, + newToken = Token( + userId=userId, authority=AuthAuthority.MSFT, - connectionId=old_token.connectionId, # Preserve connection ID - 
tokenAccess=token_data["access_token"], - tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided - tokenType=token_data.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)), - createdAt=get_utc_timestamp() + connectionId=oldToken.connectionId, # Preserve connection ID + tokenAccess=tokenData["access_token"], + tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided + tokenType=tokenData.get("token_type", "bearer"), + expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)), + createdAt=getUtcTimestamp() ) - logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}") - return new_token + logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}") + return newToken else: logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}") return None @@ -83,70 +83,70 @@ class TokenManager: logger.error(f"Error refreshing Microsoft token: {str(e)}") return None - def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]: + def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]: """Refresh Google OAuth token using refresh token""" try: - logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}") - logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}") + logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}") + logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}") if not self.google_client_id or not self.google_client_secret: logger.error("Google OAuth configuration not found") return None # Google 
token refresh endpoint - token_url = "https://oauth2.googleapis.com/token" - logger.debug(f"refresh_google_token: Using token URL: {token_url}") + tokenUrl = "https://oauth2.googleapis.com/token" + logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}") # Prepare refresh request data = { "client_id": self.google_client_id, "client_secret": self.google_client_secret, "grant_type": "refresh_token", - "refresh_token": refresh_token + "refresh_token": refreshToken } - logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})") + logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})") # Make refresh request with httpx.Client(timeout=30.0) as client: - logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint") - response = client.post(token_url, data=data) - logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}") + logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint") + response = client.post(tokenUrl, data=data) + logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}") if response.status_code == 200: - token_data = response.json() - logger.debug(f"refresh_google_token: Token refresh successful, creating new token") + tokenData = response.json() + logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token") # Validate the response contains required fields - if "access_token" not in token_data: + if "access_token" not in tokenData: logger.error("Google token refresh response missing access_token") return None # Create new token - new_token = Token( - userId=user_id, + newToken = Token( + userId=userId, authority=AuthAuthority.GOOGLE, - connectionId=old_token.connectionId, # Preserve connection ID - tokenAccess=token_data["access_token"], - tokenRefresh=token_data.get("refresh_token", 
refresh_token), # Use new refresh token if provided - tokenType=token_data.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)), - createdAt=get_utc_timestamp() + connectionId=oldToken.connectionId, # Preserve connection ID + tokenAccess=tokenData["access_token"], + tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided + tokenType=tokenData.get("token_type", "bearer"), + expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)), + createdAt=getUtcTimestamp() ) - logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}") - return new_token + logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}") + return newToken else: - error_details = response.text - logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}") + errorDetails = response.text + logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}") # Handle specific error cases if response.status_code == 400: try: - error_data = response.json() - error_code = error_data.get("error") - if error_code == "invalid_grant": + errorData = response.json() + errorCode = errorData.get("error") + if errorCode == "invalid_grant": logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate") - elif error_code == "invalid_client": + elif errorCode == "invalid_client": logger.error("Google OAuth client configuration is invalid") except: pass @@ -157,55 +157,55 @@ class TokenManager: logger.error(f"Error refreshing Google token: {str(e)}") return None - def refresh_token(self, old_token: Token) -> Optional[Token]: + def refreshToken(self, oldToken: Token) -> Optional[Token]: """Refresh an expired token using the appropriate OAuth service""" try: - logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}") - logger.debug(f"refresh_token: Token 
details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}") + logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}") + logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}") # Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly # Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed try: - now_ts = get_utc_timestamp() - created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0 - seconds_since_last_refresh = now_ts - created_ts - if seconds_since_last_refresh < 10 * 60: + nowTs = getUtcTimestamp() + createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0 + secondsSinceLastRefresh = nowTs - createdTs + if secondsSinceLastRefresh < 10 * 60: logger.info( - f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. " - f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)." + f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. " + f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)." 
) # Return the existing token to avoid caller errors while preventing provider rate limits - return old_token + return oldToken except Exception: # If any issue reading timestamps, proceed with normal refresh to be safe pass - if not old_token.tokenRefresh: - logger.warning(f"No refresh token available for {old_token.authority}") + if not oldToken.tokenRefresh: + logger.warning(f"No refresh token available for {oldToken.authority}") return None # Route to appropriate refresh method - if old_token.authority == AuthAuthority.MSFT: - logger.debug(f"refresh_token: Refreshing Microsoft token") - return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token) - elif old_token.authority == AuthAuthority.GOOGLE: - logger.debug(f"refresh_token: Refreshing Google token") - return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token) + if oldToken.authority == AuthAuthority.MSFT: + logger.debug(f"refreshToken: Refreshing Microsoft token") + return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken) + elif oldToken.authority == AuthAuthority.GOOGLE: + logger.debug(f"refreshToken: Refreshing Google token") + return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken) else: - logger.warning(f"Unknown authority for token refresh: {old_token.authority}") + logger.warning(f"Unknown authority for token refresh: {oldToken.authority}") return None except Exception as e: logger.error(f"Error refreshing token: {str(e)}") return None - def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]: + def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]: """Ensure a token is fresh; refresh if expiring within threshold. Args: token: Existing token to validate/refresh. 
- seconds_before_expiry: Threshold window to proactively refresh. - save_callback: Optional function to persist a refreshed token. + secondsBeforeExpiry: Threshold window to proactively refresh. + saveCallback: Optional function to persist a refreshed token. Returns: A fresh token (refreshed or original) or None if refresh failed. @@ -214,31 +214,31 @@ class TokenManager: if token is None: return None - now_ts = get_utc_timestamp() - expires_at = token.expiresAt or 0 + nowTs = getUtcTimestamp() + expiresAt = token.expiresAt or 0 # If token expires within the threshold, try to refresh - if expires_at and expires_at < (now_ts + seconds_before_expiry): + if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry): logger.info( - f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon " - f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh." + f"ensureFreshToken: Token for connection {token.connectionId} expiring soon " + f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh." 
) - refreshed = self.refresh_token(token) + refreshed = self.refreshToken(token) if refreshed: - if save_callback is not None: + if saveCallback is not None: try: - save_callback(refreshed) + saveCallback(refreshed) except Exception as e: - logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}") + logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}") return refreshed else: - logger.warning("ensure_fresh_token: Token refresh failed") + logger.warning("ensureFreshToken: Token refresh failed") return None # Token is sufficiently fresh return token except Exception as e: - logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}") + logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}") return None # Convenience wrapper to fetch and ensure fresh token for a connection via interface layer @@ -256,10 +256,10 @@ class TokenManager: token = interfaceDbApp.getConnectionToken(connectionId) if not token: return None - return self.ensure_fresh_token( + return self.ensureFreshToken( token, - seconds_before_expiry=secondsBeforeExpiry, - save_callback=lambda t: interfaceDbApp.saveConnectionToken(t) + secondsBeforeExpiry=secondsBeforeExpiry, + saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t) ) except Exception as e: logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}") diff --git a/modules/security/tokenRefreshMiddleware.py b/modules/security/tokenRefreshMiddleware.py index 2ced531c..b7131a40 100644 --- a/modules/security/tokenRefreshMiddleware.py +++ b/modules/security/tokenRefreshMiddleware.py @@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware from typing import Callable import asyncio from modules.security.tokenRefreshService import token_refresh_service -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -135,7 +135,7 @@ 
class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware): try: # Perform proactive refresh in background asyncio.create_task(self._proactive_refresh_tokens(user_id)) - self.last_check[user_id] = get_utc_timestamp() + self.last_check[user_id] = getUtcTimestamp() except Exception as e: logger.warning(f"Error scheduling proactive refresh: {str(e)}") @@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware): Check if we should perform proactive refresh for this user """ try: - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() last_check = self.last_check.get(user_id, 0) # Check every 5 minutes diff --git a/modules/security/tokenRefreshService.py b/modules/security/tokenRefreshService.py index ff7ac720..24a99e3b 100644 --- a/modules/security/tokenRefreshService.py +++ b/modules/security/tokenRefreshService.py @@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues. import logging from typing import Dict, Any from modules.datamodels.datamodelUam import UserConnection, AuthAuthority -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.shared.auditLogger import audit_logger logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class TokenRefreshService: def _is_rate_limited(self, connection_id: str) -> bool: """Check if connection is rate limited for refresh attempts""" - now = get_utc_timestamp() + now = getUtcTimestamp() if connection_id not in self.rate_limit_map: return False @@ -39,7 +39,7 @@ class TokenRefreshService: def _record_refresh_attempt(self, connection_id: str) -> None: """Record a refresh attempt for rate limiting""" - now = get_utc_timestamp() + now = getUtcTimestamp() if connection_id not in self.rate_limit_map: self.rate_limit_map[connection_id] = [] self.rate_limit_map[connection_id].append(now) @@ -60,14 +60,14 @@ class TokenRefreshService: token_manager = TokenManager() # Attempt to refresh the token - 
refreshed_token = token_manager.refresh_token(current_token) - if refreshed_token: + refreshedToken = token_manager.refreshToken(current_token) + if refreshedToken: # Save the refreshed token - interface.saveConnectionToken(refreshed_token) + interface.saveConnectionToken(refreshedToken) # Update connection status interface.db.recordModify(UserConnection, connection.id, { - "lastChecked": get_utc_timestamp(), + "lastChecked": getUtcTimestamp(), "expiresAt": refreshed_token.expiresAt }) @@ -75,9 +75,9 @@ class TokenRefreshService: # Log audit event try: - audit_logger.log_security_event( - user_id=str(connection.userId), - mandate_id="system", + audit_logger.logSecurityEvent( + userId=str(connection.userId), + mandateId="system", action="token_refresh", details=f"Google token refreshed for connection {connection.id}" ) @@ -109,14 +109,14 @@ class TokenRefreshService: token_manager = TokenManager() # Attempt to refresh the token - refreshed_token = token_manager.refresh_token(current_token) - if refreshed_token: + refreshedToken = token_manager.refreshToken(current_token) + if refreshedToken: # Save the refreshed token - interface.saveConnectionToken(refreshed_token) + interface.saveConnectionToken(refreshedToken) # Update connection status interface.db.recordModify(UserConnection, connection.id, { - "lastChecked": get_utc_timestamp(), + "lastChecked": getUtcTimestamp(), "expiresAt": refreshed_token.expiresAt }) @@ -124,9 +124,9 @@ class TokenRefreshService: # Log audit event try: - audit_logger.log_security_event( - user_id=str(connection.userId), - mandate_id="system", + audit_logger.logSecurityEvent( + userId=str(connection.userId), + mandateId="system", action="token_refresh", details=f"Microsoft token refreshed for connection {connection.id}" ) @@ -234,7 +234,7 @@ class TokenRefreshService: refreshed_count = 0 failed_count = 0 rate_limited_count = 0 - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() five_minutes = 5 * 60 # 5 minutes in 
seconds # Process each connection diff --git a/modules/services/__init__.py b/modules/services/__init__.py index 0f269e28..87b13207 100644 --- a/modules/services/__init__.py +++ b/modules/services/__init__.py @@ -11,18 +11,18 @@ class PublicService: - Optional name_filter predicate for allow-list patterns """ - def __init__(self, target: Any, functions_only: bool = True, name_filter=None): + def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None): self._target = target - self._functions_only = functions_only - self._name_filter = name_filter + self._functionsOnly = functionsOnly + self._nameFilter = nameFilter def __getattr__(self, name: str): if name.startswith('_'): raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private") - if self._name_filter and not self._name_filter(name): + if self._nameFilter and not self._nameFilter(name): raise AttributeError(f"'{name}' not exposed by policy") attr = getattr(self._target, name) - if self._functions_only and not callable(attr): + if self._functionsOnly and not callable(attr): raise AttributeError(f"'{name}' is not a function") return attr @@ -30,8 +30,8 @@ class PublicService: names = [ n for n in dir(self._target) if not n.startswith('_') - and (not self._functions_only or callable(getattr(self._target, n, None))) - and (self._name_filter(n) if self._name_filter else True) + and (not self._functionsOnly or callable(getattr(self._target, n, None))) + and (self._nameFilter(n) if self._nameFilter else True) ] return sorted(names) @@ -70,7 +70,7 @@ class Services: self.sharepoint = PublicService(SharepointService(self)) from .serviceAi.mainServiceAi import AiService - self.ai = PublicService(AiService(self)) + self.ai = PublicService(AiService(self), functionsOnly=False) from .serviceTicket.mainServiceTicket import TicketService self.ticket = PublicService(TicketService(self)) diff --git a/modules/services/serviceAi/mainServiceAi.py 
b/modules/services/serviceAi/mainServiceAi.py index b1326967..87b56ceb 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1,30 +1,26 @@ +import json import logging -from typing import Dict, Any, List, Optional, Union +import time +from typing import Dict, Any, List, Optional, Tuple, Union from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum from modules.interfaces.interfaceAiObjects import AiObjects -from modules.services.serviceAi.subCoreAi import SubCoreAi from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing -from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration -from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent - +from modules.shared.jsonUtils import ( + extractJsonString, + repairBrokenJson, + extractSectionsFromDocument, + buildContinuationContext +) logger = logging.getLogger(__name__) +# Rebuild the model to resolve forward references +AiCallRequest.model_rebuild() + class AiService: - """Lightweight AI service orchestrator that delegates to specialized sub-modules. - - Manager delegates to specialized sub-modules: - - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls) - - SubDocumentProcessing: Document chunking, processing, and merging logic - - SubDocumentGeneration: Single-file and multi-file document generation - - The main service acts as a coordinator: - 1. Manages lazy initialization of sub-modules - 2. Delegates operations to appropriate sub-modules - 3. 
Maintains the same public API for backward compatibility - """ + """AI service with core operations integrated.""" def __init__(self, serviceCenter=None) -> None: """Initialize AI service with service center access. @@ -34,64 +30,638 @@ class AiService: """ self.services = serviceCenter # Only depend on interfaces - self.aiObjects = None # Will be initialized in create() - self._extractionService = None # Lazy initialization - self._coreAi = None # Lazy initialization - self._documentProcessor = None # Lazy initialization - self._documentGenerator = None # Lazy initialization + self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized() + # Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready + self.extractionService = None + self.documentProcessor = None - @property - def extractionService(self): - """Lazy initialization of extraction service.""" - if self._extractionService is None: - logger.info("Lazy initializing ExtractionService...") - self._extractionService = ExtractionService(self.services) - return self._extractionService - - @property - def coreAi(self): - """Lazy initialization of core AI service.""" - if self._coreAi is None: - if self.aiObjects is None: - raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. 
Use await AiService.create() or await service._ensureAiObjectsInitialized()") - logger.info("Lazy initializing SubCoreAi...") - self._coreAi = SubCoreAi(self.services, self.aiObjects) - return self._coreAi - - @property - def documentProcessor(self): - """Lazy initialization of document processing service.""" - if self._documentProcessor is None: - logger.info("Lazy initializing SubDocumentProcessing...") - self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects) - return self._documentProcessor - - - @property - def documentGenerator(self): - """Lazy initialization of document generation service.""" - if self._documentGenerator is None: - logger.info("Lazy initializing SubDocumentGeneration...") - self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor) - return self._documentGenerator + def _initializeSubmodules(self): + """Initialize all submodules after aiObjects is ready.""" + if self.aiObjects is None: + raise RuntimeError("aiObjects must be initialized before initializing submodules") + + if self.extractionService is None: + logger.info("Initializing ExtractionService...") + self.extractionService = ExtractionService(self.services) + + if self.documentProcessor is None: + logger.info("Initializing SubDocumentProcessing...") + self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects) async def _ensureAiObjectsInitialized(self): - """Ensure aiObjects is initialized.""" + """Ensure aiObjects is initialized and submodules are ready.""" if self.aiObjects is None: logger.info("Lazy initializing AiObjects...") self.aiObjects = await AiObjects.create() logger.info("AiObjects initialization completed") + # Initialize submodules after aiObjects is ready + self._initializeSubmodules() @classmethod async def create(cls, serviceCenter=None) -> "AiService": - """Create AiService instance with all connectors initialized.""" + """Create AiService instance with all connectors and 
submodules initialized.""" logger.info("AiService.create() called") instance = cls(serviceCenter) logger.info("AiService created, about to call AiObjects.create()...") instance.aiObjects = await AiObjects.create() logger.info("AiObjects.create() completed") + # Initialize all submodules after aiObjects is ready + instance._initializeSubmodules() + logger.info("AiService submodules initialized") return instance + # Helper methods + + def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str: + """ + Build full prompt by replacing placeholders with their content. + Uses the new {{KEY:placeholder}} format. + + Args: + prompt: The base prompt template + placeholders: Dictionary of placeholder key-value pairs + + Returns: + Prompt with placeholders replaced + """ + if not placeholders: + return prompt + + full_prompt = prompt + for placeholder, content in placeholders.items(): + # Skip if content is None or empty + if content is None: + continue + # Replace {{KEY:placeholder}} + full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content)) + + return full_prompt + + async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions: + """Analyze prompt to determine appropriate AiCallOptions parameters.""" + try: + # Get dynamic enum values from Pydantic models + operationTypes = [e.value for e in OperationTypeEnum] + priorities = [e.value for e in PriorityEnum] + processingModes = [e.value for e in ProcessingModeEnum] + + # Create analysis prompt for AI to determine operation type and parameters + analysisPrompt = f""" +You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters. + +PROMPT TO ANALYZE: +{self.services.utils.sanitizePromptContent(prompt, 'userinput')} + +Based on the prompt content, determine: +1. operationType: Choose the most appropriate from: {', '.join(operationTypes)} +2. 
priority: Choose from: {', '.join(priorities)} +3. processingMode: Choose from: {', '.join(processingModes)} +4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas) +5. compressContext: true/false (true to summarize context, false to process fully) + +Respond with ONLY a JSON object in this exact format: +{{ + "operationType": "dataAnalyse", + "priority": "balanced", + "processingMode": "basic", + "compressPrompt": true, + "compressContext": true +}} +""" + + # Use AI to analyze the prompt + request = AiCallRequest( + prompt=analysisPrompt, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.SPEED, + processingMode=ProcessingModeEnum.BASIC, + compressPrompt=True, + compressContext=False + ) + ) + + response = await self.aiObjects.call(request) + + # Parse AI response + try: + jsonStart = response.content.find('{') + jsonEnd = response.content.rfind('}') + 1 + if jsonStart != -1 and jsonEnd > jsonStart: + analysis = json.loads(response.content[jsonStart:jsonEnd]) + + # Map string values to enums + operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse')) + priority = PriorityEnum(analysis.get('priority', 'balanced')) + processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic')) + + return AiCallOptions( + operationType=operationType, + priority=priority, + processingMode=processingMode, + compressPrompt=analysis.get('compressPrompt', True), + compressContext=analysis.get('compressContext', True) + ) + except Exception as e: + logger.warning(f"Failed to parse AI analysis response: {e}") + + except Exception as e: + logger.warning(f"Prompt analysis failed: {e}") + + # Fallback to default options + return AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.BASIC + ) + + async def _callAiWithLooping( + self, + prompt: str, + options: AiCallOptions, + 
debugPrefix: str = "ai_call", + promptBuilder: Optional[callable] = None, + promptArgs: Optional[Dict[str, Any]] = None, + operationId: Optional[str] = None + ) -> str: + """ + Shared core function for AI calls with repair-based looping system. + Automatically repairs broken JSON and continues generation seamlessly. + + Args: + prompt: The prompt to send to AI + options: AI call configuration options + debugPrefix: Prefix for debug file names + promptBuilder: Optional function to rebuild prompts for continuation + promptArgs: Optional arguments for prompt builder + operationId: Optional operation ID for progress tracking + + Returns: + Complete AI response after all iterations + """ + maxIterations = 50 # Prevent infinite loops + iteration = 0 + allSections = [] # Accumulate all sections across iterations + lastRawResponse = None # Store last raw JSON response for continuation + + while iteration < maxIterations: + iteration += 1 + + # Update progress for iteration start + if operationId: + if iteration == 1: + self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}") + else: + # For continuation iterations, show progress incrementally + baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4) # Progress from 0.5 to 0.9 over maxIterations iterations + self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})") + + # Build iteration prompt + if len(allSections) > 0 and promptBuilder and promptArgs: + # This is a continuation - build continuation context with raw JSON and rebuild prompt + continuationContext = buildContinuationContext(allSections, lastRawResponse) + if not lastRawResponse: + logger.warning(f"Iteration {iteration}: No previous response available for continuation!") + + # Rebuild prompt with continuation context using the provided prompt builder + iterationPrompt = await promptBuilder(**promptArgs, 
continuationContext=continuationContext) + else: + # First iteration - use original prompt + iterationPrompt = prompt + + # Make AI call + try: + if operationId and iteration == 1: + self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model") + request = AiCallRequest( + prompt=iterationPrompt, + context="", + options=options + ) + + # Write the ACTUAL prompt sent to AI + if iteration == 1: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") + else: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") + + response = await self.aiObjects.call(request) + result = response.content + + # Update progress after AI call + if operationId: + if iteration == 1: + self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})") + else: + progress = 0.6 + (min(iteration - 1, 10) * 0.03) + self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})") + + # Write raw AI response to debug file + if iteration == 1: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") + else: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") + + # Emit stats for this iteration + self.services.workflow.storeWorkflowStat( + self.services.currentWorkflow, + response, + f"ai.call.{debugPrefix}.iteration_{iteration}" + ) + + if not result or not result.strip(): + logger.warning(f"Iteration {iteration}: Empty response, stopping") + break + + # Store raw response for continuation (even if broken) + lastRawResponse = result + + # Check for complete_response flag in raw response (before parsing) + import re + if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE): + pass # Flag detected, will stop in _shouldContinueGeneration + + # Extract sections from response (handles both valid and broken JSON) + extractedSections, wasJsonComplete = 
self._extractSectionsFromResponse(result, iteration, debugPrefix) + + # Update progress after parsing + if operationId: + if extractedSections: + self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})") + + if not extractedSections: + # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry + if iteration > 1 and not wasJsonComplete: + logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt") + continue + # Otherwise, stop if no sections + logger.warning(f"Iteration {iteration}: No sections extracted, stopping") + break + + # Add new sections to accumulator + allSections.extend(extractedSections) + + # Check if we should continue (completion detection) + if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result): + continue + else: + # Done - build final result + if operationId: + self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)") + break + + except Exception as e: + logger.error(f"Error in AI call iteration {iteration}: {str(e)}") + break + + if iteration >= maxIterations: + logger.warning(f"AI call stopped after maximum iterations ({maxIterations})") + + # Build final result from accumulated sections + final_result = self._buildFinalResultFromSections(allSections) + + # Write final result to debug file + self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result") + + return final_result + + def _extractSectionsFromResponse( + self, + result: str, + iteration: int, + debugPrefix: str + ) -> Tuple[List[Dict[str, Any]], bool]: + """ + Extract sections from AI response, handling both valid and broken JSON. + Uses repair mechanism for broken JSON. + Checks for "complete_response": true flag to determine completion. 
+ Returns (sections, wasJsonComplete) + """ + # First, try to parse as valid JSON + try: + extracted = extractJsonString(result) + parsed_result = json.loads(extracted) + + # Check if AI marked response as complete + isComplete = parsed_result.get("complete_response", False) == True + + # Extract sections from parsed JSON + sections = extractSectionsFromDocument(parsed_result) + + # If AI marked as complete, always return as complete + if isComplete: + return sections, True + + # If in continuation mode (iteration > 1), continuation responses are expected to be fragments + # A fragment with 0 extractable sections means JSON is incomplete - need another iteration + if len(sections) == 0 and iteration > 1: + return sections, False # Mark as incomplete so loop continues + + # First iteration with 0 sections means empty response - stop + if len(sections) == 0: + return sections, True # Complete but empty + + return sections, True # JSON was complete with sections + + except json.JSONDecodeError as e: + # Broken JSON - try repair mechanism (normal in iterative generation) + self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}") + + # Try to repair + repaired_json = repairBrokenJson(result) + + if repaired_json: + # Extract sections from repaired JSON + sections = extractSectionsFromDocument(repaired_json) + return sections, False # JSON was broken but repaired + else: + # Repair failed - log error + logger.error(f"Iteration {iteration}: All repair strategies failed") + return [], False + + except Exception as e: + logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}") + return [], False + + def _shouldContinueGeneration( + self, + allSections: List[Dict[str, Any]], + iteration: int, + wasJsonComplete: bool, + rawResponse: str = None + ) -> bool: + """ + Determine if generation should continue based on JSON completeness and complete_response flag. + Returns True if we should continue, False if done. 
+ """ + if len(allSections) == 0: + return True # No sections yet, continue + + # Check for complete_response flag in raw response + if rawResponse: + import re + if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE): + return False + + # If JSON was complete (and no complete_response flag), we're done + # If JSON was broken and repaired, continue to get more content + if wasJsonComplete: + return False + else: + return True + + def _buildFinalResultFromSections( + self, + allSections: List[Dict[str, Any]] + ) -> str: + """ + Build final JSON result from accumulated sections. + """ + if not allSections: + return "" + + # Build documents structure + # Assuming single document for now + documents = [{ + "id": "doc_1", + "title": "Generated Document", # This should come from prompt + "filename": "document.json", + "sections": allSections + }] + + result = { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": documents + } + + return json.dumps(result, indent=2) + + # Public API Methods + + # Planning AI Call + async def callAiPlanning( + self, + prompt: str, + placeholders: Optional[List[PromptPlaceholder]] = None + ) -> str: + """ + Planning AI call for task planning, action planning, action selection, etc. + Always uses static parameters optimized for planning tasks. 
+ + Args: + prompt: The planning prompt + placeholders: Optional list of placeholder replacements + + Returns: + Planning JSON response + """ + await self._ensureAiObjectsInitialized() + + # Planning calls always use static parameters + options = AiCallOptions( + operationType=OperationTypeEnum.PLAN, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + compressPrompt=False, + compressContext=False + ) + + # Build full prompt with placeholders + if placeholders: + placeholdersDict = {p.label: p.content for p in placeholders} + fullPrompt = self._buildPromptWithPlaceholders(prompt, placeholdersDict) + else: + fullPrompt = prompt + + # Root-cause fix: planning must return raw single-shot JSON, not section-based output + request = AiCallRequest( + prompt=fullPrompt, + context="", + options=options + ) + + # Debug: persist prompt/response for analysis + self.services.utils.writeDebugFile(fullPrompt, "plan_prompt") + response = await self.aiObjects.call(request) + result = response.content or "" + self.services.utils.writeDebugFile(result, "plan_response") + return result + + # Document Generation AI Call + async def callAiDocuments( + self, + prompt: str, + documents: Optional[List[ChatDocument]] = None, + options: Optional[AiCallOptions] = None, + outputFormat: Optional[str] = None, + title: Optional[str] = None + ) -> Union[str, Dict[str, Any]]: + """ + Document generation AI call for all non-planning calls. + Uses the current unified path with extraction and generation. 
+ + Args: + prompt: The main prompt for the AI call + documents: Optional list of documents to process + options: AI call configuration options + outputFormat: Optional output format for document generation + title: Optional title for generated documents + + Returns: + AI response as string, or dict with documents if outputFormat is specified + """ + await self._ensureAiObjectsInitialized() + + # Create separate operationId for detailed progress tracking + workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}" + aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}" + + # Start progress tracking for this operation + self.services.workflow.progressLogStart( + aiOperationId, + "AI call with documents", + "Document Generation", + f"Format: {outputFormat or 'text'}" + ) + + try: + # Ensure AI connectors are initialized before delegating to documentProcessor/generator + if hasattr(self.services, 'ai') and hasattr(self.services.ai, '_ensureAiObjectsInitialized'): + await self.services.ai._ensureAiObjectsInitialized() + if options is None or (hasattr(options, 'operationType') and options.operationType is None): + # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None) + self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters") + options = await self._analyzePromptAndCreateOptions(prompt) + + # Route image-generation requests directly to image pipeline to avoid JSON loop + imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"} + opType = getattr(options, "operationType", None) + fmt = (outputFormat or "").lower() if outputFormat else None + isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats) + if isImageRequest: + self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") + imageResponse = await self.generateImage(prompt, options=options) + 
self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated") + self.services.workflow.progressLogFinish(aiOperationId, True) + return imageResponse + + # CRITICAL: For document generation with JSON templates, NEVER compress the prompt + # Compressing would truncate the template structure and confuse the AI + if outputFormat: # Document generation with structured output + if not options: + options = AiCallOptions() + options.compressPrompt = False # JSON templates must NOT be truncated + options.compressContext = False # Context also should not be compressed + + # Handle document generation with specific output format using unified approach + if outputFormat: + # Use unified generation method for all document generation + if documents and len(documents) > 0: + self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents") + extracted_content = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId) + else: + self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation") + extracted_content = None + + self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") + from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + # First call without continuation context + generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None) + + # Prepare prompt builder arguments for continuation + promptArgs = { + "outputFormat": outputFormat, + "userPrompt": prompt, + "title": title, + "extracted_content": extracted_content + } + + self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") + generated_json = await self._callAiWithLooping( + generation_prompt, + options, + "document_generation", + buildGenerationPrompt, + promptArgs, + aiOperationId + ) + + 
self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") + # Parse the generated JSON (extract fenced/embedded JSON first) + try: + extracted_json = self.services.utils.jsonExtractString(generated_json) + generated_data = json.loads(extracted_json) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse generated JSON: {str(e)}") + logger.error(f"JSON content length: {len(generated_json)}") + logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}") + logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}") + + # Write the problematic JSON to debug file + self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") + + self.services.workflow.progressLogFinish(aiOperationId, False) + return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"} + + self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format") + # Render to final format using the existing renderer + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + rendered_content, mime_type = await generationService.renderReport( + generated_data, outputFormat, title or "Generated Document", prompt, self + ) + + # Build result in the expected format + result = { + "success": True, + "content": generated_data, + "documents": [{ + "documentName": f"generated.{outputFormat}", + "documentData": rendered_content, + "mimeType": mime_type, + "title": title or "Generated Document" + }], + "is_multi_file": False, + "format": outputFormat, + "title": title, + "split_strategy": "single", + "total_documents": 1, + "processed_documents": 1 + } + + # Log AI response for debugging + self.services.utils.writeDebugFile(str(result), "document_generation_response", documents) + + self.services.workflow.progressLogFinish(aiOperationId, True) + return result 
+ + except Exception as e: + logger.error(f"Error rendering document: {str(e)}") + self.services.workflow.progressLogFinish(aiOperationId, False) + return {"success": False, "error": f"Rendering failed: {str(e)}"} + + # Handle text calls (no output format specified) + self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call") + if documents: + # Use document processing for text calls with documents + result = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId) + else: + # Use shared core function for direct text calls + result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId) + + self.services.workflow.progressLogFinish(aiOperationId, True) + return result + + except Exception as e: + logger.error(f"Error in callAiDocuments: {str(e)}") + self.services.workflow.progressLogFinish(aiOperationId, False) + raise + # AI Image Analysis async def readImage( self, @@ -102,7 +672,64 @@ class AiService: ) -> str: """Call AI for image analysis using interface.call() with contentParts.""" await self._ensureAiObjectsInitialized() - return await self.coreAi.readImage(prompt, imageData, mimeType, options) + + try: + # Check if imageData is valid + if not imageData: + error_msg = "No image data provided" + logger.error(f"Error in AI image analysis: {error_msg}") + return f"Error: {error_msg}" + + + # Always use IMAGE_ANALYSE operation type for image processing + if options is None: + options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE) + else: + # Override the operation type to ensure image analysis + options.operationType = OperationTypeEnum.IMAGE_ANALYSE + + # Create content parts with image data + from modules.datamodels.datamodelExtraction import ContentPart + import base64 + + # ContentPart.data must be a string - convert bytes to base64 if needed + if isinstance(imageData, bytes): + imageDataStr = base64.b64encode(imageData).decode('utf-8') + else: + # Already a base64 
string + imageDataStr = imageData + + imagePart = ContentPart( + id="image_0", + parentId=None, + label="Image", + typeGroup="image", + mimeType=mimeType or "image/jpeg", + data=imageDataStr, # Must be a string (base64 encoded) + metadata={"imageAnalysis": True} + ) + + # Create request with content parts + request = AiCallRequest( + prompt=prompt, + context="", + options=options, + contentParts=[imagePart] + ) + + response = await self.aiObjects.call(request) + result = response.content + + # Check if result is valid + if not result or (isinstance(result, str) and not result.strip()): + error_msg = f"No response from AI image analysis (result: {repr(result)})" + logger.error(f"Error in AI image analysis: {error_msg}") + return f"Error: {error_msg}" + + return result + except Exception as e: + logger.error(f"Error in AI image analysis: {str(e)}") + return f"Error: {str(e)}" # AI Image Generation async def generateImage( @@ -115,34 +742,19 @@ class AiService: ) -> Dict[str, Any]: """Generate an image using AI using interface.generateImage().""" await self._ensureAiObjectsInitialized() - return await self.coreAi.generateImage(prompt, size, quality, style, options) - - - # Core AI Methods - Delegating to SubCoreAi - async def callAiPlanning( - self, - prompt: str, - placeholders: Optional[List[PromptPlaceholder]] = None - ) -> str: - """Planning AI call for task planning, action planning, action selection, etc.""" - await self._ensureAiObjectsInitialized() - # Always use "json" for planning calls since they return JSON - return await self.coreAi.callAiPlanning(prompt, placeholders) - - async def callAiDocuments( - self, - prompt: str, - documents: Optional[List[ChatDocument]] = None, - options: Optional[AiCallOptions] = None, - outputFormat: Optional[str] = None, - title: Optional[str] = None - ) -> Union[str, Dict[str, Any]]: - """Document generation AI call for all non-planning calls.""" - await self._ensureAiObjectsInitialized() - return await 
self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title) - - def sanitizePromptContent(self, content: str, contentType: str = "text") -> str: - """Sanitize prompt content to prevent injection attacks and ensure safe presentation.""" - return sanitizePromptContent(content, contentType) - + + try: + response = await self.aiObjects.generateImage(prompt, size, quality, style, options) + + # Emit stats for image generation + self.services.workflow.storeWorkflowStat( + self.services.currentWorkflow, + response, + f"ai.generate.image" + ) + + return response + except Exception as e: + logger.error(f"Error in AI image generation: {str(e)}") + return {"success": False, "error": str(e)} diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py deleted file mode 100644 index e35af0d0..00000000 --- a/modules/services/serviceAi/subCoreAi.py +++ /dev/null @@ -1,687 +0,0 @@ -import json -import logging -from typing import Dict, Any, List, Optional, Tuple, Union -from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum -from modules.services.serviceAi.subSharedAiUtils import ( - buildPromptWithPlaceholders, - extractTextFromContentParts, - reduceText, - determineCallType -) -from modules.shared.jsonUtils import ( - extractJsonString, - repairBrokenJson, - extractSectionsFromDocument, - buildContinuationContext -) - -logger = logging.getLogger(__name__) - -# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT -# Sections are accumulated and repair mechanism handles broken JSON automatically - -# Rebuild the model to resolve forward references -AiCallRequest.model_rebuild() - - -class SubCoreAi: - """Core AI operations including image analysis, text generation, and planning calls.""" - - def __init__(self, services, aiObjects): - """Initialize core AI operations. 
- - Args: - services: Service center instance for accessing other services - aiObjects: Initialized AiObjects instance - """ - self.services = services - self.aiObjects = aiObjects - - async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions: - """Analyze prompt to determine appropriate AiCallOptions parameters.""" - try: - # Get dynamic enum values from Pydantic models - operation_types = [e.value for e in OperationTypeEnum] - priorities = [e.value for e in PriorityEnum] - processing_modes = [e.value for e in ProcessingModeEnum] - - # Create analysis prompt for AI to determine operation type and parameters - analysisPrompt = f""" -You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters. - -PROMPT TO ANALYZE: -{self.services.ai.sanitizePromptContent(prompt, 'userinput')} - -Based on the prompt content, determine: -1. operationType: Choose the most appropriate from: {', '.join(operation_types)} -2. priority: Choose from: {', '.join(priorities)} -3. processingMode: Choose from: {', '.join(processing_modes)} -4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas) -5. 
compressContext: true/false (true to summarize context, false to process fully) - -Respond with ONLY a JSON object in this exact format: -{{ - "operationType": "dataAnalyse", - "priority": "balanced", - "processingMode": "basic", - "compressPrompt": true, - "compressContext": true -}} -""" - - # Use AI to analyze the prompt - request = AiCallRequest( - prompt=analysisPrompt, - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.SPEED, - processingMode=ProcessingModeEnum.BASIC, - compressPrompt=True, - compressContext=False - ) - ) - - response = await self.aiObjects.call(request) - - # Parse AI response - try: - import json - json_start = response.content.find('{') - json_end = response.content.rfind('}') + 1 - if json_start != -1 and json_end > json_start: - analysis = json.loads(response.content[json_start:json_end]) - - # Map string values to enums - operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse')) - priority = PriorityEnum(analysis.get('priority', 'balanced')) - processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic')) - - return AiCallOptions( - operationType=operation_type, - priority=priority, - processingMode=processing_mode, - compressPrompt=analysis.get('compressPrompt', True), - compressContext=analysis.get('compressContext', True) - ) - except Exception as e: - logger.warning(f"Failed to parse AI analysis response: {e}") - - except Exception as e: - logger.warning(f"Prompt analysis failed: {e}") - - # Fallback to default options - return AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.BASIC - ) - - - - # Shared Core Function for AI Calls with Looping and Repair - async def _callAiWithLooping( - self, - prompt: str, - options: AiCallOptions, - debugPrefix: str = "ai_call", - promptBuilder: Optional[callable] = None, - promptArgs: Optional[Dict[str, Any]] = None, - operationId: 
Optional[str] = None - ) -> str: - """ - Shared core function for AI calls with repair-based looping system. - Automatically repairs broken JSON and continues generation seamlessly. - - Args: - prompt: The prompt to send to AI - options: AI call configuration options - debugPrefix: Prefix for debug file names - promptBuilder: Optional function to rebuild prompts for continuation - promptArgs: Optional arguments for prompt builder - operationId: Optional operation ID for progress tracking - - Returns: - Complete AI response after all iterations - """ - max_iterations = 50 # Prevent infinite loops - iteration = 0 - allSections = [] # Accumulate all sections across iterations - lastRawResponse = None # Store last raw JSON response for continuation - - while iteration < max_iterations: - iteration += 1 - - # Update progress for iteration start - if operationId: - if iteration == 1: - self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}") - else: - # For continuation iterations, show progress incrementally - base_progress = 0.5 + (min(iteration - 1, max_iterations) / max_iterations * 0.4) # Progress from 0.5 to 0.9 over max_iterations iterations - self.services.workflow.progressLogUpdate(operationId, base_progress, f"Continuing generation (iteration {iteration})") - - # Build iteration prompt - if len(allSections) > 0 and promptBuilder and promptArgs: - # This is a continuation - build continuation context with raw JSON and rebuild prompt - continuationContext = buildContinuationContext(allSections, lastRawResponse) - if not lastRawResponse: - logger.warning(f"Iteration {iteration}: No previous response available for continuation!") - - # Rebuild prompt with continuation context using the provided prompt builder - iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext) - else: - # First iteration - use original prompt - iterationPrompt = prompt - - # Make AI call - try: - if operationId 
and iteration == 1: - self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model") - from modules.datamodels.datamodelAi import AiCallRequest - request = AiCallRequest( - prompt=iterationPrompt, - context="", - options=options - ) - - # Write the ACTUAL prompt sent to AI - if iteration == 1: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") - else: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") - - response = await self.aiObjects.call(request) - result = response.content - - # Update progress after AI call - if operationId: - if iteration == 1: - self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})") - else: - progress = 0.6 + (min(iteration - 1, 10) * 0.03) - self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})") - - # Write raw AI response to debug file - if iteration == 1: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") - else: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") - - # Emit stats for this iteration - self.services.workflow.storeWorkflowStat( - self.services.currentWorkflow, - response, - f"ai.call.{debugPrefix}.iteration_{iteration}" - ) - - if not result or not result.strip(): - logger.warning(f"Iteration {iteration}: Empty response, stopping") - break - - # Store raw response for continuation (even if broken) - lastRawResponse = result - - # Check for complete_response flag in raw response (before parsing) - import re - if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE): - pass # Flag detected, will stop in _shouldContinueGeneration - - # Extract sections from response (handles both valid and broken JSON) - extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix) - - # Update progress after parsing - if 
operationId: - if extractedSections: - self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})") - - if not extractedSections: - # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry - if iteration > 1 and not wasJsonComplete: - logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt") - continue - # Otherwise, stop if no sections - logger.warning(f"Iteration {iteration}: No sections extracted, stopping") - break - - # Add new sections to accumulator - allSections.extend(extractedSections) - - # Check if we should continue (completion detection) - if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result): - continue - else: - # Done - build final result - if operationId: - self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)") - break - - except Exception as e: - logger.error(f"Error in AI call iteration {iteration}: {str(e)}") - break - - if iteration >= max_iterations: - logger.warning(f"AI call stopped after maximum iterations ({max_iterations})") - - # Build final result from accumulated sections - final_result = self._buildFinalResultFromSections(allSections) - - # Write final result to debug file - self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result") - - return final_result - - def _extractSectionsFromResponse( - self, - result: str, - iteration: int, - debugPrefix: str - ) -> Tuple[List[Dict[str, Any]], bool]: - """ - Extract sections from AI response, handling both valid and broken JSON. - Uses repair mechanism for broken JSON. - Checks for "complete_response": true flag to determine completion. 
- Returns (sections, wasJsonComplete) - """ - # First, try to parse as valid JSON - try: - extracted = extractJsonString(result) - parsed_result = json.loads(extracted) - - # Check if AI marked response as complete - isComplete = parsed_result.get("complete_response", False) == True - - # Extract sections from parsed JSON - sections = extractSectionsFromDocument(parsed_result) - - # If AI marked as complete, always return as complete - if isComplete: - return sections, True - - # If in continuation mode (iteration > 1), continuation responses are expected to be fragments - # A fragment with 0 extractable sections means JSON is incomplete - need another iteration - if len(sections) == 0 and iteration > 1: - return sections, False # Mark as incomplete so loop continues - - # First iteration with 0 sections means empty response - stop - if len(sections) == 0: - return sections, True # Complete but empty - - return sections, True # JSON was complete with sections - - except json.JSONDecodeError as e: - # Broken JSON - try repair mechanism (normal in iterative generation) - self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}") - - # Try to repair - repaired_json = repairBrokenJson(result) - - if repaired_json: - # Extract sections from repaired JSON - sections = extractSectionsFromDocument(repaired_json) - return sections, False # JSON was broken but repaired - else: - # Repair failed - log error - logger.error(f"Iteration {iteration}: All repair strategies failed") - return [], False - - except Exception as e: - logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}") - return [], False - - def _shouldContinueGeneration( - self, - allSections: List[Dict[str, Any]], - iteration: int, - wasJsonComplete: bool, - rawResponse: str = None - ) -> bool: - """ - Determine if generation should continue based on JSON completeness and complete_response flag. - Returns True if we should continue, False if done. 
- """ - if len(allSections) == 0: - return True # No sections yet, continue - - # Check for complete_response flag in raw response - if rawResponse: - import re - if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE): - return False - - # If JSON was complete (and no complete_response flag), we're done - # If JSON was broken and repaired, continue to get more content - if wasJsonComplete: - return False - else: - return True - - def _buildFinalResultFromSections( - self, - allSections: List[Dict[str, Any]] - ) -> str: - """ - Build final JSON result from accumulated sections. - """ - if not allSections: - return "" - - # Build documents structure - # Assuming single document for now - documents = [{ - "id": "doc_1", - "title": "Generated Document", # This should come from prompt - "filename": "document.json", - "sections": allSections - }] - - result = { - "metadata": { - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "ai_generation" - }, - "documents": documents - } - - return json.dumps(result, indent=2) - - # Old _buildContinuationPrompt and _mergeJsonContent methods removed - # Now handled by repair mechanism in jsonUtils.py and section accumulation - - - # Planning AI Call - async def callAiPlanning( - self, - prompt: str, - placeholders: Optional[List[PromptPlaceholder]] = None - ) -> str: - """ - Planning AI call for task planning, action planning, action selection, etc. - Always uses static parameters optimized for planning tasks. 
- - Args: - prompt: The planning prompt - placeholders: Optional list of placeholder replacements - - Returns: - Planning JSON response - """ - # Planning calls always use static parameters - options = AiCallOptions( - operationType=OperationTypeEnum.PLAN, - priority=PriorityEnum.QUALITY, - processingMode=ProcessingModeEnum.DETAILED, - compressPrompt=False, - compressContext=False - ) - - # Build full prompt with placeholders - if placeholders: - placeholders_dict = {p.label: p.content for p in placeholders} - full_prompt = buildPromptWithPlaceholders(prompt, placeholders_dict) - else: - full_prompt = prompt - - # Use shared core function with planning-specific debug prefix - return await self._callAiWithLooping(full_prompt, options, "plan") - - # Document Generation AI Call - async def callAiDocuments( - self, - prompt: str, - documents: Optional[List[ChatDocument]] = None, - options: Optional[AiCallOptions] = None, - outputFormat: Optional[str] = None, - title: Optional[str] = None - ) -> Union[str, Dict[str, Any]]: - """ - Document generation AI call for all non-planning calls. - Uses the current unified path with extraction and generation. 
- - Args: - prompt: The main prompt for the AI call - documents: Optional list of documents to process - options: AI call configuration options - outputFormat: Optional output format for document generation - title: Optional title for generated documents - - Returns: - AI response as string, or dict with documents if outputFormat is specified - """ - # Create separate operationId for detailed progress tracking - import time - import uuid - workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}" - aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}" - - # Start progress tracking for this operation - self.services.workflow.progressLogStart( - aiOperationId, - "AI call with documents", - "Document Generation", - f"Format: {outputFormat or 'text'}" - ) - - try: - if options is None or (hasattr(options, 'operationType') and options.operationType is None): - # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None) - self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters") - options = await self._analyzePromptAndCreateOptions(prompt) - - # CRITICAL: For document generation with JSON templates, NEVER compress the prompt - # Compressing would truncate the template structure and confuse the AI - if outputFormat: # Document generation with structured output - if not options: - options = AiCallOptions() - options.compressPrompt = False # JSON templates must NOT be truncated - options.compressContext = False # Context also should not be compressed - - # Handle document generation with specific output format using unified approach - if outputFormat: - # Use unified generation method for all document generation - if documents and len(documents) > 0: - self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents") - extracted_content = await 
self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId) - else: - self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation") - extracted_content = None - - self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - # First call without continuation context - generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None) - - # Prepare prompt builder arguments for continuation - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": extracted_content - } - - self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId - ) - - self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") - # Parse the generated JSON (extract fenced/embedded JSON first) - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - logger.error(f"JSON content length: {len(generated_json)}") - logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}") - logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}") - - # Write the problematic JSON to debug file - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") - - self.services.workflow.progressLogFinish(aiOperationId, False) - return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"} - - self.services.workflow.progressLogUpdate(aiOperationId, 0.8, 
f"Rendering to {outputFormat} format") - # Render to final format using the existing renderer - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - rendered_content, mime_type = await generationService.renderReport( - generated_data, outputFormat, title or "Generated Document", prompt, self - ) - - # Build result in the expected format - result = { - "success": True, - "content": generated_data, - "documents": [{ - "documentName": f"generated.{outputFormat}", - "documentData": rendered_content, - "mimeType": mime_type, - "title": title or "Generated Document" - }], - "is_multi_file": False, - "format": outputFormat, - "title": title, - "split_strategy": "single", - "total_documents": 1, - "processed_documents": 1 - } - - # Log AI response for debugging - self.services.utils.writeDebugFile(str(result), "document_generation_response", documents) - - self.services.workflow.progressLogFinish(aiOperationId, True) - return result - - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - self.services.workflow.progressLogFinish(aiOperationId, False) - return {"success": False, "error": f"Rendering failed: {str(e)}"} - - # Handle text calls (no output format specified) - self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call") - if documents: - # Use document processing for text calls with documents - result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId) - else: - # Use shared core function for direct text calls - result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId) - - self.services.workflow.progressLogFinish(aiOperationId, True) - return result - - except Exception as e: - logger.error(f"Error in callAiDocuments: {str(e)}") - self.services.workflow.progressLogFinish(aiOperationId, False) - raise - - - # AI Image Analysis - async def 
readImage( - self, - prompt: str, - imageData: Union[str, bytes], - mimeType: str = None, - options: Optional[AiCallOptions] = None, - ) -> str: - """Call AI for image analysis using interface.call() with contentParts.""" - try: - # Check if imageData is valid - if not imageData: - error_msg = "No image data provided" - logger.error(f"Error in AI image analysis: {error_msg}") - return f"Error: {error_msg}" - - - # Always use IMAGE_ANALYSE operation type for image processing - if options is None: - options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE) - else: - # Override the operation type to ensure image analysis - options.operationType = OperationTypeEnum.IMAGE_ANALYSE - - # Create content parts with image data - from modules.datamodels.datamodelExtraction import ContentPart - import base64 - - # ContentPart.data must be a string - convert bytes to base64 if needed - if isinstance(imageData, bytes): - imageDataStr = base64.b64encode(imageData).decode('utf-8') - else: - # Already a base64 string - imageDataStr = imageData - - imagePart = ContentPart( - id="image_0", - parentId=None, - label="Image", - typeGroup="image", - mimeType=mimeType or "image/jpeg", - data=imageDataStr, # Must be a string (base64 encoded) - metadata={"imageAnalysis": True} - ) - - # Create request with content parts - from modules.datamodels.datamodelAi import AiCallRequest - request = AiCallRequest( - prompt=prompt, - context="", - options=options, - contentParts=[imagePart] - ) - - response = await self.aiObjects.call(request) - result = response.content - - # Check if result is valid - if not result or (isinstance(result, str) and not result.strip()): - error_msg = f"No response from AI image analysis (result: {repr(result)})" - logger.error(f"Error in AI image analysis: {error_msg}") - return f"Error: {error_msg}" - - return result - except Exception as e: - logger.error(f"Error in AI image analysis: {str(e)}") - return f"Error: {str(e)}" - - # AI Image Generation - 
async def generateImage( - self, - prompt: str, - size: str = "1024x1024", - quality: str = "standard", - style: str = "vivid", - options: Optional[AiCallOptions] = None, - ) -> Dict[str, Any]: - """Generate an image using AI using interface.generateImage().""" - try: - response = await self.aiObjects.generateImage(prompt, size, quality, style, options) - - # Emit stats for image generation - self.services.workflow.storeWorkflowStat( - self.services.currentWorkflow, - response, - f"ai.generate.image" - ) - - # Convert response to dict format for backward compatibility - if hasattr(response, 'content'): - return { - "success": True, - "content": response.content, - "modelName": response.modelName, - "priceUsd": response.priceUsd, - "processingTime": response.processingTime - } - else: - return response - except Exception as e: - logger.error(f"Error in AI image generation: {str(e)}") - return {"success": False, "error": str(e)} diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py deleted file mode 100644 index 351f68cc..00000000 --- a/modules/services/serviceAi/subDocumentGeneration.py +++ /dev/null @@ -1,500 +0,0 @@ -import re -import json -import logging -import time -from datetime import datetime, UTC -from typing import Dict, Any, List, Optional -from modules.datamodels.datamodelChat import ChatDocument -from modules.datamodels.datamodelAi import AiCallOptions - -logger = logging.getLogger(__name__) - - -class SubDocumentGeneration: - """Document generation operations including single-file and multi-file generation.""" - - def __init__(self, services, aiObjects, documentProcessor): - """Initialize document generation service. 
- - Args: - services: Service center instance for accessing other services - aiObjects: Initialized AiObjects instance - documentProcessor: Document processing service instance - """ - self.services = services - self.aiObjects = aiObjects - self.documentProcessor = documentProcessor - - async def callAiWithDocumentGeneration( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions, - outputFormat: str, - title: Optional[str] - ) -> Dict[str, Any]: - """ - Unified document generation method that handles both single and multi-file cases. - Always uses multi-file approach internally. - - Args: - prompt: The main prompt for the AI call - documents: Optional list of documents to process - options: AI call configuration options - outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx) - title: Optional title for generated documents - - Returns: - Dict with generated documents and metadata in unified structure - """ - try: - # 1. Get unified extraction prompt - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - extractionPrompt = await generationService.getAdaptiveExtractionPrompt( - outputFormat=outputFormat, - userPrompt=prompt, - title=title, - aiService=self - ) - - # 2. Process with unified pipeline (always multi-file approach) - aiResponse = await self._processDocumentsUnified( - documents, extractionPrompt, options - ) - - # 3. 
Return unified result structure - return await self._buildUnifiedResult(aiResponse, outputFormat, title) - - except Exception as e: - logger.error(f"Error in unified document generation: {str(e)}") - return self._buildErrorResult(str(e), outputFormat, title) - - async def _processDocumentsUnified( - self, - documents: Optional[List[ChatDocument]], - extractionPrompt: str, - options: AiCallOptions - ) -> Dict[str, Any]: - """ - Unified document processing that handles both single and multi-file cases. - Always processes as multi-file structure internally. - """ - - # Init progress logger - workflow = self.services.currentWorkflow - operationId = f"docGenUnified_{workflow.id}_{int(time.time())}" - - try: - # Start progress tracking - self.services.workflow.progressLogStart( - operationId, - "Generate", - "Unified Document Generation", - f"Processing {len(documents) if documents else 0} documents" - ) - - # Update progress - generating extraction prompt - self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt") - - # Write prompt to debug file - self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents) - - # Process with unified JSON pipeline using continuation logic - aiResponse = await self.documentProcessor.processDocumentsWithContinuation( - documents, extractionPrompt, options - ) - - # Update progress - AI processing completed - self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done") - - - - # Write AI response to debug file - response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False) if isinstance(aiResponse, dict) else str(aiResponse) - self.services.utils.writeDebugFile(response_json, "ai_response", documents) - - # Validate response structure - if not self._validateUnifiedResponseStructure(aiResponse): - raise Exception("AI response is not valid unified document structure") - - # Emit raw extracted data as a chat message attachment - try: - await 
self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified") - except Exception: - logger.warning("Failed to emit raw extraction chat message (unified)") - - # Complete progress tracking - self.services.workflow.progressLogFinish(operationId, True) - - return aiResponse - - except Exception as e: - logger.error(f"Error in unified document processing: {str(e)}") - self.services.workflow.progressLogFinish(operationId, False) - raise - - def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool: - """ - Unified validation that checks for document structure. - Handles both multi-file (documents array) and single-file (sections array) structures. - """ - try: - if not isinstance(response, dict): - logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}") - return False - - # Check for documents array (multi-file structure) - hasDocuments = "documents" in response - isDocumentsList = isinstance(response.get("documents"), list) - - # Check for sections array (single-file structure) - hasSections = "sections" in response - isSectionsList = isinstance(response.get("sections"), list) - - if hasDocuments and isDocumentsList: - # Multi-file structure - documents = response.get("documents", []) - if not documents: - logger.warning("Unified validation failed: documents array is empty") - return False - - # Validate each document individually - validDocuments = 0 - for i, doc in enumerate(documents): - if self._validateDocumentStructure(doc, i): - validDocuments += 1 - else: - logger.warning(f"Document {i} failed validation, but continuing with others") - - # Process succeeds if at least one document is valid - if validDocuments == 0: - logger.error("Unified validation failed: no valid documents found") - return False - - logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid") - return True - - elif hasSections and isSectionsList: - # Single-file structure - convert to 
multi-file format - logger.info("Converting single-file structure to multi-file format") - sections = response.get("sections", []) - if not sections: - logger.warning("Unified validation failed: sections array is empty") - return False - - # Convert to documents array format - response["documents"] = [{ - "id": "document_1", - "title": response.get("metadata", {}).get("title", "Generated Document"), - "filename": "document_1", - "sections": sections - }] - - logger.info("Successfully converted single-file structure to multi-file format") - return True - - else: - # No valid structure found - fail with clear error details - logger.error("Unified validation failed: No valid structure found") - logger.error(f"Response type: {type(response)}") - logger.error(f"Available keys: {list(response.keys()) if isinstance(response, dict) else 'Not a dict'}") - logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}") - logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}") - logger.error(f"Full response: {response}") - return False - - except Exception as e: - logger.warning(f"Unified response validation failed with exception: {str(e)}") - return False - - def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool: - """ - Validate individual document structure. - Returns True if document is valid, False otherwise. - Does not fail the entire process if one document is invalid. 
- """ - try: - if not isinstance(document, dict): - logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}") - logger.error(f"Document {documentIndex} content: {document}") - return False - - # Check for required fields - hasTitle = "title" in document - hasSections = "sections" in document - isSectionsList = isinstance(document.get("sections"), list) - - logger.debug(f"Document {documentIndex} structure check:") - logger.debug(f" - hasTitle: {hasTitle}") - logger.debug(f" - hasSections: {hasSections}") - logger.debug(f" - isSectionsList: {isSectionsList}") - logger.debug(f" - available keys: {list(document.keys())}") - - if not (hasTitle and hasSections and isSectionsList): - logger.error(f"Document {documentIndex} validation failed:") - logger.error(f" - title present: {hasTitle}") - logger.error(f" - sections present: {hasSections}") - logger.error(f" - sections is list: {isSectionsList}") - logger.error(f" - document content: {document}") - return False - - sections = document.get("sections", []) - if not sections: - logger.error(f"Document {documentIndex} validation failed: sections array is empty") - logger.error(f" - document content: {document}") - return False - - logger.info(f"Document {documentIndex} validation passed") - return True - - except Exception as e: - logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}") - logger.error(f" - document content: {document}") - return False - - async def _buildUnifiedResult( - self, - aiResponse: Dict[str, Any], - outputFormat: str, - title: str - ) -> Dict[str, Any]: - """ - Build unified result structure that always returns array-based format. - Content is always a multi-document structure. 
- """ - try: - # Process all documents uniformly - generatedDocuments = [] - documents = aiResponse.get("documents", []) - - for i, docData in enumerate(documents): - try: - processedDocument = await self._processDocument( - docData, outputFormat, title, i - ) - generatedDocuments.append(processedDocument) - except Exception as e: - logger.warning(f"Failed to process document {i}: {str(e)}, skipping") - continue - - if not generatedDocuments: - raise Exception("No documents could be processed successfully") - - # Build unified result - result = { - "success": True, - "content": aiResponse, # Always multi-document structure - "documents": generatedDocuments, # Always array - "is_multi_file": len(generatedDocuments) > 1, - "format": outputFormat, - "title": title, - "total_documents": len(generatedDocuments), - "processed_documents": len(generatedDocuments) - } - - return result - - except Exception as e: - logger.error(f"Error building unified result: {str(e)}") - return self._buildErrorResult(str(e), outputFormat, title) - - async def _processDocument( - self, - docData: Dict[str, Any], - outputFormat: str, - title: str, - documentIndex: int - ) -> Dict[str, Any]: - """ - Process individual document with content enhancement and rendering. 
- """ - try: - # Get generation service - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - # Use AI generation to enhance the extracted JSON before rendering - enhancedContent = docData # Default to original - if docData.get("sections"): - try: - # Get generation prompt directly - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - generationPrompt = await buildGenerationPrompt( - outputFormat=outputFormat, - userPrompt=title, - title=docData.get("title", title) - ) - - # Prepare the AI call - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - requestOptions = AiCallOptions() - requestOptions.operationType = OperationTypeEnum.DATA_GENERATE - - # Create context with the extracted JSON content - context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}" - - request = AiCallRequest( - prompt=generationPrompt, - context=context, - options=requestOptions - ) - - # Write document generation prompt to debug file - self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt") - - # Call AI to enhance the content - response = await self.aiObjects.call(request) - - # Write document generation response to debug file - self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response") - - if response and response.content: - # Parse the AI response as JSON - try: - result = response.content.strip() - - # Extract JSON from markdown if present - jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if jsonMatch: - result = jsonMatch.group(1).strip() - elif result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - # Try to 
parse JSON - enhancedContent = json.loads(result) - logger.info(f"AI enhanced JSON content successfully for document {documentIndex}") - - except json.JSONDecodeError as e: - logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content") - enhancedContent = docData - else: - logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content") - enhancedContent = docData - - except Exception as e: - logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content") - enhancedContent = docData - - # Render the enhanced JSON content - renderedContent, mimeType = await generationService.renderReport( - extractedContent=enhancedContent, - outputFormat=outputFormat, - title=docData.get("title", title), - userPrompt=title, - aiService=self - ) - - # Generate proper filename - baseFilename = docData.get("filename", f"document_{documentIndex + 1}") - if '.' in baseFilename: - baseFilename = baseFilename.rsplit('.', 1)[0] - - # Add proper extension based on output format - if outputFormat.lower() == "docx": - filename = f"{baseFilename}.docx" - elif outputFormat.lower() == "pdf": - filename = f"{baseFilename}.pdf" - elif outputFormat.lower() == "html": - filename = f"{baseFilename}.html" - else: - filename = f"{baseFilename}.{outputFormat}" - - return { - "documentName": filename, - "documentData": renderedContent, - "mimeType": mimeType, - "title": docData.get("title", title), - "documentIndex": documentIndex - } - - except Exception as e: - logger.error(f"Error processing document {documentIndex}: {str(e)}") - raise - - def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]: - """ - Build error result with unified structure. 
- """ - return { - "success": False, - "error": errorMessage, - "content": {}, - "documents": [], - "is_multi_file": False, - "format": outputFormat, - "title": title, - "split_strategy": "error", - "total_documents": 0, - "processed_documents": 0 - } - - async def _callAiJson( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions - ) -> Dict[str, Any]: - """ - Handle AI calls with document processing for JSON output. - Returns structured JSON document instead of text. - """ - # Process documents with JSON merging - return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options) - - - async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None: - """ - Create a ChatMessage with the extracted raw JSON attached as a file so the user - has access to the data even if downstream processing fails. - """ - try: - services = self.services - workflow = services.currentWorkflow - - # Serialize payload - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - content_text = json.dumps(payload, ensure_ascii=False, indent=2) - content_bytes = content_text.encode('utf-8') - - # Store as file via component storage - file_name = f"{label}_{ts}.json" - file_item = services.interfaceDbComponent.createFile( - name=file_name, - mimeType="application/json", - content=content_bytes - ) - services.interfaceDbComponent.createFileData(file_item.id, content_bytes) - - # Lookup file info for ChatDocument - file_info = services.workflow.getFileInfo(file_item.id) - doc = ChatDocument( - messageId="", # set after message creation - fileId=file_item.id, - fileName=file_info.get("fileName", file_name) if file_info else file_name, - fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes), - mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json" - ) - - # Create message referencing the file - include document in initial call - messageData 
= { - "workflowId": workflow.id, - "role": "assistant", - "message": "Raw extraction data saved", - "status": "data", - "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1, - "publishedAt": services.utils.timestampGetUtc(), - "documentsLabel": label, - "documents": [] - } - - # Store message with document included from the start - services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc]) - except Exception: - # Non-fatal; ignore if storage or chat creation fails - return \ No newline at end of file diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py index a9d01a8a..2903c066 100644 --- a/modules/services/serviceAi/subDocumentProcessing.py +++ b/modules/services/serviceAi/subDocumentProcessing.py @@ -5,7 +5,16 @@ import time from typing import Dict, Any, List, Optional from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum -from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted, PartResult, ExtractionOptions, MergeStrategy +from modules.datamodels.datamodelExtraction import ContentExtracted, PartResult, ExtractionOptions, MergeStrategy +# Resolve forward refs for ExtractionOptions (OperationTypeEnum) at runtime without using unsupported args +try: + # Import here to avoid circular import at module load time + from modules.datamodels.datamodelAi import OperationTypeEnum + # Provide parent namespace so Pydantic can resolve forward refs + ExtractionOptions.__pydantic_parent_namespace__ = {"OperationTypeEnum": OperationTypeEnum} + ExtractionOptions.model_rebuild() +except Exception as _e: + logging.getLogger(__name__).warning(f"ExtractionOptions forward-ref rebuild skipped: {_e}") from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService logger = logging.getLogger(__name__) @@ -121,373 +130,6 @@ class 
SubDocumentProcessing: self.services.workflow.progressLogFinish(operationId, False) raise - async def processDocumentsPerChunkJson( - self, - documents: List[ChatDocument], - prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with model-aware chunking and merge results in JSON mode. - Returns structured JSON document instead of text. - """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Build extraction options using Pydantic model - mergeStrategy = MergeStrategy( - useIntelligentMerging=True, - prompt=prompt, - groupBy="typeGroup", - orderBy="id", - mergeType="concatenate" - ) - - extractionOptions = ExtractionOptions( - prompt=prompt, - operationType=options.operationType if options else OperationTypeEnum.DATA_EXTRACT, - processDocumentsIndividually=True, - mergeStrategy=mergeStrategy - ) - - logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.prompt)} chars, operationType={extractionOptions.operationType}") - - try: - # Extract content WITHOUT chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return {"metadata": {"title": "Error Document"}, "sections": []} - - # Process parts with model-aware chunking - partResults = await self._processPartsWithMapping(extractionResult, prompt, options) - - # Convert to JSON format (simplified for now) - mergedJsonDocument = self._convertPartResultsToJson(partResults, options) - - # Normalize merged JSON into a single canonical table (only if table content exists) - try: - from modules.services.serviceNormalization.mainServiceNormalization import NormalizationService - normalizer = NormalizationService(self.services) - inventory = normalizer.discoverStructures(mergedJsonDocument) - - # Check if any table content was discovered - tableHeaders = inventory.get("tableHeaders", []) - if not 
tableHeaders: - logger.info("No table content found in merged JSON, skipping normalization and returning original structure") - else: - # Use workflow id as cache key - cacheKey = self.services.currentWorkflow.id - # Provide the extraction/merge prompt context when available to help mapping - mergePrompt = prompt - mapping = await normalizer.requestHeaderMapping(inventory, cacheKey, None, mergePrompt) - canonical = normalizer.applyMapping(mergedJsonDocument, mapping) - report = normalizer.validateCanonical(canonical) - if report.get('success'): - mergedJsonDocument = canonical - else: - raise ValueError('Normalization produced zero rows') - except Exception as e: - # Log normalization failure but don't re-raise - continue with original merged JSON - logger.warning(f"Normalization failed (expected): {str(e)}") - logger.debug(f"Normalization error type: {type(e).__name__}") - # Continue with original merged JSON instead of re-raising - - # Save merged JSON extraction content to debug - jsonStr = json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2) - self.services.utils.writeDebugFile(jsonStr, "extraction_merged_json") - - return mergedJsonDocument - - except Exception as e: - logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}") - logger.error(f"Exception type: {type(e).__name__}") - logger.error(f"Exception args: {e.args}") - import traceback - logger.error(f"Traceback: {traceback.format_exc()}") - return {"metadata": {"title": "Error Document"}, "sections": []} - - async def processDocumentsPerChunkJsonWithPrompt( - self, - documents: List[ChatDocument], - custom_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with per-chunk AI calls and merge results in JSON mode. - Uses a custom prompt instead of the default extraction prompt. - Enhanced with partial results continuation logic. 
- """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Build extraction options using Pydantic model (model-aware chunking in AI call phase) - mergeStrategy = MergeStrategy( - useIntelligentMerging=True, - prompt=custom_prompt, - groupBy="typeGroup", - orderBy="id", - mergeType="concatenate" - ) - - extractionOptions = ExtractionOptions( - prompt=custom_prompt, # Use the custom prompt instead of default - operationType=options.operationType if options else OperationTypeEnum.DATA_EXTRACT, - processDocumentsIndividually=True, # Process each document separately - imageMaxPixels=1024 * 1024, - imageQuality=85, - mergeStrategy=mergeStrategy - ) - - logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.prompt)} chars, operationType={extractionOptions.operationType}") - - try: - # Extract content with chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return {"metadata": {"title": "Error Document"}, "sections": []} - - # Process chunks with proper mapping - logger.info(f"Processing {len(extractionResult)} chunks with custom prompt") - logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...") - - # Debug: Show what content is being processed (before filtering) - for i, ec in enumerate(extractionResult): - if hasattr(ec, 'parts'): - for j, part in enumerate(ec.parts): - if not (hasattr(part, 'data') and part.data): - # Check if this is an empty container chunk (which is expected) - part_type = getattr(part, 'typeGroup', None) - part_mime = getattr(part, 'mimeType', '') - - is_empty_container = ( - part_type == "container" and - part_mime and - 'document' in part_mime.lower() - ) - - if not is_empty_container: - logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'") - - chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, 
options, generate_json=True) - - # Debug: Show what chunks were actually processed (after filtering) - logger.info(f"After filtering: {len(chunkResults)} chunks will be processed") - - # Merge with JSON mode - mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) - - # Debug: Show what the AI actually returned - logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}") - if 'documents' in mergedJsonDocument: - logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}") - elif 'sections' in mergedJsonDocument: - logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}") - - return mergedJsonDocument - - except Exception as e: - logger.error(f"Error in per-chunk JSON processing: {str(e)}") - return {"metadata": {"title": "Error Document"}, "sections": []} - - async def processDocumentsWithContinuation( - self, - documents: List[ChatDocument], - custom_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with partial results continuation logic. - Handles AI responses that indicate partial completion and loops until complete. - """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - logger.info("Starting document processing with continuation logic") - - # Build enhanced prompt with continuation instructions - enhanced_prompt = self._buildContinuationPrompt(custom_prompt) - - # Process with continuation logic - return await self._processWithContinuationLoop(documents, enhanced_prompt, options) - - def _buildContinuationPrompt(self, base_prompt: str) -> str: - """ - Build a prompt that includes partial results continuation instructions. 
- - NOTE: This uses a different continuation pattern than SubCoreAi: - - This uses "continue": true/false + "continuation_context" for document sections - - Kept separate because it's tightly coupled to document processing needs - """ - continuation_instructions = """ - -IMPORTANT CHUNKING LOGIC: -- If the response is too large to generate completely in one response, set "continue": true -- When "continue": true, include a "continuation_context" field with: - - "last_section_id": "id of the last completed section" - - "last_element_index": "index of the last completed element in that section" - - "remaining_requirements": "brief description of what still needs to be generated" -- The AI will be called again with this context to continue generation -- Only set "continue": false when the response is completely generated - -OUTPUT FORMAT: Return only valid JSON in this exact structure: -{ - "metadata": { - "title": "Document Title" - }, - "sections": [ - { - "id": "section_1", - "content_type": "paragraph", - "elements": [ - { - "text": "This is the actual content that should be generated." - } - ], - "order": 1 - } - ], - "continue": false, - "continuation_context": { - "last_section_id": "section_1", - "last_element_index": 0, - "remaining_requirements": "description of what still needs to be generated" - } -} - -The AI should generate content using the canonical format with "sections" and "elements". -""" - - return f"{base_prompt}{continuation_instructions}" - - async def _processWithContinuationLoop( - self, - documents: List[ChatDocument], - enhanced_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with continuation loop until complete. 
- """ - max_iterations = 10 # Prevent infinite loops - iteration = 0 - accumulated_sections = [] - continuation_context = None - - while iteration < max_iterations: - iteration += 1 - logger.info(f"Continuation iteration {iteration}/{max_iterations}") - - # Build prompt for this iteration - if continuation_context: - iteration_prompt = self._buildContinuationIterationPrompt( - enhanced_prompt, continuation_context, accumulated_sections - ) - else: - iteration_prompt = enhanced_prompt - - # Process documents for this iteration - try: - # Use the existing processing method - result = await self.processDocumentsPerChunkJsonWithPrompt( - documents, iteration_prompt, options - ) - - # Check if this is a valid JSON response - if not isinstance(result, dict): - logger.warning(f"Iteration {iteration}: Invalid result type, stopping") - break - - # Extract sections from result - sections = result.get("sections", []) - if not sections: - logger.warning(f"Iteration {iteration}: No sections found, stopping") - break - - # Add sections to accumulated results - for section in sections: - # Update section order to maintain sequence - section["order"] = len(accumulated_sections) + 1 - accumulated_sections.append(section) - - # Check if continuation is needed - continue_flag = result.get("continue", False) - continuation_context = result.get("continuation_context") - - logger.info(f"Iteration {iteration}: Added {len(sections)} sections, continue={continue_flag}") - - if not continue_flag: - logger.info(f"Continuation complete after {iteration} iterations") - break - - if not continuation_context: - logger.warning(f"Iteration {iteration}: continue=true but no continuation_context, stopping") - break - - except Exception as e: - logger.error(f"Iteration {iteration} failed: {str(e)}") - break - - if iteration >= max_iterations: - logger.warning(f"Continuation stopped after maximum iterations ({max_iterations})") - - # Build final result - final_result = { - "metadata": { - "title": 
"Generated Document", - "total_sections": len(accumulated_sections), - "iterations": iteration, - "continuation_used": iteration > 1 - }, - "sections": accumulated_sections, - "continue": False - } - - logger.info(f"Final result: {len(accumulated_sections)} sections from {iteration} iterations") - return final_result - - def _buildContinuationIterationPrompt( - self, - base_prompt: str, - continuation_context: Dict[str, Any], - accumulated_sections: List[Dict[str, Any]] - ) -> str: - """ - Build a prompt for continuation iteration with context. - """ - last_section_id = continuation_context.get("last_section_id", "") - last_element_index = continuation_context.get("last_element_index", 0) - remaining_requirements = continuation_context.get("remaining_requirements", "") - - # Build context of what's already been generated - context_summary = "PREVIOUSLY GENERATED CONTENT:\n" - for i, section in enumerate(accumulated_sections[-3:]): # Show last 3 sections for context - context_summary += f"Section {i+1}: {section.get('id', 'unknown')}\n" - if 'elements' in section and section['elements']: - first_element = section['elements'][0] - if 'text' in first_element: - preview = first_element['text'][:100] + "..." 
if len(first_element['text']) > 100 else first_element['text'] - context_summary += f" Preview: {preview}\n" - - continuation_prompt = f""" -{base_prompt} - -{context_summary} - -CONTINUATION INSTRUCTIONS: -- Continue from where you left off -- Last completed section: {last_section_id} -- Last completed element index: {last_element_index} -- Remaining requirements: {remaining_requirements} -- Generate the next part of the content -- Maintain consistency with previously generated content -- Use the same JSON format as before -- Set "continue": true if more content is needed, false if complete -""" - - return continuation_prompt - async def callAiText( self, prompt: str, @@ -513,8 +155,8 @@ CONTINUATION INSTRUCTIONS: import asyncio # Collect all parts that need processing - parts_to_process = [] - part_index = 0 + partsToProcess = [] + partIndex = 0 for ec in extractionResult: for part in ec.parts: @@ -524,24 +166,24 @@ CONTINUATION INSTRUCTIONS: logger.debug(f"Skipping empty container part: mimeType={part.mimeType}") continue - parts_to_process.append({ + partsToProcess.append({ 'part': part, - 'part_index': part_index, + 'part_index': partIndex, 'document_id': ec.id }) - part_index += 1 + partIndex += 1 - logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking") + logger.info(f"Processing {len(partsToProcess)} parts with model-aware chunking") - total_parts = len(parts_to_process) + totalParts = len(partsToProcess) # Process parts in parallel - processed_count = [0] # Use list to allow modification in nested function + processedCount = [0] # Use list to allow modification in nested function - async def process_single_part(part_info: Dict) -> PartResult: - part = part_info['part'] - part_index = part_info['part_index'] - document_id = part_info['document_id'] + async def processSinglePart(partInfo: Dict) -> PartResult: + part = partInfo['part'] + part_index = partInfo['part_index'] + documentId = partInfo['document_id'] start_time = 
time.time() @@ -556,10 +198,10 @@ CONTINUATION INSTRUCTIONS: ) # Update progress before AI call - if operationId and total_parts > 0: - processed_count[0] += 1 - progress = 0.3 + (processed_count[0] / total_parts * 0.6) # Progress from 0.3 to 0.9 - self.services.workflow.progressLogUpdate(operationId, progress, f"Processing part {processed_count[0]}/{total_parts}") + if operationId and totalParts > 0: + processedCount[0] += 1 + progress = 0.3 + (processedCount[0] / totalParts * 0.6) # Progress from 0.3 to 0.9 + self.services.workflow.progressLogUpdate(operationId, progress, f"Processing part {processedCount[0]}/{totalParts}") # Call AI with model-aware chunking response = await self.aiObjects.call(request) @@ -570,7 +212,7 @@ CONTINUATION INSTRUCTIONS: originalPart=part, aiResult=response.content, partIndex=part_index, - documentId=document_id, + documentId=documentId, processingTime=processing_time, metadata={ "success": True, @@ -590,7 +232,7 @@ CONTINUATION INSTRUCTIONS: originalPart=part, aiResult=f"[Error processing part: {str(e)}]", partIndex=part_index, - documentId=document_id, + documentId=documentId, processingTime=processing_time, metadata={ "success": False, @@ -601,25 +243,25 @@ CONTINUATION INSTRUCTIONS: ) # Process parts with concurrency control - max_concurrent = 5 + maxConcurrent = 5 if options and hasattr(options, 'maxConcurrentParts'): - max_concurrent = options.maxConcurrentParts + maxConcurrent = options.maxConcurrentParts - semaphore = asyncio.Semaphore(max_concurrent) + semaphore = asyncio.Semaphore(maxConcurrent) - async def process_with_semaphore(part_info): + async def processWithSemaphore(partInfo): async with semaphore: - return await process_single_part(part_info) + return await processSinglePart(partInfo) - tasks = [process_with_semaphore(part_info) for part_info in parts_to_process] - part_results = await asyncio.gather(*tasks, return_exceptions=True) + tasks = [processWithSemaphore(part_info) for part_info in partsToProcess] + 
partResults = await asyncio.gather(*tasks, return_exceptions=True) # Handle exceptions - processed_results = [] - for i, result in enumerate(part_results): + processedResults = [] + for i, result in enumerate(partResults): if isinstance(result, Exception): - part_info = parts_to_process[i] - processed_results.append(PartResult( + part_info = partsToProcess[i] + processedResults.append(PartResult( originalPart=part_info['part'], aiResult=f"[Error in parallel processing: {str(result)}]", partIndex=part_info['part_index'], @@ -628,412 +270,10 @@ CONTINUATION INSTRUCTIONS: metadata={"success": False, "error": str(result)} )) elif result is not None: - processed_results.append(result) + processedResults.append(result) - logger.info(f"Completed processing {len(processed_results)} parts") - return processed_results - - async def _processChunksWithMapping( - self, - extractionResult: List[ContentExtracted], - prompt: str, - options: Optional[AiCallOptions] = None, - generate_json: bool = False - ) -> List[ChunkResult]: - """Process chunks with proper mapping to preserve relationships.""" - from modules.datamodels.datamodelExtraction import ChunkResult - import asyncio - - # Collect all chunks that need processing with proper indexing - chunks_to_process = [] - chunk_index = 0 - - for ec in extractionResult: - # Get document MIME type from metadata - document_mime_type = None - for part in ec.parts: - if part.metadata and 'documentMimeType' in part.metadata: - document_mime_type = part.metadata['documentMimeType'] - break - - for part in ec.parts: - if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"): - # Skip empty container chunks (they're just metadata containers) - if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): - logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") - continue - - chunks_to_process.append({ - 'part': part, - 
'chunk_index': chunk_index, - 'document_id': ec.id, - 'document_mime_type': document_mime_type - }) - chunk_index += 1 - - logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping") - - # Process chunks in parallel with proper mapping - async def process_single_chunk(chunk_info: Dict) -> ChunkResult: - part = chunk_info['part'] - chunk_index = chunk_info['chunk_index'] - document_id = chunk_info['document_id'] - document_mime_type = chunk_info.get('document_mime_type', part.mimeType) - - start_time = time.time() - - try: - # FIXED: Check MIME type first, then fallback to typeGroup - is_image = ( - (document_mime_type and document_mime_type.startswith('image/')) or - (part.mimeType and part.mimeType.startswith('image/')) or - (part.typeGroup == "image") - ) - - # Debug logging - self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}") - - if is_image: - # Use the same extraction prompt for image analysis (contains table JSON format) - self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - - # Check if image data is available - if not part.data: - error_msg = f"No image data available for chunk {chunk_index}" - logger.warning(error_msg) - ai_result = f"Error: {error_msg}" - else: - try: - # Import here to avoid circular imports - from modules.services.serviceAi.subCoreAi import SubCoreAi - core_ai = SubCoreAi(self.services, self.aiObjects) - - ai_result = await core_ai.readImage( - prompt=prompt, - imageData=part.data, - mimeType=part.mimeType, - options=options - ) - - self.services.utils.debugLogToFile(f"Image analysis result for 
chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE") - # Save image extraction response to debug file - self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}_response") - - # Check if result is empty or None - if not ai_result or not ai_result.strip(): - logger.warning(f"Image chunk {chunk_index} returned empty response from AI") - ai_result = "No content detected in image" - - except Exception as e: - logger.error(f"Error processing image chunk {chunk_index}: {str(e)}") - ai_result = f"Error analyzing image: {str(e)}" - - # If generating JSON, clean image analysis result - if generate_json: - try: - - # Clean the response - remove markdown code blocks if present - cleaned_result = ai_result.strip() - - # Remove various markdown patterns - if cleaned_result.startswith('```json'): - cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - elif cleaned_result.startswith('```'): - cleaned_result = re.sub(r'^```\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - - # Remove any leading/trailing text that's not JSON - # Look for the first { and last } to extract JSON - first_brace = cleaned_result.find('{') - last_brace = cleaned_result.rfind('}') - - if first_brace != -1 and last_brace != -1 and last_brace > first_brace: - cleaned_result = cleaned_result[first_brace:last_brace + 1] - - # Additional cleaning for common AI response issues - cleaned_result = cleaned_result.strip() - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE") - - except json.JSONDecodeError as e: - logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}") - logger.warning(f"Raw response was: 
'{ai_result[:500]}...'") - - # Create fallback JSON with the actual response content (not the error message) - # Use the original AI response content, not the error message - fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" - - self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") - - ai_result = json.dumps({ - "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"}, - "sections": [{ - "id": f"image_section_{chunk_index}", - "content_type": "paragraph", - "elements": [{"text": fallback_content}] - }] - }) - self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE") - elif part.typeGroup in ("container", "binary"): - # Handle ALL container and binary content generically - let AI process any document type - self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - - # Skip empty container chunks (they're just metadata containers) - if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): - self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") - # Skip processing this chunk - pass - elif part.mimeType and part.data and len(part.data.strip()) > 0: - # Process any document container as text content - request_options = options if options is not None else AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_EXTRACT - self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", 
"AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") - - # Log extraction prompt and context - self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE") - self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") - - # Strengthen prompt to forbid fabrication for text/container extraction - augmented_prompt = ( - f"{prompt}\n\n" - "CRITICAL RULES (NO FABRICATION):\n" - "- Use ONLY content present in the provided CONTEXT.\n" - "- Do NOT create, infer, or guess values not explicitly in the context.\n" - "- If a value is missing, leave the cell empty or omit the row.\n" - ) - request = AiCallRequest( - prompt=augmented_prompt, - context=part.data, - options=request_options - ) - response = await self.aiObjects.call(request) - ai_result = response.content - - # Log extraction response - self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - - # Save extraction prompt and response to debug - self.services.utils.writeDebugFile(augmented_prompt, f"extraction_chunk_{chunk_index}_prompt") - self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response") - - # If generating JSON, validate the response - if generate_json: - try: - - # Clean the response - remove markdown code blocks if present - cleaned_result = ai_result.strip() - - # Remove various markdown patterns - if cleaned_result.startswith('```json'): - cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - elif cleaned_result.startswith('```'): - cleaned_result = re.sub(r'^```\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - - # Remove any leading/trailing text that's not JSON - # Look for the first { and last } to extract JSON - first_brace 
= cleaned_result.find('{') - last_brace = cleaned_result.rfind('}') - - if first_brace != -1 and last_brace != -1 and last_brace > first_brace: - cleaned_result = cleaned_result[first_brace:last_brace + 1] - - # Additional cleaning for common AI response issues - cleaned_result = cleaned_result.strip() - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - - except json.JSONDecodeError as e: - logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}") - logger.warning(f"Raw response was: '{ai_result[:500]}...'") - - # Create fallback JSON with the actual response content (not the error message) - # Use the original AI response content, not the error message - fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" - - self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") - - ai_result = json.dumps({ - "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"}, - "sections": [{ - "id": f"analysis_section_{chunk_index}", - "content_type": "paragraph", - "elements": [{"text": fallback_content}] - }] - }) - self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE") - else: - # Skip empty or invalid container/binary content - don't create a result - self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - # Return None to indicate this chunk should be completely skipped - return None - else: - # Ensure options is not None and set correct operation type for text - request_options = options if options is not None else AiCallOptions() - # FIXED: Set operation type to general for text processing - request_options.operationType = OperationTypeEnum.DATA_EXTRACT - 
self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") - - # Log extraction context length - self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") - - # Debug: Log the actual prompt being sent to AI - logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...") - logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...") - - # Strengthen prompt to forbid fabrication for text extraction - augmented_prompt_text = ( - f"{prompt}\n\n" - "CRITICAL RULES (NO FABRICATION):\n" - "- Use ONLY content present in the provided CONTEXT.\n" - "- Do NOT create, infer, or guess values not explicitly in the context.\n" - "- If a value is missing, leave the cell empty or omit the row.\n" - ) - request = AiCallRequest( - prompt=augmented_prompt_text, - context=part.data, - options=request_options - ) - response = await self.aiObjects.call(request) - - # Debug: Log what AI actually returned - logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...") - ai_result = response.content - - # Log extraction response length - self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - - # Save extraction prompt and response to debug - self.services.utils.writeDebugFile(augmented_prompt_text, f"extraction_chunk_{chunk_index}_prompt") - self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response") - - # If generating JSON, validate the response - if generate_json: - try: - - # Clean the response - remove markdown code blocks and extra formatting - cleaned_result = ai_result.strip() - - # 
Remove any markdown code block markers (```json, ```, etc.) - cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE) - cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE) - - # Remove any remaining ``` markers anywhere in the text - cleaned_result = re.sub(r'```', '', cleaned_result) - - # Try to extract JSON from the response if it's embedded in other text - json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL) - if json_match: - cleaned_result = json_match.group(0) - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - - except json.JSONDecodeError as e: - logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}") - # Create fallback JSON - ai_result = json.dumps({ - "metadata": {"title": "Error Section"}, - "sections": [{ - "id": f"error_section_{chunk_index}", - "content_type": "paragraph", - "elements": [{"text": f"Error parsing JSON: {str(e)}"}] - }] - }) - - processing_time = time.time() - start_time - - logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s") - - return ChunkResult( - originalChunk=part, - aiResult=ai_result, - chunkIndex=chunk_index, - documentId=document_id, - processingTime=processing_time, - metadata={ - "success": True, - "chunkSize": len(part.data) if part.data else 0, - "resultSize": len(ai_result), - "typeGroup": part.typeGroup - } - ) - - except Exception as e: - processing_time = time.time() - start_time - logger.warning(f"Error processing chunk {chunk_index}: {str(e)}") - - return ChunkResult( - originalChunk=part, - aiResult=f"[Error processing chunk: {str(e)}]", - chunkIndex=chunk_index, - documentId=document_id, - processingTime=processing_time, - metadata={ - "success": False, - "error": str(e), - "chunkSize": len(part.data) if part.data else 0, - "typeGroup": part.typeGroup - } - ) - - # Process chunks with concurrency control - max_concurrent = 5 # Default 
concurrency - if options and hasattr(options, 'maxConcurrentChunks'): - max_concurrent = options.maxConcurrentChunks - elif options and hasattr(options, 'maxParallelChunks'): - max_concurrent = options.maxParallelChunks - - logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}") - self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE") - for i, chunk_info in enumerate(chunks_to_process): - self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE") - - # Create semaphore for concurrency control - semaphore = asyncio.Semaphore(max_concurrent) - - async def process_with_semaphore(chunk_info): - async with semaphore: - return await process_single_chunk(chunk_info) - - # Process all chunks in parallel with concurrency control - tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process] - self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE") - chunk_results = await asyncio.gather(*tasks, return_exceptions=True) - self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE") - - # Handle any exceptions in the gather itself - processed_results = [] - for i, result in enumerate(chunk_results): - if isinstance(result, Exception): - # Create error ChunkResult - chunk_info = chunks_to_process[i] - processed_results.append(ChunkResult( - originalChunk=chunk_info['part'], - aiResult=f"[Error in parallel processing: {str(result)}]", - chunkIndex=chunk_info['chunk_index'], - documentId=chunk_info['document_id'], - processingTime=0.0, - metadata={"success": False, "error": str(result)} - )) - elif result is not None: - # Only add non-None results (skip empty containers) - 
processed_results.append(result) - - logger.info(f"Completed processing {len(processed_results)} chunks") - return processed_results + logger.info(f"Completed processing {len(processedResults)} parts") + return processedResults def _mergePartResults( self, @@ -1221,253 +461,3 @@ CONTINUATION INSTRUCTIONS: logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system") return merged_document - - def _mergeChunkResults( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> str: - """Merge chunk results using existing sophisticated merging system.""" - if not chunkResults: - return "" - - # Convert ChunkResults back to ContentParts for existing merger system - from modules.datamodels.datamodelExtraction import ContentPart - content_parts = [] - for chunk_result in chunkResults: - # Create ContentPart from ChunkResult with proper typeGroup - content_part = ContentPart( - id=chunk_result.originalChunk.id, - parentId=chunk_result.originalChunk.parentId, - label=chunk_result.originalChunk.label, - typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup - mimeType=chunk_result.originalChunk.mimeType, - data=chunk_result.aiResult, # Use AI result as data - metadata={ - **chunk_result.originalChunk.metadata, - "aiResult": True, - "chunk": True, - "chunkIndex": chunk_result.chunkIndex, - "documentId": chunk_result.documentId, - "processingTime": chunk_result.processingTime, - "success": chunk_result.metadata.get("success", False) - } - ) - content_parts.append(content_part) - - # Use existing merging strategy from options - merge_strategy = MergeStrategy( - useIntelligentMerging=True, - groupBy="documentId", # Group by document - orderBy="chunkIndex", # Order by chunk index - mergeType="concatenate" - ) - - - # Apply existing merging logic using the sophisticated merging system - from modules.services.serviceExtraction.subPipeline import _applyMerging - merged_parts = 
_applyMerging(content_parts, merge_strategy) - - # Convert merged parts back to final string - final_content = "\n\n".join([part.data for part in merged_parts]) - - logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system") - return final_content.strip() - - def _mergeChunkResultsClean( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> str: - """Merge chunk results in CLEAN mode using existing sophisticated merging system.""" - if not chunkResults: - return "" - - # Convert ChunkResults back to ContentParts for existing merger system - from modules.datamodels.datamodelExtraction import ContentPart - content_parts = [] - for chunk_result in chunkResults: - # Skip empty or error chunks in clean mode - if not chunk_result.metadata.get("success", False): - continue - if not chunk_result.aiResult or not chunk_result.aiResult.strip(): - continue - # Skip container/binary chunks in clean mode - if chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult: - continue - - # Create ContentPart from ChunkResult with proper typeGroup - content_part = ContentPart( - id=chunk_result.originalChunk.id, - parentId=chunk_result.originalChunk.parentId, - label=chunk_result.originalChunk.label, - typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup - mimeType=chunk_result.originalChunk.mimeType, - data=chunk_result.aiResult, # Use AI result as data - metadata={ - **chunk_result.originalChunk.metadata, - "aiResult": True, - "chunk": True, - "chunkIndex": chunk_result.chunkIndex, - "documentId": chunk_result.documentId, - "processingTime": chunk_result.processingTime, - "success": chunk_result.metadata.get("success", False) - } - ) - content_parts.append(content_part) - - # Use existing merging strategy for clean mode - merge_strategy = MergeStrategy( - useIntelligentMerging=True, - groupBy="documentId", # Group by document - orderBy="chunkIndex", # Order by chunk 
index - mergeType="concatenate" - ) - - - # Apply existing merging logic using the sophisticated merging system - from modules.services.serviceExtraction.subPipeline import _applyMerging - merged_parts = _applyMerging(content_parts, merge_strategy) - - # Convert merged parts back to final string - final_content = "\n\n".join([part.data for part in merged_parts]) - - logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system") - return final_content.strip() - - def _mergeChunkResultsJson( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """Merge chunk results in JSON mode using existing sophisticated merging system.""" - if not chunkResults: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Convert ChunkResults back to ContentParts for existing merger system - from modules.datamodels.datamodelExtraction import ContentPart - content_parts = [] - for chunk_result in chunkResults: - # Create ContentPart from ChunkResult with proper typeGroup - content_part = ContentPart( - id=chunk_result.originalChunk.id, - parentId=chunk_result.originalChunk.parentId, - label=chunk_result.originalChunk.label, - typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup - mimeType=chunk_result.originalChunk.mimeType, - data=chunk_result.aiResult, # Use AI result as data - metadata={ - **chunk_result.originalChunk.metadata, - "aiResult": True, - "chunk": True, - "chunkIndex": chunk_result.chunkIndex, - "documentId": chunk_result.documentId, - "processingTime": chunk_result.processingTime, - "success": chunk_result.metadata.get("success", False) - } - ) - content_parts.append(content_part) - - # Use existing merging strategy for JSON mode - merge_strategy = MergeStrategy( - useIntelligentMerging=True, - groupBy="documentId", # Group by document - orderBy="chunkIndex", # Order by chunk index - mergeType="concatenate" - ) - - - # Apply existing 
merging logic using the sophisticated merging system - from modules.services.serviceExtraction.subPipeline import _applyMerging - merged_parts = _applyMerging(content_parts, merge_strategy) - - # Convert merged parts to JSON format - all_sections = [] - document_titles = [] - - for part in merged_parts: - if part.metadata.get("success", False): - try: - # Parse JSON from AI result - chunk_json = json.loads(part.data) - - # Check if this is a multi-file response (has "documents" key) - if isinstance(chunk_json, dict) and "documents" in chunk_json: - # This is a multi-file response - merge all documents - logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents") - - # Return multi-file response directly - return { - "metadata": chunk_json.get("metadata", {"title": "Merged Document"}), - "documents": chunk_json["documents"] - } - - # Extract sections from single-file response - elif isinstance(chunk_json, dict) and "sections" in chunk_json: - for section in chunk_json["sections"]: - # Add part context to section - section["metadata"] = section.get("metadata", {}) - section["metadata"]["source_part"] = part.id - section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown") - section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0) - all_sections.append(section) - - # Extract document title - if isinstance(chunk_json, dict) and "metadata" in chunk_json: - title = chunk_json["metadata"].get("title", "") - if title and title not in document_titles: - document_titles.append(title) - - except json.JSONDecodeError as e: - logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}") - # Create a fallback section for invalid JSON - fallback_section = { - "id": f"error_section_{part.id}", - "title": "Error Section", - "content_type": "paragraph", - "elements": [{ - "text": f"Error parsing part {part.id}: {str(e)}" - }], - "order": part.metadata.get("chunkIndex", 0), - 
"metadata": { - "source_document": part.metadata.get("documentId", "unknown"), - "part_id": part.id, - "error": str(e) - } - } - all_sections.append(fallback_section) - else: - # Handle error parts - error_section = { - "id": f"error_section_{part.id}", - "title": "Error Section", - "content_type": "paragraph", - "elements": [{ - "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}" - }], - "order": part.metadata.get("chunkIndex", 0), - "metadata": { - "source_document": part.metadata.get("documentId", "unknown"), - "part_id": part.id, - "error": part.metadata.get('error', 'Unknown error') - } - } - all_sections.append(error_section) - - # Sort sections by order - all_sections.sort(key=lambda x: x.get("order", 0)) - - # Create merged document with sections - merged_document = { - "metadata": { - "title": document_titles[0] if document_titles else "Merged Document", - "extraction_method": "ai_json_extraction_with_merging", - "version": "2.0" - }, - "sections": all_sections, - "summary": f"Merged document using sophisticated merging system", - "tags": ["merged", "ai_generated", "sophisticated_merging"] - } - - logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)") - return merged_document diff --git a/modules/services/serviceAi/subSharedAiUtils.py b/modules/services/serviceAi/subSharedAiUtils.py deleted file mode 100644 index 1dcf6c41..00000000 --- a/modules/services/serviceAi/subSharedAiUtils.py +++ /dev/null @@ -1,165 +0,0 @@ -""" -Shared utilities for AI services to eliminate code duplication. - -This module contains common functions used across multiple AI service modules -to maintain DRY principles and ensure consistency. 
-""" - -import re -import logging -from typing import Dict, Any, List, Optional, Union - -logger = logging.getLogger(__name__) - - -def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str: - """ - Build full prompt by replacing placeholders with their content. - Uses the new {{KEY:placeholder}} format. - - Args: - prompt: The base prompt template - placeholders: Dictionary of placeholder key-value pairs - - Returns: - Prompt with placeholders replaced - """ - if not placeholders: - return prompt - - full_prompt = prompt - for placeholder, content in placeholders.items(): - # Skip if content is None or empty - if content is None: - continue - # Replace {{KEY:placeholder}} - full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content)) - - return full_prompt - - -def sanitizePromptContent(content: str, contentType: str = "text") -> str: - """ - Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation. - - This is the single source of truth for all prompt sanitization across the system. - Replaces all scattered sanitization functions with a unified approach. 
- - Args: - content: The content to sanitize - contentType: Type of content ("text", "userinput", "json", "document") - - Returns: - Safely sanitized content ready for AI prompt insertion - """ - if not content: - return "" - - try: - # Convert to string if not already - content_str = str(content) - - # Remove null bytes and control characters (except newlines and tabs) - sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str) - - # Handle different content types with appropriate sanitization - if contentType == "userinput": - # Extra security for user-controlled content - # Escape curly braces to prevent placeholder injection - sanitized = sanitized.replace('{', '{{').replace('}', '}}') - # Escape quotes and wrap in single quotes - sanitized = sanitized.replace('"', '\\"').replace("'", "\\'") - return f"'{sanitized}'" - - elif contentType == "json": - # For JSON content, escape quotes and backslashes - sanitized = sanitized.replace('\\', '\\\\') - sanitized = sanitized.replace('"', '\\"') - sanitized = sanitized.replace('\n', '\\n') - sanitized = sanitized.replace('\r', '\\r') - sanitized = sanitized.replace('\t', '\\t') - - elif contentType == "document": - # For document content, escape special characters - sanitized = sanitized.replace('\\', '\\\\') - sanitized = sanitized.replace('"', '\\"') - sanitized = sanitized.replace("'", "\\'") - sanitized = sanitized.replace('\n', '\\n') - sanitized = sanitized.replace('\r', '\\r') - sanitized = sanitized.replace('\t', '\\t') - - else: # contentType == "text" or default - # Basic text sanitization - sanitized = sanitized.replace('\\', '\\\\') - sanitized = sanitized.replace('"', '\\"') - sanitized = sanitized.replace("'", "\\'") - sanitized = sanitized.replace('\n', '\\n') - sanitized = sanitized.replace('\r', '\\r') - sanitized = sanitized.replace('\t', '\\t') - - return sanitized - - except Exception as e: - logger.error(f"Error sanitizing prompt content: {str(e)}") - # Return a safe fallback - 
return "[ERROR: Content could not be safely sanitized]" - - -def extractTextFromContentParts(extracted_content) -> str: - """ - Extract text content from ExtractionService ContentPart objects. - - Args: - extracted_content: ContentExtracted object with parts - - Returns: - Concatenated text content from all text/table/structure parts - """ - if not extracted_content or not hasattr(extracted_content, 'parts'): - return "" - - text_parts = [] - for part in extracted_content.parts: - if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']: - if hasattr(part, 'data') and part.data: - text_parts.append(part.data) - - return "\n\n".join(text_parts) - - -def reduceText(text: str, reduction_factor: float) -> str: - """ - Reduce text size by the specified factor. - - Args: - text: Text to reduce - reduction_factor: Factor by which to reduce (0.0 to 1.0) - - Returns: - Reduced text with truncation indicator - """ - if reduction_factor >= 1.0: - return text - - target_length = int(len(text) * reduction_factor) - return text[:target_length] + "... [reduced]" - - -def determineCallType(documents: Optional[List], operation_type: str) -> str: - """ - Determine call type based on documents and operation type. 
- - Args: - documents: List of ChatDocument objects - operation_type: Type of operation being performed - - Returns: - Call type: "plan" or "text" - """ - has_documents = documents is not None and len(documents) > 0 - is_planning_operation = operation_type == "plan" - - if not has_documents and is_planning_operation: - return "plan" - else: - return "text" diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index 62931565..1e0c1d21 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -19,6 +19,16 @@ class ExtractionService: self.services = services self._extractorRegistry = ExtractorRegistry() self._chunkerRegistry = ChunkerRegistry() + # Ensure AI connectors are discovered so pricing models are available + try: + # If internal model is missing, trigger discovery and registration + if modelRegistry.getModel("internal-extractor") is None: + discovered = modelRegistry.discoverConnectors() + for connector in discovered: + modelRegistry.registerConnector(connector) + except Exception: + # Propagate actual errors during use; init should be fast and side-effect free otherwise + pass def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]: """ @@ -82,12 +92,12 @@ class ExtractionService: p.metadata["documentMimeType"] = documentData["mimeType"] # Log chunking information - chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)] - if chunked_parts: + chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)] + if chunkedParts: logger.debug(f"=== CHUNKING RESULTS ===") logger.debug(f"Total parts: {len(ec.parts)}") - logger.debug(f"Chunked parts: {len(chunked_parts)}") - for chunk in chunked_parts: + logger.debug(f"Chunked parts: {len(chunkedParts)}") + for chunk in chunkedParts: logger.debug(f" Chunk: {chunk.label} - 
{len(chunk.data)} chars (parent: {chunk.parentId})") else: logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits") @@ -101,8 +111,11 @@ class ExtractionService: # Emit stats for extraction operation # Use internal extraction model for pricing - modelName = "internal_extraction" + modelName = "internal-extractor" model = modelRegistry.getModel(modelName) + # Hard fail if model is missing; caller must ensure connectors are registered + if model is None or model.calculatePriceUsd is None: + raise RuntimeError(f"Pricing model not available: {modelName}") priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived) # Create AiCallResponse with real calculation diff --git a/modules/services/serviceExtraction/subMerger.py b/modules/services/serviceExtraction/subMerger.py index aa9bf1f5..9e8120d0 100644 --- a/modules/services/serviceExtraction/subMerger.py +++ b/modules/services/serviceExtraction/subMerger.py @@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger: 4. 
Minimize total number of AI calls """ - def __init__(self, model_capabilities: Dict[str, Any]): - self.max_tokens = model_capabilities.get("maxTokens", 4000) - self.safety_margin = model_capabilities.get("safetyMargin", 0.1) - self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin)) - self.chars_per_token = model_capabilities.get("charsPerToken", 4) # Rough estimation + def __init__(self, modelCapabilities: Dict[str, Any]): + self.maxTokens = modelCapabilities.get("maxTokens", 4000) + self.safetyMargin = modelCapabilities.get("safetyMargin", 0.1) + self.effectiveMaxTokens = int(self.maxTokens * (1 - self.safetyMargin)) + self.charsPerToken = modelCapabilities.get("charsPerToken", 4) # Rough estimation - def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]: + def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]: """ Merge chunks intelligently based on token limits. @@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger: if not chunks: return chunks - logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}") + logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}") # Calculate tokens for prompt - prompt_tokens = self._estimate_tokens(prompt) - available_tokens = self.effective_max_tokens - prompt_tokens + promptTokens = self._estimateTokens(prompt) + availableTokens = self.effectiveMaxTokens - promptTokens - logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}") + logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}") # Group chunks by document and type for semantic coherence - grouped_chunks = self._group_chunks_by_document_and_type(chunks) + groupedChunks = self._groupChunksByDocumentAndType(chunks) - merged_parts = [] + mergedParts = [] - for group_key, group_chunks in grouped_chunks.items(): - 
logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)") + for groupKey, groupChunks in groupedChunks.items(): + logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)") # Merge chunks within this group optimally - group_merged = self._merge_group_optimally(group_chunks, available_tokens) - merged_parts.extend(group_merged) + groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens) + mergedParts.extend(groupMerged) - logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts") - return merged_parts + logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(mergedParts)} parts") + return mergedParts - def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]: + def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]: """Group chunks by document and type for semantic coherence.""" groups = {} for chunk in chunks: # Create group key: document_id + type_group - doc_id = chunk.metadata.get("documentId", "unknown") - type_group = chunk.typeGroup - group_key = f"{doc_id}_{type_group}" - - if group_key not in groups: - groups[group_key] = [] - groups[group_key].append(chunk) + docId = chunk.metadata.get("documentId", "unknown") + typeGroup = chunk.typeGroup + groupKey = f"{docId}_{typeGroup}" + if groupKey not in groups: + groups[groupKey] = [] + groups[groupKey].append(chunk) + return groups - def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]: + def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]: """Merge chunks within a group optimally to minimize AI calls.""" if not chunks: return [] # Sort chunks by size (smallest first for better packing) - sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data)) + sortedChunks = sorted(chunks, key=lambda c: self._estimateTokens(c.data)) 
- merged_parts = [] - current_group = [] - current_tokens = 0 + mergedParts = [] + currentGroup = [] + currentTokens = 0 - for chunk in sorted_chunks: - chunk_tokens = self._estimate_tokens(chunk.data) + for chunk in sortedChunks: + chunkTokens = self._estimateTokens(chunk.data) # Special case: If single chunk is already at max size, process it alone - if chunk_tokens >= available_tokens * 0.9: # 90% of available tokens + if chunkTokens >= availableTokens * 0.9: # 90% of available tokens # Finalize current group if it exists - if current_group: - merged_part = self._create_merged_part(current_group, current_tokens) - merged_parts.append(merged_part) - current_group = [] - current_tokens = 0 + if currentGroup: + mergedPart = self._createMergedPart(currentGroup, currentTokens) + mergedParts.append(mergedPart) + currentGroup = [] + currentTokens = 0 # Process large chunk individually - merged_parts.append(chunk) - logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens") + mergedParts.append(chunk) + logger.debug(f"🔍 Large chunk processed individually: {chunkTokens} tokens") continue # If adding this chunk would exceed limit, finalize current group - if current_tokens + chunk_tokens > available_tokens and current_group: - merged_part = self._create_merged_part(current_group, current_tokens) - merged_parts.append(merged_part) - current_group = [chunk] - current_tokens = chunk_tokens + if currentTokens + chunkTokens > availableTokens and currentGroup: + mergedPart = self._createMergedPart(currentGroup, currentTokens) + mergedParts.append(mergedPart) + currentGroup = [chunk] + currentTokens = chunkTokens else: - current_group.append(chunk) - current_tokens += chunk_tokens + currentGroup.append(chunk) + currentTokens += chunkTokens # Finalize remaining group - if current_group: - merged_part = self._create_merged_part(current_group, current_tokens) - merged_parts.append(merged_part) + if currentGroup: + mergedPart = self._createMergedPart(currentGroup, 
currentTokens) + mergedParts.append(mergedPart) - logger.info(f"📦 Group merged: {len(chunks)} → {len(merged_parts)} parts") - return merged_parts + logger.info(f"📦 Group merged: {len(chunks)} → {len(mergedParts)} parts") + return mergedParts - def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart: + def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart: """Create a merged ContentPart from multiple chunks.""" if len(chunks) == 1: return chunks[0] # No need to merge single chunk # Combine data with semantic separators - combined_data = self._combine_chunk_data(chunks) + combinedData = self._combineChunkData(chunks) # Use metadata from first chunk as base - base_chunk = chunks[0] - merged_metadata = base_chunk.metadata.copy() - merged_metadata.update({ + baseChunk = chunks[0] + mergedMetadata = baseChunk.metadata.copy() + mergedMetadata.update({ "merged": True, "originalChunkCount": len(chunks), - "totalTokens": total_tokens, + "totalTokens": totalTokens, "originalChunkIds": [c.id for c in chunks], - "size": len(combined_data.encode('utf-8')) + "size": len(combinedData.encode('utf-8')) }) - merged_part = ContentPart( + mergedPart = ContentPart( id=makeId(), - parentId=base_chunk.parentId, + parentId=baseChunk.parentId, label=f"merged_{len(chunks)}_chunks", - typeGroup=base_chunk.typeGroup, - mimeType=base_chunk.mimeType, - data=combined_data, - metadata=merged_metadata + typeGroup=baseChunk.typeGroup, + mimeType=baseChunk.mimeType, + data=combinedData, + metadata=mergedMetadata ) - logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens") - return merged_part + logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens") + return mergedPart - def _combine_chunk_data(self, chunks: List[ContentPart]) -> str: + def _combineChunkData(self, chunks: List[ContentPart]) -> str: """Combine chunk data with appropriate separators.""" if not chunks: return "" @@ 
-173,37 +173,37 @@ class IntelligentTokenAwareMerger: return separator.join([chunk.data for chunk in chunks]) - def _estimate_tokens(self, text: str) -> int: + def _estimateTokens(self, text: str) -> int: """Estimate token count for text.""" if not text: return 0 - return len(text) // self.chars_per_token + return len(text) // self.charsPerToken - def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]: + def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]: """Calculate optimization statistics with detailed analysis.""" - original_calls = len(original_chunks) - optimized_calls = len(merged_parts) - reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0 + originalCalls = len(originalChunks) + optimizedCalls = len(mergedParts) + reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0 # Analyze chunk sizes - large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9] - small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9] + largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9] + smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9] # Calculate theoretical maximum optimization (if all small chunks could be merged) - theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3) # Assume 3 small chunks per call - theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0 + theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3) # Assume 3 small chunks per call + theoreticalReduction = ((originalCalls - theoreticalMinCalls) / 
originalCalls * 100) if originalCalls > 0 else 0 return { - "original_ai_calls": original_calls, - "optimized_ai_calls": optimized_calls, - "reduction_percent": round(reduction_percent, 1), - "cost_savings": f"{reduction_percent:.1f}%", - "efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "∞", + "original_ai_calls": originalCalls, + "optimized_ai_calls": optimizedCalls, + "reduction_percent": round(reductionPercent, 1), + "cost_savings": f"{reductionPercent:.1f}%", + "efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "∞", "analysis": { - "large_chunks": len(large_chunks), - "small_chunks": len(small_chunks), - "theoretical_min_calls": theoretical_min_calls, - "theoretical_reduction": round(theoretical_reduction, 1), - "optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low" + "large_chunks": len(largeChunks), + "small_chunks": len(smallChunks), + "theoretical_min_calls": theoreticalMinCalls, + "theoretical_reduction": round(theoreticalReduction, 1), + "optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low" } } diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py index e935f3c3..f36afe8e 100644 --- a/modules/services/serviceExtraction/subPipeline.py +++ b/modules/services/serviceExtraction/subPipeline.py @@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[Con subMerger = IntelligentTokenAwareMerger(model_capabilities) # Use intelligent merging for all parts - merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "") + merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "") # Calculate and log optimization stats - stats = subMerger.calculate_optimization_stats(parts, merged) + stats = subMerger.calculateOptimizationStats(parts, merged) 
logger.info(f"🧠 Intelligent merging stats: {stats}") logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)") diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py index 5b887482..a796ea3b 100644 --- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py +++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py @@ -101,7 +101,7 @@ async def buildExtractionPrompt( # Build base prompt adaptive_prompt = f""" -{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} +{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output. diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 41bce06d..9dddb49d 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -37,13 +37,13 @@ class GenerationService: return [] # Process each document from the AI action result - processed_documents = [] + processedDocuments = [] for doc in documents: - processed_doc = self.processSingleDocument(doc, action) - if processed_doc: - processed_documents.append(processed_doc) + processedDoc = self.processSingleDocument(doc, action) + if processedDoc: + processedDocuments.append(processedDoc) - return processed_documents + return processedDocuments except Exception as e: logger.error(f"Error processing action result documents: {str(e)}") return [] @@ -77,20 +77,20 @@ class GenerationService: try: processed_docs = self.processActionResultDocuments(action_result, action, workflow) - 
created_documents = [] + createdDocuments = [] for i, doc_data in enumerate(processed_docs): try: - document_name = doc_data['fileName'] - document_data = doc_data['content'] - mime_type = doc_data['mimeType'] + documentName = doc_data['fileName'] + documentData = doc_data['content'] + mimeType = doc_data['mimeType'] # Convert document data to string content - content = convertDocumentDataToString(document_data, getFileExtension(document_name)) + content = convertDocumentDataToString(documentData, getFileExtension(documentName)) # Skip empty or minimal content - minimal_content_patterns = ['{}', '[]', 'null', '""', "''"] - if not content or content.strip() == "" or content.strip() in minimal_content_patterns: - logger.warning(f"Empty or minimal content for document {document_name}, skipping") + minimalContentPatterns = ['{}', '[]', 'null', '""', "''"] + if not content or content.strip() == "" or content.strip() in minimalContentPatterns: + logger.warning(f"Empty or minimal content for document {documentName}, skipping") continue # Normalize file extension based on mime type if missing or incorrect @@ -105,35 +105,35 @@ class GenerationService: "text/plain": ".txt", "application/json": ".json", } - expected_ext = mime_to_ext.get(mime_type) - if expected_ext: - if not document_name.lower().endswith(expected_ext): + expectedExt = mime_to_ext.get(mimeType) + if expectedExt: + if not documentName.lower().endswith(expectedExt): # Append/replace extension to match mime type - if "." in document_name: - document_name = document_name.rsplit(".", 1)[0] + expected_ext + if "." 
in documentName: + documentName = documentName.rsplit(".", 1)[0] + expectedExt else: - document_name = document_name + expected_ext + documentName = documentName + expectedExt except Exception: pass # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text base64encoded = False try: - binary_mime_types = { + binaryMimeTypes = { "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "application/pdf", } - if isinstance(document_data, str) and mime_type in binary_mime_types: + if isinstance(documentData, str) and mimeType in binaryMimeTypes: base64encoded = True except Exception: base64encoded = False # Create document with file in one step using interfaces directly document = self._createDocument( - fileName=document_name, - mimeType=mime_type, + fileName=documentName, + mimeType=mimeType, content=content, base64encoded=base64encoded, messageId=message_id @@ -141,14 +141,14 @@ class GenerationService: if document: # Set workflow context on the document if possible self._setDocumentWorkflowContext(document, action, workflow) - created_documents.append(document) + createdDocuments.append(document) else: - logger.error(f"Failed to create ChatDocument object for {document_name}") + logger.error(f"Failed to create ChatDocument object for {documentName}") except Exception as e: logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}") continue - return created_documents + return createdDocuments except Exception as e: logger.error(f"Error creating documents from action result: {str(e)}") return [] @@ -157,28 +157,28 @@ class GenerationService: """Set workflow context on a document for proper routing and labeling""" try: # Get current workflow context directly from workflow object - workflow_context = self._getWorkflowContext(workflow) - workflow_stats = 
self._getWorkflowStats(workflow) + workflowContext = self._getWorkflowContext(workflow) + workflowStats = self._getWorkflowStats(workflow) - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - current_action = workflow_context.get('currentAction', 0) + currentRound = workflowContext.get('currentRound', 0) + currentTask = workflowContext.get('currentTask', 0) + currentAction = workflowContext.get('currentAction', 0) # Try to set workflow context attributes if they exist if hasattr(document, 'roundNumber'): - document.roundNumber = current_round + document.roundNumber = currentRound if hasattr(document, 'taskNumber'): - document.taskNumber = current_task + document.taskNumber = currentTask if hasattr(document, 'actionNumber'): - document.actionNumber = current_action + document.actionNumber = currentAction if hasattr(document, 'actionId'): document.actionId = action.id if hasattr(action, 'id') else None # Set additional workflow metadata if available if hasattr(document, 'workflowId'): - document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None) + document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None) if hasattr(document, 'workflowStatus'): - document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown') + document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown') except Exception as e: @@ -355,17 +355,17 @@ class GenerationService: def _getFormatRenderer(self, output_format: str): """Get the appropriate renderer for the specified format using auto-discovery.""" try: - from .renderers.registry import get_renderer - renderer = get_renderer(output_format, services=self.services) + from .renderers.registry import getRenderer + renderer = getRenderer(output_format, services=self.services) 
if renderer: return renderer # Fallback to text renderer if no specific renderer found logger.warning(f"No renderer found for format {output_format}, falling back to text") - fallback_renderer = get_renderer('text', services=self.services) - if fallback_renderer: - return fallback_renderer + fallbackRenderer = getRenderer('text', services=self.services) + if fallbackRenderer: + return fallbackRenderer logger.error("Even text renderer fallback failed") return None diff --git a/modules/services/serviceGeneration/renderers/registry.py b/modules/services/serviceGeneration/renderers/registry.py index bb890a82..5065424e 100644 --- a/modules/services/serviceGeneration/renderers/registry.py +++ b/modules/services/serviceGeneration/renderers/registry.py @@ -17,7 +17,7 @@ class RendererRegistry: self._format_mappings: Dict[str, str] = {} self._discovered = False - def discover_renderers(self) -> None: + def discoverRenderers(self) -> None: """Automatically discover and register all renderers by scanning files.""" if self._discovered: return @@ -28,38 +28,38 @@ class RendererRegistry: from pathlib import Path # Get the directory containing this registry file - current_dir = Path(__file__).parent - renderers_dir = current_dir + currentDir = Path(__file__).parent + renderersDir = currentDir # Get the package name dynamically - package_name = __name__.rsplit('.', 1)[0] + packageName = __name__.rsplit('.', 1)[0] # Scan all Python files in the renderers directory - for file_path in renderers_dir.glob("*.py"): - if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']: + for filePath in renderersDir.glob("*.py"): + if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']: continue # Extract module name from filename - module_name = file_path.stem + moduleName = filePath.stem try: # Import the module dynamically - full_module_name = f"{package_name}.{module_name}" - module = importlib.import_module(full_module_name) + fullModuleName = 
f"{packageName}.{moduleName}" + module = importlib.import_module(fullModuleName) # Look for renderer classes in the module - for attr_name in dir(module): - attr = getattr(module, attr_name) + for attrName in dir(module): + attr = getattr(module, attrName) if (isinstance(attr, type) and issubclass(attr, BaseRenderer) and attr != BaseRenderer and - hasattr(attr, 'get_supported_formats')): + hasattr(attr, 'getSupportedFormats')): # Register the renderer - self._register_renderer_class(attr) + self._registerRendererClass(attr) except Exception as e: - logger.warning(f"Could not load renderer from {module_name}: {str(e)}") + logger.warning(f"Could not load renderer from {moduleName}: {str(e)}") continue self._discovered = True @@ -68,72 +68,72 @@ class RendererRegistry: logger.error(f"Error during renderer discovery: {str(e)}") self._discovered = True # Mark as discovered to avoid repeated attempts - def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None: + def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None: """Register a renderer class with its supported formats.""" try: # Get supported formats from the renderer class - supported_formats = renderer_class.get_supported_formats() + supportedFormats = rendererClass.getSupportedFormats() - for format_name in supported_formats: + for formatName in supportedFormats: # Register primary format - self._renderers[format_name.lower()] = renderer_class + self._renderers[formatName.lower()] = rendererClass # Register aliases if any - if hasattr(renderer_class, 'get_format_aliases'): - aliases = renderer_class.get_format_aliases() + if hasattr(rendererClass, 'getFormatAliases'): + aliases = rendererClass.getFormatAliases() for alias in aliases: - self._format_mappings[alias.lower()] = format_name.lower() + self._format_mappings[alias.lower()] = formatName.lower() - logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}") + logger.debug(f"Registered 
{rendererClass.__name__} for formats: {supportedFormats}") except Exception as e: - logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}") + logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}") - def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]: + def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]: """Get a renderer instance for the specified format.""" if not self._discovered: - self.discover_renderers() + self.discoverRenderers() # Normalize format name - format_name = output_format.lower().strip() + formatName = outputFormat.lower().strip() # Check for aliases first - if format_name in self._format_mappings: - format_name = self._format_mappings[format_name] + if formatName in self._format_mappings: + formatName = self._format_mappings[formatName] # Get renderer class - renderer_class = self._renderers.get(format_name) + rendererClass = self._renderers.get(formatName) - if renderer_class: + if rendererClass: try: - return renderer_class(services=services) + return rendererClass(services=services) except Exception as e: - logger.error(f"Error creating renderer instance for {format_name}: {str(e)}") + logger.error(f"Error creating renderer instance for {formatName}: {str(e)}") return None - logger.warning(f"No renderer found for format: {output_format}") + logger.warning(f"No renderer found for format: {outputFormat}") return None - def get_supported_formats(self) -> List[str]: + def getSupportedFormats(self) -> List[str]: """Get list of all supported formats.""" if not self._discovered: - self.discover_renderers() + self.discoverRenderers() formats = list(self._renderers.keys()) formats.extend(self._format_mappings.keys()) return sorted(set(formats)) - def get_renderer_info(self) -> Dict[str, Dict[str, str]]: + def getRendererInfo(self) -> Dict[str, Dict[str, str]]: """Get information about all registered renderers.""" if not self._discovered: - 
self.discover_renderers() + self.discoverRenderers() info = {} - for format_name, renderer_class in self._renderers.items(): - info[format_name] = { - 'class_name': renderer_class.__name__, - 'module': renderer_class.__module__, - 'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description' + for formatName, rendererClass in self._renderers.items(): + info[formatName] = { + 'class_name': rendererClass.__name__, + 'module': rendererClass.__module__, + 'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description' } return info @@ -141,14 +141,14 @@ class RendererRegistry: # Global registry instance _registry = RendererRegistry() -def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]: +def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]: """Get a renderer instance for the specified format.""" - return _registry.get_renderer(output_format, services) + return _registry.getRenderer(outputFormat, services) -def get_supported_formats() -> List[str]: +def getSupportedFormats() -> List[str]: """Get list of all supported formats.""" - return _registry.get_supported_formats() + return _registry.getSupportedFormats() -def get_renderer_info() -> Dict[str, Dict[str, str]]: +def getRendererInfo() -> Dict[str, Dict[str, str]]: """Get information about all registered renderers.""" - return _registry.get_renderer_info() + return _registry.getRendererInfo() diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 5444525a..566c7765 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -4,6 +4,7 @@ Base renderer class for all format renderers. 
from abc import ABC, abstractmethod from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelJson import supportedSectionTypes import json import logging import re @@ -23,7 +24,7 @@ class BaseRenderer(ABC): self.services = services # Add services attribute @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """ Return list of supported format names for this renderer. Override this method in subclasses to specify supported formats. @@ -31,7 +32,7 @@ class BaseRenderer(ABC): return [] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """ Return list of format aliases for this renderer. Override this method in subclasses to specify format aliases. @@ -39,7 +40,7 @@ class BaseRenderer(ABC): return [] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """ Return priority for this renderer (higher number = higher priority). Used when multiple renderers support the same format. @@ -47,43 +48,43 @@ class BaseRenderer(ABC): return 0 @abstractmethod - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """ Render extracted JSON content to the target format. 
Args: - extracted_content: Structured JSON content with sections and metadata + extractedContent: Structured JSON content with sections and metadata title: Report title - user_prompt: Original user prompt for context - ai_service: AI service instance for additional processing + userPrompt: Original user prompt for context + aiService: AI service instance for additional processing Returns: - tuple: (rendered_content, mime_type) + tuple: (renderedContent, mimeType) """ pass - def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]: + def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: """Extract sections from report data.""" - return report_data.get('sections', []) + return reportData.get('sections', []) - def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]: + def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: """Extract metadata from report data.""" - return report_data.get('metadata', {}) + return reportData.get('metadata', {}) - def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str: + def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str: """Get title from report data or use fallback.""" - metadata = report_data.get('metadata', {}) - return metadata.get('title', fallback_title) + metadata = reportData.get('metadata', {}) + return metadata.get('title', fallbackTitle) - def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool: + def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool: """Validate that JSON content has the expected structure.""" - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): return False - if "sections" not in json_content: + if "sections" not in jsonContent: return False - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) if not isinstance(sections, list): return False @@ -96,14 +97,14 @@ class 
BaseRenderer(ABC): return True - def _get_section_type(self, section: Dict[str, Any]) -> str: + def _getSectionType(self, section: Dict[str, Any]) -> str: """Get the type of a section; default to 'paragraph' for non-dict inputs.""" if isinstance(section, dict): return section.get("content_type", "paragraph") # If section is a list or any other type, treat as paragraph elements return "paragraph" - def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]: + def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]: """Get the elements of a section; if a list is provided directly, return it.""" if isinstance(section, dict): return section.get("elements", []) @@ -111,21 +112,30 @@ class BaseRenderer(ABC): return section return [] - def _get_section_id(self, section: Dict[str, Any]) -> str: + def _getSectionId(self, section: Dict[str, Any]) -> str: """Get the ID of a section (if available).""" if isinstance(section, dict): return section.get("id", "unknown") return "unknown" - def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: + def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: """Extract table headers and rows from section data.""" - headers = section_data.get("headers", []) - rows = section_data.get("rows", []) + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + candidate = sectionData[0] + sectionData = candidate if isinstance(candidate, dict) else {} + headers = sectionData.get("headers", []) + rows = sectionData.get("rows", []) return headers, rows - def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]: + def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]: """Extract bullet list items from section data.""" - items = section_data.get("items", []) + # Normalize when elements array or raw list was passed in + if isinstance(sectionData, 
list): + # Already a list of items (strings or dicts) + items = sectionData + else: + items = sectionData.get("items", []) result = [] for item in items: if isinstance(item, str): @@ -134,29 +144,47 @@ class BaseRenderer(ABC): result.append(item["text"]) return result - def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]: + def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]: """Extract heading level and text from section data.""" - level = section_data.get("level", 1) - text = section_data.get("text", "") + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + level = sectionData.get("level", 1) + text = sectionData.get("text", "") return level, text - def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str: + def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str: """Extract paragraph text from section data.""" - return section_data.get("text", "") + if isinstance(sectionData, list): + # Join multiple paragraph elements if provided as a list + texts = [] + for el in sectionData: + if isinstance(el, dict) and "text" in el: + texts.append(el["text"]) + elif isinstance(el, str): + texts.append(el) + return "\n".join(texts) + return sectionData.get("text", "") - def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: + def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: """Extract code and language from section data.""" - code = section_data.get("code", "") - language = section_data.get("language", "") + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + code = sectionData.get("code", "") + language = sectionData.get("language", "") return code, language - def _extract_image_data(self, 
section_data: Dict[str, Any]) -> Tuple[str, str]: + def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: """Extract base64 data and alt text from section data.""" - base64_data = section_data.get("base64Data", "") - alt_text = section_data.get("altText", "Image") - return base64_data, alt_text + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + base64Data = sectionData.get("base64Data", "") + altText = sectionData.get("altText", "Image") + return base64Data, altText - def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: + def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: """ Render an image section. This is a base implementation that should be overridden by format-specific renderers. @@ -168,47 +196,47 @@ class BaseRenderer(ABC): Returns: Format-specific image representation """ - section_data = self._get_section_data(section) - base64_data, alt_text = self._extract_image_data(section_data) + sectionData = self._getSectionData(section) + base64Data, altText = self._extractImageData(sectionData) # Base implementation returns a simple dict # Format-specific renderers should override this method return { "content_type": "image", - "base64Data": base64_data, - "altText": alt_text, - "width": section_data.get("width", None), - "height": section_data.get("height", None), - "caption": section_data.get("caption", "") + "base64Data": base64Data, + "altText": altText, + "width": sectionData.get("width", None), + "height": sectionData.get("height", None), + "caption": sectionData.get("caption", "") } - def _validate_image_data(self, base64_data: str, alt_text: str) -> bool: + def _validateImageData(self, base64Data: str, altText: str) -> bool: """Validate image data.""" - if not base64_data: + if not base64Data: self.logger.warning("Image 
section has no base64 data") return False - if not alt_text: + if not altText: self.logger.warning("Image section has no alt text") return False # Basic base64 validation try: - base64.b64decode(base64_data, validate=True) + base64.b64decode(base64Data, validate=True) return True except Exception as e: self.logger.warning(f"Invalid base64 image data: {str(e)}") return False - def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]: + def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]: """ Get image dimensions from base64 data. This is a helper method that format-specific renderers can use. """ try: # Decode base64 data - image_data = base64.b64decode(base64_data) - image = Image.open(io.BytesIO(image_data)) + imageData = base64.b64decode(base64Data) + image = Image.open(io.BytesIO(imageData)) return image.size # Returns (width, height) @@ -216,89 +244,89 @@ class BaseRenderer(ABC): self.logger.warning(f"Could not determine image dimensions: {str(e)}") return (0, 0) - def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str: + def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str: """ Resize image if it exceeds maximum dimensions. Returns the resized image as base64 string. 
""" try: # Decode base64 data - image_data = base64.b64decode(base64_data) - image = Image.open(io.BytesIO(image_data)) + imageData = base64.b64decode(base64Data) + image = Image.open(io.BytesIO(imageData)) # Check if resizing is needed width, height = image.size - if width <= max_width and height <= max_height: - return base64_data # No resizing needed + if width <= maxWidth and height <= maxHeight: + return base64Data # No resizing needed # Calculate new dimensions maintaining aspect ratio - ratio = min(max_width / width, max_height / height) - new_width = int(width * ratio) - new_height = int(height * ratio) + ratio = min(maxWidth / width, maxHeight / height) + newWidth = int(width * ratio) + newHeight = int(height * ratio) # Resize image - resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) + resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS) # Convert back to base64 buffer = io.BytesIO() - resized_image.save(buffer, format=image.format or 'PNG') - resized_data = buffer.getvalue() + resizedImage.save(buffer, format=image.format or 'PNG') + resizedData = buffer.getvalue() - return base64.b64encode(resized_data).decode('utf-8') + return base64.b64encode(resizedData).decode('utf-8') except Exception as e: self.logger.warning(f"Could not resize image: {str(e)}") - return base64_data # Return original if resize fails + return base64Data # Return original if resize fails - def _get_supported_section_types(self) -> List[str]: - """Return list of supported section types.""" - return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"] + def _getSupportedSectionTypes(self) -> List[str]: + """Return list of supported section types (from unified schema).""" + return supportedSectionTypes - def _is_valid_section_type(self, section_type: str) -> bool: + def _isValidSectionType(self, sectionType: str) -> bool: """Check if a section type is valid.""" - return section_type in 
self._get_supported_section_types() + return sectionType in self._getSupportedSectionTypes() - def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]: + def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]: """Process a section and return structured data based on its type.""" - section_type = self._get_section_type(section) - section_data = self._get_section_data(section) + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) - if section_type == "table": - headers, rows = self._extract_table_data(section_data) + if sectionType == "table": + headers, rows = self._extractTableData(sectionData) return {"content_type": "table", "headers": headers, "rows": rows} - elif section_type == "bullet_list": - items = self._extract_bullet_list_items(section_data) + elif sectionType == "bullet_list": + items = self._extractBulletListItems(sectionData) return {"content_type": "bullet_list", "items": items} - elif section_type == "heading": - level, text = self._extract_heading_data(section_data) + elif sectionType == "heading": + level, text = self._extractHeadingData(sectionData) return {"content_type": "heading", "level": level, "text": text} - elif section_type == "paragraph": - text = self._extract_paragraph_text(section_data) + elif sectionType == "paragraph": + text = self._extractParagraphText(sectionData) return {"content_type": "paragraph", "text": text} - elif section_type == "code_block": - code, language = self._extract_code_block_data(section_data) + elif sectionType == "code_block": + code, language = self._extractCodeBlockData(sectionData) return {"content_type": "code_block", "code": code, "language": language} - elif section_type == "image": - base64_data, alt_text = self._extract_image_data(section_data) + elif sectionType == "image": + base64Data, altText = self._extractImageData(sectionData) # Validate image data - if self._validate_image_data(base64_data, alt_text): + if 
self._validateImageData(base64Data, altText): return { "content_type": "image", - "base64Data": base64_data, - "altText": alt_text, - "width": section_data.get("width"), - "height": section_data.get("height"), - "caption": section_data.get("caption", "") + "base64Data": base64Data, + "altText": altText, + "width": sectionData.get("width") if isinstance(sectionData, dict) else None, + "height": sectionData.get("height") if isinstance(sectionData, dict) else None, + "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else "" } else: # Return placeholder if image data is invalid - return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"} + return {"content_type": "paragraph", "text": f"[Image: {altText}]"} else: # Fallback to paragraph - text = self._extract_paragraph_text(section_data) + text = self._extractParagraphText(sectionData) return {"content_type": "paragraph", "text": text} - def _format_timestamp(self, timestamp: str = None) -> str: + def _formatTimestamp(self, timestamp: str = None) -> str: """Format timestamp for display.""" if timestamp: return timestamp @@ -306,38 +334,38 @@ class BaseRenderer(ABC): # ===== GENERIC AI STYLING HELPERS ===== - async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: + async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: """ Generic AI styling method that can be used by all renderers. 
Args: - ai_service: AI service instance - style_template: Format-specific style template - default_styles: Default styles to fall back to + aiService: AI service instance + styleTemplate: Format-specific style template + defaultStyles: Default styles to fall back to Returns: Dict with styling definitions """ # DEBUG: Show which renderer is calling this method - if not ai_service: - return default_styles + if not aiService: + return defaultStyles try: - request_options = AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_GENERATE + requestOptions = AiCallOptions() + requestOptions.operationType = OperationTypeEnum.DATA_GENERATE - request = AiCallRequest(prompt=style_template, context="", options=request_options) + request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions) # DEBUG: Show the actual prompt being sent to AI self.logger.debug(f"AI Style Template Prompt:") - self.logger.debug(f"{style_template}") + self.logger.debug(f"{styleTemplate}") - response = await ai_service.aiObjects.call(request) + response = await aiService.aiObjects.call(request) # Save styling prompt and response to debug - self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt") + self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt") self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response") # Clean and parse JSON @@ -346,12 +374,12 @@ class BaseRenderer(ABC): # Check if result is empty if not result: self.logger.warning("AI styling returned empty response, using defaults") - return default_styles + return defaultStyles # Extract JSON from markdown if present - json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if json_match: - result = json_match.group(1).strip() + jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) + if jsonMatch: + result = jsonMatch.group(1).strip() elif result.startswith('```json'): result = re.sub(r'^```json\s*', '', 
result) result = re.sub(r'\s*```$', '', result) @@ -362,8 +390,8 @@ class BaseRenderer(ABC): # Try to parse JSON try: styles = json.loads(result) - except json.JSONDecodeError as json_error: - self.logger.warning(f"AI styling returned invalid JSON: {json_error}") + except json.JSONDecodeError as jsonError: + self.logger.warning(f"AI styling returned invalid JSON: {jsonError}") # Use print instead of logger to avoid truncation self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER") @@ -372,88 +400,88 @@ class BaseRenderer(ABC): self.logger.warning(f"Raw content that failed to parse: {result}") # Try to fix incomplete JSON by adding missing closing braces - open_braces = result.count('{') - close_braces = result.count('}') + openBraces = result.count('{') + closeBraces = result.count('}') - if open_braces > close_braces: + if openBraces > closeBraces: # JSON is incomplete, add missing closing braces - missing_braces = open_braces - close_braces - result = result + '}' * missing_braces - self.logger.info(f"Added {missing_braces} missing closing brace(s)") + missingBraces = openBraces - closeBraces + result = result + '}' * missingBraces + self.logger.info(f"Added {missingBraces} missing closing brace(s)") self.logger.debug(f"Fixed JSON: {result}") # Try parsing the fixed JSON try: styles = json.loads(result) self.logger.info("Successfully fixed incomplete JSON") - except json.JSONDecodeError as fix_error: - self.logger.warning(f"Fixed JSON still invalid: {fix_error}") + except json.JSONDecodeError as fixError: + self.logger.warning(f"Fixed JSON still invalid: {fixError}") self.logger.warning(f"Fixed JSON content: {result}") # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] + jsonStart = result.find('{') + jsonEnd = result.rfind('}') + if 
jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart: + jsonPart = result[jsonStart:jsonEnd+1] try: - styles = json.loads(json_part) + styles = json.loads(jsonPart) self.logger.info("Successfully extracted JSON from explanatory text") except json.JSONDecodeError: self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles + return defaultStyles else: - return default_styles + return defaultStyles else: # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] + jsonStart = result.find('{') + jsonEnd = result.rfind('}') + if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart: + jsonPart = result[jsonStart:jsonEnd+1] try: - styles = json.loads(json_part) + styles = json.loads(jsonPart) self.logger.info("Successfully extracted JSON from explanatory text") except json.JSONDecodeError: self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles + return defaultStyles else: - return default_styles + return defaultStyles # Convert colors to appropriate format - styles = self._convert_colors_format(styles) + styles = self._convertColorsFormat(styles) return styles except Exception as e: self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return default_styles + return defaultStyles - def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """ Convert colors to appropriate format based on renderer type. Override this method in subclasses for format-specific color handling. 
""" return styles - def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str: + def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str: """ Create a standardized AI style template for any format. Args: - format_name: Name of the format (e.g., "docx", "xlsx", "pptx") - user_prompt: User's original prompt - style_schema: Format-specific style schema + formatName: Name of the format (e.g., "docx", "xlsx", "pptx") + userPrompt: User's original prompt + styleSchema: Format-specific style schema Returns: Formatted prompt string """ - schema_json = json.dumps(style_schema, indent=4) + schemaJson = json.dumps(styleSchema, indent=4) # DEBUG: Show the schema being sent - return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents. + return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents. 
Use this schema as a template and customize the values for professional document styling: -{schema_json} +{schemaJson} Requirements: - Return ONLY the complete JSON object (no markdown, no explanations) diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py index 5ad2d4bc..3e1ef3d5 100644 --- a/modules/services/serviceGeneration/renderers/rendererCsv.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer): """Renders content to CSV format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported CSV formats.""" return ['csv'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['spreadsheet', 'table'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for CSV renderer.""" return 70 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to CSV format.""" try: # Generate CSV directly from JSON (no styling needed for CSV) - csv_content = await self._generate_csv_from_json(extracted_content, title) + csvContent = await self._generateCsvFromJson(extractedContent, title) - return csv_content, "text/csv" + return csvContent, "text/csv" except Exception as e: self.logger.error(f"Error rendering CSV: {str(e)}") # Return minimal CSV fallback return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv" - async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str: + async def _generateCsvFromJson(self, jsonContent: Dict[str, 
Any], title: str) -> str: """Generate CSV content from structured JSON document.""" try: # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + documentTitle = jsonContent.get("metadata", {}).get("title", title) # Generate CSV content - csv_rows = [] + csvRows = [] # Add title row - if document_title: - csv_rows.append([document_title]) - csv_rows.append([]) # Empty row + if documentTitle: + csvRows.append([documentTitle]) + csvRows.append([]) # Empty row # Process each section in order - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) for section in sections: - section_csv = self._render_json_section_to_csv(section) - if section_csv: - csv_rows.extend(section_csv) - csv_rows.append([]) # Empty row between sections + sectionCsv = self._renderJsonSectionToCsv(section) + if sectionCsv: + csvRows.extend(sectionCsv) + csvRows.append([]) # Empty row between sections # Convert to CSV string - csv_content = self._convert_rows_to_csv(csv_rows) + csvContent = self._convertRowsToCsv(csvRows) - return csv_content + return csvContent except Exception as e: self.logger.error(f"Error generating CSV from JSON: {str(e)}") raise Exception(f"CSV generation failed: {str(e)}") - def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]: + def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]: """Render a single JSON section to CSV rows.""" try: - section_type = section.get("content_type", "paragraph") + sectionType = section.get("content_type", "paragraph") elements = section.get("elements", []) - csv_rows = [] + 
csvRows = [] # Add section title if available - section_title = section.get("title") - if section_title: - csv_rows.append([f"# {section_title}"]) + sectionTitle = section.get("title") + if sectionTitle: + csvRows.append([f"# {sectionTitle}"]) # Process each element in the section for element in elements: - if section_type == "table": - csv_rows.extend(self._render_json_table_to_csv(element)) - elif section_type == "list": - csv_rows.extend(self._render_json_list_to_csv(element)) - elif section_type == "heading": - csv_rows.extend(self._render_json_heading_to_csv(element)) - elif section_type == "paragraph": - csv_rows.extend(self._render_json_paragraph_to_csv(element)) - elif section_type == "code": - csv_rows.extend(self._render_json_code_to_csv(element)) + if sectionType == "table": + csvRows.extend(self._renderJsonTableToCsv(element)) + elif sectionType == "list": + csvRows.extend(self._renderJsonListToCsv(element)) + elif sectionType == "heading": + csvRows.extend(self._renderJsonHeadingToCsv(element)) + elif sectionType == "paragraph": + csvRows.extend(self._renderJsonParagraphToCsv(element)) + elif sectionType == "code": + csvRows.extend(self._renderJsonCodeToCsv(element)) else: # Fallback to paragraph for unknown types - csv_rows.extend(self._render_json_paragraph_to_csv(element)) + csvRows.extend(self._renderJsonParagraphToCsv(element)) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") return [["[Error rendering section]"]] - def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]: """Render a JSON table to CSV rows.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + headers = tableData.get("headers", []) + rows = tableData.get("rows", []) - csv_rows = [] + csvRows = [] if headers: - csv_rows.append(headers) + 
csvRows.append(headers) if rows: - csv_rows.extend(rows) + csvRows.extend(rows) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return [["[Error rendering table]"]] - def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]: """Render a JSON list to CSV rows.""" try: - items = list_data.get("items", []) - csv_rows = [] + items = listData.get("items", []) + csvRows = [] for item in items: if isinstance(item, dict): text = item.get("text", "") subitems = item.get("subitems", []) - csv_rows.append([text]) + csvRows.append([text]) # Add subitems as indented rows for subitem in subitems: if isinstance(subitem, dict): - csv_rows.append([f" - {subitem.get('text', '')}"]) + csvRows.append([f" - {subitem.get('text', '')}"]) else: - csv_rows.append([f" - {subitem}"]) + csvRows.append([f" - {subitem}"]) else: - csv_rows.append([str(item)]) + csvRows.append([str(item)]) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering list: {str(e)}") return [["[Error rendering list]"]] - def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]: """Render a JSON heading to CSV rows.""" try: - text = heading_data.get("text", "") - level = heading_data.get("level", 1) + text = headingData.get("text", "") + level = headingData.get("level", 1) if text: # Use # symbols for heading levels - heading_text = f"{'#' * level} {text}" - return [[heading_text]] + headingText = f"{'#' * level} {text}" + return [[headingText]] return [] @@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer): self.logger.warning(f"Error rendering heading: {str(e)}") return [["[Error rendering heading]"]] - def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]: + def 
_renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]: """Render a JSON paragraph to CSV rows.""" try: - text = paragraph_data.get("text", "") + text = paragraphData.get("text", "") if text: # Split long paragraphs into multiple rows if needed if len(text) > 100: words = text.split() rows = [] - current_row = [] - current_length = 0 + currentRow = [] + currentLength = 0 for word in words: - if current_length + len(word) > 100 and current_row: - rows.append([" ".join(current_row)]) - current_row = [word] - current_length = len(word) + if currentLength + len(word) > 100 and currentRow: + rows.append([" ".join(currentRow)]) + currentRow = [word] + currentLength = len(word) else: - current_row.append(word) - current_length += len(word) + 1 + currentRow.append(word) + currentLength += len(word) + 1 - if current_row: - rows.append([" ".join(current_row)]) + if currentRow: + rows.append([" ".join(currentRow)]) return rows else: @@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer): self.logger.warning(f"Error rendering paragraph: {str(e)}") return [["[Error rendering paragraph]"]] - def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]: """Render a JSON code block to CSV rows.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + code = codeData.get("code", "") + language = codeData.get("language", "") - csv_rows = [] + csvRows = [] if language: - csv_rows.append([f"Code ({language}):"]) + csvRows.append([f"Code ({language}):"]) if code: # Split code into lines - code_lines = code.split('\n') - for line in code_lines: - csv_rows.append([f" {line}"]) + codeLines = code.split('\n') + for line in codeLines: + csvRows.append([f" {line}"]) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") return [["[Error rendering code block]"]] - def 
_convert_rows_to_csv(self, rows: List[List[str]]) -> str: + def _convertRowsToCsv(self, rows: List[List[str]]) -> str: """Convert rows to CSV string.""" import csv import io @@ -245,7 +245,7 @@ class RendererCsv(BaseRenderer): return output.getvalue() - def _clean_csv_content(self, content: str, title: str) -> str: + def _cleanCsvContent(self, content: str, title: str) -> str: """Clean and validate CSV content from AI.""" content = content.strip() diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 42bb71f3..6db48c32 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer): """Renders content to DOCX format using python-docx.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported DOCX formats.""" return ['docx', 'doc'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['word', 'document'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for DOCX renderer.""" return 115 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to DOCX format using AI-analyzed styling.""" - self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER") + self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") try: if not DOCX_AVAILABLE: # 
Fallback to HTML if python-docx not available from .rendererHtml import RendererHtml - html_renderer = RendererHtml() - html_content, _ = await html_renderer.render(extracted_content, title) - return html_content, "text/html" + htmlRenderer = RendererHtml() + htmlContent, _ = await htmlRenderer.render(extractedContent, title) + return htmlContent, "text/html" # Generate DOCX using AI-analyzed styling - docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service) + docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document" @@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer): # Return minimal fallback return f"DOCX Generation Error: {str(e)}", "text/plain" - async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate DOCX content from structured JSON document using AI-generated styling.""" try: # Create new document doc = Document() # Get AI-generated styling definitions - self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...") - styles = await self._get_docx_styles(user_prompt, ai_service) + self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...") + styles = await self._getDocxStyles(userPrompt, aiService) # Apply basic document setup - self._setup_basic_document_styles(doc) + self._setupBasicDocumentStyles(doc) # Validate JSON structure if not isinstance(json_content, dict): @@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer): self.logger.error(f"Error generating DOCX from JSON: {str(e)}") raise Exception(f"DOCX generation failed: {str(e)}") - async def 
_get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]: """Get DOCX styling definitions using base template AI styling.""" style_schema = { "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, @@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} } - style_template = self._create_ai_style_template("docx", user_prompt, style_schema) - styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles()) + style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema) + styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles()) # Validate and fix contrast issues - return self._validate_styles_contrast(styles) + return self._validateStylesContrast(styles) - def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" try: # Fix table header contrast @@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Style validation failed: {str(e)}") - return self._get_default_styles() + return self._getDefaultStyles() - def _get_default_styles(self) -> Dict[str, Any]: + def _getDefaultStyles(self) -> Dict[str, Any]: """Default DOCX styles.""" return { "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, @@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} } - def _setup_basic_document_styles(self, doc: Document) -> None: + def _setupBasicDocumentStyles(self, doc: Document) -> None: """Set up basic document styles.""" try: # Set default font @@ -189,7 +189,7 
@@ class RendererDocx(BaseRenderer): - def _clear_template_content(self, doc: Document) -> None: + def _clearTemplateContent(self, doc: Document) -> None: """Clear template content while preserving styles.""" try: # Remove all paragraphs except keep the styles @@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not clear template content: {str(e)}") - def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a single JSON section to DOCX using AI-generated styles.""" try: section_type = section.get("content_type", "paragraph") @@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer): # Process each element in the section for element in elements: if section_type == "table": - self._render_json_table(doc, element, styles) + self._renderJsonTable(doc, element, styles) elif section_type == "bullet_list": - self._render_json_bullet_list(doc, element, styles) + self._renderJsonBulletList(doc, element, styles) elif section_type == "heading": - self._render_json_heading(doc, element, styles) + self._renderJsonHeading(doc, element, styles) elif section_type == "paragraph": - self._render_json_paragraph(doc, element, styles) + self._renderJsonParagraph(doc, element, styles) elif section_type == "code_block": - self._render_json_code_block(doc, element, styles) + self._renderJsonCodeBlock(doc, element, styles) elif section_type == "image": - self._render_json_image(doc, element, styles) + self._renderJsonImage(doc, element, styles) else: # Fallback to paragraph for unknown types - self._render_json_paragraph(doc, element, styles) + self._renderJsonParagraph(doc, element, styles) except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") # Add error paragraph as fallback error_para = doc.add_paragraph(f"[Error 
rendering section: {str(e)}]") - def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON table to DOCX using AI-generated styles.""" try: headers = table_data.get("headers", []) @@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer): # Apply table borders based on AI style border_style = styles["table_border"]["style"] if border_style == "horizontal_only": - self._apply_horizontal_borders_only(table) + self._applyHorizontalBordersOnly(table) elif border_style == "grid": table.style = 'Table Grid' # else: no borders @@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer): # Apply background color bg_color = header_style["background"].lstrip('#') - self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16))) + self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16))) # Apply text styling for paragraph in cell.paragraphs: @@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") - def _apply_horizontal_borders_only(self, table) -> None: + def _applyHorizontalBordersOnly(self, table) -> None: """Apply only horizontal borders to the table (no vertical borders).""" try: from docx.oxml.shared import OxmlElement, qn @@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not apply horizontal borders: {str(e)}") - def _set_cell_background(self, cell, color: RGBColor) -> None: + def _setCellBackground(self, cell, color: RGBColor) -> None: """Set the background color of a table cell.""" try: from docx.oxml.shared import OxmlElement, qn @@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Could not set cell background: {str(e)}") - def 
_render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON bullet list to DOCX using AI-generated styles.""" try: items = list_data.get("items", []) @@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") - def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON heading to DOCX using AI-generated styles.""" try: level = heading_data.get("level", 1) @@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering heading: {str(e)}") - def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON paragraph to DOCX using AI-generated styles.""" try: text = paragraph_data.get("text", "") @@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") - def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON code block to DOCX using AI-generated styles.""" try: code = code_data.get("code", "") @@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") - def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonImage(self, doc: 
Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON image to DOCX.""" try: base64_data = image_data.get("base64Data", "") @@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Error rendering image: {str(e)}") doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]") - def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]: + def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]: """Extract document structure from user prompt.""" structure = { 'title': title, @@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer): 'format': 'standard' } - if not user_prompt: + if not userPrompt: return structure # Extract title from prompt if not provided if not title or title == "Generated Document": # Look for "create a ... document" or "generate a ... report" import re - title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower()) + title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower()) if title_match: structure['title'] = title_match.group(1).strip().title() # Extract sections from numbered lists in prompt import re section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)' - sections = re.findall(section_pattern, user_prompt) + sections = re.findall(section_pattern, userPrompt) for num, section_text in sections: structure['sections'].append({ @@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer): # If no numbered sections found, try to extract from "including:" patterns if not structure['sections']: - including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL) + including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL) if including_match: including_text = including_match.group(1) # Split by common separators @@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer): if not 
structure['sections']: # Look for bullet points or dashes bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)' - bullets = re.findall(bullet_pattern, user_prompt) + bullets = re.findall(bullet_pattern, userPrompt) for i, bullet in enumerate(bullets, 1): bullet = bullet.strip() if bullet and len(bullet) > 3: @@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer): # If still no sections, extract from sentence structure if not structure['sections']: # Split prompt into sentences and use as sections - sentences = re.split(r'[.!?]\s+', user_prompt) + sentences = re.split(r'[.!?]\s+', userPrompt) for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections sentence = sentence.strip() if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')): @@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer): action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate'] found_actions = [] for action in action_words: - if action in user_prompt.lower(): + if action in userPrompt.lower(): found_actions.append(action.title()) if found_actions: @@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer): return structure - def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]): + def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]): """Generate DOCX content based on extracted structure.""" # Add sections based on prompt structure for section in structure['sections']: @@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer): # Add AI-generated content for this section # Try to extract relevant content for this section from the AI response - section_content = self._extract_section_content(content, section['title']) + section_content = self._extractSectionContent(content, section['title']) if section_content: doc.add_paragraph(section_content) @@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer): doc.add_heading("Complete Analysis", level=1) doc.add_paragraph(content) - def 
_extract_section_content(self, content: str, section_title: str) -> str: + def _extractSectionContent(self, content: str, section_title: str) -> str: """Extract relevant content for a specific section from AI response.""" if not content or not section_title: return "" @@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer): return "" - def _setup_document_styles(self, doc): + def _setupDocumentStyles(self, doc): """Set up document styles.""" try: # Set default font @@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not set up document styles: {str(e)}") - def _process_section(self, doc, lines: list): + def _processSection(self, doc, lines: list): """Process a section of content into DOCX elements.""" for line in lines: if not line.strip(): @@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer): # Check for tables (lines with |) if '|' in line and not line.startswith('|'): # This might be part of a table, process as table - table_data = self._extract_table_data(lines) + table_data = self._extractTableData(lines) if table_data: - self._add_table(doc, table_data) + self._addTable(doc, table_data) return # Check for lists @@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer): # Regular paragraph doc.add_paragraph(line) - def _extract_table_data(self, lines: list) -> list: + def _extractTableData(self, lines: list) -> list: """Extract table data from lines.""" table_data = [] in_table = False @@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer): return table_data if len(table_data) > 1 else [] - def _add_table(self, doc, table_data: list): + def _addTable(self, doc, table_data: list): """Add a table to the document.""" try: if not table_data: @@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer): table.rows[row_idx].cells[col_idx].text = cell_data # Style the table - self._style_table(table) + self._styleTable(table) except Exception as e: self.logger.warning(f"Could not add table: {str(e)}") - def _style_table(self, 
table): + def _styleTable(self, table): """Apply styling to the table.""" try: # Style header row @@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not style table: {str(e)}") - def _process_table_row(self, doc, line: str): + def _processTableRow(self, doc, line: str): """Process a table row and add it to the document.""" if not line.strip(): return @@ -745,7 +745,7 @@ class RendererDocx(BaseRenderer): # Not a table row, treat as regular text doc.add_paragraph(line) - def _clean_ai_content(self, content: str) -> str: + def _cleanAiContent(self, content: str) -> str: """Clean AI-generated content by removing debug information and duplicates.""" if not content: return "" @@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer): return '\n\n'.join(unique_sections) - def _process_tables(self, doc, content: str) -> str: + def _processTables(self, doc, content: str) -> str: """ Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. Returns the content with tables replaced by placeholders. 
@@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer): return '\n'.join(processed_lines) - def _parse_and_format_content(self, doc, content: str, title: str): + def _parseAndFormatContent(self, doc, content: str, title: str): """Parse AI-generated content in standardized format and apply proper DOCX formatting.""" if not content: return # Process tables and replace them with placeholders - content = self._process_tables(doc, content) + content = self._processTables(doc, content) # Parse content line by line in exact sequence lines = content.split('\n') @@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer): # Regular paragraph else: - self._add_paragraph_to_doc(doc, line) + self._addParagraphToDoc(doc, line) - def _add_paragraph_to_doc(self, doc, text: str): + def _addParagraphToDoc(self, doc, text: str): """Add a paragraph to the document with proper formatting.""" if not text.strip(): return diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 1b202886..660a16c2 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer): """Renders content to HTML format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported HTML formats.""" return ['html', 'htm'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['web', 'webpage'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for HTML renderer.""" return 100 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str 
= None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to HTML format using AI-analyzed styling.""" try: # Generate HTML using AI-analyzed styling - html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service) + htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) - return html_content, "text/html" + return htmlContent, "text/html" except Exception as e: self.logger.error(f"Error rendering HTML: {str(e)}") # Return minimal HTML fallback return f"{title}

{title}

Error rendering report: {str(e)}

", "text/html" - async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate HTML content from structured JSON document using AI-generated styling.""" try: # Get AI-generated styling definitions - styles = await self._get_html_styles(user_prompt, ai_service) + styles = await self._getHtmlStyles(userPrompt, aiService) # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + documentTitle = jsonContent.get("metadata", {}).get("title", title) # Build HTML document - html_parts = [] + htmlParts = [] # HTML document structure - html_parts.append('') - html_parts.append('') - html_parts.append('') - html_parts.append('') - html_parts.append('') - html_parts.append(f'{document_title}') - html_parts.append('') - html_parts.append('') - html_parts.append('') + htmlParts.append('') + htmlParts.append('') + htmlParts.append('') + htmlParts.append('') + htmlParts.append('') + htmlParts.append(f'{documentTitle}') + htmlParts.append('') + htmlParts.append('') + htmlParts.append('') # Document header - html_parts.append(f'

{document_title}

') + htmlParts.append(f'

{documentTitle}

') # Main content - html_parts.append('
') + htmlParts.append('
') # Process each section - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) for section in sections: - section_html = self._render_json_section(section, styles) - if section_html: - html_parts.append(section_html) + sectionHtml = self._renderJsonSection(section, styles) + if sectionHtml: + htmlParts.append(sectionHtml) - html_parts.append('
') + htmlParts.append('
') # Footer - html_parts.append('') + htmlParts.append('') - html_parts.append('') - html_parts.append('') + htmlParts.append('') + htmlParts.append('') - return '\n'.join(html_parts) + return '\n'.join(htmlParts) except Exception as e: self.logger.error(f"Error generating HTML from JSON: {str(e)}") raise Exception(f"HTML generation failed: {str(e)}") - async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + async def _getHtmlStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]: """Get HTML styling definitions using base template AI styling.""" - style_schema = { + styleSchema = { "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}, "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}, "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}, @@ -113,40 +113,40 @@ class RendererHtml(BaseRenderer): "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"} } - style_template = self._create_ai_style_template("html", user_prompt, style_schema) - styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles()) + styleTemplate = self._createAiStyleTemplate("html", userPrompt, styleSchema) + styles = await self._getAiStyles(aiService, styleTemplate, self._getDefaultHtmlStyles()) # Validate and fix contrast issues - return self._validate_html_styles_contrast(styles) + return self._validateHtmlStylesContrast(styles) - def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _validateHtmlStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" try: # Fix table header contrast if "table_header" in styles: header = 
styles["table_header"] - bg_color = header.get("background", "#FFFFFF") - text_color = header.get("color", "#000000") + bgColor = header.get("background", "#FFFFFF") + textColor = header.get("color", "#000000") # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": header["background"] = "#4F4F4F" header["color"] = "#FFFFFF" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": header["background"] = "#4F4F4F" header["color"] = "#FFFFFF" # Fix table cell contrast if "table_cell" in styles: cell = styles["table_cell"] - bg_color = cell.get("background", "#FFFFFF") - text_color = cell.get("color", "#000000") + bgColor = cell.get("background", "#FFFFFF") + textColor = cell.get("color", "#000000") # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": cell["background"] = "#FFFFFF" cell["color"] = "#2F2F2F" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": cell["background"] = "#FFFFFF" cell["color"] = "#2F2F2F" @@ -154,10 +154,10 @@ class RendererHtml(BaseRenderer): except Exception as e: self.logger.warning(f"Style validation failed: {str(e)}") - return self._get_default_html_styles() + return self._getDefaultHtmlStyles() - def _get_default_html_styles(self) -> Dict[str, Any]: + def _getDefaultHtmlStyles(self) -> Dict[str, Any]: """Default HTML styles.""" return { "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}, @@ -173,7 +173,7 @@ class RendererHtml(BaseRenderer): "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": 
"#2F2F2F", "margin": "0", "padding": "20px"} } - def _generate_css_styles(self, styles: Dict[str, Any]) -> str: + def _generateCssStyles(self, styles: Dict[str, Any]) -> str: """Generate CSS from style definitions.""" css_parts = [] @@ -271,109 +271,109 @@ class RendererHtml(BaseRenderer): return '\n'.join(css_parts) - def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a single JSON section to HTML using AI-generated styles.""" try: - section_type = self._get_section_type(section) - section_data = self._get_section_data(section) + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) - if section_type == "table": + if sectionType == "table": # Process the section data to extract table structure - processed_data = self._process_section_by_type(section) - return self._render_json_table(processed_data, styles) - elif section_type == "bullet_list": + processedData = self._processSectionByType(section) + return self._renderJsonTable(processedData, styles) + elif sectionType == "bullet_list": # Process the section data to extract bullet list structure - processed_data = self._process_section_by_type(section) - return self._render_json_bullet_list(processed_data, styles) - elif section_type == "heading": - return self._render_json_heading(section_data, styles) - elif section_type == "paragraph": - return self._render_json_paragraph(section_data, styles) - elif section_type == "code_block": + processedData = self._processSectionByType(section) + return self._renderJsonBulletList(processedData, styles) + elif sectionType == "heading": + return self._renderJsonHeading(sectionData, styles) + elif sectionType == "paragraph": + return self._renderJsonParagraph(sectionData, styles) + elif sectionType == "code_block": # Process the section data to extract code block structure - processed_data = 
self._process_section_by_type(section) - return self._render_json_code_block(processed_data, styles) - elif section_type == "image": + processedData = self._processSectionByType(section) + return self._renderJsonCodeBlock(processedData, styles) + elif sectionType == "image": # Process the section data to extract image structure - processed_data = self._process_section_by_type(section) - return self._render_json_image(processed_data, styles) + processedData = self._processSectionByType(section) + return self._renderJsonImage(processedData, styles) else: # Fallback to paragraph for unknown types - return self._render_json_paragraph(section_data, styles) + return self._renderJsonParagraph(sectionData, styles) except Exception as e: - self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") return f'
[Error rendering section: {str(e)}]
' - def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON table to HTML using AI-generated styles.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + headers = tableData.get("headers", []) + rows = tableData.get("rows", []) if not headers or not rows: return "" - html_parts = [''] + htmlParts = ['
'] # Table header - html_parts.append('') + htmlParts.append('') for header in headers: - html_parts.append(f'') - html_parts.append('') + htmlParts.append(f'') + htmlParts.append('') # Table body - html_parts.append('') + htmlParts.append('') for row in rows: - html_parts.append('') - for cell_data in row: - html_parts.append(f'') - html_parts.append('') - html_parts.append('') + htmlParts.append('') + for cellData in row: + htmlParts.append(f'') + htmlParts.append('') + htmlParts.append('') - html_parts.append('
{header}
{header}
{cell_data}
{cellData}
') - return '\n'.join(html_parts) + htmlParts.append('') + return '\n'.join(htmlParts) except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return "" - def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON bullet list to HTML using AI-generated styles.""" try: - items = list_data.get("items", []) + items = listData.get("items", []) if not items: return "" - html_parts = ['') - return '\n'.join(html_parts) + return '\n'.join(htmlParts) except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") return "" - def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON heading to HTML using AI-generated styles.""" try: # Normalize non-dict inputs - if isinstance(heading_data, str): - heading_data = {"text": heading_data, "level": 2} - elif isinstance(heading_data, list): + if isinstance(headingData, str): + headingData = {"text": headingData, "level": 2} + elif isinstance(headingData, list): # Render a list as bullet list under a default heading label - return self._render_json_bullet_list({"items": heading_data}, styles) - elif not isinstance(heading_data, dict): + return self._renderJsonBulletList({"items": headingData}, styles) + elif not isinstance(headingData, dict): return "" - level = heading_data.get("level", 1) - text = heading_data.get("text", "") + level = headingData.get("level", 1) + text = headingData.get("text", "") if text: level = max(1, min(6, level)) @@ -385,19 +385,19 @@ class RendererHtml(BaseRenderer): self.logger.warning(f"Error rendering heading: {str(e)}") return "" - def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonParagraph(self, paragraphData: 
Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON paragraph to HTML using AI-generated styles.""" try: # Normalize non-dict inputs - if isinstance(paragraph_data, str): - paragraph_data = {"text": paragraph_data} - elif isinstance(paragraph_data, list): + if isinstance(paragraphData, str): + paragraphData = {"text": paragraphData} + elif isinstance(paragraphData, list): # Treat list as bullet list paragraph - return self._render_json_bullet_list({"items": paragraph_data}, styles) - elif not isinstance(paragraph_data, dict): + return self._renderJsonBulletList({"items": paragraphData}, styles) + elif not isinstance(paragraphData, dict): return "" - text = paragraph_data.get("text", "") + text = paragraphData.get("text", "") if text: return f'

{text}

' @@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer): self.logger.warning(f"Error rendering paragraph: {str(e)}") return "" - def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON code block to HTML using AI-generated styles.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + code = codeData.get("code", "") + language = codeData.get("language", "") if code: if language: @@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer): self.logger.warning(f"Error rendering code block: {str(e)}") return "" - def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON image to HTML.""" try: - base64_data = image_data.get("base64Data", "") - alt_text = image_data.get("altText", "Image") + base64Data = imageData.get("base64Data", "") + altText = imageData.get("altText", "Image") - if base64_data: - return f'{alt_text}' + if base64Data: + return f'{altText}' return "" except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") - return f'
[Image: {image_data.get("altText", "Image")}]
' + return f'
[Image: {imageData.get("altText", "Image")}]
' diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index 4db495dc..bfc89927 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -12,154 +12,156 @@ class RendererImage(BaseRenderer): """Renders content to image format using AI image generation.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported image formats.""" return ['png', 'jpg', 'jpeg', 'image'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['img', 'picture', 'photo', 'graphic'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for image renderer.""" return 90 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to image format using AI image generation.""" try: # Generate AI image from content - image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service) + imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService) - return image_content, "image/png" + return imageContent, "image/png" except Exception as e: self.logger.error(f"Error rendering image: {str(e)}") # Re-raise the exception instead of using fallback raise Exception(f"Image rendering failed: {str(e)}") - async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> 
str: """Generate AI image from extracted content.""" try: - if not ai_service: + if not aiService: raise ValueError("AI service is required for image generation") # Validate JSON structure - if not isinstance(extracted_content, dict): + if not isinstance(extractedContent, dict): raise ValueError("Extracted content must be a dictionary") - if "sections" not in extracted_content: + if "sections" not in extractedContent: raise ValueError("Extracted content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = extracted_content.get("metadata", {}).get("title", title) + documentTitle = extractedContent.get("metadata", {}).get("title", title) # Create AI prompt for image generation - image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service) + imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService) # Save image generation prompt to debug - ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt") + aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt") # Generate image using AI - image_result = await ai_service.aiObjects.generateImage( - prompt=image_prompt, + imageResult = await aiService.aiObjects.generateImage( + prompt=imagePrompt, size="1024x1024", quality="standard", style="vivid" ) # Save image generation response to debug - ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response") + aiService.services.utils.writeDebugFile(str(imageResult), "image_generation_response") # Extract base64 image data from result - if image_result and image_result.get("success", False): - image_data = image_result.get("image_data", "") - if image_data: - return image_data + if imageResult and imageResult.get("success", False): + imageData = imageResult.get("image_data", "") + if imageData: + return imageData else: raise 
ValueError("No image data returned from AI") else: - error_msg = image_result.get("error", "Unknown error") if image_result else "No result" - raise ValueError(f"AI image generation failed: {error_msg}") + errorMsg = imageResult.get("error", "Unknown error") if imageResult else "No result" + raise ValueError(f"AI image generation failed: {errorMsg}") except Exception as e: self.logger.error(f"Error generating AI image: {str(e)}") raise Exception(f"AI image generation failed: {str(e)}") - async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Create a detailed prompt for AI image generation based on the content.""" try: # Start with base prompt - prompt_parts = [] + promptParts = [] # Add user's original intent if available - if user_prompt: - prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}") + if userPrompt: + sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt + promptParts.append(f"User Request: {sanitized_prompt}") # Add document title - prompt_parts.append(f"Document Title: {title}") + promptParts.append(f"Document Title: {title}") # Analyze content and create visual description - sections = extracted_content.get("sections", []) - content_description = self._analyze_content_for_visual_description(sections) + sections = extractedContent.get("sections", []) + contentDescription = self._analyzeContentForVisualDescription(sections) - if content_description: - prompt_parts.append(f"Content to Visualize: {content_description}") + if contentDescription: + promptParts.append(f"Content to Visualize: {contentDescription}") # Add style guidance - style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt) 
- if style_guidance: - prompt_parts.append(f"Visual Style: {style_guidance}") + styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt) + if styleGuidance: + promptParts.append(f"Visual Style: {styleGuidance}") # Combine all parts - full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts) + fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts) # Add technical requirements - full_prompt += "\n\nTechnical Requirements:" - full_prompt += "\n- High quality, professional appearance" - full_prompt += "\n- Clear, readable text if any text is included" - full_prompt += "\n- Appropriate colors and layout" - full_prompt += "\n- Suitable for business/professional use" + fullPrompt += "\n\nTechnical Requirements:" + fullPrompt += "\n- High quality, professional appearance" + fullPrompt += "\n- Clear, readable text if any text is included" + fullPrompt += "\n- Appropriate colors and layout" + fullPrompt += "\n- Suitable for business/professional use" # Truncate prompt if it exceeds DALL-E's 4000 character limit - if len(full_prompt) > 4000: + if len(fullPrompt) > 4000: # Use AI to compress the prompt intelligently - compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service) - if compressed_prompt and len(compressed_prompt) <= 4000: - return compressed_prompt + compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService) + if compressedPrompt and len(compressedPrompt) <= 4000: + return compressedPrompt # Fallback to minimal prompt if AI compression fails or is still too long - minimal_prompt = f"Create a professional image representing: {title}" - if user_prompt: - minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}" + minimalPrompt = f"Create a professional image representing: {title}" + if userPrompt: + sanitized_prompt = 
aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt + minimalPrompt += f" - {sanitized_prompt}" # If even the minimal prompt is too long, truncate it - if len(minimal_prompt) > 4000: - minimal_prompt = minimal_prompt[:3997] + "..." + if len(minimalPrompt) > 4000: + minimalPrompt = minimalPrompt[:3997] + "..." - return minimal_prompt + return minimalPrompt - return full_prompt + return fullPrompt except Exception as e: self.logger.warning(f"Error creating image prompt: {str(e)}") # Fallback to simple prompt return f"Create a professional image representing: {title}" - async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str: + async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str: """Use AI to intelligently compress a long prompt while preserving key information.""" try: - if not ai_service: + if not aiService: return None - compression_prompt = f""" + compressionPrompt = f""" You are an expert at creating concise, effective prompts for AI image generation. The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information. -Original prompt ({len(long_prompt)} characters): -{long_prompt} +Original prompt ({len(longPrompt)} characters): +{longPrompt} Please create a compressed version that: 1. Keeps the most important visual elements and requirements @@ -176,7 +178,7 @@ Return only the compressed prompt, no explanations. from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum request = AiCallRequest( - prompt=compression_prompt, + prompt=compressionPrompt, options=AiCallOptions( operationType=OperationTypeEnum.DATA_GENERATE, maxTokens=None, # Let the model use its full context length @@ -184,12 +186,12 @@ Return only the compressed prompt, no explanations. 
) ) - response = await ai_service.aiObjects.call(request) + response = await aiService.aiObjects.call(request) compressed = response.content.strip() # Validate the compressed prompt if compressed and len(compressed) <= 4000 and len(compressed) > 50: - self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters") + self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters") return compressed else: self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars") @@ -199,42 +201,42 @@ Return only the compressed prompt, no explanations. self.logger.warning(f"Error compressing prompt with AI: {str(e)}") return None - def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str: + def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str: """Analyze content sections and create a visual description for AI.""" try: descriptions = [] for section in sections: - section_type = self._get_section_type(section) - section_data = self._get_section_data(section) + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) - if section_type == "table": - headers = section_data.get("headers", []) - rows = section_data.get("rows", []) + if sectionType == "table": + headers = sectionData.get("headers", []) + rows = sectionData.get("rows", []) if headers and rows: descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}") - elif section_type == "bullet_list": - items = section_data.get("items", []) + elif sectionType == "bullet_list": + items = sectionData.get("items", []) if items: descriptions.append(f"List with {len(items)} items") - elif section_type == "heading": - text = section_data.get("text", "") - level = section_data.get("level", 1) + elif sectionType == "heading": + text = sectionData.get("text", "") 
+ level = sectionData.get("level", 1) if text: descriptions.append(f"Heading {level}: {text}") - elif section_type == "paragraph": - text = section_data.get("text", "") + elif sectionType == "paragraph": + text = sectionData.get("text", "") if text and len(text) > 10: # Only include substantial paragraphs # Truncate long text truncated = text[:100] + "..." if len(text) > 100 else text descriptions.append(f"Text content: {truncated}") - elif section_type == "code_block": - code = section_data.get("code", "") - language = section_data.get("language", "") + elif sectionType == "code_block": + code = sectionData.get("code", "") + language = sectionData.get("language", "") if code: descriptions.append(f"Code block ({language}): {code[:50]}...") @@ -244,42 +246,42 @@ Return only the compressed prompt, no explanations. self.logger.warning(f"Error analyzing content: {str(e)}") return "Document content" - def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str: + def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str: """Determine visual style guidance based on content and user prompt.""" try: - style_elements = [] + styleElements = [] # Analyze user prompt for style hints - if user_prompt: - prompt_lower = user_prompt.lower() + if userPrompt: + promptLower = userPrompt.lower() - if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]): - style_elements.append("modern, clean design") - elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]): - style_elements.append("classic, formal design") - elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]): - style_elements.append("creative, artistic design") - elif any(word in prompt_lower for word in ["corporate", "business", "professional"]): - style_elements.append("corporate, professional design") + if any(word in promptLower for word in ["modern", 
"contemporary", "sleek"]): + styleElements.append("modern, clean design") + elif any(word in promptLower for word in ["classic", "traditional", "formal"]): + styleElements.append("classic, formal design") + elif any(word in promptLower for word in ["creative", "artistic", "colorful"]): + styleElements.append("creative, artistic design") + elif any(word in promptLower for word in ["corporate", "business", "professional"]): + styleElements.append("corporate, professional design") # Analyze content type for additional style hints - sections = extracted_content.get("sections", []) - has_tables = any(self._get_section_type(s) == "table" for s in sections) - has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections) - has_code = any(self._get_section_type(s) == "code_block" for s in sections) + sections = extractedContent.get("sections", []) + hasTables = any(self._getSectionType(s) == "table" for s in sections) + hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections) + hasCode = any(self._getSectionType(s) == "code_block" for s in sections) - if has_tables: - style_elements.append("data-focused layout") - if has_lists: - style_elements.append("organized, structured presentation") - if has_code: - style_elements.append("technical, developer-friendly") + if hasTables: + styleElements.append("data-focused layout") + if hasLists: + styleElements.append("organized, structured presentation") + if hasCode: + styleElements.append("technical, developer-friendly") # Default style if no specific guidance - if not style_elements: - style_elements.append("professional, clean design") + if not styleElements: + styleElements.append("professional, clean design") - return ", ".join(style_elements) + return ", ".join(styleElements) except Exception as e: self.logger.warning(f"Error determining style guidance: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py 
b/modules/services/serviceGeneration/renderers/rendererJson.py index 2ff07ad6..d688da85 100644 --- a/modules/services/serviceGeneration/renderers/rendererJson.py +++ b/modules/services/serviceGeneration/renderers/rendererJson.py @@ -10,40 +10,40 @@ class RendererJson(BaseRenderer): """Renders content to JSON format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported JSON formats.""" return ['json'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['data'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for JSON renderer.""" return 80 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to JSON format.""" try: # The extracted content should already be JSON from the AI # Just validate and format it - json_content = self._clean_json_content(extracted_content, title) + jsonContent = self._cleanJsonContent(extractedContent, title) - return json_content, "application/json" + return jsonContent, "application/json" except Exception as e: self.logger.error(f"Error rendering JSON: {str(e)}") # Return minimal JSON fallback - fallback_data = { + fallbackData = { "title": title, "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}], "metadata": {"error": str(e)} } - return json.dumps(fallback_data, indent=2), "application/json" + return json.dumps(fallbackData, indent=2), "application/json" - def _clean_json_content(self, content: Dict[str, Any], title: str) -> str: + def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str: """Clean 
and validate JSON content from AI.""" try: # Validate JSON structure @@ -72,8 +72,8 @@ class RendererJson(BaseRenderer): except Exception as e: self.logger.warning(f"Error cleaning JSON content: {str(e)}") # Return minimal valid JSON - fallback_data = { + fallbackData = { "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}], "metadata": {"title": title, "error": str(e)} } - return json.dumps(fallback_data, indent=2, ensure_ascii=False) + return json.dumps(fallbackData, indent=2, ensure_ascii=False) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index 59806d4c..5a91c667 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -9,161 +9,161 @@ class RendererMarkdown(BaseRenderer): """Renders content to Markdown format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported Markdown formats.""" return ['md', 'markdown'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['mdown', 'mkd'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for markdown renderer.""" return 95 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to Markdown format.""" try: # Generate markdown from JSON structure - markdown_content = self._generate_markdown_from_json(extracted_content, title) + markdownContent = self._generateMarkdownFromJson(extractedContent, title) - return markdown_content, 
"text/markdown" + return markdownContent, "text/markdown" except Exception as e: self.logger.error(f"Error rendering markdown: {str(e)}") # Return minimal markdown fallback return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown" - def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str: + def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate markdown content from structured JSON document.""" try: # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + documentTitle = jsonContent.get("metadata", {}).get("title", title) # Build markdown content - markdown_parts = [] + markdownParts = [] # Document title - markdown_parts.append(f"# {document_title}") - markdown_parts.append("") + markdownParts.append(f"# {documentTitle}") + markdownParts.append("") # Process each section - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) for section in sections: - section_markdown = self._render_json_section(section) - if section_markdown: - markdown_parts.append(section_markdown) - markdown_parts.append("") # Add spacing between sections + sectionMarkdown = self._renderJsonSection(section) + if sectionMarkdown: + markdownParts.append(sectionMarkdown) + markdownParts.append("") # Add spacing between sections # Add generation info - markdown_parts.append("---") - markdown_parts.append(f"*Generated: {self._format_timestamp()}*") + markdownParts.append("---") + markdownParts.append(f"*Generated: {self._formatTimestamp()}*") - return '\n'.join(markdown_parts) + return 
'\n'.join(markdownParts) except Exception as e: self.logger.error(f"Error generating markdown from JSON: {str(e)}") raise Exception(f"Markdown generation failed: {str(e)}") - def _render_json_section(self, section: Dict[str, Any]) -> str: + def _renderJsonSection(self, section: Dict[str, Any]) -> str: """Render a single JSON section to markdown.""" try: - section_type = self._get_section_type(section) - section_data = self._get_section_data(section) + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) - if section_type == "table": + if sectionType == "table": # Process the section data to extract table structure - processed_data = self._process_section_by_type(section) - return self._render_json_table(processed_data) - elif section_type == "bullet_list": + processedData = self._processSectionByType(section) + return self._renderJsonTable(processedData) + elif sectionType == "bullet_list": # Process the section data to extract bullet list structure - processed_data = self._process_section_by_type(section) - return self._render_json_bullet_list(processed_data) - elif section_type == "heading": - return self._render_json_heading(section_data) - elif section_type == "paragraph": - return self._render_json_paragraph(section_data) - elif section_type == "code_block": + processedData = self._processSectionByType(section) + return self._renderJsonBulletList(processedData) + elif sectionType == "heading": + return self._renderJsonHeading(sectionData) + elif sectionType == "paragraph": + return self._renderJsonParagraph(sectionData) + elif sectionType == "code_block": # Process the section data to extract code block structure - processed_data = self._process_section_by_type(section) - return self._render_json_code_block(processed_data) - elif section_type == "image": + processedData = self._processSectionByType(section) + return self._renderJsonCodeBlock(processedData) + elif sectionType == "image": # Process the section data to extract 
image structure - processed_data = self._process_section_by_type(section) - return self._render_json_image(processed_data) + processedData = self._processSectionByType(section) + return self._renderJsonImage(processedData) else: # Fallback to paragraph for unknown types - return self._render_json_paragraph(section_data) + return self._renderJsonParagraph(sectionData) except Exception as e: - self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") return f"*[Error rendering section: {str(e)}]*" - def _render_json_table(self, table_data: Dict[str, Any]) -> str: + def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to markdown.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + headers = tableData.get("headers", []) + rows = tableData.get("rows", []) if not headers or not rows: return "" - markdown_parts = [] + markdownParts = [] # Create table header - header_line = " | ".join(str(header) for header in headers) - markdown_parts.append(header_line) + headerLine = " | ".join(str(header) for header in headers) + markdownParts.append(headerLine) # Add separator line - separator_line = " | ".join("---" for _ in headers) - markdown_parts.append(separator_line) + separatorLine = " | ".join("---" for _ in headers) + markdownParts.append(separatorLine) # Add data rows for row in rows: - row_line = " | ".join(str(cell_data) for cell_data in row) - markdown_parts.append(row_line) + rowLine = " | ".join(str(cellData) for cellData in row) + markdownParts.append(rowLine) - return '\n'.join(markdown_parts) + return '\n'.join(markdownParts) except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return "" - def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str: + def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: """Render a JSON bullet 
list to markdown.""" try: - items = list_data.get("items", []) + items = listData.get("items", []) if not items: return "" - markdown_parts = [] + markdownParts = [] for item in items: if isinstance(item, str): - markdown_parts.append(f"- {item}") + markdownParts.append(f"- {item}") elif isinstance(item, dict) and "text" in item: - markdown_parts.append(f"- {item['text']}") + markdownParts.append(f"- {item['text']}") - return '\n'.join(markdown_parts) + return '\n'.join(markdownParts) except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") return "" - def _render_json_heading(self, heading_data: Dict[str, Any]) -> str: + def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to markdown.""" try: - level = heading_data.get("level", 1) - text = heading_data.get("text", "") + level = headingData.get("level", 1) + text = headingData.get("text", "") if text: level = max(1, min(6, level)) @@ -175,21 +175,21 @@ class RendererMarkdown(BaseRenderer): self.logger.warning(f"Error rendering heading: {str(e)}") return "" - def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str: + def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str: """Render a JSON paragraph to markdown.""" try: - text = paragraph_data.get("text", "") + text = paragraphData.get("text", "") return text if text else "" except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") return "" - def _render_json_code_block(self, code_data: Dict[str, Any]) -> str: + def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str: """Render a JSON code block to markdown.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + code = codeData.get("code", "") + language = codeData.get("language", "") if code: if language: @@ -203,19 +203,19 @@ class RendererMarkdown(BaseRenderer): self.logger.warning(f"Error rendering code block: {str(e)}") return "" - def 
_render_json_image(self, image_data: Dict[str, Any]) -> str: + def _renderJsonImage(self, imageData: Dict[str, Any]) -> str: """Render a JSON image to markdown.""" try: - alt_text = image_data.get("altText", "Image") - base64_data = image_data.get("base64Data", "") + altText = imageData.get("altText", "Image") + base64Data = imageData.get("base64Data", "") - if base64_data: + if base64Data: # For base64 images, we can't embed them directly in markdown # So we'll use a placeholder with the alt text - return f"![{alt_text}](data:image/png;base64,{base64_data[:50]}...)" + return f"![{altText}](data:image/png;base64,{base64Data[:50]}...)" else: - return f"![{alt_text}](image-placeholder)" + return f"![{altText}](image-placeholder)" except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") - return f"![{image_data.get('altText', 'Image')}](image-error)" + return f"![{imageData.get('altText', 'Image')}](image-error)" diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index b80e6197..f2b15e46 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -22,32 +22,32 @@ class RendererPdf(BaseRenderer): """Renders content to PDF format using reportlab.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported PDF formats.""" return ['pdf'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['document', 'print'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for PDF renderer.""" return 120 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, 
userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to PDF format using AI-analyzed styling.""" try: if not REPORTLAB_AVAILABLE: # Fallback to HTML if reportlab not available from .rendererHtml import RendererHtml html_renderer = RendererHtml() - html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service) + html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService) return html_content, "text/html" # Generate PDF using AI-analyzed styling - pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service) + pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService) return pdf_content, "application/pdf" @@ -56,11 +56,11 @@ class RendererPdf(BaseRenderer): # Return minimal fallback return f"PDF Generation Error: {str(e)}", "text/plain" - async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate PDF content from structured JSON document using AI-generated styling.""" try: # Get AI-generated styling definitions - styles = await self._get_pdf_styles(user_prompt, ai_service) + styles = await self._getPdfStyles(userPrompt, aiService) # Validate JSON structure if not isinstance(json_content, dict): @@ -93,10 +93,10 @@ class RendererPdf(BaseRenderer): story = [] # Title page - title_style = self._create_title_style(styles) + title_style = self._createTitleStyle(styles) story.append(Paragraph(document_title, title_style)) story.append(Spacer(1, 50)) # Increased spacing to prevent overlap - story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles))) + story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._createNormalStyle(styles))) 
story.append(Spacer(1, 30)) # Add spacing before page break story.append(PageBreak()) @@ -105,7 +105,7 @@ class RendererPdf(BaseRenderer): self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER") for i, section in enumerate(sections): self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER") - section_elements = self._render_json_section(section, styles) + section_elements = self._renderJsonSection(section, styles) self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER") story.extend(section_elements) @@ -123,7 +123,7 @@ class RendererPdf(BaseRenderer): self.logger.error(f"Error generating PDF from JSON: {str(e)}") raise Exception(f"PDF generation failed: {str(e)}") - async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + async def _getPdfStyles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: """Get PDF styling definitions using base template AI styling.""" style_schema = { "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, @@ -136,21 +136,21 @@ class RendererPdf(BaseRenderer): "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6} } - style_template = self._create_ai_style_template("pdf", user_prompt, style_schema) + style_template = self._createAiStyleTemplate("pdf", user_prompt, style_schema) # Use base template method like DOCX does (this works!) 
- styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles()) + styles = await self._getAiStyles(ai_service, style_template, self._getDefaultPdfStyles()) if styles is None: - return self._get_default_pdf_styles() + return self._getDefaultPdfStyles() # Convert colors to PDF format after getting styles - styles = self._convert_colors_format(styles) + styles = self._convertColorsFormat(styles) # Validate and fix contrast issues - return self._validate_pdf_styles_contrast(styles) + return self._validatePdfStylesContrast(styles) - async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: + async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: """Get AI styles with proper PDF color conversion.""" if not ai_service: return default_styles @@ -279,7 +279,7 @@ class RendererPdf(BaseRenderer): return default_styles # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them) - styles = self._convert_colors_format(styles) + styles = self._convertColorsFormat(styles) return styles @@ -287,7 +287,7 @@ class RendererPdf(BaseRenderer): self.logger.warning(f"AI styling failed: {str(e)}, using defaults") return default_styles - def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Convert colors to proper format for PDF compatibility.""" try: for style_name, style_config in styles.items(): @@ -304,7 +304,7 @@ class RendererPdf(BaseRenderer): self.logger.warning(f"Color conversion failed: {str(e)}") return styles - def _get_safe_color(self, color_value: str, default: str = "#000000") -> str: + def _getSafeColor(self, color_value: str, default: str = "#000000") -> str: """Get a safe hex color value for PDF.""" if isinstance(color_value, str) and color_value.startswith('#'): if 
len(color_value) == 7: @@ -313,7 +313,7 @@ class RendererPdf(BaseRenderer): return color_value return default - def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _validatePdfStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" try: # Fix table header contrast @@ -348,9 +348,9 @@ class RendererPdf(BaseRenderer): except Exception as e: self.logger.warning(f"Style validation failed: {str(e)}") - return self._get_default_pdf_styles() + return self._getDefaultPdfStyles() - def _get_default_pdf_styles(self) -> Dict[str, Any]: + def _getDefaultPdfStyles(self) -> Dict[str, Any]: """Default PDF styles.""" return { "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, @@ -363,27 +363,27 @@ class RendererPdf(BaseRenderer): "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6} } - def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle: + def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle: """Create title style from style definitions.""" title_style_def = styles.get("title", {}) # DEBUG: Show what color and spacing is being used for title title_color = title_style_def.get("color", "#1F4E79") title_space_after = title_style_def.get("space_after", 30) - self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER") + self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER") self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER") return ParagraphStyle( 'CustomTitle', fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20 spaceAfter=title_style_def.get("space_after", 30), - alignment=self._get_alignment(title_style_def.get("align", "center")), - 
textColor=self._hex_to_color(title_color), + alignment=self._getAlignment(title_style_def.get("align", "center")), + textColor=self._hexToColor(title_color), leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles spaceBefore=0 # Ensure no space before title ) - def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle: + def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle: """Create heading style from style definitions.""" heading_key = f"heading{level}" heading_style_def = styles.get(heading_key, styles.get("heading1", {})) @@ -393,11 +393,11 @@ class RendererPdf(BaseRenderer): fontSize=heading_style_def.get("font_size", 18 - level * 2), spaceAfter=heading_style_def.get("space_after", 12), spaceBefore=heading_style_def.get("space_before", 12), - alignment=self._get_alignment(heading_style_def.get("align", "left")), - textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F")) + alignment=self._getAlignment(heading_style_def.get("align", "left")), + textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F")) ) - def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle: + def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle: """Create normal paragraph style from style definitions.""" paragraph_style_def = styles.get("paragraph", {}) @@ -405,12 +405,12 @@ class RendererPdf(BaseRenderer): 'CustomNormal', fontSize=paragraph_style_def.get("font_size", 11), spaceAfter=paragraph_style_def.get("space_after", 6), - alignment=self._get_alignment(paragraph_style_def.get("align", "left")), - textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")), + alignment=self._getAlignment(paragraph_style_def.get("align", "left")), + textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")), leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11) ) - def 
_get_alignment(self, align: str) -> int: + def _getAlignment(self, align: str) -> int: """Convert alignment string to reportlab alignment constant.""" if not align or not isinstance(align, str): return TA_LEFT @@ -426,7 +426,7 @@ class RendererPdf(BaseRenderer): } return align_map.get(align.lower().strip(), TA_LEFT) - def _get_table_alignment(self, align: str) -> str: + def _getTableAlignment(self, align: str) -> str: """Convert alignment string to ReportLab table alignment string.""" if not align or not isinstance(align, str): return 'LEFT' @@ -442,7 +442,7 @@ class RendererPdf(BaseRenderer): } return align_map.get(align.lower().strip(), 'LEFT') - def _hex_to_color(self, hex_color: str) -> colors.Color: + def _hexToColor(self, hex_color: str) -> colors.Color: """Convert hex color to reportlab color.""" try: hex_color = hex_color.lstrip('#') @@ -464,38 +464,38 @@ class RendererPdf(BaseRenderer): except: return colors.black - def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a single JSON section to PDF elements using AI-generated styles.""" try: - section_type = self._get_section_type(section) - elements = self._get_section_data(section) + section_type = self._getSectionType(section) + elements = self._getSectionData(section) # Process each element in the section all_elements = [] for element in elements: if section_type == "table": - all_elements.extend(self._render_json_table(element, styles)) + all_elements.extend(self._renderJsonTable(element, styles)) elif section_type == "bullet_list": - all_elements.extend(self._render_json_bullet_list(element, styles)) + all_elements.extend(self._renderJsonBulletList(element, styles)) elif section_type == "heading": - all_elements.extend(self._render_json_heading(element, styles)) + all_elements.extend(self._renderJsonHeading(element, styles)) elif section_type == "paragraph": - 
all_elements.extend(self._render_json_paragraph(element, styles)) + all_elements.extend(self._renderJsonParagraph(element, styles)) elif section_type == "code_block": - all_elements.extend(self._render_json_code_block(element, styles)) + all_elements.extend(self._renderJsonCodeBlock(element, styles)) elif section_type == "image": - all_elements.extend(self._render_json_image(element, styles)) + all_elements.extend(self._renderJsonImage(element, styles)) else: # Fallback to paragraph for unknown types - all_elements.extend(self._render_json_paragraph(element, styles)) + all_elements.extend(self._renderJsonParagraph(element, styles)) return all_elements except Exception as e: - self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))] - def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON table to PDF elements using AI-generated styles.""" try: headers = table_data.get("headers", []) @@ -517,7 +517,7 @@ class RendererPdf(BaseRenderer): table_style = [ ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))), ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))), - ('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))), + ('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'), ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)), ('BOTTOMPADDING', (0, 0), (-1, 0), 12), @@ -534,7 +534,7 @@ class RendererPdf(BaseRenderer): 
self.logger.warning(f"Error rendering table: {str(e)}") return [] - def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON bullet list to PDF elements using AI-generated styles.""" try: items = list_data.get("items", []) @@ -556,7 +556,7 @@ class RendererPdf(BaseRenderer): self.logger.warning(f"Error rendering bullet list: {str(e)}") return [] - def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON heading to PDF elements using AI-generated styles.""" try: level = heading_data.get("level", 1) @@ -564,7 +564,7 @@ class RendererPdf(BaseRenderer): if text: level = max(1, min(6, level)) - heading_style = self._create_heading_style(styles, level) + heading_style = self._createHeadingStyle(styles, level) return [Paragraph(text, heading_style)] return [] @@ -573,13 +573,13 @@ class RendererPdf(BaseRenderer): self.logger.warning(f"Error rendering heading: {str(e)}") return [] - def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON paragraph to PDF elements using AI-generated styles.""" try: text = paragraph_data.get("text", "") if text: - return [Paragraph(text, self._create_normal_style(styles))] + return [Paragraph(text, self._createNormalStyle(styles))] return [] @@ -587,7 +587,7 @@ class RendererPdf(BaseRenderer): self.logger.warning(f"Error rendering paragraph: {str(e)}") return [] - def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render 
a JSON code block to PDF elements using AI-generated styles.""" try: code = code_data.get("code", "") @@ -601,7 +601,7 @@ class RendererPdf(BaseRenderer): lang_style = ParagraphStyle( 'CodeLanguage', fontSize=code_style_def.get("font_size", 9), - textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")), + textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")), fontName='Helvetica-Bold' ) elements.append(Paragraph(f"Code ({language}):", lang_style)) @@ -609,9 +609,9 @@ class RendererPdf(BaseRenderer): code_style = ParagraphStyle( 'CodeBlock', fontSize=code_style_def.get("font_size", 9), - textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")), + textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")), fontName=code_style_def.get("font", "Courier"), - backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")), + backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")), spaceAfter=code_style_def.get("space_after", 6) ) elements.append(Paragraph(code, code_style)) @@ -624,7 +624,7 @@ class RendererPdf(BaseRenderer): self.logger.warning(f"Error rendering code block: {str(e)}") return [] - def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: + def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: """Render a JSON image to PDF elements.""" try: base64_data = image_data.get("base64Data", "") @@ -632,10 +632,10 @@ class RendererPdf(BaseRenderer): if base64_data: # For now, just add a placeholder since reportlab image handling is complex - return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))] + return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))] return [] except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") - return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))] \ No newline at end of file + 
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))] \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 5c6de723..701030d7 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -12,23 +12,23 @@ class RendererPptx(BaseRenderer): def __init__(self): super().__init__() - self.supported_formats = ["pptx", "ppt"] - self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation" + self.supportedFormats = ["pptx", "ppt"] + self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation" @classmethod - def get_supported_formats(cls) -> list: + def getSupportedFormats(cls) -> list: """Get list of supported output formats.""" return ["pptx", "ppt"] - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """ Render content as PowerPoint presentation from JSON data. 
Args: - extracted_content: JSON content to render as presentation + extractedContent: JSON content to render as presentation title: Title for the presentation - user_prompt: User prompt for AI styling - ai_service: AI service for styling + userPrompt: User prompt for AI styling + aiService: AI service for styling **kwargs: Additional rendering options Returns: @@ -43,7 +43,7 @@ class RendererPptx(BaseRenderer): import re # Get AI-generated styling definitions first - styles = await self._get_pptx_styles(user_prompt, ai_service) + styles = await self._getPptxStyles(userPrompt, aiService) # Create new presentation prs = Presentation() @@ -58,13 +58,13 @@ class RendererPptx(BaseRenderer): prs.slide_height = Inches(7.5) # Generate slides from JSON content - slides_data = await self._parse_json_to_slides(extracted_content, title, styles) - logger.info(f"Parsed {len(slides_data)} slides from JSON content") + slidesData = await self._parseJsonToSlides(extractedContent, title, styles) + logger.info(f"Parsed {len(slidesData)} slides from JSON content") # Debug: Show first 200 chars of content - logger.info(f"JSON content preview: {str(extracted_content)[:200]}...") + logger.info(f"JSON content preview: {str(extractedContent)[:200]}...") - for i, slide_data in enumerate(slides_data): + for i, slide_data in enumerate(slidesData): logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars") # Debug: Show slide content preview slide_content = slide_data.get('content', '') @@ -74,8 +74,8 @@ class RendererPptx(BaseRenderer): logger.warning(f" ⚠️ Slide {i+1} has NO content!") # Create slide with appropriate layout based on content - slide_layout_index = self._get_slide_layout_index(slide_data, styles) - slide_layout = prs.slide_layouts[slide_layout_index] + slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles) + slide_layout = prs.slide_layouts[slideLayoutIndex] slide = prs.slides.add_slide(slide_layout) # Set title 
with AI-generated styling @@ -153,7 +153,7 @@ class RendererPptx(BaseRenderer): p.alignment = PP_ALIGN.LEFT # If no slides were created, create a default slide - if not slides_data: + if not slidesData: slide_layout = prs.slide_layouts[0] # Title slide layout slide = prs.slides.add_slide(slide_layout) @@ -198,7 +198,7 @@ class RendererPptx(BaseRenderer): logger.error(f"Error rendering PowerPoint presentation: {str(e)}") return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain" - def _parse_content_to_slides(self, content: str, title: str) -> list: + def _parseContentToSlides(self, content: str, title: str) -> list: """ Parse content into slide data structure. @@ -212,7 +212,7 @@ class RendererPptx(BaseRenderer): slides = [] # Split content by slide markers or headers - slide_sections = self._split_content_into_slides(content) + slide_sections = self._splitContentIntoSlides(content) for i, section in enumerate(slide_sections): if section.strip(): @@ -239,7 +239,7 @@ class RendererPptx(BaseRenderer): return slides - def _split_content_into_slides(self, content: str) -> list: + def _splitContentIntoSlides(self, content: str) -> list: """ Split content into individual slides based on headers and structure. 
@@ -299,11 +299,11 @@ class RendererPptx(BaseRenderer): return [content.strip()] - def get_output_mime_type(self) -> str: + def getOutputMimeType(self) -> str: """Get MIME type for rendered output.""" - return self.output_mime_type + return self.outputMimeType - async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + async def _getPptxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]: """Get PowerPoint styling definitions using base template AI styling.""" style_schema = { "title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"}, @@ -323,21 +323,21 @@ class RendererPptx(BaseRenderer): "executive_ready": True } - style_template = self._create_professional_pptx_template(user_prompt, style_schema) - # Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion - styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles()) + style_template = self._createProfessionalPptxTemplate(userPrompt, style_schema) + # Use our own _getAiStylesWithPptxColors method to ensure proper color conversion + styles = await self._getAiStylesWithPptxColors(aiService, style_template, self._getDefaultPptxStyles()) # Validate PowerPoint-specific requirements - return self._validate_pptx_styles_readability(styles) + return self._validatePptxStylesReadability(styles) - def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str: + def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str: """Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" import json schema_json = json.dumps(style_schema, indent=4) return f"""Customize the JSON below for professional PowerPoint slides. 
-User Request: {user_prompt or "Create professional corporate slides"} +User Request: {userPrompt or "Create professional corporate slides"} Rules: - Use professional colors (blues, grays, deep greens) @@ -351,9 +351,9 @@ Return ONLY this JSON with your changes: JSON ONLY. NO OTHER TEXT.""" - async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: + async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: """Get AI styles with proper PowerPoint color conversion.""" - if not ai_service: + if not aiService: return default_styles try: @@ -365,11 +365,11 @@ JSON ONLY. NO OTHER TEXT.""" request = AiCallRequest(prompt=style_template, context="", options=request_options) # Check if AI service is properly configured - if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects: + if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects: self.logger.warning("AI service not properly configured, using defaults") return default_styles - response = await ai_service.aiObjects.call(request) + response = await aiService.aiObjects.call(request) # Check if response is valid if not response: @@ -445,7 +445,7 @@ JSON ONLY. NO OTHER TEXT.""" return default_styles # Convert colors to PowerPoint RGB format - styles = self._convert_colors_format(styles) + styles = self._convertColorsFormat(styles) return styles @@ -453,7 +453,7 @@ JSON ONLY. NO OTHER TEXT.""" self.logger.warning(f"AI styling failed: {str(e)}, using defaults") return default_styles - def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Convert hex colors to RGB format for PowerPoint compatibility.""" try: for style_name, style_config in styles.items(): @@ -477,7 +477,7 @@ JSON ONLY. 
NO OTHER TEXT.""" self.logger.warning(f"Color conversion failed: {str(e)}") return styles - def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple: + def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple: """Get a safe RGB color tuple for PowerPoint.""" if isinstance(color_value, tuple) and len(color_value) == 3: return color_value @@ -495,7 +495,7 @@ JSON ONLY. NO OTHER TEXT.""" return (r, g, b) return default - def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _validatePptxStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix readability issues in AI-generated styles.""" try: # Ensure minimum font sizes for PowerPoint readability @@ -519,9 +519,9 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.warning(f"Style validation failed: {str(e)}") - return self._get_default_pptx_styles() + return self._getDefaultPptxStyles() - def _get_default_pptx_styles(self) -> Dict[str, Any]: + def _getDefaultPptxStyles(self) -> Dict[str, Any]: """Default PowerPoint styles with corporate professional color scheme.""" return { "title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"}, @@ -541,7 +541,7 @@ JSON ONLY. NO OTHER TEXT.""" "executive_ready": True } - async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]: + async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]: """ Parse JSON content into slide data structure. @@ -569,12 +569,12 @@ JSON ONLY. 
NO OTHER TEXT.""" # Create title slide slides.append({ "title": document_title, - "content": "Generated by PowerOn AI System\n\n" + self._format_timestamp() + "content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp() }) # Process sections into slides based on content and user intent sections = json_content.get("sections", []) - slides.extend(self._create_slides_from_sections(sections, styles)) + slides.extend(self._createSlidesFromSections(sections, styles)) # If no content slides were created, create a default content slide if len(slides) == 1: # Only title slide @@ -595,7 +595,7 @@ JSON ONLY. NO OTHER TEXT.""" } ] - def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: + def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: """Create a slide from a JSON section.""" try: # Get section title from data or use default @@ -616,15 +616,15 @@ JSON ONLY. NO OTHER TEXT.""" content_parts = [] if content_type == "table": - content_parts.append(self._format_table_for_slide(elements)) + content_parts.append(self._formatTableForSlide(elements)) elif content_type == "list": - content_parts.append(self._format_list_for_slide(elements)) + content_parts.append(self._formatListForSlide(elements)) elif content_type == "heading": - content_parts.append(self._format_heading_for_slide(elements)) + content_parts.append(self._formatHeadingForSlide(elements)) elif content_type == "paragraph": - content_parts.append(self._format_paragraph_for_slide(elements)) + content_parts.append(self._formatParagraphForSlide(elements)) elif content_type == "code": - content_parts.append(self._format_code_for_slide(elements)) + content_parts.append(self._formatCodeForSlide(elements)) else: content_parts.append(self._format_paragraph_for_slide(elements)) @@ -640,7 +640,7 @@ JSON ONLY. 
NO OTHER TEXT.""" logger.warning(f"Error creating slide from section: {str(e)}") return None - def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str: + def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str: """Format table data for slide presentation.""" try: # Extract table data from elements array @@ -681,7 +681,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error formatting table for slide: {str(e)}") return "" - def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str: + def _formatListForSlide(self, list_data: Dict[str, Any]) -> str: """Format list data for slide presentation.""" try: items = list_data.get("items", []) @@ -713,7 +713,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error formatting list for slide: {str(e)}") return "" - def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str: + def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str: """Format heading data for slide presentation.""" try: text = heading_data.get("text", "") @@ -728,7 +728,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error formatting heading for slide: {str(e)}") return "" - def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str: + def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str: """Format paragraph data for slide presentation.""" try: text = paragraph_data.get("text", "") @@ -747,7 +747,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error formatting paragraph for slide: {str(e)}") return "" - def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str: + def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str: """Format code data for slide presentation.""" try: code = code_data.get("code", "") @@ -770,7 +770,7 @@ JSON ONLY. 
NO OTHER TEXT.""" logger.warning(f"Error formatting code for slide: {str(e)}") return "" - def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int: + def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int: """Determine the best professional slide layout based on content.""" try: content = slide_data.get("content", "") @@ -804,7 +804,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error determining slide layout: {str(e)}") return 1 # Default to title and content layout - def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: + def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: """Create slides from sections based on content density and user intent.""" try: slides = [] @@ -834,7 +834,7 @@ JSON ONLY. NO OTHER TEXT.""" break else: # Add content to current slide - formatted_content = self._format_section_content(section) + formatted_content = self._formatSectionContent(section) if formatted_content: current_slide_content.append(formatted_content) @@ -851,7 +851,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error creating slides from sections: {str(e)}") return [] - def _format_section_content(self, section: Dict[str, Any]) -> str: + def _formatSectionContent(self, section: Dict[str, Any]) -> str: """Format section content for slide presentation.""" try: content_type = section.get("content_type", "paragraph") @@ -861,15 +861,15 @@ JSON ONLY. 
NO OTHER TEXT.""" content_parts = [] for element in elements: if content_type == "table": - content_parts.append(self._format_table_for_slide([element])) + content_parts.append(self._formatTableForSlide([element])) elif content_type == "list": - content_parts.append(self._format_list_for_slide([element])) + content_parts.append(self._formatListForSlide([element])) elif content_type == "heading": - content_parts.append(self._format_heading_for_slide([element])) + content_parts.append(self._formatHeadingForSlide([element])) elif content_type == "paragraph": - content_parts.append(self._format_paragraph_for_slide([element])) + content_parts.append(self._formatParagraphForSlide([element])) elif content_type == "code": - content_parts.append(self._format_code_for_slide([element])) + content_parts.append(self._formatCodeForSlide([element])) else: content_parts.append(self._format_paragraph_for_slide([element])) @@ -879,7 +879,7 @@ JSON ONLY. NO OTHER TEXT.""" logger.warning(f"Error formatting section content: {str(e)}") return "" - def _format_timestamp(self) -> str: + def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" from datetime import datetime, UTC return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 68ccfdbe..f24afa95 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -9,7 +9,7 @@ class RendererText(BaseRenderer): """Renders content to plain text format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported text formats (excluding formats with dedicated renderers).""" return [ 'txt', 'text', 'plain', @@ -32,7 +32,7 @@ class RendererText(BaseRenderer): ] @classmethod - def 
get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return [ 'ascii', 'utf8', 'utf-8', 'code', 'source', @@ -41,166 +41,166 @@ class RendererText(BaseRenderer): ] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for text renderer.""" return 90 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to plain text format.""" try: # Generate text from JSON structure - text_content = self._generate_text_from_json(extracted_content, title) + textContent = self._generateTextFromJson(extractedContent, title) - return text_content, "text/plain" + return textContent, "text/plain" except Exception as e: self.logger.error(f"Error rendering text: {str(e)}") # Return minimal text fallback return f"{title}\n\nError rendering report: {str(e)}", "text/plain" - def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str: + def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate text content from structured JSON document.""" try: # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + documentTitle = jsonContent.get("metadata", {}).get("title", title) # Build text content - text_parts = [] + textParts = [] # Document title - text_parts.append(document_title) - text_parts.append("=" * 
len(document_title)) - text_parts.append("") + textParts.append(documentTitle) + textParts.append("=" * len(documentTitle)) + textParts.append("") # Process each section - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) for section in sections: - section_text = self._render_json_section(section) - if section_text: - text_parts.append(section_text) - text_parts.append("") # Add spacing between sections + sectionText = self._renderJsonSection(section) + if sectionText: + textParts.append(sectionText) + textParts.append("") # Add spacing between sections # Add generation info - text_parts.append("") - text_parts.append(f"Generated: {self._format_timestamp()}") + textParts.append("") + textParts.append(f"Generated: {self._formatTimestamp()}") - return '\n'.join(text_parts) + return '\n'.join(textParts) except Exception as e: self.logger.error(f"Error generating text from JSON: {str(e)}") raise Exception(f"Text generation failed: {str(e)}") - def _render_json_section(self, section: Dict[str, Any]) -> str: + def _renderJsonSection(self, section: Dict[str, Any]) -> str: """Render a single JSON section to text.""" try: - section_type = self._get_section_type(section) - section_data = self._get_section_data(section) + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) - if section_type == "table": + if sectionType == "table": # Process the section data to extract table structure - processed_data = self._process_section_by_type(section) - return self._render_json_table(processed_data) - elif section_type == "bullet_list": + processedData = self._processSectionByType(section) + return self._renderJsonTable(processedData) + elif sectionType == "bullet_list": # Process the section data to extract bullet list structure - processed_data = self._process_section_by_type(section) - return self._render_json_bullet_list(processed_data) - elif section_type == "heading": + processedData = 
self._processSectionByType(section) + return self._renderJsonBulletList(processedData) + elif sectionType == "heading": # Render each heading element in the elements array - # section_data is already the elements array from _get_section_data - rendered_elements = [] - for element in section_data: - rendered_elements.append(self._render_json_heading(element)) - return "\n".join(rendered_elements) - elif section_type == "paragraph": + # sectionData is already the elements array from _getSectionData + renderedElements = [] + for element in sectionData: + renderedElements.append(self._renderJsonHeading(element)) + return "\n".join(renderedElements) + elif sectionType == "paragraph": # Render each paragraph element in the elements array - # section_data is already the elements array from _get_section_data - rendered_elements = [] - for element in section_data: - rendered_elements.append(self._render_json_paragraph(element)) - return "\n".join(rendered_elements) - elif section_type == "code_block": + # sectionData is already the elements array from _getSectionData + renderedElements = [] + for element in sectionData: + renderedElements.append(self._renderJsonParagraph(element)) + return "\n".join(renderedElements) + elif sectionType == "code_block": # Process the section data to extract code block structure - processed_data = self._process_section_by_type(section) - return self._render_json_code_block(processed_data) - elif section_type == "image": + processedData = self._processSectionByType(section) + return self._renderJsonCodeBlock(processedData) + elif sectionType == "image": # Process the section data to extract image structure - processed_data = self._process_section_by_type(section) - return self._render_json_image(processed_data) + processedData = self._processSectionByType(section) + return self._renderJsonImage(processedData) else: # Fallback to paragraph for unknown types - render each element - # section_data is already the elements array from 
_get_section_data - rendered_elements = [] - for element in section_data: - rendered_elements.append(self._render_json_paragraph(element)) - return "\n".join(rendered_elements) + # sectionData is already the elements array from _getSectionData + renderedElements = [] + for element in sectionData: + renderedElements.append(self._renderJsonParagraph(element)) + return "\n".join(renderedElements) except Exception as e: - self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") + self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}") return f"[Error rendering section: {str(e)}]" - def _render_json_table(self, table_data: Dict[str, Any]) -> str: + def _renderJsonTable(self, tableData: Dict[str, Any]) -> str: """Render a JSON table to text.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + headers = tableData.get("headers", []) + rows = tableData.get("rows", []) if not headers or not rows: return "" - text_parts = [] + textParts = [] # Create table header - header_line = " | ".join(str(header) for header in headers) - text_parts.append(header_line) + headerLine = " | ".join(str(header) for header in headers) + textParts.append(headerLine) # Add separator line - separator_line = " | ".join("-" * len(str(header)) for header in headers) - text_parts.append(separator_line) + separatorLine = " | ".join("-" * len(str(header)) for header in headers) + textParts.append(separatorLine) # Add data rows for row in rows: - row_line = " | ".join(str(cell_data) for cell_data in row) - text_parts.append(row_line) + rowLine = " | ".join(str(cellData) for cellData in row) + textParts.append(rowLine) - return '\n'.join(text_parts) + return '\n'.join(textParts) except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return "" - def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str: + def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str: 
"""Render a JSON bullet list to text.""" try: - items = list_data.get("items", []) + items = listData.get("items", []) if not items: return "" - text_parts = [] + textParts = [] for item in items: if isinstance(item, str): - text_parts.append(f"- {item}") + textParts.append(f"- {item}") elif isinstance(item, dict) and "text" in item: - text_parts.append(f"- {item['text']}") + textParts.append(f"- {item['text']}") - return '\n'.join(text_parts) + return '\n'.join(textParts) except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") return "" - def _render_json_heading(self, heading_data: Dict[str, Any]) -> str: + def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str: """Render a JSON heading to text.""" try: - level = heading_data.get("level", 1) - text = heading_data.get("text", "") + level = headingData.get("level", 1) + text = headingData.get("text", "") if text: level = max(1, min(6, level)) @@ -217,21 +217,21 @@ class RendererText(BaseRenderer): self.logger.warning(f"Error rendering heading: {str(e)}") return "" - def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str: + def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str: """Render a JSON paragraph to text.""" try: - text = paragraph_data.get("text", "") + text = paragraphData.get("text", "") return text if text else "" except Exception as e: self.logger.warning(f"Error rendering paragraph: {str(e)}") return "" - def _render_json_code_block(self, code_data: Dict[str, Any]) -> str: + def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str: """Render a JSON code block to text.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + code = codeData.get("code", "") + language = codeData.get("language", "") if code: if language: @@ -245,12 +245,12 @@ class RendererText(BaseRenderer): self.logger.warning(f"Error rendering code block: {str(e)}") return "" - def _render_json_image(self, image_data: Dict[str, 
Any]) -> str: + def _renderJsonImage(self, imageData: Dict[str, Any]) -> str: """Render a JSON image to text.""" try: - alt_text = image_data.get("altText", "Image") - return f"[Image: {alt_text}]" + altText = imageData.get("altText", "Image") + return f"[Image: {altText}]" except Exception as e: self.logger.warning(f"Error rendering image: {str(e)}") - return f"[Image: {image_data.get('altText', 'Image')}]" + return f"[Image: {imageData.get('altText', 'Image')}]" diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index 19b36a52..49bd2749 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -21,41 +21,41 @@ class RendererXlsx(BaseRenderer): """Renders content to Excel format using openpyxl.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported Excel formats.""" return ['xlsx', 'xls', 'excel'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['spreadsheet', 'workbook'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for Excel renderer.""" return 110 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to Excel format using AI-analyzed styling.""" try: if not OPENPYXL_AVAILABLE: # Fallback to CSV if openpyxl not available from .rendererCsv import RendererCsv - csv_renderer = RendererCsv() - csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service) - return csv_content, "text/csv" + csvRenderer = RendererCsv() + 
csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService) + return csvContent, "text/csv" # Generate Excel using AI-analyzed styling - excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service) + excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) - return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" except Exception as e: self.logger.error(f"Error rendering Excel: {str(e)}") # Return CSV fallback return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv" - def _generate_excel(self, content: str, title: str) -> str: + def _generateExcel(self, content: str, title: str) -> str: """Generate Excel content using openpyxl.""" try: # Create workbook @@ -65,14 +65,14 @@ class RendererXlsx(BaseRenderer): wb.remove(wb.active) # Create sheets - summary_sheet = wb.create_sheet("Summary", 0) - data_sheet = wb.create_sheet("Data", 1) - analysis_sheet = wb.create_sheet("Analysis", 2) + summarySheet = wb.create_sheet("Summary", 0) + dataSheet = wb.create_sheet("Data", 1) + analysisSheet = wb.create_sheet("Analysis", 2) # Add content to sheets - self._populate_summary_sheet(summary_sheet, title) - self._populate_data_sheet(data_sheet, content) - self._populate_analysis_sheet(analysis_sheet, content) + self._populateSummarySheet(summarySheet, title) + self._populateDataSheet(dataSheet, content) + self._populateAnalysisSheet(analysisSheet, content) # Save to buffer buffer = io.BytesIO() @@ -80,16 +80,16 @@ class RendererXlsx(BaseRenderer): buffer.seek(0) # Convert to base64 - excel_bytes = buffer.getvalue() - excel_base64 = base64.b64encode(excel_bytes).decode('utf-8') + excelBytes = buffer.getvalue() + excelBase64 = base64.b64encode(excelBytes).decode('utf-8') - return excel_base64 + return excelBase64 except 
Exception as e: self.logger.error(f"Error generating Excel: {str(e)}") raise - def _populate_summary_sheet(self, sheet, title: str): + def _populateSummarySheet(self, sheet, title: str): """Populate the summary sheet.""" try: # Title @@ -99,7 +99,7 @@ class RendererXlsx(BaseRenderer): # Generation info sheet['A3'] = "Generated:" - sheet['B3'] = self._format_timestamp() + sheet['B3'] = self._formatTimestamp() sheet['A4'] = "Status:" sheet['B4'] = "Generated Successfully" @@ -116,7 +116,7 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate summary sheet: {str(e)}") - def _populate_data_sheet(self, sheet, content: str): + def _populateDataSheet(self, sheet, content: str): """Populate the data sheet.""" try: # Headers @@ -138,8 +138,8 @@ class RendererXlsx(BaseRenderer): # Check for table data (lines with |) if '|' in line: cells = [cell.strip() for cell in line.split('|') if cell.strip()] - for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns - sheet.cell(row=row, column=col, value=cell_data) + for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns + sheet.cell(row=row, column=col, value=cellData) row += 1 else: # Regular content @@ -153,7 +153,7 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate data sheet: {str(e)}") - def _populate_analysis_sheet(self, sheet, content: str): + def _populateAnalysisSheet(self, sheet, content: str): """Populate the analysis sheet.""" try: # Title @@ -169,17 +169,17 @@ class RendererXlsx(BaseRenderer): row += 1 # Count different types of content - table_lines = sum(1 for line in lines if '|' in line) - list_lines = sum(1 for line in lines if line.startswith(('- ', '* '))) - text_lines = len(lines) - table_lines - list_lines + tableLines = sum(1 for line in lines if '|' in line) + listLines = sum(1 for line in lines if line.startswith(('- ', '* '))) + textLines = len(lines) - tableLines - listLines 
sheet[f'A{row}'] = f"Total Lines: {len(lines)}" row += 1 - sheet[f'A{row}'] = f"Table Rows: {table_lines}" + sheet[f'A{row}'] = f"Table Rows: {tableLines}" row += 1 - sheet[f'A{row}'] = f"List Items: {list_lines}" + sheet[f'A{row}'] = f"List Items: {listLines}" row += 1 - sheet[f'A{row}'] = f"Text Lines: {text_lines}" + sheet[f'A{row}'] = f"Text Lines: {textLines}" row += 2 # Recommendations @@ -198,35 +198,35 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate analysis sheet: {str(e)}") - async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate Excel content from structured JSON document using AI-generated styling.""" try: # Debug output - self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER") - self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER") + self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER") + self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER") # Get AI-generated styling definitions - styles = await self._get_excel_styles(user_prompt, ai_service) + styles = await self._getExcelStyles(userPrompt, aiService) # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - 
document_title = json_content.get("metadata", {}).get("title", title) + document_title = jsonContent.get("metadata", {}).get("title", title) # Create workbook wb = Workbook() # Create sheets based on content - sheets = self._create_excel_sheets(wb, json_content, styles) + sheets = self._createExcelSheets(wb, jsonContent, styles) self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER") # Populate sheets with content - self._populate_excel_sheets(sheets, json_content, styles) + self._populateExcelSheets(sheets, jsonContent, styles) # Save to buffer buffer = io.BytesIO() @@ -234,24 +234,24 @@ class RendererXlsx(BaseRenderer): buffer.seek(0) # Convert to base64 - excel_bytes = buffer.getvalue() - self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER") + excelBytes = buffer.getvalue() + self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER") try: - excel_base64 = base64.b64encode(excel_bytes).decode('utf-8') - self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER") + excelBase64 = base64.b64encode(excelBytes).decode('utf-8') + self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER") except Exception as b64_error: self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER") raise - return excel_base64 + return excelBase64 except Exception as e: self.logger.error(f"Error generating Excel from JSON: {str(e)}") raise Exception(f"Excel generation failed: {str(e)}") - async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + async def _getExcelStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]: """Get Excel styling definitions using base template AI styling.""" - style_schema = { + styleSchema = { "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"}, 
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"}, "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"}, @@ -261,26 +261,26 @@ class RendererXlsx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} } - style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema) - # Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion - styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles()) + styleTemplate = self._createAiStyleTemplate("xlsx", userPrompt, styleSchema) + # Use our own _getAiStylesWithExcelColors method to ensure proper color conversion + styles = await self._getAiStylesWithExcelColors(aiService, styleTemplate, self._getDefaultExcelStyles()) # Validate and fix contrast issues - return self._validate_excel_styles_contrast(styles) + return self._validateExcelStylesContrast(styles) - async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: + async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: """Get AI styles with proper Excel color conversion.""" - if not ai_service: - return default_styles + if not aiService: + return defaultStyles try: from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - request_options = AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_GENERATE + requestOptions = AiCallOptions() + requestOptions.operationType = OperationTypeEnum.DATA_GENERATE - request = AiCallRequest(prompt=style_template, context="", options=request_options) - response = await ai_service.aiObjects.call(request) + request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions) + response = await 
aiService.aiObjects.call(request) import json import re @@ -291,7 +291,7 @@ class RendererXlsx(BaseRenderer): # Check if result is empty if not result: self.logger.warning("AI styling returned empty response, using defaults") - return default_styles + return defaultStyles # Extract JSON from markdown if present json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) @@ -312,46 +312,46 @@ class RendererXlsx(BaseRenderer): styles = json.loads(result) except json.JSONDecodeError as json_error: self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults") - return default_styles + return defaultStyles # Convert colors to Excel aRGB format - styles = self._convert_colors_format(styles) + styles = self._convertColorsFormat(styles) return styles except Exception as e: self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return default_styles + return defaultStyles - def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str: + def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str: """Get a safe aRGB color value for Excel (without # prefix).""" - if not isinstance(color_value, str): + if not isinstance(colorValue, str): return default # Remove # prefix if present - if color_value.startswith('#'): - color_value = color_value[1:] + if colorValue.startswith('#'): + colorValue = colorValue[1:] - if len(color_value) == 6: + if len(colorValue) == 6: # Convert RRGGBB to AARRGGBB - return f"FF{color_value}" - elif len(color_value) == 8: + return f"FF{colorValue}" + elif len(colorValue) == 8: # Already aRGB format - return color_value + return colorValue else: # Unexpected format, return default return default - def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Convert hex colors to aRGB format for Excel compatibility.""" try: self.services.utils.debugLogToFile(f"CONVERTING COLORS IN 
STYLES: {styles}", "EXCEL_RENDERER") - for style_name, style_config in styles.items(): - if isinstance(style_config, dict): - for prop, value in style_config.items(): + for styleName, styleConfig in styles.items(): + if isinstance(styleConfig, dict): + for prop, value in styleConfig.items(): if isinstance(value, str) and value.startswith('#') and len(value) == 7: # Convert #RRGGBB to #AARRGGBB (add FF alpha channel) - styles[style_name][prop] = f"FF{value[1:]}" + styles[styleName][prop] = f"FF{value[1:]}" elif isinstance(value, str) and value.startswith('#') and len(value) == 9: pass # Already aRGB format elif isinstance(value, str) and value.startswith('#'): @@ -360,34 +360,34 @@ class RendererXlsx(BaseRenderer): except Exception as e: return styles - def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _validateExcelStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" try: # Fix table header contrast if "table_header" in styles: header = styles["table_header"] - bg_color = header.get("background", "#FFFFFF") - text_color = header.get("text_color", "#000000") + bgColor = header.get("background", "#FFFFFF") + textColor = header.get("text_color", "#000000") # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": header["background"] = "#4F4F4F" header["text_color"] = "#FFFFFF" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": header["background"] = "#4F4F4F" header["text_color"] = "#FFFFFF" # Fix table cell contrast if "table_cell" in styles: cell = styles["table_cell"] - bg_color = cell.get("background", "#FFFFFF") - text_color = cell.get("text_color", "#000000") + bgColor = cell.get("background", "#FFFFFF") + textColor = 
cell.get("text_color", "#000000") # If both are white or both are dark, fix it - if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": + if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF": cell["background"] = "#FFFFFF" cell["text_color"] = "#2F2F2F" - elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": + elif bgColor.upper() == "#000000" and textColor.upper() == "#000000": cell["background"] = "#FFFFFF" cell["text_color"] = "#2F2F2F" @@ -395,9 +395,9 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Style validation failed: {str(e)}") - return self._get_default_excel_styles() + return self._getDefaultExcelStyles() - def _get_default_excel_styles(self) -> Dict[str, Any]: + def _getDefaultExcelStyles(self) -> Dict[str, Any]: """Default Excel styles with aRGB color format.""" return { "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"}, @@ -409,104 +409,104 @@ class RendererXlsx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} } - def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: + def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: """Create Excel sheets based on content structure and user intent.""" sheets = {} # Get sheet names from AI styles or generate based on content - sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content)) - self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER") + sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent)) + self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER") # Create sheets - for i, sheet_name in enumerate(sheet_names): + for i, sheetName in enumerate(sheetNames): if i == 0: # 
Use the default sheet for the first sheet sheet = wb.active - sheet.title = sheet_name + sheet.title = sheetName else: # Create additional sheets - sheet = wb.create_sheet(sheet_name, i) - sheets[sheet_name.lower()] = sheet + sheet = wb.create_sheet(sheetName, i) + sheets[sheetName.lower()] = sheet return sheets - def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]: + def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: """Generate sheet names based on actual content structure.""" - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) # If no sections, create a single sheet if not sections: return ["Content"] # Generate sheet names based on content structure - sheet_names = [] + sheetNames = [] # Check if we have multiple table sections - table_sections = [s for s in sections if s.get("content_type") == "table"] + tableSections = [s for s in sections if s.get("content_type") == "table"] - if len(table_sections) > 1: + if len(tableSections) > 1: # Create separate sheets for each table - for i, section in enumerate(table_sections, 1): - section_title = section.get("title", f"Table {i}") - sheet_names.append(section_title[:31]) # Excel sheet name limit + for i, section in enumerate(tableSections, 1): + sectionTitle = section.get("title", f"Table {i}") + sheetNames.append(sectionTitle[:31]) # Excel sheet name limit else: # Single table or mixed content - create main sheet - document_title = json_content.get("metadata", {}).get("title", "Document") - sheet_names.append(document_title[:31]) # Excel sheet name limit + documentTitle = jsonContent.get("metadata", {}).get("title", "Document") + sheetNames.append(documentTitle[:31]) # Excel sheet name limit # Add additional sheets for other content types - content_types = set() + contentTypes = set() for section in sections: - content_type = section.get("content_type", "paragraph") - content_types.add(content_type) + 
contentType = section.get("content_type", "paragraph") + contentTypes.add(contentType) - if "table" in content_types and len(table_sections) == 1: - sheet_names.append("Table Data") - if "list" in content_types: - sheet_names.append("Lists") - if "paragraph" in content_types or "heading" in content_types: - sheet_names.append("Text") + if "table" in contentTypes and len(tableSections) == 1: + sheetNames.append("Table Data") + if "list" in contentTypes: + sheetNames.append("Lists") + if "paragraph" in contentTypes or "heading" in contentTypes: + sheetNames.append("Text") # Limit to 4 sheets maximum - return sheet_names[:4] + return sheetNames[:4] - def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: """Populate Excel sheets with content from JSON based on actual sheet names.""" try: # Get the actual sheet names that were created - sheet_names = list(sheets.keys()) + sheetNames = list(sheets.keys()) - if not sheet_names: + if not sheetNames: return - sections = json_content.get("sections", []) - table_sections = [s for s in sections if s.get("content_type") == "table"] + sections = jsonContent.get("sections", []) + tableSections = [s for s in sections if s.get("content_type") == "table"] - if len(table_sections) > 1: + if len(tableSections) > 1: # Multiple tables - populate each sheet with its corresponding table - for i, section in enumerate(table_sections): - if i < len(sheet_names): - sheet_name = sheet_names[i] - sheet = sheets[sheet_name] - self._populate_table_sheet(sheet, section, styles, f"Table {i+1}") + for i, section in enumerate(tableSections): + if i < len(sheetNames): + sheetName = sheetNames[i] + sheet = sheets[sheetName] + self._populateTableSheet(sheet, section, styles, f"Table {i+1}") else: # Single table or mixed content - use original logic - first_sheet_name = 
sheet_names[0] - self._populate_main_sheet(sheets[first_sheet_name], json_content, styles) + firstSheetName = sheetNames[0] + self._populateMainSheet(sheets[firstSheetName], jsonContent, styles) # If we have multiple sheets, distribute content by type - if len(sheet_names) > 1: - self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:]) + if len(sheetNames) > 1: + self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:]) except Exception as e: self.logger.warning(f"Could not populate Excel sheets: {str(e)}") - def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str): + def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str): """Populate a sheet with a single table section.""" try: # Sheet title - sheet['A1'] = sheet_title - sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79"))) + sheet['A1'] = sheetTitle + sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79"))) sheet['A1'].alignment = Alignment(horizontal="center") # Get table data from elements (canonical JSON format) @@ -528,9 +528,9 @@ class RendererXlsx(BaseRenderer): for col, header in enumerate(headers, 1): cell = sheet.cell(row=3, column=col, value=header) if header_style.get("bold"): - cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000"))) + cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000"))) if header_style.get("background"): - cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid") + cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid") # 
Add rows cell_style = styles.get("table_cell", {}) @@ -538,7 +538,7 @@ class RendererXlsx(BaseRenderer): for col_idx, cell_value in enumerate(row_data, 1): cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value) if cell_style.get("text_color"): - cell.font = Font(color=self._get_safe_color(cell_style["text_color"])) + cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) # Auto-adjust column widths for col in range(1, len(headers) + 1): @@ -547,17 +547,17 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate table sheet: {str(e)}") - def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]): + def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]): """Populate the main sheet with document overview and all content.""" try: # Document title - document_title = json_content.get("metadata", {}).get("title", "Generated Report") - sheet['A1'] = document_title + documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report") + sheet['A1'] = documentTitle # Safety check for title style title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"}) try: - safe_color = self._get_safe_color(title_style["color"]) + safe_color = self._getSafeColor(title_style["color"]) sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color) sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) except Exception as font_error: @@ -567,12 +567,12 @@ class RendererXlsx(BaseRenderer): # Generation info sheet['A3'] = "Generated:" - sheet['B3'] = self._format_timestamp() + sheet['B3'] = self._formatTimestamp() sheet['A4'] = "Status:" sheet['B4'] = "Generated Successfully" # Document metadata - metadata = json_content.get("metadata", {}) + metadata = jsonContent.get("metadata", {}) if metadata: sheet['A6'] = "Document Information:" sheet['A6'].font = 
Font(bold=True) @@ -585,7 +585,7 @@ class RendererXlsx(BaseRenderer): row += 1 # Content overview - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) sheet[f'A{row + 1}'] = "Content Overview:" sheet[f'A{row + 1}'].font = Font(bold=True) @@ -605,7 +605,7 @@ class RendererXlsx(BaseRenderer): # Add all content to this sheet row += 2 for section in sections: - row = self._add_section_to_sheet(sheet, section, styles, row) + row = self._addSectionToSheet(sheet, section, styles, row) row += 1 # Empty row between sections # Auto-adjust column widths @@ -615,34 +615,34 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate main sheet: {str(e)}") - def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]): + def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]): """Populate additional sheets based on content types.""" try: - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) - for sheet_name in sheet_names: - if sheet_name not in sheets: + for sheetName in sheetNames: + if sheetName not in sheets: continue - sheet = sheets[sheet_name] - sheet_title = sheet_name.title() - sheet['A1'] = sheet_title + sheet = sheets[sheetName] + sheetTitle = sheetName.title() + sheet['A1'] = sheetTitle sheet['A1'].font = Font(size=16, bold=True) row = 3 # Filter sections by content type - if sheet_name == "tables": + if sheetName == "tables": filtered_sections = [s for s in sections if s.get("content_type") == "table"] - elif sheet_name == "lists": + elif sheetName == "lists": filtered_sections = [s for s in sections if s.get("content_type") == "list"] - elif sheet_name == "text": + elif sheetName == "text": filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]] else: 
filtered_sections = sections for section in filtered_sections: - row = self._add_section_to_sheet(sheet, section, styles, row) + row = self._addSectionToSheet(sheet, section, styles, row) row += 1 # Empty row between sections # Auto-adjust column widths @@ -652,15 +652,15 @@ class RendererXlsx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not populate content type sheets: {str(e)}") - def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a section to a sheet and return the next row.""" try: # Add section title section_title = section.get("title") if section_title: - sheet[f'A{start_row}'] = f"# {section_title}" - sheet[f'A{start_row}'].font = Font(bold=True) - start_row += 1 + sheet[f'A{startRow}'] = f"# {section_title}" + sheet[f'A{startRow}'].font = Font(bold=True) + startRow += 1 # Process section based on type section_type = section.get("content_type", "paragraph") @@ -669,23 +669,23 @@ class RendererXlsx(BaseRenderer): elements = section.get("elements", []) for element in elements: if section_type == "table": - start_row = self._add_table_to_excel(sheet, element, styles, start_row) + startRow = self._addTableToExcel(sheet, element, styles, startRow) elif section_type == "list": - start_row = self._add_list_to_excel(sheet, element, styles, start_row) + startRow = self._addListToExcel(sheet, element, styles, startRow) elif section_type == "paragraph": - start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row) + startRow = self._addParagraphToExcel(sheet, element, styles, startRow) elif section_type == "heading": - start_row = self._add_heading_to_excel(sheet, element, styles, start_row) + startRow = self._addHeadingToExcel(sheet, element, styles, startRow) else: - start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row) + startRow = 
self._addParagraphToExcel(sheet, element, styles, startRow) - return start_row + return startRow except Exception as e: self.logger.warning(f"Could not add section to sheet: {str(e)}") - return start_row + 1 + return startRow + 1 - def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a table element to Excel sheet.""" try: # In canonical JSON format, table elements have headers and rows directly @@ -693,99 +693,99 @@ class RendererXlsx(BaseRenderer): rows = element.get("rows", []) if not headers and not rows: - return start_row + return startRow # Add headers header_style = styles.get("table_header", {}) for col, header in enumerate(headers, 1): - cell = sheet.cell(row=start_row, column=col, value=header) + cell = sheet.cell(row=startRow, column=col, value=header) if header_style.get("bold"): - cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000"))) + cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000"))) if header_style.get("background"): - cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid") + cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid") - start_row += 1 + startRow += 1 # Add rows cell_style = styles.get("table_cell", {}) for row_data in rows: for col, cell_value in enumerate(row_data, 1): - cell = sheet.cell(row=start_row, column=col, value=cell_value) + cell = sheet.cell(row=startRow, column=col, value=cell_value) if cell_style.get("text_color"): - cell.font = Font(color=self._get_safe_color(cell_style["text_color"])) - start_row += 1 + cell.font = 
Font(color=self._getSafeColor(cell_style["text_color"])) + startRow += 1 - return start_row + return startRow except Exception as e: self.logger.warning(f"Could not add table to Excel: {str(e)}") - return start_row + 1 + return startRow + 1 - def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a list element to Excel sheet.""" try: list_items = element.get("items", []) list_style = styles.get("bullet_list", {}) for item in list_items: - sheet.cell(row=start_row, column=1, value=f"• {item}") + sheet.cell(row=startRow, column=1, value=f"• {item}") if list_style.get("color"): - sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"])) - start_row += 1 + sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"])) + startRow += 1 - return start_row + return startRow except Exception as e: self.logger.warning(f"Could not add list to Excel: {str(e)}") - return start_row + 1 + return startRow + 1 - def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a paragraph element to Excel sheet.""" try: text = element.get("text", "") if text: - sheet.cell(row=start_row, column=1, value=text) + sheet.cell(row=startRow, column=1, value=text) paragraph_style = styles.get("paragraph", {}) if paragraph_style.get("color"): - sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"])) + sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"])) - start_row += 1 + startRow += 1 - return start_row + return startRow except Exception as e: self.logger.warning(f"Could not add paragraph to Excel: 
{str(e)}") - return start_row + 1 + return startRow + 1 - def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: + def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: """Add a heading element to Excel sheet.""" try: text = element.get("text", "") level = element.get("level", 1) if text: - sheet.cell(row=start_row, column=1, value=text) + sheet.cell(row=startRow, column=1, value=text) heading_style = styles.get("heading", {}) font_size = heading_style.get("font_size", 14) if level > 1: font_size = max(10, font_size - (level - 1) * 2) - sheet.cell(row=start_row, column=1).font = Font( + sheet.cell(row=startRow, column=1).font = Font( size=font_size, bold=True, - color=self._get_safe_color(heading_style.get("color", "FF000000")) + color=self._getSafeColor(heading_style.get("color", "FF000000")) ) - start_row += 1 + startRow += 1 - return start_row + return startRow except Exception as e: self.logger.warning(f"Could not add heading to Excel: {str(e)}") - return start_row + 1 + return startRow + 1 - def _format_timestamp(self) -> str: + def _formatTimestamp(self) -> str: """Format current timestamp for document generation.""" return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/subJsonSchema.py b/modules/services/serviceGeneration/subJsonSchema.py index 72c722b1..07812bd2 100644 --- a/modules/services/serviceGeneration/subJsonSchema.py +++ b/modules/services/serviceGeneration/subJsonSchema.py @@ -1,25 +1,32 @@ """ -JSON Schema definitions for AI-generated document structures. -This module provides schemas that guide AI to generate structured JSON output. +JSON Schema definitions for AI-generated document structures (unified). +This module provides schemas that guide AI to generate structured JSON output +that matches the master template in modules.datamodels.datamodelJson. 
""" from typing import Dict, Any -def get_multi_document_subJsonSchema() -> Dict[str, Any]: - """Get the JSON schema for multi-document generation.""" +def getMultiDocumentSchema() -> Dict[str, Any]: + """Get the JSON schema for multi-document generation (unified).""" return { "type": "object", "required": ["metadata", "documents"], "properties": { "metadata": { "type": "object", - "required": ["title", "split_strategy"], + "required": ["split_strategy"], "properties": { - "title": {"type": "string", "description": "Document title"}, "split_strategy": { "type": "string", - "enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"], + "enum": [ + "single_document", + "per_entity", + "by_section", + "by_criteria", + "by_data_type", + "custom" + ], "description": "Strategy for splitting content into multiple files" }, "splitCriteria": { @@ -30,7 +37,6 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]: "type": "string", "description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')" }, - "author": {"type": "string", "description": "Document author (optional)"}, "source_documents": { "type": "array", "items": {"type": "string"}, @@ -38,7 +44,7 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]: }, "extraction_method": { "type": "string", - "default": "ai_extraction", + "default": "ai_generation", "description": "Method used for extraction" } } @@ -64,7 +70,15 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]: "title": {"type": "string", "description": "Section title (optional)"}, "content_type": { "type": "string", - "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"], + "enum": [ + "table", + "bullet_list", + "paragraph", + "heading", + "code_block", + "image", + "mixed" + ], "description": "Primary content type of this section" }, "elements": { @@ -76,7 +90,8 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]: {"$ref": "#/definitions/bullet_list"}, {"$ref": 
"#/definitions/paragraph"}, {"$ref": "#/definitions/heading"}, - {"$ref": "#/definitions/code_block"} + {"$ref": "#/definitions/code_block"}, + {"$ref": "#/definitions/image"} ] } }, @@ -191,11 +206,20 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]: "code": {"type": "string", "description": "Code content"}, "language": {"type": "string", "description": "Programming language (optional)"} } + }, + "image": { + "type": "object", + "required": ["url"], + "properties": { + "url": {"type": "string", "description": "Image URL or data URI"}, + "caption": {"type": "string", "description": "Image caption (optional)"}, + "alt": {"type": "string", "description": "Alt text (optional)"} + } } } } -def get_document_subJsonSchema() -> Dict[str, Any]: +def getDocumentSchema() -> Dict[str, Any]: """Get the JSON schema for structured document generation (single document).""" return { "type": "object", @@ -206,7 +230,6 @@ def get_document_subJsonSchema() -> Dict[str, Any]: "required": ["title"], "properties": { "title": {"type": "string", "description": "Document title"}, - "author": {"type": "string", "description": "Document author (optional)"}, "source_documents": { "type": "array", "items": {"type": "string"}, @@ -214,7 +237,7 @@ def get_document_subJsonSchema() -> Dict[str, Any]: }, "extraction_method": { "type": "string", - "default": "ai_extraction", + "default": "ai_generation", "description": "Method used for extraction" } } @@ -230,7 +253,15 @@ def get_document_subJsonSchema() -> Dict[str, Any]: "title": {"type": "string", "description": "Section title (optional)"}, "content_type": { "type": "string", - "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"], + "enum": [ + "table", + "bullet_list", + "paragraph", + "heading", + "code_block", + "image", + "mixed" + ], "description": "Primary content type of this section" }, "elements": { @@ -242,7 +273,8 @@ def get_document_subJsonSchema() -> Dict[str, Any]: {"$ref": 
"#/definitions/bullet_list"}, {"$ref": "#/definitions/paragraph"}, {"$ref": "#/definitions/heading"}, - {"$ref": "#/definitions/code_block"} + {"$ref": "#/definitions/code_block"}, + {"$ref": "#/definitions/image"} ] } }, @@ -359,12 +391,21 @@ def get_document_subJsonSchema() -> Dict[str, Any]: "code": {"type": "string", "description": "Code content"}, "language": {"type": "string", "description": "Programming language (optional)"} } + }, + "image": { + "type": "object", + "required": ["url"], + "properties": { + "url": {"type": "string", "description": "Image URL or data URI"}, + "caption": {"type": "string", "description": "Image caption (optional)"}, + "alt": {"type": "string", "description": "Alt text (optional)"} + } } } } -def get_extraction_prompt_template() -> str: +def getExtractionPromptTemplate() -> str: """Get the template for AI extraction prompts that request JSON output.""" return """ You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document. @@ -390,7 +431,7 @@ Return only the JSON structure following the schema. Do not include any text bef """ -def get_generation_prompt_template() -> str: +def getGenerationPromptTemplate() -> str: """Get the template for AI generation prompts that work with JSON input.""" return """ You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content. @@ -416,31 +457,31 @@ Return only the enhanced JSON structure following the schema. 
Do not include any """ -def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]: +def getAdaptiveJsonSchema(promptAnalysis: Dict[str, Any] = None) -> Dict[str, Any]: """Automatically select appropriate schema based on prompt analysis.""" - if prompt_analysis and prompt_analysis.get("is_multi_file", False): - return get_multi_document_subJsonSchema() + if promptAnalysis and promptAnalysis.get("is_multi_file", False): + return getMultiDocumentSchema() else: - return get_document_subJsonSchema() + return getDocumentSchema() -def validate_json_document(json_data: Dict[str, Any]) -> bool: - """Validate that the JSON data follows the document schema.""" +def validateJsonDocument(jsonData: Dict[str, Any]) -> bool: + """Validate that the JSON data follows the unified document schema.""" try: # Basic validation - check required fields - if not isinstance(json_data, dict): + if not isinstance(jsonData, dict): return False # Check if it's multi-document or single-document structure - if "documents" in json_data: + if "documents" in jsonData: # Multi-document structure - if "metadata" not in json_data: + if "metadata" not in jsonData: return False - metadata = json_data["metadata"] - if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata: + metadata = jsonData["metadata"] + if not isinstance(metadata, dict) or "split_strategy" not in metadata: return False - documents = json_data["documents"] + documents = jsonData["documents"] if not isinstance(documents, list): return False @@ -469,7 +510,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool: return False # Validate content_type - valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"] + valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"] if section["content_type"] not in valid_types: return False @@ -477,16 +518,16 @@ def validate_json_document(json_data: Dict[str, Any]) 
-> bool: if not isinstance(section["elements"], list): return False - elif "sections" in json_data: + elif "sections" in jsonData: # Single-document structure (existing validation) - if "metadata" not in json_data: + if "metadata" not in jsonData: return False - metadata = json_data["metadata"] + metadata = jsonData["metadata"] if not isinstance(metadata, dict) or "title" not in metadata: return False - sections = json_data["sections"] + sections = jsonData["sections"] if not isinstance(sections, list): return False @@ -501,7 +542,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool: return False # Validate content_type - valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"] + valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"] if section["content_type"] not in valid_types: return False diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py index ae744664..055bf397 100644 --- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py +++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py @@ -5,83 +5,10 @@ This module builds prompts for generating documents from extracted content. 
import logging from typing import Dict, Any +from modules.datamodels.datamodelJson import jsonTemplateDocument logger = logging.getLogger(__name__) -# Centralized JSON structure template for document generation -# Includes examples for all content types so AI knows the structure patterns -TEMPLATE_JSON_DOCUMENT_GENERATION = """{ - "metadata": { - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "ai_generation" - }, - "documents": [ - { - "id": "doc_1", - "title": "{{DOCUMENT_TITLE}}", - "filename": "document.json", - "sections": [ - { - "id": "section_heading_example", - "content_type": "heading", - "elements": [ - {"level": 1, "text": "Heading Text"} - ], - "order": 0 - }, - { - "id": "section_paragraph_example", - "content_type": "paragraph", - "elements": [ - {"text": "Paragraph text content"} - ], - "order": 0 - }, - { - "id": "section_list_example", - "content_type": "list", - "elements": [ - { - "items": [ - {"text": "Item 1"}, - {"text": "Item 2"} - ], - "list_type": "numbered" - } - ], - "order": 0 - }, - { - "id": "section_table_example", - "content_type": "table", - "elements": [ - { - "headers": ["Column 1", "Column 2"], - "rows": [ - ["Row 1 Col 1", "Row 1 Col 2"], - ["Row 2 Col 1", "Row 2 Col 2"] - ], - "caption": "Table caption" - } - ], - "order": 0 - }, - { - "id": "section_code_example", - "content_type": "code", - "elements": [ - { - "code": "function example() { return true; }", - "language": "javascript" - } - ], - "order": 0 - } - ] - } - ] -}""" async def buildGenerationPrompt( @@ -106,99 +33,101 @@ async def buildGenerationPrompt( Complete generation prompt string """ # Create a template - let AI generate title if not provided - title_value = title if title else "Generated Document" - json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value) + titleValue = title if title else "Generated Document" + jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", 
titleValue) # Build prompt based on whether this is a continuation or first call # Check if we have valid continuation context with actual JSON fragment - has_continuation = ( + hasContinuation = ( continuationContext and continuationContext.get("section_count", 0) > 0 and continuationContext.get("last_raw_json", "") and continuationContext.get("last_raw_json", "").strip() != "{}" ) - if has_continuation: + if hasContinuation: # CONTINUATION PROMPT - user already received first part, continue from where it stopped - last_raw_json = continuationContext.get("last_raw_json", "") - last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"} - last_items_from_fragment = continuationContext.get("last_items_from_fragment", "") - total_items_count = continuationContext.get("total_items_count", 0) + lastRawJson = continuationContext.get("last_raw_json", "") + lastItemObject = continuationContext.get("last_item_object", "") # Full object like {"text": "value"} + lastItemsFromFragment = continuationContext.get("last_items_from_fragment", "") + totalItemsCount = continuationContext.get("total_items_count", 0) # Show the last few items to indicate where to continue (limit fragment size) # Extract just the ending portion of the JSON to show where it cut off - fragment_snippet = "" - if last_raw_json: + fragmentSnippet = "" + if lastRawJson: # Show last 1500 chars or the whole thing if shorter - just enough to show the cut point - fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json + fragmentSnippet = lastRawJson[-1500:] if len(lastRawJson) > 1500 else lastRawJson # Add ellipsis if truncated - if len(last_raw_json) > 1500: - fragment_snippet = "..." + fragment_snippet + if len(lastRawJson) > 1500: + fragmentSnippet = "..." 
+ fragmentSnippet # Build clear continuation guidance - continuation_guidance = [] + continuationGuidance = [] - if total_items_count > 0: - continuation_guidance.append(f"You have already generated {total_items_count} items.") + if totalItemsCount > 0: + continuationGuidance.append(f"You have already generated {totalItemsCount} items.") # Show the last complete item object (full object format) - if last_item_object: - continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.") + if lastItemObject: + continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.") - continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped." + continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped." - generation_prompt = f"""User request: "{userPrompt}" + generationPrompt = f"""User request: "{userPrompt}" The user already received part of the response. Continue generating the remaining content. -{continuation_text} +{continuationText} Previous response ended here (JSON was cut off at this point): ```json -{fragment_snippet if fragment_snippet else "(No fragment available)"} +{fragmentSnippet if fragmentSnippet else "(No fragment available)"} ``` JSON structure template: -{json_template} +{jsonTemplate} Instructions: -- Return full JSON structure (metadata + documents + sections) -- Continue from where it stopped - add NEW items only, do not repeat old items -- Use the element structures shown in the template -- Generate all remaining content needed to complete the user request -- Fill with actual content (no comments, no "Add more..." text, no placeholders) -- When fully complete, add "complete_response": true at root level -- Return only valid JSON (no comments, no markdown blocks) +- Return ONLY valid JSON (strict). 
No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes. +- Arrays must contain ONLY JSON values; do not include comments or ellipses. +- Use ONLY the element structures shown in the template. +- Continue from where it stopped — add NEW items only; do not repeat existing items. +- Generate all remaining content needed to complete the user request. +- Fill with actual content (no placeholders or instructional text such as "Add more..."). +- When fully complete, add "complete_response": true at root level. +- Output JSON only; no markdown fences or extra text before/after. Continue generating: """ else: # FIRST CALL - initial generation - generation_prompt = f"""User request: "{userPrompt}" + generationPrompt = f"""User request: "{userPrompt}" -Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning. +Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content. -JSON structure template (reference only - shows the pattern): -{json_template} +JSON structure template: +{jsonTemplate} Instructions: -- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning -- Do NOT continue from the template examples above - create your own sections -- Generate complete content based on the user request -- Use the element structures shown in the template (heading, paragraph, list, table, code) -- Create your own section IDs (do not use the example IDs like "section_heading_example") -- When fully complete, add "complete_response": true at root level -- Return only valid JSON (no comments, no markdown blocks, no text before/after) +- Start your response with {{"metadata": ...}} — return COMPLETE, STRICT JSON. +- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes. 
+- Arrays must contain ONLY JSON values; do not include comments or ellipses. +- Do NOT reuse the example section IDs from the template; create your own. +- Use ONLY the element structures shown in the template. +- Generate complete content based on the user request. +- When fully complete, add "complete_response": true at root level. +- Output JSON only; no markdown fences or any additional text. Generate your complete response starting from {{"metadata": ...}}: """ # If we have extracted content, prepend it to the prompt if extracted_content: - generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: + generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: {extracted_content} -{generation_prompt}""" +{generationPrompt}""" - return generation_prompt.strip() + return generationPrompt.strip() diff --git a/modules/services/serviceNeutralization/mainServiceNeutralization.py b/modules/services/serviceNeutralization/mainServiceNeutralization.py index c48939f6..c6aa717a 100644 --- a/modules/services/serviceNeutralization/mainServiceNeutralization.py +++ b/modules/services/serviceNeutralization/mainServiceNeutralization.py @@ -152,11 +152,11 @@ class NeutralizationService: try: # Auto-detect content type if not provided if textType is None: - textType = self.commonUtils.detect_content_type(text) + textType = self.commonUtils.detectContentType(text) # Check if content is binary data - if self.binaryProcessor.is_binary_content(text): - data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text) + if self.binaryProcessor.isBinaryContent(text): + data, mapping, replaced_fields, processed_info = self.binaryProcessor.processBinaryContent(text) neutralized_text = text if isinstance(data, str) else str(data) attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()] return NeutralizationResult( @@ -169,13 +169,13 @@ class NeutralizationService: # Inline former _processData routing if textType in ['csv', 
'json', 'xml']: if textType == 'csv': - data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text) + data, mapping, replaced_fields, processed_info = self.listProcessor.processCsvContent(text) elif textType == 'json': - data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text) + data, mapping, replaced_fields, processed_info = self.listProcessor.processJsonContent(text) else: # xml - data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text) + data, mapping, replaced_fields, processed_info = self.listProcessor.processXmlContent(text) else: - data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text) + data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text) # Stringify data consistently if textType == 'csv': try: diff --git a/modules/services/serviceNeutralization/subParseString.py b/modules/services/serviceNeutralization/subParseString.py index 5c92e110..85235da9 100644 --- a/modules/services/serviceNeutralization/subParseString.py +++ b/modules/services/serviceNeutralization/subParseString.py @@ -6,7 +6,7 @@ Handles pattern matching and replacement for emails, phones, addresses, IDs and import re import uuid from typing import Dict, List, Tuple, Any -from modules.services.serviceNeutralization.subPatterns import DataPatterns, find_patterns_in_text +from modules.services.serviceNeutralization.subPatterns import DataPatterns, findPatternsInText class StringParser: """Handles string parsing and replacement operations""" @@ -22,7 +22,7 @@ class StringParser: self.NamesToParse = NamesToParse or [] self.mapping = {} - def is_placeholder(self, text: str) -> bool: + def _isPlaceholder(self, text: str) -> bool: """ Check if text is already a placeholder in format [tag.uuid] @@ -34,7 +34,7 @@ class StringParser: """ return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text)) - def 
replace_pattern_matches(self, text: str) -> str: + def _replacePatternMatches(self, text: str) -> str: """ Replace pattern-based matches (emails, phones, etc.) in text @@ -44,37 +44,37 @@ class StringParser: Returns: str: Text with pattern matches replaced """ - pattern_matches = find_patterns_in_text(text, self.data_patterns) + patternMatches = findPatternsInText(text, self.data_patterns) # Process pattern matches from right to left to avoid position shifts - for pattern_name, matched_text, start, end in reversed(pattern_matches): + for patternName, matchedText, start, end in reversed(patternMatches): # Skip if already a placeholder - if self.is_placeholder(matched_text): + if self._isPlaceholder(matchedText): continue # Skip if contains placeholder characters - if '[' in matched_text or ']' in matched_text: + if '[' in matchedText or ']' in matchedText: continue - if matched_text not in self.mapping: + if matchedText not in self.mapping: # Generate a UUID for the placeholder - placeholder_id = str(uuid.uuid4()) + placeholderId = str(uuid.uuid4()) # Create placeholder in format [type.uuid] - type_mapping = { + typeMapping = { 'email': 'email', 'phone': 'phone', 'address': 'address', 'id': 'id' } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]" + placeholderType = typeMapping.get(patternName, 'data') + self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]" - replacement = self.mapping[matched_text] + replacement = self.mapping[matchedText] text = text[:start] + replacement + text[end:] return text - def replace_custom_names(self, text: str) -> str: + def _replaceCustomNames(self, text: str) -> str: """ Replace custom names from the user list in text @@ -96,19 +96,19 @@ class StringParser: # Replace each match with a placeholder for match in reversed(matches): # Process from right to left to avoid position shifts - matched_text = match.group() - if matched_text not in 
self.mapping: + matchedText = match.group() + if matchedText not in self.mapping: # Generate a UUID for the placeholder - placeholder_id = str(uuid.uuid4()) - self.mapping[matched_text] = f"[name.{placeholder_id}]" + placeholderId = str(uuid.uuid4()) + self.mapping[matchedText] = f"[name.{placeholderId}]" - replacement = self.mapping[matched_text] + replacement = self.mapping[matchedText] start, end = match.span() text = text[:start] + replacement + text[end:] return text - def process_string(self, text: str) -> str: + def processString(self, text: str) -> str: """ Process a string by replacing patterns first, then custom names @@ -118,18 +118,18 @@ class StringParser: Returns: str: Processed text with replacements """ - if self.is_placeholder(text): + if self._isPlaceholder(text): return text # Step 1: Replace pattern-based matches FIRST - text = self.replace_pattern_matches(text) + text = self._replacePatternMatches(text) # Step 2: Replace custom names SECOND - text = self.replace_custom_names(text) + text = self._replaceCustomNames(text) return text - def process_json_value(self, value: Any) -> Any: + def processJsonValue(self, value: Any) -> Any: """ Process a JSON value for anonymization @@ -140,15 +140,15 @@ class StringParser: Any: Processed value """ if isinstance(value, str): - return self.process_string(value) + return self.processString(value) elif isinstance(value, dict): - return {k: self.process_json_value(v) for k, v in value.items()} + return {k: self.processJsonValue(v) for k, v in value.items()} elif isinstance(value, list): - return [self.process_json_value(item) for item in value] + return [self.processJsonValue(item) for item in value] else: return value - def get_mapping(self) -> Dict[str, str]: + def getMapping(self) -> Dict[str, str]: """ Get the current mapping of original values to placeholders @@ -157,6 +157,6 @@ class StringParser: """ return self.mapping.copy() - def clear_mapping(self): + def clearMapping(self): """Clear the current 
mapping""" self.mapping.clear() diff --git a/modules/services/serviceNeutralization/subPatterns.py b/modules/services/serviceNeutralization/subPatterns.py index 175a690f..57334ef2 100644 --- a/modules/services/serviceNeutralization/subPatterns.py +++ b/modules/services/serviceNeutralization/subPatterns.py @@ -316,7 +316,7 @@ class TextTablePatterns: """Patterns for identifying table-like structures in text""" @staticmethod - def get_patterns() -> List[Tuple[str, str]]: + def getPatterns() -> List[Tuple[str, str]]: return [ # key: value pattern (with optional whitespace) (r'^([^:]+):\s*(.+)$', ':'), @@ -329,15 +329,15 @@ class TextTablePatterns: ] @staticmethod - def is_table_line(line: str) -> bool: + def _isTableLine(line: str) -> bool: """Check if a line matches any table pattern""" - patterns = TextTablePatterns.get_patterns() + patterns = TextTablePatterns.getPatterns() return any(re.match(pattern[0], line.strip()) for pattern in patterns) @staticmethod - def extract_key_value(line: str) -> Optional[Tuple[str, str]]: + def extractKeyValue(line: str) -> Optional[Tuple[str, str]]: """Extract key and value from a table line""" - patterns = TextTablePatterns.get_patterns() + patterns = TextTablePatterns.getPatterns() for pattern, separator in patterns: match = re.match(pattern, line.strip()) if match: @@ -346,7 +346,7 @@ class TextTablePatterns: return key, value return None -def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]: +def getPatternForHeader(header: str, patterns: List[Pattern]) -> Optional[Pattern]: """ Find matching pattern for a header @@ -368,7 +368,7 @@ def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pat return pattern return None -def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]: +def findPatternsInText(text: str, patterns: List[Pattern]) -> List[tuple]: """ Find all pattern matches in text diff --git 
a/modules/services/serviceNeutralization/subProcessBinary.py b/modules/services/serviceNeutralization/subProcessBinary.py index 67c73bc1..3b61b635 100644 --- a/modules/services/serviceNeutralization/subProcessBinary.py +++ b/modules/services/serviceNeutralization/subProcessBinary.py @@ -27,7 +27,7 @@ class BinaryProcessor: 'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'] } - def detect_binary_type(self, content: str) -> str: + def _detectBinaryType(self, content: str) -> str: """ Detect if content is binary data and determine type @@ -54,7 +54,7 @@ class BinaryProcessor: return 'text' - def is_binary_content(self, content: str) -> bool: + def isBinaryContent(self, content: str) -> bool: """ Check if content is binary data @@ -64,9 +64,9 @@ class BinaryProcessor: Returns: bool: True if content is binary """ - return self.detect_binary_type(content) == 'binary' + return self._detectBinaryType(content) == 'binary' - def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]: + def processBinaryContent(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]: """ Process binary content for anonymization @@ -83,15 +83,15 @@ class BinaryProcessor: # 3. Handling metadata and embedded content # 4. 
Preserving binary integrity while removing sensitive data - processed_info = { + processedInfo = { 'type': 'binary', 'status': 'not_implemented', 'message': 'Binary data neutralization not yet implemented' } - return content, {}, [], processed_info + return content, {}, [], processedInfo - def get_supported_types(self) -> Dict[str, list]: + def getSupportedTypes(self) -> Dict[str, list]: """ Get list of supported binary file types diff --git a/modules/services/serviceNeutralization/subProcessCommon.py b/modules/services/serviceNeutralization/subProcessCommon.py index de3fa290..01e6cb25 100644 --- a/modules/services/serviceNeutralization/subProcessCommon.py +++ b/modules/services/serviceNeutralization/subProcessCommon.py @@ -33,7 +33,7 @@ class CommonUtils: """Common utility functions for data processing""" @staticmethod - def normalize_whitespace(text: str) -> str: + def normalizeWhitespace(text: str) -> str: """ Normalize whitespace in text @@ -48,7 +48,7 @@ class CommonUtils: return text.strip() @staticmethod - def is_table_line(line: str) -> bool: + def _isTableLine(line: str) -> bool: """ Check if a line represents a table row @@ -62,7 +62,7 @@ class CommonUtils: re.match(r'^\s*[^\t]+\t[^\t]+$', line)) @staticmethod - def detect_content_type(content: str) -> str: + def detectContentType(content: str) -> str: """ Detect the type of content based on its structure @@ -98,7 +98,7 @@ class CommonUtils: return 'text' @staticmethod - def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]: + def mergeMappings(*mappings: Dict[str, str]) -> Dict[str, str]: """ Merge multiple mapping dictionaries @@ -114,21 +114,21 @@ class CommonUtils: return merged @staticmethod - def create_placeholder(placeholder_type: str, placeholder_id: str) -> str: + def createPlaceholder(placeholderType: str, placeholderId: str) -> str: """ Create a placeholder string in the format [type.uuid] Args: - placeholder_type: Type of placeholder (email, phone, name, etc.) 
- placeholder_id: Unique identifier for the placeholder + placeholderType: Type of placeholder (email, phone, name, etc.) + placeholderId: Unique identifier for the placeholder Returns: str: Formatted placeholder string """ - return f"[{placeholder_type}.{placeholder_id}]" + return f"[{placeholderType}.{placeholderId}]" @staticmethod - def validate_placeholder(placeholder: str) -> bool: + def validatePlaceholder(placeholder: str) -> bool: """ Validate if a string is a valid placeholder @@ -141,7 +141,7 @@ class CommonUtils: return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder)) @staticmethod - def extract_placeholder_info(placeholder: str) -> Optional[tuple]: + def extractPlaceholderInfo(placeholder: str) -> Optional[tuple]: """ Extract type and ID from a placeholder diff --git a/modules/services/serviceNeutralization/subProcessList.py b/modules/services/serviceNeutralization/subProcessList.py index 3d5e7900..1d39ab9e 100644 --- a/modules/services/serviceNeutralization/subProcessList.py +++ b/modules/services/serviceNeutralization/subProcessList.py @@ -10,7 +10,7 @@ from typing import Dict, List, Any, Union from dataclasses import dataclass from io import StringIO from modules.services.serviceNeutralization.subParseString import StringParser -from modules.services.serviceNeutralization.subPatterns import get_pattern_for_header, HeaderPatterns +from modules.services.serviceNeutralization.subPatterns import getPatternForHeader, HeaderPatterns @dataclass class TableData: @@ -32,7 +32,7 @@ class ListProcessor: self.string_parser = StringParser(NamesToParse) self.header_patterns = HeaderPatterns.patterns - def anonymize_table(self, table: TableData) -> TableData: + def _anonymizeTable(self, table: TableData) -> TableData: """ Anonymize table data based on headers @@ -42,28 +42,28 @@ class ListProcessor: Returns: TableData: Anonymized table """ - anonymized_table = TableData( + anonymizedTable = TableData( headers=table.headers.copy(), rows=[row.copy() for row 
in table.rows], source_type=table.source_type ) - for i, header in enumerate(anonymized_table.headers): - pattern = get_pattern_for_header(header, self.header_patterns) + for i, header in enumerate(anonymizedTable.headers): + pattern = getPatternForHeader(header, self.header_patterns) if pattern: - for row in anonymized_table.rows: + for row in anonymizedTable.rows: if row[i] is not None: original = str(row[i]) if original not in self.string_parser.mapping: # Generate a UUID for the placeholder import uuid - placeholder_id = str(uuid.uuid4()) + placeholderId = str(uuid.uuid4()) self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1) row[i] = self.string_parser.mapping[original] - return anonymized_table + return anonymizedTable - def process_csv_content(self, content: str) -> tuple: + def processCsvContent(self, content: str) -> tuple: """ Process CSV content @@ -81,29 +81,29 @@ class ListProcessor: ) if not table.rows: - return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0} + return None, self.string_parser.getMapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0} - anonymized_table = self.anonymize_table(table) + anonymizedTable = self._anonymizeTable(table) # Track replaced fields - replaced_fields = [] - for i, header in enumerate(anonymized_table.headers): - for orig_row, anon_row in zip(table.rows, anonymized_table.rows): - if anon_row[i] != orig_row[i]: - replaced_fields.append(header) + replacedFields = [] + for i, header in enumerate(anonymizedTable.headers): + for origRow, anonRow in zip(table.rows, anonymizedTable.rows): + if anonRow[i] != origRow[i]: + replacedFields.append(header) # Convert back to DataFrame - result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers) + result = pd.DataFrame(anonymizedTable.rows, columns=anonymizedTable.headers) - processed_info = { + processedInfo = { 'type': 'table', 
'headers': table.headers, 'row_count': len(table.rows) } - return result, self.string_parser.get_mapping(), replaced_fields, processed_info + return result, self.string_parser.getMapping(), replacedFields, processedInfo - def process_json_content(self, content: str) -> tuple: + def processJsonContent(self, content: str) -> tuple: """ Process JSON content @@ -116,13 +116,13 @@ class ListProcessor: data = json.loads(content) # Process JSON recursively using string parser - result = self.string_parser.process_json_value(data) + result = self.string_parser.processJsonValue(data) - processed_info = {'type': 'json'} + processedInfo = {'type': 'json'} - return result, self.string_parser.get_mapping(), [], processed_info + return result, self.string_parser.getMapping(), [], processedInfo - def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str: + def _anonymizeXmlElement(self, element: ET.Element, indent: str = '') -> str: """ Recursively process XML element and return formatted string @@ -134,69 +134,69 @@ class ListProcessor: Formatted XML string """ # Process attributes - processed_attrs = {} - for attr_name, attr_value in element.attrib.items(): + processedAttrs = {} + for attrName, attrValue in element.attrib.items(): # Check if attribute name matches any header patterns - pattern = get_pattern_for_header(attr_name, self.header_patterns) + pattern = getPatternForHeader(attrName, self.header_patterns) if pattern: - if attr_value not in self.string_parser.mapping: + if attrValue not in self.string_parser.mapping: # Generate a UUID for the placeholder import uuid - placeholder_id = str(uuid.uuid4()) + placeholderId = str(uuid.uuid4()) # Create placeholder in format [type.uuid] - type_mapping = { + typeMapping = { 'email': 'email', 'phone': 'phone', 'name': 'name', 'address': 'address', 'id': 'id' } - placeholder_type = type_mapping.get(pattern.name, 'data') - self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" - 
processed_attrs[attr_name] = self.string_parser.mapping[attr_value] + placeholderType = typeMapping.get(pattern.name, 'data') + self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]" + processedAttrs[attrName] = self.string_parser.mapping[attrValue] else: # Check if attribute value matches any data patterns - from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns - matches = find_patterns_in_text(attr_value, DataPatterns.patterns) + from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns + matches = findPatternsInText(attrValue, DataPatterns.patterns) if matches: - pattern_name = matches[0][0] - pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None) + patternName = matches[0][0] + pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None) if pattern: - if attr_value not in self.string_parser.mapping: + if attrValue not in self.string_parser.mapping: # Generate a UUID for the placeholder import uuid - placeholder_id = str(uuid.uuid4()) + placeholderId = str(uuid.uuid4()) # Create placeholder in format [type.uuid] - type_mapping = { + typeMapping = { 'email': 'email', 'phone': 'phone', 'name': 'name', 'address': 'address', 'id': 'id' } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" - processed_attrs[attr_name] = self.string_parser.mapping[attr_value] + placeholderType = typeMapping.get(patternName, 'data') + self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]" + processedAttrs[attrName] = self.string_parser.mapping[attrValue] else: - processed_attrs[attr_name] = attr_value + processedAttrs[attrName] = attrValue else: - processed_attrs[attr_name] = attr_value + processedAttrs[attrName] = attrValue - attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items()) + attrs = ' 
'.join(f'{k}="{v}"' for k, v in processedAttrs.items()) attrs = f' {attrs}' if attrs else '' # Process text content text = element.text.strip() if element.text and element.text.strip() else '' if text: # Skip if already a placeholder - if not self.string_parser.is_placeholder(text): + if not self.string_parser._isPlaceholder(text): # Check if text matches any patterns - from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns - pattern_matches = find_patterns_in_text(text, DataPatterns.patterns) + from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns + patternMatches = findPatternsInText(text, DataPatterns.patterns) - if pattern_matches: - pattern_name = pattern_matches[0][0] - pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None) + if patternMatches: + patternName = patternMatches[0][0] + pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None) if pattern: if text not in self.string_parser.mapping: # Generate a UUID for the placeholder @@ -210,8 +210,8 @@ class ListProcessor: 'address': 'address', 'id': 'id' } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]" + placeholderType = typeMapping.get(patternName, 'data') + self.string_parser.mapping[text] = f"[{placeholderType}.{placeholderId}]" text = self.string_parser.mapping[text] else: # Check if text matches any custom names from the user list @@ -230,8 +230,8 @@ class ListProcessor: # Process child elements children = [] for child in element: - child_str = self.anonymize_xml_element(child, indent + ' ') - children.append(child_str) + childStr = self._anonymizeXmlElement(child, indent + ' ') + children.append(childStr) # Build element string if not children and not text: @@ -246,7 +246,7 @@ class ListProcessor: result.append(f"{indent}") return '\n'.join(result) - def 
process_xml_content(self, content: str) -> tuple: + def processXmlContent(self, content: str) -> tuple: """ Process XML content @@ -259,21 +259,21 @@ class ListProcessor: root = ET.fromstring(content) # Process XML recursively with proper formatting - result = self.anonymize_xml_element(root) + result = self._anonymizeXmlElement(root) - processed_info = {'type': 'xml'} + processedInfo = {'type': 'xml'} - return result, self.string_parser.get_mapping(), [], processed_info + return result, self.string_parser.getMapping(), [], processedInfo - def get_mapping(self) -> Dict[str, str]: + def getMapping(self) -> Dict[str, str]: """ Get the current mapping of original values to placeholders Returns: Dict[str, str]: Mapping dictionary """ - return self.string_parser.get_mapping() + return self.string_parser.getMapping() - def clear_mapping(self): + def clearMapping(self): """Clear the current mapping""" - self.string_parser.clear_mapping() + self.string_parser.clearMapping() diff --git a/modules/services/serviceNeutralization/subProcessText.py b/modules/services/serviceNeutralization/subProcessText.py index 98d8cbde..2c183828 100644 --- a/modules/services/serviceNeutralization/subProcessText.py +++ b/modules/services/serviceNeutralization/subProcessText.py @@ -25,7 +25,7 @@ class TextProcessor: """ self.string_parser = StringParser(NamesToParse) - def extract_tables_from_text(self, content: str) -> tuple: + def _extractTablesFromText(self, content: str) -> tuple: """ Extract tables and plain text from content @@ -38,11 +38,11 @@ class TextProcessor: # For now, process the entire content as plain text # This can be extended later to detect table-like structures tables = [] - plain_texts = [PlainText(content=content, source_type='text_plain')] + plainTexts = [PlainText(content=content, source_type='text_plain')] - return tables, plain_texts + return tables, plainTexts - def anonymize_plain_text(self, text: PlainText) -> PlainText: + def _anonymizePlainText(self, text: 
PlainText) -> PlainText: """ Anonymize plain text content @@ -53,11 +53,11 @@ class TextProcessor: PlainText: Anonymized text """ # Use the string parser to process the content - anonymized_content = self.string_parser.process_string(text.content) + anonymizedContent = self.string_parser.processString(text.content) - return PlainText(content=anonymized_content, source_type=text.source_type) + return PlainText(content=anonymizedContent, source_type=text.source_type) - def process_text_content(self, content: str) -> tuple: + def processTextContent(self, content: str) -> tuple: """ Process text content and return anonymized data @@ -68,35 +68,35 @@ class TextProcessor: Tuple of (anonymized_content, mapping, replaced_fields, processed_info) """ # Extract tables and plain text sections - tables, plain_texts = self.extract_tables_from_text(content) + tables, plainTexts = self._extractTablesFromText(content) # Process plain text sections - anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts] + anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts] # Combine all processed content result = content - for text, anonymized_text in zip(plain_texts, anonymized_texts): - if text.content != anonymized_text.content: - result = result.replace(text.content, anonymized_text.content) + for text, anonymizedText in zip(plainTexts, anonymizedTexts): + if text.content != anonymizedText.content: + result = result.replace(text.content, anonymizedText.content) # Get processing information - processed_info = { + processedInfo = { 'type': 'text', 'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables] if tables else []) } - return result, self.string_parser.get_mapping(), [], processed_info + return result, self.string_parser.getMapping(), [], processedInfo - def get_mapping(self) -> Dict[str, str]: + def getMapping(self) -> Dict[str, str]: """ Get the current mapping of original values to placeholders Returns: Dict[str, str]: 
Mapping dictionary """ - return self.string_parser.get_mapping() + return self.string_parser.getMapping() - def clear_mapping(self): + def clearMapping(self): """Clear the current mapping""" - self.string_parser.clear_mapping() + self.string_parser.clearMapping() diff --git a/modules/services/serviceSharepoint/mainServiceSharepoint.py b/modules/services/serviceSharepoint/mainServiceSharepoint.py index ec05f881..481c6818 100644 --- a/modules/services/serviceSharepoint/mainServiceSharepoint.py +++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py @@ -20,8 +20,8 @@ class SharepointService: Use setAccessTokenFromConnection() method to configure the access token before making API calls. """ self.services = serviceCenter - self.access_token = None - self.base_url = "https://graph.microsoft.com/v1.0" + self.accessToken = None + self.baseUrl = "https://graph.microsoft.com/v1.0" def setAccessTokenFromConnection(self, userConnection) -> bool: """Set access token from UserConnection. @@ -52,21 +52,21 @@ class SharepointService: logger.error(f"Error setting access token: {str(e)}") return False - async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]: + async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]: """Make a Microsoft Graph API call with proper error handling.""" try: - if self.access_token is None: + if self.accessToken is None: logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.") return {"error": "Access token is not set. 
Please call setAccessTokenFromConnection() before using the SharePoint service."} headers = { - "Authorization": f"Bearer {self.access_token}", + "Authorization": f"Bearer {self.accessToken}", "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json" } # Remove leading slash from endpoint to avoid double slash - clean_endpoint = endpoint.lstrip('/') - url = f"{self.base_url}/{clean_endpoint}" + cleanEndpoint = endpoint.lstrip('/') + url = f"{self.baseUrl}/{cleanEndpoint}" logger.debug(f"Making Graph API call: {method} {url}") timeout = aiohttp.ClientTimeout(total=30) @@ -106,10 +106,10 @@ class SharepointService: logger.error(f"Error making Graph API call: {str(e)}") return {"error": f"Error making Graph API call: {str(e)}"} - async def discover_sites(self) -> List[Dict[str, Any]]: + async def discoverSites(self) -> List[Dict[str, Any]]: """Discover all SharePoint sites accessible to the user.""" try: - result = await self._make_graph_api_call("sites?search=*") + result = await self._makeGraphApiCall("sites?search=*") if "error" in result: logger.error(f"Error discovering SharePoint sites: {result['error']}") @@ -118,9 +118,9 @@ class SharepointService: sites = result.get("value", []) logger.info(f"Discovered {len(sites)} SharePoint sites") - processed_sites = [] + processedSites = [] for site in sites: - site_info = { + siteInfo = { "id": site.get("id"), "displayName": site.get("displayName"), "name": site.get("name"), @@ -129,24 +129,24 @@ class SharepointService: "createdDateTime": site.get("createdDateTime"), "lastModifiedDateTime": site.get("lastModifiedDateTime") } - processed_sites.append(site_info) - logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}") + processedSites.append(siteInfo) + logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}") - return processed_sites + return processedSites except Exception as e: logger.error(f"Error discovering 
SharePoint sites: {str(e)}") return [] - async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]: + async def findSiteByName(self, siteName: str) -> Optional[Dict[str, Any]]: """Find a specific SharePoint site by name using direct Graph API call.""" try: # Try to get the site directly by name using Graph API - endpoint = f"sites/{site_name}" - result = await self._make_graph_api_call(endpoint) + endpoint = f"sites/{siteName}" + result = await self._makeGraphApiCall(endpoint) if result and "error" not in result: - site_info = { + siteInfo = { "id": result.get("id"), "displayName": result.get("displayName"), "name": result.get("name"), @@ -155,15 +155,15 @@ class SharepointService: "createdDateTime": result.get("createdDateTime"), "lastModifiedDateTime": result.get("lastModifiedDateTime") } - logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}") - return site_info + logger.info(f"Found site directly: {siteInfo['displayName']} - {siteInfo['webUrl']}") + return siteInfo except Exception as e: - logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}") + logger.debug(f"Direct site lookup failed for '{siteName}': {str(e)}") # Fallback to discovery if direct lookup fails - logger.info(f"Direct lookup failed, trying discovery for site: {site_name}") - sites = await self.discover_sites() + logger.info(f"Direct lookup failed, trying discovery for site: {siteName}") + sites = await self.discoverSites() if not sites: logger.warning("No sites discovered") return None @@ -174,46 +174,46 @@ class SharepointService: # Try exact match first for site in sites: - if site.get("displayName", "").strip().lower() == site_name.strip().lower(): + if site.get("displayName", "").strip().lower() == siteName.strip().lower(): logger.info(f"Found exact match: {site.get('displayName')}") return site # Try partial match for site in sites: - if site_name.lower() in site.get("displayName", "").lower(): + if siteName.lower() in 
site.get("displayName", "").lower(): logger.info(f"Found partial match: {site.get('displayName')}") return site - logger.warning(f"No site found matching: {site_name}") + logger.warning(f"No site found matching: {siteName}") return None - async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]: + async def findSiteByWebUrl(self, webUrl: str) -> Optional[Dict[str, Any]]: """Find a SharePoint site using its web URL (useful for guest sites).""" try: # Use the web URL format: sites/{hostname}:/sites/{site-path} # Extract hostname and site path from the web URL - if not web_url.startswith("https://"): - web_url = f"https://{web_url}" + if not webUrl.startswith("https://"): + webUrl = f"https://{webUrl}" # Parse the URL to extract hostname and site path from urllib.parse import urlparse - parsed = urlparse(web_url) + parsed = urlparse(webUrl) hostname = parsed.hostname - path_parts = parsed.path.strip('/').split('/') + pathParts = parsed.path.strip('/').split('/') - if len(path_parts) >= 2 and path_parts[0] == 'sites': - site_path = '/'.join(path_parts[1:]) # Everything after 'sites/' + if len(pathParts) >= 2 and pathParts[0] == 'sites': + sitePath = '/'.join(pathParts[1:]) # Everything after 'sites/' else: - logger.error(f"Invalid SharePoint URL format: {web_url}") + logger.error(f"Invalid SharePoint URL format: {webUrl}") return None - endpoint = f"sites/{hostname}:/sites/{site_path}" + endpoint = f"sites/{hostname}:/sites/{sitePath}" logger.debug(f"Trying web URL format: {endpoint}") - result = await self._make_graph_api_call(endpoint) + result = await self._makeGraphApiCall(endpoint) if result and "error" not in result: - site_info = { + siteInfo = { "id": result.get("id"), "displayName": result.get("displayName"), "name": result.get("name"), @@ -222,33 +222,33 @@ class SharepointService: "createdDateTime": result.get("createdDateTime"), "lastModifiedDateTime": result.get("lastModifiedDateTime") } - logger.info(f"Found site by web URL: 
{site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") - return site_info + logger.info(f"Found site by web URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})") + return siteInfo else: - logger.warning(f"Site not found using web URL: {web_url}") + logger.warning(f"Site not found using web URL: {webUrl}") return None except Exception as e: logger.error(f"Error finding site by web URL: {str(e)}") return None - async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]: + async def findSiteByUrl(self, hostname: str, sitePath: str) -> Optional[Dict[str, Any]]: """Find a SharePoint site using the site URL format.""" try: # For guest sites, try different URL formats - url_formats = [ - f"sites/{hostname}:/sites/{site_path}", # Standard format - f"sites/{hostname}:/sites/{site_path}/", # With trailing slash - f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase - f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash + urlFormats = [ + f"sites/{hostname}:/sites/{sitePath}", # Standard format + f"sites/{hostname}:/sites/{sitePath}/", # With trailing slash + f"sites/{hostname}:/sites/{sitePath.lower()}", # Lowercase + f"sites/{hostname}:/sites/{sitePath.lower()}/", # Lowercase with slash ] - for endpoint in url_formats: + for endpoint in urlFormats: logger.debug(f"Trying URL format: {endpoint}") - result = await self._make_graph_api_call(endpoint) + result = await self._makeGraphApiCall(endpoint) if result and "error" not in result: - site_info = { + siteInfo = { "id": result.get("id"), "displayName": result.get("displayName"), "name": result.get("name"), @@ -257,29 +257,29 @@ class SharepointService: "createdDateTime": result.get("createdDateTime"), "lastModifiedDateTime": result.get("lastModifiedDateTime") } - logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") - return site_info + logger.info(f"Found site by 
URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})") + return siteInfo else: logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}") - logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}") + logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{sitePath}") return None except Exception as e: logger.error(f"Error finding site by URL: {str(e)}") return None - async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]: + async def getFolderByPath(self, siteId: str, folderPath: str) -> Optional[Dict[str, Any]]: """Get folder information by path within a site.""" try: # Clean the path - clean_path = folder_path.lstrip('/') - endpoint = f"sites/{site_id}/drive/root:/{clean_path}" + cleanPath = folderPath.lstrip('/') + endpoint = f"sites/{siteId}/drive/root:/{cleanPath}" - result = await self._make_graph_api_call(endpoint) + result = await self._makeGraphApiCall(endpoint) if "error" in result: - logger.warning(f"Folder not found at path {folder_path}: {result['error']}") + logger.warning(f"Folder not found at path {folderPath}: {result['error']}") return None return result @@ -288,43 +288,43 @@ class SharepointService: logger.error(f"Error getting folder by path: {str(e)}") return None - async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]: + async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]: """Upload a file to SharePoint.""" try: # Clean the path - clean_path = folder_path.lstrip('/') - upload_path = f"{clean_path.rstrip('/')}/{file_name}" - endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content" + cleanPath = folderPath.lstrip('/') + uploadPath = f"{cleanPath.rstrip('/')}/{fileName}" + endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content" logger.info(f"Uploading file to: {endpoint}") - 
result = await self._make_graph_api_call(endpoint, method="PUT", data=content) + result = await self._makeGraphApiCall(endpoint, method="PUT", data=content) if "error" in result: logger.error(f"Upload failed: {result['error']}") return result - logger.info(f"File uploaded successfully: {file_name}") + logger.info(f"File uploaded successfully: {fileName}") return result except Exception as e: logger.error(f"Error uploading file: {str(e)}") return {"error": f"Error uploading file: {str(e)}"} - async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]: + async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]: """Download a file from SharePoint.""" try: - if self.access_token is None: + if self.accessToken is None: logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.") return None - endpoint = f"sites/{site_id}/drive/items/{file_id}/content" + endpoint = f"sites/{siteId}/drive/items/{fileId}/content" - headers = {"Authorization": f"Bearer {self.access_token}"} + headers = {"Authorization": f"Bearer {self.accessToken}"} timeout = aiohttp.ClientTimeout(total=30) async with aiohttp.ClientSession(timeout=timeout) as session: - async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response: + async with session.get(f"{self.baseUrl}/{endpoint}", headers=headers) as response: if response.status == 200: return await response.read() else: @@ -335,32 +335,32 @@ class SharepointService: logger.error(f"Error downloading file: {str(e)}") return None - async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]: + async def listFolderContents(self, siteId: str, folderPath: str = "") -> List[Dict[str, Any]]: """List contents of a folder.""" try: - if not folder_path or folder_path == "/": - endpoint = f"sites/{site_id}/drive/root/children" + if not folderPath or folderPath == "/": + endpoint = 
f"sites/{siteId}/drive/root/children" else: - clean_path = folder_path.lstrip('/') - endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children" + cleanPath = folderPath.lstrip('/') + endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children" - result = await self._make_graph_api_call(endpoint) + result = await self._makeGraphApiCall(endpoint) if "error" in result: logger.warning(f"Failed to list folder contents: {result['error']}") return None items = result.get("value", []) - processed_items = [] + processedItems = [] for item in items: # Determine if it's a folder or file - is_folder = 'folder' in item + isFolder = 'folder' in item - item_info = { + itemInfo = { "id": item.get("id"), "name": item.get("name"), - "type": "folder" if is_folder else "file", + "type": "folder" if isFolder else "file", "size": item.get("size", 0), "createdDateTime": item.get("createdDateTime"), "lastModifiedDateTime": item.get("lastModifiedDateTime"), @@ -368,42 +368,42 @@ class SharepointService: } if "file" in item: - item_info["mimeType"] = item["file"].get("mimeType") - item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") + itemInfo["mimeType"] = item["file"].get("mimeType") + itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") if "folder" in item: - item_info["childCount"] = item["folder"].get("childCount", 0) + itemInfo["childCount"] = item["folder"].get("childCount", 0) - processed_items.append(item_info) + processedItems.append(itemInfo) - return processed_items + return processedItems except Exception as e: logger.error(f"Error listing folder contents: {str(e)}") return [] - async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]: + async def searchFiles(self, siteId: str, query: str) -> List[Dict[str, Any]]: """Search for files in a site.""" try: - search_query = query.replace("'", "''") # Escape single quotes for OData - endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')" + searchQuery = 
query.replace("'", "''") # Escape single quotes for OData + endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')" - result = await self._make_graph_api_call(endpoint) + result = await self._makeGraphApiCall(endpoint) if "error" in result: logger.warning(f"Search failed: {result['error']}") return [] items = result.get("value", []) - processed_items = [] + processedItems = [] for item in items: - is_folder = 'folder' in item + isFolder = 'folder' in item - item_info = { + itemInfo = { "id": item.get("id"), "name": item.get("name"), - "type": "folder" if is_folder else "file", + "type": "folder" if isFolder else "file", "size": item.get("size", 0), "createdDateTime": item.get("createdDateTime"), "lastModifiedDateTime": item.get("lastModifiedDateTime"), @@ -412,64 +412,64 @@ class SharepointService: } if "file" in item: - item_info["mimeType"] = item["file"].get("mimeType") - item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") + itemInfo["mimeType"] = item["file"].get("mimeType") + itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") - processed_items.append(item_info) + processedItems.append(itemInfo) - return processed_items + return processedItems except Exception as e: logger.error(f"Error searching files: {str(e)}") return [] - async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None: + async def copyFileAsync(self, siteId: str, sourceFolder: str, sourceFile: str, destFolder: str, destFile: str) -> None: """Copy a file from source to destination folder (like original synchronizer).""" try: # First, download the source file - source_path = f"{source_folder}/{source_file}" - file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path) + sourcePath = f"{sourceFolder}/{sourceFile}" + fileContent = await self.downloadFileByPath(siteId=siteId, filePath=sourcePath) - if not file_content: - raise Exception(f"Failed to download source 
file: {source_path}") + if not fileContent: + raise Exception(f"Failed to download source file: {sourcePath}") # Upload to destination - await self.upload_file( - site_id=site_id, - folder_path=dest_folder, - file_name=dest_file, - content=file_content + await self.uploadFile( + siteId=siteId, + folderPath=destFolder, + fileName=destFile, + content=fileContent ) - logger.info(f"File copied: {source_file} -> {dest_file}") + logger.info(f"File copied: {sourceFile} -> {destFile}") except Exception as e: # Provide more specific error information - error_msg = str(e) - if "itemNotFound" in error_msg or "404" in error_msg: - raise Exception(f"Source file not found (404): {source_path} - {error_msg}") + errorMsg = str(e) + if "itemNotFound" in errorMsg or "404" in errorMsg: + raise Exception(f"Source file not found (404): {sourcePath} - {errorMsg}") else: - raise Exception(f"Error copying file: {error_msg}") + raise Exception(f"Error copying file: {errorMsg}") - async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]: + async def downloadFileByPath(self, siteId: str, filePath: str) -> Optional[bytes]: """Download a file by its path within a site.""" try: - if self.access_token is None: + if self.accessToken is None: logger.error("Access token is not set. 
Please call setAccessTokenFromConnection() before using the SharePoint service.") return None # Clean the path - clean_path = file_path.strip('/') - endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content" + cleanPath = filePath.strip('/') + endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content" # Use direct HTTP call for file downloads (binary content) headers = { - "Authorization": f"Bearer {self.access_token}", + "Authorization": f"Bearer {self.accessToken}", } # Remove leading slash from endpoint to avoid double slash - clean_endpoint = endpoint.lstrip('/') - url = f"{self.base_url}/{clean_endpoint}" + cleanEndpoint = endpoint.lstrip('/') + url = f"{self.baseUrl}/{cleanEndpoint}" logger.debug(f"Downloading file: GET {url}") timeout = aiohttp.ClientTimeout(total=30) diff --git a/modules/services/serviceUtils/mainServiceUtils.py b/modules/services/serviceUtils/mainServiceUtils.py index 1824eab7..c8a78fea 100644 --- a/modules/services/serviceUtils/mainServiceUtils.py +++ b/modules/services/serviceUtils/mainServiceUtils.py @@ -7,7 +7,7 @@ import logging from typing import Any, Optional, Dict, Callable, List from modules.shared.configuration import APP_CONFIG from modules.shared.eventManagement import eventManager -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.shared import jsonUtils logger = logging.getLogger(__name__) @@ -122,7 +122,7 @@ class UtilsService: float: Current UTC timestamp in seconds """ try: - return get_utc_timestamp() + return getUtcTimestamp() except Exception as e: logger.error(f"Error getting UTC timestamp: {str(e)}") return 0.0 @@ -185,6 +185,75 @@ class UtilsService: # Silent fail to never break main flow pass + # ===== Prompt sanitization ===== + + def sanitizePromptContent(self, content: str, contentType: str = "text") -> str: + """ + Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation. 
+ + This is the single source of truth for all prompt sanitization across the system. + Replaces all scattered sanitization functions with a unified approach. + + Args: + content: The content to sanitize + contentType: Type of content ("text", "userinput", "json", "document") + + Returns: + Safely sanitized content ready for AI prompt insertion + """ + if not content: + return "" + + try: + import re + # Convert to string if not already + content_str = str(content) + + # Remove null bytes and control characters (except newlines and tabs) + sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str) + + # Handle different content types with appropriate sanitization + if contentType == "userinput": + # Extra security for user-controlled content + # Escape curly braces to prevent placeholder injection + sanitized = sanitized.replace('{', '{{').replace('}', '}}') + # Escape quotes and wrap in single quotes + sanitized = sanitized.replace('"', '\\"').replace("'", "\\'") + return f"'{sanitized}'" + + elif contentType == "json": + # For JSON content, escape quotes and backslashes + sanitized = sanitized.replace('\\', '\\\\') + sanitized = sanitized.replace('"', '\\"') + sanitized = sanitized.replace('\n', '\\n') + sanitized = sanitized.replace('\r', '\\r') + sanitized = sanitized.replace('\t', '\\t') + + elif contentType == "document": + # For document content, escape special characters + sanitized = sanitized.replace('\\', '\\\\') + sanitized = sanitized.replace('"', '\\"') + sanitized = sanitized.replace("'", "\\'") + sanitized = sanitized.replace('\n', '\\n') + sanitized = sanitized.replace('\r', '\\r') + sanitized = sanitized.replace('\t', '\\t') + + else: # contentType == "text" or default + # Basic text sanitization + sanitized = sanitized.replace('\\', '\\\\') + sanitized = sanitized.replace('"', '\\"') + sanitized = sanitized.replace("'", "\\'") + sanitized = sanitized.replace('\n', '\\n') + sanitized = sanitized.replace('\r', '\\r') + sanitized = 
sanitized.replace('\t', '\\t') + + return sanitized + + except Exception as e: + logger.error(f"Error sanitizing prompt content: {str(e)}") + # Return a safe fallback + return "[ERROR: Content could not be safely sanitized]" + # ===== JSON utility wrappers ===== def jsonStripCodeFences(self, text: str) -> str: diff --git a/modules/shared/attributeUtils.py b/modules/shared/attributeUtils.py index 6c56e855..90f3a766 100644 --- a/modules/shared/attributeUtils.py +++ b/modules/shared/attributeUtils.py @@ -34,54 +34,54 @@ class AttributeDefinition(BaseModel): MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {} -def register_model_labels(model_name: str, model_label: Dict[str, str], labels: Dict[str, Dict[str, str]]): +def registerModelLabels(modelName: str, modelLabel: Dict[str, str], labels: Dict[str, Dict[str, str]]): """ Register labels for a model's attributes and the model itself. Args: - model_name: Name of the model class - model_label: Dictionary mapping language codes to model labels + modelName: Name of the model class + modelLabel: Dictionary mapping language codes to model labels e.g. {"en": "Prompt", "fr": "Invite"} labels: Dictionary mapping attribute names to their translations e.g. {"name": {"en": "Name", "fr": "Nom"}} """ - MODEL_LABELS[model_name] = {"model": model_label, "attributes": labels} + MODEL_LABELS[modelName] = {"model": modelLabel, "attributes": labels} -def get_model_labels(model_name: str, language: str = "en") -> Dict[str, str]: +def getModelLabels(modelName: str, language: str = "en") -> Dict[str, str]: """ Get labels for a model's attributes in the specified language. 
Args: - model_name: Name of the model class + modelName: Name of the model class language: Language code (default: "en") Returns: Dictionary mapping attribute names to their labels in the specified language """ - model_data = MODEL_LABELS.get(model_name, {}) - attribute_labels = model_data.get("attributes", {}) + modelData = MODEL_LABELS.get(modelName, {}) + attributeLabels = modelData.get("attributes", {}) return { attr: translations.get(language, translations.get("en", attr)) - for attr, translations in attribute_labels.items() + for attr, translations in attributeLabels.items() } -def get_model_label(model_name: str, language: str = "en") -> str: +def getModelLabel(modelName: str, language: str = "en") -> str: """ Get the label for a model in the specified language. Args: - model_name: Name of the model class + modelName: Name of the model class language: Language code (default: "en") Returns: Model label in the specified language, or model name if no label exists """ - model_data = MODEL_LABELS.get(model_name, {}) - model_label = model_data.get("model", {}) - return model_label.get(language, model_label.get("en", model_name)) + modelData = MODEL_LABELS.get(modelName, {}) + modelLabel = modelData.get("model", {}) + return modelLabel.get(language, modelLabel.get("en", modelName)) def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]: @@ -100,8 +100,8 @@ def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguag attributes = [] model_name = modelClass.__name__ - labels = get_model_labels(model_name, userLanguage) - model_label = get_model_label(model_name, userLanguage) + labels = getModelLabels(model_name, userLanguage) + model_label = getModelLabel(model_name, userLanguage) # Pydantic v2 only fields = modelClass.model_fields diff --git a/modules/shared/auditLogger.py b/modules/shared/auditLogger.py index dab32fa9..3155f681 100644 --- a/modules/shared/auditLogger.py +++ 
b/modules/shared/auditLogger.py @@ -19,42 +19,42 @@ class DailyRotatingFileHandler(RotatingFileHandler): The log file name includes the current date and switches at midnight. """ - def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs): - self.log_dir = log_dir - self.filename_prefix = filename_prefix - self.current_date = None - self.current_file = None + def __init__(self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs): + self.logDir = logDir + self.filenamePrefix = filenamePrefix + self.currentDate = None + self.currentFile = None # Initialize with today's file - self._update_file_if_needed() + self._updateFileIfNeeded() # Call parent constructor with current file - super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs) + super().__init__(self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs) - def _update_file_if_needed(self): + def _updateFileIfNeeded(self): """Update the log file if the date has changed""" today = datetime.now().strftime("%Y%m%d") - if self.current_date != today: - self.current_date = today - new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log") + if self.currentDate != today: + self.currentDate = today + newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log") - if self.current_file != new_file: - self.current_file = new_file + if self.currentFile != newFile: + self.currentFile = newFile return True return False def emit(self, record): """Emit a log record, switching files if date has changed""" # Check if we need to switch to a new file - if self._update_file_if_needed(): + if self._updateFileIfNeeded(): # Close current file and open new one if self.stream: self.stream.close() self.stream = None # Update the baseFilename for the parent class - self.baseFilename = self.current_file + self.baseFilename = self.currentFile # Reopen the stream if not self.delay: self.stream = self._open() @@ -68,9 
+68,9 @@ class AuditLogger: def __init__(self): self.logger = None - self._setup_audit_logger() + self._setupAuditLogger() - def _setup_audit_logger(self): + def _setupAuditLogger(self): """Setup the audit logger with daily file rotation""" try: # Get log directory from config @@ -96,10 +96,10 @@ class AuditLogger: backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5)) fileHandler = DailyRotatingFileHandler( - log_dir=logDir, - filename_prefix="log_audit", - max_bytes=rotationSize, - backup_count=backupCount + logDir=logDir, + filenamePrefix="log_audit", + maxBytes=rotationSize, + backupCount=backupCount ) # Create formatter for audit log @@ -120,9 +120,9 @@ class AuditLogger: self.logger = logging.getLogger(__name__) self.logger.error(f"Failed to setup audit logger: {str(e)}") - def log_event(self, - user_id: str, - mandate_id: str, + def logEvent(self, + userId: str, + mandateId: str, category: str, action: str, details: str = "", @@ -131,8 +131,8 @@ class AuditLogger: Log an audit event Args: - user_id: User identifier - mandate_id: Mandate identifier (can be empty if not applicable) + userId: User identifier + mandateId: Mandate identifier (can be empty if not applicable) category: Event category (e.g., 'key', 'access', 'data') action: Specific action (e.g., 'decode', 'login', 'logout') details: Additional details about the event @@ -148,50 +148,50 @@ class AuditLogger: # Format the audit log entry # Format: timestamp | userid | mandateid | category | action | details - audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}" + auditEntry = f"{userId} | {mandateId} | {category} | {action} | {details}" # Log the event - self.logger.info(audit_entry) + self.logger.info(auditEntry) except Exception as e: # Use standard logger as fallback logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}") - def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None: + def logKeyAccess(self, 
userId: str, mandateId: str, keyName: str, action: str) -> None: """Log key access events (decode/encode)""" - self.log_event( - user_id=user_id, - mandate_id=mandate_id, + self.logEvent( + userId=userId, + mandateId=mandateId, category="key", action=action, - details=key_name + details=keyName ) - def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None: + def logUserAccess(self, userId: str, mandateId: str, action: str, successInfo: str = "") -> None: """Log user access events (login/logout)""" - self.log_event( - user_id=user_id, - mandate_id=mandate_id, + self.logEvent( + userId=userId, + mandateId=mandateId, category="access", action=action, - details=success_info + details=successInfo ) - def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None: + def logDataAccess(self, userId: str, mandateId: str, action: str, details: str = "") -> None: """Log data access events""" - self.log_event( - user_id=user_id, - mandate_id=mandate_id, + self.logEvent( + userId=userId, + mandateId=mandateId, category="data", action=action, details=details ) - def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None: + def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None: """Log security-related events""" - self.log_event( - user_id=user_id, - mandate_id=mandate_id, + self.logEvent( + userId=userId, + mandateId=mandateId, category="security", action=action, details=details diff --git a/modules/shared/configuration.py b/modules/shared/configuration.py index c8b77773..19f01a5c 100644 --- a/modules/shared/configuration.py +++ b/modules/shared/configuration.py @@ -199,10 +199,10 @@ class Configuration: # Log audit event for secret key access try: from modules.shared.auditLogger import audit_logger - audit_logger.log_key_access( - user_id=user_id, - mandate_id="system", - key_name=key, + audit_logger.logKeyAccess( + 
userId=user_id, + mandateId="system", + keyName=key, action="decode" ) except Exception: @@ -211,9 +211,9 @@ class Configuration: if value.startswith("{") and value.endswith("}"): # Handle JSON secrets (keys ending with _API_KEY that contain JSON) - return handleSecretJson(value, user_id, key) + return handleSecretJson(value, userId=user_id, keyName=key) else: - return handleSecretText(value, user_id, key) + return handleSecretText(value, userId=user_id, keyName=key) return value return default @@ -235,31 +235,31 @@ class Configuration: """Set a configuration value (for testing/overrides)""" self._data[key] = value -def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str: +def handleSecretText(value: str, userId: str = "system", keyName: str = "unknown") -> str: """ Handle secret values with encryption/decryption support. Args: value: The secret value to handle (may be encrypted) - user_id: The user ID making the request (default: "system") - key_name: The name of the key being decrypted (default: "unknown") + userId: The user ID making the request (default: "system") + keyName: The name of the key being decrypted (default: "unknown") Returns: str: Processed secret value (decrypted if encrypted) """ - if _is_encrypted_value(value): - return decrypt_value(value, user_id, key_name) + if _isEncryptedValue(value): + return decryptValue(value, userId, keyName) return value -def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str: +def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown") -> str: """ Handle JSON secret values (like Google service account keys) with encryption/decryption support. Validates that the value is valid JSON after decryption. 
Args: value: The JSON secret value to handle (may be encrypted) - user_id: The user ID making the request (default: "system") - key_name: The name of the key being decrypted (default: "unknown") + userId: The user ID making the request (default: "system") + keyName: The name of the key being decrypted (default: "unknown") Returns: str: Processed JSON secret value (decrypted if encrypted) @@ -268,15 +268,15 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno ValueError: If the value is not valid JSON after decryption """ # Decrypt if encrypted - if _is_encrypted_value(value): - decrypted_value = decrypt_value(value, user_id, key_name) + if _isEncryptedValue(value): + decryptedValue = decryptValue(value, userId, keyName) else: - decrypted_value = value + decryptedValue = value try: # Validate that it's valid JSON - json.loads(decrypted_value) - return decrypted_value + json.loads(decryptedValue) + return decryptedValue except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON in secret value: {e}") @@ -284,12 +284,12 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno # Structure: {user_id: {key_name: [timestamps]}} _decryption_attempts = {} -def _get_master_key(env_type: str = None) -> bytes: +def _getMasterKey(envType: str = None) -> bytes: """ Get the master key for the specified environment. Args: - env_type: The environment type (dev, int, prod, etc.). If None, uses current config. + envType: The environment type (dev, int, prod, etc.). If None, uses current config. 
Returns: bytes: The master key for encryption/decryption @@ -298,24 +298,24 @@ def _get_master_key(env_type: str = None) -> bytes: ValueError: If no master key is found """ # Get the key location from config - key_location = APP_CONFIG.get('APP_KEY_SYSVAR') - if env_type is None: - env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev') + keyLocation = APP_CONFIG.get('APP_KEY_SYSVAR') + if envType is None: + envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev') - if not key_location: + if not keyLocation: raise ValueError("APP_KEY_SYSVAR not configured") # First try to get from environment variable - master_key = os.environ.get(key_location) + masterKey = os.environ.get(keyLocation) - if master_key: + if masterKey: # If found in environment, use it directly - return master_key.encode('utf-8') + return masterKey.encode('utf-8') # If not in environment, try to read from file - if os.path.exists(key_location): + if os.path.exists(keyLocation): try: - with open(key_location, 'r') as f: + with open(keyLocation, 'r') as f: content = f.read().strip() # Parse the key file format: env = key @@ -326,26 +326,26 @@ def _get_master_key(env_type: str = None) -> bytes: continue if '=' in line: - key_env, key_value = line.split('=', 1) - key_env = key_env.strip() - key_value = key_value.strip() + keyEnv, keyValue = line.split('=', 1) + keyEnv = keyEnv.strip() + keyValue = keyValue.strip() - if key_env == env_type: - return key_value.encode('utf-8') + if keyEnv == envType: + return keyValue.encode('utf-8') - raise ValueError(f"No key found for environment '{env_type}' in {key_location}") + raise ValueError(f"No key found for environment '{envType}' in {keyLocation}") except Exception as e: - raise ValueError(f"Error reading key file {key_location}: {e}") + raise ValueError(f"Error reading key file {keyLocation}: {e}") - raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path") + raise ValueError(f"Master key not found. 
Checked environment variable '{keyLocation}' and file path") -def _derive_encryption_key(master_key: bytes) -> bytes: +def _deriveEncryptionKey(masterKey: bytes) -> bytes: """ Derive a 32-byte encryption key from the master key using PBKDF2. Args: - master_key: The master key bytes + masterKey: The master key bytes Returns: bytes: 32-byte derived key suitable for Fernet @@ -360,9 +360,9 @@ def _derive_encryption_key(master_key: bytes) -> bytes: iterations=100000, ) - return base64.urlsafe_b64encode(kdf.derive(master_key)) + return base64.urlsafe_b64encode(kdf.derive(masterKey)) -def _is_encrypted_value(value: str) -> bool: +def _isEncryptedValue(value: str) -> bool: """ Check if a value is encrypted (starts with environment-specific prefix). @@ -382,64 +382,64 @@ def _is_encrypted_value(value: str) -> bool: value.startswith('TEST_ENC:') or value.startswith('STAGING_ENC:')) -def _get_encryption_prefix(env_type: str) -> str: +def _getEncryptionPrefix(envType: str) -> str: """ Get the encryption prefix for the given environment type. Args: - env_type: The environment type (dev, int, prod, etc.) + envType: The environment type (dev, int, prod, etc.) Returns: str: The encryption prefix """ - return f"{env_type.upper()}_ENC:" + return f"{envType.upper()}_ENC:" -def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool: +def _checkDecryptionRateLimit(userId: str, keyName: str, maxPerSecond: int = 10) -> bool: """ Check if decryption is allowed based on rate limiting (max 10 per second per user per key). 
Args: - user_id: The user ID making the request - key_name: The name of the key being decrypted - max_per_second: Maximum decryptions per second (default: 10) + userId: The user ID making the request + keyName: The name of the key being decrypted + maxPerSecond: Maximum decryptions per second (default: 10) Returns: bool: True if allowed, False if rate limited """ - current_time = time.time() + currentTime = time.time() # Initialize tracking for this user if not exists - if user_id not in _decryption_attempts: - _decryption_attempts[user_id] = {} + if userId not in _decryption_attempts: + _decryption_attempts[userId] = {} # Initialize tracking for this key if not exists - if key_name not in _decryption_attempts[user_id]: - _decryption_attempts[user_id][key_name] = [] + if keyName not in _decryption_attempts[userId]: + _decryption_attempts[userId][keyName] = [] # Clean old attempts (older than 1 second) - _decryption_attempts[user_id][key_name] = [ - timestamp for timestamp in _decryption_attempts[user_id][key_name] - if current_time - timestamp < 1.0 + _decryption_attempts[userId][keyName] = [ + timestamp for timestamp in _decryption_attempts[userId][keyName] + if currentTime - timestamp < 1.0 ] # Check if we're within rate limit - if len(_decryption_attempts[user_id][key_name]) >= max_per_second: - logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)") + if len(_decryption_attempts[userId][keyName]) >= maxPerSecond: + logger.warning(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' ({maxPerSecond}/sec)") return False # Record this attempt - _decryption_attempts[user_id][key_name].append(current_time) + _decryption_attempts[userId][keyName].append(currentTime) return True -def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str: +def encryptValue(value: str, envType: str = None, userId: str = "system", keyName: str = "unknown") -> str: 
""" Encrypt a value using the master key for the specified environment. Args: value: The plain text value to encrypt - env_type: The environment type (dev, int, prod). If None, uses current environment. - user_id: The user ID making the request (default: "system") - key_name: The name of the key being encrypted (default: "unknown") + envType: The environment type (dev, int, prod). If None, uses current environment. + userId: The user ID making the request (default: "system") + keyName: The name of the key being encrypted (default: "unknown") Returns: str: The encrypted value with prefix @@ -447,48 +447,48 @@ def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key Raises: ValueError: If encryption fails """ - if env_type is None: - env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev') + if envType is None: + envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev') try: - master_key = _get_master_key(env_type) - derived_key = _derive_encryption_key(master_key) - fernet = Fernet(derived_key) + masterKey = _getMasterKey(envType) + derivedKey = _deriveEncryptionKey(masterKey) + fernet = Fernet(derivedKey) # Encrypt the value - encrypted_bytes = fernet.encrypt(value.encode('utf-8')) - encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8') + encryptedBytes = fernet.encrypt(value.encode('utf-8')) + encryptedB64 = base64.urlsafe_b64encode(encryptedBytes).decode('utf-8') # Add environment prefix - prefix = _get_encryption_prefix(env_type) - encrypted_value = f"{prefix}{encrypted_b64}" + prefix = _getEncryptionPrefix(envType) + encryptedValue = f"{prefix}{encryptedB64}" # Log audit event for encryption try: from modules.shared.auditLogger import audit_logger - audit_logger.log_key_access( - user_id=user_id, - mandate_id="system", - key_name=key_name, + audit_logger.logKeyAccess( + userId=userId, + mandateId="system", + keyName=keyName, action="encrypt" ) except Exception: # Don't fail if audit logging fails pass - return encrypted_value + return 
encryptedValue except Exception as e: raise ValueError(f"Encryption failed: {e}") -def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str: +def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str: """ Decrypt a value using the master key for the current environment. Args: - encrypted_value: The encrypted value with prefix - user_id: The user ID making the request (default: "system") - key_name: The name of the key being decrypted (default: "unknown") + encryptedValue: The encrypted value with prefix + userId: The user ID making the request (default: "system") + keyName: The name of the key being decrypted (default: "unknown") Returns: str: The decrypted plain text value @@ -496,59 +496,59 @@ def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = Raises: ValueError: If decryption fails """ - if not _is_encrypted_value(encrypted_value): - return encrypted_value # Return as-is if not encrypted + if not _isEncryptedValue(encryptedValue): + return encryptedValue # Return as-is if not encrypted # Check rate limiting (10 per second per user per key) - if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10): - raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)") + if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10): + raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)") try: # Extract environment type from prefix - if encrypted_value.startswith('DEV_ENC:'): - env_type = 'dev' + if encryptedValue.startswith('DEV_ENC:'): + envType = 'dev' prefix = 'DEV_ENC:' - elif encrypted_value.startswith('INT_ENC:'): - env_type = 'int' + elif encryptedValue.startswith('INT_ENC:'): + envType = 'int' prefix = 'INT_ENC:' - elif encrypted_value.startswith('PROD_ENC:'): - env_type = 'prod' + elif encryptedValue.startswith('PROD_ENC:'): + envType = 'prod' prefix = 
'PROD_ENC:' - elif encrypted_value.startswith('TEST_ENC:'): - env_type = 'test' + elif encryptedValue.startswith('TEST_ENC:'): + envType = 'test' prefix = 'TEST_ENC:' - elif encrypted_value.startswith('STAGING_ENC:'): - env_type = 'staging' + elif encryptedValue.startswith('STAGING_ENC:'): + envType = 'staging' prefix = 'STAGING_ENC:' else: raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:") - encrypted_part = encrypted_value[len(prefix):] + encryptedPart = encryptedValue[len(prefix):] # Get master key for the specific environment and derive encryption key - master_key = _get_master_key(env_type) - derived_key = _derive_encryption_key(master_key) - fernet = Fernet(derived_key) + masterKey = _getMasterKey(envType) + derivedKey = _deriveEncryptionKey(masterKey) + fernet = Fernet(derivedKey) # Decode and decrypt - encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8')) - decrypted_bytes = fernet.decrypt(encrypted_bytes) - decrypted_value = decrypted_bytes.decode('utf-8') + encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8')) + decryptedBytes = fernet.decrypt(encryptedBytes) + decryptedValue = decryptedBytes.decode('utf-8') # Log audit event for decryption try: from modules.shared.auditLogger import audit_logger - audit_logger.log_key_access( - user_id=user_id, - mandate_id="system", - key_name=key_name, + audit_logger.logKeyAccess( + userId=userId, + mandateId="system", + keyName=keyName, action="decrypt" ) except Exception: # Don't fail if audit logging fails pass - return decrypted_value + return decryptedValue except Exception as e: raise ValueError(f"Decryption failed: {e}") diff --git a/modules/shared/debugLogger.py b/modules/shared/debugLogger.py index a4fd0032..69e2f39c 100644 --- a/modules/shared/debugLogger.py +++ b/modules/shared/debugLogger.py @@ -120,8 +120,8 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None: debug_file = 
os.path.join(debug_dir, "debug_workflow.log") # Format the debug entry - from modules.shared.timezoneUtils import get_utc_timestamp - timestamp = get_utc_timestamp() + from modules.shared.timezoneUtils import getUtcTimestamp + timestamp = getUtcTimestamp() debug_entry = f"[{timestamp}] [{context}] {message}\n" # Write to debug file diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index 12b044f1..71b807b4 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -102,7 +102,7 @@ def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]: return obj -def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]: +def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]: """ Generic merger for root-level lists: take first dict as base; for each subsequent part: - if value is list and same key exists as list, extend it @@ -112,7 +112,7 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]: """ base: Optional[Dict[str, Any]] = None parsed: List[Dict[str, Any]] = [] - for part in json_parts: + for part in jsonParts: if isinstance(part, (dict, list)): obj = part else: @@ -146,61 +146,61 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: # Strategy 1: Try to extract sections from the entire text first # This handles cases where the JSON structure is broken but content is intact - extracted_sections = _extractSectionsRegex(text) - if extracted_sections: - logger.info(f"Extracted {len(extracted_sections)} sections using regex") + extractedSections = _extractSectionsRegex(text) + if extractedSections: + logger.info(f"Extracted {len(extractedSections)} sections using regex") return { "metadata": { "split_strategy": "single_document", "source_documents": [], "extraction_method": "ai_generation" }, - "documents": [{"sections": extracted_sections}] + "documents": [{"sections": extractedSections}] } # Strategy 2: Progressive parsing - try to find longest 
valid prefix - best_result = None - best_valid_length = 0 + bestResult = None + bestValidLength = 0 # Try different step sizes to find the best valid JSON - for step_size in [100, 50, 10, 1]: - for i in range(len(text), 0, -step_size): - test_str = text[:i] - closed_str = _closeJsonStructures(test_str) - obj, err, _ = tryParseJson(closed_str) + for stepSize in [100, 50, 10, 1]: + for i in range(len(text), 0, -stepSize): + testStr = text[:i] + closedStr = _closeJsonStructures(testStr) + obj, err, _ = tryParseJson(closedStr) if err is None and isinstance(obj, dict): - best_result = obj - best_valid_length = i - logger.debug(f"Progressive parsing success at length {i} (step: {step_size})") + bestResult = obj + bestValidLength = i + logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})") break - if best_result: + if bestResult: break - if best_result: - logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})") + if bestResult: + logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})") # Check if we have sections in the result - sections = extractSectionsFromDocument(best_result) + sections = extractSectionsFromDocument(bestResult) if sections: logger.info(f"Progressive parsing found {len(sections)} sections") - return best_result + return bestResult else: # No sections found in progressive parsing, try to extract from broken part logger.info("Progressive parsing found no sections, trying to extract from broken part") - extracted_sections = _extractSectionsRegex(text[best_valid_length:]) - if extracted_sections: - logger.info(f"Extracted {len(extracted_sections)} sections from broken part") + extractedSections = _extractSectionsRegex(text[bestValidLength:]) + if extractedSections: + logger.info(f"Extracted {len(extractedSections)} sections from broken part") # Merge with the valid part - if "documents" not in best_result: - best_result["documents"] = [] - if not 
best_result["documents"]: - best_result["documents"] = [{"sections": []}] - best_result["documents"][0]["sections"].extend(extracted_sections) - return best_result + if "documents" not in bestResult: + bestResult["documents"] = [] + if not bestResult["documents"]: + bestResult["documents"] = [{"sections": []}] + bestResult["documents"][0]["sections"].extend(extractedSections) + return bestResult # Strategy 3: Structure closing - close incomplete structures - closed_str = _closeJsonStructures(text) - obj, err, _ = tryParseJson(closed_str) + closedStr = _closeJsonStructures(text) + obj, err, _ = tryParseJson(closedStr) if err is None and isinstance(obj, dict): logger.info("Repaired JSON using structure closing") return obj @@ -217,16 +217,16 @@ def _closeJsonStructures(text: str) -> str: return text # Count open/close brackets and braces - open_braces = text.count('{') - close_braces = text.count('}') - open_brackets = text.count('[') - close_brackets = text.count(']') + openBraces = text.count('{') + closeBraces = text.count('}') + openBrackets = text.count('[') + closeBrackets = text.count(']') # Close incomplete structures result = text - for _ in range(open_braces - close_braces): + for _ in range(openBraces - closeBraces): result += '}' - for _ in range(open_brackets - close_brackets): + for _ in range(openBrackets - closeBrackets): result += ']' return result @@ -242,32 +242,32 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]: sections = [] # Pattern to find section objects - section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)' + sectionPattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)' - for match in re.finditer(section_pattern, text, re.IGNORECASE): - section_id = match.group(1) - content_type = match.group(2) + for match in re.finditer(sectionPattern, text, re.IGNORECASE): + sectionId = match.group(1) + contentType = match.group(2) 
order = int(match.group(3)) # Try to extract elements array - look for the elements array after this section - elements_match = re.search( + elementsMatch = re.search( r'"elements"\s*:\s*\[(.*?)\]', text[match.end():match.end()+5000] # Look ahead for elements (large range) ) elements = [] - if elements_match: + if elementsMatch: try: - elements_str = '[' + elements_match.group(1) + ']' - elements = json.loads(elements_str) + elementsStr = '[' + elementsMatch.group(1) + ']' + elements = json.loads(elementsStr) except: # If JSON parsing fails, try to extract individual items manually - elements_text = elements_match.group(1) - elements = _extractElementsFromText(elements_text, content_type) + elementsText = elementsMatch.group(1) + elements = _extractElementsFromText(elementsText, contentType) sections.append({ - "id": section_id, - "content_type": content_type, + "id": sectionId, + "content_type": contentType, "elements": elements, "order": order }) @@ -279,7 +279,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]: return sections -def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]: +def _extractElementsFromText(elementsText: str, contentType: str) -> List[Dict[str, Any]]: """ Extract elements from text when JSON parsing fails. Generic approach that works for any content type. 
@@ -290,11 +290,11 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict elements = [] - if content_type == "list": + if contentType == "list": # Look for {"text": "..."} patterns, including incomplete ones - text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text) + text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText) # Also look for incomplete patterns like {"text": "36 - incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) + incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText) # Combine both complete and incomplete items all_items = text_items + incomplete_items @@ -303,41 +303,41 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict # Remove the last item if it appears to be incomplete/corrupted if unique_items: - unique_items = _removeLastIncompleteItem(unique_items, elements_text) + unique_items = _removeLastIncompleteItem(unique_items, elementsText) elements = [{"text": item} for item in unique_items] - elif content_type == "paragraph": + elif contentType == "paragraph": # Look for {"text": "..."} patterns, including incomplete ones - text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text) - incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) + text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText) + incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText) all_items = text_items + incomplete_items unique_items = list(dict.fromkeys([item for item in all_items if item.strip()])) # Remove the last item if it appears to be incomplete/corrupted if unique_items: - unique_items = _removeLastIncompleteItem(unique_items, elements_text) + unique_items = _removeLastIncompleteItem(unique_items, elementsText) elements = [{"text": item} for item in unique_items] - elif content_type == "heading": + elif contentType == "heading": # Look for {"level": 
X, "text": "..."} patterns, including incomplete ones - heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text) - incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) + heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elementsText) + incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText) all_items = heading_items + incomplete_heading_items unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()])) # Remove the last item if it appears to be incomplete/corrupted if unique_items: - unique_items = _removeLastIncompleteItem(unique_items, elements_text) + unique_items = _removeLastIncompleteItem(unique_items, elementsText) elements = [{"level": level, "text": text} for level, text in unique_items] - elif content_type == "table": + elif contentType == "table": # Look for table patterns - table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text) + table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elementsText) for headers_str, rows_str, caption in table_items: # Extract headers headers = re.findall(r'"([^"]+)"', headers_str) @@ -354,31 +354,31 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict "caption": caption }) - elif content_type == "code": + elif contentType == "code": # Look for {"code": "...", "language": "..."} patterns, including incomplete ones - code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text) - incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) + code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elementsText) + 
incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elementsText) all_items = code_items + [(code, "unknown") for code in incomplete_code_items] unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()])) # Remove the last item if it appears to be incomplete/corrupted if unique_items: - unique_items = _removeLastIncompleteItem(unique_items, elements_text) + unique_items = _removeLastIncompleteItem(unique_items, elementsText) elements = [{"code": code, "language": lang} for code, lang in unique_items] else: # Generic fallback - look for any text content, including incomplete - text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text) - incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) + text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elementsText) + incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText) all_items = text_items + incomplete_text_items unique_items = list(dict.fromkeys([item for item in all_items if item.strip()])) # Remove the last item if it appears to be incomplete/corrupted if unique_items: - unique_items = _removeLastIncompleteItem(unique_items, elements_text) + unique_items = _removeLastIncompleteItem(unique_items, elementsText) elements = [{"text": item} for item in unique_items] diff --git a/modules/shared/timezoneUtils.py b/modules/shared/timezoneUtils.py index 1656f0f6..4e2141b7 100644 --- a/modules/shared/timezoneUtils.py +++ b/modules/shared/timezoneUtils.py @@ -6,7 +6,7 @@ Ensures all timestamps are properly handled as UTC. from datetime import datetime, timezone import time -def get_utc_now() -> datetime: +def getUtcNow() -> datetime: """ Get current time in UTC with timezone info. 
@@ -15,7 +15,7 @@ def get_utc_now() -> datetime: """ return datetime.now(timezone.utc) -def get_utc_timestamp() -> float: +def getUtcTimestamp() -> float: """ Get current UTC timestamp (seconds since epoch with millisecond precision). @@ -24,14 +24,14 @@ def get_utc_timestamp() -> float: """ return time.time() -def create_expiration_timestamp(expires_in_seconds: int) -> float: +def createExpirationTimestamp(expiresInSeconds: int) -> float: """ Create a new expiration timestamp from seconds until expiration. Args: - expires_in_seconds (int): Seconds until expiration + expiresInSeconds (int): Seconds until expiration Returns: float: UTC timestamp in seconds """ - return get_utc_timestamp() + expires_in_seconds + return getUtcTimestamp() + expiresInSeconds \ No newline at end of file diff --git a/modules/workflows/processing/adaptive/adaptiveLearningEngine.py b/modules/workflows/processing/adaptive/adaptiveLearningEngine.py index 107821e7..5573e93b 100644 --- a/modules/workflows/processing/adaptive/adaptiveLearningEngine.py +++ b/modules/workflows/processing/adaptive/adaptiveLearningEngine.py @@ -22,13 +22,11 @@ class AdaptiveLearningEngine: workflowId: str, attemptNumber: int): """Record validation result and learn from it""" try: - actionType = actionContext.get('actionType', 'unknown') actionName = actionContext.get('actionName', 'unknown') # Store validation history validationEntry = { 'workflowId': workflowId, - 'actionType': actionType, 'actionName': actionName, 'attemptNumber': attemptNumber, 'validationResult': validationResult, @@ -42,17 +40,17 @@ class AdaptiveLearningEngine: # Track patterns if validationResult.get('overallSuccess', False): - self.successPatterns[actionType].append(validationEntry) + self.successPatterns[actionName].append(validationEntry) else: - self.failurePatterns[actionType].append(validationEntry) + self.failurePatterns[actionName].append(validationEntry) # Update attempt count - self.actionAttempts[f"{workflowId}:{actionType}"] += 1 
+ self.actionAttempts[f"{workflowId}:{actionName}"] += 1 # Generate learning insights - self._generateLearningInsights(workflowId, actionType) + self._generateLearningInsights(workflowId, actionName) - logger.info(f"Recorded validation for {actionType} (attempt {attemptNumber}): " + logger.info(f"Recorded validation for {actionName} (attempt {attemptNumber}): " f"Success={validationResult.get('overallSuccess', False)}, " f"Quality={validationResult.get('qualityScore', 0.0)}") @@ -86,21 +84,21 @@ class AdaptiveLearningEngine: logger.error(f"Error generating adaptive context: {str(e)}") return {} - def getAdaptiveContextForParameters(self, workflowId: str, actionType: str, + def getAdaptiveContextForParameters(self, workflowId: str, actionName: str, parametersContext: str) -> Dict[str, Any]: """Generate adaptive context for parameter selection prompt""" try: - # Get validation history for this specific action type + # Get validation history for this specific action name actionValidations = [ v for v in self.validationHistory - if v['workflowId'] == workflowId and v['actionType'] == actionType + if v['workflowId'] == workflowId and v['actionName'] == actionName ][-3:] # Last 3 attempts for this action # Analyze what went wrong in previous attempts failureAnalysis = self._analyzeParameterFailures(actionValidations) # Generate specific parameter guidance - parameterGuidance = self._generateParameterGuidance(actionType, parametersContext, failureAnalysis) + parameterGuidance = self._generateParameterGuidance(actionName, parametersContext, failureAnalysis) return { 'actionValidations': actionValidations, @@ -206,36 +204,28 @@ class AdaptiveLearningEngine: return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available." 
- def _generateParameterGuidance(self, actionType: str, parametersContext: str, + def _generateParameterGuidance(self, actionName: str, parametersContext: str, failureAnalysis: Dict[str, Any]) -> str: - """Generate specific parameter guidance based on previous failures""" + """Generate generic parameter guidance based on previous failures (no app-specific logic).""" if not failureAnalysis.get('hasFailures', False): return "No previous parameter failures. Use standard parameter values." - guidance_parts = [] + guidanceParts = [] - # Add attempt awareness + # Attempt awareness attemptNumber = failureAnalysis.get('attemptNumber', 1) - if attemptNumber >= 3: - guidance_parts.append(f"ATTEMPT #{attemptNumber}: Previous attempts failed. Adjust parameters based on validation feedback.") + if attemptNumber and attemptNumber >= 3: + guidanceParts.append(f"Attempt #{attemptNumber}: Adjust parameters based on validation feedback.") - # Add specific parameter guidance based on action type - if actionType == "outlook.composeAndSendEmailWithContext": - guidance_parts.append("EMAIL PARAMETER GUIDANCE:") - guidance_parts.append("- context: Be very specific about account (valueon), appointment time (Friday), and requirements") - guidance_parts.append("- emailStyle: Use 'formal' for business emails") - guidance_parts.append("- maxLength: Set to 2000+ for detailed emails with summaries") - - # Add specific guidance based on common failures - commonIssues = failureAnalysis.get('commonIssues', {}) - if any("account" in str(issue).lower() for issue in commonIssues.keys()): - guidance_parts.append("- context: MUST specify 'from valueon account' explicitly") - if any("attachment" in str(issue).lower() for issue in commonIssues.keys()): - guidance_parts.append("- documentList: Ensure PDF is properly referenced") - if any("summary" in str(issue).lower() for issue in commonIssues.keys()): - guidance_parts.append("- context: MUST request '10-12 sentence German summary' explicitly") + # 
Generic issues summary + commonIssues = failureAnalysis.get('commonIssues', {}) or {} + if commonIssues: + guidanceParts.append("Address the following parameter issues:") + for issueKey, issueDesc in commonIssues.items(): + guidanceParts.append(f"- {issueKey}: {issueDesc}") - return "\n".join(guidance_parts) if guidance_parts else "Use standard parameter values." + # Keep guidance format stable + return "\n".join(guidanceParts) if guidanceParts else "Use standard parameter values." def _getEscalationLevel(self, workflowId: str) -> str: """Determine escalation level based on failure patterns""" @@ -251,7 +241,7 @@ class AdaptiveLearningEngine: else: return "low" - def _generateLearningInsights(self, workflowId: str, actionType: str): + def _generateLearningInsights(self, workflowId: str, actionName: str): """Generate learning insights for a workflow""" if workflowId not in self.learningInsights: self.learningInsights[workflowId] = {} @@ -263,7 +253,7 @@ class AdaptiveLearningEngine: 'totalAttempts': len(workflowValidations), 'successfulAttempts': len([v for v in workflowValidations if v['success']]), 'failedAttempts': len([v for v in workflowValidations if not v['success']]), - 'lastActionType': actionType, + 'lastActionName': actionName, 'escalationLevel': self._getEscalationLevel(workflowId) } diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 3ef531d5..6ac81df8 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -26,14 +26,14 @@ class ContentValidator: if isinstance(data, dict) and 'content' in data: content = data['content'] # For large content, check size before converting to string - if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold + if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold # For very large content, return a size indicator 
instead return f"[Large document content - {len(str(content))} characters - truncated for validation]" return str(content) else: content = data # For large content, check size before converting to string - if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold + if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold return f"[Large document content - {len(str(content))} characters - truncated for validation]" return str(content) return "" diff --git a/modules/workflows/processing/adaptive/intentAnalyzer.py b/modules/workflows/processing/adaptive/intentAnalyzer.py index 324549b3..2dd99a94 100644 --- a/modules/workflows/processing/adaptive/intentAnalyzer.py +++ b/modules/workflows/processing/adaptive/intentAnalyzer.py @@ -30,7 +30,7 @@ class IntentAnalyzer: analysisPrompt = f""" You are an intent analyzer. Analyze the user's request to understand what they want delivered. -USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')} +USER REQUEST: {self.services.utils.sanitizePromptContent(userPrompt, 'userinput')} CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''} diff --git a/modules/workflows/processing/modes/modeActionplan.py b/modules/workflows/processing/modes/modeActionplan.py index 9a54c43e..06a8ae4b 100644 --- a/modules/workflows/processing/modes/modeActionplan.py +++ b/modules/workflows/processing/modes/modeActionplan.py @@ -571,7 +571,7 @@ class ActionplanMode(BaseMode): actionData["execParameters"] = {} # Use generic field separation based on ActionItem model - simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) + simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData) # Create action in database createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) @@ -715,7 +715,7 @@ class 
ActionplanMode(BaseMode): actionData["execParameters"] = {} # Use generic field separation based on ActionItem model - simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) + simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData) # Create action in database createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) diff --git a/modules/workflows/processing/modes/modeReact.py b/modules/workflows/processing/modes/modeReact.py index 0dc580bf..6aa6505a 100644 --- a/modules/workflows/processing/modes/modeReact.py +++ b/modules/workflows/processing/modes/modeReact.py @@ -98,7 +98,12 @@ class ReactMode(BaseMode): # NEW: Add content validation (against original cleaned user prompt / workflow intent) if getattr(self, 'workflowIntent', None) and result.documents: - validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent) + # Validate ONLY the produced JSON (structured content), not rendered files + from types import SimpleNamespace + validationDocs = [] + if hasattr(result, 'content') and result.content: + validationDocs.append(SimpleNamespace(documentName='generated.json', documentData={'content': result.content})) + validationResult = await self.contentValidator.validateContent(validationDocs, self.workflowIntent) observation['contentValidation'] = validationResult quality_score = validationResult.get('qualityScore', 0.0) if quality_score is None: @@ -106,9 +111,9 @@ class ReactMode(BaseMode): logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})") # NEW: Record validation result for adaptive learning + actionValue = selection.get('action', 'unknown') actionContext = { - 'actionType': selection.get('action', {}).get('action', 'unknown'), - 'actionName': selection.get('action', {}).get('action', 'unknown'), + 'actionName': actionValue, 'workflowId': 
context.workflow_id } @@ -747,7 +752,7 @@ Return only the user-friendly message, no technical details.""" actionData["execParameters"] = {} # Use generic field separation based on ActionItem model - simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) + simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData) # Create action in database createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) @@ -838,7 +843,7 @@ Return only the user-friendly message, no technical details.""" actionData["execParameters"] = {} # Use generic field separation based on ActionItem model - simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) + simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData) # Create action in database createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index 9bfc6070..9996b455 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -215,7 +215,7 @@ class WorkflowManager: " }\n" " ]\n" "}\n\n" - f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}" + f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}" ) # Call AI analyzer (planning call - will use static parameters) diff --git a/naming_violations_report.csv b/naming_violations_report.csv new file mode 100644 index 00000000..a80259f6 --- /dev/null +++ b/naming_violations_report.csv @@ -0,0 +1,107 @@ +Module,Function Names,Parameter Names,Variable Names,Total +modules/workflows/methods/methodSharepoint.py,0,2,211,213 +modules/workflows/methods/methodOutlook.py,0,3,131,134 +modules/services/serviceAi/subDocumentProcessing.py,0,0,104,104 
+modules/features/syncDelta/mainSyncDelta.py,1,10,88,99 +modules/shared/jsonUtils.py,0,3,88,91 +modules/services/serviceGeneration/renderers/rendererDocx.py,3,8,79,90 +modules/services/serviceWorkflow/mainServiceWorkflow.py,0,3,85,88 +modules/services/serviceGeneration/renderers/rendererPptx.py,2,7,73,82 +modules/services/serviceGeneration/renderers/rendererPdf.py,3,8,50,61 +modules/connectors/connectorVoiceGoogle.py,1,2,52,55 +modules/services/serviceGeneration/renderers/rendererHtml.py,3,6,46,55 +modules/services/serviceGeneration/renderers/rendererBaseTemplate.py,3,21,27,51 +modules/shared/configuration.py,2,17,30,49 +modules/services/serviceExtraction/subMerger.py,2,5,31,38 +modules/connectors/connectorDbPostgre.py,0,14,20,34 +modules/interfaces/interfaceDbAppObjects.py,0,8,26,34 +modules/routes/routeSecurityGoogle.py,0,0,32,32 +modules/shared/attributeUtils.py,3,4,25,32 +modules/interfaces/interfaceDbChatObjects.py,0,4,27,31 +modules/routes/routeSecurityAdmin.py,0,2,28,30 +modules/services/serviceNeutralization/subProcessList.py,7,0,22,29 +modules/services/serviceGeneration/renderers/rendererText.py,3,7,19,29 +modules/routes/routeSecurityMsft.py,0,0,27,27 +modules/services/serviceGeneration/renderers/rendererMarkdown.py,3,7,17,27 +modules/services/serviceGeneration/renderers/rendererXlsx.py,3,0,24,27 +modules/services/serviceGeneration/renderers/rendererImage.py,3,2,21,26 +modules/security/tokenManager.py,4,7,14,25 +modules/workflows/workflowManager.py,0,0,25,25 +modules/services/serviceGeneration/renderers/rendererCsv.py,3,5,17,25 +modules/shared/auditLogger.py,5,16,3,24 +modules/shared/debugLogger.py,0,0,24,24 +modules/workflows/processing/shared/placeholderFactory.py,0,0,24,24 +modules/interfaces/interfaceDbAppAccess.py,0,2,21,23 +modules/connectors/connectorTicketsJira.py,0,0,22,22 +modules/services/serviceGeneration/renderers/registry.py,7,3,12,22 +modules/routes/routeDataConnections.py,1,1,19,21 +modules/security/tokenRefreshService.py,0,2,19,21 
+modules/services/serviceExtraction/extractors/extractorPptx.py,0,1,16,17 +modules/routes/routeSecurityLocal.py,0,0,16,16 +modules/workflows/methods/methodBase.py,0,4,12,16 +modules/services/serviceGeneration/mainServiceGeneration.py,0,4,11,15 +modules/services/serviceUtils/mainServiceUtils.py,0,14,1,15 +modules/features/neutralizePlayground/mainNeutralizePlayground.py,8,5,2,15 +modules/interfaces/interfaceTicketObjects.py,0,5,9,14 +modules/services/serviceNeutralization/subParseString.py,7,0,6,13 +modules/workflows/processing/modes/modeReact.py,0,1,11,12 +modules/interfaces/interfaceDbComponentAccess.py,0,2,9,11 +modules/services/serviceAi/subCoreAi.py,0,0,11,11 +modules/services/serviceExtraction/subRegistry.py,0,0,11,11 +modules/services/serviceNeutralization/mainServiceNeutralization.py,0,2,9,11 +modules/interfaces/interfaceAiObjects.py,0,0,10,10 +modules/services/serviceAi/subSharedAiUtils.py,0,3,7,10 +modules/connectors/connectorDbJson.py,0,3,6,9 +modules/workflows/methods/methodAi.py,0,0,9,9 +modules/services/serviceExtraction/subPromptBuilderExtraction.py,0,0,9,9 +modules/services/serviceGeneration/subDocumentUtility.py,0,3,6,9 +modules/services/serviceNeutralization/subProcessCommon.py,7,2,0,9 +modules/services/serviceNeutralization/subProcessText.py,5,0,4,9 +modules/interfaces/interfaceDbChatAccess.py,0,2,6,8 +modules/security/auth.py,0,1,7,8 +modules/aicore/aicorePluginAnthropic.py,0,0,7,7 +modules/security/tokenRefreshMiddleware.py,0,2,4,6 +modules/services/serviceGeneration/renderers/rendererJson.py,3,0,3,6 +analyze_naming_violations.py,5,0,0,5 +modules/aicore/aicorePluginOpenai.py,0,0,5,5 +modules/routes/routeVoiceGoogle.py,0,0,5,5 +modules/shared/eventManagement.py,2,3,0,5 +modules/workflows/processing/adaptive/intentAnalyzer.py,0,0,5,5 +modules/workflows/processing/shared/executionState.py,0,5,0,5 +modules/services/serviceGeneration/subJsonSchema.py,0,0,5,5 +modules/services/serviceNeutralization/subPatterns.py,5,0,0,5 
+modules/services/serviceNeutralization/subProcessBinary.py,4,0,1,5 +modules/services/serviceExtraction/extractors/extractorXlsx.py,0,0,5,5 +modules/interfaces/interfaceDbComponentObjects.py,0,3,1,4 +modules/routes/routeDataNeutralization.py,0,0,4,4 +modules/routes/routeWorkflows.py,0,0,4,4 +modules/shared/timezoneUtils.py,3,1,0,4 +modules/workflows/processing/adaptive/contentValidator.py,0,0,4,4 +modules/workflows/processing/core/messageCreator.py,0,0,4,4 +modules/services/serviceSharepoint/mainServiceSharepoint.py,0,0,4,4 +modules/routes/routeDataUsers.py,0,0,3,3 +modules/services/serviceExtraction/subPipeline.py,0,0,3,3 +app.py,0,0,2,2 +modules/datamodels/datamodelChat.py,0,1,1,2 +modules/routes/routeAttributes.py,0,0,2,2 +modules/routes/routeDataPrompts.py,0,0,2,2 +modules/security/csrf.py,0,1,1,2 +modules/security/jwtService.py,0,0,2,2 +modules/workflows/processing/adaptive/learningEngine.py,0,0,2,2 +modules/workflows/processing/modes/modeActionplan.py,0,0,2,2 +modules/workflows/processing/shared/methodDiscovery.py,0,0,2,2 +modules/services/serviceNormalization/mainServiceNormalization.py,0,0,2,2 +modules/services/serviceExtraction/extractors/extractorImage.py,0,0,2,2 +modules/aicore/aicoreBase.py,0,0,1,1 +modules/aicore/aicoreModelSelector.py,0,0,1,1 +modules/connectors/connectorTicketsClickup.py,0,0,1,1 +modules/datamodels/datamodelDocument.py,0,1,0,1 +modules/datamodels/datamodelSecurity.py,0,0,1,1 +modules/routes/routeAdmin.py,0,0,1,1 +modules/routes/routeDataFiles.py,0,0,1,1 +modules/workflows/processing/workflowProcessor.py,0,0,1,1 +modules/workflows/processing/adaptive/adaptiveLearningEngine.py,0,0,1,1 +modules/workflows/processing/core/actionExecutor.py,0,0,1,1 +modules/workflows/processing/core/taskPlanner.py,0,0,1,1 +modules/workflows/processing/modes/modeBase.py,0,0,1,1 +modules/services/serviceAi/subDocumentGeneration.py,0,0,1,1 diff --git a/processDocumentsWithContinuation_usage_analysis.md b/processDocumentsWithContinuation_usage_analysis.md new 
file mode 100644 index 00000000..3dedc405 --- /dev/null +++ b/processDocumentsWithContinuation_usage_analysis.md @@ -0,0 +1,184 @@ +# Analysis: `processDocumentsWithContinuation` and Subfunctions Usage + +## Executive Summary + +**FINDING**: The function `processDocumentsWithContinuation` in `subDocumentProcessing.py` is **NOT USED** anywhere in the active codebase. The continuation chain was only referenced by the deleted `subDocumentGeneration.py` module. + +--- + +## Main Function: `processDocumentsWithContinuation` + +**Location**: `gateway/modules/services/serviceAi/subDocumentProcessing.py:303` + +**Status**: ❌ **NOT USED** + +### Usage Search Results + +- ❌ No actual code calls to `.processDocumentsWithContinuation(` +- ⚠️ Only mentioned in documentation files: + - `wiki/poweron/appdoc/doc_system_function_relationship_ai.md` (documentation) + - `gateway/callAiWithDocumentGeneration_usage_analysis.md` (previous analysis - noted it was called by deleted code) + +### Why It's Not Used + +The only caller was `subDocumentGeneration._processDocumentsUnified()`, which has since been deleted. The current active codebase uses `subCoreAi.callAiDocuments()` which has its own continuation logic via `_callAiWithLooping()`. + +--- + +## Function Call Chain Analysis + +``` +processDocumentsWithContinuation (line 303) - ❌ NOT USED + ├─> _buildContinuationPrompt (line 319, 324) - ❌ ONLY USED HERE + └─> _processWithContinuationLoop (line 322, 373) - ❌ ONLY USED HERE + ├─> _buildContinuationIterationPrompt (line 393, 459) - ❌ ONLY USED HERE + └─> processDocumentsPerChunkJsonWithPrompt (line 402) - ⚠️ ONLY CALLED FROM THIS CHAIN (no other active callers) +``` + +--- + +## Subfunction Analysis + +### 1.
`_buildContinuationPrompt` +**Location**: Line 324-371 +**Status**: ✅ **USED** (but only internally) +**Called by**: `processDocumentsWithContinuation` (line 319) +**Effectively**: ❌ **UNUSED** (because parent function is unused) + +**Internal Usage**: +- Called from `processDocumentsWithContinuation` at line 319 + +**Functionality**: +- Builds a prompt with continuation instructions +- Adds JSON structure requirements with `"continue": true/false` flag +- Adds `continuation_context` field specification + +**Note**: This uses a different continuation pattern than `SubCoreAi._callAiWithLooping()`: +- This uses `"continue": true/false + "continuation_context"` for document sections +- SubCoreAi uses `buildContinuationContext()` with `last_raw_json` + +--- + +### 2. `_processWithContinuationLoop` +**Location**: Line 373-457 +**Status**: ✅ **USED** (but only internally) +**Called by**: `processDocumentsWithContinuation` (line 322) +**Effectively**: ❌ **UNUSED** (because parent function is unused) + +**Internal Usage**: +- Called from `processDocumentsWithContinuation` at line 322 + +**External Dependencies**: +- Calls `self._buildContinuationIterationPrompt()` (line 393) +- Calls `self.processDocumentsPerChunkJsonWithPrompt()` (line 402) + +**Functionality**: +- Implements continuation loop (max 10 iterations) +- Accumulates sections across iterations +- Checks `continue` flag and `continuation_context` to determine if more iterations needed +- Builds final result with accumulated sections + +--- + +### 3. 
`_buildContinuationIterationPrompt` +**Location**: Line 459-498 +**Status**: ✅ **USED** (but only internally) +**Called by**: `_processWithContinuationLoop` (line 393) +**Effectively**: ❌ **UNUSED** (because parent chain is unused) + +**Internal Usage**: +- Called from `_processWithContinuationLoop` at line 393 (in loop, conditionally) + +**Functionality**: +- Builds a prompt for continuation iteration with context +- Includes summary of previously generated content (last 3 sections) +- Includes continuation instructions with last section ID, element index, remaining requirements + +--- + +### 4. `processDocumentsPerChunkJsonWithPrompt` +**Location**: Line 219-301 +**Status**: ⚠️ **ONLY USED BY UNUSED CODE** (no other active callers) +**Called by**: + - `_processWithContinuationLoop` (line 402) + - Also referenced in backup files (not active code) + +**Internal Usage**: +- Called from `_processWithContinuationLoop` at line 402 + +**External Usage Search**: +- ✅ Used internally by continuation loop +- ⚠️ Referenced in `local/backup/backup_mainServiceAi.py.txt` (backup file, not active) +- ❌ Not used by any other active code + +**Functionality**: +- Processes documents with per-chunk AI calls +- Uses a custom prompt instead of default extraction prompt +- Returns merged JSON document + +**Note**: The only active caller of this function is the continuation loop, which is itself unused. Unlike the rest of the chain, it is a general-purpose function that could be reused, so it is treated here as "currently unreachable" rather than as dead code to be removed. + +--- + +## Summary Table + +| Function | Line | Status | Called By | Effectively Used?
| +|----------|------|--------|-----------|-------------------| +| `processDocumentsWithContinuation` | 303 | ❌ Not used | (external) | ❌ No | +| `_buildContinuationPrompt` | 324 | ✅ Used internally | `processDocumentsWithContinuation:319` | ❌ No | +| `_processWithContinuationLoop` | 373 | ✅ Used internally | `processDocumentsWithContinuation:322` | ❌ No | +| `_buildContinuationIterationPrompt` | 459 | ✅ Used internally | `_processWithContinuationLoop:393` | ❌ No | +| `processDocumentsPerChunkJsonWithPrompt` | 219 | ✅ Used internally | `_processWithContinuationLoop:402` | ⚠️ **ONLY USED BY UNUSED CODE** | + +--- + +## Current Active Implementation + +The active continuation logic is in `subCoreAi.callAiDocuments()` → `_callAiWithLooping()`: +- Uses `buildGenerationPrompt()` with `continuationContext` parameter +- Uses `buildContinuationContext()` to build context from sections +- Different continuation pattern (uses `last_raw_json` instead of `continuation_context`) + +--- + +## Dead Code Identification + +**Completely Unused Chain** (can be safely removed): +1. ✅ `processDocumentsWithContinuation` - entry point, not called +2. ✅ `_buildContinuationPrompt` - only used by #1 +3. ✅ `_processWithContinuationLoop` - only used by #1 +4. ✅ `_buildContinuationIterationPrompt` - only used by #3 + +**Potentially Unused** (only used by dead code): +- ⚠️ `processDocumentsPerChunkJsonWithPrompt` - only caller is dead code, but function is general-purpose + +--- + +## Recommendations + +1. **Remove Dead Code Chain**: All four functions (`processDocumentsWithContinuation`, `_buildContinuationPrompt`, `_processWithContinuationLoop`, `_buildContinuationIterationPrompt`) can be safely removed. + +2. 
**For `processDocumentsPerChunkJsonWithPrompt`**: + - **Option A**: Remove if not needed (it's only used by the dead continuation chain) + - **Option B**: Keep if it might be useful for future custom prompt processing + - **Recommendation**: Since it's a general-purpose function that could be useful, keep it but note that it's currently unused. + +3. **If Keeping**: Document why this continuation logic exists but is unused, or mark as deprecated/legacy alternative to `_callAiWithLooping()`. + +--- + +## Verification Commands + +To verify these findings: + +```bash +# Search for actual function calls (should return no results for the main function) +grep -r "\.processDocumentsWithContinuation(" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup + +# Search for _buildContinuationPrompt usage (should only find the definition and its single call inside processDocumentsWithContinuation) +grep -r "_buildContinuationPrompt" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md" + +# Search for _processWithContinuationLoop usage (should only find the definition and its single call inside processDocumentsWithContinuation) +grep -r "_processWithContinuationLoop" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md" +``` + diff --git a/tool_security_encrypt_all_env_files.py b/tool_security_encrypt_all_env_files.py index df9cb71a..7e39e720 100644 --- a/tool_security_encrypt_all_env_files.py +++ b/tool_security_encrypt_all_env_files.py @@ -39,7 +39,7 @@ else: # Import encryption functions try: - from modules.shared.configuration import encrypt_value + from modules.shared.configuration import encryptValue except ImportError as e: print(f"Error: Could not import encryption functions from shared.configuration: {e}") print(f"Make sure you're running this script from the gateway directory") @@ -254,7 +254,7 @@ def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_b print(f" 🔐 Encrypting {key}...") # Encrypt the value using the environment type from the file - encrypted_value =
encrypt_value(value, file_env_type) + encrypted_value = encryptValue(value, file_env_type) # Replace the line in the file content new_line = f"{key} = {encrypted_value}\n" diff --git a/tool_security_encrypt_config_value.py b/tool_security_encrypt_config_value.py index c807db66..ced8c7cc 100644 --- a/tool_security_encrypt_config_value.py +++ b/tool_security_encrypt_config_value.py @@ -30,7 +30,7 @@ from datetime import datetime # Add the modules directory to the Python path sys.path.insert(0, str(Path(__file__).parent / 'modules')) -from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value +from modules.shared.configuration import encryptValue, decryptValue, isEncryptedValue def get_env_type_from_file(file_path: Path) -> str: """ @@ -247,7 +247,7 @@ def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool = print(f" 🔐 Encrypting {key}...") # Encrypt the value using the environment type from the file - encrypted_value = encrypt_value(value, file_env_type) + encrypted_value = encryptValue(value, file_env_type) # Replace the line in the file content new_line = f"{key} = {encrypted_value}\n" @@ -360,8 +360,8 @@ def main(): # Handle decryption if args.decrypt: - if _is_encrypted_value(args.decrypt): - decrypted = decrypt_value(args.decrypt) + if isEncryptedValue(args.decrypt): + decrypted = decryptValue(args.decrypt) print(f"Decrypted value: {decrypted}") else: print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)") @@ -411,7 +411,7 @@ def main(): return # Encrypt the value - encrypted_value = encrypt_value(value_to_encrypt, args.env) + encrypted_value = encryptValue(value_to_encrypt, args.env) print(f"\n✓ Encryption successful!") print(f"Environment: {args.env or 'current'}")