diff --git a/analyze_naming_violations.py b/analyze_naming_violations.py
new file mode 100644
index 00000000..a4f9b30f
--- /dev/null
+++ b/analyze_naming_violations.py
@@ -0,0 +1,242 @@
+"""
+Script to analyze codebase for snake_case naming violations that should be camelStyle.
+Excludes routes (decorated endpoint functions) and JSON field names.
+"""
+import ast
+import os
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Tuple
+import csv
+
+# Patterns to exclude (external library interfaces, etc.)
+EXCLUDE_PATTERNS = [
+    r'@.*\.(get|post|put|delete|patch|options|head)',  # FastAPI route decorators
+    r'self\.(db|db_|model|orm)',  # Database ORM attributes
+    r'\.(objects|query|filter|get|all)',  # ORM methods
+    r'(request|response|response_model|status_code)',  # FastAPI params
+    r'(snake_case|kebab-case)',  # String literals
+]
+
+# External library attribute patterns (should not be changed)
+EXTERNAL_LIB_ATTRIBUTES = {
+    'pydantic', 'fastapi', 'sqlalchemy', 'psycopg', 'requests',
+    'aiohttp', 'azure', 'google', 'openai', 'anthropic', 'reportlab',
+    'docx', 'pptx', 'openpyxl', 'json', 'logging', 'datetime', 'typing'
+}
+
+def isRouteFile(filePath: str) -> bool:
+    """Check if file is a route file"""
+    return 'routes' in filePath or 'route' in os.path.basename(filePath).lower()
+
+def shouldExcludeName(name: str, context: str = "") -> bool:
+    """Check if a name should be excluded from analysis"""
+    # Skip if it's a builtin or external library attribute
+    if name.startswith('__') and name.endswith('__'):
+        return True
+
+    # Skip if context suggests external library usage
+    for pattern in EXCLUDE_PATTERNS:
+        if re.search(pattern, context, re.IGNORECASE):
+            return True
+
+    return False
+
+def isSnakeCase(name: str) -> bool:
+    """Check if a name is snake_case"""
+    if not name or name.startswith('_'):
+        return False
+    # Check if contains underscore and is not all caps
+    return '_' in name and not name.isupper()
+
+def analyzeFile(filePath: str) -> Dict[str, List[str]]:
+    """Analyze a Python file for naming violations"""
+    violations = {
+        'functions': [],
+        'parameters': [],
+        'variables': []
+    }
+
+    try:
+        with open(filePath, 'r', encoding='utf-8') as f:
+            content = f.read()
+        tree = ast.parse(content, filename=filePath)
+    except (SyntaxError, UnicodeDecodeError):
+        return violations
+
+    # Track current context
+    currentClass = None
+    inRouteDecorator = False
+
+    class NamingAnalyzer(ast.NodeVisitor):
+        def __init__(self):
+            self.violations = violations
+            self.currentClass = None
+            self.inRouteDecorator = False
+            self.functionDefs = []
+
+        def visit_FunctionDef(self, node):
+            # Check if this is a route endpoint (has FastAPI decorator)
+            isRouteEndpoint = False
+            for decorator in node.decorator_list:
+                if isinstance(decorator, ast.Attribute):
+                    if decorator.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
+                        isRouteEndpoint = True
+                        break
+                elif isinstance(decorator, ast.Call):
+                    if isinstance(decorator.func, ast.Attribute):
+                        if decorator.func.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
+                            isRouteEndpoint = True
+                            break
+
+            # Skip route endpoint function names
+            # But we still need to check their parameters and variables
+            funcName = node.name
+            if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName):
+                self.violations['functions'].append(f"{funcName} (line {node.lineno})")
+
+            # Analyze parameters
+            for arg in node.args.args:
+                if arg.arg != 'self' and arg.arg != 'cls':
+ paramName = arg.arg + if isSnakeCase(paramName) and not shouldExcludeName(paramName): + self.violations['parameters'].append(f"{paramName} in {funcName} (line {node.lineno})") + + # Analyze function body for local variables + for stmt in node.body: + self.visit(stmt) + + def visit_ClassDef(self, node): + oldClass = self.currentClass + self.currentClass = node.name + self.generic_visit(node) + self.currentClass = oldClass + + def visit_Assign(self, node): + for target in node.targets: + if isinstance(target, ast.Name): + varName = target.id + # Skip constants (ALL_CAPS), builtins, and private (_xxx) + if varName.isupper() or varName.startswith('_'): + continue + # Local variables should be camelStyle + if isSnakeCase(varName) and not shouldExcludeName(varName): + self.violations['variables'].append(f"{varName} (line {node.lineno})") + + def visit_For(self, node): + if isinstance(node.target, ast.Name): + varName = node.target.id + if isSnakeCase(varName) and not shouldExcludeName(varName): + self.violations['variables'].append(f"{varName} (line {node.lineno})") + self.generic_visit(node) + + def visit_With(self, node): + if node.items: + for item in node.items: + if item.optional_vars: + if isinstance(item.optional_vars, ast.Name): + varName = item.optional_vars.id + if isSnakeCase(varName) and not shouldExcludeName(varName): + self.violations['variables'].append(f"{varName} (line {node.lineno})") + self.generic_visit(node) + + analyzer = NamingAnalyzer() + analyzer.visit(tree) + + return violations + +def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]: + """Analyze entire codebase""" + results = defaultdict(lambda: { + 'functions': 0, + 'parameters': 0, + 'variables': 0, + 'details': { + 'functions': [], + 'parameters': [], + 'variables': [] + } + }) + + # Handle both absolute and relative paths + rootPath = Path(rootDir) + if not rootPath.exists(): + # Try relative to current directory + rootPath = Path('.').resolve() / rootDir + if not rootPath.exists(): + # Try just current directory if we're already in gateway + rootPath = Path('.') + + # Find all Python files + for pyFile in rootPath.rglob('*.py'): + # Skip route files for function name analysis (but analyze their internals) + filePath = str(pyFile.relative_to(rootPath)) + + # Skip test files and special scripts + if 'test' in filePath.lower() or 'tool_' in filePath or '__pycache__' in filePath: + continue + + violations = analyzeFile(str(pyFile)) + + # Check if there are any violations + totalViolations = len(violations['functions']) + len(violations['parameters']) + len(violations['variables']) + if totalViolations > 0: + moduleName = filePath.replace('\\', '/') + results[moduleName]['functions'] = len(violations['functions']) + results[moduleName]['parameters'] = len(violations['parameters']) + results[moduleName]['variables'] = len(violations['variables']) + results[moduleName]['details'] = violations + + return results + +def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'): + """Generate CSV report""" + with open(outputFile, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total']) + + # Sort by total violations + sortedResults = sorted( + results.items(), + key=lambda x: x[1]['functions'] + x[1]['parameters'] + x[1]['variables'], + reverse=True + ) + + rowsWritten = 0 + for module, stats in sortedResults: + total = stats['functions'] + stats['parameters'] + 
stats['variables'] + if total > 0: + writer.writerow([ + module, + stats['functions'], + stats['parameters'], + stats['variables'], + total + ]) + rowsWritten += 1 + + if rowsWritten == 0: + print("WARNING: No rows written to CSV despite finding violations!") + + print(f"CSV report generated: {outputFile}") + print(f"Total modules analyzed: {len(results)}") + + # Print summary + totalFuncs = sum(r['functions'] for r in results.values()) + totalParams = sum(r['parameters'] for r in results.values()) + totalVars = sum(r['variables'] for r in results.values()) + print(f"\nSummary:") + print(f" Function names: {totalFuncs}") + print(f" Parameter names: {totalParams}") + print(f" Variable names: {totalVars}") + print(f" Total violations: {totalFuncs + totalParams + totalVars}") + +if __name__ == '__main__': + print("Analyzing codebase for naming violations...") + results = analyzeCodebase('gateway') + + # Write CSV to gateway directory + outputPath = Path('gateway') / 'naming_violations_report.csv' + generateCSV(results, str(outputPath)) + diff --git a/app.py b/app.py index e91a7892..d5254f9c 100644 --- a/app.py +++ b/app.py @@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler): """ def __init__( - self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs + self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs ): - self.log_dir = log_dir - self.filename_prefix = filename_prefix - self.current_date = None - self.current_file = None + self.logDir = logDir + self.filenamePrefix = filenamePrefix + self.currentDate = None + self.currentFile = None # Initialize with today's file - self._update_file_if_needed() + self._updateFileIfNeeded() # Call parent constructor with current file super().__init__( - self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs + self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs ) - def _update_file_if_needed(self): + def _updateFileIfNeeded(self): """Update the log file if the date has changed""" today = datetime.now().strftime("%Y%m%d") - if self.current_date != today: - self.current_date = today - new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log") + if self.currentDate != today: + self.currentDate = today + newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log") - if self.current_file != new_file: - self.current_file = new_file + if self.currentFile != newFile: + self.currentFile = newFile return True return False def emit(self, record): """Emit a log record, switching files if date has changed""" # Check if we need to switch to a new file - if self._update_file_if_needed(): + if self._updateFileIfNeeded(): # Close current file and open new one if self.stream: self.stream.close() self.stream = None # Update the baseFilename for the parent class - self.baseFilename = self.current_file + self.baseFilename = self.currentFile # Reopen the stream if not self.delay: self.stream = self._open() @@ -200,10 +200,10 @@ def initLogging(): backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5)) fileHandler = DailyRotatingFileHandler( - log_dir=logDir, - filename_prefix="log_app", - max_bytes=rotationSize, - backup_count=backupCount, + logDir=logDir, + filenamePrefix="log_app", + maxBytes=rotationSize, + backupCount=backupCount, encoding="utf-8", ) fileHandler.setFormatter(fileFormatter) @@ -252,7 +252,7 @@ def initLogging(): ) -def make_sqlalchemy_db_url() -> str: +def makeSqlalchemyDbUrl() -> str: host = 
APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost") port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432") db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway") @@ -299,17 +299,17 @@ app = FastAPI( # Configure OpenAPI security scheme for Swagger UI # This adds the "Authorize" button to the /docs page -security_scheme = HTTPBearer() +securityScheme = HTTPBearer() app.openapi_schema = None # Reset schema to regenerate with security -def custom_openapi(): +def customOpenapi(): if app.openapi_schema: return app.openapi_schema from fastapi.openapi.utils import get_openapi - openapi_schema = get_openapi( + openapiSchema = get_openapi( title=app.title, version="1.0.0", description=app.description, @@ -317,7 +317,7 @@ def custom_openapi(): ) # Add security scheme definition - openapi_schema["components"]["securitySchemes"] = { + openapiSchema["components"]["securitySchemes"] = { "BearerAuth": { "type": "http", "scheme": "bearer", @@ -328,20 +328,20 @@ def custom_openapi(): # Apply security globally to all endpoints # Individual endpoints can override this if needed - openapi_schema["security"] = [{"BearerAuth": []}] + openapiSchema["security"] = [{"BearerAuth": []}] - app.openapi_schema = openapi_schema + app.openapi_schema = openapiSchema return app.openapi_schema -app.openapi = custom_openapi +app.openapi = customOpenapi # Parse CORS origins from environment variable -def get_allowed_origins(): - origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") +def getAllowedOrigins(): + originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") # Split by comma and strip whitespace - origins = [origin.strip() for origin in origins_str.split(",")] + origins = [origin.strip() for origin in originsStr.split(",")] logger.info(f"CORS allowed origins: {origins}") return origins @@ -349,7 +349,7 @@ def get_allowed_origins(): # CORS configuration using environment variables app.add_middleware( CORSMiddleware, - allow_origins=get_allowed_origins(), + allow_origins=getAllowedOrigins(), allow_credentials=True, allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], allow_headers=["*"], diff --git a/modules/connectors/connectorDbJson.py b/modules/connectors/connectorDbJson.py index 999814db..9ad73e8c 100644 --- a/modules/connectors/connectorDbJson.py +++ b/modules/connectors/connectorDbJson.py @@ -7,7 +7,7 @@ from pydantic import BaseModel import threading import time -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -232,7 +232,7 @@ class DatabaseConnector: raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})") # Add metadata - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() if "_createdAt" not in record: record["_createdAt"] = currentTime record["_createdBy"] = self.userId diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py index ef937a7c..58d17b66 100644 --- a/modules/connectors/connectorDbPostgre.py +++ b/modules/connectors/connectorDbPostgre.py @@ -6,7 +6,7 @@ import uuid from pydantic import BaseModel, Field import threading -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.shared.configuration import APP_CONFIG logger = logging.getLogger(__name__) @@ -287,7 +287,7 @@ class DatabaseConnector: INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt") 
VALUES (%s, %s, %s) """, - (table_name, initial_id, get_utc_timestamp()), + (table_name, initial_id, getUtcTimestamp()), ) self.connection.commit() @@ -611,7 +611,7 @@ class DatabaseConnector: raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}") # Add metadata - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() if "_createdAt" not in record: record["_createdAt"] = currentTime record["_createdBy"] = self.userId diff --git a/modules/connectors/connectorTicketsClickup.py b/modules/connectors/connectorTicketsClickup.py index 7d92f54a..6d22c93b 100644 --- a/modules/connectors/connectorTicketsClickup.py +++ b/modules/connectors/connectorTicketsClickup.py @@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase): "Content-Type": "application/json", } - async def read_attributes(self) -> list[TicketFieldAttribute]: + async def readAttributes(self) -> list[TicketFieldAttribute]: """Fetch field attributes. Uses list custom fields if listId provided; else basic fields.""" attributes: list[TicketFieldAttribute] = [] try: @@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase): logger.error(f"ClickUp read_attributes error: {e}") return attributes - async def read_tasks(self, *, limit: int = 0) -> list[dict]: + async def readTasks(self, *, limit: int = 0) -> list[dict]: """Read tasks from ClickUp, always returning full task records. If list_id is set, read from that list; otherwise read from team. """ @@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase): logger.error(f"ClickUp read_tasks error: {e}") return tasks - async def write_tasks(self, tasklist: list[dict]) -> None: + async def writeTasks(self, tasklist: list[dict]) -> None: """Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}""" try: async with aiohttp.ClientSession() as session: diff --git a/modules/connectors/connectorTicketsJira.py b/modules/connectors/connectorTicketsJira.py index eb665036..27fbc676 100644 --- a/modules/connectors/connectorTicketsJira.py +++ b/modules/connectors/connectorTicketsJira.py @@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase): self.ticketType = ticketType - async def read_attributes(self) -> list[TicketFieldAttribute]: + async def readAttributes(self) -> list[TicketFieldAttribute]: """ Read field attributes from Jira by querying for a single issue and extracting the field mappings. @@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase): logger.error(f"Error while calling fields API: {str(e)}") return [] - async def read_tasks(self, *, limit: int = 0) -> list[dict]: + async def readTasks(self, *, limit: int = 0) -> list[dict]: """ Read tasks from Jira with pagination support. @@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase): logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}") raise - async def write_tasks(self, tasklist: list[dict]) -> None: + async def writeTasks(self, tasklist: list[dict]) -> None: """ Write/update tasks to Jira. 
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index 2bf4aff3..5cb43f35 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -26,18 +26,18 @@ class ConnectorGoogleSpeech: """ try: # Get JSON key from config.ini - api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") + apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") - if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE": + if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE": raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key") # Parse the JSON key and set up authentication try: - credentials_info = json.loads(api_key) + credentialsInfo = json.loads(apiKey) # Create credentials object directly (no file needed!) from google.oauth2 import service_account - credentials = service_account.Credentials.from_service_account_info(credentials_info) + credentials = service_account.Credentials.from_service_account_info(credentialsInfo) logger.info("✅ Using Google Speech credentials from config.ini") @@ -55,8 +55,8 @@ class ConnectorGoogleSpeech: logger.error(f"❌ Failed to initialize Google Cloud clients: {e}") raise - async def speech_to_text(self, audio_content: bytes, language: str = "de-DE", - sample_rate: int = None, channels: int = None) -> Dict: + async def speech_to_text(self, audioContent: bytes, language: str = "de-DE", + sampleRate: int = None, channels: int = None) -> Dict: """ Convert speech to text using Google Cloud Speech-to-Text API. @@ -71,8 +71,8 @@ class ConnectorGoogleSpeech: """ try: # Auto-detect audio format if not provided - if sample_rate is None or channels is None: - validation = self.validate_audio_format(audio_content) + if sampleRate is None or channels is None: + validation = self.validate_audio_format(audioContent) if not validation["valid"]: return { "success": False, @@ -80,59 +80,59 @@ class ConnectorGoogleSpeech: "confidence": 0.0, "error": f"Invalid audio format: {validation.get('error', 'Unknown error')}" } - sample_rate = validation["sample_rate"] + sampleRate = validation["sample_rate"] channels = validation["channels"] - audio_format = validation["format"] - logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch") + audioFormat = validation["format"] + logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch") logger.info(f"Processing audio with Google Cloud Speech-to-Text") - logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch") + logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch") # Configure audio settings - audio = speech.RecognitionAudio(content=audio_content) + audio = speech.RecognitionAudio(content=audioContent) # Determine encoding based on detected format # Google Cloud Speech API has specific requirements for different formats - if audio_format == "webm_opus": + if audioFormat == "webm_opus": # For WEBM OPUS, we need to ensure proper format encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS # WEBM_OPUS requires specific sample rate handling - must match header - if sample_rate != 48000: - logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000") - sample_rate = 48000 + if sampleRate != 48000: + logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000") 
+ sampleRate = 48000 # For WEBM_OPUS, don't specify sample_rate_hertz in config # Google Cloud will read it from the WEBM header - use_sample_rate = False - elif audio_format == "linear16": + useSampleRate = False + elif audioFormat == "linear16": # For LINEAR16 format (PCM) encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 # Ensure sample rate is reasonable - if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]: - logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000") - sample_rate = 16000 - use_sample_rate = True - elif audio_format == "mp3": + if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]: + logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000") + sampleRate = 16000 + useSampleRate = True + elif audioFormat == "mp3": # For MP3 format encoding = speech.RecognitionConfig.AudioEncoding.MP3 - use_sample_rate = True - elif audio_format == "flac": + useSampleRate = True + elif audioFormat == "flac": # For FLAC format encoding = speech.RecognitionConfig.AudioEncoding.FLAC - use_sample_rate = True - elif audio_format == "wav": + useSampleRate = True + elif audioFormat == "wav": # For WAV format encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 - use_sample_rate = True + useSampleRate = True else: # For unknown formats, try LINEAR16 as fallback encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 - sample_rate = 16000 # Use standard sample rate + sampleRate = 16000 # Use standard sample rate channels = 1 # Use mono - use_sample_rate = True - logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz") + useSampleRate = True + logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz") # Build config based on format requirements - config_params = { + configParams = { "encoding": encoding, "audio_channel_count": channels, "language_code": language, @@ -145,13 +145,13 @@ class ConnectorGoogleSpeech: } # Only add sample_rate_hertz if needed (not for WEBM_OPUS) - if use_sample_rate: - config_params["sample_rate_hertz"] = sample_rate - logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}") + if useSampleRate: + configParams["sample_rate_hertz"] = sampleRate + logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}") else: logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}") - config = speech.RecognitionConfig(**config_params) + config = speech.RecognitionConfig(**configParams) # Perform speech recognition logger.info("Sending audio to Google Cloud Speech-to-Text...") @@ -162,12 +162,12 @@ class ConnectorGoogleSpeech: response = self.speech_client.recognize(config=config, audio=audio) logger.debug(f"Google Cloud response: {response}") - except Exception as api_error: - logger.error(f"Google Cloud API error: {api_error}") + except Exception as apiError: + logger.error(f"Google Cloud API error: {apiError}") # Try with different encoding as fallback if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16: logger.info("Trying fallback with LINEAR16 encoding...") - fallback_config = speech.RecognitionConfig( + fallbackConfig = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=16000, # Use standard sample rate audio_channel_count=1, @@ -177,13 +177,13 @@ class 
ConnectorGoogleSpeech: ) try: - response = self.speech_client.recognize(config=fallback_config, audio=audio) + response = self.speech_client.recognize(config=fallbackConfig, audio=audio) logger.debug(f"Google Cloud fallback response: {response}") - except Exception as fallback_error: - logger.error(f"Google Cloud fallback error: {fallback_error}") - raise api_error + except Exception as fallbackError: + logger.error(f"Google Cloud fallback error: {fallbackError}") + raise apiError else: - raise api_error + raise apiError # Process results if response.results: @@ -234,18 +234,18 @@ class ConnectorGoogleSpeech: if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16: # For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts - if audio_format != "webm_opus": + if audioFormat != "webm_opus": # Try LINEAR16 with detected sample rate for non-WEBM formats fallback_configs.append({ "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, - "sample_rate": sample_rate, + "sample_rate": sampleRate, "channels": channels, "use_sample_rate": True, - "description": f"LINEAR16 with {sample_rate}Hz" + "description": f"LINEAR16 with {sampleRate}Hz" }) # For WEBM_OPUS, only try compatible sample rates or skip sample rate specification - if audio_format == "webm_opus": + if audioFormat == "webm_opus": # Try WEBM_OPUS without sample rate specification (let Google read from header) fallback_configs.append({ "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS, @@ -273,7 +273,7 @@ class ConnectorGoogleSpeech: else: # For other formats, try standard sample rates for std_rate in [16000, 8000, 22050, 44100]: - if std_rate != sample_rate: + if std_rate != sampleRate: fallback_configs.append({ "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, "sample_rate": std_rate, diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py index 93f8bf62..3c03e64b 100644 --- a/modules/datamodels/datamodelChat.py +++ b/modules/datamodels/datamodelChat.py @@ -3,8 +3,8 @@ from typing import List, Dict, Any, Optional from enum import Enum from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp import uuid @@ -26,7 +26,7 @@ class ChatStat(BaseModel): priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation") -register_model_labels( +registerModelLabels( "ChatStat", {"en": "Chat Statistics", "fr": "Statistiques de chat"}, { @@ -51,7 +51,7 @@ class ChatLog(BaseModel): message: str = Field(description="Log message") type: str = Field(description="Log type (info, warning, error, etc.)") timestamp: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="When the log entry was created (UTC timestamp in seconds)", ) status: Optional[str] = Field(None, description="Status of the log entry") @@ -63,7 +63,7 @@ class ChatLog(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatLog", {"en": "Chat Log", "fr": "Journal de chat"}, { @@ -96,7 +96,7 @@ class ChatDocument(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatDocument", {"en": "Chat Document", "fr": "Document de chat"}, { @@ -133,7 +133,7 @@ class ContentMetadata(BaseModel): base64Encoded: bool = Field(description="Whether the data is base64 encoded") -register_model_labels( 
+registerModelLabels( "ContentMetadata", {"en": "Content Metadata", "fr": "Métadonnées du contenu"}, { @@ -157,7 +157,7 @@ class ContentItem(BaseModel): metadata: ContentMetadata = Field(description="Content metadata") -register_model_labels( +registerModelLabels( "ContentItem", {"en": "Content Item", "fr": "Élément de contenu"}, { @@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatContentExtracted", {"en": "Extracted Content", "fr": "Contenu extrait"}, { @@ -209,7 +209,7 @@ class ChatMessage(BaseModel): description="Sequence number of the message (set automatically)" ) publishedAt: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="When the message was published (UTC timestamp in seconds)", ) success: Optional[bool] = Field( @@ -235,7 +235,7 @@ class ChatMessage(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatMessage", {"en": "Chat Message", "fr": "Message de chat"}, { @@ -331,14 +331,14 @@ class ChatWorkflow(BaseModel): frontend_required=False, ) lastActivity: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="Timestamp of last activity (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False, ) startedAt: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="When the workflow started (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, @@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel): ) -register_model_labels( +registerModelLabels( "ChatWorkflow", {"en": "Chat Workflow", "fr": "Flux de travail de chat"}, { @@ -426,7 +426,7 @@ class UserInputRequest(BaseModel): userLanguage: str = Field(default="en", description="User's preferred language") -register_model_labels( +registerModelLabels( "UserInputRequest", {"en": "User Input Request", "fr": "Demande de saisie utilisateur"}, { @@ -445,7 +445,7 @@ class ActionDocument(BaseModel): mimeType: str = Field(description="MIME type of the document") -register_model_labels( +registerModelLabels( "ActionDocument", {"en": "Action Document", "fr": "Document d'action"}, { @@ -485,7 +485,7 @@ class ActionResult(BaseModel): return cls(success=False, documents=documents or [], error=error) -register_model_labels( +registerModelLabels( "ActionResult", {"en": "Action Result", "fr": "Résultat de l'action"}, { @@ -504,7 +504,7 @@ class ActionSelection(BaseModel): ) -register_model_labels( +registerModelLabels( "ActionSelection", {"en": "Action Selection", "fr": "Sélection d'action"}, { @@ -520,7 +520,7 @@ class ActionParameters(BaseModel): ) -register_model_labels( +registerModelLabels( "ActionParameters", {"en": "Action Parameters", "fr": "Paramètres d'action"}, { @@ -535,7 +535,7 @@ class ObservationPreview(BaseModel): snippet: str = Field(description="Short snippet or summary") -register_model_labels( +registerModelLabels( "ObservationPreview", {"en": "Observation Preview", "fr": "Aperçu d'observation"}, { @@ -558,7 +558,7 @@ class Observation(BaseModel): ) -register_model_labels( +registerModelLabels( "Observation", {"en": "Observation", "fr": "Observation"}, { @@ -579,7 +579,7 @@ class TaskStatus(str, Enum): CANCELLED = "cancelled" -register_model_labels( +registerModelLabels( "TaskStatus", {"en": "Task Status", "fr": "Statut de la tâche"}, { @@ -599,7 +599,7 @@ class DocumentExchange(BaseModel): ) -register_model_labels( +registerModelLabels( "DocumentExchange", 
{"en": "Document Exchange", "fr": "Échange de documents"}, { @@ -650,7 +650,7 @@ class ActionItem(BaseModel): self.error = error_message -register_model_labels( +registerModelLabels( "ActionItem", {"en": "Task Action", "fr": "Action de tâche"}, { @@ -683,7 +683,7 @@ class TaskResult(BaseModel): error: Optional[str] = Field(None, description="Error message if task failed") -register_model_labels( +registerModelLabels( "TaskResult", {"en": "Task Result", "fr": "Résultat de tâche"}, { @@ -728,7 +728,7 @@ class TaskItem(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskItem", {"en": "Task", "fr": "Tâche"}, { @@ -758,7 +758,7 @@ class TaskStep(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskStep", {"en": "Task Step", "fr": "Étape de tâche"}, { @@ -805,7 +805,7 @@ class TaskHandover(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskHandover", {"en": "Task Handover", "fr": "Transfert de tâche"}, { @@ -879,7 +879,7 @@ class ReviewResult(BaseModel): ) -register_model_labels( +registerModelLabels( "ReviewResult", {"en": "Review Result", "fr": "Résultat de l'évaluation"}, { @@ -904,7 +904,7 @@ class TaskPlan(BaseModel): ) -register_model_labels( +registerModelLabels( "TaskPlan", {"en": "Task Plan", "fr": "Plan de tâches"}, { @@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel): ) -register_model_labels( +registerModelLabels( "PromptPlaceholder", {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"}, { @@ -943,7 +943,7 @@ class PromptBundle(BaseModel): placeholders: List[PromptPlaceholder] = Field(default_factory=list) -register_model_labels( +registerModelLabels( "PromptBundle", {"en": "Prompt Bundle", "fr": "Lot d'invite"}, { diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py index a437b6f1..33472130 100644 --- a/modules/datamodels/datamodelDocument.py +++ b/modules/datamodels/datamodelDocument.py @@ -81,11 +81,11 @@ class StructuredDocument(BaseModel): summary: Optional[str] = Field(default=None, description="Document summary") tags: List[str] = Field(default_factory=list, description="Document tags") - def get_sections_by_type(self, content_type: str) -> List[DocumentSection]: + def getSectionsByType(self, content_type: str) -> List[DocumentSection]: """Get all sections of a specific content type.""" return [section for section in self.sections if section.content_type == content_type] - def get_all_tables(self) -> List[TableData]: + def getAllTables(self) -> List[TableData]: """Get all table data from the document.""" tables = [] for section in self.sections: @@ -94,7 +94,7 @@ class StructuredDocument(BaseModel): tables.append(element) return tables - def get_all_lists(self) -> List[BulletList]: + def getAllLists(self) -> List[BulletList]: """Get all lists from the document.""" lists = [] for section in self.sections: diff --git a/modules/datamodels/datamodelFiles.py b/modules/datamodels/datamodelFiles.py index 6dcb3a0e..e1f802b7 100644 --- a/modules/datamodels/datamodelFiles.py +++ b/modules/datamodels/datamodelFiles.py @@ -2,8 +2,8 @@ from typing import Dict, Any, Optional, Union from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp import uuid import base64 @@ -15,9 +15,9 @@ class FileItem(BaseModel): mimeType: str = Field(description="MIME type of the file", 
frontend_type="text", frontend_readonly=True, frontend_required=False) fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False) - creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "FileItem", {"en": "File Item", "fr": "Élément de fichier"}, { @@ -45,7 +45,7 @@ class FilePreview(BaseModel): if isinstance(data.get("content"), bytes): data["content"] = base64.b64encode(data["content"]).decode("utf-8") return data -register_model_labels( +registerModelLabels( "FilePreview", {"en": "File Preview", "fr": "Aperçu du fichier"}, { @@ -62,7 +62,7 @@ class FileData(BaseModel): id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") data: str = Field(description="File data content") base64Encoded: bool = Field(description="Whether the data is base64 encoded") -register_model_labels( +registerModelLabels( "FileData", {"en": "File Data", "fr": "Données de fichier"}, { diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py new file mode 100644 index 00000000..0af89375 --- /dev/null +++ b/modules/datamodels/datamodelJson.py @@ -0,0 +1,90 @@ +""" +Unified JSON document schema and helpers used by both generation prompts and renderers. + +This defines a single canonical template and the supported section types. +""" + +from typing import List + +# Canonical list of supported section types across the system +supportedSectionTypes: List[str] = [ + "table", + "bullet_list", + "heading", + "paragraph", + "code_block", + "image", +] + +# Canonical JSON template used for AI generation (documents array + sections) +# Rendering pipelines can select the first document and read its sections. 
+jsonTemplateDocument: str = """{ + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": [ + { + "id": "doc_1", + "title": "{{DOCUMENT_TITLE}}", + "filename": "document.json", + "sections": [ + { + "id": "section_heading_example", + "content_type": "heading", + "elements": [ + {"level": 1, "text": "Heading Text"} + ], + "order": 0 + }, + { + "id": "section_paragraph_example", + "content_type": "paragraph", + "elements": [ + {"text": "Paragraph text content"} + ], + "order": 0 + }, + { + "id": "section_bullet_list_example", + "content_type": "bullet_list", + "elements": [ + { + "items": ["Item 1", "Item 2"] + } + ], + "order": 0 + }, + { + "id": "section_table_example", + "content_type": "table", + "elements": [ + { + "headers": ["Column 1", "Column 2"], + "rows": [ + ["Row 1 Col 1", "Row 1 Col 2"], + ["Row 2 Col 1", "Row 2 Col 2"] + ], + "caption": "Table caption" + } + ], + "order": 0 + }, + { + "id": "section_code_example", + "content_type": "code_block", + "elements": [ + { + "code": "function example() { return true; }", + "language": "javascript" + } + ], + "order": 0 + } + ] + } + ] +}""" + + diff --git a/modules/datamodels/datamodelNeutralizer.py b/modules/datamodels/datamodelNeutralizer.py index 93f751e5..60894dff 100644 --- a/modules/datamodels/datamodelNeutralizer.py +++ b/modules/datamodels/datamodelNeutralizer.py @@ -3,7 +3,7 @@ import uuid from typing import Optional from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels +from modules.shared.attributeUtils import registerModelLabels class DataNeutraliserConfig(BaseModel): @@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel): namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False) sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False) sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False) -register_model_labels( +registerModelLabels( "DataNeutraliserConfig", {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"}, { @@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel): originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True) fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True) -register_model_labels( +registerModelLabels( "DataNeutralizerAttributes", {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"}, { diff --git a/modules/datamodels/datamodelSecurity.py b/modules/datamodels/datamodelSecurity.py index cb629a01..42b9a1ad 100644 --- a/modules/datamodels/datamodelSecurity.py +++ b/modules/datamodels/datamodelSecurity.py @@ -2,8 +2,8 @@ from typing import Optional from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from 
modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp from .datamodelUam import AuthAuthority from enum import Enum import uuid @@ -51,7 +51,7 @@ class Token(BaseModel): use_enum_values = True -register_model_labels( +registerModelLabels( "Token", {"en": "Token", "fr": "Jeton"}, { @@ -95,7 +95,7 @@ class AuthEvent(BaseModel): frontend_required=True, ) timestamp: float = Field( - default_factory=get_utc_timestamp, + default_factory=getUtcTimestamp, description="Unix timestamp when the event occurred", frontend_type="datetime", frontend_readonly=True, @@ -131,7 +131,7 @@ class AuthEvent(BaseModel): ) -register_model_labels( +registerModelLabels( "AuthEvent", {"en": "Authentication Event", "fr": "Événement d'authentification"}, { diff --git a/modules/datamodels/datamodelTickets.py b/modules/datamodels/datamodelTickets.py index 40478bc6..5d800b15 100644 --- a/modules/datamodels/datamodelTickets.py +++ b/modules/datamodels/datamodelTickets.py @@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel): class TicketBase(ABC): @abstractmethod - async def read_attributes(self) -> list[TicketFieldAttribute]: ... + async def readAttributes(self) -> list[TicketFieldAttribute]: ... @abstractmethod - async def read_tasks(self, *, limit: int = 0) -> list[dict]: ... + async def readTasks(self, *, limit: int = 0) -> list[dict]: ... @abstractmethod - async def write_tasks(self, tasklist: list[dict]) -> None: ... + async def writeTasks(self, tasklist: list[dict]) -> None: ... diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py index 747bfc27..0bf71fa9 100644 --- a/modules/datamodels/datamodelUam.py +++ b/modules/datamodels/datamodelUam.py @@ -4,8 +4,8 @@ import uuid from typing import Optional from enum import Enum from pydantic import BaseModel, Field, EmailStr -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp class AuthAuthority(str, Enum): @@ -34,7 +34,7 @@ class Mandate(BaseModel): {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, ]) enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) -register_model_labels( +registerModelLabels( "Mandate", {"en": "Mandate", "fr": "Mandat"}, { @@ -62,8 +62,8 @@ class UserConnection(BaseModel): {"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}}, {"value": "pending", "label": {"en": "Pending", "fr": "En attente"}}, ]) - connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in 
seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[ {"value": "active", "label": {"en": "Active", "fr": "Actif"}}, @@ -71,7 +71,7 @@ class UserConnection(BaseModel): {"value": "none", "label": {"en": "None", "fr": "Aucun"}}, ]) tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "UserConnection", {"en": "User Connection", "fr": "Connexion utilisateur"}, { @@ -113,7 +113,7 @@ class User(BaseModel): {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}}, ]) mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "User", {"en": "User", "fr": "Utilisateur"}, { @@ -131,7 +131,7 @@ register_model_labels( class UserInDB(User): hashedPassword: Optional[str] = Field(None, description="Hash of the user password") -register_model_labels( +registerModelLabels( "UserInDB", {"en": "User Access", "fr": "Accès de l'utilisateur"}, {"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}}, diff --git a/modules/datamodels/datamodelUtils.py b/modules/datamodels/datamodelUtils.py index c928cd47..67a42534 100644 --- a/modules/datamodels/datamodelUtils.py +++ b/modules/datamodels/datamodelUtils.py @@ -1,7 +1,7 @@ """Utility datamodels: Prompt.""" from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels +from modules.shared.attributeUtils import registerModelLabels import uuid @@ -10,7 +10,7 @@ class Prompt(BaseModel): mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True) name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True) -register_model_labels( +registerModelLabels( "Prompt", {"en": "Prompt", "fr": "Invite"}, { diff --git a/modules/datamodels/datamodelVoice.py b/modules/datamodels/datamodelVoice.py index 8be43b58..6ecdd857 100644 --- a/modules/datamodels/datamodelVoice.py +++ b/modules/datamodels/datamodelVoice.py @@ -1,8 +1,8 @@ """Voice settings datamodel.""" from pydantic import BaseModel, Field -from modules.shared.attributeUtils import register_model_labels -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.attributeUtils import registerModelLabels +from modules.shared.timezoneUtils import getUtcTimestamp import uuid @@ -15,11 +15,11 @@ class VoiceSettings(BaseModel): ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True) translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", 
frontend_readonly=False, frontend_required=False) targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False) - creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) - lastModified: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) + lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) -register_model_labels( +registerModelLabels( "VoiceSettings", {"en": "Voice Settings", "fr": "Paramètres vocaux"}, { diff --git a/modules/features/neutralizePlayground/mainNeutralizePlayground.py b/modules/features/neutralizePlayground/mainNeutralizePlayground.py index 52d6a7ce..df486c8f 100644 --- a/modules/features/neutralizePlayground/mainNeutralizePlayground.py +++ b/modules/features/neutralizePlayground/mainNeutralizePlayground.py @@ -43,10 +43,6 @@ class NeutralizationPlayground: 'errors': errors, } - async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]: - from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService - processor = SharepointProcessor(self.currentUser, self.services) - return await processor.processSharepointFiles(sourcePath, targetPath) # Cleanup attributes def cleanAttributes(self, fileId: str) -> bool: @@ -77,49 +73,51 @@ class NeutralizationPlayground: } # Additional methods needed by the route - def get_config(self) -> Optional[DataNeutraliserConfig]: + def getConfig(self) -> Optional[DataNeutraliserConfig]: """Get neutralization configuration""" return self.services.neutralization.getConfig() - def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig: + def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig: """Save neutralization configuration""" - return self.services.neutralization.saveConfig(config_data) + return self.services.neutralization.saveConfig(configData) - def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]: + def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]: """Neutralize text content""" return self.services.neutralization.processText(text) - def resolve_text(self, text: str) -> str: + def resolveText(self, text: str) -> str: """Resolve UIDs in neutralized text back to original text""" return self.services.neutralization.resolveText(text) - def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]: + def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]: """Get neutralization attributes, optionally filtered by file ID""" try: - all_attributes = self.services.neutralization.getAttributes() - if file_id: - return [attr for attr in all_attributes if attr.fileId == file_id] - return all_attributes + allAttributes = self.services.neutralization.getAttributes() + if fileId: + 
return [attr for attr in allAttributes if attr.fileId == fileId] + return allAttributes except Exception as e: logger.error(f"Error getting attributes: {str(e)}") return [] - async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]: + async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]: """Process files from SharePoint source path and store neutralized files in target path""" - return await self.processSharepointFiles(source_path, target_path) + from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService + processor = SharepointProcessor(self.currentUser, self.services) + return await processor.processSharepointFiles(sourcePath, targetPath) - def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]: + def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]: """Process multiple files for neutralization""" - file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')] - return self.processFiles(file_ids) + fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')] + return self.processFiles(fileIds) - def get_processing_stats(self) -> Dict[str, Any]: + def getProcessingStats(self) -> Dict[str, Any]: """Get neutralization processing statistics""" return self.getStats() - def cleanup_file_attributes(self, file_id: str) -> bool: + def cleanupFileAttributes(self, fileId: str) -> bool: """Clean up neutralization attributes for a specific file""" - return self.cleanAttributes(file_id) + return self.cleanAttributes(fileId) # Internal SharePoint helper module separated to keep feature logic tidy @@ -208,7 +206,7 @@ class SharepointProcessor: siteUrl, _ = self._parseSharepointPath(sharepointPath) if not siteUrl: return False - siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl) + siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl) return siteInfo is not None except Exception: return False @@ -219,17 +217,17 @@ class SharepointProcessor: targetSite, targetFolder = self._parseSharepointPath(targetPath) if not sourceSite or not targetSite: return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']} - sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite) + sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite) if not sourceSiteInfo: return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']} - targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite) + targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite) if not targetSiteInfo: return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']} logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}") - files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder) + files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder) if not files: logger.warning(f"No files found in folder '{sourceFolder}', trying root folder") - files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '') + files = await 
self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '') if files: folders = [f for f in files if f.get('type') == 'folder'] folderNames = [f.get('name') for f in folders] @@ -251,7 +249,7 @@ class SharepointProcessor: async def _processSingle(fileInfo: Dict[str, Any]): try: - fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id']) + fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id']) if not fileContent: return {'error': f"Failed to download file: {fileInfo['name']}"} try: @@ -260,7 +258,7 @@ class SharepointProcessor: textContent = fileContent.decode('latin-1') result = self.services.neutralization.processText(textContent) neutralizedFilename = f"neutralized_{fileInfo['name']}" - uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8')) + uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8')) if 'error' in uploadResult: return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"} return { diff --git a/modules/features/syncDelta/mainSyncDelta.py b/modules/features/syncDelta/mainSyncDelta.py index fa8c1f93..3fc9e7af 100644 --- a/modules/features/syncDelta/mainSyncDelta.py +++ b/modules/features/syncDelta/mainSyncDelta.py @@ -204,9 +204,9 @@ class ManagerSyncDelta: logger.info( f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}" ) - resolved = await self.services.sharepoint.find_site_by_url( + resolved = await self.services.sharepoint.findSiteByUrl( hostname=self.SHAREPOINT_HOSTNAME, - site_path=self.SHAREPOINT_SITE_PATH + sitePath=self.SHAREPOINT_SITE_PATH ) if not resolved: @@ -223,9 +223,9 @@ class ManagerSyncDelta: # Test site access by listing root of the drive logger.info("Testing site access using resolved site ID...") - test_result = await self.services.sharepoint.list_folder_contents( - site_id=self.targetSite["id"], - folder_path="" + test_result = await self.services.sharepoint.listFolderContents( + siteId=self.targetSite["id"], + folderPath="" ) if test_result is not None: @@ -293,8 +293,8 @@ class ManagerSyncDelta: existing_headers = {"header1": "Header 1", "header2": "Header 2"} try: file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}" - excel_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + excel_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) existing_data, existing_headers = self.parseExcelContent(excel_content) except Exception: @@ -307,16 +307,16 @@ class ManagerSyncDelta: await self.backupSharepointFile(filename=sync_file_name) excel_bytes = self.createExcelContent(merged_data, existing_headers) - await self.services.sharepoint.upload_file( - site_id=self.targetSite['id'], - folder_path=self.SHAREPOINT_MAIN_FOLDER, - file_name=sync_file_name, + await self.services.sharepoint.uploadFile( + siteId=self.targetSite['id'], + folderPath=self.SHAREPOINT_MAIN_FOLDER, + fileName=sync_file_name, content=excel_bytes, ) # Import back to tickets try: - excel_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + excel_content = await self.services.sharepoint.downloadFileByPath( + 
siteId=self.targetSite['id'], filePath=file_path ) excel_rows, _ = self.parseExcelContent(excel_content) self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets") @@ -333,8 +333,8 @@ class ManagerSyncDelta: existing_data: list[dict] = [] try: file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}" - csv_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + csv_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) csv_lines = csv_content.decode('utf-8').split('\n') if len(csv_lines) >= 2: @@ -348,16 +348,16 @@ class ManagerSyncDelta: await self.backupSharepointFile(filename=sync_file_name) merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data) csv_bytes = self.createCsvContent(merged_data, existing_headers) - await self.services.sharepoint.upload_file( - site_id=self.targetSite['id'], - folder_path=self.SHAREPOINT_MAIN_FOLDER, - file_name=sync_file_name, + await self.services.sharepoint.uploadFile( + siteId=self.targetSite['id'], + folderPath=self.SHAREPOINT_MAIN_FOLDER, + fileName=sync_file_name, content=csv_bytes, ) # Import from CSV try: - csv_content = await self.services.sharepoint.download_file_by_path( - site_id=self.targetSite['id'], file_path=file_path + csv_content = await self.services.sharepoint.downloadFileByPath( + siteId=self.targetSite['id'], filePath=file_path ) df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python') csv_rows = df.to_dict('records') @@ -388,12 +388,12 @@ class ManagerSyncDelta: try: timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S") backup_filename = f"backup_{timestamp}_{filename}" - await self.services.sharepoint.copy_file_async( - site_id=self.targetSite['id'], - source_folder=self.SHAREPOINT_MAIN_FOLDER, - source_file=filename, - dest_folder=self.SHAREPOINT_BACKUP_FOLDER, - dest_file=backup_filename, + await self.services.sharepoint.copyFileAsync( + siteId=self.targetSite['id'], + sourceFolder=self.SHAREPOINT_MAIN_FOLDER, + sourceFile=filename, + destFolder=self.SHAREPOINT_BACKUP_FOLDER, + destFile=backup_filename, ) self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}") return True @@ -679,7 +679,7 @@ class ManagerSyncDelta: connectorType=connectorType, connectorParams=connectorParams, ) - attributes = await ticket_interface.connector_ticket.read_attributes() + attributes = await ticket_interface.connector_ticket.readAttributes() if not attributes: logger.warning("No ticket attributes returned; nothing to write.") return False @@ -713,7 +713,7 @@ class ManagerSyncDelta: connectorType=connectorType, connectorParams=connectorParams, ) - tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit) + tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit) if not tickets: logger.warning("No tickets returned; nothing to write.") return False diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index e58fa1ef..c6b0e62c 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -54,8 +54,6 @@ class AiObjects: # No need to manually create connectors - they're auto-discovered return cls() - - def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str: 
"""Select the best model using dynamic model selection system.""" # Get available models from the dynamic registry diff --git a/modules/interfaces/interfaceDbAppObjects.py b/modules/interfaces/interfaceDbAppObjects.py index e37c98f8..8da37568 100644 --- a/modules/interfaces/interfaceDbAppObjects.py +++ b/modules/interfaces/interfaceDbAppObjects.py @@ -10,7 +10,7 @@ import uuid from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.interfaces.interfaceDbAppAccess import AppAccess from modules.datamodels.datamodelUam import ( User, @@ -604,8 +604,8 @@ class AppObjects: externalUsername=externalUsername, externalEmail=externalEmail, status=status, - connectedAt=get_utc_timestamp(), - lastChecked=get_utc_timestamp(), + connectedAt=getUtcTimestamp(), + lastChecked=getUtcTimestamp(), expiresAt=None, # Optional field, set to None by default ) @@ -755,7 +755,7 @@ class AppObjects: if not token.id: token.id = str(uuid.uuid4()) if not token.createdAt: - token.createdAt = get_utc_timestamp() + token.createdAt = getUtcTimestamp() # If replace_existing is True, delete old access tokens for this user and authority first if replace_existing: @@ -822,7 +822,7 @@ class AppObjects: if not token.id: token.id = str(uuid.uuid4()) if not token.createdAt: - token.createdAt = get_utc_timestamp() + token.createdAt = getUtcTimestamp() # Convert to dict and ensure all fields are properly set token_dict = token.model_dump() @@ -932,7 +932,7 @@ class AppObjects: return True tokenUpdate = { "status": TokenStatus.REVOKED, - "revokedAt": get_utc_timestamp(), + "revokedAt": getUtcTimestamp(), "revokedBy": revokedBy, "reason": reason or "revoked", } @@ -970,7 +970,7 @@ class AppObjects: t["id"], { "status": TokenStatus.REVOKED, - "revokedAt": get_utc_timestamp(), + "revokedAt": getUtcTimestamp(), "revokedBy": revokedBy, "reason": reason or "session logout", }, @@ -1008,7 +1008,7 @@ class AppObjects: t["id"], { "status": TokenStatus.REVOKED, - "revokedAt": get_utc_timestamp(), + "revokedAt": getUtcTimestamp(), "revokedBy": revokedBy, "reason": reason or "admin revoke", }, @@ -1022,7 +1022,7 @@ class AppObjects: def cleanupExpiredTokens(self) -> int: """Clean up expired tokens for all connections, returns count of cleaned tokens""" try: - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() cleaned_count = 0 # Get all tokens @@ -1100,7 +1100,7 @@ class AppObjects: # Update existing config update_data = existing_config.model_dump() update_data.update(config_data) - update_data["updatedAt"] = get_utc_timestamp() + update_data["updatedAt"] = getUtcTimestamp() updated_config = DataNeutraliserConfig(**update_data) self.db.recordModify( diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py index 92dcccf6..deea239a 100644 --- a/modules/interfaces/interfaceDbChatObjects.py +++ b/modules/interfaces/interfaceDbChatObjects.py @@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User # DYNAMIC PART: Connectors to the Interface from modules.connectors.connectorDbPostgre import DatabaseConnector -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp # Basic Configurations from modules.shared.configuration import APP_CONFIG @@ -66,56 +66,56 @@ class ChatObjects: # ===== Generic Utility Methods ===== - def 
_is_object_field(self, field_type) -> bool: + def _isObjectField(self, fieldType) -> bool: """Check if a field type represents a complex object (not a simple type).""" # Simple scalar types - if field_type in (str, int, float, bool, type(None)): + if fieldType in (str, int, float, bool, type(None)): return False # Everything else is an object return True - def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: + def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: """Separate simple fields from object fields based on Pydantic model structure.""" - simple_fields = {} - object_fields = {} + simpleFields = {} + objectFields = {} # Get field information from the Pydantic model - model_fields = model_class.model_fields + modelFields = model_class.model_fields - for field_name, value in data.items(): + for fieldName, value in data.items(): # Check if this field should be stored as JSONB in the database - if field_name in model_fields: - field_info = model_fields[field_name] + if fieldName in modelFields: + fieldInfo = modelFields[fieldName] # Pydantic v2 only - field_type = field_info.annotation + fieldType = fieldInfo.annotation # Always route relational/object fields to object_fields for separate handling - if field_name in ['documents', 'stats']: - object_fields[field_name] = value + if fieldName in ['documents', 'stats']: + objectFields[fieldName] = value continue # Check if this is a JSONB field (Dict, List, or complex types) - if (field_type == dict or - field_type == list or - (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or - field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']): + if (fieldType == dict or + fieldType == list or + (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or + fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']): # Store as JSONB - include in simple_fields for database storage - simple_fields[field_name] = value + simpleFields[fieldName] = value elif isinstance(value, (str, int, float, bool, type(None))): # Simple scalar types - simple_fields[field_name] = value + simpleFields[fieldName] = value else: # Complex objects that should be filtered out - object_fields[field_name] = value + objectFields[fieldName] = value else: # Field not in model - treat as scalar if simple, otherwise filter out if isinstance(value, (str, int, float, bool, type(None))): - simple_fields[field_name] = value + simpleFields[fieldName] = value else: - object_fields[field_name] = value + objectFields[fieldName] = value - return simple_fields, object_fields + return simpleFields, objectFields def _initializeServices(self): pass @@ -240,8 +240,8 @@ class ChatObjects: currentAction=workflow.get("currentAction", 0), totalTasks=workflow.get("totalTasks", 0), totalActions=workflow.get("totalActions", 0), - lastActivity=workflow.get("lastActivity", get_utc_timestamp()), - startedAt=workflow.get("startedAt", get_utc_timestamp()), + lastActivity=workflow.get("lastActivity", getUtcTimestamp()), + startedAt=workflow.get("startedAt", getUtcTimestamp()), logs=logs, messages=messages, stats=stats, @@ -257,7 +257,7 @@ class ChatObjects: raise PermissionError("No permission to create workflows") # Set timestamp if not present - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() if "startedAt" not in workflowData: workflowData["startedAt"] = currentTime @@ -265,10 +265,10 
@@ class ChatObjects: workflowData["lastActivity"] = currentTime # Use generic field separation based on ChatWorkflow model - simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) + simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData) # Create workflow in database - created = self.db.recordCreate(ChatWorkflow, simple_fields) + created = self.db.recordCreate(ChatWorkflow, simpleFields) # Convert to ChatWorkflow model (empty related data for new workflow) @@ -302,13 +302,13 @@ class ChatObjects: raise PermissionError(f"No permission to update workflow {workflowId}") # Use generic field separation based on ChatWorkflow model - simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) + simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData) # Set update time for main workflow - simple_fields["lastActivity"] = get_utc_timestamp() + simpleFields["lastActivity"] = getUtcTimestamp() # Update main workflow in database - updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields) + updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields) # Removed cascade writes for logs/messages/stats during workflow update. # CUD for child entities must be executed via dedicated service methods. @@ -423,7 +423,7 @@ class ChatObjects: role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), - publishedAt=msg.get("publishedAt", get_utc_timestamp()), + publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), @@ -490,20 +490,30 @@ class ChatObjects: messageData["actionNumber"] = workflow.currentAction # Use generic field separation based on ChatMessage model - simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) + simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData) # Handle documents separately - they will be stored in normalized documents table - documents_to_create = object_fields.get("documents", []) + documents_to_create = objectFields.get("documents", []) # Create message in normalized table using only simple fields - createdMessage = self.db.recordCreate(ChatMessage, simple_fields) + createdMessage = self.db.recordCreate(ChatMessage, simpleFields) # Create documents in normalized documents table created_documents = [] for doc_data in documents_to_create: - # Use the document data directly - doc_dict = doc_data + # Normalize to plain dict before assignment + if isinstance(doc_data, ChatDocument): + doc_dict = doc_data.model_dump() + elif isinstance(doc_data, dict): + doc_dict = dict(doc_data) + else: + # Attempt to coerce to ChatDocument then dump + try: + doc_dict = ChatDocument(**doc_data).model_dump() + except Exception: + logger.error("Invalid document data type for message creation") + continue doc_dict["messageId"] = createdMessage["id"] created_doc = self.createDocument(doc_dict) @@ -522,8 +532,8 @@ class ChatObjects: role=createdMessage.get("role", "assistant"), status=createdMessage.get("status", "step"), sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number - publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()), - stats=object_fields.get("stats"), # Use stats from object_fields + publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()), + stats=objectFields.get("stats"), # Use stats from objectFields 
roundNumber=createdMessage.get("roundNumber"), taskNumber=createdMessage.get("taskNumber"), actionNumber=createdMessage.get("actionNumber"), @@ -588,31 +598,41 @@ class ChatObjects: raise PermissionError(f"No permission to modify workflow {workflowId}") # Use generic field separation based on ChatMessage model - simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) + simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData) # Ensure required fields present for key in ["role", "agentName"]: - if key not in simple_fields and key not in existingMessage: - simple_fields[key] = "assistant" if key == "role" else "" + if key not in simpleFields and key not in existingMessage: + simpleFields[key] = "assistant" if key == "role" else "" # Ensure ID is in the dataset - if 'id' not in simple_fields: - simple_fields['id'] = messageId + if 'id' not in simpleFields: + simpleFields['id'] = messageId # Convert createdAt to startedAt if needed - if "createdAt" in simple_fields and "startedAt" not in simple_fields: - simple_fields["startedAt"] = simple_fields["createdAt"] - del simple_fields["createdAt"] + if "createdAt" in simpleFields and "startedAt" not in simpleFields: + simpleFields["startedAt"] = simpleFields["createdAt"] + del simpleFields["createdAt"] # Update the message with simple fields only - updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields) + updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields) # Handle object field updates (documents, stats) inline - if 'documents' in object_fields: - documents_data = object_fields['documents'] + if 'documents' in objectFields: + documents_data = objectFields['documents'] try: for doc_data in documents_data: - doc_dict = doc_data + # Normalize to dict before mutation + if isinstance(doc_data, ChatDocument): + doc_dict = doc_data.model_dump() + elif isinstance(doc_data, dict): + doc_dict = dict(doc_data) + else: + try: + doc_dict = ChatDocument(**doc_data).model_dump() + except Exception: + logger.error("Invalid document data type for message update") + continue doc_dict["messageId"] = messageId self.createDocument(doc_dict) except Exception as e: @@ -732,11 +752,9 @@ class ChatObjects: def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument: """Creates a document for a message in normalized table.""" try: - # Validate document data + # Validate and normalize document data to dict document = ChatDocument(**documentData) - - # Create document in normalized table - created = self.db.recordCreate(ChatDocument, document) + created = self.db.recordCreate(ChatDocument, document.model_dump()) return ChatDocument(**created) @@ -785,7 +803,7 @@ class ChatObjects: # Make sure required fields are present if "timestamp" not in logData: - logData["timestamp"] = get_utc_timestamp() + logData["timestamp"] = getUtcTimestamp() # Add status information if not present if "status" not in logData and "type" in logData: @@ -882,7 +900,7 @@ class ChatObjects: messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) for msg in messages: # Apply timestamp filtering in Python - msg_timestamp = msg.get("publishedAt", get_utc_timestamp()) + msg_timestamp = msg.get("publishedAt", getUtcTimestamp()) if afterTimestamp is not None and msg_timestamp <= afterTimestamp: continue @@ -900,7 +918,7 @@ class ChatObjects: role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), - 
publishedAt=msg.get("publishedAt", get_utc_timestamp()), + publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), @@ -923,7 +941,7 @@ class ChatObjects: logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) for log in logs: # Apply timestamp filtering in Python - log_timestamp = log.get("timestamp", get_utc_timestamp()) + log_timestamp = log.get("timestamp", getUtcTimestamp()) if afterTimestamp is not None and log_timestamp <= afterTimestamp: continue @@ -938,7 +956,7 @@ class ChatObjects: stats = self.getStats(workflowId) for stat in stats: # Apply timestamp filtering in Python - stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp() + stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp() if afterTimestamp is not None and stat_timestamp <= afterTimestamp: continue diff --git a/modules/interfaces/interfaceDbComponentObjects.py b/modules/interfaces/interfaceDbComponentObjects.py index d2a74e69..20e7aae1 100644 --- a/modules/interfaces/interfaceDbComponentObjects.py +++ b/modules/interfaces/interfaceDbComponentObjects.py @@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt from modules.datamodels.datamodelVoice import VoiceSettings from modules.datamodels.datamodelUam import User, Mandate from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -466,7 +466,7 @@ class ComponentObjects: # Ensure proper values, use defaults for invalid data creationDate = file.get("creationDate") if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0: - creationDate = get_utc_timestamp() + creationDate = getUtcTimestamp() fileName = file.get("fileName") if not fileName or fileName == "None": @@ -503,7 +503,7 @@ class ComponentObjects: # Get creation date from record or use current time creationDate = file.get("creationDate") if not creationDate: - creationDate = get_utc_timestamp() + creationDate = getUtcTimestamp() return FileItem( id=file.get("id"), @@ -881,9 +881,9 @@ class ComponentObjects: # Ensure timestamps are set for validation settings_data = filteredSettings[0] if not settings_data.get("creationDate"): - settings_data["creationDate"] = get_utc_timestamp() + settings_data["creationDate"] = getUtcTimestamp() if not settings_data.get("lastModified"): - settings_data["lastModified"] = get_utc_timestamp() + settings_data["lastModified"] = getUtcTimestamp() return VoiceSettings(**settings_data) @@ -931,7 +931,7 @@ class ComponentObjects: raise ValueError(f"Voice settings not found for user {userId}") # Update lastModified timestamp - updateData["lastModified"] = get_utc_timestamp() + updateData["lastModified"] = getUtcTimestamp() # Update voice settings record success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index b7a090fa..8c76d455 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -31,7 +31,7 @@ class TicketInterface: self.task_sync_definition = task_sync_definition async def exportTicketsAsList(self) -> list[dict]: - tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0) + tickets: list[dict] = await 
self.connector_ticket.readTasks(limit=0) transformed_tasks = self._transformTicketRecords(tickets, includePut=True) # Return plain dictionaries filtered by presence of ID rows: list[dict] = [] @@ -57,7 +57,7 @@ class TicketInterface: if fields: updates.append({"ID": task_id, "fields": fields}) if updates: - await self.connector_ticket.write_tasks(updates) + await self.connector_ticket.writeTasks(updates) def _transformTicketRecords( self, tasks: list[dict], includePut: bool = False diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py index a1d0b96c..2bb1b729 100644 --- a/modules/interfaces/interfaceVoiceObjects.py +++ b/modules/interfaces/interfaceVoiceObjects.py @@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech from modules.datamodels.datamodelVoice import VoiceSettings from modules.datamodels.datamodelUam import User -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -269,7 +269,7 @@ class VoiceObjects: logger.info(f"Creating voice settings: {settingsData}") # Add timestamps - currentTime = get_utc_timestamp() + currentTime = getUtcTimestamp() settingsData["creationDate"] = currentTime settingsData["lastModified"] = currentTime @@ -298,7 +298,7 @@ class VoiceObjects: logger.info(f"Updating voice settings for user {userId}: {settingsData}") # Add last modified timestamp - settingsData["lastModified"] = get_utc_timestamp() + settingsData["lastModified"] = getUtcTimestamp() # Create updated VoiceSettings object voiceSettings = VoiceSettings(**settingsData) diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index 1c49fa13..eec8d140 100644 --- a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority, from modules.datamodels.datamodelSecurity import Token from modules.security.auth import getCurrentUser, limiter from modules.interfaces.interfaceDbAppObjects import getInterface -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp # Configure logger logger = logging.getLogger(__name__) @@ -64,7 +64,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str, if not expires_at: return "none", None - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() # Add 5 minute buffer for proactive refresh buffer_time = 5 * 60 # 5 minutes in seconds @@ -247,7 +247,7 @@ async def update_connection( setattr(connection, field, value) # Update lastChecked timestamp using UTC timestamp - connection.lastChecked = get_utc_timestamp() + connection.lastChecked = getUtcTimestamp() # Update connection - models now handle timestamp serialization automatically interface.db.recordModify(UserConnection, connectionId, connection.model_dump()) @@ -382,7 +382,7 @@ async def disconnect_service( # Update connection status connection.status = ConnectionStatus.INACTIVE - connection.lastChecked = get_utc_timestamp() + connection.lastChecked = getUtcTimestamp() # Update connection record - models now handle timestamp serialization automatically interface.db.recordModify(UserConnection, connectionId, connection.model_dump()) diff --git a/modules/routes/routeDataNeutralization.py b/modules/routes/routeDataNeutralization.py 
index 61e8c25d..07d6c025 100644 --- a/modules/routes/routeDataNeutralization.py +++ b/modules/routes/routeDataNeutralization.py @@ -35,7 +35,7 @@ async def get_neutralization_config( """Get data neutralization configuration""" try: service = NeutralizationPlayground(currentUser) - config = service.get_config() + config = service.getConfig() if not config: # Return default config instead of 404 @@ -69,7 +69,7 @@ async def save_neutralization_config( """Save or update data neutralization configuration""" try: service = NeutralizationPlayground(currentUser) - config = service.save_config(config_data) + config = service.saveConfig(config_data) return config @@ -99,7 +99,7 @@ async def neutralize_text( ) service = NeutralizationPlayground(currentUser) - result = service.neutralize_text(text, file_id) + result = service.neutralizeText(text, file_id) return result @@ -130,7 +130,7 @@ async def resolve_text( ) service = NeutralizationPlayground(currentUser) - resolved_text = service.resolve_text(text) + resolved_text = service.resolveText(text) return {"resolved_text": resolved_text} @@ -153,7 +153,7 @@ async def get_neutralization_attributes( """Get neutralization attributes, optionally filtered by file ID""" try: service = NeutralizationPlayground(currentUser) - attributes = service.get_attributes(fileId) + attributes = service.getAttributes(fileId) return attributes @@ -183,7 +183,7 @@ async def process_sharepoint_files( ) service = NeutralizationPlayground(currentUser) - result = await service.process_sharepoint_files(source_path, target_path) + result = await service.processSharepointFiles(source_path, target_path) return result @@ -212,7 +212,7 @@ async def batch_process_files( ) service = NeutralizationPlayground(currentUser) - result = service.batch_neutralize_files(files_data) + result = service.batchNeutralizeFiles(files_data) return result @@ -234,7 +234,7 @@ async def get_neutralization_stats( """Get neutralization processing statistics""" try: service = NeutralizationPlayground(currentUser) - stats = service.get_processing_stats() + stats = service.getProcessingStats() return stats @@ -255,7 +255,7 @@ async def cleanup_file_attributes( """Clean up neutralization attributes for a specific file""" try: service = NeutralizationPlayground(currentUser) - success = service.cleanup_file_attributes(fileId) + success = service.cleanupFileAttributes(fileId) if success: return {"message": f"Successfully cleaned up attributes for file {fileId}"} diff --git a/modules/routes/routeDataUsers.py b/modules/routes/routeDataUsers.py index 7a0ea735..7b7e627f 100644 --- a/modules/routes/routeDataUsers.py +++ b/modules/routes/routeDataUsers.py @@ -181,9 +181,9 @@ async def reset_user_password( # Log password reset try: from modules.shared.auditLogger import audit_logger - audit_logger.log_security_event( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logSecurityEvent( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="password_reset", details=f"Reset password for user {userId}" ) @@ -257,9 +257,9 @@ async def change_password( # Log password change try: from modules.shared.auditLogger import audit_logger - audit_logger.log_security_event( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logSecurityEvent( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="password_change", details="User changed their own password" ) diff --git a/modules/routes/routeSecurityGoogle.py 
b/modules/routes/routeSecurityGoogle.py index 8177da8d..fbd9a445 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.security.auth import getCurrentUser, limiter -from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp +from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp # Configure logger logger = logging.getLogger(__name__) @@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenRefresh=token_response.get("refresh_token", ""), tokenType="bearer", expiresAt=jwt_expires_at.timestamp(), - createdAt=get_utc_timestamp() + createdAt=getUtcTimestamp() ) # Save access token (no connectionId) @@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse logger.info(f"Updating connection {connection_id} for user {user.username}") # Update connection with external service details connection.status = ConnectionStatus.ACTIVE - connection.lastChecked = get_utc_timestamp() - connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0) + connection.lastChecked = getUtcTimestamp() + connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0) connection.externalId = user_info.get("id") connection.externalUsername = user_info.get("email") connection.externalEmail = user_info.get("email") @@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=token_response["access_token"], tokenRefresh=token_response.get("refresh_token", ""), tokenType=token_response.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), - createdAt=get_utc_timestamp() + expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)), + createdAt=getUtcTimestamp() ) interface.saveConnectionToken(token) @@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse id: '{connection.id}', status: 'connected', type: 'google', - lastChecked: {get_utc_timestamp()}, - expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))} + lastChecked: {getUtcTimestamp()}, + expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))} }} }}, '*'); // Wait for message to be sent before closing @@ -592,11 +592,11 @@ async def logout( # Log successful logout try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logUserAccess( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="logout", - success_info="google_auth_logout" + successInfo="google_auth_logout" ) except Exception: # Don't fail if audit logging fails @@ -726,12 +726,12 @@ async def refresh_token( # Update the connection status and timing google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt - google_connection.lastChecked = get_utc_timestamp() + google_connection.lastChecked = getUtcTimestamp() google_connection.status = ConnectionStatus.ACTIVE appInterface.db.recordModify(UserConnection, google_connection.id, 
google_connection.model_dump()) # Calculate time until expiration - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0 return { diff --git a/modules/routes/routeSecurityLocal.py b/modules/routes/routeSecurityLocal.py index d705d5c6..0a2fff71 100644 --- a/modules/routes/routeSecurityLocal.py +++ b/modules/routes/routeSecurityLocal.py @@ -131,11 +131,11 @@ async def login( # Log successful login try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(user.id), - mandate_id=str(user.mandateId), + audit_logger.logUserAccess( + userId=str(user.id), + mandateId=str(user.mandateId), action="login", - success_info="local_auth_success" + successInfo="local_auth_success" ) except Exception: # Don't fail if audit logging fails @@ -159,11 +159,11 @@ async def login( # Log failed login attempt try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id="unknown", - mandate_id="unknown", + audit_logger.logUserAccess( + userId="unknown", + mandateId="unknown", action="login", - success_info=f"failed: {error_msg}" + successInfo=f"failed: {error_msg}" ) except Exception: # Don't fail if audit logging fails @@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen # Log successful logout try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logUserAccess( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="logout", - success_info=f"revoked_tokens: {revoked}" + successInfo=f"revoked_tokens: {revoked}" ) except Exception: # Don't fail if audit logging fails diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index 6c16c37e..30c5d33e 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu from modules.datamodels.datamodelSecurity import Token from modules.security.auth import getCurrentUser, limiter from modules.security.jwtService import createAccessToken -from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp +from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp # Configure logger logger = logging.getLogger(__name__) @@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=token_response["access_token"], tokenRefresh=token_response.get("refresh_token", ""), tokenType=token_response.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), - createdAt=get_utc_timestamp() + expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)), + createdAt=getUtcTimestamp() ) # Save access token (no connectionId) @@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=jwt_token, tokenType="bearer", expiresAt=jwt_expires_at.timestamp(), - createdAt=get_utc_timestamp() + createdAt=getUtcTimestamp() ) # Save JWT access token @@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse logger.info(f"Updating connection {connection_id} for user {user.username}") # Update connection with external service 
details connection.status = ConnectionStatus.ACTIVE - connection.lastChecked = get_utc_timestamp() - connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0) + connection.lastChecked = getUtcTimestamp() + connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0) connection.externalId = user_info.get("id") connection.externalUsername = user_info.get("userPrincipalName") connection.externalEmail = user_info.get("mail") @@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse tokenAccess=token_response["access_token"], tokenRefresh=token_response.get("refresh_token", ""), tokenType=token_response.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), - createdAt=get_utc_timestamp() + expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)), + createdAt=getUtcTimestamp() ) @@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse id: '{connection.id}', status: 'connected', type: 'msft', - lastChecked: {get_utc_timestamp()}, - expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))} + lastChecked: {getUtcTimestamp()}, + expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))} }} }}, '*'); // Wait for message to be sent before closing @@ -467,11 +467,11 @@ async def logout( # Log successful logout try: from modules.shared.auditLogger import audit_logger - audit_logger.log_user_access( - user_id=str(currentUser.id), - mandate_id=str(currentUser.mandateId), + audit_logger.logUserAccess( + userId=str(currentUser.id), + mandateId=str(currentUser.mandateId), action="logout", - success_info="microsoft_auth_logout" + successInfo="microsoft_auth_logout" ) except Exception: # Don't fail if audit logging fails @@ -575,27 +575,27 @@ async def refresh_token( from modules.security.tokenManager import TokenManager token_manager = TokenManager() - refreshed_token = token_manager.refresh_token(current_token) - if refreshed_token: + refreshedToken = token_manager.refreshToken(current_token) + if refreshedToken: # Save the new connection token (which will automatically replace old ones) - appInterface.saveConnectionToken(refreshed_token) + appInterface.saveConnectionToken(refreshedToken) # Update the connection's expiration time - msft_connection.expiresAt = float(refreshed_token.expiresAt) - msft_connection.lastChecked = get_utc_timestamp() + msft_connection.expiresAt = float(refreshedToken.expiresAt) + msft_connection.lastChecked = getUtcTimestamp() msft_connection.status = ConnectionStatus.ACTIVE # Save updated connection appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump()) # Calculate time until expiration - current_time = get_utc_timestamp() - expires_in = int(refreshed_token.expiresAt - current_time) + current_time = getUtcTimestamp() + expiresIn = int(refreshedToken.expiresAt - current_time) return { "message": "Token refreshed successfully", - "expires_at": refreshed_token.expiresAt, - "expires_in_seconds": expires_in + "expires_at": refreshedToken.expiresAt, + "expires_in_seconds": expiresIn } else: raise HTTPException( diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py index 17c6e73d..0e1b009f 100644 --- a/modules/routes/routeVoiceGoogle.py +++ b/modules/routes/routeVoiceGoogle.py @@ -18,26 +18,26 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/voice-google", tags=["Voice 
Google"]) # Store active WebSocket connections -active_connections: Dict[str, WebSocket] = {} +activeConnections: Dict[str, WebSocket] = {} class ConnectionManager: def __init__(self): - self.active_connections: List[WebSocket] = [] + self.activeConnections: List[WebSocket] = [] - async def connect(self, websocket: WebSocket, connection_id: str): + async def connect(self, websocket: WebSocket, connectionId: str): await websocket.accept() - self.active_connections.append(websocket) - active_connections[connection_id] = websocket - logger.info(f"WebSocket connected: {connection_id}") + self.activeConnections.append(websocket) + activeConnections[connectionId] = websocket + logger.info(f"WebSocket connected: {connectionId}") - def disconnect(self, websocket: WebSocket, connection_id: str): - if websocket in self.active_connections: - self.active_connections.remove(websocket) - if connection_id in active_connections: - del active_connections[connection_id] - logger.info(f"WebSocket disconnected: {connection_id}") + def disconnect(self, websocket: WebSocket, connectionId: str): + if websocket in self.activeConnections: + self.activeConnections.remove(websocket) + if connectionId in activeConnections: + del activeConnections[connectionId] + logger.info(f"WebSocket disconnected: {connectionId}") - async def send_personal_message(self, message: dict, websocket: WebSocket): + async def sendPersonalMessage(self, message: dict, websocket: WebSocket): try: await websocket.send_text(json.dumps(message)) except Exception as e: @@ -45,10 +45,10 @@ class ConnectionManager: manager = ConnectionManager() -def get_voice_interface(current_user: User) -> VoiceObjects: +def _getVoiceInterface(currentUser: User) -> VoiceObjects: """Get voice interface instance with user context.""" try: - return getVoiceInterface(current_user) + return getVoiceInterface(currentUser) except Exception as e: logger.error(f"Failed to initialize voice interface: {e}") raise HTTPException( @@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects: @router.post("/speech-to-text") async def speech_to_text( - audio_file: UploadFile = File(...), + audioFile: UploadFile = File(...), language: str = Form("de-DE"), - current_user: User = Depends(getCurrentUser) + currentUser: User = Depends(getCurrentUser) ): """Convert speech to text using Google Cloud Speech-to-Text API.""" try: - logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}") + logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}") # Read audio file - audio_content = await audio_file.read() - logger.info(f"📊 Audio file size: {len(audio_content)} bytes") + audioContent = await audioFile.read() + logger.info(f"📊 Audio file size: {len(audioContent)} bytes") # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Validate audio format - validation = voice_interface.validateAudioFormat(audio_content) + validation = voiceInterface.validateAudioFormat(audioContent) if not validation["valid"]: raise HTTPException( @@ -83,8 +83,8 @@ async def speech_to_text( ) # Perform speech recognition - result = await voice_interface.speechToText( - audioContent=audio_content, + result = await voiceInterface.speechToText( + audioContent=audioContent, language=language ) @@ -95,7 +95,7 @@ async def speech_to_text( "confidence": result["confidence"], "language": result["language"], "audio_info": { - "size": len(audio_content), + "size": 
len(audioContent), "format": validation["format"], "estimated_duration": validation.get("estimated_duration", 0) } @@ -118,13 +118,13 @@ async def speech_to_text( @router.post("/translate") async def translate_text( text: str = Form(...), - source_language: str = Form("de"), - target_language: str = Form("en"), - current_user: User = Depends(getCurrentUser) + sourceLanguage: str = Form("de"), + targetLanguage: str = Form("en"), + currentUser: User = Depends(getCurrentUser) ): """Translate text using Google Cloud Translation API.""" try: - logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})") + logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})") if not text.strip(): raise HTTPException( @@ -133,13 +133,13 @@ async def translate_text( ) # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Perform translation - result = await voice_interface.translateText( + result = await voiceInterface.translateText( text=text, - sourceLanguage=source_language, - targetLanguage=target_language + sourceLanguage=sourceLanguage, + targetLanguage=targetLanguage ) if result["success"]: @@ -167,21 +167,21 @@ async def translate_text( @router.post("/realtime-interpreter") async def realtime_interpreter( - audio_file: UploadFile = File(...), - from_language: str = Form("de-DE"), - to_language: str = Form("en-US"), - connection_id: str = Form(None), - current_user: User = Depends(getCurrentUser) + audioFile: UploadFile = File(...), + fromLanguage: str = Form("de-DE"), + toLanguage: str = Form("en-US"), + connectionId: str = Form(None), + currentUser: User = Depends(getCurrentUser) ): """Real-time interpreter: speech to translated text using Google Cloud APIs.""" try: - logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}") - logger.info(f" From: {from_language} -> To: {to_language}") - logger.info(f" MIME type: {audio_file.content_type}") + logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}") + logger.info(f" From: {fromLanguage} -> To: {toLanguage}") + logger.info(f" MIME type: {audioFile.content_type}") # Read audio file - audio_content = await audio_file.read() - logger.info(f"📊 Audio file size: {len(audio_content)} bytes") + audioContent = await audioFile.read() + logger.info(f"📊 Audio file size: {len(audioContent)} bytes") # Save audio file for debugging with correct extension # file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav" @@ -192,10 +192,10 @@ async def realtime_interpreter( # logger.info(f"💾 Saved audio file for debugging: {debug_filename}") # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Validate audio format - validation = voice_interface.validateAudioFormat(audio_content) + validation = voiceInterface.validateAudioFormat(audioContent) if not validation["valid"]: raise HTTPException( @@ -204,10 +204,10 @@ async def realtime_interpreter( ) # Perform complete pipeline: Speech-to-Text + Translation - result = await voice_interface.speechToTranslatedText( - audioContent=audio_content, - fromLanguage=from_language, - toLanguage=to_language + result = await voiceInterface.speechToTranslatedText( + audioContent=audioContent, + fromLanguage=fromLanguage, + toLanguage=toLanguage ) if result["success"]: @@ -223,7 +223,7 @@ async def realtime_interpreter( "source_language": result["source_language"], "target_language": 
result["target_language"], "audio_info": { - "size": len(audio_content), + "size": len(audioContent), "format": validation["format"], "estimated_duration": validation.get("estimated_duration", 0) } @@ -249,7 +249,7 @@ async def text_to_speech( text: str = Form(...), language: str = Form("de-DE"), voice: str = Form(None), - current_user: User = Depends(getCurrentUser) + currentUser: User = Depends(getCurrentUser) ): """Convert text to speech using Google Cloud Text-to-Speech.""" try: @@ -261,8 +261,8 @@ async def text_to_speech( detail="Empty text provided for text-to-speech" ) - voice_interface = get_voice_interface(current_user) - result = await voice_interface.textToSpeech( + voiceInterface = _getVoiceInterface(currentUser) + result = await voiceInterface.textToSpeech( text=text, languageCode=language, voiceName=voice @@ -294,13 +294,13 @@ async def text_to_speech( ) @router.get("/languages") -async def get_available_languages(current_user: User = Depends(getCurrentUser)): +async def get_available_languages(currentUser: User = Depends(getCurrentUser)): """Get available languages from Google Cloud Text-to-Speech.""" try: logger.info("🌐 Getting available languages from Google Cloud TTS") - voice_interface = get_voice_interface(current_user) - result = await voice_interface.getAvailableLanguages() + voiceInterface = _getVoiceInterface(currentUser) + result = await voiceInterface.getAvailableLanguages() if result["success"]: return { @@ -324,21 +324,21 @@ async def get_available_languages(current_user: User = Depends(getCurrentUser)): @router.get("/voices") async def get_available_voices( - language_code: Optional[str] = None, - current_user: User = Depends(getCurrentUser) + languageCode: Optional[str] = None, + currentUser: User = Depends(getCurrentUser) ): """Get available voices from Google Cloud Text-to-Speech.""" try: - logger.info(f"🎤 Getting available voices, language filter: {language_code}") + logger.info(f"🎤 Getting available voices, language filter: {languageCode}") - voice_interface = get_voice_interface(current_user) - result = await voice_interface.getAvailableVoices(languageCode=language_code) + voiceInterface = _getVoiceInterface(currentUser) + result = await voiceInterface.getAvailableVoices(languageCode=languageCode) if result["success"]: return { "success": True, "voices": result["voices"], - "language_filter": language_code + "language_filter": languageCode } else: raise HTTPException( @@ -356,11 +356,11 @@ async def get_available_voices( ) @router.get("/health") -async def health_check(current_user: User = Depends(getCurrentUser)): +async def health_check(currentUser: User = Depends(getCurrentUser)): """Health check for Google Cloud voice services.""" try: - voice_interface = get_voice_interface(current_user) - test_result = await voice_interface.healthCheck() + voiceInterface = _getVoiceInterface(currentUser) + test_result = await voiceInterface.healthCheck() return test_result @@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)): } @router.get("/settings") -async def get_voice_settings(current_user: User = Depends(getCurrentUser)): +async def get_voice_settings(currentUser: User = Depends(getCurrentUser)): """Get voice settings for the current user.""" try: - logger.info(f"Getting voice settings for user: {current_user.id}") + logger.info(f"Getting voice settings for user: {currentUser.id}") # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Get or create 
voice settings for the user - voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id) + voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id) if voice_settings: # Return user settings @@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)): @router.post("/settings") async def save_voice_settings( settings: Dict[str, Any] = Body(...), - current_user: User = Depends(getCurrentUser) + currentUser: User = Depends(getCurrentUser) ): """Save voice settings for the current user.""" try: - logger.info(f"Saving voice settings for user: {current_user.id}") + logger.info(f"Saving voice settings for user: {currentUser.id}") logger.info(f"Settings: {settings}") # Validate required settings - required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"] - for field in required_fields: + requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"] + for field in requiredFields: if field not in settings: raise HTTPException( status_code=400, @@ -448,23 +448,23 @@ async def save_voice_settings( settings["targetLanguage"] = "en-US" # Get voice interface - voice_interface = get_voice_interface(current_user) + voiceInterface = _getVoiceInterface(currentUser) # Check if settings already exist for this user - existing_settings = voice_interface.getVoiceSettings(current_user.id) + existing_settings = voiceInterface.getVoiceSettings(currentUser.id) if existing_settings: # Update existing settings - logger.info(f"Updating existing voice settings for user {current_user.id}") - updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings) - logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}") + logger.info(f"Updating existing voice settings for user {currentUser.id}") + updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings) + logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}") else: # Create new settings - logger.info(f"Creating new voice settings for user {current_user.id}") + logger.info(f"Creating new voice settings for user {currentUser.id}") # Add userId to settings - settings["userId"] = current_user.id - created_settings = voice_interface.createVoiceSettings(settings) - logger.info(f"Voice settings created for user {current_user.id}: {created_settings}") + settings["userId"] = currentUser.id + created_settings = voiceInterface.createVoiceSettings(settings) + logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}") return { "success": True, @@ -486,25 +486,25 @@ async def save_voice_settings( @router.websocket("/ws/realtime-interpreter") async def websocket_realtime_interpreter( websocket: WebSocket, - user_id: str = "default", - from_language: str = "de-DE", - to_language: str = "en-US" + userId: str = "default", + fromLanguage: str = "de-DE", + toLanguage: str = "en-US" ): """WebSocket endpoint for real-time voice interpretation""" - connection_id = f"realtime_{user_id}_{from_language}_{to_language}" + connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}" try: - await manager.connect(websocket, connection_id) + await manager.connect(websocket, connectionId) # Send connection confirmation - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "connected", - "connection_id": connection_id, + "connection_id": connectionId, "message": "Connected to real-time interpreter" }, websocket) # Initialize voice interface - voice_interface = 
get_voice_interface(User(id=user_id)) + voiceInterface = _getVoiceInterface(User(id=userId)) while True: # Receive message from client @@ -515,7 +515,7 @@ async def websocket_realtime_interpreter( # Process audio chunk try: # Decode base64 audio data - audio_data = base64.b64decode(message["data"]) + audioData = base64.b64decode(message["data"]) # For now, just acknowledge receipt # In a full implementation, this would: @@ -524,9 +524,9 @@ async def websocket_realtime_interpreter( # 3. Send partial results back # 4. Handle translation - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "audio_received", - "chunk_size": len(audio_data), + "chunk_size": len(audioData), "timestamp": message.get("timestamp") }, websocket) @@ -539,7 +539,7 @@ async def websocket_realtime_interpreter( elif message["type"] == "ping": # Respond to ping - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) @@ -548,32 +548,32 @@ async def websocket_realtime_interpreter( logger.warning(f"Unknown message type: {message['type']}") except WebSocketDisconnect: - manager.disconnect(websocket, connection_id) - logger.info(f"Client disconnected: {connection_id}") + manager.disconnect(websocket, connectionId) + logger.info(f"Client disconnected: {connectionId}") except Exception as e: logger.error(f"WebSocket error: {e}") - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) @router.websocket("/ws/speech-to-text") async def websocket_speech_to_text( websocket: WebSocket, - user_id: str = "default", + userId: str = "default", language: str = "de-DE" ): """WebSocket endpoint for real-time speech-to-text""" - connection_id = f"stt_{user_id}_{language}" + connectionId = f"stt_{userId}_{language}" try: - await manager.connect(websocket, connection_id) + await manager.connect(websocket, connectionId) - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "connected", - "connection_id": connection_id, + "connection_id": connectionId, "message": "Connected to speech-to-text" }, websocket) # Initialize voice interface - voice_interface = get_voice_interface(User(id=user_id)) + voiceInterface = _getVoiceInterface(User(id=userId)) while True: data = await websocket.receive_text() @@ -581,12 +581,12 @@ async def websocket_speech_to_text( if message["type"] == "audio_chunk": try: - audio_data = base64.b64decode(message["data"]) + audioData = base64.b64decode(message["data"]) # Process audio chunk # This would integrate with Google Cloud Speech-to-Text streaming API - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "transcription_result", "text": "Audio chunk received", # Placeholder "confidence": 0.95, @@ -595,39 +595,39 @@ async def websocket_speech_to_text( except Exception as e: logger.error(f"Error processing audio: {e}") - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "error", "error": f"Failed to process audio: {str(e)}" }, websocket) elif message["type"] == "ping": - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) except WebSocketDisconnect: - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) except Exception as e: logger.error(f"WebSocket error: {e}") - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, 
connectionId) @router.websocket("/ws/text-to-speech") async def websocket_text_to_speech( websocket: WebSocket, - user_id: str = "default", + userId: str = "default", language: str = "de-DE", voice: str = "de-DE-Wavenet-A" ): """WebSocket endpoint for real-time text-to-speech""" - connection_id = f"tts_{user_id}_{language}_{voice}" + connectionId = f"tts_{userId}_{language}_{voice}" try: - await manager.connect(websocket, connection_id) + await manager.connect(websocket, connectionId) - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "connected", - "connection_id": connection_id, + "connection_id": connectionId, "message": "Connected to text-to-speech" }, websocket) @@ -643,7 +643,7 @@ async def websocket_text_to_speech( # This would integrate with Google Cloud Text-to-Speech API # For now, send a placeholder response - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "audio_data", "audio": "base64_encoded_audio_here", # Placeholder "format": "mp3" @@ -651,19 +651,19 @@ async def websocket_text_to_speech( except Exception as e: logger.error(f"Error processing text-to-speech: {e}") - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "error", "error": f"Failed to process text: {str(e)}" }, websocket) elif message["type"] == "ping": - await manager.send_personal_message({ + await manager.sendPersonalMessage({ "type": "pong", "timestamp": message.get("timestamp") }, websocket) except WebSocketDisconnect: - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) except Exception as e: logger.error(f"WebSocket error: {e}") - manager.disconnect(websocket, connection_id) + manager.disconnect(websocket, connectionId) diff --git a/modules/security/jwtService.py b/modules/security/jwtService.py index 87e226c7..ab5a9392 100644 --- a/modules/security/jwtService.py +++ b/modules/security/jwtService.py @@ -9,7 +9,7 @@ from fastapi import Response from jose import jwt from modules.shared.configuration import APP_CONFIG -from modules.shared.timezoneUtils import get_utc_now +from modules.shared.timezoneUtils import getUtcNow # Config SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET") @@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T import uuid toEncode["jti"] = str(uuid.uuid4()) - expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) + expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) toEncode.update({"exp": expire}) encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM) return encodedJwt, expire @@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> Tuple[str, "datetime"]: toEncode["jti"] = str(uuid.uuid4()) toEncode["type"] = "refresh" - expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) + expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) toEncode.update({"exp": expire}) encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM) return encodedJwt, expire diff --git a/modules/security/tokenManager.py b/modules/security/tokenManager.py index 07c5c2a9..42c4a7cf 100644 --- a/modules/security/tokenManager.py +++ b/modules/security/tokenManager.py @@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable from modules.datamodels.datamodelSecurity import Token from modules.datamodels.datamodelUam import AuthAuthority from modules.shared.configuration import 
APP_CONFIG -from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp logger = logging.getLogger(__name__) @@ -27,54 +27,54 @@ class TokenManager: self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID") self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET") - def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]: + def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]: """Refresh Microsoft OAuth token using refresh token""" try: - logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}") - logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}") + logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}") + logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}") if not self.msft_client_id or not self.msft_client_secret: logger.error("Microsoft OAuth configuration not found") return None # Microsoft token refresh endpoint - token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token" - logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}") + tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token" + logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}") # Prepare refresh request data = { "client_id": self.msft_client_id, "client_secret": self.msft_client_secret, "grant_type": "refresh_token", - "refresh_token": refresh_token, + "refresh_token": refreshToken, "scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read" } - logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})") + logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})") # Make refresh request with httpx.Client(timeout=30.0) as client: - logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint") - response = client.post(token_url, data=data) - logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}") + logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint") + response = client.post(tokenUrl, data=data) + logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}") if response.status_code == 200: - token_data = response.json() - logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token") + tokenData = response.json() + logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token") # Create new token - new_token = Token( - userId=user_id, + newToken = Token( + userId=userId, authority=AuthAuthority.MSFT, - connectionId=old_token.connectionId, # Preserve connection ID - tokenAccess=token_data["access_token"], - tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided - tokenType=token_data.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)), - createdAt=get_utc_timestamp() + connectionId=oldToken.connectionId, # Preserve 
connection ID + tokenAccess=tokenData["access_token"], + tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided + tokenType=tokenData.get("token_type", "bearer"), + expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)), + createdAt=getUtcTimestamp() ) - logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}") - return new_token + logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}") + return newToken else: logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}") return None @@ -83,70 +83,70 @@ class TokenManager: logger.error(f"Error refreshing Microsoft token: {str(e)}") return None - def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]: + def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]: """Refresh Google OAuth token using refresh token""" try: - logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}") - logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}") + logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}") + logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}") if not self.google_client_id or not self.google_client_secret: logger.error("Google OAuth configuration not found") return None # Google token refresh endpoint - token_url = "https://oauth2.googleapis.com/token" - logger.debug(f"refresh_google_token: Using token URL: {token_url}") + tokenUrl = "https://oauth2.googleapis.com/token" + logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}") # Prepare refresh request data = { "client_id": self.google_client_id, "client_secret": self.google_client_secret, "grant_type": "refresh_token", - "refresh_token": refresh_token + "refresh_token": refreshToken } - logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})") + logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})") # Make refresh request with httpx.Client(timeout=30.0) as client: - logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint") - response = client.post(token_url, data=data) - logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}") + logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint") + response = client.post(tokenUrl, data=data) + logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}") if response.status_code == 200: - token_data = response.json() - logger.debug(f"refresh_google_token: Token refresh successful, creating new token") + tokenData = response.json() + logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token") # Validate the response contains required fields - if "access_token" not in token_data: + if "access_token" not in tokenData: logger.error("Google token refresh response missing access_token") return None # Create new token - new_token = Token( - userId=user_id, + newToken = Token( + userId=userId, authority=AuthAuthority.GOOGLE, - connectionId=old_token.connectionId, # Preserve connection ID - 
tokenAccess=token_data["access_token"], - tokenRefresh=token_data.get("refresh_token", refresh_token), # Use new refresh token if provided - tokenType=token_data.get("token_type", "bearer"), - expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)), - createdAt=get_utc_timestamp() + connectionId=oldToken.connectionId, # Preserve connection ID + tokenAccess=tokenData["access_token"], + tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided + tokenType=tokenData.get("token_type", "bearer"), + expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)), + createdAt=getUtcTimestamp() ) - logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}") - return new_token + logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}") + return newToken else: - error_details = response.text - logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}") + errorDetails = response.text + logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}") # Handle specific error cases if response.status_code == 400: try: - error_data = response.json() - error_code = error_data.get("error") - if error_code == "invalid_grant": + errorData = response.json() + errorCode = errorData.get("error") + if errorCode == "invalid_grant": logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate") - elif error_code == "invalid_client": + elif errorCode == "invalid_client": logger.error("Google OAuth client configuration is invalid") except: pass @@ -157,55 +157,55 @@ class TokenManager: logger.error(f"Error refreshing Google token: {str(e)}") return None - def refresh_token(self, old_token: Token) -> Optional[Token]: + def refreshToken(self, oldToken: Token) -> Optional[Token]: """Refresh an expired token using the appropriate OAuth service""" try: - logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}") - logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}") + logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}") + logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}") # Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly # Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed try: - now_ts = get_utc_timestamp() - created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0 - seconds_since_last_refresh = now_ts - created_ts - if seconds_since_last_refresh < 10 * 60: + nowTs = getUtcTimestamp() + createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0 + secondsSinceLastRefresh = nowTs - createdTs + if secondsSinceLastRefresh < 10 * 60: logger.info( - f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. " - f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)." + f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. " + f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)." 
) # Return the existing token to avoid caller errors while preventing provider rate limits - return old_token + return oldToken except Exception: # If any issue reading timestamps, proceed with normal refresh to be safe pass - if not old_token.tokenRefresh: - logger.warning(f"No refresh token available for {old_token.authority}") + if not oldToken.tokenRefresh: + logger.warning(f"No refresh token available for {oldToken.authority}") return None # Route to appropriate refresh method - if old_token.authority == AuthAuthority.MSFT: - logger.debug(f"refresh_token: Refreshing Microsoft token") - return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token) - elif old_token.authority == AuthAuthority.GOOGLE: - logger.debug(f"refresh_token: Refreshing Google token") - return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token) + if oldToken.authority == AuthAuthority.MSFT: + logger.debug(f"refreshToken: Refreshing Microsoft token") + return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken) + elif oldToken.authority == AuthAuthority.GOOGLE: + logger.debug(f"refreshToken: Refreshing Google token") + return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken) else: - logger.warning(f"Unknown authority for token refresh: {old_token.authority}") + logger.warning(f"Unknown authority for token refresh: {oldToken.authority}") return None except Exception as e: logger.error(f"Error refreshing token: {str(e)}") return None - def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]: + def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]: """Ensure a token is fresh; refresh if expiring within threshold. Args: token: Existing token to validate/refresh. - seconds_before_expiry: Threshold window to proactively refresh. - save_callback: Optional function to persist a refreshed token. + secondsBeforeExpiry: Threshold window to proactively refresh. + saveCallback: Optional function to persist a refreshed token. Returns: A fresh token (refreshed or original) or None if refresh failed. @@ -214,31 +214,31 @@ class TokenManager: if token is None: return None - now_ts = get_utc_timestamp() - expires_at = token.expiresAt or 0 + nowTs = getUtcTimestamp() + expiresAt = token.expiresAt or 0 # If token expires within the threshold, try to refresh - if expires_at and expires_at < (now_ts + seconds_before_expiry): + if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry): logger.info( - f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon " - f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh." + f"ensureFreshToken: Token for connection {token.connectionId} expiring soon " + f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh." 
) - refreshed = self.refresh_token(token) + refreshed = self.refreshToken(token) if refreshed: - if save_callback is not None: + if saveCallback is not None: try: - save_callback(refreshed) + saveCallback(refreshed) except Exception as e: - logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}") + logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}") return refreshed else: - logger.warning("ensure_fresh_token: Token refresh failed") + logger.warning("ensureFreshToken: Token refresh failed") return None # Token is sufficiently fresh return token except Exception as e: - logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}") + logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}") return None # Convenience wrapper to fetch and ensure fresh token for a connection via interface layer @@ -256,10 +256,10 @@ class TokenManager: token = interfaceDbApp.getConnectionToken(connectionId) if not token: return None - return self.ensure_fresh_token( + return self.ensureFreshToken( token, - seconds_before_expiry=secondsBeforeExpiry, - save_callback=lambda t: interfaceDbApp.saveConnectionToken(t) + secondsBeforeExpiry=secondsBeforeExpiry, + saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t) ) except Exception as e: logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}") diff --git a/modules/security/tokenRefreshMiddleware.py b/modules/security/tokenRefreshMiddleware.py index 2ced531c..b7131a40 100644 --- a/modules/security/tokenRefreshMiddleware.py +++ b/modules/security/tokenRefreshMiddleware.py @@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware from typing import Callable import asyncio from modules.security.tokenRefreshService import token_refresh_service -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -135,7 +135,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware): try: # Perform proactive refresh in background asyncio.create_task(self._proactive_refresh_tokens(user_id)) - self.last_check[user_id] = get_utc_timestamp() + self.last_check[user_id] = getUtcTimestamp() except Exception as e: logger.warning(f"Error scheduling proactive refresh: {str(e)}") @@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware): Check if we should perform proactive refresh for this user """ try: - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() last_check = self.last_check.get(user_id, 0) # Check every 5 minutes diff --git a/modules/security/tokenRefreshService.py b/modules/security/tokenRefreshService.py index ff7ac720..24a99e3b 100644 --- a/modules/security/tokenRefreshService.py +++ b/modules/security/tokenRefreshService.py @@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues. 
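The renamed ensureFreshToken above boils down to a simple expiry-window check before delegating to refreshToken. A minimal sketch of that condition, assuming getUtcTimestamp() returns seconds since the epoch (as the surrounding arithmetic implies):

# Sketch only: the proactive-refresh condition used by ensureFreshToken.
from modules.shared.timezoneUtils import getUtcTimestamp

def isExpiringSoon(expiresAt: float, secondsBeforeExpiry: int = 30 * 60) -> bool:
    # A missing or zero expiry is treated as "not expiring soon", matching the guard above.
    if not expiresAt:
        return False
    return expiresAt < (getUtcTimestamp() + secondsBeforeExpiry)

When the check fires, refreshToken is attempted and, if a saveCallback was supplied, the refreshed token is persisted through it; when it does not fire, the original token is returned unchanged.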
import logging from typing import Dict, Any from modules.datamodels.datamodelUam import UserConnection, AuthAuthority -from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.timezoneUtils import getUtcTimestamp from modules.shared.auditLogger import audit_logger logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class TokenRefreshService: def _is_rate_limited(self, connection_id: str) -> bool: """Check if connection is rate limited for refresh attempts""" - now = get_utc_timestamp() + now = getUtcTimestamp() if connection_id not in self.rate_limit_map: return False @@ -39,7 +39,7 @@ class TokenRefreshService: def _record_refresh_attempt(self, connection_id: str) -> None: """Record a refresh attempt for rate limiting""" - now = get_utc_timestamp() + now = getUtcTimestamp() if connection_id not in self.rate_limit_map: self.rate_limit_map[connection_id] = [] self.rate_limit_map[connection_id].append(now) @@ -60,14 +60,14 @@ class TokenRefreshService: token_manager = TokenManager() # Attempt to refresh the token - refreshed_token = token_manager.refresh_token(current_token) - if refreshed_token: + refreshedToken = token_manager.refreshToken(current_token) + if refreshedToken: # Save the refreshed token - interface.saveConnectionToken(refreshed_token) + interface.saveConnectionToken(refreshedToken) # Update connection status interface.db.recordModify(UserConnection, connection.id, { - "lastChecked": get_utc_timestamp(), + "lastChecked": getUtcTimestamp(), "expiresAt": refreshed_token.expiresAt }) @@ -75,9 +75,9 @@ class TokenRefreshService: # Log audit event try: - audit_logger.log_security_event( - user_id=str(connection.userId), - mandate_id="system", + audit_logger.logSecurityEvent( + userId=str(connection.userId), + mandateId="system", action="token_refresh", details=f"Google token refreshed for connection {connection.id}" ) @@ -109,14 +109,14 @@ class TokenRefreshService: token_manager = TokenManager() # Attempt to refresh the token - refreshed_token = token_manager.refresh_token(current_token) - if refreshed_token: + refreshedToken = token_manager.refreshToken(current_token) + if refreshedToken: # Save the refreshed token - interface.saveConnectionToken(refreshed_token) + interface.saveConnectionToken(refreshedToken) # Update connection status interface.db.recordModify(UserConnection, connection.id, { - "lastChecked": get_utc_timestamp(), + "lastChecked": getUtcTimestamp(), "expiresAt": refreshed_token.expiresAt }) @@ -124,9 +124,9 @@ class TokenRefreshService: # Log audit event try: - audit_logger.log_security_event( - user_id=str(connection.userId), - mandate_id="system", + audit_logger.logSecurityEvent( + userId=str(connection.userId), + mandateId="system", action="token_refresh", details=f"Microsoft token refreshed for connection {connection.id}" ) @@ -234,7 +234,7 @@ class TokenRefreshService: refreshed_count = 0 failed_count = 0 rate_limited_count = 0 - current_time = get_utc_timestamp() + current_time = getUtcTimestamp() five_minutes = 5 * 60 # 5 minutes in seconds # Process each connection diff --git a/modules/services/__init__.py b/modules/services/__init__.py index 0f269e28..87b13207 100644 --- a/modules/services/__init__.py +++ b/modules/services/__init__.py @@ -11,18 +11,18 @@ class PublicService: - Optional name_filter predicate for allow-list patterns """ - def __init__(self, target: Any, functions_only: bool = True, name_filter=None): + def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None): self._target = target - 
self._functions_only = functions_only - self._name_filter = name_filter + self._functionsOnly = functionsOnly + self._nameFilter = nameFilter def __getattr__(self, name: str): if name.startswith('_'): raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private") - if self._name_filter and not self._name_filter(name): + if self._nameFilter and not self._nameFilter(name): raise AttributeError(f"'{name}' not exposed by policy") attr = getattr(self._target, name) - if self._functions_only and not callable(attr): + if self._functionsOnly and not callable(attr): raise AttributeError(f"'{name}' is not a function") return attr @@ -30,8 +30,8 @@ class PublicService: names = [ n for n in dir(self._target) if not n.startswith('_') - and (not self._functions_only or callable(getattr(self._target, n, None))) - and (self._name_filter(n) if self._name_filter else True) + and (not self._functionsOnly or callable(getattr(self._target, n, None))) + and (self._nameFilter(n) if self._nameFilter else True) ] return sorted(names) @@ -70,7 +70,7 @@ class Services: self.sharepoint = PublicService(SharepointService(self)) from .serviceAi.mainServiceAi import AiService - self.ai = PublicService(AiService(self)) + self.ai = PublicService(AiService(self), functionsOnly=False) from .serviceTicket.mainServiceTicket import TicketService self.ticket = PublicService(TicketService(self)) diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index b1326967..87b56ceb 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1,30 +1,26 @@ +import json import logging -from typing import Dict, Any, List, Optional, Union +import time +from typing import Dict, Any, List, Optional, Tuple, Union from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum from modules.interfaces.interfaceAiObjects import AiObjects -from modules.services.serviceAi.subCoreAi import SubCoreAi from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing -from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration -from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent - +from modules.shared.jsonUtils import ( + extractJsonString, + repairBrokenJson, + extractSectionsFromDocument, + buildContinuationContext +) logger = logging.getLogger(__name__) +# Rebuild the model to resolve forward references +AiCallRequest.model_rebuild() + class AiService: - """Lightweight AI service orchestrator that delegates to specialized sub-modules. - - Manager delegates to specialized sub-modules: - - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls) - - SubDocumentProcessing: Document chunking, processing, and merging logic - - SubDocumentGeneration: Single-file and multi-file document generation - - The main service acts as a coordinator: - 1. Manages lazy initialization of sub-modules - 2. Delegates operations to appropriate sub-modules - 3. 
Maintains the same public API for backward compatibility - """ + """AI service with core operations integrated.""" def __init__(self, serviceCenter=None) -> None: """Initialize AI service with service center access. @@ -34,64 +30,638 @@ class AiService: """ self.services = serviceCenter # Only depend on interfaces - self.aiObjects = None # Will be initialized in create() - self._extractionService = None # Lazy initialization - self._coreAi = None # Lazy initialization - self._documentProcessor = None # Lazy initialization - self._documentGenerator = None # Lazy initialization + self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized() + # Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready + self.extractionService = None + self.documentProcessor = None - @property - def extractionService(self): - """Lazy initialization of extraction service.""" - if self._extractionService is None: - logger.info("Lazy initializing ExtractionService...") - self._extractionService = ExtractionService(self.services) - return self._extractionService - - @property - def coreAi(self): - """Lazy initialization of core AI service.""" - if self._coreAi is None: - if self.aiObjects is None: - raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()") - logger.info("Lazy initializing SubCoreAi...") - self._coreAi = SubCoreAi(self.services, self.aiObjects) - return self._coreAi - - @property - def documentProcessor(self): - """Lazy initialization of document processing service.""" - if self._documentProcessor is None: - logger.info("Lazy initializing SubDocumentProcessing...") - self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects) - return self._documentProcessor - - - @property - def documentGenerator(self): - """Lazy initialization of document generation service.""" - if self._documentGenerator is None: - logger.info("Lazy initializing SubDocumentGeneration...") - self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor) - return self._documentGenerator + def _initializeSubmodules(self): + """Initialize all submodules after aiObjects is ready.""" + if self.aiObjects is None: + raise RuntimeError("aiObjects must be initialized before initializing submodules") + + if self.extractionService is None: + logger.info("Initializing ExtractionService...") + self.extractionService = ExtractionService(self.services) + + if self.documentProcessor is None: + logger.info("Initializing SubDocumentProcessing...") + self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects) async def _ensureAiObjectsInitialized(self): - """Ensure aiObjects is initialized.""" + """Ensure aiObjects is initialized and submodules are ready.""" if self.aiObjects is None: logger.info("Lazy initializing AiObjects...") self.aiObjects = await AiObjects.create() logger.info("AiObjects initialization completed") + # Initialize submodules after aiObjects is ready + self._initializeSubmodules() @classmethod async def create(cls, serviceCenter=None) -> "AiService": - """Create AiService instance with all connectors initialized.""" + """Create AiService instance with all connectors and submodules initialized.""" logger.info("AiService.create() called") instance = cls(serviceCenter) logger.info("AiService created, about to call AiObjects.create()...") instance.aiObjects = await AiObjects.create() 
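One behavioural consequence of passing functionsOnly=False for the AI service in modules/services/__init__.py is that plain, non-callable attributes of the wrapped AiService (such as aiObjects or documentProcessor, which are now ordinary attributes rather than lazy properties) stay reachable through the PublicService facade; that is presumably the motivation for the flag. A small usage sketch of the facade semantics, with the bare construction used purely for illustration:

# Illustrative only: PublicService access rules as defined by __getattr__ above.
ai = PublicService(AiService(None), functionsOnly=False)

ai.callAiPlanning   # exposed: public name and callable
ai.aiObjects        # exposed only because functionsOnly=False (plain attribute)
try:
    ai._initializeSubmodules
except AttributeError:
    pass            # leading-underscore names are always blocked by the facade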
logger.info("AiObjects.create() completed") + # Initialize all submodules after aiObjects is ready + instance._initializeSubmodules() + logger.info("AiService submodules initialized") return instance + # Helper methods + + def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str: + """ + Build full prompt by replacing placeholders with their content. + Uses the new {{KEY:placeholder}} format. + + Args: + prompt: The base prompt template + placeholders: Dictionary of placeholder key-value pairs + + Returns: + Prompt with placeholders replaced + """ + if not placeholders: + return prompt + + full_prompt = prompt + for placeholder, content in placeholders.items(): + # Skip if content is None or empty + if content is None: + continue + # Replace {{KEY:placeholder}} + full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content)) + + return full_prompt + + async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions: + """Analyze prompt to determine appropriate AiCallOptions parameters.""" + try: + # Get dynamic enum values from Pydantic models + operationTypes = [e.value for e in OperationTypeEnum] + priorities = [e.value for e in PriorityEnum] + processingModes = [e.value for e in ProcessingModeEnum] + + # Create analysis prompt for AI to determine operation type and parameters + analysisPrompt = f""" +You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters. + +PROMPT TO ANALYZE: +{self.services.utils.sanitizePromptContent(prompt, 'userinput')} + +Based on the prompt content, determine: +1. operationType: Choose the most appropriate from: {', '.join(operationTypes)} +2. priority: Choose from: {', '.join(priorities)} +3. processingMode: Choose from: {', '.join(processingModes)} +4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas) +5. 
compressContext: true/false (true to summarize context, false to process fully) + +Respond with ONLY a JSON object in this exact format: +{{ + "operationType": "dataAnalyse", + "priority": "balanced", + "processingMode": "basic", + "compressPrompt": true, + "compressContext": true +}} +""" + + # Use AI to analyze the prompt + request = AiCallRequest( + prompt=analysisPrompt, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.SPEED, + processingMode=ProcessingModeEnum.BASIC, + compressPrompt=True, + compressContext=False + ) + ) + + response = await self.aiObjects.call(request) + + # Parse AI response + try: + jsonStart = response.content.find('{') + jsonEnd = response.content.rfind('}') + 1 + if jsonStart != -1 and jsonEnd > jsonStart: + analysis = json.loads(response.content[jsonStart:jsonEnd]) + + # Map string values to enums + operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse')) + priority = PriorityEnum(analysis.get('priority', 'balanced')) + processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic')) + + return AiCallOptions( + operationType=operationType, + priority=priority, + processingMode=processingMode, + compressPrompt=analysis.get('compressPrompt', True), + compressContext=analysis.get('compressContext', True) + ) + except Exception as e: + logger.warning(f"Failed to parse AI analysis response: {e}") + + except Exception as e: + logger.warning(f"Prompt analysis failed: {e}") + + # Fallback to default options + return AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.BASIC + ) + + async def _callAiWithLooping( + self, + prompt: str, + options: AiCallOptions, + debugPrefix: str = "ai_call", + promptBuilder: Optional[callable] = None, + promptArgs: Optional[Dict[str, Any]] = None, + operationId: Optional[str] = None + ) -> str: + """ + Shared core function for AI calls with repair-based looping system. + Automatically repairs broken JSON and continues generation seamlessly. 
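For reference, the {{KEY:placeholder}} convention handled by _buildPromptWithPlaceholders above is easiest to see with a concrete value; the placeholder names and content here are invented for illustration:

# Illustration of {{KEY:placeholder}} substitution; names and content are made up.
template = (
    "Summarize the ticket in {{KEY:language}}.\n\n"
    "Ticket body:\n{{KEY:ticketBody}}"
)
placeholders = {"language": "German", "ticketBody": "Printer on floor 3 jams on duplex jobs."}

prompt = template
for key, content in placeholders.items():
    if content is None:
        continue  # None values are skipped, exactly as in the helper above
    prompt = prompt.replace(f"{{{{KEY:{key}}}}}", str(content))
# prompt now asks for a German summary with the ticket body filled in.

callAiPlanning builds its placeholder dictionary from PromptPlaceholder objects ({p.label: p.content}) before applying the same substitution.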
+ + Args: + prompt: The prompt to send to AI + options: AI call configuration options + debugPrefix: Prefix for debug file names + promptBuilder: Optional function to rebuild prompts for continuation + promptArgs: Optional arguments for prompt builder + operationId: Optional operation ID for progress tracking + + Returns: + Complete AI response after all iterations + """ + maxIterations = 50 # Prevent infinite loops + iteration = 0 + allSections = [] # Accumulate all sections across iterations + lastRawResponse = None # Store last raw JSON response for continuation + + while iteration < maxIterations: + iteration += 1 + + # Update progress for iteration start + if operationId: + if iteration == 1: + self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}") + else: + # For continuation iterations, show progress incrementally + baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4) # Progress from 0.5 to 0.9 over maxIterations iterations + self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})") + + # Build iteration prompt + if len(allSections) > 0 and promptBuilder and promptArgs: + # This is a continuation - build continuation context with raw JSON and rebuild prompt + continuationContext = buildContinuationContext(allSections, lastRawResponse) + if not lastRawResponse: + logger.warning(f"Iteration {iteration}: No previous response available for continuation!") + + # Rebuild prompt with continuation context using the provided prompt builder + iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext) + else: + # First iteration - use original prompt + iterationPrompt = prompt + + # Make AI call + try: + if operationId and iteration == 1: + self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model") + request = AiCallRequest( + prompt=iterationPrompt, + context="", + options=options + ) + + # Write the ACTUAL prompt sent to AI + if iteration == 1: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") + else: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") + + response = await self.aiObjects.call(request) + result = response.content + + # Update progress after AI call + if operationId: + if iteration == 1: + self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})") + else: + progress = 0.6 + (min(iteration - 1, 10) * 0.03) + self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})") + + # Write raw AI response to debug file + if iteration == 1: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") + else: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") + + # Emit stats for this iteration + self.services.workflow.storeWorkflowStat( + self.services.currentWorkflow, + response, + f"ai.call.{debugPrefix}.iteration_{iteration}" + ) + + if not result or not result.strip(): + logger.warning(f"Iteration {iteration}: Empty response, stopping") + break + + # Store raw response for continuation (even if broken) + lastRawResponse = result + + # Check for complete_response flag in raw response (before parsing) + import re + if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE): + pass # Flag detected, will stop in _shouldContinueGeneration + + # Extract sections from 
response (handles both valid and broken JSON) + extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix) + + # Update progress after parsing + if operationId: + if extractedSections: + self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})") + + if not extractedSections: + # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry + if iteration > 1 and not wasJsonComplete: + logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt") + continue + # Otherwise, stop if no sections + logger.warning(f"Iteration {iteration}: No sections extracted, stopping") + break + + # Add new sections to accumulator + allSections.extend(extractedSections) + + # Check if we should continue (completion detection) + if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result): + continue + else: + # Done - build final result + if operationId: + self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)") + break + + except Exception as e: + logger.error(f"Error in AI call iteration {iteration}: {str(e)}") + break + + if iteration >= maxIterations: + logger.warning(f"AI call stopped after maximum iterations ({maxIterations})") + + # Build final result from accumulated sections + final_result = self._buildFinalResultFromSections(allSections) + + # Write final result to debug file + self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result") + + return final_result + + def _extractSectionsFromResponse( + self, + result: str, + iteration: int, + debugPrefix: str + ) -> Tuple[List[Dict[str, Any]], bool]: + """ + Extract sections from AI response, handling both valid and broken JSON. + Uses repair mechanism for broken JSON. + Checks for "complete_response": true flag to determine completion. 
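To make the control flow of the repair-based loop above easier to follow, here is a stripped-down sketch that consumes an already-collected list of raw model responses; the real method re-prompts the model between iterations, tracks progress, and writes debug files. Helper behaviour (repairBrokenJson returning a parsed structure or a falsy value, extractSectionsFromDocument returning a list of section dicts) is inferred from how the code above uses them:

import json
import re
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument

def collectSections(rawResponses):
    # Accumulate sections across possibly-truncated JSON responses:
    # parse when valid, repair when broken, stop once the output is complete.
    allSections = []
    for raw in rawResponses:
        try:
            sections = extractSectionsFromDocument(json.loads(extractJsonString(raw)))
            wasComplete = True
        except json.JSONDecodeError:
            repaired = repairBrokenJson(raw)
            sections = extractSectionsFromDocument(repaired) if repaired else []
            wasComplete = False
        allSections.extend(sections)
        # The model may also flag completion explicitly, even in a repaired fragment.
        if wasComplete or re.search(r'"complete_response"\s*:\s*true', raw, re.IGNORECASE):
            break
    return allSections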
+ Returns (sections, wasJsonComplete) + """ + # First, try to parse as valid JSON + try: + extracted = extractJsonString(result) + parsed_result = json.loads(extracted) + + # Check if AI marked response as complete + isComplete = parsed_result.get("complete_response", False) == True + + # Extract sections from parsed JSON + sections = extractSectionsFromDocument(parsed_result) + + # If AI marked as complete, always return as complete + if isComplete: + return sections, True + + # If in continuation mode (iteration > 1), continuation responses are expected to be fragments + # A fragment with 0 extractable sections means JSON is incomplete - need another iteration + if len(sections) == 0 and iteration > 1: + return sections, False # Mark as incomplete so loop continues + + # First iteration with 0 sections means empty response - stop + if len(sections) == 0: + return sections, True # Complete but empty + + return sections, True # JSON was complete with sections + + except json.JSONDecodeError as e: + # Broken JSON - try repair mechanism (normal in iterative generation) + self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}") + + # Try to repair + repaired_json = repairBrokenJson(result) + + if repaired_json: + # Extract sections from repaired JSON + sections = extractSectionsFromDocument(repaired_json) + return sections, False # JSON was broken but repaired + else: + # Repair failed - log error + logger.error(f"Iteration {iteration}: All repair strategies failed") + return [], False + + except Exception as e: + logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}") + return [], False + + def _shouldContinueGeneration( + self, + allSections: List[Dict[str, Any]], + iteration: int, + wasJsonComplete: bool, + rawResponse: str = None + ) -> bool: + """ + Determine if generation should continue based on JSON completeness and complete_response flag. + Returns True if we should continue, False if done. + """ + if len(allSections) == 0: + return True # No sections yet, continue + + # Check for complete_response flag in raw response + if rawResponse: + import re + if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE): + return False + + # If JSON was complete (and no complete_response flag), we're done + # If JSON was broken and repaired, continue to get more content + if wasJsonComplete: + return False + else: + return True + + def _buildFinalResultFromSections( + self, + allSections: List[Dict[str, Any]] + ) -> str: + """ + Build final JSON result from accumulated sections. + """ + if not allSections: + return "" + + # Build documents structure + # Assuming single document for now + documents = [{ + "id": "doc_1", + "title": "Generated Document", # This should come from prompt + "filename": "document.json", + "sections": allSections + }] + + result = { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": documents + } + + return json.dumps(result, indent=2) + + # Public API Methods + + # Planning AI Call + async def callAiPlanning( + self, + prompt: str, + placeholders: Optional[List[PromptPlaceholder]] = None + ) -> str: + """ + Planning AI call for task planning, action planning, action selection, etc. + Always uses static parameters optimized for planning tasks. 
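The envelope that _buildFinalResultFromSections serialises is easiest to see as a concrete value. The single section below is a placeholder, since this diff does not spell out the internal shape of a section (it is whatever the jsonUtils extractors return), and the document title is currently hard-coded, as the comment in the code above notes:

# Example of the structure json.dumps'd by _buildFinalResultFromSections.
finalResult = {
    "metadata": {
        "split_strategy": "single_document",
        "source_documents": [],
        "extraction_method": "ai_generation",
    },
    "documents": [
        {
            "id": "doc_1",
            "title": "Generated Document",
            "filename": "document.json",
            "sections": [
                {"heading": "Overview", "content": "One accumulated section."},  # placeholder section
            ],
        }
    ],
}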
+ + Args: + prompt: The planning prompt + placeholders: Optional list of placeholder replacements + + Returns: + Planning JSON response + """ + await self._ensureAiObjectsInitialized() + + # Planning calls always use static parameters + options = AiCallOptions( + operationType=OperationTypeEnum.PLAN, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + compressPrompt=False, + compressContext=False + ) + + # Build full prompt with placeholders + if placeholders: + placeholdersDict = {p.label: p.content for p in placeholders} + fullPrompt = self._buildPromptWithPlaceholders(prompt, placeholdersDict) + else: + fullPrompt = prompt + + # Root-cause fix: planning must return raw single-shot JSON, not section-based output + request = AiCallRequest( + prompt=fullPrompt, + context="", + options=options + ) + + # Debug: persist prompt/response for analysis + self.services.utils.writeDebugFile(fullPrompt, "plan_prompt") + response = await self.aiObjects.call(request) + result = response.content or "" + self.services.utils.writeDebugFile(result, "plan_response") + return result + + # Document Generation AI Call + async def callAiDocuments( + self, + prompt: str, + documents: Optional[List[ChatDocument]] = None, + options: Optional[AiCallOptions] = None, + outputFormat: Optional[str] = None, + title: Optional[str] = None + ) -> Union[str, Dict[str, Any]]: + """ + Document generation AI call for all non-planning calls. + Uses the current unified path with extraction and generation. + + Args: + prompt: The main prompt for the AI call + documents: Optional list of documents to process + options: AI call configuration options + outputFormat: Optional output format for document generation + title: Optional title for generated documents + + Returns: + AI response as string, or dict with documents if outputFormat is specified + """ + await self._ensureAiObjectsInitialized() + + # Create separate operationId for detailed progress tracking + workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}" + aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}" + + # Start progress tracking for this operation + self.services.workflow.progressLogStart( + aiOperationId, + "AI call with documents", + "Document Generation", + f"Format: {outputFormat or 'text'}" + ) + + try: + # Ensure AI connectors are initialized before delegating to documentProcessor/generator + if hasattr(self.services, 'ai') and hasattr(self.services.ai, '_ensureAiObjectsInitialized'): + await self.services.ai._ensureAiObjectsInitialized() + if options is None or (hasattr(options, 'operationType') and options.operationType is None): + # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None) + self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters") + options = await self._analyzePromptAndCreateOptions(prompt) + + # Route image-generation requests directly to image pipeline to avoid JSON loop + imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"} + opType = getattr(options, "operationType", None) + fmt = (outputFormat or "").lower() if outputFormat else None + isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats) + if isImageRequest: + self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") + imageResponse = await self.generateImage(prompt, options=options) + 
self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated") + self.services.workflow.progressLogFinish(aiOperationId, True) + return imageResponse + + # CRITICAL: For document generation with JSON templates, NEVER compress the prompt + # Compressing would truncate the template structure and confuse the AI + if outputFormat: # Document generation with structured output + if not options: + options = AiCallOptions() + options.compressPrompt = False # JSON templates must NOT be truncated + options.compressContext = False # Context also should not be compressed + + # Handle document generation with specific output format using unified approach + if outputFormat: + # Use unified generation method for all document generation + if documents and len(documents) > 0: + self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents") + extracted_content = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId) + else: + self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation") + extracted_content = None + + self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") + from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + # First call without continuation context + generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None) + + # Prepare prompt builder arguments for continuation + promptArgs = { + "outputFormat": outputFormat, + "userPrompt": prompt, + "title": title, + "extracted_content": extracted_content + } + + self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") + generated_json = await self._callAiWithLooping( + generation_prompt, + options, + "document_generation", + buildGenerationPrompt, + promptArgs, + aiOperationId + ) + + self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") + # Parse the generated JSON (extract fenced/embedded JSON first) + try: + extracted_json = self.services.utils.jsonExtractString(generated_json) + generated_data = json.loads(extracted_json) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse generated JSON: {str(e)}") + logger.error(f"JSON content length: {len(generated_json)}") + logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}") + logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}") + + # Write the problematic JSON to debug file + self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") + + self.services.workflow.progressLogFinish(aiOperationId, False) + return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"} + + self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format") + # Render to final format using the existing renderer + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + rendered_content, mime_type = await generationService.renderReport( + generated_data, outputFormat, title or "Generated Document", prompt, self + ) + + # Build result in the expected format + result = { + "success": True, + "content": generated_data, + "documents": [{ + "documentName": f"generated.{outputFormat}", + "documentData": rendered_content, + "mimeType": mime_type, + "title": title or 
"Generated Document" + }], + "is_multi_file": False, + "format": outputFormat, + "title": title, + "split_strategy": "single", + "total_documents": 1, + "processed_documents": 1 + } + + # Log AI response for debugging + self.services.utils.writeDebugFile(str(result), "document_generation_response", documents) + + self.services.workflow.progressLogFinish(aiOperationId, True) + return result + + except Exception as e: + logger.error(f"Error rendering document: {str(e)}") + self.services.workflow.progressLogFinish(aiOperationId, False) + return {"success": False, "error": f"Rendering failed: {str(e)}"} + + # Handle text calls (no output format specified) + self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call") + if documents: + # Use document processing for text calls with documents + result = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId) + else: + # Use shared core function for direct text calls + result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId) + + self.services.workflow.progressLogFinish(aiOperationId, True) + return result + + except Exception as e: + logger.error(f"Error in callAiDocuments: {str(e)}") + self.services.workflow.progressLogFinish(aiOperationId, False) + raise + # AI Image Analysis async def readImage( self, @@ -102,7 +672,64 @@ class AiService: ) -> str: """Call AI for image analysis using interface.call() with contentParts.""" await self._ensureAiObjectsInitialized() - return await self.coreAi.readImage(prompt, imageData, mimeType, options) + + try: + # Check if imageData is valid + if not imageData: + error_msg = "No image data provided" + logger.error(f"Error in AI image analysis: {error_msg}") + return f"Error: {error_msg}" + + + # Always use IMAGE_ANALYSE operation type for image processing + if options is None: + options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE) + else: + # Override the operation type to ensure image analysis + options.operationType = OperationTypeEnum.IMAGE_ANALYSE + + # Create content parts with image data + from modules.datamodels.datamodelExtraction import ContentPart + import base64 + + # ContentPart.data must be a string - convert bytes to base64 if needed + if isinstance(imageData, bytes): + imageDataStr = base64.b64encode(imageData).decode('utf-8') + else: + # Already a base64 string + imageDataStr = imageData + + imagePart = ContentPart( + id="image_0", + parentId=None, + label="Image", + typeGroup="image", + mimeType=mimeType or "image/jpeg", + data=imageDataStr, # Must be a string (base64 encoded) + metadata={"imageAnalysis": True} + ) + + # Create request with content parts + request = AiCallRequest( + prompt=prompt, + context="", + options=options, + contentParts=[imagePart] + ) + + response = await self.aiObjects.call(request) + result = response.content + + # Check if result is valid + if not result or (isinstance(result, str) and not result.strip()): + error_msg = f"No response from AI image analysis (result: {repr(result)})" + logger.error(f"Error in AI image analysis: {error_msg}") + return f"Error: {error_msg}" + + return result + except Exception as e: + logger.error(f"Error in AI image analysis: {str(e)}") + return f"Error: {str(e)}" # AI Image Generation async def generateImage( @@ -115,34 +742,19 @@ class AiService: ) -> Dict[str, Any]: """Generate an image using AI using interface.generateImage().""" await self._ensureAiObjectsInitialized() - return await self.coreAi.generateImage(prompt, size, 
quality, style, options) - - - # Core AI Methods - Delegating to SubCoreAi - async def callAiPlanning( - self, - prompt: str, - placeholders: Optional[List[PromptPlaceholder]] = None - ) -> str: - """Planning AI call for task planning, action planning, action selection, etc.""" - await self._ensureAiObjectsInitialized() - # Always use "json" for planning calls since they return JSON - return await self.coreAi.callAiPlanning(prompt, placeholders) - - async def callAiDocuments( - self, - prompt: str, - documents: Optional[List[ChatDocument]] = None, - options: Optional[AiCallOptions] = None, - outputFormat: Optional[str] = None, - title: Optional[str] = None - ) -> Union[str, Dict[str, Any]]: - """Document generation AI call for all non-planning calls.""" - await self._ensureAiObjectsInitialized() - return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title) - - def sanitizePromptContent(self, content: str, contentType: str = "text") -> str: - """Sanitize prompt content to prevent injection attacks and ensure safe presentation.""" - return sanitizePromptContent(content, contentType) - + + try: + response = await self.aiObjects.generateImage(prompt, size, quality, style, options) + + # Emit stats for image generation + self.services.workflow.storeWorkflowStat( + self.services.currentWorkflow, + response, + f"ai.generate.image" + ) + + return response + except Exception as e: + logger.error(f"Error in AI image generation: {str(e)}") + return {"success": False, "error": str(e)} diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py deleted file mode 100644 index e35af0d0..00000000 --- a/modules/services/serviceAi/subCoreAi.py +++ /dev/null @@ -1,687 +0,0 @@ -import json -import logging -from typing import Dict, Any, List, Optional, Tuple, Union -from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument -from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum -from modules.services.serviceAi.subSharedAiUtils import ( - buildPromptWithPlaceholders, - extractTextFromContentParts, - reduceText, - determineCallType -) -from modules.shared.jsonUtils import ( - extractJsonString, - repairBrokenJson, - extractSectionsFromDocument, - buildContinuationContext -) - -logger = logging.getLogger(__name__) - -# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT -# Sections are accumulated and repair mechanism handles broken JSON automatically - -# Rebuild the model to resolve forward references -AiCallRequest.model_rebuild() - - -class SubCoreAi: - """Core AI operations including image analysis, text generation, and planning calls.""" - - def __init__(self, services, aiObjects): - """Initialize core AI operations. - - Args: - services: Service center instance for accessing other services - aiObjects: Initialized AiObjects instance - """ - self.services = services - self.aiObjects = aiObjects - - async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions: - """Analyze prompt to determine appropriate AiCallOptions parameters.""" - try: - # Get dynamic enum values from Pydantic models - operation_types = [e.value for e in OperationTypeEnum] - priorities = [e.value for e in PriorityEnum] - processing_modes = [e.value for e in ProcessingModeEnum] - - # Create analysis prompt for AI to determine operation type and parameters - analysisPrompt = f""" -You are an AI operation analyzer. 
Analyze the following prompt and determine the most appropriate operation type and parameters. - -PROMPT TO ANALYZE: -{self.services.ai.sanitizePromptContent(prompt, 'userinput')} - -Based on the prompt content, determine: -1. operationType: Choose the most appropriate from: {', '.join(operation_types)} -2. priority: Choose from: {', '.join(priorities)} -3. processingMode: Choose from: {', '.join(processing_modes)} -4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas) -5. compressContext: true/false (true to summarize context, false to process fully) - -Respond with ONLY a JSON object in this exact format: -{{ - "operationType": "dataAnalyse", - "priority": "balanced", - "processingMode": "basic", - "compressPrompt": true, - "compressContext": true -}} -""" - - # Use AI to analyze the prompt - request = AiCallRequest( - prompt=analysisPrompt, - options=AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.SPEED, - processingMode=ProcessingModeEnum.BASIC, - compressPrompt=True, - compressContext=False - ) - ) - - response = await self.aiObjects.call(request) - - # Parse AI response - try: - import json - json_start = response.content.find('{') - json_end = response.content.rfind('}') + 1 - if json_start != -1 and json_end > json_start: - analysis = json.loads(response.content[json_start:json_end]) - - # Map string values to enums - operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse')) - priority = PriorityEnum(analysis.get('priority', 'balanced')) - processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic')) - - return AiCallOptions( - operationType=operation_type, - priority=priority, - processingMode=processing_mode, - compressPrompt=analysis.get('compressPrompt', True), - compressContext=analysis.get('compressContext', True) - ) - except Exception as e: - logger.warning(f"Failed to parse AI analysis response: {e}") - - except Exception as e: - logger.warning(f"Prompt analysis failed: {e}") - - # Fallback to default options - return AiCallOptions( - operationType=OperationTypeEnum.DATA_ANALYSE, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.BASIC - ) - - - - # Shared Core Function for AI Calls with Looping and Repair - async def _callAiWithLooping( - self, - prompt: str, - options: AiCallOptions, - debugPrefix: str = "ai_call", - promptBuilder: Optional[callable] = None, - promptArgs: Optional[Dict[str, Any]] = None, - operationId: Optional[str] = None - ) -> str: - """ - Shared core function for AI calls with repair-based looping system. - Automatically repairs broken JSON and continues generation seamlessly. 
- - Args: - prompt: The prompt to send to AI - options: AI call configuration options - debugPrefix: Prefix for debug file names - promptBuilder: Optional function to rebuild prompts for continuation - promptArgs: Optional arguments for prompt builder - operationId: Optional operation ID for progress tracking - - Returns: - Complete AI response after all iterations - """ - max_iterations = 50 # Prevent infinite loops - iteration = 0 - allSections = [] # Accumulate all sections across iterations - lastRawResponse = None # Store last raw JSON response for continuation - - while iteration < max_iterations: - iteration += 1 - - # Update progress for iteration start - if operationId: - if iteration == 1: - self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}") - else: - # For continuation iterations, show progress incrementally - base_progress = 0.5 + (min(iteration - 1, max_iterations) / max_iterations * 0.4) # Progress from 0.5 to 0.9 over max_iterations iterations - self.services.workflow.progressLogUpdate(operationId, base_progress, f"Continuing generation (iteration {iteration})") - - # Build iteration prompt - if len(allSections) > 0 and promptBuilder and promptArgs: - # This is a continuation - build continuation context with raw JSON and rebuild prompt - continuationContext = buildContinuationContext(allSections, lastRawResponse) - if not lastRawResponse: - logger.warning(f"Iteration {iteration}: No previous response available for continuation!") - - # Rebuild prompt with continuation context using the provided prompt builder - iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext) - else: - # First iteration - use original prompt - iterationPrompt = prompt - - # Make AI call - try: - if operationId and iteration == 1: - self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model") - from modules.datamodels.datamodelAi import AiCallRequest - request = AiCallRequest( - prompt=iterationPrompt, - context="", - options=options - ) - - # Write the ACTUAL prompt sent to AI - if iteration == 1: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") - else: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") - - response = await self.aiObjects.call(request) - result = response.content - - # Update progress after AI call - if operationId: - if iteration == 1: - self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})") - else: - progress = 0.6 + (min(iteration - 1, 10) * 0.03) - self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})") - - # Write raw AI response to debug file - if iteration == 1: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") - else: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") - - # Emit stats for this iteration - self.services.workflow.storeWorkflowStat( - self.services.currentWorkflow, - response, - f"ai.call.{debugPrefix}.iteration_{iteration}" - ) - - if not result or not result.strip(): - logger.warning(f"Iteration {iteration}: Empty response, stopping") - break - - # Store raw response for continuation (even if broken) - lastRawResponse = result - - # Check for complete_response flag in raw response (before parsing) - import re - if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE): - pass # Flag detected, 
will stop in _shouldContinueGeneration - - # Extract sections from response (handles both valid and broken JSON) - extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix) - - # Update progress after parsing - if operationId: - if extractedSections: - self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})") - - if not extractedSections: - # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry - if iteration > 1 and not wasJsonComplete: - logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt") - continue - # Otherwise, stop if no sections - logger.warning(f"Iteration {iteration}: No sections extracted, stopping") - break - - # Add new sections to accumulator - allSections.extend(extractedSections) - - # Check if we should continue (completion detection) - if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result): - continue - else: - # Done - build final result - if operationId: - self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)") - break - - except Exception as e: - logger.error(f"Error in AI call iteration {iteration}: {str(e)}") - break - - if iteration >= max_iterations: - logger.warning(f"AI call stopped after maximum iterations ({max_iterations})") - - # Build final result from accumulated sections - final_result = self._buildFinalResultFromSections(allSections) - - # Write final result to debug file - self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result") - - return final_result - - def _extractSectionsFromResponse( - self, - result: str, - iteration: int, - debugPrefix: str - ) -> Tuple[List[Dict[str, Any]], bool]: - """ - Extract sections from AI response, handling both valid and broken JSON. - Uses repair mechanism for broken JSON. - Checks for "complete_response": true flag to determine completion. 
- Returns (sections, wasJsonComplete) - """ - # First, try to parse as valid JSON - try: - extracted = extractJsonString(result) - parsed_result = json.loads(extracted) - - # Check if AI marked response as complete - isComplete = parsed_result.get("complete_response", False) == True - - # Extract sections from parsed JSON - sections = extractSectionsFromDocument(parsed_result) - - # If AI marked as complete, always return as complete - if isComplete: - return sections, True - - # If in continuation mode (iteration > 1), continuation responses are expected to be fragments - # A fragment with 0 extractable sections means JSON is incomplete - need another iteration - if len(sections) == 0 and iteration > 1: - return sections, False # Mark as incomplete so loop continues - - # First iteration with 0 sections means empty response - stop - if len(sections) == 0: - return sections, True # Complete but empty - - return sections, True # JSON was complete with sections - - except json.JSONDecodeError as e: - # Broken JSON - try repair mechanism (normal in iterative generation) - self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}") - - # Try to repair - repaired_json = repairBrokenJson(result) - - if repaired_json: - # Extract sections from repaired JSON - sections = extractSectionsFromDocument(repaired_json) - return sections, False # JSON was broken but repaired - else: - # Repair failed - log error - logger.error(f"Iteration {iteration}: All repair strategies failed") - return [], False - - except Exception as e: - logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}") - return [], False - - def _shouldContinueGeneration( - self, - allSections: List[Dict[str, Any]], - iteration: int, - wasJsonComplete: bool, - rawResponse: str = None - ) -> bool: - """ - Determine if generation should continue based on JSON completeness and complete_response flag. - Returns True if we should continue, False if done. - """ - if len(allSections) == 0: - return True # No sections yet, continue - - # Check for complete_response flag in raw response - if rawResponse: - import re - if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE): - return False - - # If JSON was complete (and no complete_response flag), we're done - # If JSON was broken and repaired, continue to get more content - if wasJsonComplete: - return False - else: - return True - - def _buildFinalResultFromSections( - self, - allSections: List[Dict[str, Any]] - ) -> str: - """ - Build final JSON result from accumulated sections. - """ - if not allSections: - return "" - - # Build documents structure - # Assuming single document for now - documents = [{ - "id": "doc_1", - "title": "Generated Document", # This should come from prompt - "filename": "document.json", - "sections": allSections - }] - - result = { - "metadata": { - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "ai_generation" - }, - "documents": documents - } - - return json.dumps(result, indent=2) - - # Old _buildContinuationPrompt and _mergeJsonContent methods removed - # Now handled by repair mechanism in jsonUtils.py and section accumulation - - - # Planning AI Call - async def callAiPlanning( - self, - prompt: str, - placeholders: Optional[List[PromptPlaceholder]] = None - ) -> str: - """ - Planning AI call for task planning, action planning, action selection, etc. - Always uses static parameters optimized for planning tasks. 
- - Args: - prompt: The planning prompt - placeholders: Optional list of placeholder replacements - - Returns: - Planning JSON response - """ - # Planning calls always use static parameters - options = AiCallOptions( - operationType=OperationTypeEnum.PLAN, - priority=PriorityEnum.QUALITY, - processingMode=ProcessingModeEnum.DETAILED, - compressPrompt=False, - compressContext=False - ) - - # Build full prompt with placeholders - if placeholders: - placeholders_dict = {p.label: p.content for p in placeholders} - full_prompt = buildPromptWithPlaceholders(prompt, placeholders_dict) - else: - full_prompt = prompt - - # Use shared core function with planning-specific debug prefix - return await self._callAiWithLooping(full_prompt, options, "plan") - - # Document Generation AI Call - async def callAiDocuments( - self, - prompt: str, - documents: Optional[List[ChatDocument]] = None, - options: Optional[AiCallOptions] = None, - outputFormat: Optional[str] = None, - title: Optional[str] = None - ) -> Union[str, Dict[str, Any]]: - """ - Document generation AI call for all non-planning calls. - Uses the current unified path with extraction and generation. - - Args: - prompt: The main prompt for the AI call - documents: Optional list of documents to process - options: AI call configuration options - outputFormat: Optional output format for document generation - title: Optional title for generated documents - - Returns: - AI response as string, or dict with documents if outputFormat is specified - """ - # Create separate operationId for detailed progress tracking - import time - import uuid - workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}" - aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}" - - # Start progress tracking for this operation - self.services.workflow.progressLogStart( - aiOperationId, - "AI call with documents", - "Document Generation", - f"Format: {outputFormat or 'text'}" - ) - - try: - if options is None or (hasattr(options, 'operationType') and options.operationType is None): - # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None) - self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters") - options = await self._analyzePromptAndCreateOptions(prompt) - - # CRITICAL: For document generation with JSON templates, NEVER compress the prompt - # Compressing would truncate the template structure and confuse the AI - if outputFormat: # Document generation with structured output - if not options: - options = AiCallOptions() - options.compressPrompt = False # JSON templates must NOT be truncated - options.compressContext = False # Context also should not be compressed - - # Handle document generation with specific output format using unified approach - if outputFormat: - # Use unified generation method for all document generation - if documents and len(documents) > 0: - self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents") - extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId) - else: - self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation") - extracted_content = None - - self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - # First call without 
continuation context - generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None) - - # Prepare prompt builder arguments for continuation - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": extracted_content - } - - self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId - ) - - self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") - # Parse the generated JSON (extract fenced/embedded JSON first) - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - logger.error(f"JSON content length: {len(generated_json)}") - logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}") - logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}") - - # Write the problematic JSON to debug file - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") - - self.services.workflow.progressLogFinish(aiOperationId, False) - return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"} - - self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format") - # Render to final format using the existing renderer - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - rendered_content, mime_type = await generationService.renderReport( - generated_data, outputFormat, title or "Generated Document", prompt, self - ) - - # Build result in the expected format - result = { - "success": True, - "content": generated_data, - "documents": [{ - "documentName": f"generated.{outputFormat}", - "documentData": rendered_content, - "mimeType": mime_type, - "title": title or "Generated Document" - }], - "is_multi_file": False, - "format": outputFormat, - "title": title, - "split_strategy": "single", - "total_documents": 1, - "processed_documents": 1 - } - - # Log AI response for debugging - self.services.utils.writeDebugFile(str(result), "document_generation_response", documents) - - self.services.workflow.progressLogFinish(aiOperationId, True) - return result - - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - self.services.workflow.progressLogFinish(aiOperationId, False) - return {"success": False, "error": f"Rendering failed: {str(e)}"} - - # Handle text calls (no output format specified) - self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call") - if documents: - # Use document processing for text calls with documents - result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId) - else: - # Use shared core function for direct text calls - result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId) - - self.services.workflow.progressLogFinish(aiOperationId, True) - return result - - except Exception as e: - logger.error(f"Error in callAiDocuments: {str(e)}") - self.services.workflow.progressLogFinish(aiOperationId, False) - raise - - - # AI Image Analysis - async def 
readImage( - self, - prompt: str, - imageData: Union[str, bytes], - mimeType: str = None, - options: Optional[AiCallOptions] = None, - ) -> str: - """Call AI for image analysis using interface.call() with contentParts.""" - try: - # Check if imageData is valid - if not imageData: - error_msg = "No image data provided" - logger.error(f"Error in AI image analysis: {error_msg}") - return f"Error: {error_msg}" - - - # Always use IMAGE_ANALYSE operation type for image processing - if options is None: - options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE) - else: - # Override the operation type to ensure image analysis - options.operationType = OperationTypeEnum.IMAGE_ANALYSE - - # Create content parts with image data - from modules.datamodels.datamodelExtraction import ContentPart - import base64 - - # ContentPart.data must be a string - convert bytes to base64 if needed - if isinstance(imageData, bytes): - imageDataStr = base64.b64encode(imageData).decode('utf-8') - else: - # Already a base64 string - imageDataStr = imageData - - imagePart = ContentPart( - id="image_0", - parentId=None, - label="Image", - typeGroup="image", - mimeType=mimeType or "image/jpeg", - data=imageDataStr, # Must be a string (base64 encoded) - metadata={"imageAnalysis": True} - ) - - # Create request with content parts - from modules.datamodels.datamodelAi import AiCallRequest - request = AiCallRequest( - prompt=prompt, - context="", - options=options, - contentParts=[imagePart] - ) - - response = await self.aiObjects.call(request) - result = response.content - - # Check if result is valid - if not result or (isinstance(result, str) and not result.strip()): - error_msg = f"No response from AI image analysis (result: {repr(result)})" - logger.error(f"Error in AI image analysis: {error_msg}") - return f"Error: {error_msg}" - - return result - except Exception as e: - logger.error(f"Error in AI image analysis: {str(e)}") - return f"Error: {str(e)}" - - # AI Image Generation - async def generateImage( - self, - prompt: str, - size: str = "1024x1024", - quality: str = "standard", - style: str = "vivid", - options: Optional[AiCallOptions] = None, - ) -> Dict[str, Any]: - """Generate an image using AI using interface.generateImage().""" - try: - response = await self.aiObjects.generateImage(prompt, size, quality, style, options) - - # Emit stats for image generation - self.services.workflow.storeWorkflowStat( - self.services.currentWorkflow, - response, - f"ai.generate.image" - ) - - # Convert response to dict format for backward compatibility - if hasattr(response, 'content'): - return { - "success": True, - "content": response.content, - "modelName": response.modelName, - "priceUsd": response.priceUsd, - "processingTime": response.processingTime - } - else: - return response - except Exception as e: - logger.error(f"Error in AI image generation: {str(e)}") - return {"success": False, "error": str(e)} diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py deleted file mode 100644 index 351f68cc..00000000 --- a/modules/services/serviceAi/subDocumentGeneration.py +++ /dev/null @@ -1,500 +0,0 @@ -import re -import json -import logging -import time -from datetime import datetime, UTC -from typing import Dict, Any, List, Optional -from modules.datamodels.datamodelChat import ChatDocument -from modules.datamodels.datamodelAi import AiCallOptions - -logger = logging.getLogger(__name__) - - -class SubDocumentGeneration: - """Document generation 
operations including single-file and multi-file generation.""" - - def __init__(self, services, aiObjects, documentProcessor): - """Initialize document generation service. - - Args: - services: Service center instance for accessing other services - aiObjects: Initialized AiObjects instance - documentProcessor: Document processing service instance - """ - self.services = services - self.aiObjects = aiObjects - self.documentProcessor = documentProcessor - - async def callAiWithDocumentGeneration( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions, - outputFormat: str, - title: Optional[str] - ) -> Dict[str, Any]: - """ - Unified document generation method that handles both single and multi-file cases. - Always uses multi-file approach internally. - - Args: - prompt: The main prompt for the AI call - documents: Optional list of documents to process - options: AI call configuration options - outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx) - title: Optional title for generated documents - - Returns: - Dict with generated documents and metadata in unified structure - """ - try: - # 1. Get unified extraction prompt - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - extractionPrompt = await generationService.getAdaptiveExtractionPrompt( - outputFormat=outputFormat, - userPrompt=prompt, - title=title, - aiService=self - ) - - # 2. Process with unified pipeline (always multi-file approach) - aiResponse = await self._processDocumentsUnified( - documents, extractionPrompt, options - ) - - # 3. Return unified result structure - return await self._buildUnifiedResult(aiResponse, outputFormat, title) - - except Exception as e: - logger.error(f"Error in unified document generation: {str(e)}") - return self._buildErrorResult(str(e), outputFormat, title) - - async def _processDocumentsUnified( - self, - documents: Optional[List[ChatDocument]], - extractionPrompt: str, - options: AiCallOptions - ) -> Dict[str, Any]: - """ - Unified document processing that handles both single and multi-file cases. - Always processes as multi-file structure internally. 
- """ - - # Init progress logger - workflow = self.services.currentWorkflow - operationId = f"docGenUnified_{workflow.id}_{int(time.time())}" - - try: - # Start progress tracking - self.services.workflow.progressLogStart( - operationId, - "Generate", - "Unified Document Generation", - f"Processing {len(documents) if documents else 0} documents" - ) - - # Update progress - generating extraction prompt - self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt") - - # Write prompt to debug file - self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents) - - # Process with unified JSON pipeline using continuation logic - aiResponse = await self.documentProcessor.processDocumentsWithContinuation( - documents, extractionPrompt, options - ) - - # Update progress - AI processing completed - self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done") - - - - # Write AI response to debug file - response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False) if isinstance(aiResponse, dict) else str(aiResponse) - self.services.utils.writeDebugFile(response_json, "ai_response", documents) - - # Validate response structure - if not self._validateUnifiedResponseStructure(aiResponse): - raise Exception("AI response is not valid unified document structure") - - # Emit raw extracted data as a chat message attachment - try: - await self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified") - except Exception: - logger.warning("Failed to emit raw extraction chat message (unified)") - - # Complete progress tracking - self.services.workflow.progressLogFinish(operationId, True) - - return aiResponse - - except Exception as e: - logger.error(f"Error in unified document processing: {str(e)}") - self.services.workflow.progressLogFinish(operationId, False) - raise - - def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool: - """ - Unified validation that checks for document structure. - Handles both multi-file (documents array) and single-file (sections array) structures. 
- """ - try: - if not isinstance(response, dict): - logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}") - return False - - # Check for documents array (multi-file structure) - hasDocuments = "documents" in response - isDocumentsList = isinstance(response.get("documents"), list) - - # Check for sections array (single-file structure) - hasSections = "sections" in response - isSectionsList = isinstance(response.get("sections"), list) - - if hasDocuments and isDocumentsList: - # Multi-file structure - documents = response.get("documents", []) - if not documents: - logger.warning("Unified validation failed: documents array is empty") - return False - - # Validate each document individually - validDocuments = 0 - for i, doc in enumerate(documents): - if self._validateDocumentStructure(doc, i): - validDocuments += 1 - else: - logger.warning(f"Document {i} failed validation, but continuing with others") - - # Process succeeds if at least one document is valid - if validDocuments == 0: - logger.error("Unified validation failed: no valid documents found") - return False - - logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid") - return True - - elif hasSections and isSectionsList: - # Single-file structure - convert to multi-file format - logger.info("Converting single-file structure to multi-file format") - sections = response.get("sections", []) - if not sections: - logger.warning("Unified validation failed: sections array is empty") - return False - - # Convert to documents array format - response["documents"] = [{ - "id": "document_1", - "title": response.get("metadata", {}).get("title", "Generated Document"), - "filename": "document_1", - "sections": sections - }] - - logger.info("Successfully converted single-file structure to multi-file format") - return True - - else: - # No valid structure found - fail with clear error details - logger.error("Unified validation failed: No valid structure found") - logger.error(f"Response type: {type(response)}") - logger.error(f"Available keys: {list(response.keys()) if isinstance(response, dict) else 'Not a dict'}") - logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}") - logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}") - logger.error(f"Full response: {response}") - return False - - except Exception as e: - logger.warning(f"Unified response validation failed with exception: {str(e)}") - return False - - def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool: - """ - Validate individual document structure. - Returns True if document is valid, False otherwise. - Does not fail the entire process if one document is invalid. 
- """ - try: - if not isinstance(document, dict): - logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}") - logger.error(f"Document {documentIndex} content: {document}") - return False - - # Check for required fields - hasTitle = "title" in document - hasSections = "sections" in document - isSectionsList = isinstance(document.get("sections"), list) - - logger.debug(f"Document {documentIndex} structure check:") - logger.debug(f" - hasTitle: {hasTitle}") - logger.debug(f" - hasSections: {hasSections}") - logger.debug(f" - isSectionsList: {isSectionsList}") - logger.debug(f" - available keys: {list(document.keys())}") - - if not (hasTitle and hasSections and isSectionsList): - logger.error(f"Document {documentIndex} validation failed:") - logger.error(f" - title present: {hasTitle}") - logger.error(f" - sections present: {hasSections}") - logger.error(f" - sections is list: {isSectionsList}") - logger.error(f" - document content: {document}") - return False - - sections = document.get("sections", []) - if not sections: - logger.error(f"Document {documentIndex} validation failed: sections array is empty") - logger.error(f" - document content: {document}") - return False - - logger.info(f"Document {documentIndex} validation passed") - return True - - except Exception as e: - logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}") - logger.error(f" - document content: {document}") - return False - - async def _buildUnifiedResult( - self, - aiResponse: Dict[str, Any], - outputFormat: str, - title: str - ) -> Dict[str, Any]: - """ - Build unified result structure that always returns array-based format. - Content is always a multi-document structure. - """ - try: - # Process all documents uniformly - generatedDocuments = [] - documents = aiResponse.get("documents", []) - - for i, docData in enumerate(documents): - try: - processedDocument = await self._processDocument( - docData, outputFormat, title, i - ) - generatedDocuments.append(processedDocument) - except Exception as e: - logger.warning(f"Failed to process document {i}: {str(e)}, skipping") - continue - - if not generatedDocuments: - raise Exception("No documents could be processed successfully") - - # Build unified result - result = { - "success": True, - "content": aiResponse, # Always multi-document structure - "documents": generatedDocuments, # Always array - "is_multi_file": len(generatedDocuments) > 1, - "format": outputFormat, - "title": title, - "total_documents": len(generatedDocuments), - "processed_documents": len(generatedDocuments) - } - - return result - - except Exception as e: - logger.error(f"Error building unified result: {str(e)}") - return self._buildErrorResult(str(e), outputFormat, title) - - async def _processDocument( - self, - docData: Dict[str, Any], - outputFormat: str, - title: str, - documentIndex: int - ) -> Dict[str, Any]: - """ - Process individual document with content enhancement and rendering. 
- """ - try: - # Get generation service - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - # Use AI generation to enhance the extracted JSON before rendering - enhancedContent = docData # Default to original - if docData.get("sections"): - try: - # Get generation prompt directly - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - generationPrompt = await buildGenerationPrompt( - outputFormat=outputFormat, - userPrompt=title, - title=docData.get("title", title) - ) - - # Prepare the AI call - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum - requestOptions = AiCallOptions() - requestOptions.operationType = OperationTypeEnum.DATA_GENERATE - - # Create context with the extracted JSON content - context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}" - - request = AiCallRequest( - prompt=generationPrompt, - context=context, - options=requestOptions - ) - - # Write document generation prompt to debug file - self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt") - - # Call AI to enhance the content - response = await self.aiObjects.call(request) - - # Write document generation response to debug file - self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response") - - if response and response.content: - # Parse the AI response as JSON - try: - result = response.content.strip() - - # Extract JSON from markdown if present - jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if jsonMatch: - result = jsonMatch.group(1).strip() - elif result.startswith('```json'): - result = re.sub(r'^```json\s*', '', result) - result = re.sub(r'\s*```$', '', result) - elif result.startswith('```'): - result = re.sub(r'^```\s*', '', result) - result = re.sub(r'\s*```$', '', result) - - # Try to parse JSON - enhancedContent = json.loads(result) - logger.info(f"AI enhanced JSON content successfully for document {documentIndex}") - - except json.JSONDecodeError as e: - logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content") - enhancedContent = docData - else: - logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content") - enhancedContent = docData - - except Exception as e: - logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content") - enhancedContent = docData - - # Render the enhanced JSON content - renderedContent, mimeType = await generationService.renderReport( - extractedContent=enhancedContent, - outputFormat=outputFormat, - title=docData.get("title", title), - userPrompt=title, - aiService=self - ) - - # Generate proper filename - baseFilename = docData.get("filename", f"document_{documentIndex + 1}") - if '.' 
in baseFilename: - baseFilename = baseFilename.rsplit('.', 1)[0] - - # Add proper extension based on output format - if outputFormat.lower() == "docx": - filename = f"{baseFilename}.docx" - elif outputFormat.lower() == "pdf": - filename = f"{baseFilename}.pdf" - elif outputFormat.lower() == "html": - filename = f"{baseFilename}.html" - else: - filename = f"{baseFilename}.{outputFormat}" - - return { - "documentName": filename, - "documentData": renderedContent, - "mimeType": mimeType, - "title": docData.get("title", title), - "documentIndex": documentIndex - } - - except Exception as e: - logger.error(f"Error processing document {documentIndex}: {str(e)}") - raise - - def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]: - """ - Build error result with unified structure. - """ - return { - "success": False, - "error": errorMessage, - "content": {}, - "documents": [], - "is_multi_file": False, - "format": outputFormat, - "title": title, - "split_strategy": "error", - "total_documents": 0, - "processed_documents": 0 - } - - async def _callAiJson( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions - ) -> Dict[str, Any]: - """ - Handle AI calls with document processing for JSON output. - Returns structured JSON document instead of text. - """ - # Process documents with JSON merging - return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options) - - - async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None: - """ - Create a ChatMessage with the extracted raw JSON attached as a file so the user - has access to the data even if downstream processing fails. - """ - try: - services = self.services - workflow = services.currentWorkflow - - # Serialize payload - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - content_text = json.dumps(payload, ensure_ascii=False, indent=2) - content_bytes = content_text.encode('utf-8') - - # Store as file via component storage - file_name = f"{label}_{ts}.json" - file_item = services.interfaceDbComponent.createFile( - name=file_name, - mimeType="application/json", - content=content_bytes - ) - services.interfaceDbComponent.createFileData(file_item.id, content_bytes) - - # Lookup file info for ChatDocument - file_info = services.workflow.getFileInfo(file_item.id) - doc = ChatDocument( - messageId="", # set after message creation - fileId=file_item.id, - fileName=file_info.get("fileName", file_name) if file_info else file_name, - fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes), - mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json" - ) - - # Create message referencing the file - include document in initial call - messageData = { - "workflowId": workflow.id, - "role": "assistant", - "message": "Raw extraction data saved", - "status": "data", - "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1, - "publishedAt": services.utils.timestampGetUtc(), - "documentsLabel": label, - "documents": [] - } - - # Store message with document included from the start - services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc]) - except Exception: - # Non-fatal; ignore if storage or chat creation fails - return \ No newline at end of file diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py index a9d01a8a..2903c066 100644 --- 
a/modules/services/serviceAi/subDocumentProcessing.py +++ b/modules/services/serviceAi/subDocumentProcessing.py @@ -5,7 +5,16 @@ import time from typing import Dict, Any, List, Optional from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum -from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted, PartResult, ExtractionOptions, MergeStrategy +from modules.datamodels.datamodelExtraction import ContentExtracted, PartResult, ExtractionOptions, MergeStrategy +# Resolve forward refs for ExtractionOptions (OperationTypeEnum) at runtime without using unsupported args +try: + # Import here to avoid circular import at module load time + from modules.datamodels.datamodelAi import OperationTypeEnum + # Provide parent namespace so Pydantic can resolve forward refs + ExtractionOptions.__pydantic_parent_namespace__ = {"OperationTypeEnum": OperationTypeEnum} + ExtractionOptions.model_rebuild() +except Exception as _e: + logging.getLogger(__name__).warning(f"ExtractionOptions forward-ref rebuild skipped: {_e}") from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService logger = logging.getLogger(__name__) @@ -121,373 +130,6 @@ class SubDocumentProcessing: self.services.workflow.progressLogFinish(operationId, False) raise - async def processDocumentsPerChunkJson( - self, - documents: List[ChatDocument], - prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with model-aware chunking and merge results in JSON mode. - Returns structured JSON document instead of text. - """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Build extraction options using Pydantic model - mergeStrategy = MergeStrategy( - useIntelligentMerging=True, - prompt=prompt, - groupBy="typeGroup", - orderBy="id", - mergeType="concatenate" - ) - - extractionOptions = ExtractionOptions( - prompt=prompt, - operationType=options.operationType if options else OperationTypeEnum.DATA_EXTRACT, - processDocumentsIndividually=True, - mergeStrategy=mergeStrategy - ) - - logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.prompt)} chars, operationType={extractionOptions.operationType}") - - try: - # Extract content WITHOUT chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return {"metadata": {"title": "Error Document"}, "sections": []} - - # Process parts with model-aware chunking - partResults = await self._processPartsWithMapping(extractionResult, prompt, options) - - # Convert to JSON format (simplified for now) - mergedJsonDocument = self._convertPartResultsToJson(partResults, options) - - # Normalize merged JSON into a single canonical table (only if table content exists) - try: - from modules.services.serviceNormalization.mainServiceNormalization import NormalizationService - normalizer = NormalizationService(self.services) - inventory = normalizer.discoverStructures(mergedJsonDocument) - - # Check if any table content was discovered - tableHeaders = inventory.get("tableHeaders", []) - if not tableHeaders: - logger.info("No table content found in merged JSON, skipping normalization and returning original structure") - else: - # Use workflow id as cache key - cacheKey = self.services.currentWorkflow.id - # Provide the extraction/merge prompt context when 
available to help mapping - mergePrompt = prompt - mapping = await normalizer.requestHeaderMapping(inventory, cacheKey, None, mergePrompt) - canonical = normalizer.applyMapping(mergedJsonDocument, mapping) - report = normalizer.validateCanonical(canonical) - if report.get('success'): - mergedJsonDocument = canonical - else: - raise ValueError('Normalization produced zero rows') - except Exception as e: - # Log normalization failure but don't re-raise - continue with original merged JSON - logger.warning(f"Normalization failed (expected): {str(e)}") - logger.debug(f"Normalization error type: {type(e).__name__}") - # Continue with original merged JSON instead of re-raising - - # Save merged JSON extraction content to debug - jsonStr = json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2) - self.services.utils.writeDebugFile(jsonStr, "extraction_merged_json") - - return mergedJsonDocument - - except Exception as e: - logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}") - logger.error(f"Exception type: {type(e).__name__}") - logger.error(f"Exception args: {e.args}") - import traceback - logger.error(f"Traceback: {traceback.format_exc()}") - return {"metadata": {"title": "Error Document"}, "sections": []} - - async def processDocumentsPerChunkJsonWithPrompt( - self, - documents: List[ChatDocument], - custom_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with per-chunk AI calls and merge results in JSON mode. - Uses a custom prompt instead of the default extraction prompt. - Enhanced with partial results continuation logic. - """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Build extraction options using Pydantic model (model-aware chunking in AI call phase) - mergeStrategy = MergeStrategy( - useIntelligentMerging=True, - prompt=custom_prompt, - groupBy="typeGroup", - orderBy="id", - mergeType="concatenate" - ) - - extractionOptions = ExtractionOptions( - prompt=custom_prompt, # Use the custom prompt instead of default - operationType=options.operationType if options else OperationTypeEnum.DATA_EXTRACT, - processDocumentsIndividually=True, # Process each document separately - imageMaxPixels=1024 * 1024, - imageQuality=85, - mergeStrategy=mergeStrategy - ) - - logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.prompt)} chars, operationType={extractionOptions.operationType}") - - try: - # Extract content with chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return {"metadata": {"title": "Error Document"}, "sections": []} - - # Process chunks with proper mapping - logger.info(f"Processing {len(extractionResult)} chunks with custom prompt") - logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...") - - # Debug: Show what content is being processed (before filtering) - for i, ec in enumerate(extractionResult): - if hasattr(ec, 'parts'): - for j, part in enumerate(ec.parts): - if not (hasattr(part, 'data') and part.data): - # Check if this is an empty container chunk (which is expected) - part_type = getattr(part, 'typeGroup', None) - part_mime = getattr(part, 'mimeType', '') - - is_empty_container = ( - part_type == "container" and - part_mime and - 'document' in part_mime.lower() - ) - - if not is_empty_container: - logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'") - - chunkResults = 
await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True) - - # Debug: Show what chunks were actually processed (after filtering) - logger.info(f"After filtering: {len(chunkResults)} chunks will be processed") - - # Merge with JSON mode - mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) - - # Debug: Show what the AI actually returned - logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}") - if 'documents' in mergedJsonDocument: - logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}") - elif 'sections' in mergedJsonDocument: - logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}") - - return mergedJsonDocument - - except Exception as e: - logger.error(f"Error in per-chunk JSON processing: {str(e)}") - return {"metadata": {"title": "Error Document"}, "sections": []} - - async def processDocumentsWithContinuation( - self, - documents: List[ChatDocument], - custom_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with partial results continuation logic. - Handles AI responses that indicate partial completion and loops until complete. - """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - logger.info("Starting document processing with continuation logic") - - # Build enhanced prompt with continuation instructions - enhanced_prompt = self._buildContinuationPrompt(custom_prompt) - - # Process with continuation logic - return await self._processWithContinuationLoop(documents, enhanced_prompt, options) - - def _buildContinuationPrompt(self, base_prompt: str) -> str: - """ - Build a prompt that includes partial results continuation instructions. - - NOTE: This uses a different continuation pattern than SubCoreAi: - - This uses "continue": true/false + "continuation_context" for document sections - - Kept separate because it's tightly coupled to document processing needs - """ - continuation_instructions = """ - -IMPORTANT CHUNKING LOGIC: -- If the response is too large to generate completely in one response, set "continue": true -- When "continue": true, include a "continuation_context" field with: - - "last_section_id": "id of the last completed section" - - "last_element_index": "index of the last completed element in that section" - - "remaining_requirements": "brief description of what still needs to be generated" -- The AI will be called again with this context to continue generation -- Only set "continue": false when the response is completely generated - -OUTPUT FORMAT: Return only valid JSON in this exact structure: -{ - "metadata": { - "title": "Document Title" - }, - "sections": [ - { - "id": "section_1", - "content_type": "paragraph", - "elements": [ - { - "text": "This is the actual content that should be generated." - } - ], - "order": 1 - } - ], - "continue": false, - "continuation_context": { - "last_section_id": "section_1", - "last_element_index": 0, - "remaining_requirements": "description of what still needs to be generated" - } -} - -The AI should generate content using the canonical format with "sections" and "elements". -""" - - return f"{base_prompt}{continuation_instructions}" - - async def _processWithContinuationLoop( - self, - documents: List[ChatDocument], - enhanced_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with continuation loop until complete. 
- """ - max_iterations = 10 # Prevent infinite loops - iteration = 0 - accumulated_sections = [] - continuation_context = None - - while iteration < max_iterations: - iteration += 1 - logger.info(f"Continuation iteration {iteration}/{max_iterations}") - - # Build prompt for this iteration - if continuation_context: - iteration_prompt = self._buildContinuationIterationPrompt( - enhanced_prompt, continuation_context, accumulated_sections - ) - else: - iteration_prompt = enhanced_prompt - - # Process documents for this iteration - try: - # Use the existing processing method - result = await self.processDocumentsPerChunkJsonWithPrompt( - documents, iteration_prompt, options - ) - - # Check if this is a valid JSON response - if not isinstance(result, dict): - logger.warning(f"Iteration {iteration}: Invalid result type, stopping") - break - - # Extract sections from result - sections = result.get("sections", []) - if not sections: - logger.warning(f"Iteration {iteration}: No sections found, stopping") - break - - # Add sections to accumulated results - for section in sections: - # Update section order to maintain sequence - section["order"] = len(accumulated_sections) + 1 - accumulated_sections.append(section) - - # Check if continuation is needed - continue_flag = result.get("continue", False) - continuation_context = result.get("continuation_context") - - logger.info(f"Iteration {iteration}: Added {len(sections)} sections, continue={continue_flag}") - - if not continue_flag: - logger.info(f"Continuation complete after {iteration} iterations") - break - - if not continuation_context: - logger.warning(f"Iteration {iteration}: continue=true but no continuation_context, stopping") - break - - except Exception as e: - logger.error(f"Iteration {iteration} failed: {str(e)}") - break - - if iteration >= max_iterations: - logger.warning(f"Continuation stopped after maximum iterations ({max_iterations})") - - # Build final result - final_result = { - "metadata": { - "title": "Generated Document", - "total_sections": len(accumulated_sections), - "iterations": iteration, - "continuation_used": iteration > 1 - }, - "sections": accumulated_sections, - "continue": False - } - - logger.info(f"Final result: {len(accumulated_sections)} sections from {iteration} iterations") - return final_result - - def _buildContinuationIterationPrompt( - self, - base_prompt: str, - continuation_context: Dict[str, Any], - accumulated_sections: List[Dict[str, Any]] - ) -> str: - """ - Build a prompt for continuation iteration with context. - """ - last_section_id = continuation_context.get("last_section_id", "") - last_element_index = continuation_context.get("last_element_index", 0) - remaining_requirements = continuation_context.get("remaining_requirements", "") - - # Build context of what's already been generated - context_summary = "PREVIOUSLY GENERATED CONTENT:\n" - for i, section in enumerate(accumulated_sections[-3:]): # Show last 3 sections for context - context_summary += f"Section {i+1}: {section.get('id', 'unknown')}\n" - if 'elements' in section and section['elements']: - first_element = section['elements'][0] - if 'text' in first_element: - preview = first_element['text'][:100] + "..." 
if len(first_element['text']) > 100 else first_element['text'] - context_summary += f" Preview: {preview}\n" - - continuation_prompt = f""" -{base_prompt} - -{context_summary} - -CONTINUATION INSTRUCTIONS: -- Continue from where you left off -- Last completed section: {last_section_id} -- Last completed element index: {last_element_index} -- Remaining requirements: {remaining_requirements} -- Generate the next part of the content -- Maintain consistency with previously generated content -- Use the same JSON format as before -- Set "continue": true if more content is needed, false if complete -""" - - return continuation_prompt - async def callAiText( self, prompt: str, @@ -513,8 +155,8 @@ CONTINUATION INSTRUCTIONS: import asyncio # Collect all parts that need processing - parts_to_process = [] - part_index = 0 + partsToProcess = [] + partIndex = 0 for ec in extractionResult: for part in ec.parts: @@ -524,24 +166,24 @@ CONTINUATION INSTRUCTIONS: logger.debug(f"Skipping empty container part: mimeType={part.mimeType}") continue - parts_to_process.append({ + partsToProcess.append({ 'part': part, - 'part_index': part_index, + 'part_index': partIndex, 'document_id': ec.id }) - part_index += 1 + partIndex += 1 - logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking") + logger.info(f"Processing {len(partsToProcess)} parts with model-aware chunking") - total_parts = len(parts_to_process) + totalParts = len(partsToProcess) # Process parts in parallel - processed_count = [0] # Use list to allow modification in nested function + processedCount = [0] # Use list to allow modification in nested function - async def process_single_part(part_info: Dict) -> PartResult: - part = part_info['part'] - part_index = part_info['part_index'] - document_id = part_info['document_id'] + async def processSinglePart(partInfo: Dict) -> PartResult: + part = partInfo['part'] + part_index = partInfo['part_index'] + documentId = partInfo['document_id'] start_time = time.time() @@ -556,10 +198,10 @@ CONTINUATION INSTRUCTIONS: ) # Update progress before AI call - if operationId and total_parts > 0: - processed_count[0] += 1 - progress = 0.3 + (processed_count[0] / total_parts * 0.6) # Progress from 0.3 to 0.9 - self.services.workflow.progressLogUpdate(operationId, progress, f"Processing part {processed_count[0]}/{total_parts}") + if operationId and totalParts > 0: + processedCount[0] += 1 + progress = 0.3 + (processedCount[0] / totalParts * 0.6) # Progress from 0.3 to 0.9 + self.services.workflow.progressLogUpdate(operationId, progress, f"Processing part {processedCount[0]}/{totalParts}") # Call AI with model-aware chunking response = await self.aiObjects.call(request) @@ -570,7 +212,7 @@ CONTINUATION INSTRUCTIONS: originalPart=part, aiResult=response.content, partIndex=part_index, - documentId=document_id, + documentId=documentId, processingTime=processing_time, metadata={ "success": True, @@ -590,7 +232,7 @@ CONTINUATION INSTRUCTIONS: originalPart=part, aiResult=f"[Error processing part: {str(e)}]", partIndex=part_index, - documentId=document_id, + documentId=documentId, processingTime=processing_time, metadata={ "success": False, @@ -601,25 +243,25 @@ CONTINUATION INSTRUCTIONS: ) # Process parts with concurrency control - max_concurrent = 5 + maxConcurrent = 5 if options and hasattr(options, 'maxConcurrentParts'): - max_concurrent = options.maxConcurrentParts + maxConcurrent = options.maxConcurrentParts - semaphore = asyncio.Semaphore(max_concurrent) + semaphore = asyncio.Semaphore(maxConcurrent) 
- async def process_with_semaphore(part_info): + async def processWithSemaphore(partInfo): async with semaphore: - return await process_single_part(part_info) + return await processSinglePart(partInfo) - tasks = [process_with_semaphore(part_info) for part_info in parts_to_process] - part_results = await asyncio.gather(*tasks, return_exceptions=True) + tasks = [processWithSemaphore(part_info) for part_info in partsToProcess] + partResults = await asyncio.gather(*tasks, return_exceptions=True) # Handle exceptions - processed_results = [] - for i, result in enumerate(part_results): + processedResults = [] + for i, result in enumerate(partResults): if isinstance(result, Exception): - part_info = parts_to_process[i] - processed_results.append(PartResult( + part_info = partsToProcess[i] + processedResults.append(PartResult( originalPart=part_info['part'], aiResult=f"[Error in parallel processing: {str(result)}]", partIndex=part_info['part_index'], @@ -628,412 +270,10 @@ CONTINUATION INSTRUCTIONS: metadata={"success": False, "error": str(result)} )) elif result is not None: - processed_results.append(result) + processedResults.append(result) - logger.info(f"Completed processing {len(processed_results)} parts") - return processed_results - - async def _processChunksWithMapping( - self, - extractionResult: List[ContentExtracted], - prompt: str, - options: Optional[AiCallOptions] = None, - generate_json: bool = False - ) -> List[ChunkResult]: - """Process chunks with proper mapping to preserve relationships.""" - from modules.datamodels.datamodelExtraction import ChunkResult - import asyncio - - # Collect all chunks that need processing with proper indexing - chunks_to_process = [] - chunk_index = 0 - - for ec in extractionResult: - # Get document MIME type from metadata - document_mime_type = None - for part in ec.parts: - if part.metadata and 'documentMimeType' in part.metadata: - document_mime_type = part.metadata['documentMimeType'] - break - - for part in ec.parts: - if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"): - # Skip empty container chunks (they're just metadata containers) - if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): - logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") - continue - - chunks_to_process.append({ - 'part': part, - 'chunk_index': chunk_index, - 'document_id': ec.id, - 'document_mime_type': document_mime_type - }) - chunk_index += 1 - - logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping") - - # Process chunks in parallel with proper mapping - async def process_single_chunk(chunk_info: Dict) -> ChunkResult: - part = chunk_info['part'] - chunk_index = chunk_info['chunk_index'] - document_id = chunk_info['document_id'] - document_mime_type = chunk_info.get('document_mime_type', part.mimeType) - - start_time = time.time() - - try: - # FIXED: Check MIME type first, then fallback to typeGroup - is_image = ( - (document_mime_type and document_mime_type.startswith('image/')) or - (part.mimeType and part.mimeType.startswith('image/')) or - (part.typeGroup == "image") - ) - - # Debug logging - self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, 
part.typeGroup={part.typeGroup}, is_image={is_image}") - - if is_image: - # Use the same extraction prompt for image analysis (contains table JSON format) - self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - - # Check if image data is available - if not part.data: - error_msg = f"No image data available for chunk {chunk_index}" - logger.warning(error_msg) - ai_result = f"Error: {error_msg}" - else: - try: - # Import here to avoid circular imports - from modules.services.serviceAi.subCoreAi import SubCoreAi - core_ai = SubCoreAi(self.services, self.aiObjects) - - ai_result = await core_ai.readImage( - prompt=prompt, - imageData=part.data, - mimeType=part.mimeType, - options=options - ) - - self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE") - # Save image extraction response to debug file - self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}_response") - - # Check if result is empty or None - if not ai_result or not ai_result.strip(): - logger.warning(f"Image chunk {chunk_index} returned empty response from AI") - ai_result = "No content detected in image" - - except Exception as e: - logger.error(f"Error processing image chunk {chunk_index}: {str(e)}") - ai_result = f"Error analyzing image: {str(e)}" - - # If generating JSON, clean image analysis result - if generate_json: - try: - - # Clean the response - remove markdown code blocks if present - cleaned_result = ai_result.strip() - - # Remove various markdown patterns - if cleaned_result.startswith('```json'): - cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - elif cleaned_result.startswith('```'): - cleaned_result = re.sub(r'^```\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - - # Remove any leading/trailing text that's not JSON - # Look for the first { and last } to extract JSON - first_brace = cleaned_result.find('{') - last_brace = cleaned_result.rfind('}') - - if first_brace != -1 and last_brace != -1 and last_brace > first_brace: - cleaned_result = cleaned_result[first_brace:last_brace + 1] - - # Additional cleaning for common AI response issues - cleaned_result = cleaned_result.strip() - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE") - - except json.JSONDecodeError as e: - logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}") - logger.warning(f"Raw response was: '{ai_result[:500]}...'") - - # Create fallback JSON with the actual response content (not the error message) - # Use the original AI response content, not the error message - fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" - - self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") - - ai_result = json.dumps({ - "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"}, - "sections": [{ - "id": f"image_section_{chunk_index}", - "content_type": "paragraph", - "elements": [{"text": fallback_content}] - }] - }) - self.services.utils.debugLogToFile(f"Created fallback JSON for 
image chunk {chunk_index} with actual content", "AI_SERVICE") - elif part.typeGroup in ("container", "binary"): - # Handle ALL container and binary content generically - let AI process any document type - self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - - # Skip empty container chunks (they're just metadata containers) - if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): - self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") - # Skip processing this chunk - pass - elif part.mimeType and part.data and len(part.data.strip()) > 0: - # Process any document container as text content - request_options = options if options is not None else AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_EXTRACT - self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") - - # Log extraction prompt and context - self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE") - self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") - - # Strengthen prompt to forbid fabrication for text/container extraction - augmented_prompt = ( - f"{prompt}\n\n" - "CRITICAL RULES (NO FABRICATION):\n" - "- Use ONLY content present in the provided CONTEXT.\n" - "- Do NOT create, infer, or guess values not explicitly in the context.\n" - "- If a value is missing, leave the cell empty or omit the row.\n" - ) - request = AiCallRequest( - prompt=augmented_prompt, - context=part.data, - options=request_options - ) - response = await self.aiObjects.call(request) - ai_result = response.content - - # Log extraction response - self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - - # Save extraction prompt and response to debug - self.services.utils.writeDebugFile(augmented_prompt, f"extraction_chunk_{chunk_index}_prompt") - self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response") - - # If generating JSON, validate the response - if generate_json: - try: - - # Clean the response - remove markdown code blocks if present - cleaned_result = ai_result.strip() - - # Remove various markdown patterns - if cleaned_result.startswith('```json'): - cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - elif cleaned_result.startswith('```'): - cleaned_result = re.sub(r'^```\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - - # Remove any leading/trailing text that's not JSON - # Look for the first { and last } to extract JSON - first_brace = cleaned_result.find('{') - last_brace = cleaned_result.rfind('}') - - if first_brace != -1 and last_brace != -1 and last_brace > first_brace: - cleaned_result = cleaned_result[first_brace:last_brace + 1] - - # Additional cleaning for common AI 
response issues - cleaned_result = cleaned_result.strip() - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - - except json.JSONDecodeError as e: - logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}") - logger.warning(f"Raw response was: '{ai_result[:500]}...'") - - # Create fallback JSON with the actual response content (not the error message) - # Use the original AI response content, not the error message - fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" - - self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") - - ai_result = json.dumps({ - "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"}, - "sections": [{ - "id": f"analysis_section_{chunk_index}", - "content_type": "paragraph", - "elements": [{"text": fallback_content}] - }] - }) - self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE") - else: - # Skip empty or invalid container/binary content - don't create a result - self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - # Return None to indicate this chunk should be completely skipped - return None - else: - # Ensure options is not None and set correct operation type for text - request_options = options if options is not None else AiCallOptions() - # FIXED: Set operation type to general for text processing - request_options.operationType = OperationTypeEnum.DATA_EXTRACT - self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") - - # Log extraction context length - self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") - - # Debug: Log the actual prompt being sent to AI - logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...") - logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...") - - # Strengthen prompt to forbid fabrication for text extraction - augmented_prompt_text = ( - f"{prompt}\n\n" - "CRITICAL RULES (NO FABRICATION):\n" - "- Use ONLY content present in the provided CONTEXT.\n" - "- Do NOT create, infer, or guess values not explicitly in the context.\n" - "- If a value is missing, leave the cell empty or omit the row.\n" - ) - request = AiCallRequest( - prompt=augmented_prompt_text, - context=part.data, - options=request_options - ) - response = await self.aiObjects.call(request) - - # Debug: Log what AI actually returned - logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...") - ai_result = response.content - - # Log extraction response length - self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - - # Save extraction prompt and response to debug - self.services.utils.writeDebugFile(augmented_prompt_text, f"extraction_chunk_{chunk_index}_prompt") - self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response") - - # If generating JSON, validate 
the response - if generate_json: - try: - - # Clean the response - remove markdown code blocks and extra formatting - cleaned_result = ai_result.strip() - - # Remove any markdown code block markers (```json, ```, etc.) - cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE) - cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE) - - # Remove any remaining ``` markers anywhere in the text - cleaned_result = re.sub(r'```', '', cleaned_result) - - # Try to extract JSON from the response if it's embedded in other text - json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL) - if json_match: - cleaned_result = json_match.group(0) - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - - except json.JSONDecodeError as e: - logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}") - # Create fallback JSON - ai_result = json.dumps({ - "metadata": {"title": "Error Section"}, - "sections": [{ - "id": f"error_section_{chunk_index}", - "content_type": "paragraph", - "elements": [{"text": f"Error parsing JSON: {str(e)}"}] - }] - }) - - processing_time = time.time() - start_time - - logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s") - - return ChunkResult( - originalChunk=part, - aiResult=ai_result, - chunkIndex=chunk_index, - documentId=document_id, - processingTime=processing_time, - metadata={ - "success": True, - "chunkSize": len(part.data) if part.data else 0, - "resultSize": len(ai_result), - "typeGroup": part.typeGroup - } - ) - - except Exception as e: - processing_time = time.time() - start_time - logger.warning(f"Error processing chunk {chunk_index}: {str(e)}") - - return ChunkResult( - originalChunk=part, - aiResult=f"[Error processing chunk: {str(e)}]", - chunkIndex=chunk_index, - documentId=document_id, - processingTime=processing_time, - metadata={ - "success": False, - "error": str(e), - "chunkSize": len(part.data) if part.data else 0, - "typeGroup": part.typeGroup - } - ) - - # Process chunks with concurrency control - max_concurrent = 5 # Default concurrency - if options and hasattr(options, 'maxConcurrentChunks'): - max_concurrent = options.maxConcurrentChunks - elif options and hasattr(options, 'maxParallelChunks'): - max_concurrent = options.maxParallelChunks - - logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}") - self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE") - for i, chunk_info in enumerate(chunks_to_process): - self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE") - - # Create semaphore for concurrency control - semaphore = asyncio.Semaphore(max_concurrent) - - async def process_with_semaphore(chunk_info): - async with semaphore: - return await process_single_chunk(chunk_info) - - # Process all chunks in parallel with concurrency control - tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process] - self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE") - chunk_results = await asyncio.gather(*tasks, return_exceptions=True) - self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE") - - # Handle any 
exceptions in the gather itself - processed_results = [] - for i, result in enumerate(chunk_results): - if isinstance(result, Exception): - # Create error ChunkResult - chunk_info = chunks_to_process[i] - processed_results.append(ChunkResult( - originalChunk=chunk_info['part'], - aiResult=f"[Error in parallel processing: {str(result)}]", - chunkIndex=chunk_info['chunk_index'], - documentId=chunk_info['document_id'], - processingTime=0.0, - metadata={"success": False, "error": str(result)} - )) - elif result is not None: - # Only add non-None results (skip empty containers) - processed_results.append(result) - - logger.info(f"Completed processing {len(processed_results)} chunks") - return processed_results + logger.info(f"Completed processing {len(processedResults)} parts") + return processedResults def _mergePartResults( self, @@ -1221,253 +461,3 @@ CONTINUATION INSTRUCTIONS: logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system") return merged_document - - def _mergeChunkResults( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> str: - """Merge chunk results using existing sophisticated merging system.""" - if not chunkResults: - return "" - - # Convert ChunkResults back to ContentParts for existing merger system - from modules.datamodels.datamodelExtraction import ContentPart - content_parts = [] - for chunk_result in chunkResults: - # Create ContentPart from ChunkResult with proper typeGroup - content_part = ContentPart( - id=chunk_result.originalChunk.id, - parentId=chunk_result.originalChunk.parentId, - label=chunk_result.originalChunk.label, - typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup - mimeType=chunk_result.originalChunk.mimeType, - data=chunk_result.aiResult, # Use AI result as data - metadata={ - **chunk_result.originalChunk.metadata, - "aiResult": True, - "chunk": True, - "chunkIndex": chunk_result.chunkIndex, - "documentId": chunk_result.documentId, - "processingTime": chunk_result.processingTime, - "success": chunk_result.metadata.get("success", False) - } - ) - content_parts.append(content_part) - - # Use existing merging strategy from options - merge_strategy = MergeStrategy( - useIntelligentMerging=True, - groupBy="documentId", # Group by document - orderBy="chunkIndex", # Order by chunk index - mergeType="concatenate" - ) - - - # Apply existing merging logic using the sophisticated merging system - from modules.services.serviceExtraction.subPipeline import _applyMerging - merged_parts = _applyMerging(content_parts, merge_strategy) - - # Convert merged parts back to final string - final_content = "\n\n".join([part.data for part in merged_parts]) - - logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system") - return final_content.strip() - - def _mergeChunkResultsClean( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> str: - """Merge chunk results in CLEAN mode using existing sophisticated merging system.""" - if not chunkResults: - return "" - - # Convert ChunkResults back to ContentParts for existing merger system - from modules.datamodels.datamodelExtraction import ContentPart - content_parts = [] - for chunk_result in chunkResults: - # Skip empty or error chunks in clean mode - if not chunk_result.metadata.get("success", False): - continue - if not chunk_result.aiResult or not chunk_result.aiResult.strip(): - continue - # Skip container/binary chunks in clean mode - if 
chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult: - continue - - # Create ContentPart from ChunkResult with proper typeGroup - content_part = ContentPart( - id=chunk_result.originalChunk.id, - parentId=chunk_result.originalChunk.parentId, - label=chunk_result.originalChunk.label, - typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup - mimeType=chunk_result.originalChunk.mimeType, - data=chunk_result.aiResult, # Use AI result as data - metadata={ - **chunk_result.originalChunk.metadata, - "aiResult": True, - "chunk": True, - "chunkIndex": chunk_result.chunkIndex, - "documentId": chunk_result.documentId, - "processingTime": chunk_result.processingTime, - "success": chunk_result.metadata.get("success", False) - } - ) - content_parts.append(content_part) - - # Use existing merging strategy for clean mode - merge_strategy = MergeStrategy( - useIntelligentMerging=True, - groupBy="documentId", # Group by document - orderBy="chunkIndex", # Order by chunk index - mergeType="concatenate" - ) - - - # Apply existing merging logic using the sophisticated merging system - from modules.services.serviceExtraction.subPipeline import _applyMerging - merged_parts = _applyMerging(content_parts, merge_strategy) - - # Convert merged parts back to final string - final_content = "\n\n".join([part.data for part in merged_parts]) - - logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system") - return final_content.strip() - - def _mergeChunkResultsJson( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """Merge chunk results in JSON mode using existing sophisticated merging system.""" - if not chunkResults: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Convert ChunkResults back to ContentParts for existing merger system - from modules.datamodels.datamodelExtraction import ContentPart - content_parts = [] - for chunk_result in chunkResults: - # Create ContentPart from ChunkResult with proper typeGroup - content_part = ContentPart( - id=chunk_result.originalChunk.id, - parentId=chunk_result.originalChunk.parentId, - label=chunk_result.originalChunk.label, - typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup - mimeType=chunk_result.originalChunk.mimeType, - data=chunk_result.aiResult, # Use AI result as data - metadata={ - **chunk_result.originalChunk.metadata, - "aiResult": True, - "chunk": True, - "chunkIndex": chunk_result.chunkIndex, - "documentId": chunk_result.documentId, - "processingTime": chunk_result.processingTime, - "success": chunk_result.metadata.get("success", False) - } - ) - content_parts.append(content_part) - - # Use existing merging strategy for JSON mode - merge_strategy = MergeStrategy( - useIntelligentMerging=True, - groupBy="documentId", # Group by document - orderBy="chunkIndex", # Order by chunk index - mergeType="concatenate" - ) - - - # Apply existing merging logic using the sophisticated merging system - from modules.services.serviceExtraction.subPipeline import _applyMerging - merged_parts = _applyMerging(content_parts, merge_strategy) - - # Convert merged parts to JSON format - all_sections = [] - document_titles = [] - - for part in merged_parts: - if part.metadata.get("success", False): - try: - # Parse JSON from AI result - chunk_json = json.loads(part.data) - - # Check if this is a multi-file response (has "documents" key) - if isinstance(chunk_json, dict) and 
"documents" in chunk_json: - # This is a multi-file response - merge all documents - logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents") - - # Return multi-file response directly - return { - "metadata": chunk_json.get("metadata", {"title": "Merged Document"}), - "documents": chunk_json["documents"] - } - - # Extract sections from single-file response - elif isinstance(chunk_json, dict) and "sections" in chunk_json: - for section in chunk_json["sections"]: - # Add part context to section - section["metadata"] = section.get("metadata", {}) - section["metadata"]["source_part"] = part.id - section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown") - section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0) - all_sections.append(section) - - # Extract document title - if isinstance(chunk_json, dict) and "metadata" in chunk_json: - title = chunk_json["metadata"].get("title", "") - if title and title not in document_titles: - document_titles.append(title) - - except json.JSONDecodeError as e: - logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}") - # Create a fallback section for invalid JSON - fallback_section = { - "id": f"error_section_{part.id}", - "title": "Error Section", - "content_type": "paragraph", - "elements": [{ - "text": f"Error parsing part {part.id}: {str(e)}" - }], - "order": part.metadata.get("chunkIndex", 0), - "metadata": { - "source_document": part.metadata.get("documentId", "unknown"), - "part_id": part.id, - "error": str(e) - } - } - all_sections.append(fallback_section) - else: - # Handle error parts - error_section = { - "id": f"error_section_{part.id}", - "title": "Error Section", - "content_type": "paragraph", - "elements": [{ - "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}" - }], - "order": part.metadata.get("chunkIndex", 0), - "metadata": { - "source_document": part.metadata.get("documentId", "unknown"), - "part_id": part.id, - "error": part.metadata.get('error', 'Unknown error') - } - } - all_sections.append(error_section) - - # Sort sections by order - all_sections.sort(key=lambda x: x.get("order", 0)) - - # Create merged document with sections - merged_document = { - "metadata": { - "title": document_titles[0] if document_titles else "Merged Document", - "extraction_method": "ai_json_extraction_with_merging", - "version": "2.0" - }, - "sections": all_sections, - "summary": f"Merged document using sophisticated merging system", - "tags": ["merged", "ai_generated", "sophisticated_merging"] - } - - logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)") - return merged_document diff --git a/modules/services/serviceAi/subSharedAiUtils.py b/modules/services/serviceAi/subSharedAiUtils.py deleted file mode 100644 index 1dcf6c41..00000000 --- a/modules/services/serviceAi/subSharedAiUtils.py +++ /dev/null @@ -1,165 +0,0 @@ -""" -Shared utilities for AI services to eliminate code duplication. - -This module contains common functions used across multiple AI service modules -to maintain DRY principles and ensure consistency. -""" - -import re -import logging -from typing import Dict, Any, List, Optional, Union - -logger = logging.getLogger(__name__) - - -def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str: - """ - Build full prompt by replacing placeholders with their content. - Uses the new {{KEY:placeholder}} format. 
- - Args: - prompt: The base prompt template - placeholders: Dictionary of placeholder key-value pairs - - Returns: - Prompt with placeholders replaced - """ - if not placeholders: - return prompt - - full_prompt = prompt - for placeholder, content in placeholders.items(): - # Skip if content is None or empty - if content is None: - continue - # Replace {{KEY:placeholder}} - full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content)) - - return full_prompt - - -def sanitizePromptContent(content: str, contentType: str = "text") -> str: - """ - Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation. - - This is the single source of truth for all prompt sanitization across the system. - Replaces all scattered sanitization functions with a unified approach. - - Args: - content: The content to sanitize - contentType: Type of content ("text", "userinput", "json", "document") - - Returns: - Safely sanitized content ready for AI prompt insertion - """ - if not content: - return "" - - try: - # Convert to string if not already - content_str = str(content) - - # Remove null bytes and control characters (except newlines and tabs) - sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str) - - # Handle different content types with appropriate sanitization - if contentType == "userinput": - # Extra security for user-controlled content - # Escape curly braces to prevent placeholder injection - sanitized = sanitized.replace('{', '{{').replace('}', '}}') - # Escape quotes and wrap in single quotes - sanitized = sanitized.replace('"', '\\"').replace("'", "\\'") - return f"'{sanitized}'" - - elif contentType == "json": - # For JSON content, escape quotes and backslashes - sanitized = sanitized.replace('\\', '\\\\') - sanitized = sanitized.replace('"', '\\"') - sanitized = sanitized.replace('\n', '\\n') - sanitized = sanitized.replace('\r', '\\r') - sanitized = sanitized.replace('\t', '\\t') - - elif contentType == "document": - # For document content, escape special characters - sanitized = sanitized.replace('\\', '\\\\') - sanitized = sanitized.replace('"', '\\"') - sanitized = sanitized.replace("'", "\\'") - sanitized = sanitized.replace('\n', '\\n') - sanitized = sanitized.replace('\r', '\\r') - sanitized = sanitized.replace('\t', '\\t') - - else: # contentType == "text" or default - # Basic text sanitization - sanitized = sanitized.replace('\\', '\\\\') - sanitized = sanitized.replace('"', '\\"') - sanitized = sanitized.replace("'", "\\'") - sanitized = sanitized.replace('\n', '\\n') - sanitized = sanitized.replace('\r', '\\r') - sanitized = sanitized.replace('\t', '\\t') - - return sanitized - - except Exception as e: - logger.error(f"Error sanitizing prompt content: {str(e)}") - # Return a safe fallback - return "[ERROR: Content could not be safely sanitized]" - - -def extractTextFromContentParts(extracted_content) -> str: - """ - Extract text content from ExtractionService ContentPart objects. 
- - Args: - extracted_content: ContentExtracted object with parts - - Returns: - Concatenated text content from all text/table/structure parts - """ - if not extracted_content or not hasattr(extracted_content, 'parts'): - return "" - - text_parts = [] - for part in extracted_content.parts: - if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']: - if hasattr(part, 'data') and part.data: - text_parts.append(part.data) - - return "\n\n".join(text_parts) - - -def reduceText(text: str, reduction_factor: float) -> str: - """ - Reduce text size by the specified factor. - - Args: - text: Text to reduce - reduction_factor: Factor by which to reduce (0.0 to 1.0) - - Returns: - Reduced text with truncation indicator - """ - if reduction_factor >= 1.0: - return text - - target_length = int(len(text) * reduction_factor) - return text[:target_length] + "... [reduced]" - - -def determineCallType(documents: Optional[List], operation_type: str) -> str: - """ - Determine call type based on documents and operation type. - - Args: - documents: List of ChatDocument objects - operation_type: Type of operation being performed - - Returns: - Call type: "plan" or "text" - """ - has_documents = documents is not None and len(documents) > 0 - is_planning_operation = operation_type == "plan" - - if not has_documents and is_planning_operation: - return "plan" - else: - return "text" diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index 62931565..1e0c1d21 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -19,6 +19,16 @@ class ExtractionService: self.services = services self._extractorRegistry = ExtractorRegistry() self._chunkerRegistry = ChunkerRegistry() + # Ensure AI connectors are discovered so pricing models are available + try: + # If internal model is missing, trigger discovery and registration + if modelRegistry.getModel("internal-extractor") is None: + discovered = modelRegistry.discoverConnectors() + for connector in discovered: + modelRegistry.registerConnector(connector) + except Exception: + # Propagate actual errors during use; init should be fast and side-effect free otherwise + pass def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]: """ @@ -82,12 +92,12 @@ class ExtractionService: p.metadata["documentMimeType"] = documentData["mimeType"] # Log chunking information - chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)] - if chunked_parts: + chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)] + if chunkedParts: logger.debug(f"=== CHUNKING RESULTS ===") logger.debug(f"Total parts: {len(ec.parts)}") - logger.debug(f"Chunked parts: {len(chunked_parts)}") - for chunk in chunked_parts: + logger.debug(f"Chunked parts: {len(chunkedParts)}") + for chunk in chunkedParts: logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})") else: logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits") @@ -101,8 +111,11 @@ class ExtractionService: # Emit stats for extraction operation # Use internal extraction model for pricing - modelName = "internal_extraction" + modelName = "internal-extractor" model = modelRegistry.getModel(modelName) + # Hard fail if model is missing; caller must ensure connectors are registered + if model is None or model.calculatePriceUsd is 
None: + raise RuntimeError(f"Pricing model not available: {modelName}") priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived) # Create AiCallResponse with real calculation diff --git a/modules/services/serviceExtraction/subMerger.py b/modules/services/serviceExtraction/subMerger.py index aa9bf1f5..9e8120d0 100644 --- a/modules/services/serviceExtraction/subMerger.py +++ b/modules/services/serviceExtraction/subMerger.py @@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger: 4. Minimize total number of AI calls """ - def __init__(self, model_capabilities: Dict[str, Any]): - self.max_tokens = model_capabilities.get("maxTokens", 4000) - self.safety_margin = model_capabilities.get("safetyMargin", 0.1) - self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin)) - self.chars_per_token = model_capabilities.get("charsPerToken", 4) # Rough estimation + def __init__(self, modelCapabilities: Dict[str, Any]): + self.maxTokens = modelCapabilities.get("maxTokens", 4000) + self.safetyMargin = modelCapabilities.get("safetyMargin", 0.1) + self.effectiveMaxTokens = int(self.maxTokens * (1 - self.safetyMargin)) + self.charsPerToken = modelCapabilities.get("charsPerToken", 4) # Rough estimation - def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]: + def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]: """ Merge chunks intelligently based on token limits. @@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger: if not chunks: return chunks - logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}") + logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}") # Calculate tokens for prompt - prompt_tokens = self._estimate_tokens(prompt) - available_tokens = self.effective_max_tokens - prompt_tokens + promptTokens = self._estimateTokens(prompt) + availableTokens = self.effectiveMaxTokens - promptTokens - logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}") + logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}") # Group chunks by document and type for semantic coherence - grouped_chunks = self._group_chunks_by_document_and_type(chunks) + groupedChunks = self._groupChunksByDocumentAndType(chunks) - merged_parts = [] + mergedParts = [] - for group_key, group_chunks in grouped_chunks.items(): - logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)") + for groupKey, groupChunks in groupedChunks.items(): + logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)") # Merge chunks within this group optimally - group_merged = self._merge_group_optimally(group_chunks, available_tokens) - merged_parts.extend(group_merged) + groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens) + mergedParts.extend(groupMerged) - logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts") - return merged_parts + logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(mergedParts)} parts") + return mergedParts - def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]: + def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]: """Group chunks by document and type for semantic coherence.""" groups = {} for chunk in chunks: # Create group key: document_id + type_group - 
doc_id = chunk.metadata.get("documentId", "unknown") - type_group = chunk.typeGroup - group_key = f"{doc_id}_{type_group}" - - if group_key not in groups: - groups[group_key] = [] - groups[group_key].append(chunk) + docId = chunk.metadata.get("documentId", "unknown") + typeGroup = chunk.typeGroup + groupKey = f"{docId}_{typeGroup}" + if groupKey not in groups: + groups[groupKey] = [] + groups[groupKey].append(chunk) + return groups - def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]: + def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]: """Merge chunks within a group optimally to minimize AI calls.""" if not chunks: return [] # Sort chunks by size (smallest first for better packing) - sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data)) + sortedChunks = sorted(chunks, key=lambda c: self._estimateTokens(c.data)) - merged_parts = [] - current_group = [] - current_tokens = 0 + mergedParts = [] + currentGroup = [] + currentTokens = 0 - for chunk in sorted_chunks: - chunk_tokens = self._estimate_tokens(chunk.data) + for chunk in sortedChunks: + chunkTokens = self._estimateTokens(chunk.data) # Special case: If single chunk is already at max size, process it alone - if chunk_tokens >= available_tokens * 0.9: # 90% of available tokens + if chunkTokens >= availableTokens * 0.9: # 90% of available tokens # Finalize current group if it exists - if current_group: - merged_part = self._create_merged_part(current_group, current_tokens) - merged_parts.append(merged_part) - current_group = [] - current_tokens = 0 + if currentGroup: + mergedPart = self._createMergedPart(currentGroup, currentTokens) + mergedParts.append(mergedPart) + currentGroup = [] + currentTokens = 0 # Process large chunk individually - merged_parts.append(chunk) - logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens") + mergedParts.append(chunk) + logger.debug(f"🔍 Large chunk processed individually: {chunkTokens} tokens") continue # If adding this chunk would exceed limit, finalize current group - if current_tokens + chunk_tokens > available_tokens and current_group: - merged_part = self._create_merged_part(current_group, current_tokens) - merged_parts.append(merged_part) - current_group = [chunk] - current_tokens = chunk_tokens + if currentTokens + chunkTokens > availableTokens and currentGroup: + mergedPart = self._createMergedPart(currentGroup, currentTokens) + mergedParts.append(mergedPart) + currentGroup = [chunk] + currentTokens = chunkTokens else: - current_group.append(chunk) - current_tokens += chunk_tokens + currentGroup.append(chunk) + currentTokens += chunkTokens # Finalize remaining group - if current_group: - merged_part = self._create_merged_part(current_group, current_tokens) - merged_parts.append(merged_part) + if currentGroup: + mergedPart = self._createMergedPart(currentGroup, currentTokens) + mergedParts.append(mergedPart) - logger.info(f"📦 Group merged: {len(chunks)} → {len(merged_parts)} parts") - return merged_parts + logger.info(f"📦 Group merged: {len(chunks)} → {len(mergedParts)} parts") + return mergedParts - def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart: + def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart: """Create a merged ContentPart from multiple chunks.""" if len(chunks) == 1: return chunks[0] # No need to merge single chunk # Combine data with semantic separators - 
combined_data = self._combine_chunk_data(chunks) + combinedData = self._combineChunkData(chunks) # Use metadata from first chunk as base - base_chunk = chunks[0] - merged_metadata = base_chunk.metadata.copy() - merged_metadata.update({ + baseChunk = chunks[0] + mergedMetadata = baseChunk.metadata.copy() + mergedMetadata.update({ "merged": True, "originalChunkCount": len(chunks), - "totalTokens": total_tokens, + "totalTokens": totalTokens, "originalChunkIds": [c.id for c in chunks], - "size": len(combined_data.encode('utf-8')) + "size": len(combinedData.encode('utf-8')) }) - merged_part = ContentPart( + mergedPart = ContentPart( id=makeId(), - parentId=base_chunk.parentId, + parentId=baseChunk.parentId, label=f"merged_{len(chunks)}_chunks", - typeGroup=base_chunk.typeGroup, - mimeType=base_chunk.mimeType, - data=combined_data, - metadata=merged_metadata + typeGroup=baseChunk.typeGroup, + mimeType=baseChunk.mimeType, + data=combinedData, + metadata=mergedMetadata ) - logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens") - return merged_part + logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens") + return mergedPart - def _combine_chunk_data(self, chunks: List[ContentPart]) -> str: + def _combineChunkData(self, chunks: List[ContentPart]) -> str: """Combine chunk data with appropriate separators.""" if not chunks: return "" @@ -173,37 +173,37 @@ class IntelligentTokenAwareMerger: return separator.join([chunk.data for chunk in chunks]) - def _estimate_tokens(self, text: str) -> int: + def _estimateTokens(self, text: str) -> int: """Estimate token count for text.""" if not text: return 0 - return len(text) // self.chars_per_token + return len(text) // self.charsPerToken - def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]: + def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]: """Calculate optimization statistics with detailed analysis.""" - original_calls = len(original_chunks) - optimized_calls = len(merged_parts) - reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0 + originalCalls = len(originalChunks) + optimizedCalls = len(mergedParts) + reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0 # Analyze chunk sizes - large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9] - small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9] + largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9] + smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9] # Calculate theoretical maximum optimization (if all small chunks could be merged) - theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3) # Assume 3 small chunks per call - theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0 + theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3) # Assume 3 small chunks per call + theoreticalReduction = ((originalCalls - theoreticalMinCalls) / originalCalls * 100) if originalCalls > 0 else 0 return { - "original_ai_calls": original_calls, - "optimized_ai_calls": optimized_calls, - "reduction_percent": 
round(reduction_percent, 1), - "cost_savings": f"{reduction_percent:.1f}%", - "efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "∞", + "original_ai_calls": originalCalls, + "optimized_ai_calls": optimizedCalls, + "reduction_percent": round(reductionPercent, 1), + "cost_savings": f"{reductionPercent:.1f}%", + "efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "∞", "analysis": { - "large_chunks": len(large_chunks), - "small_chunks": len(small_chunks), - "theoretical_min_calls": theoretical_min_calls, - "theoretical_reduction": round(theoretical_reduction, 1), - "optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low" + "large_chunks": len(largeChunks), + "small_chunks": len(smallChunks), + "theoretical_min_calls": theoreticalMinCalls, + "theoretical_reduction": round(theoreticalReduction, 1), + "optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low" } } diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py index e935f3c3..f36afe8e 100644 --- a/modules/services/serviceExtraction/subPipeline.py +++ b/modules/services/serviceExtraction/subPipeline.py @@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[Con subMerger = IntelligentTokenAwareMerger(model_capabilities) # Use intelligent merging for all parts - merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "") + merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "") # Calculate and log optimization stats - stats = subMerger.calculate_optimization_stats(parts, merged) + stats = subMerger.calculateOptimizationStats(parts, merged) logger.info(f"🧠 Intelligent merging stats: {stats}") logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)") diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py index 5b887482..a796ea3b 100644 --- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py +++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py @@ -101,7 +101,7 @@ async def buildExtractionPrompt( # Build base prompt adaptive_prompt = f""" -{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} +{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output. 
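A minimal consolidated sketch of the response-cleanup pattern that the chunk-processing changes above repeat inline for image, container, and text chunks: strip markdown fences, isolate the outermost JSON object, validate it, and fall back to a wrapper document that preserves the raw content. This is illustrative only and not part of the diff; the helper name cleanJsonResponse and the chunkIndex parameter are hypothetical.

import json
import re


def cleanJsonResponse(aiResult: str, chunkIndex: int) -> str:
    """Return validated JSON text, or a fallback document wrapping the raw response."""
    cleaned = (aiResult or "").strip()

    # Strip markdown code fences such as ```json ... ``` and stray ``` markers
    cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned, flags=re.MULTILINE)
    cleaned = re.sub(r'\s*```\s*$', '', cleaned, flags=re.MULTILINE)
    cleaned = cleaned.replace('```', '')

    # Keep only the outermost JSON object if extra text surrounds it
    first, last = cleaned.find('{'), cleaned.rfind('}')
    if first != -1 and last > first:
        cleaned = cleaned[first:last + 1].strip()

    try:
        json.loads(cleaned)
        return cleaned
    except json.JSONDecodeError:
        # Preserve the original AI content rather than the error message
        fallback = aiResult if aiResult and aiResult.strip() else "No content detected"
        return json.dumps({
            "metadata": {"title": f"Document Analysis - Chunk {chunkIndex}"},
            "sections": [{
                "id": f"analysis_section_{chunkIndex}",
                "content_type": "paragraph",
                "elements": [{"text": fallback}]
            }]
        })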
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 41bce06d..9dddb49d 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -37,13 +37,13 @@ class GenerationService: return [] # Process each document from the AI action result - processed_documents = [] + processedDocuments = [] for doc in documents: - processed_doc = self.processSingleDocument(doc, action) - if processed_doc: - processed_documents.append(processed_doc) + processedDoc = self.processSingleDocument(doc, action) + if processedDoc: + processedDocuments.append(processedDoc) - return processed_documents + return processedDocuments except Exception as e: logger.error(f"Error processing action result documents: {str(e)}") return [] @@ -77,20 +77,20 @@ class GenerationService: try: processed_docs = self.processActionResultDocuments(action_result, action, workflow) - created_documents = [] + createdDocuments = [] for i, doc_data in enumerate(processed_docs): try: - document_name = doc_data['fileName'] - document_data = doc_data['content'] - mime_type = doc_data['mimeType'] + documentName = doc_data['fileName'] + documentData = doc_data['content'] + mimeType = doc_data['mimeType'] # Convert document data to string content - content = convertDocumentDataToString(document_data, getFileExtension(document_name)) + content = convertDocumentDataToString(documentData, getFileExtension(documentName)) # Skip empty or minimal content - minimal_content_patterns = ['{}', '[]', 'null', '""', "''"] - if not content or content.strip() == "" or content.strip() in minimal_content_patterns: - logger.warning(f"Empty or minimal content for document {document_name}, skipping") + minimalContentPatterns = ['{}', '[]', 'null', '""', "''"] + if not content or content.strip() == "" or content.strip() in minimalContentPatterns: + logger.warning(f"Empty or minimal content for document {documentName}, skipping") continue # Normalize file extension based on mime type if missing or incorrect @@ -105,35 +105,35 @@ class GenerationService: "text/plain": ".txt", "application/json": ".json", } - expected_ext = mime_to_ext.get(mime_type) - if expected_ext: - if not document_name.lower().endswith(expected_ext): + expectedExt = mime_to_ext.get(mimeType) + if expectedExt: + if not documentName.lower().endswith(expectedExt): # Append/replace extension to match mime type - if "." in document_name: - document_name = document_name.rsplit(".", 1)[0] + expected_ext + if "." 
in documentName: + documentName = documentName.rsplit(".", 1)[0] + expectedExt else: - document_name = document_name + expected_ext + documentName = documentName + expectedExt except Exception: pass # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text base64encoded = False try: - binary_mime_types = { + binaryMimeTypes = { "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "application/pdf", } - if isinstance(document_data, str) and mime_type in binary_mime_types: + if isinstance(documentData, str) and mimeType in binaryMimeTypes: base64encoded = True except Exception: base64encoded = False # Create document with file in one step using interfaces directly document = self._createDocument( - fileName=document_name, - mimeType=mime_type, + fileName=documentName, + mimeType=mimeType, content=content, base64encoded=base64encoded, messageId=message_id @@ -141,14 +141,14 @@ class GenerationService: if document: # Set workflow context on the document if possible self._setDocumentWorkflowContext(document, action, workflow) - created_documents.append(document) + createdDocuments.append(document) else: - logger.error(f"Failed to create ChatDocument object for {document_name}") + logger.error(f"Failed to create ChatDocument object for {documentName}") except Exception as e: logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}") continue - return created_documents + return createdDocuments except Exception as e: logger.error(f"Error creating documents from action result: {str(e)}") return [] @@ -157,28 +157,28 @@ class GenerationService: """Set workflow context on a document for proper routing and labeling""" try: # Get current workflow context directly from workflow object - workflow_context = self._getWorkflowContext(workflow) - workflow_stats = self._getWorkflowStats(workflow) + workflowContext = self._getWorkflowContext(workflow) + workflowStats = self._getWorkflowStats(workflow) - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - current_action = workflow_context.get('currentAction', 0) + currentRound = workflowContext.get('currentRound', 0) + currentTask = workflowContext.get('currentTask', 0) + currentAction = workflowContext.get('currentAction', 0) # Try to set workflow context attributes if they exist if hasattr(document, 'roundNumber'): - document.roundNumber = current_round + document.roundNumber = currentRound if hasattr(document, 'taskNumber'): - document.taskNumber = current_task + document.taskNumber = currentTask if hasattr(document, 'actionNumber'): - document.actionNumber = current_action + document.actionNumber = currentAction if hasattr(document, 'actionId'): document.actionId = action.id if hasattr(action, 'id') else None # Set additional workflow metadata if available if hasattr(document, 'workflowId'): - document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None) + document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None) if hasattr(document, 'workflowStatus'): - document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown') + document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 
'unknown') except Exception as e: @@ -355,17 +355,17 @@ class GenerationService: def _getFormatRenderer(self, output_format: str): """Get the appropriate renderer for the specified format using auto-discovery.""" try: - from .renderers.registry import get_renderer - renderer = get_renderer(output_format, services=self.services) + from .renderers.registry import getRenderer + renderer = getRenderer(output_format, services=self.services) if renderer: return renderer # Fallback to text renderer if no specific renderer found logger.warning(f"No renderer found for format {output_format}, falling back to text") - fallback_renderer = get_renderer('text', services=self.services) - if fallback_renderer: - return fallback_renderer + fallbackRenderer = getRenderer('text', services=self.services) + if fallbackRenderer: + return fallbackRenderer logger.error("Even text renderer fallback failed") return None diff --git a/modules/services/serviceGeneration/renderers/registry.py b/modules/services/serviceGeneration/renderers/registry.py index bb890a82..5065424e 100644 --- a/modules/services/serviceGeneration/renderers/registry.py +++ b/modules/services/serviceGeneration/renderers/registry.py @@ -17,7 +17,7 @@ class RendererRegistry: self._format_mappings: Dict[str, str] = {} self._discovered = False - def discover_renderers(self) -> None: + def discoverRenderers(self) -> None: """Automatically discover and register all renderers by scanning files.""" if self._discovered: return @@ -28,38 +28,38 @@ class RendererRegistry: from pathlib import Path # Get the directory containing this registry file - current_dir = Path(__file__).parent - renderers_dir = current_dir + currentDir = Path(__file__).parent + renderersDir = currentDir # Get the package name dynamically - package_name = __name__.rsplit('.', 1)[0] + packageName = __name__.rsplit('.', 1)[0] # Scan all Python files in the renderers directory - for file_path in renderers_dir.glob("*.py"): - if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']: + for filePath in renderersDir.glob("*.py"): + if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']: continue # Extract module name from filename - module_name = file_path.stem + moduleName = filePath.stem try: # Import the module dynamically - full_module_name = f"{package_name}.{module_name}" - module = importlib.import_module(full_module_name) + fullModuleName = f"{packageName}.{moduleName}" + module = importlib.import_module(fullModuleName) # Look for renderer classes in the module - for attr_name in dir(module): - attr = getattr(module, attr_name) + for attrName in dir(module): + attr = getattr(module, attrName) if (isinstance(attr, type) and issubclass(attr, BaseRenderer) and attr != BaseRenderer and - hasattr(attr, 'get_supported_formats')): + hasattr(attr, 'getSupportedFormats')): # Register the renderer - self._register_renderer_class(attr) + self._registerRendererClass(attr) except Exception as e: - logger.warning(f"Could not load renderer from {module_name}: {str(e)}") + logger.warning(f"Could not load renderer from {moduleName}: {str(e)}") continue self._discovered = True @@ -68,72 +68,72 @@ class RendererRegistry: logger.error(f"Error during renderer discovery: {str(e)}") self._discovered = True # Mark as discovered to avoid repeated attempts - def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None: + def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None: """Register a renderer class with its supported 
formats.""" try: # Get supported formats from the renderer class - supported_formats = renderer_class.get_supported_formats() + supportedFormats = rendererClass.getSupportedFormats() - for format_name in supported_formats: + for formatName in supportedFormats: # Register primary format - self._renderers[format_name.lower()] = renderer_class + self._renderers[formatName.lower()] = rendererClass # Register aliases if any - if hasattr(renderer_class, 'get_format_aliases'): - aliases = renderer_class.get_format_aliases() + if hasattr(rendererClass, 'getFormatAliases'): + aliases = rendererClass.getFormatAliases() for alias in aliases: - self._format_mappings[alias.lower()] = format_name.lower() + self._format_mappings[alias.lower()] = formatName.lower() - logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}") + logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}") except Exception as e: - logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}") + logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}") - def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]: + def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]: """Get a renderer instance for the specified format.""" if not self._discovered: - self.discover_renderers() + self.discoverRenderers() # Normalize format name - format_name = output_format.lower().strip() + formatName = outputFormat.lower().strip() # Check for aliases first - if format_name in self._format_mappings: - format_name = self._format_mappings[format_name] + if formatName in self._format_mappings: + formatName = self._format_mappings[formatName] # Get renderer class - renderer_class = self._renderers.get(format_name) + rendererClass = self._renderers.get(formatName) - if renderer_class: + if rendererClass: try: - return renderer_class(services=services) + return rendererClass(services=services) except Exception as e: - logger.error(f"Error creating renderer instance for {format_name}: {str(e)}") + logger.error(f"Error creating renderer instance for {formatName}: {str(e)}") return None - logger.warning(f"No renderer found for format: {output_format}") + logger.warning(f"No renderer found for format: {outputFormat}") return None - def get_supported_formats(self) -> List[str]: + def getSupportedFormats(self) -> List[str]: """Get list of all supported formats.""" if not self._discovered: - self.discover_renderers() + self.discoverRenderers() formats = list(self._renderers.keys()) formats.extend(self._format_mappings.keys()) return sorted(set(formats)) - def get_renderer_info(self) -> Dict[str, Dict[str, str]]: + def getRendererInfo(self) -> Dict[str, Dict[str, str]]: """Get information about all registered renderers.""" if not self._discovered: - self.discover_renderers() + self.discoverRenderers() info = {} - for format_name, renderer_class in self._renderers.items(): - info[format_name] = { - 'class_name': renderer_class.__name__, - 'module': renderer_class.__module__, - 'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description' + for formatName, rendererClass in self._renderers.items(): + info[formatName] = { + 'class_name': rendererClass.__name__, + 'module': rendererClass.__module__, + 'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description' } return 
info @@ -141,14 +141,14 @@ class RendererRegistry: # Global registry instance _registry = RendererRegistry() -def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]: +def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]: """Get a renderer instance for the specified format.""" - return _registry.get_renderer(output_format, services) + return _registry.getRenderer(outputFormat, services) -def get_supported_formats() -> List[str]: +def getSupportedFormats() -> List[str]: """Get list of all supported formats.""" - return _registry.get_supported_formats() + return _registry.getSupportedFormats() -def get_renderer_info() -> Dict[str, Dict[str, str]]: +def getRendererInfo() -> Dict[str, Dict[str, str]]: """Get information about all registered renderers.""" - return _registry.get_renderer_info() + return _registry.getRendererInfo() diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 5444525a..566c7765 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -4,6 +4,7 @@ Base renderer class for all format renderers. from abc import ABC, abstractmethod from typing import Dict, Any, Tuple, List +from modules.datamodels.datamodelJson import supportedSectionTypes import json import logging import re @@ -23,7 +24,7 @@ class BaseRenderer(ABC): self.services = services # Add services attribute @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """ Return list of supported format names for this renderer. Override this method in subclasses to specify supported formats. @@ -31,7 +32,7 @@ class BaseRenderer(ABC): return [] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """ Return list of format aliases for this renderer. Override this method in subclasses to specify format aliases. @@ -39,7 +40,7 @@ class BaseRenderer(ABC): return [] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """ Return priority for this renderer (higher number = higher priority). Used when multiple renderers support the same format. @@ -47,43 +48,43 @@ class BaseRenderer(ABC): return 0 @abstractmethod - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """ Render extracted JSON content to the target format. 
Args: - extracted_content: Structured JSON content with sections and metadata + extractedContent: Structured JSON content with sections and metadata title: Report title - user_prompt: Original user prompt for context - ai_service: AI service instance for additional processing + userPrompt: Original user prompt for context + aiService: AI service instance for additional processing Returns: - tuple: (rendered_content, mime_type) + tuple: (renderedContent, mimeType) """ pass - def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]: + def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: """Extract sections from report data.""" - return report_data.get('sections', []) + return reportData.get('sections', []) - def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]: + def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: """Extract metadata from report data.""" - return report_data.get('metadata', {}) + return reportData.get('metadata', {}) - def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str: + def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str: """Get title from report data or use fallback.""" - metadata = report_data.get('metadata', {}) - return metadata.get('title', fallback_title) + metadata = reportData.get('metadata', {}) + return metadata.get('title', fallbackTitle) - def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool: + def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool: """Validate that JSON content has the expected structure.""" - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): return False - if "sections" not in json_content: + if "sections" not in jsonContent: return False - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) if not isinstance(sections, list): return False @@ -96,14 +97,14 @@ class BaseRenderer(ABC): return True - def _get_section_type(self, section: Dict[str, Any]) -> str: + def _getSectionType(self, section: Dict[str, Any]) -> str: """Get the type of a section; default to 'paragraph' for non-dict inputs.""" if isinstance(section, dict): return section.get("content_type", "paragraph") # If section is a list or any other type, treat as paragraph elements return "paragraph" - def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]: + def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]: """Get the elements of a section; if a list is provided directly, return it.""" if isinstance(section, dict): return section.get("elements", []) @@ -111,21 +112,30 @@ class BaseRenderer(ABC): return section return [] - def _get_section_id(self, section: Dict[str, Any]) -> str: + def _getSectionId(self, section: Dict[str, Any]) -> str: """Get the ID of a section (if available).""" if isinstance(section, dict): return section.get("id", "unknown") return "unknown" - def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: + def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: """Extract table headers and rows from section data.""" - headers = section_data.get("headers", []) - rows = section_data.get("rows", []) + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + candidate = sectionData[0] + sectionData = candidate if isinstance(candidate, dict) else {} + headers = 
sectionData.get("headers", []) + rows = sectionData.get("rows", []) return headers, rows - def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]: + def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]: """Extract bullet list items from section data.""" - items = section_data.get("items", []) + # Normalize when elements array or raw list was passed in + if isinstance(sectionData, list): + # Already a list of items (strings or dicts) + items = sectionData + else: + items = sectionData.get("items", []) result = [] for item in items: if isinstance(item, str): @@ -134,29 +144,47 @@ class BaseRenderer(ABC): result.append(item["text"]) return result - def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]: + def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]: """Extract heading level and text from section data.""" - level = section_data.get("level", 1) - text = section_data.get("text", "") + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + level = sectionData.get("level", 1) + text = sectionData.get("text", "") return level, text - def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str: + def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str: """Extract paragraph text from section data.""" - return section_data.get("text", "") + if isinstance(sectionData, list): + # Join multiple paragraph elements if provided as a list + texts = [] + for el in sectionData: + if isinstance(el, dict) and "text" in el: + texts.append(el["text"]) + elif isinstance(el, str): + texts.append(el) + return "\n".join(texts) + return sectionData.get("text", "") - def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: + def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: """Extract code and language from section data.""" - code = section_data.get("code", "") - language = section_data.get("language", "") + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + code = sectionData.get("code", "") + language = sectionData.get("language", "") return code, language - def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: + def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]: """Extract base64 data and alt text from section data.""" - base64_data = section_data.get("base64Data", "") - alt_text = section_data.get("altText", "Image") - return base64_data, alt_text + # Normalize when elements array was passed in + if isinstance(sectionData, list) and sectionData: + sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {} + base64Data = sectionData.get("base64Data", "") + altText = sectionData.get("altText", "Image") + return base64Data, altText - def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: + def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: """ Render an image section. This is a base implementation that should be overridden by format-specific renderers. 
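# --- Editor's aside (illustrative sketch, not part of the diff) -------------
# The extraction helpers above now accept either the canonical section dict or
# a bare elements list. The sketch below exercises that normalisation through a
# throwaway subclass; DummyRenderer exists only for this example, and it assumes
# the BaseRenderer constructor accepts services=None, as the registry's
# rendererClass(services=services) call suggests.
from typing import Any, Dict, Tuple

from modules.services.serviceGeneration.renderers.rendererBaseTemplate import BaseRenderer


class DummyRenderer(BaseRenderer):
    async def render(self, extractedContent: Dict[str, Any], title: str,
                     userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        return "", "text/plain"


dummy = DummyRenderer(services=None)

# Canonical shape: a section dict whose "elements" list holds the table payload.
tableSection = {
    "content_type": "table",
    "elements": [{"headers": ["Name", "Qty"], "rows": [["Widget", "3"]]}],
}
headers, rows = dummy._extractTableData(dummy._getSectionData(tableSection))
# headers == ["Name", "Qty"], rows == [["Widget", "3"]]

# Degenerate shape: a bare list is treated as the bullet items themselves.
items = dummy._extractBulletListItems(["first point", {"text": "second point"}])
# items == ["first point", "second point"]
# --- end aside ---------------------------------------------------------------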
@@ -168,47 +196,47 @@ class BaseRenderer(ABC): Returns: Format-specific image representation """ - section_data = self._get_section_data(section) - base64_data, alt_text = self._extract_image_data(section_data) + sectionData = self._getSectionData(section) + base64Data, altText = self._extractImageData(sectionData) # Base implementation returns a simple dict # Format-specific renderers should override this method return { "content_type": "image", - "base64Data": base64_data, - "altText": alt_text, - "width": section_data.get("width", None), - "height": section_data.get("height", None), - "caption": section_data.get("caption", "") + "base64Data": base64Data, + "altText": altText, + "width": sectionData.get("width", None), + "height": sectionData.get("height", None), + "caption": sectionData.get("caption", "") } - def _validate_image_data(self, base64_data: str, alt_text: str) -> bool: + def _validateImageData(self, base64Data: str, altText: str) -> bool: """Validate image data.""" - if not base64_data: + if not base64Data: self.logger.warning("Image section has no base64 data") return False - if not alt_text: + if not altText: self.logger.warning("Image section has no alt text") return False # Basic base64 validation try: - base64.b64decode(base64_data, validate=True) + base64.b64decode(base64Data, validate=True) return True except Exception as e: self.logger.warning(f"Invalid base64 image data: {str(e)}") return False - def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]: + def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]: """ Get image dimensions from base64 data. This is a helper method that format-specific renderers can use. """ try: # Decode base64 data - image_data = base64.b64decode(base64_data) - image = Image.open(io.BytesIO(image_data)) + imageData = base64.b64decode(base64Data) + image = Image.open(io.BytesIO(imageData)) return image.size # Returns (width, height) @@ -216,89 +244,89 @@ class BaseRenderer(ABC): self.logger.warning(f"Could not determine image dimensions: {str(e)}") return (0, 0) - def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str: + def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str: """ Resize image if it exceeds maximum dimensions. Returns the resized image as base64 string. 
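# --- Editor's aside (illustrative sketch, not part of the diff) -------------
# _resizeImageIfNeeded below keeps the aspect ratio by scaling both dimensions
# with the smaller of the two width/height ratios. fitWithinBounds restates
# that arithmetic on plain integers for clarity; it is not a helper that exists
# in the codebase.
from typing import Tuple


def fitWithinBounds(width: int, height: int,
                    maxWidth: int = 800, maxHeight: int = 600) -> Tuple[int, int]:
    """Scale (width, height) down, preserving aspect ratio, to fit the bounds."""
    if width <= maxWidth and height <= maxHeight:
        return width, height
    ratio = min(maxWidth / width, maxHeight / height)
    return int(width * ratio), int(height * ratio)


# A 1600x900 image squeezed into 800x600: the width ratio (0.5) is the binding
# constraint, so the result is 800x450.
assert fitWithinBounds(1600, 900) == (800, 450)
# --- end aside ---------------------------------------------------------------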
""" try: # Decode base64 data - image_data = base64.b64decode(base64_data) - image = Image.open(io.BytesIO(image_data)) + imageData = base64.b64decode(base64Data) + image = Image.open(io.BytesIO(imageData)) # Check if resizing is needed width, height = image.size - if width <= max_width and height <= max_height: - return base64_data # No resizing needed + if width <= maxWidth and height <= maxHeight: + return base64Data # No resizing needed # Calculate new dimensions maintaining aspect ratio - ratio = min(max_width / width, max_height / height) - new_width = int(width * ratio) - new_height = int(height * ratio) + ratio = min(maxWidth / width, maxHeight / height) + newWidth = int(width * ratio) + newHeight = int(height * ratio) # Resize image - resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) + resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS) # Convert back to base64 buffer = io.BytesIO() - resized_image.save(buffer, format=image.format or 'PNG') - resized_data = buffer.getvalue() + resizedImage.save(buffer, format=image.format or 'PNG') + resizedData = buffer.getvalue() - return base64.b64encode(resized_data).decode('utf-8') + return base64.b64encode(resizedData).decode('utf-8') except Exception as e: self.logger.warning(f"Could not resize image: {str(e)}") - return base64_data # Return original if resize fails + return base64Data # Return original if resize fails - def _get_supported_section_types(self) -> List[str]: - """Return list of supported section types.""" - return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"] + def _getSupportedSectionTypes(self) -> List[str]: + """Return list of supported section types (from unified schema).""" + return supportedSectionTypes - def _is_valid_section_type(self, section_type: str) -> bool: + def _isValidSectionType(self, sectionType: str) -> bool: """Check if a section type is valid.""" - return section_type in self._get_supported_section_types() + return sectionType in self._getSupportedSectionTypes() - def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]: + def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]: """Process a section and return structured data based on its type.""" - section_type = self._get_section_type(section) - section_data = self._get_section_data(section) + sectionType = self._getSectionType(section) + sectionData = self._getSectionData(section) - if section_type == "table": - headers, rows = self._extract_table_data(section_data) + if sectionType == "table": + headers, rows = self._extractTableData(sectionData) return {"content_type": "table", "headers": headers, "rows": rows} - elif section_type == "bullet_list": - items = self._extract_bullet_list_items(section_data) + elif sectionType == "bullet_list": + items = self._extractBulletListItems(sectionData) return {"content_type": "bullet_list", "items": items} - elif section_type == "heading": - level, text = self._extract_heading_data(section_data) + elif sectionType == "heading": + level, text = self._extractHeadingData(sectionData) return {"content_type": "heading", "level": level, "text": text} - elif section_type == "paragraph": - text = self._extract_paragraph_text(section_data) + elif sectionType == "paragraph": + text = self._extractParagraphText(sectionData) return {"content_type": "paragraph", "text": text} - elif section_type == "code_block": - code, language = self._extract_code_block_data(section_data) + elif sectionType == 
"code_block": + code, language = self._extractCodeBlockData(sectionData) return {"content_type": "code_block", "code": code, "language": language} - elif section_type == "image": - base64_data, alt_text = self._extract_image_data(section_data) + elif sectionType == "image": + base64Data, altText = self._extractImageData(sectionData) # Validate image data - if self._validate_image_data(base64_data, alt_text): + if self._validateImageData(base64Data, altText): return { "content_type": "image", - "base64Data": base64_data, - "altText": alt_text, - "width": section_data.get("width"), - "height": section_data.get("height"), - "caption": section_data.get("caption", "") + "base64Data": base64Data, + "altText": altText, + "width": sectionData.get("width") if isinstance(sectionData, dict) else None, + "height": sectionData.get("height") if isinstance(sectionData, dict) else None, + "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else "" } else: # Return placeholder if image data is invalid - return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"} + return {"content_type": "paragraph", "text": f"[Image: {altText}]"} else: # Fallback to paragraph - text = self._extract_paragraph_text(section_data) + text = self._extractParagraphText(sectionData) return {"content_type": "paragraph", "text": text} - def _format_timestamp(self, timestamp: str = None) -> str: + def _formatTimestamp(self, timestamp: str = None) -> str: """Format timestamp for display.""" if timestamp: return timestamp @@ -306,38 +334,38 @@ class BaseRenderer(ABC): # ===== GENERIC AI STYLING HELPERS ===== - async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: + async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]: """ Generic AI styling method that can be used by all renderers. 
Args: - ai_service: AI service instance - style_template: Format-specific style template - default_styles: Default styles to fall back to + aiService: AI service instance + styleTemplate: Format-specific style template + defaultStyles: Default styles to fall back to Returns: Dict with styling definitions """ # DEBUG: Show which renderer is calling this method - if not ai_service: - return default_styles + if not aiService: + return defaultStyles try: - request_options = AiCallOptions() - request_options.operationType = OperationTypeEnum.DATA_GENERATE + requestOptions = AiCallOptions() + requestOptions.operationType = OperationTypeEnum.DATA_GENERATE - request = AiCallRequest(prompt=style_template, context="", options=request_options) + request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions) # DEBUG: Show the actual prompt being sent to AI self.logger.debug(f"AI Style Template Prompt:") - self.logger.debug(f"{style_template}") + self.logger.debug(f"{styleTemplate}") - response = await ai_service.aiObjects.call(request) + response = await aiService.aiObjects.call(request) # Save styling prompt and response to debug - self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt") + self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt") self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response") # Clean and parse JSON @@ -346,12 +374,12 @@ class BaseRenderer(ABC): # Check if result is empty if not result: self.logger.warning("AI styling returned empty response, using defaults") - return default_styles + return defaultStyles # Extract JSON from markdown if present - json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) - if json_match: - result = json_match.group(1).strip() + jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) + if jsonMatch: + result = jsonMatch.group(1).strip() elif result.startswith('```json'): result = re.sub(r'^```json\s*', '', result) result = re.sub(r'\s*```$', '', result) @@ -362,8 +390,8 @@ class BaseRenderer(ABC): # Try to parse JSON try: styles = json.loads(result) - except json.JSONDecodeError as json_error: - self.logger.warning(f"AI styling returned invalid JSON: {json_error}") + except json.JSONDecodeError as jsonError: + self.logger.warning(f"AI styling returned invalid JSON: {jsonError}") # Use print instead of logger to avoid truncation self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER") @@ -372,88 +400,88 @@ class BaseRenderer(ABC): self.logger.warning(f"Raw content that failed to parse: {result}") # Try to fix incomplete JSON by adding missing closing braces - open_braces = result.count('{') - close_braces = result.count('}') + openBraces = result.count('{') + closeBraces = result.count('}') - if open_braces > close_braces: + if openBraces > closeBraces: # JSON is incomplete, add missing closing braces - missing_braces = open_braces - close_braces - result = result + '}' * missing_braces - self.logger.info(f"Added {missing_braces} missing closing brace(s)") + missingBraces = openBraces - closeBraces + result = result + '}' * missingBraces + self.logger.info(f"Added {missingBraces} missing closing brace(s)") self.logger.debug(f"Fixed JSON: {result}") # Try parsing the fixed JSON try: styles = json.loads(result) self.logger.info("Successfully fixed incomplete JSON") - except json.JSONDecodeError as fix_error: - self.logger.warning(f"Fixed JSON still invalid: {fix_error}") + except 
json.JSONDecodeError as fixError: + self.logger.warning(f"Fixed JSON still invalid: {fixError}") self.logger.warning(f"Fixed JSON content: {result}") # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] + jsonStart = result.find('{') + jsonEnd = result.rfind('}') + if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart: + jsonPart = result[jsonStart:jsonEnd+1] try: - styles = json.loads(json_part) + styles = json.loads(jsonPart) self.logger.info("Successfully extracted JSON from explanatory text") except json.JSONDecodeError: self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles + return defaultStyles else: - return default_styles + return defaultStyles else: # Try to extract just the JSON part if it's embedded in text - json_start = result.find('{') - json_end = result.rfind('}') - if json_start != -1 and json_end != -1 and json_end > json_start: - json_part = result[json_start:json_end+1] + jsonStart = result.find('{') + jsonEnd = result.rfind('}') + if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart: + jsonPart = result[jsonStart:jsonEnd+1] try: - styles = json.loads(json_part) + styles = json.loads(jsonPart) self.logger.info("Successfully extracted JSON from explanatory text") except json.JSONDecodeError: self.logger.warning("Could not extract valid JSON from response, using defaults") - return default_styles + return defaultStyles else: - return default_styles + return defaultStyles # Convert colors to appropriate format - styles = self._convert_colors_format(styles) + styles = self._convertColorsFormat(styles) return styles except Exception as e: self.logger.warning(f"AI styling failed: {str(e)}, using defaults") - return default_styles + return defaultStyles - def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]: """ Convert colors to appropriate format based on renderer type. Override this method in subclasses for format-specific color handling. """ return styles - def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str: + def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str: """ Create a standardized AI style template for any format. Args: - format_name: Name of the format (e.g., "docx", "xlsx", "pptx") - user_prompt: User's original prompt - style_schema: Format-specific style schema + formatName: Name of the format (e.g., "docx", "xlsx", "pptx") + userPrompt: User's original prompt + styleSchema: Format-specific style schema Returns: Formatted prompt string """ - schema_json = json.dumps(style_schema, indent=4) + schemaJson = json.dumps(styleSchema, indent=4) # DEBUG: Show the schema being sent - return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents. + return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents. 
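# --- Editor's aside (illustrative sketch, not part of the diff) -------------
# _getAiStyles above falls back through several repair strategies when the
# model returns imperfect JSON: strip a fenced ```json block, append missing
# closing braces, then fall back to the substring between the first '{' and
# the last '}'. parseLenientJson is a condensed restatement of that chain; it
# is not a helper that exists in the codebase.
import json
import re
from typing import Any, Dict, Optional


def parseLenientJson(raw: str) -> Optional[Dict[str, Any]]:
    text = raw.strip()

    # 1. Unwrap a fenced ```json block if present.
    fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
    if fenced:
        text = fenced.group(1).strip()

    # 2. Try as-is, then with any missing closing braces appended.
    balanced = text + '}' * max(0, text.count('{') - text.count('}'))
    for candidate in (text, balanced):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            pass

    # 3. Last resort: the substring between the first '{' and the last '}'.
    start, end = text.find('{'), text.rfind('}')
    if start != -1 and end > start:
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            pass
    return None


# Truncated model output with one closing brace missing still parses.
assert parseLenientJson('{"title": {"bold": true}') == {"title": {"bold": True}}
# --- end aside ---------------------------------------------------------------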
Use this schema as a template and customize the values for professional document styling: -{schema_json} +{schemaJson} Requirements: - Return ONLY the complete JSON object (no markdown, no explanations) diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py index 5ad2d4bc..3e1ef3d5 100644 --- a/modules/services/serviceGeneration/renderers/rendererCsv.py +++ b/modules/services/serviceGeneration/renderers/rendererCsv.py @@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer): """Renders content to CSV format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported CSV formats.""" return ['csv'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['spreadsheet', 'table'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for CSV renderer.""" return 70 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to CSV format.""" try: # Generate CSV directly from JSON (no styling needed for CSV) - csv_content = await self._generate_csv_from_json(extracted_content, title) + csvContent = await self._generateCsvFromJson(extractedContent, title) - return csv_content, "text/csv" + return csvContent, "text/csv" except Exception as e: self.logger.error(f"Error rendering CSV: {str(e)}") # Return minimal CSV fallback return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv" - async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str: + async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: """Generate CSV content from structured JSON document.""" try: # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + documentTitle = jsonContent.get("metadata", {}).get("title", title) # Generate CSV content - csv_rows = [] + csvRows = [] # Add title row - if document_title: - csv_rows.append([document_title]) - csv_rows.append([]) # Empty row + if documentTitle: + csvRows.append([documentTitle]) + csvRows.append([]) # Empty row # Process each section in order - sections = json_content.get("sections", []) + sections = jsonContent.get("sections", []) for section in sections: - section_csv = self._render_json_section_to_csv(section) - if section_csv: - csv_rows.extend(section_csv) - csv_rows.append([]) # Empty row between sections + sectionCsv = self._renderJsonSectionToCsv(section) + if sectionCsv: + csvRows.extend(sectionCsv) + csvRows.append([]) # Empty row between sections # Convert to CSV string - csv_content = self._convert_rows_to_csv(csv_rows) + csvContent = self._convertRowsToCsv(csvRows) - return csv_content + return csvContent except Exception as e: self.logger.error(f"Error generating CSV from JSON: {str(e)}") 
raise Exception(f"CSV generation failed: {str(e)}") - def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]: + def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]: """Render a single JSON section to CSV rows.""" try: - section_type = section.get("content_type", "paragraph") + sectionType = section.get("content_type", "paragraph") elements = section.get("elements", []) - csv_rows = [] + csvRows = [] # Add section title if available - section_title = section.get("title") - if section_title: - csv_rows.append([f"# {section_title}"]) + sectionTitle = section.get("title") + if sectionTitle: + csvRows.append([f"# {sectionTitle}"]) # Process each element in the section for element in elements: - if section_type == "table": - csv_rows.extend(self._render_json_table_to_csv(element)) - elif section_type == "list": - csv_rows.extend(self._render_json_list_to_csv(element)) - elif section_type == "heading": - csv_rows.extend(self._render_json_heading_to_csv(element)) - elif section_type == "paragraph": - csv_rows.extend(self._render_json_paragraph_to_csv(element)) - elif section_type == "code": - csv_rows.extend(self._render_json_code_to_csv(element)) + if sectionType == "table": + csvRows.extend(self._renderJsonTableToCsv(element)) + elif sectionType == "list": + csvRows.extend(self._renderJsonListToCsv(element)) + elif sectionType == "heading": + csvRows.extend(self._renderJsonHeadingToCsv(element)) + elif sectionType == "paragraph": + csvRows.extend(self._renderJsonParagraphToCsv(element)) + elif sectionType == "code": + csvRows.extend(self._renderJsonCodeToCsv(element)) else: # Fallback to paragraph for unknown types - csv_rows.extend(self._render_json_paragraph_to_csv(element)) + csvRows.extend(self._renderJsonParagraphToCsv(element)) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") return [["[Error rendering section]"]] - def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]: """Render a JSON table to CSV rows.""" try: - headers = table_data.get("headers", []) - rows = table_data.get("rows", []) + headers = tableData.get("headers", []) + rows = tableData.get("rows", []) - csv_rows = [] + csvRows = [] if headers: - csv_rows.append(headers) + csvRows.append(headers) if rows: - csv_rows.extend(rows) + csvRows.extend(rows) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") return [["[Error rendering table]"]] - def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]: """Render a JSON list to CSV rows.""" try: - items = list_data.get("items", []) - csv_rows = [] + items = listData.get("items", []) + csvRows = [] for item in items: if isinstance(item, dict): text = item.get("text", "") subitems = item.get("subitems", []) - csv_rows.append([text]) + csvRows.append([text]) # Add subitems as indented rows for subitem in subitems: if isinstance(subitem, dict): - csv_rows.append([f" - {subitem.get('text', '')}"]) + csvRows.append([f" - {subitem.get('text', '')}"]) else: - csv_rows.append([f" - {subitem}"]) + csvRows.append([f" - {subitem}"]) else: - csv_rows.append([str(item)]) + csvRows.append([str(item)]) - return csv_rows + return csvRows except Exception as e: 
self.logger.warning(f"Error rendering list: {str(e)}") return [["[Error rendering list]"]] - def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]: """Render a JSON heading to CSV rows.""" try: - text = heading_data.get("text", "") - level = heading_data.get("level", 1) + text = headingData.get("text", "") + level = headingData.get("level", 1) if text: # Use # symbols for heading levels - heading_text = f"{'#' * level} {text}" - return [[heading_text]] + headingText = f"{'#' * level} {text}" + return [[headingText]] return [] @@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer): self.logger.warning(f"Error rendering heading: {str(e)}") return [["[Error rendering heading]"]] - def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]: """Render a JSON paragraph to CSV rows.""" try: - text = paragraph_data.get("text", "") + text = paragraphData.get("text", "") if text: # Split long paragraphs into multiple rows if needed if len(text) > 100: words = text.split() rows = [] - current_row = [] - current_length = 0 + currentRow = [] + currentLength = 0 for word in words: - if current_length + len(word) > 100 and current_row: - rows.append([" ".join(current_row)]) - current_row = [word] - current_length = len(word) + if currentLength + len(word) > 100 and currentRow: + rows.append([" ".join(currentRow)]) + currentRow = [word] + currentLength = len(word) else: - current_row.append(word) - current_length += len(word) + 1 + currentRow.append(word) + currentLength += len(word) + 1 - if current_row: - rows.append([" ".join(current_row)]) + if currentRow: + rows.append([" ".join(currentRow)]) return rows else: @@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer): self.logger.warning(f"Error rendering paragraph: {str(e)}") return [["[Error rendering paragraph]"]] - def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]: + def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]: """Render a JSON code block to CSV rows.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + code = codeData.get("code", "") + language = codeData.get("language", "") - csv_rows = [] + csvRows = [] if language: - csv_rows.append([f"Code ({language}):"]) + csvRows.append([f"Code ({language}):"]) if code: # Split code into lines - code_lines = code.split('\n') - for line in code_lines: - csv_rows.append([f" {line}"]) + codeLines = code.split('\n') + for line in codeLines: + csvRows.append([f" {line}"]) - return csv_rows + return csvRows except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") return [["[Error rendering code block]"]] - def _convert_rows_to_csv(self, rows: List[List[str]]) -> str: + def _convertRowsToCsv(self, rows: List[List[str]]) -> str: """Convert rows to CSV string.""" import csv import io @@ -245,7 +245,7 @@ class RendererCsv(BaseRenderer): return output.getvalue() - def _clean_csv_content(self, content: str, title: str) -> str: + def _cleanCsvContent(self, content: str, title: str) -> str: """Clean and validate CSV content from AI.""" content = content.strip() diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 42bb71f3..6db48c32 100644 --- 
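# --- Editor's aside (illustrative sketch, not part of the diff) -------------
# Before the DOCX renderer hunks: _renderJsonParagraphToCsv above wraps
# paragraphs longer than 100 characters into multiple rows with a greedy word
# fill. wrapWords restates that loop on a plain string; it is not a helper that
# exists in the codebase.
from typing import List


def wrapWords(text: str, limit: int = 100) -> List[str]:
    rows: List[str] = []
    currentRow: List[str] = []
    currentLength = 0
    for word in text.split():
        if currentLength + len(word) > limit and currentRow:
            rows.append(" ".join(currentRow))
            currentRow = [word]
            currentLength = len(word)
        else:
            currentRow.append(word)
            currentLength += len(word) + 1  # +1 accounts for the joining space
    if currentRow:
        rows.append(" ".join(currentRow))
    return rows


assert wrapWords("alpha beta gamma", limit=10) == ["alpha beta", "gamma"]
# --- end aside ---------------------------------------------------------------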
a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer): """Renders content to DOCX format using python-docx.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported DOCX formats.""" return ['docx', 'doc'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['word', 'document'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for DOCX renderer.""" return 115 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to DOCX format using AI-analyzed styling.""" - self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER") + self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") try: if not DOCX_AVAILABLE: # Fallback to HTML if python-docx not available from .rendererHtml import RendererHtml - html_renderer = RendererHtml() - html_content, _ = await html_renderer.render(extracted_content, title) - return html_content, "text/html" + htmlRenderer = RendererHtml() + htmlContent, _ = await htmlRenderer.render(extractedContent, title) + return htmlContent, "text/html" # Generate DOCX using AI-analyzed styling - docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service) + docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document" @@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer): # Return minimal fallback return f"DOCX Generation Error: {str(e)}", "text/plain" - async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate DOCX content from structured JSON document using AI-generated styling.""" try: # Create new document doc = Document() # Get AI-generated styling definitions - self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...") - styles = await self._get_docx_styles(user_prompt, ai_service) + self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...") + styles = await self._getDocxStyles(userPrompt, aiService) # Apply basic document setup - self._setup_basic_document_styles(doc) + self._setupBasicDocumentStyles(doc) # Validate JSON structure if not isinstance(json_content, dict): @@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer): self.logger.error(f"Error generating DOCX from JSON: {str(e)}") raise Exception(f"DOCX generation failed: {str(e)}") - async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: + async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]: """Get DOCX styling definitions using base 
template AI styling.""" style_schema = { "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, @@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} } - style_template = self._create_ai_style_template("docx", user_prompt, style_schema) - styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles()) + style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema) + styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles()) # Validate and fix contrast issues - return self._validate_styles_contrast(styles) + return self._validateStylesContrast(styles) - def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: + def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: """Validate and fix contrast issues in AI-generated styles.""" try: # Fix table header contrast @@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Style validation failed: {str(e)}") - return self._get_default_styles() + return self._getDefaultStyles() - def _get_default_styles(self) -> Dict[str, Any]: + def _getDefaultStyles(self) -> Dict[str, Any]: """Default DOCX styles.""" return { "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, @@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer): "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} } - def _setup_basic_document_styles(self, doc: Document) -> None: + def _setupBasicDocumentStyles(self, doc: Document) -> None: """Set up basic document styles.""" try: # Set default font @@ -189,7 +189,7 @@ class RendererDocx(BaseRenderer): - def _clear_template_content(self, doc: Document) -> None: + def _clearTemplateContent(self, doc: Document) -> None: """Clear template content while preserving styles.""" try: # Remove all paragraphs except keep the styles @@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not clear template content: {str(e)}") - def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a single JSON section to DOCX using AI-generated styles.""" try: section_type = section.get("content_type", "paragraph") @@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer): # Process each element in the section for element in elements: if section_type == "table": - self._render_json_table(doc, element, styles) + self._renderJsonTable(doc, element, styles) elif section_type == "bullet_list": - self._render_json_bullet_list(doc, element, styles) + self._renderJsonBulletList(doc, element, styles) elif section_type == "heading": - self._render_json_heading(doc, element, styles) + self._renderJsonHeading(doc, element, styles) elif section_type == "paragraph": - self._render_json_paragraph(doc, element, styles) + self._renderJsonParagraph(doc, element, styles) elif section_type == "code_block": - self._render_json_code_block(doc, element, styles) + self._renderJsonCodeBlock(doc, element, styles) elif section_type == "image": - self._render_json_image(doc, element, styles) + self._renderJsonImage(doc, element, styles) else: # Fallback to paragraph for unknown types - self._render_json_paragraph(doc, element, styles) + 
self._renderJsonParagraph(doc, element, styles) except Exception as e: self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") # Add error paragraph as fallback error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]") - def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON table to DOCX using AI-generated styles.""" try: headers = table_data.get("headers", []) @@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer): # Apply table borders based on AI style border_style = styles["table_border"]["style"] if border_style == "horizontal_only": - self._apply_horizontal_borders_only(table) + self._applyHorizontalBordersOnly(table) elif border_style == "grid": table.style = 'Table Grid' # else: no borders @@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer): # Apply background color bg_color = header_style["background"].lstrip('#') - self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16))) + self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16))) # Apply text styling for paragraph in cell.paragraphs: @@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering table: {str(e)}") - def _apply_horizontal_borders_only(self, table) -> None: + def _applyHorizontalBordersOnly(self, table) -> None: """Apply only horizontal borders to the table (no vertical borders).""" try: from docx.oxml.shared import OxmlElement, qn @@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not apply horizontal borders: {str(e)}") - def _set_cell_background(self, cell, color: RGBColor) -> None: + def _setCellBackground(self, cell, color: RGBColor) -> None: """Set the background color of a table cell.""" try: from docx.oxml.shared import OxmlElement, qn @@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Could not set cell background: {str(e)}") - def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON bullet list to DOCX using AI-generated styles.""" try: items = list_data.get("items", []) @@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering bullet list: {str(e)}") - def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON heading to DOCX using AI-generated styles.""" try: level = heading_data.get("level", 1) @@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering heading: {str(e)}") - def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON paragraph to DOCX using AI-generated styles.""" try: text = paragraph_data.get("text", "") @@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer): except Exception as e: 
self.logger.warning(f"Error rendering paragraph: {str(e)}") - def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON code block to DOCX using AI-generated styles.""" try: code = code_data.get("code", "") @@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Error rendering code block: {str(e)}") - def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: + def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: """Render a JSON image to DOCX.""" try: base64_data = image_data.get("base64Data", "") @@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Error rendering image: {str(e)}") doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]") - def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]: + def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]: """Extract document structure from user prompt.""" structure = { 'title': title, @@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer): 'format': 'standard' } - if not user_prompt: + if not userPrompt: return structure # Extract title from prompt if not provided if not title or title == "Generated Document": # Look for "create a ... document" or "generate a ... report" import re - title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower()) + title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower()) if title_match: structure['title'] = title_match.group(1).strip().title() # Extract sections from numbered lists in prompt import re section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)' - sections = re.findall(section_pattern, user_prompt) + sections = re.findall(section_pattern, userPrompt) for num, section_text in sections: structure['sections'].append({ @@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer): # If no numbered sections found, try to extract from "including:" patterns if not structure['sections']: - including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL) + including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL) if including_match: including_text = including_match.group(1) # Split by common separators @@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer): if not structure['sections']: # Look for bullet points or dashes bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)' - bullets = re.findall(bullet_pattern, user_prompt) + bullets = re.findall(bullet_pattern, userPrompt) for i, bullet in enumerate(bullets, 1): bullet = bullet.strip() if bullet and len(bullet) > 3: @@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer): # If still no sections, extract from sentence structure if not structure['sections']: # Split prompt into sentences and use as sections - sentences = re.split(r'[.!?]\s+', user_prompt) + sentences = re.split(r'[.!?]\s+', userPrompt) for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections sentence = sentence.strip() if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')): @@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer): action_words = ['analyze', 'summarize', 'review', 
'assess', 'evaluate', 'examine', 'investigate'] found_actions = [] for action in action_words: - if action in user_prompt.lower(): + if action in userPrompt.lower(): found_actions.append(action.title()) if found_actions: @@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer): return structure - def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]): + def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]): """Generate DOCX content based on extracted structure.""" # Add sections based on prompt structure for section in structure['sections']: @@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer): # Add AI-generated content for this section # Try to extract relevant content for this section from the AI response - section_content = self._extract_section_content(content, section['title']) + section_content = self._extractSectionContent(content, section['title']) if section_content: doc.add_paragraph(section_content) @@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer): doc.add_heading("Complete Analysis", level=1) doc.add_paragraph(content) - def _extract_section_content(self, content: str, section_title: str) -> str: + def _extractSectionContent(self, content: str, section_title: str) -> str: """Extract relevant content for a specific section from AI response.""" if not content or not section_title: return "" @@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer): return "" - def _setup_document_styles(self, doc): + def _setupDocumentStyles(self, doc): """Set up document styles.""" try: # Set default font @@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not set up document styles: {str(e)}") - def _process_section(self, doc, lines: list): + def _processSection(self, doc, lines: list): """Process a section of content into DOCX elements.""" for line in lines: if not line.strip(): @@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer): # Check for tables (lines with |) if '|' in line and not line.startswith('|'): # This might be part of a table, process as table - table_data = self._extract_table_data(lines) + table_data = self._extractTableData(lines) if table_data: - self._add_table(doc, table_data) + self._addTable(doc, table_data) return # Check for lists @@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer): # Regular paragraph doc.add_paragraph(line) - def _extract_table_data(self, lines: list) -> list: + def _extractTableData(self, lines: list) -> list: """Extract table data from lines.""" table_data = [] in_table = False @@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer): return table_data if len(table_data) > 1 else [] - def _add_table(self, doc, table_data: list): + def _addTable(self, doc, table_data: list): """Add a table to the document.""" try: if not table_data: @@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer): table.rows[row_idx].cells[col_idx].text = cell_data # Style the table - self._style_table(table) + self._styleTable(table) except Exception as e: self.logger.warning(f"Could not add table: {str(e)}") - def _style_table(self, table): + def _styleTable(self, table): """Apply styling to the table.""" try: # Style header row @@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer): except Exception as e: self.logger.warning(f"Could not style table: {str(e)}") - def _process_table_row(self, doc, line: str): + def _processTableRow(self, doc, line: str): """Process a table row and add it to the document.""" if not line.strip(): return @@ -745,7 +745,7 @@ class 
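# --- Editor's aside (illustrative sketch, not part of the diff) -------------
# Before the HTML renderer hunks: _extractStructureFromPrompt in the DOCX
# renderer above pulls numbered section titles out of the user's prompt with a
# regular expression. The sketch below reuses that exact pattern on an invented
# sample prompt; the surrounding names are made up for the example.
import re

SECTION_PATTERN = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'

samplePrompt = "Create a security report including: 1) scope, 2) findings, 3) remediation plan"
for num, sectionText in re.findall(SECTION_PATTERN, samplePrompt):
    print(f"Section {num}: {sectionText.strip()}")
# Prints:
#   Section 1: scope
#   Section 2: findings
#   Section 3: remediation plan
# --- end aside ---------------------------------------------------------------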
RendererDocx(BaseRenderer): # Not a table row, treat as regular text doc.add_paragraph(line) - def _clean_ai_content(self, content: str) -> str: + def _cleanAiContent(self, content: str) -> str: """Clean AI-generated content by removing debug information and duplicates.""" if not content: return "" @@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer): return '\n\n'.join(unique_sections) - def _process_tables(self, doc, content: str) -> str: + def _processTables(self, doc, content: str) -> str: """ Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. Returns the content with tables replaced by placeholders. @@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer): return '\n'.join(processed_lines) - def _parse_and_format_content(self, doc, content: str, title: str): + def _parseAndFormatContent(self, doc, content: str, title: str): """Parse AI-generated content in standardized format and apply proper DOCX formatting.""" if not content: return # Process tables and replace them with placeholders - content = self._process_tables(doc, content) + content = self._processTables(doc, content) # Parse content line by line in exact sequence lines = content.split('\n') @@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer): # Regular paragraph else: - self._add_paragraph_to_doc(doc, line) + self._addParagraphToDoc(doc, line) - def _add_paragraph_to_doc(self, doc, text: str): + def _addParagraphToDoc(self, doc, text: str): """Add a paragraph to the document with proper formatting.""" if not text.strip(): return diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 1b202886..660a16c2 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer): """Renders content to HTML format with format-specific extraction.""" @classmethod - def get_supported_formats(cls) -> List[str]: + def getSupportedFormats(cls) -> List[str]: """Return supported HTML formats.""" return ['html', 'htm'] @classmethod - def get_format_aliases(cls) -> List[str]: + def getFormatAliases(cls) -> List[str]: """Return format aliases.""" return ['web', 'webpage'] @classmethod - def get_priority(cls) -> int: + def getPriority(cls) -> int: """Return priority for HTML renderer.""" return 100 - async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: """Render extracted JSON content to HTML format using AI-analyzed styling.""" try: # Generate HTML using AI-analyzed styling - html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service) + htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) - return html_content, "text/html" + return htmlContent, "text/html" except Exception as e: self.logger.error(f"Error rendering HTML: {str(e)}") # Return minimal HTML fallback return f"
Error rendering report: {str(e)}
", "text/html" - async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: + async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: """Generate HTML content from structured JSON document using AI-generated styling.""" try: # Get AI-generated styling definitions - styles = await self._get_html_styles(user_prompt, ai_service) + styles = await self._getHtmlStyles(userPrompt, aiService) # Validate JSON structure - if not isinstance(json_content, dict): + if not isinstance(jsonContent, dict): raise ValueError("JSON content must be a dictionary") - if "sections" not in json_content: + if "sections" not in jsonContent: raise ValueError("JSON content must contain 'sections' field") # Use title from JSON metadata if available, otherwise use provided title - document_title = json_content.get("metadata", {}).get("title", title) + documentTitle = jsonContent.get("metadata", {}).get("title", title) # Build HTML document - html_parts = [] + htmlParts = [] # HTML document structure - html_parts.append('') - html_parts.append('') - html_parts.append('') - html_parts.append('') - html_parts.append('') - html_parts.append(f'| {header} | ') - html_parts.append('{header} | ') + htmlParts.append('
|---|---|
| {cell_data} | ') - html_parts.append('|
| {cellData} | ') + htmlParts.append('
{text}
' @@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer): self.logger.warning(f"Error rendering paragraph: {str(e)}") return "" - def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON code block to HTML using AI-generated styles.""" try: - code = code_data.get("code", "") - language = code_data.get("language", "") + code = codeData.get("code", "") + language = codeData.get("language", "") if code: if language: @@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer): self.logger.warning(f"Error rendering code block: {str(e)}") return "" - def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str: + def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON image to HTML.""" try: - base64_data = image_data.get("base64Data", "") - alt_text = image_data.get("altText", "Image") + base64Data = imageData.get("base64Data", "") + altText = imageData.get("altText", "Image") - if base64_data: - return f'