From c44fc92568b1ebd1c3891447206c5d47f448e1c7 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Fri, 31 Oct 2025 00:05:39 +0100
Subject: [PATCH] refactored whole codebase for camelCase part 1 of 2
---
analyze_naming_violations.py | 242 ++++
app.py | 62 +-
modules/connectors/connectorDbJson.py | 4 +-
modules/connectors/connectorDbPostgre.py | 6 +-
modules/connectors/connectorTicketsClickup.py | 6 +-
modules/connectors/connectorTicketsJira.py | 6 +-
modules/connectors/connectorVoiceGoogle.py | 100 +-
modules/datamodels/datamodelChat.py | 64 +-
modules/datamodels/datamodelDocument.py | 6 +-
modules/datamodels/datamodelFiles.py | 12 +-
modules/datamodels/datamodelJson.py | 90 ++
modules/datamodels/datamodelNeutralizer.py | 6 +-
modules/datamodels/datamodelSecurity.py | 10 +-
modules/datamodels/datamodelTickets.py | 6 +-
modules/datamodels/datamodelUam.py | 16 +-
modules/datamodels/datamodelUtils.py | 4 +-
modules/datamodels/datamodelVoice.py | 10 +-
.../mainNeutralizePlayground.py | 56 +-
modules/features/syncDelta/mainSyncDelta.py | 58 +-
modules/interfaces/interfaceAiObjects.py | 2 -
modules/interfaces/interfaceDbAppObjects.py | 20 +-
modules/interfaces/interfaceDbChatObjects.py | 138 ++-
.../interfaces/interfaceDbComponentObjects.py | 12 +-
modules/interfaces/interfaceTicketObjects.py | 4 +-
modules/interfaces/interfaceVoiceObjects.py | 6 +-
modules/routes/routeDataConnections.py | 8 +-
modules/routes/routeDataNeutralization.py | 18 +-
modules/routes/routeDataUsers.py | 12 +-
modules/routes/routeSecurityGoogle.py | 28 +-
modules/routes/routeSecurityLocal.py | 24 +-
modules/routes/routeSecurityMsft.py | 46 +-
modules/routes/routeVoiceGoogle.py | 238 ++--
modules/security/jwtService.py | 6 +-
modules/security/tokenManager.py | 176 +--
modules/security/tokenRefreshMiddleware.py | 6 +-
modules/security/tokenRefreshService.py | 36 +-
modules/services/__init__.py | 16 +-
modules/services/serviceAi/mainServiceAi.py | 792 ++++++++++--
modules/services/serviceAi/subCoreAi.py | 687 -----------
.../serviceAi/subDocumentGeneration.py | 500 --------
.../serviceAi/subDocumentProcessing.py | 1094 +----------------
.../services/serviceAi/subSharedAiUtils.py | 165 ---
.../mainServiceExtraction.py | 23 +-
.../services/serviceExtraction/subMerger.py | 178 +--
.../services/serviceExtraction/subPipeline.py | 4 +-
.../subPromptBuilderExtraction.py | 2 +-
.../mainServiceGeneration.py | 82 +-
.../serviceGeneration/renderers/registry.py | 96 +-
.../renderers/rendererBaseTemplate.py | 320 ++---
.../renderers/rendererCsv.py | 164 +--
.../renderers/rendererDocx.py | 132 +-
.../renderers/rendererHtml.py | 240 ++--
.../renderers/rendererImage.py | 202 +--
.../renderers/rendererJson.py | 22 +-
.../renderers/rendererMarkdown.py | 148 +--
.../renderers/rendererPdf.py | 124 +-
.../renderers/rendererPptx.py | 122 +-
.../renderers/rendererText.py | 168 +--
.../renderers/rendererXlsx.py | 390 +++---
.../serviceGeneration/subJsonSchema.py | 113 +-
.../subPromptBuilderGeneration.py | 165 +--
.../mainServiceNeutralization.py | 14 +-
.../serviceNeutralization/subParseString.py | 58 +-
.../serviceNeutralization/subPatterns.py | 14 +-
.../serviceNeutralization/subProcessBinary.py | 14 +-
.../serviceNeutralization/subProcessCommon.py | 20 +-
.../serviceNeutralization/subProcessList.py | 128 +-
.../serviceNeutralization/subProcessText.py | 36 +-
.../mainServiceSharepoint.py | 234 ++--
.../services/serviceUtils/mainServiceUtils.py | 73 +-
modules/shared/attributeUtils.py | 32 +-
modules/shared/auditLogger.py | 92 +-
modules/shared/configuration.py | 216 ++--
modules/shared/debugLogger.py | 4 +-
modules/shared/jsonUtils.py | 144 +--
modules/shared/timezoneUtils.py | 10 +-
.../adaptive/adaptiveLearningEngine.py | 60 +-
.../processing/adaptive/contentValidator.py | 4 +-
.../processing/adaptive/intentAnalyzer.py | 2 +-
.../processing/modes/modeActionplan.py | 4 +-
.../workflows/processing/modes/modeReact.py | 15 +-
modules/workflows/workflowManager.py | 2 +-
naming_violations_report.csv | 107 ++
...ocumentsWithContinuation_usage_analysis.md | 184 +++
tool_security_encrypt_all_env_files.py | 4 +-
tool_security_encrypt_config_value.py | 10 +-
86 files changed, 3969 insertions(+), 5005 deletions(-)
create mode 100644 analyze_naming_violations.py
create mode 100644 modules/datamodels/datamodelJson.py
delete mode 100644 modules/services/serviceAi/subCoreAi.py
delete mode 100644 modules/services/serviceAi/subDocumentGeneration.py
delete mode 100644 modules/services/serviceAi/subSharedAiUtils.py
create mode 100644 naming_violations_report.csv
create mode 100644 processDocumentsWithContinuation_usage_analysis.md
diff --git a/analyze_naming_violations.py b/analyze_naming_violations.py
new file mode 100644
index 00000000..a4f9b30f
--- /dev/null
+++ b/analyze_naming_violations.py
@@ -0,0 +1,242 @@
+"""
+Script to analyze codebase for snake_case naming violations that should be camelStyle.
+Excludes routes (decorated endpoint functions) and JSON field names.
+"""
+import ast
+import os
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Tuple
+import csv
+
+# Patterns to exclude (external library interfaces, etc.)
+EXCLUDE_PATTERNS = [
+ r'@.*\.(get|post|put|delete|patch|options|head)', # FastAPI route decorators
+ r'self\.(db|db_|model|orm)', # Database ORM attributes
+ r'\.(objects|query|filter|get|all)', # ORM methods
+ r'(request|response|response_model|status_code)', # FastAPI params
+ r'(snake_case|kebab-case)', # String literals
+]
+
+# External library attribute patterns (should not be changed)
+EXTERNAL_LIB_ATTRIBUTES = {
+ 'pydantic', 'fastapi', 'sqlalchemy', 'psycopg', 'requests',
+ 'aiohttp', 'azure', 'google', 'openai', 'anthropic', 'reportlab',
+ 'docx', 'pptx', 'openpyxl', 'json', 'logging', 'datetime', 'typing'
+}
+
+def isRouteFile(filePath: str) -> bool:
+ """Check if file is a route file"""
+ return 'routes' in filePath or 'route' in os.path.basename(filePath).lower()
+
+def shouldExcludeName(name: str, context: str = "") -> bool:
+ """Check if a name should be excluded from analysis"""
+ # Skip if it's a builtin or external library attribute
+ if name.startswith('__') and name.endswith('__'):
+ return True
+
+ # Skip if context suggests external library usage
+ for pattern in EXCLUDE_PATTERNS:
+ if re.search(pattern, context, re.IGNORECASE):
+ return True
+
+ return False
+
+def isSnakeCase(name: str) -> bool:
+ """Check if a name is snake_case"""
+ if not name or name.startswith('_'):
+ return False
+ # Check if contains underscore and is not all caps
+ return '_' in name and not name.isupper()
+
+def analyzeFile(filePath: str) -> Dict[str, List[str]]:
+ """Analyze a Python file for naming violations"""
+ violations = {
+ 'functions': [],
+ 'parameters': [],
+ 'variables': []
+ }
+
+ try:
+ with open(filePath, 'r', encoding='utf-8') as f:
+ content = f.read()
+ tree = ast.parse(content, filename=filePath)
+ except (SyntaxError, UnicodeDecodeError):
+ return violations
+
+ # Track current context
+ currentClass = None
+ inRouteDecorator = False
+
+ class NamingAnalyzer(ast.NodeVisitor):
+ def __init__(self):
+ self.violations = violations
+ self.currentClass = None
+ self.inRouteDecorator = False
+ self.functionDefs = []
+
+ def visit_FunctionDef(self, node):
+ # Check if this is a route endpoint (has FastAPI decorator)
+ isRouteEndpoint = False
+ for decorator in node.decorator_list:
+ if isinstance(decorator, ast.Attribute):
+ if decorator.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
+ isRouteEndpoint = True
+ break
+ elif isinstance(decorator, ast.Call):
+ if isinstance(decorator.func, ast.Attribute):
+ if decorator.func.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
+ isRouteEndpoint = True
+ break
+
+ # Skip route endpoint function names
+ # But we still need to check their parameters and variables
+ funcName = node.name
+ if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName):
+ self.violations['functions'].append(f"{funcName} (line {node.lineno})")
+
+ # Analyze parameters
+ for arg in node.args.args:
+ if arg.arg != 'self' and arg.arg != 'cls':
+ paramName = arg.arg
+ if isSnakeCase(paramName) and not shouldExcludeName(paramName):
+ self.violations['parameters'].append(f"{paramName} in {funcName} (line {node.lineno})")
+
+ # Analyze function body for local variables
+ for stmt in node.body:
+ self.visit(stmt)
+
+ def visit_ClassDef(self, node):
+ oldClass = self.currentClass
+ self.currentClass = node.name
+ self.generic_visit(node)
+ self.currentClass = oldClass
+
+ def visit_Assign(self, node):
+ for target in node.targets:
+ if isinstance(target, ast.Name):
+ varName = target.id
+ # Skip constants (ALL_CAPS), builtins, and private (_xxx)
+ if varName.isupper() or varName.startswith('_'):
+ continue
+ # Local variables should be camelStyle
+ if isSnakeCase(varName) and not shouldExcludeName(varName):
+ self.violations['variables'].append(f"{varName} (line {node.lineno})")
+
+ def visit_For(self, node):
+ if isinstance(node.target, ast.Name):
+ varName = node.target.id
+ if isSnakeCase(varName) and not shouldExcludeName(varName):
+ self.violations['variables'].append(f"{varName} (line {node.lineno})")
+ self.generic_visit(node)
+
+ def visit_With(self, node):
+ if node.items:
+ for item in node.items:
+ if item.optional_vars:
+ if isinstance(item.optional_vars, ast.Name):
+ varName = item.optional_vars.id
+ if isSnakeCase(varName) and not shouldExcludeName(varName):
+ self.violations['variables'].append(f"{varName} (line {node.lineno})")
+ self.generic_visit(node)
+
+ analyzer = NamingAnalyzer()
+ analyzer.visit(tree)
+
+ return violations
+
+def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]:
+ """Analyze entire codebase"""
+ results = defaultdict(lambda: {
+ 'functions': 0,
+ 'parameters': 0,
+ 'variables': 0,
+ 'details': {
+ 'functions': [],
+ 'parameters': [],
+ 'variables': []
+ }
+ })
+
+ # Handle both absolute and relative paths
+ rootPath = Path(rootDir)
+ if not rootPath.exists():
+ # Try relative to current directory
+ rootPath = Path('.').resolve() / rootDir
+ if not rootPath.exists():
+ # Try just current directory if we're already in gateway
+ rootPath = Path('.')
+
+ # Find all Python files
+ for pyFile in rootPath.rglob('*.py'):
+ # Skip route files for function name analysis (but analyze their internals)
+ filePath = str(pyFile.relative_to(rootPath))
+
+ # Skip test files and special scripts
+ if 'test' in filePath.lower() or 'tool_' in filePath or '__pycache__' in filePath:
+ continue
+
+ violations = analyzeFile(str(pyFile))
+
+ # Check if there are any violations
+ totalViolations = len(violations['functions']) + len(violations['parameters']) + len(violations['variables'])
+ if totalViolations > 0:
+ moduleName = filePath.replace('\\', '/')
+ results[moduleName]['functions'] = len(violations['functions'])
+ results[moduleName]['parameters'] = len(violations['parameters'])
+ results[moduleName]['variables'] = len(violations['variables'])
+ results[moduleName]['details'] = violations
+
+ return results
+
+def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'):
+ """Generate CSV report"""
+ with open(outputFile, 'w', newline='', encoding='utf-8') as f:
+ writer = csv.writer(f)
+ writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total'])
+
+ # Sort by total violations
+ sortedResults = sorted(
+ results.items(),
+ key=lambda x: x[1]['functions'] + x[1]['parameters'] + x[1]['variables'],
+ reverse=True
+ )
+
+ rowsWritten = 0
+ for module, stats in sortedResults:
+ total = stats['functions'] + stats['parameters'] + stats['variables']
+ if total > 0:
+ writer.writerow([
+ module,
+ stats['functions'],
+ stats['parameters'],
+ stats['variables'],
+ total
+ ])
+ rowsWritten += 1
+
+ if rowsWritten == 0:
+ print("WARNING: No rows written to CSV despite finding violations!")
+
+ print(f"CSV report generated: {outputFile}")
+ print(f"Total modules analyzed: {len(results)}")
+
+ # Print summary
+ totalFuncs = sum(r['functions'] for r in results.values())
+ totalParams = sum(r['parameters'] for r in results.values())
+ totalVars = sum(r['variables'] for r in results.values())
+ print(f"\nSummary:")
+ print(f" Function names: {totalFuncs}")
+ print(f" Parameter names: {totalParams}")
+ print(f" Variable names: {totalVars}")
+ print(f" Total violations: {totalFuncs + totalParams + totalVars}")
+
+if __name__ == '__main__':
+ print("Analyzing codebase for naming violations...")
+ results = analyzeCodebase('gateway')
+
+ # Write CSV to gateway directory
+ outputPath = Path('gateway') / 'naming_violations_report.csv'
+ generateCSV(results, str(outputPath))
+
diff --git a/app.py b/app.py
index e91a7892..d5254f9c 100644
--- a/app.py
+++ b/app.py
@@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler):
"""
def __init__(
- self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs
+ self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs
):
- self.log_dir = log_dir
- self.filename_prefix = filename_prefix
- self.current_date = None
- self.current_file = None
+ self.logDir = logDir
+ self.filenamePrefix = filenamePrefix
+ self.currentDate = None
+ self.currentFile = None
# Initialize with today's file
- self._update_file_if_needed()
+ self._updateFileIfNeeded()
# Call parent constructor with current file
super().__init__(
- self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs
+ self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs
)
- def _update_file_if_needed(self):
+ def _updateFileIfNeeded(self):
"""Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d")
- if self.current_date != today:
- self.current_date = today
- new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
+ if self.currentDate != today:
+ self.currentDate = today
+ newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
- if self.current_file != new_file:
- self.current_file = new_file
+ if self.currentFile != newFile:
+ self.currentFile = newFile
return True
return False
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
- if self._update_file_if_needed():
+ if self._updateFileIfNeeded():
# Close current file and open new one
if self.stream:
self.stream.close()
self.stream = None
# Update the baseFilename for the parent class
- self.baseFilename = self.current_file
+ self.baseFilename = self.currentFile
# Reopen the stream
if not self.delay:
self.stream = self._open()
@@ -200,10 +200,10 @@ def initLogging():
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler(
- log_dir=logDir,
- filename_prefix="log_app",
- max_bytes=rotationSize,
- backup_count=backupCount,
+ logDir=logDir,
+ filenamePrefix="log_app",
+ maxBytes=rotationSize,
+ backupCount=backupCount,
encoding="utf-8",
)
fileHandler.setFormatter(fileFormatter)
@@ -252,7 +252,7 @@ def initLogging():
)
-def make_sqlalchemy_db_url() -> str:
+def makeSqlalchemyDbUrl() -> str:
host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost")
port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432")
db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway")
@@ -299,17 +299,17 @@ app = FastAPI(
# Configure OpenAPI security scheme for Swagger UI
# This adds the "Authorize" button to the /docs page
-security_scheme = HTTPBearer()
+securityScheme = HTTPBearer()
app.openapi_schema = None # Reset schema to regenerate with security
-def custom_openapi():
+def customOpenapi():
if app.openapi_schema:
return app.openapi_schema
from fastapi.openapi.utils import get_openapi
- openapi_schema = get_openapi(
+ openapiSchema = get_openapi(
title=app.title,
version="1.0.0",
description=app.description,
@@ -317,7 +317,7 @@ def custom_openapi():
)
# Add security scheme definition
- openapi_schema["components"]["securitySchemes"] = {
+ openapiSchema["components"]["securitySchemes"] = {
"BearerAuth": {
"type": "http",
"scheme": "bearer",
@@ -328,20 +328,20 @@ def custom_openapi():
# Apply security globally to all endpoints
# Individual endpoints can override this if needed
- openapi_schema["security"] = [{"BearerAuth": []}]
+ openapiSchema["security"] = [{"BearerAuth": []}]
- app.openapi_schema = openapi_schema
+ app.openapi_schema = openapiSchema
return app.openapi_schema
-app.openapi = custom_openapi
+app.openapi = customOpenapi
# Parse CORS origins from environment variable
-def get_allowed_origins():
- origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
+def getAllowedOrigins():
+ originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
# Split by comma and strip whitespace
- origins = [origin.strip() for origin in origins_str.split(",")]
+ origins = [origin.strip() for origin in originsStr.split(",")]
logger.info(f"CORS allowed origins: {origins}")
return origins
@@ -349,7 +349,7 @@ def get_allowed_origins():
# CORS configuration using environment variables
app.add_middleware(
CORSMiddleware,
- allow_origins=get_allowed_origins(),
+ allow_origins=getAllowedOrigins(),
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
allow_headers=["*"],
diff --git a/modules/connectors/connectorDbJson.py b/modules/connectors/connectorDbJson.py
index 999814db..9ad73e8c 100644
--- a/modules/connectors/connectorDbJson.py
+++ b/modules/connectors/connectorDbJson.py
@@ -7,7 +7,7 @@ from pydantic import BaseModel
import threading
import time
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@@ -232,7 +232,7 @@ class DatabaseConnector:
raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})")
# Add metadata
- currentTime = get_utc_timestamp()
+ currentTime = getUtcTimestamp()
if "_createdAt" not in record:
record["_createdAt"] = currentTime
record["_createdBy"] = self.userId
diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py
index ef937a7c..58d17b66 100644
--- a/modules/connectors/connectorDbPostgre.py
+++ b/modules/connectors/connectorDbPostgre.py
@@ -6,7 +6,7 @@ import uuid
from pydantic import BaseModel, Field
import threading
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@@ -287,7 +287,7 @@ class DatabaseConnector:
INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
VALUES (%s, %s, %s)
""",
- (table_name, initial_id, get_utc_timestamp()),
+ (table_name, initial_id, getUtcTimestamp()),
)
self.connection.commit()
@@ -611,7 +611,7 @@ class DatabaseConnector:
raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")
# Add metadata
- currentTime = get_utc_timestamp()
+ currentTime = getUtcTimestamp()
if "_createdAt" not in record:
record["_createdAt"] = currentTime
record["_createdBy"] = self.userId
diff --git a/modules/connectors/connectorTicketsClickup.py b/modules/connectors/connectorTicketsClickup.py
index 7d92f54a..6d22c93b 100644
--- a/modules/connectors/connectorTicketsClickup.py
+++ b/modules/connectors/connectorTicketsClickup.py
@@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase):
"Content-Type": "application/json",
}
- async def read_attributes(self) -> list[TicketFieldAttribute]:
+ async def readAttributes(self) -> list[TicketFieldAttribute]:
"""Fetch field attributes. Uses list custom fields if listId provided; else basic fields."""
attributes: list[TicketFieldAttribute] = []
try:
@@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase):
logger.error(f"ClickUp read_attributes error: {e}")
return attributes
- async def read_tasks(self, *, limit: int = 0) -> list[dict]:
+ async def readTasks(self, *, limit: int = 0) -> list[dict]:
"""Read tasks from ClickUp, always returning full task records.
If list_id is set, read from that list; otherwise read from team.
"""
@@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase):
logger.error(f"ClickUp read_tasks error: {e}")
return tasks
- async def write_tasks(self, tasklist: list[dict]) -> None:
+ async def writeTasks(self, tasklist: list[dict]) -> None:
"""Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}"""
try:
async with aiohttp.ClientSession() as session:
diff --git a/modules/connectors/connectorTicketsJira.py b/modules/connectors/connectorTicketsJira.py
index eb665036..27fbc676 100644
--- a/modules/connectors/connectorTicketsJira.py
+++ b/modules/connectors/connectorTicketsJira.py
@@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase):
self.ticketType = ticketType
- async def read_attributes(self) -> list[TicketFieldAttribute]:
+ async def readAttributes(self) -> list[TicketFieldAttribute]:
"""
Read field attributes from Jira by querying for a single issue
and extracting the field mappings.
@@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase):
logger.error(f"Error while calling fields API: {str(e)}")
return []
- async def read_tasks(self, *, limit: int = 0) -> list[dict]:
+ async def readTasks(self, *, limit: int = 0) -> list[dict]:
"""
Read tasks from Jira with pagination support.
@@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase):
logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}")
raise
- async def write_tasks(self, tasklist: list[dict]) -> None:
+ async def writeTasks(self, tasklist: list[dict]) -> None:
"""
Write/update tasks to Jira.
diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py
index 2bf4aff3..5cb43f35 100644
--- a/modules/connectors/connectorVoiceGoogle.py
+++ b/modules/connectors/connectorVoiceGoogle.py
@@ -26,18 +26,18 @@ class ConnectorGoogleSpeech:
"""
try:
# Get JSON key from config.ini
- api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
+ apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
- if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
+ if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
# Parse the JSON key and set up authentication
try:
- credentials_info = json.loads(api_key)
+ credentialsInfo = json.loads(apiKey)
# Create credentials object directly (no file needed!)
from google.oauth2 import service_account
- credentials = service_account.Credentials.from_service_account_info(credentials_info)
+ credentials = service_account.Credentials.from_service_account_info(credentialsInfo)
logger.info("✅ Using Google Speech credentials from config.ini")
@@ -55,8 +55,8 @@ class ConnectorGoogleSpeech:
logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
raise
- async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
- sample_rate: int = None, channels: int = None) -> Dict:
+ async def speech_to_text(self, audioContent: bytes, language: str = "de-DE",
+ sampleRate: int = None, channels: int = None) -> Dict:
"""
Convert speech to text using Google Cloud Speech-to-Text API.
@@ -71,8 +71,8 @@ class ConnectorGoogleSpeech:
"""
try:
# Auto-detect audio format if not provided
- if sample_rate is None or channels is None:
- validation = self.validate_audio_format(audio_content)
+ if sampleRate is None or channels is None:
+ validation = self.validate_audio_format(audioContent)
if not validation["valid"]:
return {
"success": False,
@@ -80,59 +80,59 @@ class ConnectorGoogleSpeech:
"confidence": 0.0,
"error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
}
- sample_rate = validation["sample_rate"]
+ sampleRate = validation["sample_rate"]
channels = validation["channels"]
- audio_format = validation["format"]
- logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch")
+ audioFormat = validation["format"]
+ logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
- logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")
+ logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
# Configure audio settings
- audio = speech.RecognitionAudio(content=audio_content)
+ audio = speech.RecognitionAudio(content=audioContent)
# Determine encoding based on detected format
# Google Cloud Speech API has specific requirements for different formats
- if audio_format == "webm_opus":
+ if audioFormat == "webm_opus":
# For WEBM OPUS, we need to ensure proper format
encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
# WEBM_OPUS requires specific sample rate handling - must match header
- if sample_rate != 48000:
- logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000")
- sample_rate = 48000
+ if sampleRate != 48000:
+ logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000")
+ sampleRate = 48000
# For WEBM_OPUS, don't specify sample_rate_hertz in config
# Google Cloud will read it from the WEBM header
- use_sample_rate = False
- elif audio_format == "linear16":
+ useSampleRate = False
+ elif audioFormat == "linear16":
# For LINEAR16 format (PCM)
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
# Ensure sample rate is reasonable
- if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
- logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000")
- sample_rate = 16000
- use_sample_rate = True
- elif audio_format == "mp3":
+ if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
+ logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000")
+ sampleRate = 16000
+ useSampleRate = True
+ elif audioFormat == "mp3":
# For MP3 format
encoding = speech.RecognitionConfig.AudioEncoding.MP3
- use_sample_rate = True
- elif audio_format == "flac":
+ useSampleRate = True
+ elif audioFormat == "flac":
# For FLAC format
encoding = speech.RecognitionConfig.AudioEncoding.FLAC
- use_sample_rate = True
- elif audio_format == "wav":
+ useSampleRate = True
+ elif audioFormat == "wav":
# For WAV format
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
- use_sample_rate = True
+ useSampleRate = True
else:
# For unknown formats, try LINEAR16 as fallback
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
- sample_rate = 16000 # Use standard sample rate
+ sampleRate = 16000 # Use standard sample rate
channels = 1 # Use mono
- use_sample_rate = True
- logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz")
+ useSampleRate = True
+ logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz")
# Build config based on format requirements
- config_params = {
+ configParams = {
"encoding": encoding,
"audio_channel_count": channels,
"language_code": language,
@@ -145,13 +145,13 @@ class ConnectorGoogleSpeech:
}
# Only add sample_rate_hertz if needed (not for WEBM_OPUS)
- if use_sample_rate:
- config_params["sample_rate_hertz"] = sample_rate
- logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}")
+ if useSampleRate:
+ configParams["sample_rate_hertz"] = sampleRate
+ logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}")
else:
logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")
- config = speech.RecognitionConfig(**config_params)
+ config = speech.RecognitionConfig(**configParams)
# Perform speech recognition
logger.info("Sending audio to Google Cloud Speech-to-Text...")
@@ -162,12 +162,12 @@ class ConnectorGoogleSpeech:
response = self.speech_client.recognize(config=config, audio=audio)
logger.debug(f"Google Cloud response: {response}")
- except Exception as api_error:
- logger.error(f"Google Cloud API error: {api_error}")
+ except Exception as apiError:
+ logger.error(f"Google Cloud API error: {apiError}")
# Try with different encoding as fallback
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
logger.info("Trying fallback with LINEAR16 encoding...")
- fallback_config = speech.RecognitionConfig(
+ fallbackConfig = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000, # Use standard sample rate
audio_channel_count=1,
@@ -177,13 +177,13 @@ class ConnectorGoogleSpeech:
)
try:
- response = self.speech_client.recognize(config=fallback_config, audio=audio)
+ response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
logger.debug(f"Google Cloud fallback response: {response}")
- except Exception as fallback_error:
- logger.error(f"Google Cloud fallback error: {fallback_error}")
- raise api_error
+ except Exception as fallbackError:
+ logger.error(f"Google Cloud fallback error: {fallbackError}")
+ raise apiError
else:
- raise api_error
+ raise apiError
# Process results
if response.results:
@@ -234,18 +234,18 @@ class ConnectorGoogleSpeech:
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
# For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
- if audio_format != "webm_opus":
+ if audioFormat != "webm_opus":
# Try LINEAR16 with detected sample rate for non-WEBM formats
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
- "sample_rate": sample_rate,
+ "sample_rate": sampleRate,
"channels": channels,
"use_sample_rate": True,
- "description": f"LINEAR16 with {sample_rate}Hz"
+ "description": f"LINEAR16 with {sampleRate}Hz"
})
# For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
- if audio_format == "webm_opus":
+ if audioFormat == "webm_opus":
# Try WEBM_OPUS without sample rate specification (let Google read from header)
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
@@ -273,7 +273,7 @@ class ConnectorGoogleSpeech:
else:
# For other formats, try standard sample rates
for std_rate in [16000, 8000, 22050, 44100]:
- if std_rate != sample_rate:
+ if std_rate != sampleRate:
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": std_rate,
diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py
index 93f8bf62..3c03e64b 100644
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@@ -3,8 +3,8 @@
from typing import List, Dict, Any, Optional
from enum import Enum
from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
import uuid
@@ -26,7 +26,7 @@ class ChatStat(BaseModel):
priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation")
-register_model_labels(
+registerModelLabels(
"ChatStat",
{"en": "Chat Statistics", "fr": "Statistiques de chat"},
{
@@ -51,7 +51,7 @@ class ChatLog(BaseModel):
message: str = Field(description="Log message")
type: str = Field(description="Log type (info, warning, error, etc.)")
timestamp: float = Field(
- default_factory=get_utc_timestamp,
+ default_factory=getUtcTimestamp,
description="When the log entry was created (UTC timestamp in seconds)",
)
status: Optional[str] = Field(None, description="Status of the log entry")
@@ -63,7 +63,7 @@ class ChatLog(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ChatLog",
{"en": "Chat Log", "fr": "Journal de chat"},
{
@@ -96,7 +96,7 @@ class ChatDocument(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ChatDocument",
{"en": "Chat Document", "fr": "Document de chat"},
{
@@ -133,7 +133,7 @@ class ContentMetadata(BaseModel):
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
-register_model_labels(
+registerModelLabels(
"ContentMetadata",
{"en": "Content Metadata", "fr": "Métadonnées du contenu"},
{
@@ -157,7 +157,7 @@ class ContentItem(BaseModel):
metadata: ContentMetadata = Field(description="Content metadata")
-register_model_labels(
+registerModelLabels(
"ContentItem",
{"en": "Content Item", "fr": "Élément de contenu"},
{
@@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ChatContentExtracted",
{"en": "Extracted Content", "fr": "Contenu extrait"},
{
@@ -209,7 +209,7 @@ class ChatMessage(BaseModel):
description="Sequence number of the message (set automatically)"
)
publishedAt: float = Field(
- default_factory=get_utc_timestamp,
+ default_factory=getUtcTimestamp,
description="When the message was published (UTC timestamp in seconds)",
)
success: Optional[bool] = Field(
@@ -235,7 +235,7 @@ class ChatMessage(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ChatMessage",
{"en": "Chat Message", "fr": "Message de chat"},
{
@@ -331,14 +331,14 @@ class ChatWorkflow(BaseModel):
frontend_required=False,
)
lastActivity: float = Field(
- default_factory=get_utc_timestamp,
+ default_factory=getUtcTimestamp,
description="Timestamp of last activity (UTC timestamp in seconds)",
frontend_type="timestamp",
frontend_readonly=True,
frontend_required=False,
)
startedAt: float = Field(
- default_factory=get_utc_timestamp,
+ default_factory=getUtcTimestamp,
description="When the workflow started (UTC timestamp in seconds)",
frontend_type="timestamp",
frontend_readonly=True,
@@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ChatWorkflow",
{"en": "Chat Workflow", "fr": "Flux de travail de chat"},
{
@@ -426,7 +426,7 @@ class UserInputRequest(BaseModel):
userLanguage: str = Field(default="en", description="User's preferred language")
-register_model_labels(
+registerModelLabels(
"UserInputRequest",
{"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
{
@@ -445,7 +445,7 @@ class ActionDocument(BaseModel):
mimeType: str = Field(description="MIME type of the document")
-register_model_labels(
+registerModelLabels(
"ActionDocument",
{"en": "Action Document", "fr": "Document d'action"},
{
@@ -485,7 +485,7 @@ class ActionResult(BaseModel):
return cls(success=False, documents=documents or [], error=error)
-register_model_labels(
+registerModelLabels(
"ActionResult",
{"en": "Action Result", "fr": "Résultat de l'action"},
{
@@ -504,7 +504,7 @@ class ActionSelection(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ActionSelection",
{"en": "Action Selection", "fr": "Sélection d'action"},
{
@@ -520,7 +520,7 @@ class ActionParameters(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ActionParameters",
{"en": "Action Parameters", "fr": "Paramètres d'action"},
{
@@ -535,7 +535,7 @@ class ObservationPreview(BaseModel):
snippet: str = Field(description="Short snippet or summary")
-register_model_labels(
+registerModelLabels(
"ObservationPreview",
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
{
@@ -558,7 +558,7 @@ class Observation(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"Observation",
{"en": "Observation", "fr": "Observation"},
{
@@ -579,7 +579,7 @@ class TaskStatus(str, Enum):
CANCELLED = "cancelled"
-register_model_labels(
+registerModelLabels(
"TaskStatus",
{"en": "Task Status", "fr": "Statut de la tâche"},
{
@@ -599,7 +599,7 @@ class DocumentExchange(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"DocumentExchange",
{"en": "Document Exchange", "fr": "Échange de documents"},
{
@@ -650,7 +650,7 @@ class ActionItem(BaseModel):
self.error = error_message
-register_model_labels(
+registerModelLabels(
"ActionItem",
{"en": "Task Action", "fr": "Action de tâche"},
{
@@ -683,7 +683,7 @@ class TaskResult(BaseModel):
error: Optional[str] = Field(None, description="Error message if task failed")
-register_model_labels(
+registerModelLabels(
"TaskResult",
{"en": "Task Result", "fr": "Résultat de tâche"},
{
@@ -728,7 +728,7 @@ class TaskItem(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"TaskItem",
{"en": "Task", "fr": "Tâche"},
{
@@ -758,7 +758,7 @@ class TaskStep(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"TaskStep",
{"en": "Task Step", "fr": "Étape de tâche"},
{
@@ -805,7 +805,7 @@ class TaskHandover(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"TaskHandover",
{"en": "Task Handover", "fr": "Transfert de tâche"},
{
@@ -879,7 +879,7 @@ class ReviewResult(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"ReviewResult",
{"en": "Review Result", "fr": "Résultat de l'évaluation"},
{
@@ -904,7 +904,7 @@ class TaskPlan(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"TaskPlan",
{"en": "Task Plan", "fr": "Plan de tâches"},
{
@@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"PromptPlaceholder",
{"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
{
@@ -943,7 +943,7 @@ class PromptBundle(BaseModel):
placeholders: List[PromptPlaceholder] = Field(default_factory=list)
-register_model_labels(
+registerModelLabels(
"PromptBundle",
{"en": "Prompt Bundle", "fr": "Lot d'invite"},
{
diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py
index a437b6f1..33472130 100644
--- a/modules/datamodels/datamodelDocument.py
+++ b/modules/datamodels/datamodelDocument.py
@@ -81,11 +81,11 @@ class StructuredDocument(BaseModel):
summary: Optional[str] = Field(default=None, description="Document summary")
tags: List[str] = Field(default_factory=list, description="Document tags")
- def get_sections_by_type(self, content_type: str) -> List[DocumentSection]:
+ def getSectionsByType(self, content_type: str) -> List[DocumentSection]:
"""Get all sections of a specific content type."""
return [section for section in self.sections if section.content_type == content_type]
- def get_all_tables(self) -> List[TableData]:
+ def getAllTables(self) -> List[TableData]:
"""Get all table data from the document."""
tables = []
for section in self.sections:
@@ -94,7 +94,7 @@ class StructuredDocument(BaseModel):
tables.append(element)
return tables
- def get_all_lists(self) -> List[BulletList]:
+ def getAllLists(self) -> List[BulletList]:
"""Get all lists from the document."""
lists = []
for section in self.sections:
diff --git a/modules/datamodels/datamodelFiles.py b/modules/datamodels/datamodelFiles.py
index 6dcb3a0e..e1f802b7 100644
--- a/modules/datamodels/datamodelFiles.py
+++ b/modules/datamodels/datamodelFiles.py
@@ -2,8 +2,8 @@
from typing import Dict, Any, Optional, Union
from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
import uuid
import base64
@@ -15,9 +15,9 @@ class FileItem(BaseModel):
mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False)
- creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+ creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
-register_model_labels(
+registerModelLabels(
"FileItem",
{"en": "File Item", "fr": "Élément de fichier"},
{
@@ -45,7 +45,7 @@ class FilePreview(BaseModel):
if isinstance(data.get("content"), bytes):
data["content"] = base64.b64encode(data["content"]).decode("utf-8")
return data
-register_model_labels(
+registerModelLabels(
"FilePreview",
{"en": "File Preview", "fr": "Aperçu du fichier"},
{
@@ -62,7 +62,7 @@ class FileData(BaseModel):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
data: str = Field(description="File data content")
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
-register_model_labels(
+registerModelLabels(
"FileData",
{"en": "File Data", "fr": "Données de fichier"},
{
diff --git a/modules/datamodels/datamodelJson.py b/modules/datamodels/datamodelJson.py
new file mode 100644
index 00000000..0af89375
--- /dev/null
+++ b/modules/datamodels/datamodelJson.py
@@ -0,0 +1,90 @@
+"""
+Unified JSON document schema and helpers used by both generation prompts and renderers.
+
+This defines a single canonical template and the supported section types.
+"""
+
+from typing import List
+
+# Canonical list of supported section types across the system
+supportedSectionTypes: List[str] = [
+ "table",
+ "bullet_list",
+ "heading",
+ "paragraph",
+ "code_block",
+ "image",
+]
+
+# Canonical JSON template used for AI generation (documents array + sections)
+# Rendering pipelines can select the first document and read its sections.
+jsonTemplateDocument: str = """{
+ "metadata": {
+ "split_strategy": "single_document",
+ "source_documents": [],
+ "extraction_method": "ai_generation"
+ },
+ "documents": [
+ {
+ "id": "doc_1",
+ "title": "{{DOCUMENT_TITLE}}",
+ "filename": "document.json",
+ "sections": [
+ {
+ "id": "section_heading_example",
+ "content_type": "heading",
+ "elements": [
+ {"level": 1, "text": "Heading Text"}
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_paragraph_example",
+ "content_type": "paragraph",
+ "elements": [
+ {"text": "Paragraph text content"}
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_bullet_list_example",
+ "content_type": "bullet_list",
+ "elements": [
+ {
+ "items": ["Item 1", "Item 2"]
+ }
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_table_example",
+ "content_type": "table",
+ "elements": [
+ {
+ "headers": ["Column 1", "Column 2"],
+ "rows": [
+ ["Row 1 Col 1", "Row 1 Col 2"],
+ ["Row 2 Col 1", "Row 2 Col 2"]
+ ],
+ "caption": "Table caption"
+ }
+ ],
+ "order": 0
+ },
+ {
+ "id": "section_code_example",
+ "content_type": "code_block",
+ "elements": [
+ {
+ "code": "function example() { return true; }",
+ "language": "javascript"
+ }
+ ],
+ "order": 0
+ }
+ ]
+ }
+ ]
+}"""
+
+
diff --git a/modules/datamodels/datamodelNeutralizer.py b/modules/datamodels/datamodelNeutralizer.py
index 93f751e5..60894dff 100644
--- a/modules/datamodels/datamodelNeutralizer.py
+++ b/modules/datamodels/datamodelNeutralizer.py
@@ -3,7 +3,7 @@
import uuid
from typing import Optional
from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
+from modules.shared.attributeUtils import registerModelLabels
class DataNeutraliserConfig(BaseModel):
@@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel):
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
-register_model_labels(
+registerModelLabels(
"DataNeutraliserConfig",
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
{
@@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel):
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
-register_model_labels(
+registerModelLabels(
"DataNeutralizerAttributes",
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
{
diff --git a/modules/datamodels/datamodelSecurity.py b/modules/datamodels/datamodelSecurity.py
index cb629a01..42b9a1ad 100644
--- a/modules/datamodels/datamodelSecurity.py
+++ b/modules/datamodels/datamodelSecurity.py
@@ -2,8 +2,8 @@
from typing import Optional
from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
from .datamodelUam import AuthAuthority
from enum import Enum
import uuid
@@ -51,7 +51,7 @@ class Token(BaseModel):
use_enum_values = True
-register_model_labels(
+registerModelLabels(
"Token",
{"en": "Token", "fr": "Jeton"},
{
@@ -95,7 +95,7 @@ class AuthEvent(BaseModel):
frontend_required=True,
)
timestamp: float = Field(
- default_factory=get_utc_timestamp,
+ default_factory=getUtcTimestamp,
description="Unix timestamp when the event occurred",
frontend_type="datetime",
frontend_readonly=True,
@@ -131,7 +131,7 @@ class AuthEvent(BaseModel):
)
-register_model_labels(
+registerModelLabels(
"AuthEvent",
{"en": "Authentication Event", "fr": "Événement d'authentification"},
{
diff --git a/modules/datamodels/datamodelTickets.py b/modules/datamodels/datamodelTickets.py
index 40478bc6..5d800b15 100644
--- a/modules/datamodels/datamodelTickets.py
+++ b/modules/datamodels/datamodelTickets.py
@@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel):
class TicketBase(ABC):
@abstractmethod
- async def read_attributes(self) -> list[TicketFieldAttribute]: ...
+ async def readAttributes(self) -> list[TicketFieldAttribute]: ...
@abstractmethod
- async def read_tasks(self, *, limit: int = 0) -> list[dict]: ...
+ async def readTasks(self, *, limit: int = 0) -> list[dict]: ...
@abstractmethod
- async def write_tasks(self, tasklist: list[dict]) -> None: ...
+ async def writeTasks(self, tasklist: list[dict]) -> None: ...
diff --git a/modules/datamodels/datamodelUam.py b/modules/datamodels/datamodelUam.py
index 747bfc27..0bf71fa9 100644
--- a/modules/datamodels/datamodelUam.py
+++ b/modules/datamodels/datamodelUam.py
@@ -4,8 +4,8 @@ import uuid
from typing import Optional
from enum import Enum
from pydantic import BaseModel, Field, EmailStr
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
class AuthAuthority(str, Enum):
@@ -34,7 +34,7 @@ class Mandate(BaseModel):
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
])
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
-register_model_labels(
+registerModelLabels(
"Mandate",
{"en": "Mandate", "fr": "Mandat"},
{
@@ -62,8 +62,8 @@ class UserConnection(BaseModel):
{"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}},
{"value": "pending", "label": {"en": "Pending", "fr": "En attente"}},
])
- connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
- lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+ connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+ lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[
{"value": "active", "label": {"en": "Active", "fr": "Actif"}},
@@ -71,7 +71,7 @@ class UserConnection(BaseModel):
{"value": "none", "label": {"en": "None", "fr": "Aucun"}},
])
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
-register_model_labels(
+registerModelLabels(
"UserConnection",
{"en": "User Connection", "fr": "Connexion utilisateur"},
{
@@ -113,7 +113,7 @@ class User(BaseModel):
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
])
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
-register_model_labels(
+registerModelLabels(
"User",
{"en": "User", "fr": "Utilisateur"},
{
@@ -131,7 +131,7 @@ register_model_labels(
class UserInDB(User):
hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
-register_model_labels(
+registerModelLabels(
"UserInDB",
{"en": "User Access", "fr": "Accès de l'utilisateur"},
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
diff --git a/modules/datamodels/datamodelUtils.py b/modules/datamodels/datamodelUtils.py
index c928cd47..67a42534 100644
--- a/modules/datamodels/datamodelUtils.py
+++ b/modules/datamodels/datamodelUtils.py
@@ -1,7 +1,7 @@
"""Utility datamodels: Prompt."""
from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
+from modules.shared.attributeUtils import registerModelLabels
import uuid
@@ -10,7 +10,7 @@ class Prompt(BaseModel):
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
-register_model_labels(
+registerModelLabels(
"Prompt",
{"en": "Prompt", "fr": "Invite"},
{
diff --git a/modules/datamodels/datamodelVoice.py b/modules/datamodels/datamodelVoice.py
index 8be43b58..6ecdd857 100644
--- a/modules/datamodels/datamodelVoice.py
+++ b/modules/datamodels/datamodelVoice.py
@@ -1,8 +1,8 @@
"""Voice settings datamodel."""
from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
import uuid
@@ -15,11 +15,11 @@ class VoiceSettings(BaseModel):
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True)
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False)
- creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
- lastModified: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+ creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+ lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
-register_model_labels(
+registerModelLabels(
"VoiceSettings",
{"en": "Voice Settings", "fr": "Paramètres vocaux"},
{
diff --git a/modules/features/neutralizePlayground/mainNeutralizePlayground.py b/modules/features/neutralizePlayground/mainNeutralizePlayground.py
index 52d6a7ce..df486c8f 100644
--- a/modules/features/neutralizePlayground/mainNeutralizePlayground.py
+++ b/modules/features/neutralizePlayground/mainNeutralizePlayground.py
@@ -43,10 +43,6 @@ class NeutralizationPlayground:
'errors': errors,
}
- async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
- from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
- processor = SharepointProcessor(self.currentUser, self.services)
- return await processor.processSharepointFiles(sourcePath, targetPath)
# Cleanup attributes
def cleanAttributes(self, fileId: str) -> bool:
@@ -77,49 +73,51 @@ class NeutralizationPlayground:
}
# Additional methods needed by the route
- def get_config(self) -> Optional[DataNeutraliserConfig]:
+ def getConfig(self) -> Optional[DataNeutraliserConfig]:
"""Get neutralization configuration"""
return self.services.neutralization.getConfig()
- def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
+ def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig:
"""Save neutralization configuration"""
- return self.services.neutralization.saveConfig(config_data)
+ return self.services.neutralization.saveConfig(configData)
- def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]:
+ def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]:
"""Neutralize text content"""
return self.services.neutralization.processText(text)
- def resolve_text(self, text: str) -> str:
+ def resolveText(self, text: str) -> str:
"""Resolve UIDs in neutralized text back to original text"""
return self.services.neutralization.resolveText(text)
- def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]:
+ def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
try:
- all_attributes = self.services.neutralization.getAttributes()
- if file_id:
- return [attr for attr in all_attributes if attr.fileId == file_id]
- return all_attributes
+ allAttributes = self.services.neutralization.getAttributes()
+ if fileId:
+ return [attr for attr in allAttributes if attr.fileId == fileId]
+ return allAttributes
except Exception as e:
logger.error(f"Error getting attributes: {str(e)}")
return []
- async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
+ async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
"""Process files from SharePoint source path and store neutralized files in target path"""
- return await self.processSharepointFiles(source_path, target_path)
+ from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
+ processor = SharepointProcessor(self.currentUser, self.services)
+ return await processor.processSharepointFiles(sourcePath, targetPath)
- def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
+ def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Process multiple files for neutralization"""
- file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')]
- return self.processFiles(file_ids)
+ fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')]
+ return self.processFiles(fileIds)
- def get_processing_stats(self) -> Dict[str, Any]:
+ def getProcessingStats(self) -> Dict[str, Any]:
"""Get neutralization processing statistics"""
return self.getStats()
- def cleanup_file_attributes(self, file_id: str) -> bool:
+ def cleanupFileAttributes(self, fileId: str) -> bool:
"""Clean up neutralization attributes for a specific file"""
- return self.cleanAttributes(file_id)
+ return self.cleanAttributes(fileId)
# Internal SharePoint helper module separated to keep feature logic tidy
@@ -208,7 +206,7 @@ class SharepointProcessor:
siteUrl, _ = self._parseSharepointPath(sharepointPath)
if not siteUrl:
return False
- siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl)
+ siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl)
return siteInfo is not None
except Exception:
return False
@@ -219,17 +217,17 @@ class SharepointProcessor:
targetSite, targetFolder = self._parseSharepointPath(targetPath)
if not sourceSite or not targetSite:
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
- sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite)
+ sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite)
if not sourceSiteInfo:
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
- targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite)
+ targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite)
if not targetSiteInfo:
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
- files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
+ files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder)
if not files:
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
- files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '')
+ files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '')
if files:
folders = [f for f in files if f.get('type') == 'folder']
folderNames = [f.get('name') for f in folders]
@@ -251,7 +249,7 @@ class SharepointProcessor:
async def _processSingle(fileInfo: Dict[str, Any]):
try:
- fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id'])
+ fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id'])
if not fileContent:
return {'error': f"Failed to download file: {fileInfo['name']}"}
try:
@@ -260,7 +258,7 @@ class SharepointProcessor:
textContent = fileContent.decode('latin-1')
result = self.services.neutralization.processText(textContent)
neutralizedFilename = f"neutralized_{fileInfo['name']}"
- uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
+ uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
if 'error' in uploadResult:
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
return {
diff --git a/modules/features/syncDelta/mainSyncDelta.py b/modules/features/syncDelta/mainSyncDelta.py
index fa8c1f93..3fc9e7af 100644
--- a/modules/features/syncDelta/mainSyncDelta.py
+++ b/modules/features/syncDelta/mainSyncDelta.py
@@ -204,9 +204,9 @@ class ManagerSyncDelta:
logger.info(
f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
)
- resolved = await self.services.sharepoint.find_site_by_url(
+ resolved = await self.services.sharepoint.findSiteByUrl(
hostname=self.SHAREPOINT_HOSTNAME,
- site_path=self.SHAREPOINT_SITE_PATH
+ sitePath=self.SHAREPOINT_SITE_PATH
)
if not resolved:
@@ -223,9 +223,9 @@ class ManagerSyncDelta:
# Test site access by listing root of the drive
logger.info("Testing site access using resolved site ID...")
- test_result = await self.services.sharepoint.list_folder_contents(
- site_id=self.targetSite["id"],
- folder_path=""
+ test_result = await self.services.sharepoint.listFolderContents(
+ siteId=self.targetSite["id"],
+ folderPath=""
)
if test_result is not None:
@@ -293,8 +293,8 @@ class ManagerSyncDelta:
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
try:
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
- excel_content = await self.services.sharepoint.download_file_by_path(
- site_id=self.targetSite['id'], file_path=file_path
+ excel_content = await self.services.sharepoint.downloadFileByPath(
+ siteId=self.targetSite['id'], filePath=file_path
)
existing_data, existing_headers = self.parseExcelContent(excel_content)
except Exception:
@@ -307,16 +307,16 @@ class ManagerSyncDelta:
await self.backupSharepointFile(filename=sync_file_name)
excel_bytes = self.createExcelContent(merged_data, existing_headers)
- await self.services.sharepoint.upload_file(
- site_id=self.targetSite['id'],
- folder_path=self.SHAREPOINT_MAIN_FOLDER,
- file_name=sync_file_name,
+ await self.services.sharepoint.uploadFile(
+ siteId=self.targetSite['id'],
+ folderPath=self.SHAREPOINT_MAIN_FOLDER,
+ fileName=sync_file_name,
content=excel_bytes,
)
# Import back to tickets
try:
- excel_content = await self.services.sharepoint.download_file_by_path(
- site_id=self.targetSite['id'], file_path=file_path
+ excel_content = await self.services.sharepoint.downloadFileByPath(
+ siteId=self.targetSite['id'], filePath=file_path
)
excel_rows, _ = self.parseExcelContent(excel_content)
self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets")
@@ -333,8 +333,8 @@ class ManagerSyncDelta:
existing_data: list[dict] = []
try:
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
- csv_content = await self.services.sharepoint.download_file_by_path(
- site_id=self.targetSite['id'], file_path=file_path
+ csv_content = await self.services.sharepoint.downloadFileByPath(
+ siteId=self.targetSite['id'], filePath=file_path
)
csv_lines = csv_content.decode('utf-8').split('\n')
if len(csv_lines) >= 2:
@@ -348,16 +348,16 @@ class ManagerSyncDelta:
await self.backupSharepointFile(filename=sync_file_name)
merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data)
csv_bytes = self.createCsvContent(merged_data, existing_headers)
- await self.services.sharepoint.upload_file(
- site_id=self.targetSite['id'],
- folder_path=self.SHAREPOINT_MAIN_FOLDER,
- file_name=sync_file_name,
+ await self.services.sharepoint.uploadFile(
+ siteId=self.targetSite['id'],
+ folderPath=self.SHAREPOINT_MAIN_FOLDER,
+ fileName=sync_file_name,
content=csv_bytes,
)
# Import from CSV
try:
- csv_content = await self.services.sharepoint.download_file_by_path(
- site_id=self.targetSite['id'], file_path=file_path
+ csv_content = await self.services.sharepoint.downloadFileByPath(
+ siteId=self.targetSite['id'], filePath=file_path
)
df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python')
csv_rows = df.to_dict('records')
@@ -388,12 +388,12 @@ class ManagerSyncDelta:
try:
timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S")
backup_filename = f"backup_{timestamp}_{filename}"
- await self.services.sharepoint.copy_file_async(
- site_id=self.targetSite['id'],
- source_folder=self.SHAREPOINT_MAIN_FOLDER,
- source_file=filename,
- dest_folder=self.SHAREPOINT_BACKUP_FOLDER,
- dest_file=backup_filename,
+ await self.services.sharepoint.copyFileAsync(
+ siteId=self.targetSite['id'],
+ sourceFolder=self.SHAREPOINT_MAIN_FOLDER,
+ sourceFile=filename,
+ destFolder=self.SHAREPOINT_BACKUP_FOLDER,
+ destFile=backup_filename,
)
self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}")
return True
@@ -679,7 +679,7 @@ class ManagerSyncDelta:
connectorType=connectorType,
connectorParams=connectorParams,
)
- attributes = await ticket_interface.connector_ticket.read_attributes()
+ attributes = await ticket_interface.connector_ticket.readAttributes()
if not attributes:
logger.warning("No ticket attributes returned; nothing to write.")
return False
@@ -713,7 +713,7 @@ class ManagerSyncDelta:
connectorType=connectorType,
connectorParams=connectorParams,
)
- tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit)
+ tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit)
if not tickets:
logger.warning("No tickets returned; nothing to write.")
return False
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index e58fa1ef..c6b0e62c 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -54,8 +54,6 @@ class AiObjects:
# No need to manually create connectors - they're auto-discovered
return cls()
-
-
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
"""Select the best model using dynamic model selection system."""
# Get available models from the dynamic registry
diff --git a/modules/interfaces/interfaceDbAppObjects.py b/modules/interfaces/interfaceDbAppObjects.py
index e37c98f8..8da37568 100644
--- a/modules/interfaces/interfaceDbAppObjects.py
+++ b/modules/interfaces/interfaceDbAppObjects.py
@@ -10,7 +10,7 @@ import uuid
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
from modules.interfaces.interfaceDbAppAccess import AppAccess
from modules.datamodels.datamodelUam import (
User,
@@ -604,8 +604,8 @@ class AppObjects:
externalUsername=externalUsername,
externalEmail=externalEmail,
status=status,
- connectedAt=get_utc_timestamp(),
- lastChecked=get_utc_timestamp(),
+ connectedAt=getUtcTimestamp(),
+ lastChecked=getUtcTimestamp(),
expiresAt=None, # Optional field, set to None by default
)
@@ -755,7 +755,7 @@ class AppObjects:
if not token.id:
token.id = str(uuid.uuid4())
if not token.createdAt:
- token.createdAt = get_utc_timestamp()
+ token.createdAt = getUtcTimestamp()
# If replace_existing is True, delete old access tokens for this user and authority first
if replace_existing:
@@ -822,7 +822,7 @@ class AppObjects:
if not token.id:
token.id = str(uuid.uuid4())
if not token.createdAt:
- token.createdAt = get_utc_timestamp()
+ token.createdAt = getUtcTimestamp()
# Convert to dict and ensure all fields are properly set
token_dict = token.model_dump()
@@ -932,7 +932,7 @@ class AppObjects:
return True
tokenUpdate = {
"status": TokenStatus.REVOKED,
- "revokedAt": get_utc_timestamp(),
+ "revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy,
"reason": reason or "revoked",
}
@@ -970,7 +970,7 @@ class AppObjects:
t["id"],
{
"status": TokenStatus.REVOKED,
- "revokedAt": get_utc_timestamp(),
+ "revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy,
"reason": reason or "session logout",
},
@@ -1008,7 +1008,7 @@ class AppObjects:
t["id"],
{
"status": TokenStatus.REVOKED,
- "revokedAt": get_utc_timestamp(),
+ "revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy,
"reason": reason or "admin revoke",
},
@@ -1022,7 +1022,7 @@ class AppObjects:
def cleanupExpiredTokens(self) -> int:
"""Clean up expired tokens for all connections, returns count of cleaned tokens"""
try:
- current_time = get_utc_timestamp()
+ current_time = getUtcTimestamp()
cleaned_count = 0
# Get all tokens
@@ -1100,7 +1100,7 @@ class AppObjects:
# Update existing config
update_data = existing_config.model_dump()
update_data.update(config_data)
- update_data["updatedAt"] = get_utc_timestamp()
+ update_data["updatedAt"] = getUtcTimestamp()
updated_config = DataNeutraliserConfig(**update_data)
self.db.recordModify(
diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py
index 92dcccf6..deea239a 100644
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User
# DYNAMIC PART: Connectors to the Interface
from modules.connectors.connectorDbPostgre import DatabaseConnector
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
# Basic Configurations
from modules.shared.configuration import APP_CONFIG
@@ -66,56 +66,56 @@ class ChatObjects:
# ===== Generic Utility Methods =====
- def _is_object_field(self, field_type) -> bool:
+ def _isObjectField(self, fieldType) -> bool:
"""Check if a field type represents a complex object (not a simple type)."""
# Simple scalar types
- if field_type in (str, int, float, bool, type(None)):
+ if fieldType in (str, int, float, bool, type(None)):
return False
# Everything else is an object
return True
- def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
+ def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
"""Separate simple fields from object fields based on Pydantic model structure."""
- simple_fields = {}
- object_fields = {}
+ simpleFields = {}
+ objectFields = {}
# Get field information from the Pydantic model
- model_fields = model_class.model_fields
+ modelFields = model_class.model_fields
- for field_name, value in data.items():
+ for fieldName, value in data.items():
# Check if this field should be stored as JSONB in the database
- if field_name in model_fields:
- field_info = model_fields[field_name]
+ if fieldName in modelFields:
+ fieldInfo = modelFields[fieldName]
# Pydantic v2 only
- field_type = field_info.annotation
+ fieldType = fieldInfo.annotation
# Always route relational/object fields to object_fields for separate handling
- if field_name in ['documents', 'stats']:
- object_fields[field_name] = value
+ if fieldName in ['documents', 'stats']:
+ objectFields[fieldName] = value
continue
# Check if this is a JSONB field (Dict, List, or complex types)
- if (field_type == dict or
- field_type == list or
- (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or
- field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
+ if (fieldType == dict or
+ fieldType == list or
+ (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or
+ fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
# Store as JSONB - include in simple_fields for database storage
- simple_fields[field_name] = value
+ simpleFields[fieldName] = value
elif isinstance(value, (str, int, float, bool, type(None))):
# Simple scalar types
- simple_fields[field_name] = value
+ simpleFields[fieldName] = value
else:
# Complex objects that should be filtered out
- object_fields[field_name] = value
+ objectFields[fieldName] = value
else:
# Field not in model - treat as scalar if simple, otherwise filter out
if isinstance(value, (str, int, float, bool, type(None))):
- simple_fields[field_name] = value
+ simpleFields[fieldName] = value
else:
- object_fields[field_name] = value
+ objectFields[fieldName] = value
- return simple_fields, object_fields
+ return simpleFields, objectFields
def _initializeServices(self):
pass
@@ -240,8 +240,8 @@ class ChatObjects:
currentAction=workflow.get("currentAction", 0),
totalTasks=workflow.get("totalTasks", 0),
totalActions=workflow.get("totalActions", 0),
- lastActivity=workflow.get("lastActivity", get_utc_timestamp()),
- startedAt=workflow.get("startedAt", get_utc_timestamp()),
+ lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
+ startedAt=workflow.get("startedAt", getUtcTimestamp()),
logs=logs,
messages=messages,
stats=stats,
@@ -257,7 +257,7 @@ class ChatObjects:
raise PermissionError("No permission to create workflows")
# Set timestamp if not present
- currentTime = get_utc_timestamp()
+ currentTime = getUtcTimestamp()
if "startedAt" not in workflowData:
workflowData["startedAt"] = currentTime
@@ -265,10 +265,10 @@ class ChatObjects:
workflowData["lastActivity"] = currentTime
# Use generic field separation based on ChatWorkflow model
- simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
+ simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
# Create workflow in database
- created = self.db.recordCreate(ChatWorkflow, simple_fields)
+ created = self.db.recordCreate(ChatWorkflow, simpleFields)
# Convert to ChatWorkflow model (empty related data for new workflow)
@@ -302,13 +302,13 @@ class ChatObjects:
raise PermissionError(f"No permission to update workflow {workflowId}")
# Use generic field separation based on ChatWorkflow model
- simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
+ simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
# Set update time for main workflow
- simple_fields["lastActivity"] = get_utc_timestamp()
+ simpleFields["lastActivity"] = getUtcTimestamp()
# Update main workflow in database
- updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields)
+ updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields)
# Removed cascade writes for logs/messages/stats during workflow update.
# CUD for child entities must be executed via dedicated service methods.
@@ -423,7 +423,7 @@ class ChatObjects:
role=msg.get("role", "assistant"),
status=msg.get("status", "step"),
sequenceNr=msg.get("sequenceNr", 0),
- publishedAt=msg.get("publishedAt", get_utc_timestamp()),
+ publishedAt=msg.get("publishedAt", getUtcTimestamp()),
success=msg.get("success"),
actionId=msg.get("actionId"),
actionMethod=msg.get("actionMethod"),
@@ -490,20 +490,30 @@ class ChatObjects:
messageData["actionNumber"] = workflow.currentAction
# Use generic field separation based on ChatMessage model
- simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
+ simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
# Handle documents separately - they will be stored in normalized documents table
- documents_to_create = object_fields.get("documents", [])
+ documents_to_create = objectFields.get("documents", [])
# Create message in normalized table using only simple fields
- createdMessage = self.db.recordCreate(ChatMessage, simple_fields)
+ createdMessage = self.db.recordCreate(ChatMessage, simpleFields)
# Create documents in normalized documents table
created_documents = []
for doc_data in documents_to_create:
- # Use the document data directly
- doc_dict = doc_data
+ # Normalize to plain dict before assignment
+ if isinstance(doc_data, ChatDocument):
+ doc_dict = doc_data.model_dump()
+ elif isinstance(doc_data, dict):
+ doc_dict = dict(doc_data)
+ else:
+ # Attempt to coerce to ChatDocument then dump
+ try:
+ doc_dict = ChatDocument(**doc_data).model_dump()
+ except Exception:
+ logger.error("Invalid document data type for message creation")
+ continue
doc_dict["messageId"] = createdMessage["id"]
created_doc = self.createDocument(doc_dict)
@@ -522,8 +532,8 @@ class ChatObjects:
role=createdMessage.get("role", "assistant"),
status=createdMessage.get("status", "step"),
sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number
- publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()),
- stats=object_fields.get("stats"), # Use stats from object_fields
+ publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()),
+ stats=objectFields.get("stats"), # Use stats from objectFields
roundNumber=createdMessage.get("roundNumber"),
taskNumber=createdMessage.get("taskNumber"),
actionNumber=createdMessage.get("actionNumber"),
@@ -588,31 +598,41 @@ class ChatObjects:
raise PermissionError(f"No permission to modify workflow {workflowId}")
# Use generic field separation based on ChatMessage model
- simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
+ simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
# Ensure required fields present
for key in ["role", "agentName"]:
- if key not in simple_fields and key not in existingMessage:
- simple_fields[key] = "assistant" if key == "role" else ""
+ if key not in simpleFields and key not in existingMessage:
+ simpleFields[key] = "assistant" if key == "role" else ""
# Ensure ID is in the dataset
- if 'id' not in simple_fields:
- simple_fields['id'] = messageId
+ if 'id' not in simpleFields:
+ simpleFields['id'] = messageId
# Convert createdAt to startedAt if needed
- if "createdAt" in simple_fields and "startedAt" not in simple_fields:
- simple_fields["startedAt"] = simple_fields["createdAt"]
- del simple_fields["createdAt"]
+ if "createdAt" in simpleFields and "startedAt" not in simpleFields:
+ simpleFields["startedAt"] = simpleFields["createdAt"]
+ del simpleFields["createdAt"]
# Update the message with simple fields only
- updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields)
+ updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields)
# Handle object field updates (documents, stats) inline
- if 'documents' in object_fields:
- documents_data = object_fields['documents']
+ if 'documents' in objectFields:
+ documents_data = objectFields['documents']
try:
for doc_data in documents_data:
- doc_dict = doc_data
+ # Normalize to dict before mutation
+ if isinstance(doc_data, ChatDocument):
+ doc_dict = doc_data.model_dump()
+ elif isinstance(doc_data, dict):
+ doc_dict = dict(doc_data)
+ else:
+ try:
+ doc_dict = ChatDocument(**doc_data).model_dump()
+ except Exception:
+ logger.error("Invalid document data type for message update")
+ continue
doc_dict["messageId"] = messageId
self.createDocument(doc_dict)
except Exception as e:
@@ -732,11 +752,9 @@ class ChatObjects:
def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
"""Creates a document for a message in normalized table."""
try:
- # Validate document data
+ # Validate and normalize document data to dict
document = ChatDocument(**documentData)
-
- # Create document in normalized table
- created = self.db.recordCreate(ChatDocument, document)
+ created = self.db.recordCreate(ChatDocument, document.model_dump())
return ChatDocument(**created)
@@ -785,7 +803,7 @@ class ChatObjects:
# Make sure required fields are present
if "timestamp" not in logData:
- logData["timestamp"] = get_utc_timestamp()
+ logData["timestamp"] = getUtcTimestamp()
# Add status information if not present
if "status" not in logData and "type" in logData:
@@ -882,7 +900,7 @@ class ChatObjects:
messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
for msg in messages:
# Apply timestamp filtering in Python
- msg_timestamp = msg.get("publishedAt", get_utc_timestamp())
+ msg_timestamp = msg.get("publishedAt", getUtcTimestamp())
if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
continue
@@ -900,7 +918,7 @@ class ChatObjects:
role=msg.get("role", "assistant"),
status=msg.get("status", "step"),
sequenceNr=msg.get("sequenceNr", 0),
- publishedAt=msg.get("publishedAt", get_utc_timestamp()),
+ publishedAt=msg.get("publishedAt", getUtcTimestamp()),
success=msg.get("success"),
actionId=msg.get("actionId"),
actionMethod=msg.get("actionMethod"),
@@ -923,7 +941,7 @@ class ChatObjects:
logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
for log in logs:
# Apply timestamp filtering in Python
- log_timestamp = log.get("timestamp", get_utc_timestamp())
+ log_timestamp = log.get("timestamp", getUtcTimestamp())
if afterTimestamp is not None and log_timestamp <= afterTimestamp:
continue
@@ -938,7 +956,7 @@ class ChatObjects:
stats = self.getStats(workflowId)
for stat in stats:
# Apply timestamp filtering in Python
- stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp()
+ stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp()
if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
continue
diff --git a/modules/interfaces/interfaceDbComponentObjects.py b/modules/interfaces/interfaceDbComponentObjects.py
index d2a74e69..20e7aae1 100644
--- a/modules/interfaces/interfaceDbComponentObjects.py
+++ b/modules/interfaces/interfaceDbComponentObjects.py
@@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt
from modules.datamodels.datamodelVoice import VoiceSettings
from modules.datamodels.datamodelUam import User, Mandate
from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@@ -466,7 +466,7 @@ class ComponentObjects:
# Ensure proper values, use defaults for invalid data
creationDate = file.get("creationDate")
if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0:
- creationDate = get_utc_timestamp()
+ creationDate = getUtcTimestamp()
fileName = file.get("fileName")
if not fileName or fileName == "None":
@@ -503,7 +503,7 @@ class ComponentObjects:
# Get creation date from record or use current time
creationDate = file.get("creationDate")
if not creationDate:
- creationDate = get_utc_timestamp()
+ creationDate = getUtcTimestamp()
return FileItem(
id=file.get("id"),
@@ -881,9 +881,9 @@ class ComponentObjects:
# Ensure timestamps are set for validation
settings_data = filteredSettings[0]
if not settings_data.get("creationDate"):
- settings_data["creationDate"] = get_utc_timestamp()
+ settings_data["creationDate"] = getUtcTimestamp()
if not settings_data.get("lastModified"):
- settings_data["lastModified"] = get_utc_timestamp()
+ settings_data["lastModified"] = getUtcTimestamp()
return VoiceSettings(**settings_data)
@@ -931,7 +931,7 @@ class ComponentObjects:
raise ValueError(f"Voice settings not found for user {userId}")
# Update lastModified timestamp
- updateData["lastModified"] = get_utc_timestamp()
+ updateData["lastModified"] = getUtcTimestamp()
# Update voice settings record
success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData)
diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py
index b7a090fa..8c76d455 100644
--- a/modules/interfaces/interfaceTicketObjects.py
+++ b/modules/interfaces/interfaceTicketObjects.py
@@ -31,7 +31,7 @@ class TicketInterface:
self.task_sync_definition = task_sync_definition
async def exportTicketsAsList(self) -> list[dict]:
- tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0)
+ tickets: list[dict] = await self.connector_ticket.readTasks(limit=0)
transformed_tasks = self._transformTicketRecords(tickets, includePut=True)
# Return plain dictionaries filtered by presence of ID
rows: list[dict] = []
@@ -57,7 +57,7 @@ class TicketInterface:
if fields:
updates.append({"ID": task_id, "fields": fields})
if updates:
- await self.connector_ticket.write_tasks(updates)
+ await self.connector_ticket.writeTasks(updates)
def _transformTicketRecords(
self, tasks: list[dict], includePut: bool = False
diff --git a/modules/interfaces/interfaceVoiceObjects.py b/modules/interfaces/interfaceVoiceObjects.py
index a1d0b96c..2bb1b729 100644
--- a/modules/interfaces/interfaceVoiceObjects.py
+++ b/modules/interfaces/interfaceVoiceObjects.py
@@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List
from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech
from modules.datamodels.datamodelVoice import VoiceSettings
from modules.datamodels.datamodelUam import User
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@@ -269,7 +269,7 @@ class VoiceObjects:
logger.info(f"Creating voice settings: {settingsData}")
# Add timestamps
- currentTime = get_utc_timestamp()
+ currentTime = getUtcTimestamp()
settingsData["creationDate"] = currentTime
settingsData["lastModified"] = currentTime
@@ -298,7 +298,7 @@ class VoiceObjects:
logger.info(f"Updating voice settings for user {userId}: {settingsData}")
# Add last modified timestamp
- settingsData["lastModified"] = get_utc_timestamp()
+ settingsData["lastModified"] = getUtcTimestamp()
# Create updated VoiceSettings object
voiceSettings = VoiceSettings(**settingsData)
diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py
index 1c49fa13..eec8d140 100644
--- a/modules/routes/routeDataConnections.py
+++ b/modules/routes/routeDataConnections.py
@@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority,
from modules.datamodels.datamodelSecurity import Token
from modules.security.auth import getCurrentUser, limiter
from modules.interfaces.interfaceDbAppObjects import getInterface
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
# Configure logger
logger = logging.getLogger(__name__)
@@ -64,7 +64,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str,
if not expires_at:
return "none", None
- current_time = get_utc_timestamp()
+ current_time = getUtcTimestamp()
# Add 5 minute buffer for proactive refresh
buffer_time = 5 * 60 # 5 minutes in seconds
@@ -247,7 +247,7 @@ async def update_connection(
setattr(connection, field, value)
# Update lastChecked timestamp using UTC timestamp
- connection.lastChecked = get_utc_timestamp()
+ connection.lastChecked = getUtcTimestamp()
# Update connection - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
@@ -382,7 +382,7 @@ async def disconnect_service(
# Update connection status
connection.status = ConnectionStatus.INACTIVE
- connection.lastChecked = get_utc_timestamp()
+ connection.lastChecked = getUtcTimestamp()
# Update connection record - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
diff --git a/modules/routes/routeDataNeutralization.py b/modules/routes/routeDataNeutralization.py
index 61e8c25d..07d6c025 100644
--- a/modules/routes/routeDataNeutralization.py
+++ b/modules/routes/routeDataNeutralization.py
@@ -35,7 +35,7 @@ async def get_neutralization_config(
"""Get data neutralization configuration"""
try:
service = NeutralizationPlayground(currentUser)
- config = service.get_config()
+ config = service.getConfig()
if not config:
# Return default config instead of 404
@@ -69,7 +69,7 @@ async def save_neutralization_config(
"""Save or update data neutralization configuration"""
try:
service = NeutralizationPlayground(currentUser)
- config = service.save_config(config_data)
+ config = service.saveConfig(config_data)
return config
@@ -99,7 +99,7 @@ async def neutralize_text(
)
service = NeutralizationPlayground(currentUser)
- result = service.neutralize_text(text, file_id)
+ result = service.neutralizeText(text, file_id)
return result
@@ -130,7 +130,7 @@ async def resolve_text(
)
service = NeutralizationPlayground(currentUser)
- resolved_text = service.resolve_text(text)
+ resolved_text = service.resolveText(text)
return {"resolved_text": resolved_text}
@@ -153,7 +153,7 @@ async def get_neutralization_attributes(
"""Get neutralization attributes, optionally filtered by file ID"""
try:
service = NeutralizationPlayground(currentUser)
- attributes = service.get_attributes(fileId)
+ attributes = service.getAttributes(fileId)
return attributes
@@ -183,7 +183,7 @@ async def process_sharepoint_files(
)
service = NeutralizationPlayground(currentUser)
- result = await service.process_sharepoint_files(source_path, target_path)
+ result = await service.processSharepointFiles(source_path, target_path)
return result
@@ -212,7 +212,7 @@ async def batch_process_files(
)
service = NeutralizationPlayground(currentUser)
- result = service.batch_neutralize_files(files_data)
+ result = service.batchNeutralizeFiles(files_data)
return result
@@ -234,7 +234,7 @@ async def get_neutralization_stats(
"""Get neutralization processing statistics"""
try:
service = NeutralizationPlayground(currentUser)
- stats = service.get_processing_stats()
+ stats = service.getProcessingStats()
return stats
@@ -255,7 +255,7 @@ async def cleanup_file_attributes(
"""Clean up neutralization attributes for a specific file"""
try:
service = NeutralizationPlayground(currentUser)
- success = service.cleanup_file_attributes(fileId)
+ success = service.cleanupFileAttributes(fileId)
if success:
return {"message": f"Successfully cleaned up attributes for file {fileId}"}
diff --git a/modules/routes/routeDataUsers.py b/modules/routes/routeDataUsers.py
index 7a0ea735..7b7e627f 100644
--- a/modules/routes/routeDataUsers.py
+++ b/modules/routes/routeDataUsers.py
@@ -181,9 +181,9 @@ async def reset_user_password(
# Log password reset
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_security_event(
- user_id=str(currentUser.id),
- mandate_id=str(currentUser.mandateId),
+ audit_logger.logSecurityEvent(
+ userId=str(currentUser.id),
+ mandateId=str(currentUser.mandateId),
action="password_reset",
details=f"Reset password for user {userId}"
)
@@ -257,9 +257,9 @@ async def change_password(
# Log password change
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_security_event(
- user_id=str(currentUser.id),
- mandate_id=str(currentUser.mandateId),
+ audit_logger.logSecurityEvent(
+ userId=str(currentUser.id),
+ mandateId=str(currentUser.mandateId),
action="password_change",
details="User changed their own password"
)
diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py
index 8177da8d..fbd9a445 100644
--- a/modules/routes/routeSecurityGoogle.py
+++ b/modules/routes/routeSecurityGoogle.py
@@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG
from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
from modules.security.auth import getCurrentUser, limiter
-from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
+from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
# Configure logger
logger = logging.getLogger(__name__)
@@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenRefresh=token_response.get("refresh_token", ""),
tokenType="bearer",
expiresAt=jwt_expires_at.timestamp(),
- createdAt=get_utc_timestamp()
+ createdAt=getUtcTimestamp()
)
# Save access token (no connectionId)
@@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
logger.info(f"Updating connection {connection_id} for user {user.username}")
# Update connection with external service details
connection.status = ConnectionStatus.ACTIVE
- connection.lastChecked = get_utc_timestamp()
- connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
+ connection.lastChecked = getUtcTimestamp()
+ connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
connection.externalId = user_info.get("id")
connection.externalUsername = user_info.get("email")
connection.externalEmail = user_info.get("email")
@@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"),
- expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
- createdAt=get_utc_timestamp()
+ expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
+ createdAt=getUtcTimestamp()
)
interface.saveConnectionToken(token)
@@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
id: '{connection.id}',
status: 'connected',
type: 'google',
- lastChecked: {get_utc_timestamp()},
- expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
+ lastChecked: {getUtcTimestamp()},
+ expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
}}
}}, '*');
// Wait for message to be sent before closing
@@ -592,11 +592,11 @@ async def logout(
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_user_access(
- user_id=str(currentUser.id),
- mandate_id=str(currentUser.mandateId),
+ audit_logger.logUserAccess(
+ userId=str(currentUser.id),
+ mandateId=str(currentUser.mandateId),
action="logout",
- success_info="google_auth_logout"
+ successInfo="google_auth_logout"
)
except Exception:
# Don't fail if audit logging fails
@@ -726,12 +726,12 @@ async def refresh_token(
# Update the connection status and timing
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
- google_connection.lastChecked = get_utc_timestamp()
+ google_connection.lastChecked = getUtcTimestamp()
google_connection.status = ConnectionStatus.ACTIVE
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump())
# Calculate time until expiration
- current_time = get_utc_timestamp()
+ current_time = getUtcTimestamp()
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
return {
diff --git a/modules/routes/routeSecurityLocal.py b/modules/routes/routeSecurityLocal.py
index d705d5c6..0a2fff71 100644
--- a/modules/routes/routeSecurityLocal.py
+++ b/modules/routes/routeSecurityLocal.py
@@ -131,11 +131,11 @@ async def login(
# Log successful login
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_user_access(
- user_id=str(user.id),
- mandate_id=str(user.mandateId),
+ audit_logger.logUserAccess(
+ userId=str(user.id),
+ mandateId=str(user.mandateId),
action="login",
- success_info="local_auth_success"
+ successInfo="local_auth_success"
)
except Exception:
# Don't fail if audit logging fails
@@ -159,11 +159,11 @@ async def login(
# Log failed login attempt
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_user_access(
- user_id="unknown",
- mandate_id="unknown",
+ audit_logger.logUserAccess(
+ userId="unknown",
+ mandateId="unknown",
action="login",
- success_info=f"failed: {error_msg}"
+ successInfo=f"failed: {error_msg}"
)
except Exception:
# Don't fail if audit logging fails
@@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_user_access(
- user_id=str(currentUser.id),
- mandate_id=str(currentUser.mandateId),
+ audit_logger.logUserAccess(
+ userId=str(currentUser.id),
+ mandateId=str(currentUser.mandateId),
action="logout",
- success_info=f"revoked_tokens: {revoked}"
+ successInfo=f"revoked_tokens: {revoked}"
)
except Exception:
# Don't fail if audit logging fails
diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py
index 6c16c37e..30c5d33e 100644
--- a/modules/routes/routeSecurityMsft.py
+++ b/modules/routes/routeSecurityMsft.py
@@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu
from modules.datamodels.datamodelSecurity import Token
from modules.security.auth import getCurrentUser, limiter
from modules.security.jwtService import createAccessToken
-from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
+from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
# Configure logger
logger = logging.getLogger(__name__)
@@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"),
- expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
- createdAt=get_utc_timestamp()
+ expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
+ createdAt=getUtcTimestamp()
)
# Save access token (no connectionId)
@@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=jwt_token,
tokenType="bearer",
expiresAt=jwt_expires_at.timestamp(),
- createdAt=get_utc_timestamp()
+ createdAt=getUtcTimestamp()
)
# Save JWT access token
@@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
logger.info(f"Updating connection {connection_id} for user {user.username}")
# Update connection with external service details
connection.status = ConnectionStatus.ACTIVE
- connection.lastChecked = get_utc_timestamp()
- connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
+ connection.lastChecked = getUtcTimestamp()
+ connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
connection.externalId = user_info.get("id")
connection.externalUsername = user_info.get("userPrincipalName")
connection.externalEmail = user_info.get("mail")
@@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"),
- expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
- createdAt=get_utc_timestamp()
+ expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
+ createdAt=getUtcTimestamp()
)
@@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
id: '{connection.id}',
status: 'connected',
type: 'msft',
- lastChecked: {get_utc_timestamp()},
- expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
+ lastChecked: {getUtcTimestamp()},
+ expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
}}
}}, '*');
// Wait for message to be sent before closing
@@ -467,11 +467,11 @@ async def logout(
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_user_access(
- user_id=str(currentUser.id),
- mandate_id=str(currentUser.mandateId),
+ audit_logger.logUserAccess(
+ userId=str(currentUser.id),
+ mandateId=str(currentUser.mandateId),
action="logout",
- success_info="microsoft_auth_logout"
+ successInfo="microsoft_auth_logout"
)
except Exception:
# Don't fail if audit logging fails
@@ -575,27 +575,27 @@ async def refresh_token(
from modules.security.tokenManager import TokenManager
token_manager = TokenManager()
- refreshed_token = token_manager.refresh_token(current_token)
- if refreshed_token:
+ refreshedToken = token_manager.refreshToken(current_token)
+ if refreshedToken:
# Save the new connection token (which will automatically replace old ones)
- appInterface.saveConnectionToken(refreshed_token)
+ appInterface.saveConnectionToken(refreshedToken)
# Update the connection's expiration time
- msft_connection.expiresAt = float(refreshed_token.expiresAt)
- msft_connection.lastChecked = get_utc_timestamp()
+ msft_connection.expiresAt = float(refreshedToken.expiresAt)
+ msft_connection.lastChecked = getUtcTimestamp()
msft_connection.status = ConnectionStatus.ACTIVE
# Save updated connection
appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump())
# Calculate time until expiration
- current_time = get_utc_timestamp()
- expires_in = int(refreshed_token.expiresAt - current_time)
+ current_time = getUtcTimestamp()
+ expiresIn = int(refreshedToken.expiresAt - current_time)
return {
"message": "Token refreshed successfully",
- "expires_at": refreshed_token.expiresAt,
- "expires_in_seconds": expires_in
+ "expires_at": refreshedToken.expiresAt,
+ "expires_in_seconds": expiresIn
}
else:
raise HTTPException(
diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py
index 17c6e73d..0e1b009f 100644
--- a/modules/routes/routeVoiceGoogle.py
+++ b/modules/routes/routeVoiceGoogle.py
@@ -18,26 +18,26 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
# Store active WebSocket connections
-active_connections: Dict[str, WebSocket] = {}
+activeConnections: Dict[str, WebSocket] = {}
class ConnectionManager:
def __init__(self):
- self.active_connections: List[WebSocket] = []
+ self.activeConnections: List[WebSocket] = []
- async def connect(self, websocket: WebSocket, connection_id: str):
+ async def connect(self, websocket: WebSocket, connectionId: str):
await websocket.accept()
- self.active_connections.append(websocket)
- active_connections[connection_id] = websocket
- logger.info(f"WebSocket connected: {connection_id}")
+ self.activeConnections.append(websocket)
+ activeConnections[connectionId] = websocket
+ logger.info(f"WebSocket connected: {connectionId}")
- def disconnect(self, websocket: WebSocket, connection_id: str):
- if websocket in self.active_connections:
- self.active_connections.remove(websocket)
- if connection_id in active_connections:
- del active_connections[connection_id]
- logger.info(f"WebSocket disconnected: {connection_id}")
+ def disconnect(self, websocket: WebSocket, connectionId: str):
+ if websocket in self.activeConnections:
+ self.activeConnections.remove(websocket)
+ if connectionId in activeConnections:
+ del activeConnections[connectionId]
+ logger.info(f"WebSocket disconnected: {connectionId}")
- async def send_personal_message(self, message: dict, websocket: WebSocket):
+ async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
try:
await websocket.send_text(json.dumps(message))
except Exception as e:
@@ -45,10 +45,10 @@ class ConnectionManager:
manager = ConnectionManager()
-def get_voice_interface(current_user: User) -> VoiceObjects:
+def _getVoiceInterface(currentUser: User) -> VoiceObjects:
"""Get voice interface instance with user context."""
try:
- return getVoiceInterface(current_user)
+ return getVoiceInterface(currentUser)
except Exception as e:
logger.error(f"Failed to initialize voice interface: {e}")
raise HTTPException(
@@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects:
@router.post("/speech-to-text")
async def speech_to_text(
- audio_file: UploadFile = File(...),
+ audioFile: UploadFile = File(...),
language: str = Form("de-DE"),
- current_user: User = Depends(getCurrentUser)
+ currentUser: User = Depends(getCurrentUser)
):
"""Convert speech to text using Google Cloud Speech-to-Text API."""
try:
- logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}")
+ logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")
# Read audio file
- audio_content = await audio_file.read()
- logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
+ audioContent = await audioFile.read()
+ logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
# Get voice interface
- voice_interface = get_voice_interface(current_user)
+ voiceInterface = _getVoiceInterface(currentUser)
# Validate audio format
- validation = voice_interface.validateAudioFormat(audio_content)
+ validation = voiceInterface.validateAudioFormat(audioContent)
if not validation["valid"]:
raise HTTPException(
@@ -83,8 +83,8 @@ async def speech_to_text(
)
# Perform speech recognition
- result = await voice_interface.speechToText(
- audioContent=audio_content,
+ result = await voiceInterface.speechToText(
+ audioContent=audioContent,
language=language
)
@@ -95,7 +95,7 @@ async def speech_to_text(
"confidence": result["confidence"],
"language": result["language"],
"audio_info": {
- "size": len(audio_content),
+ "size": len(audioContent),
"format": validation["format"],
"estimated_duration": validation.get("estimated_duration", 0)
}
@@ -118,13 +118,13 @@ async def speech_to_text(
@router.post("/translate")
async def translate_text(
text: str = Form(...),
- source_language: str = Form("de"),
- target_language: str = Form("en"),
- current_user: User = Depends(getCurrentUser)
+ sourceLanguage: str = Form("de"),
+ targetLanguage: str = Form("en"),
+ currentUser: User = Depends(getCurrentUser)
):
"""Translate text using Google Cloud Translation API."""
try:
- logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})")
+ logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})")
if not text.strip():
raise HTTPException(
@@ -133,13 +133,13 @@ async def translate_text(
)
# Get voice interface
- voice_interface = get_voice_interface(current_user)
+ voiceInterface = _getVoiceInterface(currentUser)
# Perform translation
- result = await voice_interface.translateText(
+ result = await voiceInterface.translateText(
text=text,
- sourceLanguage=source_language,
- targetLanguage=target_language
+ sourceLanguage=sourceLanguage,
+ targetLanguage=targetLanguage
)
if result["success"]:
@@ -167,21 +167,21 @@ async def translate_text(
@router.post("/realtime-interpreter")
async def realtime_interpreter(
- audio_file: UploadFile = File(...),
- from_language: str = Form("de-DE"),
- to_language: str = Form("en-US"),
- connection_id: str = Form(None),
- current_user: User = Depends(getCurrentUser)
+ audioFile: UploadFile = File(...),
+ fromLanguage: str = Form("de-DE"),
+ toLanguage: str = Form("en-US"),
+ connectionId: str = Form(None),
+ currentUser: User = Depends(getCurrentUser)
):
"""Real-time interpreter: speech to translated text using Google Cloud APIs."""
try:
- logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
- logger.info(f" From: {from_language} -> To: {to_language}")
- logger.info(f" MIME type: {audio_file.content_type}")
+ logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
+ logger.info(f" From: {fromLanguage} -> To: {toLanguage}")
+ logger.info(f" MIME type: {audioFile.content_type}")
# Read audio file
- audio_content = await audio_file.read()
- logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
+ audioContent = await audioFile.read()
+ logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
# Save audio file for debugging with correct extension
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
@@ -192,10 +192,10 @@ async def realtime_interpreter(
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
# Get voice interface
- voice_interface = get_voice_interface(current_user)
+ voiceInterface = _getVoiceInterface(currentUser)
# Validate audio format
- validation = voice_interface.validateAudioFormat(audio_content)
+ validation = voiceInterface.validateAudioFormat(audioContent)
if not validation["valid"]:
raise HTTPException(
@@ -204,10 +204,10 @@ async def realtime_interpreter(
)
# Perform complete pipeline: Speech-to-Text + Translation
- result = await voice_interface.speechToTranslatedText(
- audioContent=audio_content,
- fromLanguage=from_language,
- toLanguage=to_language
+ result = await voiceInterface.speechToTranslatedText(
+ audioContent=audioContent,
+ fromLanguage=fromLanguage,
+ toLanguage=toLanguage
)
if result["success"]:
@@ -223,7 +223,7 @@ async def realtime_interpreter(
"source_language": result["source_language"],
"target_language": result["target_language"],
"audio_info": {
- "size": len(audio_content),
+ "size": len(audioContent),
"format": validation["format"],
"estimated_duration": validation.get("estimated_duration", 0)
}
@@ -249,7 +249,7 @@ async def text_to_speech(
text: str = Form(...),
language: str = Form("de-DE"),
voice: str = Form(None),
- current_user: User = Depends(getCurrentUser)
+ currentUser: User = Depends(getCurrentUser)
):
"""Convert text to speech using Google Cloud Text-to-Speech."""
try:
@@ -261,8 +261,8 @@ async def text_to_speech(
detail="Empty text provided for text-to-speech"
)
- voice_interface = get_voice_interface(current_user)
- result = await voice_interface.textToSpeech(
+ voiceInterface = _getVoiceInterface(currentUser)
+ result = await voiceInterface.textToSpeech(
text=text,
languageCode=language,
voiceName=voice
@@ -294,13 +294,13 @@ async def text_to_speech(
)
@router.get("/languages")
-async def get_available_languages(current_user: User = Depends(getCurrentUser)):
+async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
"""Get available languages from Google Cloud Text-to-Speech."""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
- voice_interface = get_voice_interface(current_user)
- result = await voice_interface.getAvailableLanguages()
+ voiceInterface = _getVoiceInterface(currentUser)
+ result = await voiceInterface.getAvailableLanguages()
if result["success"]:
return {
@@ -324,21 +324,21 @@ async def get_available_languages(current_user: User = Depends(getCurrentUser)):
@router.get("/voices")
async def get_available_voices(
- language_code: Optional[str] = None,
- current_user: User = Depends(getCurrentUser)
+ languageCode: Optional[str] = None,
+ currentUser: User = Depends(getCurrentUser)
):
"""Get available voices from Google Cloud Text-to-Speech."""
try:
- logger.info(f"🎤 Getting available voices, language filter: {language_code}")
+ logger.info(f"🎤 Getting available voices, language filter: {languageCode}")
- voice_interface = get_voice_interface(current_user)
- result = await voice_interface.getAvailableVoices(languageCode=language_code)
+ voiceInterface = _getVoiceInterface(currentUser)
+ result = await voiceInterface.getAvailableVoices(languageCode=languageCode)
if result["success"]:
return {
"success": True,
"voices": result["voices"],
- "language_filter": language_code
+ "language_filter": languageCode
}
else:
raise HTTPException(
@@ -356,11 +356,11 @@ async def get_available_voices(
)
@router.get("/health")
-async def health_check(current_user: User = Depends(getCurrentUser)):
+async def health_check(currentUser: User = Depends(getCurrentUser)):
"""Health check for Google Cloud voice services."""
try:
- voice_interface = get_voice_interface(current_user)
- test_result = await voice_interface.healthCheck()
+ voiceInterface = _getVoiceInterface(currentUser)
+ test_result = await voiceInterface.healthCheck()
return test_result
@@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)):
}
@router.get("/settings")
-async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
+async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
"""Get voice settings for the current user."""
try:
- logger.info(f"Getting voice settings for user: {current_user.id}")
+ logger.info(f"Getting voice settings for user: {currentUser.id}")
# Get voice interface
- voice_interface = get_voice_interface(current_user)
+ voiceInterface = _getVoiceInterface(currentUser)
# Get or create voice settings for the user
- voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id)
+ voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id)
if voice_settings:
# Return user settings
@@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
@router.post("/settings")
async def save_voice_settings(
settings: Dict[str, Any] = Body(...),
- current_user: User = Depends(getCurrentUser)
+ currentUser: User = Depends(getCurrentUser)
):
"""Save voice settings for the current user."""
try:
- logger.info(f"Saving voice settings for user: {current_user.id}")
+ logger.info(f"Saving voice settings for user: {currentUser.id}")
logger.info(f"Settings: {settings}")
# Validate required settings
- required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
- for field in required_fields:
+ requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
+ for field in requiredFields:
if field not in settings:
raise HTTPException(
status_code=400,
@@ -448,23 +448,23 @@ async def save_voice_settings(
settings["targetLanguage"] = "en-US"
# Get voice interface
- voice_interface = get_voice_interface(current_user)
+ voiceInterface = _getVoiceInterface(currentUser)
# Check if settings already exist for this user
- existing_settings = voice_interface.getVoiceSettings(current_user.id)
+ existing_settings = voiceInterface.getVoiceSettings(currentUser.id)
if existing_settings:
# Update existing settings
- logger.info(f"Updating existing voice settings for user {current_user.id}")
- updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings)
- logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}")
+ logger.info(f"Updating existing voice settings for user {currentUser.id}")
+ updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings)
+ logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}")
else:
# Create new settings
- logger.info(f"Creating new voice settings for user {current_user.id}")
+ logger.info(f"Creating new voice settings for user {currentUser.id}")
# Add userId to settings
- settings["userId"] = current_user.id
- created_settings = voice_interface.createVoiceSettings(settings)
- logger.info(f"Voice settings created for user {current_user.id}: {created_settings}")
+ settings["userId"] = currentUser.id
+ created_settings = voiceInterface.createVoiceSettings(settings)
+ logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}")
return {
"success": True,
@@ -486,25 +486,25 @@ async def save_voice_settings(
@router.websocket("/ws/realtime-interpreter")
async def websocket_realtime_interpreter(
websocket: WebSocket,
- user_id: str = "default",
- from_language: str = "de-DE",
- to_language: str = "en-US"
+ userId: str = "default",
+ fromLanguage: str = "de-DE",
+ toLanguage: str = "en-US"
):
"""WebSocket endpoint for real-time voice interpretation"""
- connection_id = f"realtime_{user_id}_{from_language}_{to_language}"
+ connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}"
try:
- await manager.connect(websocket, connection_id)
+ await manager.connect(websocket, connectionId)
# Send connection confirmation
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "connected",
- "connection_id": connection_id,
+ "connection_id": connectionId,
"message": "Connected to real-time interpreter"
}, websocket)
# Initialize voice interface
- voice_interface = get_voice_interface(User(id=user_id))
+ voiceInterface = _getVoiceInterface(User(id=userId))
while True:
# Receive message from client
@@ -515,7 +515,7 @@ async def websocket_realtime_interpreter(
# Process audio chunk
try:
# Decode base64 audio data
- audio_data = base64.b64decode(message["data"])
+ audioData = base64.b64decode(message["data"])
# For now, just acknowledge receipt
# In a full implementation, this would:
@@ -524,9 +524,9 @@ async def websocket_realtime_interpreter(
# 3. Send partial results back
# 4. Handle translation
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "audio_received",
- "chunk_size": len(audio_data),
+ "chunk_size": len(audioData),
"timestamp": message.get("timestamp")
}, websocket)
@@ -539,7 +539,7 @@ async def websocket_realtime_interpreter(
elif message["type"] == "ping":
# Respond to ping
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "pong",
"timestamp": message.get("timestamp")
}, websocket)
@@ -548,32 +548,32 @@ async def websocket_realtime_interpreter(
logger.warning(f"Unknown message type: {message['type']}")
except WebSocketDisconnect:
- manager.disconnect(websocket, connection_id)
- logger.info(f"Client disconnected: {connection_id}")
+ manager.disconnect(websocket, connectionId)
+ logger.info(f"Client disconnected: {connectionId}")
except Exception as e:
logger.error(f"WebSocket error: {e}")
- manager.disconnect(websocket, connection_id)
+ manager.disconnect(websocket, connectionId)
@router.websocket("/ws/speech-to-text")
async def websocket_speech_to_text(
websocket: WebSocket,
- user_id: str = "default",
+ userId: str = "default",
language: str = "de-DE"
):
"""WebSocket endpoint for real-time speech-to-text"""
- connection_id = f"stt_{user_id}_{language}"
+ connectionId = f"stt_{userId}_{language}"
try:
- await manager.connect(websocket, connection_id)
+ await manager.connect(websocket, connectionId)
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "connected",
- "connection_id": connection_id,
+ "connection_id": connectionId,
"message": "Connected to speech-to-text"
}, websocket)
# Initialize voice interface
- voice_interface = get_voice_interface(User(id=user_id))
+ voiceInterface = _getVoiceInterface(User(id=userId))
while True:
data = await websocket.receive_text()
@@ -581,12 +581,12 @@ async def websocket_speech_to_text(
if message["type"] == "audio_chunk":
try:
- audio_data = base64.b64decode(message["data"])
+ audioData = base64.b64decode(message["data"])
# Process audio chunk
# This would integrate with Google Cloud Speech-to-Text streaming API
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "transcription_result",
"text": "Audio chunk received", # Placeholder
"confidence": 0.95,
@@ -595,39 +595,39 @@ async def websocket_speech_to_text(
except Exception as e:
logger.error(f"Error processing audio: {e}")
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "error",
"error": f"Failed to process audio: {str(e)}"
}, websocket)
elif message["type"] == "ping":
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "pong",
"timestamp": message.get("timestamp")
}, websocket)
except WebSocketDisconnect:
- manager.disconnect(websocket, connection_id)
+ manager.disconnect(websocket, connectionId)
except Exception as e:
logger.error(f"WebSocket error: {e}")
- manager.disconnect(websocket, connection_id)
+ manager.disconnect(websocket, connectionId)
@router.websocket("/ws/text-to-speech")
async def websocket_text_to_speech(
websocket: WebSocket,
- user_id: str = "default",
+ userId: str = "default",
language: str = "de-DE",
voice: str = "de-DE-Wavenet-A"
):
"""WebSocket endpoint for real-time text-to-speech"""
- connection_id = f"tts_{user_id}_{language}_{voice}"
+ connectionId = f"tts_{userId}_{language}_{voice}"
try:
- await manager.connect(websocket, connection_id)
+ await manager.connect(websocket, connectionId)
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "connected",
- "connection_id": connection_id,
+ "connection_id": connectionId,
"message": "Connected to text-to-speech"
}, websocket)
@@ -643,7 +643,7 @@ async def websocket_text_to_speech(
# This would integrate with Google Cloud Text-to-Speech API
# For now, send a placeholder response
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "audio_data",
"audio": "base64_encoded_audio_here", # Placeholder
"format": "mp3"
@@ -651,19 +651,19 @@ async def websocket_text_to_speech(
except Exception as e:
logger.error(f"Error processing text-to-speech: {e}")
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "error",
"error": f"Failed to process text: {str(e)}"
}, websocket)
elif message["type"] == "ping":
- await manager.send_personal_message({
+ await manager.sendPersonalMessage({
"type": "pong",
"timestamp": message.get("timestamp")
}, websocket)
except WebSocketDisconnect:
- manager.disconnect(websocket, connection_id)
+ manager.disconnect(websocket, connectionId)
except Exception as e:
logger.error(f"WebSocket error: {e}")
- manager.disconnect(websocket, connection_id)
+ manager.disconnect(websocket, connectionId)
diff --git a/modules/security/jwtService.py b/modules/security/jwtService.py
index 87e226c7..ab5a9392 100644
--- a/modules/security/jwtService.py
+++ b/modules/security/jwtService.py
@@ -9,7 +9,7 @@ from fastapi import Response
from jose import jwt
from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_now
+from modules.shared.timezoneUtils import getUtcNow
# Config
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
@@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T
import uuid
toEncode["jti"] = str(uuid.uuid4())
- expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
+ expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
@@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> Tuple[str, "datetime"]:
toEncode["jti"] = str(uuid.uuid4())
toEncode["type"] = "refresh"
- expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
+ expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
diff --git a/modules/security/tokenManager.py b/modules/security/tokenManager.py
index 07c5c2a9..42c4a7cf 100644
--- a/modules/security/tokenManager.py
+++ b/modules/security/tokenManager.py
@@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable
from modules.datamodels.datamodelSecurity import Token
from modules.datamodels.datamodelUam import AuthAuthority
from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp
logger = logging.getLogger(__name__)
@@ -27,54 +27,54 @@ class TokenManager:
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID")
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
- def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
+ def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Microsoft OAuth token using refresh token"""
try:
- logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}")
- logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
+ logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}")
+ logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
if not self.msft_client_id or not self.msft_client_secret:
logger.error("Microsoft OAuth configuration not found")
return None
# Microsoft token refresh endpoint
- token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
- logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}")
+ tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
+ logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}")
# Prepare refresh request
data = {
"client_id": self.msft_client_id,
"client_secret": self.msft_client_secret,
"grant_type": "refresh_token",
- "refresh_token": refresh_token,
+ "refresh_token": refreshToken,
"scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read"
}
- logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
+ logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
# Make refresh request
with httpx.Client(timeout=30.0) as client:
- logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint")
- response = client.post(token_url, data=data)
- logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}")
+ logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint")
+ response = client.post(tokenUrl, data=data)
+ logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}")
if response.status_code == 200:
- token_data = response.json()
- logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token")
+ tokenData = response.json()
+ logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token")
# Create new token
- new_token = Token(
- userId=user_id,
+ newToken = Token(
+ userId=userId,
authority=AuthAuthority.MSFT,
- connectionId=old_token.connectionId, # Preserve connection ID
- tokenAccess=token_data["access_token"],
- tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided
- tokenType=token_data.get("token_type", "bearer"),
- expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
- createdAt=get_utc_timestamp()
+ connectionId=oldToken.connectionId, # Preserve connection ID
+ tokenAccess=tokenData["access_token"],
+ tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided
+ tokenType=tokenData.get("token_type", "bearer"),
+ expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
+ createdAt=getUtcTimestamp()
)
- logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}")
- return new_token
+ logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}")
+ return newToken
else:
logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}")
return None
@@ -83,70 +83,70 @@ class TokenManager:
logger.error(f"Error refreshing Microsoft token: {str(e)}")
return None
- def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
+ def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Google OAuth token using refresh token"""
try:
- logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}")
- logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
+ logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}")
+ logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
if not self.google_client_id or not self.google_client_secret:
logger.error("Google OAuth configuration not found")
return None
# Google token refresh endpoint
- token_url = "https://oauth2.googleapis.com/token"
- logger.debug(f"refresh_google_token: Using token URL: {token_url}")
+ tokenUrl = "https://oauth2.googleapis.com/token"
+ logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}")
# Prepare refresh request
data = {
"client_id": self.google_client_id,
"client_secret": self.google_client_secret,
"grant_type": "refresh_token",
- "refresh_token": refresh_token
+ "refresh_token": refreshToken
}
- logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
+ logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
# Make refresh request
with httpx.Client(timeout=30.0) as client:
- logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint")
- response = client.post(token_url, data=data)
- logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}")
+ logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint")
+ response = client.post(tokenUrl, data=data)
+ logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}")
if response.status_code == 200:
- token_data = response.json()
- logger.debug(f"refresh_google_token: Token refresh successful, creating new token")
+ tokenData = response.json()
+ logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token")
# Validate the response contains required fields
- if "access_token" not in token_data:
+ if "access_token" not in tokenData:
logger.error("Google token refresh response missing access_token")
return None
# Create new token
- new_token = Token(
- userId=user_id,
+ newToken = Token(
+ userId=userId,
authority=AuthAuthority.GOOGLE,
- connectionId=old_token.connectionId, # Preserve connection ID
- tokenAccess=token_data["access_token"],
- tokenRefresh=token_data.get("refresh_token", refresh_token), # Use new refresh token if provided
- tokenType=token_data.get("token_type", "bearer"),
- expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
- createdAt=get_utc_timestamp()
+ connectionId=oldToken.connectionId, # Preserve connection ID
+ tokenAccess=tokenData["access_token"],
+ tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided
+ tokenType=tokenData.get("token_type", "bearer"),
+ expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
+ createdAt=getUtcTimestamp()
)
- logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}")
- return new_token
+ logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}")
+ return newToken
else:
- error_details = response.text
- logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}")
+ errorDetails = response.text
+ logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}")
# Handle specific error cases
if response.status_code == 400:
try:
- error_data = response.json()
- error_code = error_data.get("error")
- if error_code == "invalid_grant":
+ errorData = response.json()
+ errorCode = errorData.get("error")
+ if errorCode == "invalid_grant":
logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate")
- elif error_code == "invalid_client":
+ elif errorCode == "invalid_client":
logger.error("Google OAuth client configuration is invalid")
except:
pass
@@ -157,55 +157,55 @@ class TokenManager:
logger.error(f"Error refreshing Google token: {str(e)}")
return None
- def refresh_token(self, old_token: Token) -> Optional[Token]:
+ def refreshToken(self, oldToken: Token) -> Optional[Token]:
"""Refresh an expired token using the appropriate OAuth service"""
try:
- logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}")
- logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}")
+ logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}")
+ logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}")
# Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
# Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
try:
- now_ts = get_utc_timestamp()
- created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0
- seconds_since_last_refresh = now_ts - created_ts
- if seconds_since_last_refresh < 10 * 60:
+ nowTs = getUtcTimestamp()
+ createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0
+ secondsSinceLastRefresh = nowTs - createdTs
+ if secondsSinceLastRefresh < 10 * 60:
logger.info(
- f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. "
- f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)."
+ f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. "
+ f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)."
)
# Return the existing token to avoid caller errors while preventing provider rate limits
- return old_token
+ return oldToken
except Exception:
# If any issue reading timestamps, proceed with normal refresh to be safe
pass
- if not old_token.tokenRefresh:
- logger.warning(f"No refresh token available for {old_token.authority}")
+ if not oldToken.tokenRefresh:
+ logger.warning(f"No refresh token available for {oldToken.authority}")
return None
# Route to appropriate refresh method
- if old_token.authority == AuthAuthority.MSFT:
- logger.debug(f"refresh_token: Refreshing Microsoft token")
- return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token)
- elif old_token.authority == AuthAuthority.GOOGLE:
- logger.debug(f"refresh_token: Refreshing Google token")
- return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token)
+ if oldToken.authority == AuthAuthority.MSFT:
+ logger.debug(f"refreshToken: Refreshing Microsoft token")
+ return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
+ elif oldToken.authority == AuthAuthority.GOOGLE:
+ logger.debug(f"refreshToken: Refreshing Google token")
+ return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
else:
- logger.warning(f"Unknown authority for token refresh: {old_token.authority}")
+ logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
return None
except Exception as e:
logger.error(f"Error refreshing token: {str(e)}")
return None
- def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
+ def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
"""Ensure a token is fresh; refresh if expiring within threshold.
Args:
token: Existing token to validate/refresh.
- seconds_before_expiry: Threshold window to proactively refresh.
- save_callback: Optional function to persist a refreshed token.
+ secondsBeforeExpiry: Threshold window to proactively refresh.
+ saveCallback: Optional function to persist a refreshed token.
Returns:
A fresh token (refreshed or original) or None if refresh failed.
@@ -214,31 +214,31 @@ class TokenManager:
if token is None:
return None
- now_ts = get_utc_timestamp()
- expires_at = token.expiresAt or 0
+ nowTs = getUtcTimestamp()
+ expiresAt = token.expiresAt or 0
# If token expires within the threshold, try to refresh
- if expires_at and expires_at < (now_ts + seconds_before_expiry):
+ if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry):
logger.info(
- f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
- f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
+ f"ensureFreshToken: Token for connection {token.connectionId} expiring soon "
+ f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh."
)
- refreshed = self.refresh_token(token)
+ refreshed = self.refreshToken(token)
if refreshed:
- if save_callback is not None:
+ if saveCallback is not None:
try:
- save_callback(refreshed)
+ saveCallback(refreshed)
except Exception as e:
- logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
+ logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}")
return refreshed
else:
- logger.warning("ensure_fresh_token: Token refresh failed")
+ logger.warning("ensureFreshToken: Token refresh failed")
return None
# Token is sufficiently fresh
return token
except Exception as e:
- logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
+ logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
return None
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
@@ -256,10 +256,10 @@ class TokenManager:
token = interfaceDbApp.getConnectionToken(connectionId)
if not token:
return None
- return self.ensure_fresh_token(
+ return self.ensureFreshToken(
token,
- seconds_before_expiry=secondsBeforeExpiry,
- save_callback=lambda t: interfaceDbApp.saveConnectionToken(t)
+ secondsBeforeExpiry=secondsBeforeExpiry,
+ saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
)
except Exception as e:
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
diff --git a/modules/security/tokenRefreshMiddleware.py b/modules/security/tokenRefreshMiddleware.py
index 2ced531c..b7131a40 100644
--- a/modules/security/tokenRefreshMiddleware.py
+++ b/modules/security/tokenRefreshMiddleware.py
@@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
from typing import Callable
import asyncio
from modules.security.tokenRefreshService import token_refresh_service
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@@ -135,7 +135,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
try:
# Perform proactive refresh in background
asyncio.create_task(self._proactive_refresh_tokens(user_id))
- self.last_check[user_id] = get_utc_timestamp()
+ self.last_check[user_id] = getUtcTimestamp()
except Exception as e:
logger.warning(f"Error scheduling proactive refresh: {str(e)}")
@@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
Check if we should perform proactive refresh for this user
"""
try:
- current_time = get_utc_timestamp()
+ current_time = getUtcTimestamp()
last_check = self.last_check.get(user_id, 0)
# Check every 5 minutes
diff --git a/modules/security/tokenRefreshService.py b/modules/security/tokenRefreshService.py
index ff7ac720..24a99e3b 100644
--- a/modules/security/tokenRefreshService.py
+++ b/modules/security/tokenRefreshService.py
@@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues.
import logging
from typing import Dict, Any
from modules.datamodels.datamodelUam import UserConnection, AuthAuthority
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared.auditLogger import audit_logger
logger = logging.getLogger(__name__)
@@ -24,7 +24,7 @@ class TokenRefreshService:
def _is_rate_limited(self, connection_id: str) -> bool:
"""Check if connection is rate limited for refresh attempts"""
- now = get_utc_timestamp()
+ now = getUtcTimestamp()
if connection_id not in self.rate_limit_map:
return False
@@ -39,7 +39,7 @@ class TokenRefreshService:
def _record_refresh_attempt(self, connection_id: str) -> None:
"""Record a refresh attempt for rate limiting"""
- now = get_utc_timestamp()
+ now = getUtcTimestamp()
if connection_id not in self.rate_limit_map:
self.rate_limit_map[connection_id] = []
self.rate_limit_map[connection_id].append(now)
@@ -60,14 +60,14 @@ class TokenRefreshService:
token_manager = TokenManager()
# Attempt to refresh the token
- refreshed_token = token_manager.refresh_token(current_token)
- if refreshed_token:
+ refreshedToken = token_manager.refreshToken(current_token)
+ if refreshedToken:
# Save the refreshed token
- interface.saveConnectionToken(refreshed_token)
+ interface.saveConnectionToken(refreshedToken)
# Update connection status
interface.db.recordModify(UserConnection, connection.id, {
- "lastChecked": get_utc_timestamp(),
+ "lastChecked": getUtcTimestamp(),
"expiresAt": refreshed_token.expiresAt
})
@@ -75,9 +75,9 @@ class TokenRefreshService:
# Log audit event
try:
- audit_logger.log_security_event(
- user_id=str(connection.userId),
- mandate_id="system",
+ audit_logger.logSecurityEvent(
+ userId=str(connection.userId),
+ mandateId="system",
action="token_refresh",
details=f"Google token refreshed for connection {connection.id}"
)
@@ -109,14 +109,14 @@ class TokenRefreshService:
token_manager = TokenManager()
# Attempt to refresh the token
- refreshed_token = token_manager.refresh_token(current_token)
- if refreshed_token:
+ refreshedToken = token_manager.refreshToken(current_token)
+ if refreshedToken:
# Save the refreshed token
- interface.saveConnectionToken(refreshed_token)
+ interface.saveConnectionToken(refreshedToken)
# Update connection status
interface.db.recordModify(UserConnection, connection.id, {
- "lastChecked": get_utc_timestamp(),
+ "lastChecked": getUtcTimestamp(),
"expiresAt": refreshed_token.expiresAt
})
@@ -124,9 +124,9 @@ class TokenRefreshService:
# Log audit event
try:
- audit_logger.log_security_event(
- user_id=str(connection.userId),
- mandate_id="system",
+ audit_logger.logSecurityEvent(
+ userId=str(connection.userId),
+ mandateId="system",
action="token_refresh",
details=f"Microsoft token refreshed for connection {connection.id}"
)
@@ -234,7 +234,7 @@ class TokenRefreshService:
refreshed_count = 0
failed_count = 0
rate_limited_count = 0
- current_time = get_utc_timestamp()
+ current_time = getUtcTimestamp()
five_minutes = 5 * 60 # 5 minutes in seconds
# Process each connection
diff --git a/modules/services/__init__.py b/modules/services/__init__.py
index 0f269e28..87b13207 100644
--- a/modules/services/__init__.py
+++ b/modules/services/__init__.py
@@ -11,18 +11,18 @@ class PublicService:
- Optional name_filter predicate for allow-list patterns
"""
- def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
+ def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None):
self._target = target
- self._functions_only = functions_only
- self._name_filter = name_filter
+ self._functionsOnly = functionsOnly
+ self._nameFilter = nameFilter
def __getattr__(self, name: str):
if name.startswith('_'):
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
- if self._name_filter and not self._name_filter(name):
+ if self._nameFilter and not self._nameFilter(name):
raise AttributeError(f"'{name}' not exposed by policy")
attr = getattr(self._target, name)
- if self._functions_only and not callable(attr):
+ if self._functionsOnly and not callable(attr):
raise AttributeError(f"'{name}' is not a function")
return attr
@@ -30,8 +30,8 @@ class PublicService:
names = [
n for n in dir(self._target)
if not n.startswith('_')
- and (not self._functions_only or callable(getattr(self._target, n, None)))
- and (self._name_filter(n) if self._name_filter else True)
+ and (not self._functionsOnly or callable(getattr(self._target, n, None)))
+ and (self._nameFilter(n) if self._nameFilter else True)
]
return sorted(names)
@@ -70,7 +70,7 @@ class Services:
self.sharepoint = PublicService(SharepointService(self))
from .serviceAi.mainServiceAi import AiService
- self.ai = PublicService(AiService(self))
+ self.ai = PublicService(AiService(self), functionsOnly=False)
from .serviceTicket.mainServiceTicket import TicketService
self.ticket = PublicService(TicketService(self))
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index b1326967..87b56ceb 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -1,30 +1,26 @@
+import json
import logging
-from typing import Dict, Any, List, Optional, Union
+import time
+from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
-from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
+from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.interfaces.interfaceAiObjects import AiObjects
-from modules.services.serviceAi.subCoreAi import SubCoreAi
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
-from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
-from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent
-
+from modules.shared.jsonUtils import (
+ extractJsonString,
+ repairBrokenJson,
+ extractSectionsFromDocument,
+ buildContinuationContext
+)
logger = logging.getLogger(__name__)
+# Rebuild the model to resolve forward references
+AiCallRequest.model_rebuild()
+
class AiService:
- """Lightweight AI service orchestrator that delegates to specialized sub-modules.
-
- Manager delegates to specialized sub-modules:
- - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
- - SubDocumentProcessing: Document chunking, processing, and merging logic
- - SubDocumentGeneration: Single-file and multi-file document generation
-
- The main service acts as a coordinator:
- 1. Manages lazy initialization of sub-modules
- 2. Delegates operations to appropriate sub-modules
- 3. Maintains the same public API for backward compatibility
- """
+ """AI service with core operations integrated."""
def __init__(self, serviceCenter=None) -> None:
"""Initialize AI service with service center access.
@@ -34,64 +30,638 @@ class AiService:
"""
self.services = serviceCenter
# Only depend on interfaces
- self.aiObjects = None # Will be initialized in create()
- self._extractionService = None # Lazy initialization
- self._coreAi = None # Lazy initialization
- self._documentProcessor = None # Lazy initialization
- self._documentGenerator = None # Lazy initialization
+ self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized()
+ # Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
+ self.extractionService = None
+ self.documentProcessor = None
- @property
- def extractionService(self):
- """Lazy initialization of extraction service."""
- if self._extractionService is None:
- logger.info("Lazy initializing ExtractionService...")
- self._extractionService = ExtractionService(self.services)
- return self._extractionService
-
- @property
- def coreAi(self):
- """Lazy initialization of core AI service."""
- if self._coreAi is None:
- if self.aiObjects is None:
- raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
- logger.info("Lazy initializing SubCoreAi...")
- self._coreAi = SubCoreAi(self.services, self.aiObjects)
- return self._coreAi
-
- @property
- def documentProcessor(self):
- """Lazy initialization of document processing service."""
- if self._documentProcessor is None:
- logger.info("Lazy initializing SubDocumentProcessing...")
- self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
- return self._documentProcessor
-
-
- @property
- def documentGenerator(self):
- """Lazy initialization of document generation service."""
- if self._documentGenerator is None:
- logger.info("Lazy initializing SubDocumentGeneration...")
- self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
- return self._documentGenerator
+ def _initializeSubmodules(self):
+ """Initialize all submodules after aiObjects is ready."""
+ if self.aiObjects is None:
+ raise RuntimeError("aiObjects must be initialized before initializing submodules")
+
+ if self.extractionService is None:
+ logger.info("Initializing ExtractionService...")
+ self.extractionService = ExtractionService(self.services)
+
+ if self.documentProcessor is None:
+ logger.info("Initializing SubDocumentProcessing...")
+ self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
async def _ensureAiObjectsInitialized(self):
- """Ensure aiObjects is initialized."""
+ """Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None:
logger.info("Lazy initializing AiObjects...")
self.aiObjects = await AiObjects.create()
logger.info("AiObjects initialization completed")
+ # Initialize submodules after aiObjects is ready
+ self._initializeSubmodules()
@classmethod
async def create(cls, serviceCenter=None) -> "AiService":
- """Create AiService instance with all connectors initialized."""
+ """Create AiService instance with all connectors and submodules initialized."""
logger.info("AiService.create() called")
instance = cls(serviceCenter)
logger.info("AiService created, about to call AiObjects.create()...")
instance.aiObjects = await AiObjects.create()
logger.info("AiObjects.create() completed")
+ # Initialize all submodules after aiObjects is ready
+ instance._initializeSubmodules()
+ logger.info("AiService submodules initialized")
return instance
+ # Helper methods
+
+ def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
+ """
+ Build full prompt by replacing placeholders with their content.
+ Uses the new {{KEY:placeholder}} format.
+
+ Args:
+ prompt: The base prompt template
+ placeholders: Dictionary of placeholder key-value pairs
+
+ Returns:
+ Prompt with placeholders replaced
+ """
+ if not placeholders:
+ return prompt
+
+ full_prompt = prompt
+ for placeholder, content in placeholders.items():
+ # Skip if content is None or empty
+ if content is None:
+ continue
+ # Replace {{KEY:placeholder}}
+ full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
+
+ return full_prompt
+
+ async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
+ """Analyze prompt to determine appropriate AiCallOptions parameters."""
+ try:
+ # Get dynamic enum values from Pydantic models
+ operationTypes = [e.value for e in OperationTypeEnum]
+ priorities = [e.value for e in PriorityEnum]
+ processingModes = [e.value for e in ProcessingModeEnum]
+
+ # Create analysis prompt for AI to determine operation type and parameters
+ analysisPrompt = f"""
+You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
+
+PROMPT TO ANALYZE:
+{self.services.utils.sanitizePromptContent(prompt, 'userinput')}
+
+Based on the prompt content, determine:
+1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
+2. priority: Choose from: {', '.join(priorities)}
+3. processingMode: Choose from: {', '.join(processingModes)}
+4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
+5. compressContext: true/false (true to summarize context, false to process fully)
+
+Respond with ONLY a JSON object in this exact format:
+{{
+ "operationType": "dataAnalyse",
+ "priority": "balanced",
+ "processingMode": "basic",
+ "compressPrompt": true,
+ "compressContext": true
+}}
+"""
+
+ # Use AI to analyze the prompt
+ request = AiCallRequest(
+ prompt=analysisPrompt,
+ options=AiCallOptions(
+ operationType=OperationTypeEnum.DATA_ANALYSE,
+ priority=PriorityEnum.SPEED,
+ processingMode=ProcessingModeEnum.BASIC,
+ compressPrompt=True,
+ compressContext=False
+ )
+ )
+
+ response = await self.aiObjects.call(request)
+
+ # Parse AI response
+ try:
+ jsonStart = response.content.find('{')
+ jsonEnd = response.content.rfind('}') + 1
+ if jsonStart != -1 and jsonEnd > jsonStart:
+ analysis = json.loads(response.content[jsonStart:jsonEnd])
+
+ # Map string values to enums
+ operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
+ priority = PriorityEnum(analysis.get('priority', 'balanced'))
+ processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
+
+ return AiCallOptions(
+ operationType=operationType,
+ priority=priority,
+ processingMode=processingMode,
+ compressPrompt=analysis.get('compressPrompt', True),
+ compressContext=analysis.get('compressContext', True)
+ )
+ except Exception as e:
+ logger.warning(f"Failed to parse AI analysis response: {e}")
+
+ except Exception as e:
+ logger.warning(f"Prompt analysis failed: {e}")
+
+ # Fallback to default options
+ return AiCallOptions(
+ operationType=OperationTypeEnum.DATA_ANALYSE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.BASIC
+ )
+
+ async def _callAiWithLooping(
+ self,
+ prompt: str,
+ options: AiCallOptions,
+ debugPrefix: str = "ai_call",
+ promptBuilder: Optional[callable] = None,
+ promptArgs: Optional[Dict[str, Any]] = None,
+ operationId: Optional[str] = None
+ ) -> str:
+ """
+ Shared core function for AI calls with repair-based looping system.
+ Automatically repairs broken JSON and continues generation seamlessly.
+
+ Args:
+ prompt: The prompt to send to AI
+ options: AI call configuration options
+ debugPrefix: Prefix for debug file names
+ promptBuilder: Optional function to rebuild prompts for continuation
+ promptArgs: Optional arguments for prompt builder
+ operationId: Optional operation ID for progress tracking
+
+ Returns:
+ Complete AI response after all iterations
+ """
+ maxIterations = 50 # Prevent infinite loops
+ iteration = 0
+ allSections = [] # Accumulate all sections across iterations
+ lastRawResponse = None # Store last raw JSON response for continuation
+
+ while iteration < maxIterations:
+ iteration += 1
+
+ # Update progress for iteration start
+ if operationId:
+ if iteration == 1:
+ self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
+ else:
+ # For continuation iterations, show progress incrementally
+ baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4) # Progress from 0.5 to 0.9 over maxIterations iterations
+ self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})")
+
+ # Build iteration prompt
+ if len(allSections) > 0 and promptBuilder and promptArgs:
+ # This is a continuation - build continuation context with raw JSON and rebuild prompt
+ continuationContext = buildContinuationContext(allSections, lastRawResponse)
+ if not lastRawResponse:
+ logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
+
+ # Rebuild prompt with continuation context using the provided prompt builder
+ iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
+ else:
+ # First iteration - use original prompt
+ iterationPrompt = prompt
+
+ # Make AI call
+ try:
+ if operationId and iteration == 1:
+ self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
+ request = AiCallRequest(
+ prompt=iterationPrompt,
+ context="",
+ options=options
+ )
+
+ # Write the ACTUAL prompt sent to AI
+ if iteration == 1:
+ self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
+ else:
+ self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
+
+ response = await self.aiObjects.call(request)
+ result = response.content
+
+ # Update progress after AI call
+ if operationId:
+ if iteration == 1:
+ self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
+ else:
+ progress = 0.6 + (min(iteration - 1, 10) * 0.03)
+ self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")
+
+ # Write raw AI response to debug file
+ if iteration == 1:
+ self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
+ else:
+ self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
+
+ # Emit stats for this iteration
+ self.services.workflow.storeWorkflowStat(
+ self.services.currentWorkflow,
+ response,
+ f"ai.call.{debugPrefix}.iteration_{iteration}"
+ )
+
+ if not result or not result.strip():
+ logger.warning(f"Iteration {iteration}: Empty response, stopping")
+ break
+
+ # Store raw response for continuation (even if broken)
+ lastRawResponse = result
+
+ # Check for complete_response flag in raw response (before parsing)
+ import re
+ if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
+ pass # Flag detected, will stop in _shouldContinueGeneration
+
+ # Extract sections from response (handles both valid and broken JSON)
+ extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)
+
+ # Update progress after parsing
+ if operationId:
+ if extractedSections:
+ self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")
+
+ if not extractedSections:
+ # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
+ if iteration > 1 and not wasJsonComplete:
+ logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
+ continue
+ # Otherwise, stop if no sections
+ logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
+ break
+
+ # Add new sections to accumulator
+ allSections.extend(extractedSections)
+
+ # Check if we should continue (completion detection)
+ if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
+ continue
+ else:
+ # Done - build final result
+ if operationId:
+ self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
+ break
+
+ except Exception as e:
+ logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
+ break
+
+ if iteration >= maxIterations:
+ logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")
+
+ # Build final result from accumulated sections
+ final_result = self._buildFinalResultFromSections(allSections)
+
+ # Write final result to debug file
+ self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
+
+ return final_result
+
+ def _extractSectionsFromResponse(
+ self,
+ result: str,
+ iteration: int,
+ debugPrefix: str
+ ) -> Tuple[List[Dict[str, Any]], bool]:
+ """
+ Extract sections from AI response, handling both valid and broken JSON.
+ Uses repair mechanism for broken JSON.
+ Checks for "complete_response": true flag to determine completion.
+ Returns (sections, wasJsonComplete)
+ """
+ # First, try to parse as valid JSON
+ try:
+ extracted = extractJsonString(result)
+ parsed_result = json.loads(extracted)
+
+ # Check if AI marked response as complete
+ isComplete = parsed_result.get("complete_response", False) == True
+
+ # Extract sections from parsed JSON
+ sections = extractSectionsFromDocument(parsed_result)
+
+ # If AI marked as complete, always return as complete
+ if isComplete:
+ return sections, True
+
+ # If in continuation mode (iteration > 1), continuation responses are expected to be fragments
+ # A fragment with 0 extractable sections means JSON is incomplete - need another iteration
+ if len(sections) == 0 and iteration > 1:
+ return sections, False # Mark as incomplete so loop continues
+
+ # First iteration with 0 sections means empty response - stop
+ if len(sections) == 0:
+ return sections, True # Complete but empty
+
+ return sections, True # JSON was complete with sections
+
+ except json.JSONDecodeError as e:
+ # Broken JSON - try repair mechanism (normal in iterative generation)
+ self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
+
+ # Try to repair
+ repaired_json = repairBrokenJson(result)
+
+ if repaired_json:
+ # Extract sections from repaired JSON
+ sections = extractSectionsFromDocument(repaired_json)
+ return sections, False # JSON was broken but repaired
+ else:
+ # Repair failed - log error
+ logger.error(f"Iteration {iteration}: All repair strategies failed")
+ return [], False
+
+ except Exception as e:
+ logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
+ return [], False
+
+ def _shouldContinueGeneration(
+ self,
+ allSections: List[Dict[str, Any]],
+ iteration: int,
+ wasJsonComplete: bool,
+ rawResponse: str = None
+ ) -> bool:
+ """
+ Determine if generation should continue based on JSON completeness and complete_response flag.
+ Returns True if we should continue, False if done.
+ """
+ if len(allSections) == 0:
+ return True # No sections yet, continue
+
+ # Check for complete_response flag in raw response
+ if rawResponse:
+ import re
+ if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
+ return False
+
+ # If JSON was complete (and no complete_response flag), we're done
+ # If JSON was broken and repaired, continue to get more content
+ if wasJsonComplete:
+ return False
+ else:
+ return True
+
+ def _buildFinalResultFromSections(
+ self,
+ allSections: List[Dict[str, Any]]
+ ) -> str:
+ """
+ Build final JSON result from accumulated sections.
+ """
+ if not allSections:
+ return ""
+
+ # Build documents structure
+ # Assuming single document for now
+ documents = [{
+ "id": "doc_1",
+ "title": "Generated Document", # This should come from prompt
+ "filename": "document.json",
+ "sections": allSections
+ }]
+
+ result = {
+ "metadata": {
+ "split_strategy": "single_document",
+ "source_documents": [],
+ "extraction_method": "ai_generation"
+ },
+ "documents": documents
+ }
+
+ return json.dumps(result, indent=2)
+
+ # Public API Methods
+
+ # Planning AI Call
+ async def callAiPlanning(
+ self,
+ prompt: str,
+ placeholders: Optional[List[PromptPlaceholder]] = None
+ ) -> str:
+ """
+ Planning AI call for task planning, action planning, action selection, etc.
+ Always uses static parameters optimized for planning tasks.
+
+ Args:
+ prompt: The planning prompt
+ placeholders: Optional list of placeholder replacements
+
+ Returns:
+ Planning JSON response
+ """
+ await self._ensureAiObjectsInitialized()
+
+ # Planning calls always use static parameters
+ options = AiCallOptions(
+ operationType=OperationTypeEnum.PLAN,
+ priority=PriorityEnum.QUALITY,
+ processingMode=ProcessingModeEnum.DETAILED,
+ compressPrompt=False,
+ compressContext=False
+ )
+
+ # Build full prompt with placeholders
+ if placeholders:
+ placeholdersDict = {p.label: p.content for p in placeholders}
+ fullPrompt = self._buildPromptWithPlaceholders(prompt, placeholdersDict)
+ else:
+ fullPrompt = prompt
+
+ # Root-cause fix: planning must return raw single-shot JSON, not section-based output
+ request = AiCallRequest(
+ prompt=fullPrompt,
+ context="",
+ options=options
+ )
+
+ # Debug: persist prompt/response for analysis
+ self.services.utils.writeDebugFile(fullPrompt, "plan_prompt")
+ response = await self.aiObjects.call(request)
+ result = response.content or ""
+ self.services.utils.writeDebugFile(result, "plan_response")
+ return result
+
+ # Document Generation AI Call
+ async def callAiDocuments(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]] = None,
+ options: Optional[AiCallOptions] = None,
+ outputFormat: Optional[str] = None,
+ title: Optional[str] = None
+ ) -> Union[str, Dict[str, Any]]:
+ """
+ Document generation AI call for all non-planning calls.
+ Uses the current unified path with extraction and generation.
+
+ Args:
+ prompt: The main prompt for the AI call
+ documents: Optional list of documents to process
+ options: AI call configuration options
+ outputFormat: Optional output format for document generation
+ title: Optional title for generated documents
+
+ Returns:
+ AI response as string, or dict with documents if outputFormat is specified
+ """
+ await self._ensureAiObjectsInitialized()
+
+ # Create separate operationId for detailed progress tracking
+ workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
+ aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"
+
+ # Start progress tracking for this operation
+ self.services.workflow.progressLogStart(
+ aiOperationId,
+ "AI call with documents",
+ "Document Generation",
+ f"Format: {outputFormat or 'text'}"
+ )
+
+ try:
+ # Ensure AI connectors are initialized before delegating to documentProcessor/generator
+ if hasattr(self.services, 'ai') and hasattr(self.services.ai, '_ensureAiObjectsInitialized'):
+ await self.services.ai._ensureAiObjectsInitialized()
+ if options is None or (hasattr(options, 'operationType') and options.operationType is None):
+ # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
+ options = await self._analyzePromptAndCreateOptions(prompt)
+
+ # Route image-generation requests directly to image pipeline to avoid JSON loop
+ imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"}
+ opType = getattr(options, "operationType", None)
+ fmt = (outputFormat or "").lower() if outputFormat else None
+ isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats)
+ if isImageRequest:
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
+ imageResponse = await self.generateImage(prompt, options=options)
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated")
+ self.services.workflow.progressLogFinish(aiOperationId, True)
+ return imageResponse
+
+ # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
+ # Compressing would truncate the template structure and confuse the AI
+ if outputFormat: # Document generation with structured output
+ if not options:
+ options = AiCallOptions()
+ options.compressPrompt = False # JSON templates must NOT be truncated
+ options.compressContext = False # Context also should not be compressed
+
+ # Handle document generation with specific output format using unified approach
+ if outputFormat:
+ # Use unified generation method for all document generation
+ if documents and len(documents) > 0:
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
+ extracted_content = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
+ else:
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
+ extracted_content = None
+
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
+ from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
+ # First call without continuation context
+ generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
+
+ # Prepare prompt builder arguments for continuation
+ promptArgs = {
+ "outputFormat": outputFormat,
+ "userPrompt": prompt,
+ "title": title,
+ "extracted_content": extracted_content
+ }
+
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
+ generated_json = await self._callAiWithLooping(
+ generation_prompt,
+ options,
+ "document_generation",
+ buildGenerationPrompt,
+ promptArgs,
+ aiOperationId
+ )
+
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
+ # Parse the generated JSON (extract fenced/embedded JSON first)
+ try:
+ extracted_json = self.services.utils.jsonExtractString(generated_json)
+ generated_data = json.loads(extracted_json)
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse generated JSON: {str(e)}")
+ logger.error(f"JSON content length: {len(generated_json)}")
+ logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
+ logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
+
+ # Write the problematic JSON to debug file
+ self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
+
+ self.services.workflow.progressLogFinish(aiOperationId, False)
+ return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
+
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
+ # Render to final format using the existing renderer
+ try:
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generationService = GenerationService(self.services)
+ rendered_content, mime_type = await generationService.renderReport(
+ generated_data, outputFormat, title or "Generated Document", prompt, self
+ )
+
+ # Build result in the expected format
+ result = {
+ "success": True,
+ "content": generated_data,
+ "documents": [{
+ "documentName": f"generated.{outputFormat}",
+ "documentData": rendered_content,
+ "mimeType": mime_type,
+ "title": title or "Generated Document"
+ }],
+ "is_multi_file": False,
+ "format": outputFormat,
+ "title": title,
+ "split_strategy": "single",
+ "total_documents": 1,
+ "processed_documents": 1
+ }
+
+ # Log AI response for debugging
+ self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
+
+ self.services.workflow.progressLogFinish(aiOperationId, True)
+ return result
+
+ except Exception as e:
+ logger.error(f"Error rendering document: {str(e)}")
+ self.services.workflow.progressLogFinish(aiOperationId, False)
+ return {"success": False, "error": f"Rendering failed: {str(e)}"}
+
+ # Handle text calls (no output format specified)
+ self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
+ if documents:
+ # Use document processing for text calls with documents
+ result = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
+ else:
+ # Use shared core function for direct text calls
+ result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)
+
+ self.services.workflow.progressLogFinish(aiOperationId, True)
+ return result
+
+ except Exception as e:
+ logger.error(f"Error in callAiDocuments: {str(e)}")
+ self.services.workflow.progressLogFinish(aiOperationId, False)
+ raise
+
# AI Image Analysis
async def readImage(
self,
@@ -102,7 +672,64 @@ class AiService:
) -> str:
"""Call AI for image analysis using interface.call() with contentParts."""
await self._ensureAiObjectsInitialized()
- return await self.coreAi.readImage(prompt, imageData, mimeType, options)
+
+ try:
+ # Check if imageData is valid
+ if not imageData:
+ error_msg = "No image data provided"
+ logger.error(f"Error in AI image analysis: {error_msg}")
+ return f"Error: {error_msg}"
+
+
+ # Always use IMAGE_ANALYSE operation type for image processing
+ if options is None:
+ options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
+ else:
+ # Override the operation type to ensure image analysis
+ options.operationType = OperationTypeEnum.IMAGE_ANALYSE
+
+ # Create content parts with image data
+ from modules.datamodels.datamodelExtraction import ContentPart
+ import base64
+
+ # ContentPart.data must be a string - convert bytes to base64 if needed
+ if isinstance(imageData, bytes):
+ imageDataStr = base64.b64encode(imageData).decode('utf-8')
+ else:
+ # Already a base64 string
+ imageDataStr = imageData
+
+ imagePart = ContentPart(
+ id="image_0",
+ parentId=None,
+ label="Image",
+ typeGroup="image",
+ mimeType=mimeType or "image/jpeg",
+ data=imageDataStr, # Must be a string (base64 encoded)
+ metadata={"imageAnalysis": True}
+ )
+
+ # Create request with content parts
+ request = AiCallRequest(
+ prompt=prompt,
+ context="",
+ options=options,
+ contentParts=[imagePart]
+ )
+
+ response = await self.aiObjects.call(request)
+ result = response.content
+
+ # Check if result is valid
+ if not result or (isinstance(result, str) and not result.strip()):
+ error_msg = f"No response from AI image analysis (result: {repr(result)})"
+ logger.error(f"Error in AI image analysis: {error_msg}")
+ return f"Error: {error_msg}"
+
+ return result
+ except Exception as e:
+ logger.error(f"Error in AI image analysis: {str(e)}")
+ return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
@@ -115,34 +742,19 @@ class AiService:
) -> Dict[str, Any]:
"""Generate an image using AI using interface.generateImage()."""
await self._ensureAiObjectsInitialized()
- return await self.coreAi.generateImage(prompt, size, quality, style, options)
-
-
- # Core AI Methods - Delegating to SubCoreAi
- async def callAiPlanning(
- self,
- prompt: str,
- placeholders: Optional[List[PromptPlaceholder]] = None
- ) -> str:
- """Planning AI call for task planning, action planning, action selection, etc."""
- await self._ensureAiObjectsInitialized()
- # Always use "json" for planning calls since they return JSON
- return await self.coreAi.callAiPlanning(prompt, placeholders)
-
- async def callAiDocuments(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]] = None,
- options: Optional[AiCallOptions] = None,
- outputFormat: Optional[str] = None,
- title: Optional[str] = None
- ) -> Union[str, Dict[str, Any]]:
- """Document generation AI call for all non-planning calls."""
- await self._ensureAiObjectsInitialized()
- return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)
-
- def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
- """Sanitize prompt content to prevent injection attacks and ensure safe presentation."""
- return sanitizePromptContent(content, contentType)
-
+
+ try:
+ response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
+
+ # Emit stats for image generation
+ self.services.workflow.storeWorkflowStat(
+ self.services.currentWorkflow,
+ response,
+ f"ai.generate.image"
+ )
+
+ return response
+ except Exception as e:
+ logger.error(f"Error in AI image generation: {str(e)}")
+ return {"success": False, "error": str(e)}
diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py
deleted file mode 100644
index e35af0d0..00000000
--- a/modules/services/serviceAi/subCoreAi.py
+++ /dev/null
@@ -1,687 +0,0 @@
-import json
-import logging
-from typing import Dict, Any, List, Optional, Tuple, Union
-from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
-from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
-from modules.services.serviceAi.subSharedAiUtils import (
- buildPromptWithPlaceholders,
- extractTextFromContentParts,
- reduceText,
- determineCallType
-)
-from modules.shared.jsonUtils import (
- extractJsonString,
- repairBrokenJson,
- extractSectionsFromDocument,
- buildContinuationContext
-)
-
-logger = logging.getLogger(__name__)
-
-# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT
-# Sections are accumulated and repair mechanism handles broken JSON automatically
-
-# Rebuild the model to resolve forward references
-AiCallRequest.model_rebuild()
-
-
-class SubCoreAi:
- """Core AI operations including image analysis, text generation, and planning calls."""
-
- def __init__(self, services, aiObjects):
- """Initialize core AI operations.
-
- Args:
- services: Service center instance for accessing other services
- aiObjects: Initialized AiObjects instance
- """
- self.services = services
- self.aiObjects = aiObjects
-
- async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
- """Analyze prompt to determine appropriate AiCallOptions parameters."""
- try:
- # Get dynamic enum values from Pydantic models
- operation_types = [e.value for e in OperationTypeEnum]
- priorities = [e.value for e in PriorityEnum]
- processing_modes = [e.value for e in ProcessingModeEnum]
-
- # Create analysis prompt for AI to determine operation type and parameters
- analysisPrompt = f"""
-You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
-
-PROMPT TO ANALYZE:
-{self.services.ai.sanitizePromptContent(prompt, 'userinput')}
-
-Based on the prompt content, determine:
-1. operationType: Choose the most appropriate from: {', '.join(operation_types)}
-2. priority: Choose from: {', '.join(priorities)}
-3. processingMode: Choose from: {', '.join(processing_modes)}
-4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
-5. compressContext: true/false (true to summarize context, false to process fully)
-
-Respond with ONLY a JSON object in this exact format:
-{{
- "operationType": "dataAnalyse",
- "priority": "balanced",
- "processingMode": "basic",
- "compressPrompt": true,
- "compressContext": true
-}}
-"""
-
- # Use AI to analyze the prompt
- request = AiCallRequest(
- prompt=analysisPrompt,
- options=AiCallOptions(
- operationType=OperationTypeEnum.DATA_ANALYSE,
- priority=PriorityEnum.SPEED,
- processingMode=ProcessingModeEnum.BASIC,
- compressPrompt=True,
- compressContext=False
- )
- )
-
- response = await self.aiObjects.call(request)
-
- # Parse AI response
- try:
- import json
- json_start = response.content.find('{')
- json_end = response.content.rfind('}') + 1
- if json_start != -1 and json_end > json_start:
- analysis = json.loads(response.content[json_start:json_end])
-
- # Map string values to enums
- operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
- priority = PriorityEnum(analysis.get('priority', 'balanced'))
- processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
-
- return AiCallOptions(
- operationType=operation_type,
- priority=priority,
- processingMode=processing_mode,
- compressPrompt=analysis.get('compressPrompt', True),
- compressContext=analysis.get('compressContext', True)
- )
- except Exception as e:
- logger.warning(f"Failed to parse AI analysis response: {e}")
-
- except Exception as e:
- logger.warning(f"Prompt analysis failed: {e}")
-
- # Fallback to default options
- return AiCallOptions(
- operationType=OperationTypeEnum.DATA_ANALYSE,
- priority=PriorityEnum.BALANCED,
- processingMode=ProcessingModeEnum.BASIC
- )
-
-
-
- # Shared Core Function for AI Calls with Looping and Repair
- async def _callAiWithLooping(
- self,
- prompt: str,
- options: AiCallOptions,
- debugPrefix: str = "ai_call",
- promptBuilder: Optional[callable] = None,
- promptArgs: Optional[Dict[str, Any]] = None,
- operationId: Optional[str] = None
- ) -> str:
- """
- Shared core function for AI calls with repair-based looping system.
- Automatically repairs broken JSON and continues generation seamlessly.
-
- Args:
- prompt: The prompt to send to AI
- options: AI call configuration options
- debugPrefix: Prefix for debug file names
- promptBuilder: Optional function to rebuild prompts for continuation
- promptArgs: Optional arguments for prompt builder
- operationId: Optional operation ID for progress tracking
-
- Returns:
- Complete AI response after all iterations
- """
- max_iterations = 50 # Prevent infinite loops
- iteration = 0
- allSections = [] # Accumulate all sections across iterations
- lastRawResponse = None # Store last raw JSON response for continuation
-
- while iteration < max_iterations:
- iteration += 1
-
- # Update progress for iteration start
- if operationId:
- if iteration == 1:
- self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
- else:
- # For continuation iterations, show progress incrementally
- base_progress = 0.5 + (min(iteration - 1, max_iterations) / max_iterations * 0.4) # Progress from 0.5 to 0.9 over max_iterations iterations
- self.services.workflow.progressLogUpdate(operationId, base_progress, f"Continuing generation (iteration {iteration})")
-
- # Build iteration prompt
- if len(allSections) > 0 and promptBuilder and promptArgs:
- # This is a continuation - build continuation context with raw JSON and rebuild prompt
- continuationContext = buildContinuationContext(allSections, lastRawResponse)
- if not lastRawResponse:
- logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
-
- # Rebuild prompt with continuation context using the provided prompt builder
- iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
- else:
- # First iteration - use original prompt
- iterationPrompt = prompt
-
- # Make AI call
- try:
- if operationId and iteration == 1:
- self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
- from modules.datamodels.datamodelAi import AiCallRequest
- request = AiCallRequest(
- prompt=iterationPrompt,
- context="",
- options=options
- )
-
- # Write the ACTUAL prompt sent to AI
- if iteration == 1:
- self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
- else:
- self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
-
- response = await self.aiObjects.call(request)
- result = response.content
-
- # Update progress after AI call
- if operationId:
- if iteration == 1:
- self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
- else:
- progress = 0.6 + (min(iteration - 1, 10) * 0.03)
- self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")
-
- # Write raw AI response to debug file
- if iteration == 1:
- self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
- else:
- self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
-
- # Emit stats for this iteration
- self.services.workflow.storeWorkflowStat(
- self.services.currentWorkflow,
- response,
- f"ai.call.{debugPrefix}.iteration_{iteration}"
- )
-
- if not result or not result.strip():
- logger.warning(f"Iteration {iteration}: Empty response, stopping")
- break
-
- # Store raw response for continuation (even if broken)
- lastRawResponse = result
-
- # Check for complete_response flag in raw response (before parsing)
- import re
- if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
- pass # Flag detected, will stop in _shouldContinueGeneration
-
- # Extract sections from response (handles both valid and broken JSON)
- extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)
-
- # Update progress after parsing
- if operationId:
- if extractedSections:
- self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")
-
- if not extractedSections:
- # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
- if iteration > 1 and not wasJsonComplete:
- logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
- continue
- # Otherwise, stop if no sections
- logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
- break
-
- # Add new sections to accumulator
- allSections.extend(extractedSections)
-
- # Check if we should continue (completion detection)
- if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
- continue
- else:
- # Done - build final result
- if operationId:
- self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
- break
-
- except Exception as e:
- logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
- break
-
- if iteration >= max_iterations:
- logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")
-
- # Build final result from accumulated sections
- final_result = self._buildFinalResultFromSections(allSections)
-
- # Write final result to debug file
- self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
-
- return final_result
-
- def _extractSectionsFromResponse(
- self,
- result: str,
- iteration: int,
- debugPrefix: str
- ) -> Tuple[List[Dict[str, Any]], bool]:
- """
- Extract sections from AI response, handling both valid and broken JSON.
- Uses repair mechanism for broken JSON.
- Checks for "complete_response": true flag to determine completion.
- Returns (sections, wasJsonComplete)
- """
- # First, try to parse as valid JSON
- try:
- extracted = extractJsonString(result)
- parsed_result = json.loads(extracted)
-
- # Check if AI marked response as complete
- isComplete = parsed_result.get("complete_response", False) == True
-
- # Extract sections from parsed JSON
- sections = extractSectionsFromDocument(parsed_result)
-
- # If AI marked as complete, always return as complete
- if isComplete:
- return sections, True
-
- # If in continuation mode (iteration > 1), continuation responses are expected to be fragments
- # A fragment with 0 extractable sections means JSON is incomplete - need another iteration
- if len(sections) == 0 and iteration > 1:
- return sections, False # Mark as incomplete so loop continues
-
- # First iteration with 0 sections means empty response - stop
- if len(sections) == 0:
- return sections, True # Complete but empty
-
- return sections, True # JSON was complete with sections
-
- except json.JSONDecodeError as e:
- # Broken JSON - try repair mechanism (normal in iterative generation)
- self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
-
- # Try to repair
- repaired_json = repairBrokenJson(result)
-
- if repaired_json:
- # Extract sections from repaired JSON
- sections = extractSectionsFromDocument(repaired_json)
- return sections, False # JSON was broken but repaired
- else:
- # Repair failed - log error
- logger.error(f"Iteration {iteration}: All repair strategies failed")
- return [], False
-
- except Exception as e:
- logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
- return [], False
-
- def _shouldContinueGeneration(
- self,
- allSections: List[Dict[str, Any]],
- iteration: int,
- wasJsonComplete: bool,
- rawResponse: str = None
- ) -> bool:
- """
- Determine if generation should continue based on JSON completeness and complete_response flag.
- Returns True if we should continue, False if done.
- """
- if len(allSections) == 0:
- return True # No sections yet, continue
-
- # Check for complete_response flag in raw response
- if rawResponse:
- import re
- if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
- return False
-
- # If JSON was complete (and no complete_response flag), we're done
- # If JSON was broken and repaired, continue to get more content
- if wasJsonComplete:
- return False
- else:
- return True
-
- def _buildFinalResultFromSections(
- self,
- allSections: List[Dict[str, Any]]
- ) -> str:
- """
- Build final JSON result from accumulated sections.
- """
- if not allSections:
- return ""
-
- # Build documents structure
- # Assuming single document for now
- documents = [{
- "id": "doc_1",
- "title": "Generated Document", # This should come from prompt
- "filename": "document.json",
- "sections": allSections
- }]
-
- result = {
- "metadata": {
- "split_strategy": "single_document",
- "source_documents": [],
- "extraction_method": "ai_generation"
- },
- "documents": documents
- }
-
- return json.dumps(result, indent=2)
-
- # Old _buildContinuationPrompt and _mergeJsonContent methods removed
- # Now handled by repair mechanism in jsonUtils.py and section accumulation
-
-
- # Planning AI Call
- async def callAiPlanning(
- self,
- prompt: str,
- placeholders: Optional[List[PromptPlaceholder]] = None
- ) -> str:
- """
- Planning AI call for task planning, action planning, action selection, etc.
- Always uses static parameters optimized for planning tasks.
-
- Args:
- prompt: The planning prompt
- placeholders: Optional list of placeholder replacements
-
- Returns:
- Planning JSON response
- """
- # Planning calls always use static parameters
- options = AiCallOptions(
- operationType=OperationTypeEnum.PLAN,
- priority=PriorityEnum.QUALITY,
- processingMode=ProcessingModeEnum.DETAILED,
- compressPrompt=False,
- compressContext=False
- )
-
- # Build full prompt with placeholders
- if placeholders:
- placeholders_dict = {p.label: p.content for p in placeholders}
- full_prompt = buildPromptWithPlaceholders(prompt, placeholders_dict)
- else:
- full_prompt = prompt
-
- # Use shared core function with planning-specific debug prefix
- return await self._callAiWithLooping(full_prompt, options, "plan")
-
- # Document Generation AI Call
- async def callAiDocuments(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]] = None,
- options: Optional[AiCallOptions] = None,
- outputFormat: Optional[str] = None,
- title: Optional[str] = None
- ) -> Union[str, Dict[str, Any]]:
- """
- Document generation AI call for all non-planning calls.
- Uses the current unified path with extraction and generation.
-
- Args:
- prompt: The main prompt for the AI call
- documents: Optional list of documents to process
- options: AI call configuration options
- outputFormat: Optional output format for document generation
- title: Optional title for generated documents
-
- Returns:
- AI response as string, or dict with documents if outputFormat is specified
- """
- # Create separate operationId for detailed progress tracking
- import time
- import uuid
- workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
- aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"
-
- # Start progress tracking for this operation
- self.services.workflow.progressLogStart(
- aiOperationId,
- "AI call with documents",
- "Document Generation",
- f"Format: {outputFormat or 'text'}"
- )
-
- try:
- if options is None or (hasattr(options, 'operationType') and options.operationType is None):
- # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
- self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
- options = await self._analyzePromptAndCreateOptions(prompt)
-
- # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
- # Compressing would truncate the template structure and confuse the AI
- if outputFormat: # Document generation with structured output
- if not options:
- options = AiCallOptions()
- options.compressPrompt = False # JSON templates must NOT be truncated
- options.compressContext = False # Context also should not be compressed
-
- # Handle document generation with specific output format using unified approach
- if outputFormat:
- # Use unified generation method for all document generation
- if documents and len(documents) > 0:
- self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
- extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
- else:
- self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
- extracted_content = None
-
- self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
- from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
- # First call without continuation context
- generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
-
- # Prepare prompt builder arguments for continuation
- promptArgs = {
- "outputFormat": outputFormat,
- "userPrompt": prompt,
- "title": title,
- "extracted_content": extracted_content
- }
-
- self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
- generated_json = await self._callAiWithLooping(
- generation_prompt,
- options,
- "document_generation",
- buildGenerationPrompt,
- promptArgs,
- aiOperationId
- )
-
- self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
- # Parse the generated JSON (extract fenced/embedded JSON first)
- try:
- extracted_json = self.services.utils.jsonExtractString(generated_json)
- generated_data = json.loads(extracted_json)
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse generated JSON: {str(e)}")
- logger.error(f"JSON content length: {len(generated_json)}")
- logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
- logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
-
- # Write the problematic JSON to debug file
- self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
-
- self.services.workflow.progressLogFinish(aiOperationId, False)
- return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
-
- self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
- # Render to final format using the existing renderer
- try:
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
- rendered_content, mime_type = await generationService.renderReport(
- generated_data, outputFormat, title or "Generated Document", prompt, self
- )
-
- # Build result in the expected format
- result = {
- "success": True,
- "content": generated_data,
- "documents": [{
- "documentName": f"generated.{outputFormat}",
- "documentData": rendered_content,
- "mimeType": mime_type,
- "title": title or "Generated Document"
- }],
- "is_multi_file": False,
- "format": outputFormat,
- "title": title,
- "split_strategy": "single",
- "total_documents": 1,
- "processed_documents": 1
- }
-
- # Log AI response for debugging
- self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
-
- self.services.workflow.progressLogFinish(aiOperationId, True)
- return result
-
- except Exception as e:
- logger.error(f"Error rendering document: {str(e)}")
- self.services.workflow.progressLogFinish(aiOperationId, False)
- return {"success": False, "error": f"Rendering failed: {str(e)}"}
-
- # Handle text calls (no output format specified)
- self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
- if documents:
- # Use document processing for text calls with documents
- result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
- else:
- # Use shared core function for direct text calls
- result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)
-
- self.services.workflow.progressLogFinish(aiOperationId, True)
- return result
-
- except Exception as e:
- logger.error(f"Error in callAiDocuments: {str(e)}")
- self.services.workflow.progressLogFinish(aiOperationId, False)
- raise
-
-
- # AI Image Analysis
- async def readImage(
- self,
- prompt: str,
- imageData: Union[str, bytes],
- mimeType: str = None,
- options: Optional[AiCallOptions] = None,
- ) -> str:
- """Call AI for image analysis using interface.call() with contentParts."""
- try:
- # Check if imageData is valid
- if not imageData:
- error_msg = "No image data provided"
- logger.error(f"Error in AI image analysis: {error_msg}")
- return f"Error: {error_msg}"
-
-
- # Always use IMAGE_ANALYSE operation type for image processing
- if options is None:
- options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
- else:
- # Override the operation type to ensure image analysis
- options.operationType = OperationTypeEnum.IMAGE_ANALYSE
-
- # Create content parts with image data
- from modules.datamodels.datamodelExtraction import ContentPart
- import base64
-
- # ContentPart.data must be a string - convert bytes to base64 if needed
- if isinstance(imageData, bytes):
- imageDataStr = base64.b64encode(imageData).decode('utf-8')
- else:
- # Already a base64 string
- imageDataStr = imageData
-
- imagePart = ContentPart(
- id="image_0",
- parentId=None,
- label="Image",
- typeGroup="image",
- mimeType=mimeType or "image/jpeg",
- data=imageDataStr, # Must be a string (base64 encoded)
- metadata={"imageAnalysis": True}
- )
-
- # Create request with content parts
- from modules.datamodels.datamodelAi import AiCallRequest
- request = AiCallRequest(
- prompt=prompt,
- context="",
- options=options,
- contentParts=[imagePart]
- )
-
- response = await self.aiObjects.call(request)
- result = response.content
-
- # Check if result is valid
- if not result or (isinstance(result, str) and not result.strip()):
- error_msg = f"No response from AI image analysis (result: {repr(result)})"
- logger.error(f"Error in AI image analysis: {error_msg}")
- return f"Error: {error_msg}"
-
- return result
- except Exception as e:
- logger.error(f"Error in AI image analysis: {str(e)}")
- return f"Error: {str(e)}"
-
- # AI Image Generation
- async def generateImage(
- self,
- prompt: str,
- size: str = "1024x1024",
- quality: str = "standard",
- style: str = "vivid",
- options: Optional[AiCallOptions] = None,
- ) -> Dict[str, Any]:
- """Generate an image using AI using interface.generateImage()."""
- try:
- response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
-
- # Emit stats for image generation
- self.services.workflow.storeWorkflowStat(
- self.services.currentWorkflow,
- response,
- f"ai.generate.image"
- )
-
- # Convert response to dict format for backward compatibility
- if hasattr(response, 'content'):
- return {
- "success": True,
- "content": response.content,
- "modelName": response.modelName,
- "priceUsd": response.priceUsd,
- "processingTime": response.processingTime
- }
- else:
- return response
- except Exception as e:
- logger.error(f"Error in AI image generation: {str(e)}")
- return {"success": False, "error": str(e)}
diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py
deleted file mode 100644
index 351f68cc..00000000
--- a/modules/services/serviceAi/subDocumentGeneration.py
+++ /dev/null
@@ -1,500 +0,0 @@
-import re
-import json
-import logging
-import time
-from datetime import datetime, UTC
-from typing import Dict, Any, List, Optional
-from modules.datamodels.datamodelChat import ChatDocument
-from modules.datamodels.datamodelAi import AiCallOptions
-
-logger = logging.getLogger(__name__)
-
-
-class SubDocumentGeneration:
- """Document generation operations including single-file and multi-file generation."""
-
- def __init__(self, services, aiObjects, documentProcessor):
- """Initialize document generation service.
-
- Args:
- services: Service center instance for accessing other services
- aiObjects: Initialized AiObjects instance
- documentProcessor: Document processing service instance
- """
- self.services = services
- self.aiObjects = aiObjects
- self.documentProcessor = documentProcessor
-
- async def callAiWithDocumentGeneration(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions,
- outputFormat: str,
- title: Optional[str]
- ) -> Dict[str, Any]:
- """
- Unified document generation method that handles both single and multi-file cases.
- Always uses multi-file approach internally.
-
- Args:
- prompt: The main prompt for the AI call
- documents: Optional list of documents to process
- options: AI call configuration options
- outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
- title: Optional title for generated documents
-
- Returns:
- Dict with generated documents and metadata in unified structure
- """
- try:
- # 1. Get unified extraction prompt
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
-
- extractionPrompt = await generationService.getAdaptiveExtractionPrompt(
- outputFormat=outputFormat,
- userPrompt=prompt,
- title=title,
- aiService=self
- )
-
- # 2. Process with unified pipeline (always multi-file approach)
- aiResponse = await self._processDocumentsUnified(
- documents, extractionPrompt, options
- )
-
- # 3. Return unified result structure
- return await self._buildUnifiedResult(aiResponse, outputFormat, title)
-
- except Exception as e:
- logger.error(f"Error in unified document generation: {str(e)}")
- return self._buildErrorResult(str(e), outputFormat, title)
-
- async def _processDocumentsUnified(
- self,
- documents: Optional[List[ChatDocument]],
- extractionPrompt: str,
- options: AiCallOptions
- ) -> Dict[str, Any]:
- """
- Unified document processing that handles both single and multi-file cases.
- Always processes as multi-file structure internally.
- """
-
- # Init progress logger
- workflow = self.services.currentWorkflow
- operationId = f"docGenUnified_{workflow.id}_{int(time.time())}"
-
- try:
- # Start progress tracking
- self.services.workflow.progressLogStart(
- operationId,
- "Generate",
- "Unified Document Generation",
- f"Processing {len(documents) if documents else 0} documents"
- )
-
- # Update progress - generating extraction prompt
- self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt")
-
- # Write prompt to debug file
- self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents)
-
- # Process with unified JSON pipeline using continuation logic
- aiResponse = await self.documentProcessor.processDocumentsWithContinuation(
- documents, extractionPrompt, options
- )
-
- # Update progress - AI processing completed
- self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done")
-
-
-
- # Write AI response to debug file
- response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False) if isinstance(aiResponse, dict) else str(aiResponse)
- self.services.utils.writeDebugFile(response_json, "ai_response", documents)
-
- # Validate response structure
- if not self._validateUnifiedResponseStructure(aiResponse):
- raise Exception("AI response is not valid unified document structure")
-
- # Emit raw extracted data as a chat message attachment
- try:
- await self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified")
- except Exception:
- logger.warning("Failed to emit raw extraction chat message (unified)")
-
- # Complete progress tracking
- self.services.workflow.progressLogFinish(operationId, True)
-
- return aiResponse
-
- except Exception as e:
- logger.error(f"Error in unified document processing: {str(e)}")
- self.services.workflow.progressLogFinish(operationId, False)
- raise
-
- def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool:
- """
- Unified validation that checks for document structure.
- Handles both multi-file (documents array) and single-file (sections array) structures.
- """
- try:
- if not isinstance(response, dict):
- logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
- return False
-
- # Check for documents array (multi-file structure)
- hasDocuments = "documents" in response
- isDocumentsList = isinstance(response.get("documents"), list)
-
- # Check for sections array (single-file structure)
- hasSections = "sections" in response
- isSectionsList = isinstance(response.get("sections"), list)
-
- if hasDocuments and isDocumentsList:
- # Multi-file structure
- documents = response.get("documents", [])
- if not documents:
- logger.warning("Unified validation failed: documents array is empty")
- return False
-
- # Validate each document individually
- validDocuments = 0
- for i, doc in enumerate(documents):
- if self._validateDocumentStructure(doc, i):
- validDocuments += 1
- else:
- logger.warning(f"Document {i} failed validation, but continuing with others")
-
- # Process succeeds if at least one document is valid
- if validDocuments == 0:
- logger.error("Unified validation failed: no valid documents found")
- return False
-
- logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid")
- return True
-
- elif hasSections and isSectionsList:
- # Single-file structure - convert to multi-file format
- logger.info("Converting single-file structure to multi-file format")
- sections = response.get("sections", [])
- if not sections:
- logger.warning("Unified validation failed: sections array is empty")
- return False
-
- # Convert to documents array format
- response["documents"] = [{
- "id": "document_1",
- "title": response.get("metadata", {}).get("title", "Generated Document"),
- "filename": "document_1",
- "sections": sections
- }]
-
- logger.info("Successfully converted single-file structure to multi-file format")
- return True
-
- else:
- # No valid structure found - fail with clear error details
- logger.error("Unified validation failed: No valid structure found")
- logger.error(f"Response type: {type(response)}")
- logger.error(f"Available keys: {list(response.keys()) if isinstance(response, dict) else 'Not a dict'}")
- logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}")
- logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}")
- logger.error(f"Full response: {response}")
- return False
-
- except Exception as e:
- logger.warning(f"Unified response validation failed with exception: {str(e)}")
- return False
-
- def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool:
- """
- Validate individual document structure.
- Returns True if document is valid, False otherwise.
- Does not fail the entire process if one document is invalid.
- """
- try:
- if not isinstance(document, dict):
- logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}")
- logger.error(f"Document {documentIndex} content: {document}")
- return False
-
- # Check for required fields
- hasTitle = "title" in document
- hasSections = "sections" in document
- isSectionsList = isinstance(document.get("sections"), list)
-
- logger.debug(f"Document {documentIndex} structure check:")
- logger.debug(f" - hasTitle: {hasTitle}")
- logger.debug(f" - hasSections: {hasSections}")
- logger.debug(f" - isSectionsList: {isSectionsList}")
- logger.debug(f" - available keys: {list(document.keys())}")
-
- if not (hasTitle and hasSections and isSectionsList):
- logger.error(f"Document {documentIndex} validation failed:")
- logger.error(f" - title present: {hasTitle}")
- logger.error(f" - sections present: {hasSections}")
- logger.error(f" - sections is list: {isSectionsList}")
- logger.error(f" - document content: {document}")
- return False
-
- sections = document.get("sections", [])
- if not sections:
- logger.error(f"Document {documentIndex} validation failed: sections array is empty")
- logger.error(f" - document content: {document}")
- return False
-
- logger.info(f"Document {documentIndex} validation passed")
- return True
-
- except Exception as e:
- logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}")
- logger.error(f" - document content: {document}")
- return False
-
- async def _buildUnifiedResult(
- self,
- aiResponse: Dict[str, Any],
- outputFormat: str,
- title: str
- ) -> Dict[str, Any]:
- """
- Build unified result structure that always returns array-based format.
- Content is always a multi-document structure.
- """
- try:
- # Process all documents uniformly
- generatedDocuments = []
- documents = aiResponse.get("documents", [])
-
- for i, docData in enumerate(documents):
- try:
- processedDocument = await self._processDocument(
- docData, outputFormat, title, i
- )
- generatedDocuments.append(processedDocument)
- except Exception as e:
- logger.warning(f"Failed to process document {i}: {str(e)}, skipping")
- continue
-
- if not generatedDocuments:
- raise Exception("No documents could be processed successfully")
-
- # Build unified result
- result = {
- "success": True,
- "content": aiResponse, # Always multi-document structure
- "documents": generatedDocuments, # Always array
- "is_multi_file": len(generatedDocuments) > 1,
- "format": outputFormat,
- "title": title,
- "total_documents": len(generatedDocuments),
- "processed_documents": len(generatedDocuments)
- }
-
- return result
-
- except Exception as e:
- logger.error(f"Error building unified result: {str(e)}")
- return self._buildErrorResult(str(e), outputFormat, title)
-
- async def _processDocument(
- self,
- docData: Dict[str, Any],
- outputFormat: str,
- title: str,
- documentIndex: int
- ) -> Dict[str, Any]:
- """
- Process individual document with content enhancement and rendering.
- """
- try:
- # Get generation service
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
-
- # Use AI generation to enhance the extracted JSON before rendering
- enhancedContent = docData # Default to original
- if docData.get("sections"):
- try:
- # Get generation prompt directly
- from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
- generationPrompt = await buildGenerationPrompt(
- outputFormat=outputFormat,
- userPrompt=title,
- title=docData.get("title", title)
- )
-
- # Prepare the AI call
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
- requestOptions = AiCallOptions()
- requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
-
- # Create context with the extracted JSON content
- context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}"
-
- request = AiCallRequest(
- prompt=generationPrompt,
- context=context,
- options=requestOptions
- )
-
- # Write document generation prompt to debug file
- self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt")
-
- # Call AI to enhance the content
- response = await self.aiObjects.call(request)
-
- # Write document generation response to debug file
- self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response")
-
- if response and response.content:
- # Parse the AI response as JSON
- try:
- result = response.content.strip()
-
- # Extract JSON from markdown if present
- jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
- if jsonMatch:
- result = jsonMatch.group(1).strip()
- elif result.startswith('```json'):
- result = re.sub(r'^```json\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
- elif result.startswith('```'):
- result = re.sub(r'^```\s*', '', result)
- result = re.sub(r'\s*```$', '', result)
-
- # Try to parse JSON
- enhancedContent = json.loads(result)
- logger.info(f"AI enhanced JSON content successfully for document {documentIndex}")
-
- except json.JSONDecodeError as e:
- logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content")
- enhancedContent = docData
- else:
- logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content")
- enhancedContent = docData
-
- except Exception as e:
- logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content")
- enhancedContent = docData
-
- # Render the enhanced JSON content
- renderedContent, mimeType = await generationService.renderReport(
- extractedContent=enhancedContent,
- outputFormat=outputFormat,
- title=docData.get("title", title),
- userPrompt=title,
- aiService=self
- )
-
- # Generate proper filename
- baseFilename = docData.get("filename", f"document_{documentIndex + 1}")
- if '.' in baseFilename:
- baseFilename = baseFilename.rsplit('.', 1)[0]
-
- # Add proper extension based on output format
- if outputFormat.lower() == "docx":
- filename = f"{baseFilename}.docx"
- elif outputFormat.lower() == "pdf":
- filename = f"{baseFilename}.pdf"
- elif outputFormat.lower() == "html":
- filename = f"{baseFilename}.html"
- else:
- filename = f"{baseFilename}.{outputFormat}"
-
- return {
- "documentName": filename,
- "documentData": renderedContent,
- "mimeType": mimeType,
- "title": docData.get("title", title),
- "documentIndex": documentIndex
- }
-
- except Exception as e:
- logger.error(f"Error processing document {documentIndex}: {str(e)}")
- raise
-
- def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]:
- """
- Build error result with unified structure.
- """
- return {
- "success": False,
- "error": errorMessage,
- "content": {},
- "documents": [],
- "is_multi_file": False,
- "format": outputFormat,
- "title": title,
- "split_strategy": "error",
- "total_documents": 0,
- "processed_documents": 0
- }
-
- async def _callAiJson(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions
- ) -> Dict[str, Any]:
- """
- Handle AI calls with document processing for JSON output.
- Returns structured JSON document instead of text.
- """
- # Process documents with JSON merging
- return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
-
-
- async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
- """
- Create a ChatMessage with the extracted raw JSON attached as a file so the user
- has access to the data even if downstream processing fails.
- """
- try:
- services = self.services
- workflow = services.currentWorkflow
-
- # Serialize payload
- ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
- content_text = json.dumps(payload, ensure_ascii=False, indent=2)
- content_bytes = content_text.encode('utf-8')
-
- # Store as file via component storage
- file_name = f"{label}_{ts}.json"
- file_item = services.interfaceDbComponent.createFile(
- name=file_name,
- mimeType="application/json",
- content=content_bytes
- )
- services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
-
- # Lookup file info for ChatDocument
- file_info = services.workflow.getFileInfo(file_item.id)
- doc = ChatDocument(
- messageId="", # set after message creation
- fileId=file_item.id,
- fileName=file_info.get("fileName", file_name) if file_info else file_name,
- fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
- mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json"
- )
-
- # Create message referencing the file - include document in initial call
- messageData = {
- "workflowId": workflow.id,
- "role": "assistant",
- "message": "Raw extraction data saved",
- "status": "data",
- "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
- "publishedAt": services.utils.timestampGetUtc(),
- "documentsLabel": label,
- "documents": []
- }
-
- # Store message with document included from the start
- services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc])
- except Exception:
- # Non-fatal; ignore if storage or chat creation fails
- return
\ No newline at end of file
diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py
index a9d01a8a..2903c066 100644
--- a/modules/services/serviceAi/subDocumentProcessing.py
+++ b/modules/services/serviceAi/subDocumentProcessing.py
@@ -5,7 +5,16 @@ import time
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
-from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted, PartResult, ExtractionOptions, MergeStrategy
+from modules.datamodels.datamodelExtraction import ContentExtracted, PartResult, ExtractionOptions, MergeStrategy
+# Resolve forward refs for ExtractionOptions (OperationTypeEnum) at runtime without using unsupported args
+try:
+ # Import here to avoid circular import at module load time
+ from modules.datamodels.datamodelAi import OperationTypeEnum
+ # Provide parent namespace so Pydantic can resolve forward refs
+ ExtractionOptions.__pydantic_parent_namespace__ = {"OperationTypeEnum": OperationTypeEnum}
+ ExtractionOptions.model_rebuild()
+except Exception as _e:
+ logging.getLogger(__name__).warning(f"ExtractionOptions forward-ref rebuild skipped: {_e}")
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
logger = logging.getLogger(__name__)
@@ -121,373 +130,6 @@ class SubDocumentProcessing:
self.services.workflow.progressLogFinish(operationId, False)
raise
- async def processDocumentsPerChunkJson(
- self,
- documents: List[ChatDocument],
- prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """
- Process documents with model-aware chunking and merge results in JSON mode.
- Returns structured JSON document instead of text.
- """
- if not documents:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- # Build extraction options using Pydantic model
- mergeStrategy = MergeStrategy(
- useIntelligentMerging=True,
- prompt=prompt,
- groupBy="typeGroup",
- orderBy="id",
- mergeType="concatenate"
- )
-
- extractionOptions = ExtractionOptions(
- prompt=prompt,
- operationType=options.operationType if options else OperationTypeEnum.DATA_EXTRACT,
- processDocumentsIndividually=True,
- mergeStrategy=mergeStrategy
- )
-
- logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.prompt)} chars, operationType={extractionOptions.operationType}")
-
- try:
- # Extract content WITHOUT chunking
- extractionResult = self.extractionService.extractContent(documents, extractionOptions)
-
- if not isinstance(extractionResult, list):
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- # Process parts with model-aware chunking
- partResults = await self._processPartsWithMapping(extractionResult, prompt, options)
-
- # Convert to JSON format (simplified for now)
- mergedJsonDocument = self._convertPartResultsToJson(partResults, options)
-
- # Normalize merged JSON into a single canonical table (only if table content exists)
- try:
- from modules.services.serviceNormalization.mainServiceNormalization import NormalizationService
- normalizer = NormalizationService(self.services)
- inventory = normalizer.discoverStructures(mergedJsonDocument)
-
- # Check if any table content was discovered
- tableHeaders = inventory.get("tableHeaders", [])
- if not tableHeaders:
- logger.info("No table content found in merged JSON, skipping normalization and returning original structure")
- else:
- # Use workflow id as cache key
- cacheKey = self.services.currentWorkflow.id
- # Provide the extraction/merge prompt context when available to help mapping
- mergePrompt = prompt
- mapping = await normalizer.requestHeaderMapping(inventory, cacheKey, None, mergePrompt)
- canonical = normalizer.applyMapping(mergedJsonDocument, mapping)
- report = normalizer.validateCanonical(canonical)
- if report.get('success'):
- mergedJsonDocument = canonical
- else:
- raise ValueError('Normalization produced zero rows')
- except Exception as e:
- # Log normalization failure but don't re-raise - continue with original merged JSON
- logger.warning(f"Normalization failed (expected): {str(e)}")
- logger.debug(f"Normalization error type: {type(e).__name__}")
- # Continue with original merged JSON instead of re-raising
-
- # Save merged JSON extraction content to debug
- jsonStr = json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2)
- self.services.utils.writeDebugFile(jsonStr, "extraction_merged_json")
-
- return mergedJsonDocument
-
- except Exception as e:
- logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}")
- logger.error(f"Exception type: {type(e).__name__}")
- logger.error(f"Exception args: {e.args}")
- import traceback
- logger.error(f"Traceback: {traceback.format_exc()}")
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- async def processDocumentsPerChunkJsonWithPrompt(
- self,
- documents: List[ChatDocument],
- custom_prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """
- Process documents with per-chunk AI calls and merge results in JSON mode.
- Uses a custom prompt instead of the default extraction prompt.
- Enhanced with partial results continuation logic.
- """
- if not documents:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- # Build extraction options using Pydantic model (model-aware chunking in AI call phase)
- mergeStrategy = MergeStrategy(
- useIntelligentMerging=True,
- prompt=custom_prompt,
- groupBy="typeGroup",
- orderBy="id",
- mergeType="concatenate"
- )
-
- extractionOptions = ExtractionOptions(
- prompt=custom_prompt, # Use the custom prompt instead of default
- operationType=options.operationType if options else OperationTypeEnum.DATA_EXTRACT,
- processDocumentsIndividually=True, # Process each document separately
- imageMaxPixels=1024 * 1024,
- imageQuality=85,
- mergeStrategy=mergeStrategy
- )
-
- logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.prompt)} chars, operationType={extractionOptions.operationType}")
-
- try:
- # Extract content with chunking
- extractionResult = self.extractionService.extractContent(documents, extractionOptions)
-
- if not isinstance(extractionResult, list):
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- # Process chunks with proper mapping
- logger.info(f"Processing {len(extractionResult)} chunks with custom prompt")
- logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...")
-
- # Debug: Show what content is being processed (before filtering)
- for i, ec in enumerate(extractionResult):
- if hasattr(ec, 'parts'):
- for j, part in enumerate(ec.parts):
- if not (hasattr(part, 'data') and part.data):
- # Check if this is an empty container chunk (which is expected)
- part_type = getattr(part, 'typeGroup', None)
- part_mime = getattr(part, 'mimeType', '')
-
- is_empty_container = (
- part_type == "container" and
- part_mime and
- 'document' in part_mime.lower()
- )
-
- if not is_empty_container:
- logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'")
-
- chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True)
-
- # Debug: Show what chunks were actually processed (after filtering)
- logger.info(f"After filtering: {len(chunkResults)} chunks will be processed")
-
- # Merge with JSON mode
- mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
-
- # Debug: Show what the AI actually returned
- logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}")
- if 'documents' in mergedJsonDocument:
- logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}")
- elif 'sections' in mergedJsonDocument:
- logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}")
-
- return mergedJsonDocument
-
- except Exception as e:
- logger.error(f"Error in per-chunk JSON processing: {str(e)}")
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- async def processDocumentsWithContinuation(
- self,
- documents: List[ChatDocument],
- custom_prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """
- Process documents with partial results continuation logic.
- Handles AI responses that indicate partial completion and loops until complete.
- """
- if not documents:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- logger.info("Starting document processing with continuation logic")
-
- # Build enhanced prompt with continuation instructions
- enhanced_prompt = self._buildContinuationPrompt(custom_prompt)
-
- # Process with continuation logic
- return await self._processWithContinuationLoop(documents, enhanced_prompt, options)
-
- def _buildContinuationPrompt(self, base_prompt: str) -> str:
- """
- Build a prompt that includes partial results continuation instructions.
-
- NOTE: This uses a different continuation pattern than SubCoreAi:
- - This uses "continue": true/false + "continuation_context" for document sections
- - Kept separate because it's tightly coupled to document processing needs
- """
- continuation_instructions = """
-
-IMPORTANT CHUNKING LOGIC:
-- If the response is too large to generate completely in one response, set "continue": true
-- When "continue": true, include a "continuation_context" field with:
- - "last_section_id": "id of the last completed section"
- - "last_element_index": "index of the last completed element in that section"
- - "remaining_requirements": "brief description of what still needs to be generated"
-- The AI will be called again with this context to continue generation
-- Only set "continue": false when the response is completely generated
-
-OUTPUT FORMAT: Return only valid JSON in this exact structure:
-{
- "metadata": {
- "title": "Document Title"
- },
- "sections": [
- {
- "id": "section_1",
- "content_type": "paragraph",
- "elements": [
- {
- "text": "This is the actual content that should be generated."
- }
- ],
- "order": 1
- }
- ],
- "continue": false,
- "continuation_context": {
- "last_section_id": "section_1",
- "last_element_index": 0,
- "remaining_requirements": "description of what still needs to be generated"
- }
-}
-
-The AI should generate content using the canonical format with "sections" and "elements".
-"""
-
- return f"{base_prompt}{continuation_instructions}"
-
- async def _processWithContinuationLoop(
- self,
- documents: List[ChatDocument],
- enhanced_prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """
- Process documents with continuation loop until complete.
- """
- max_iterations = 10 # Prevent infinite loops
- iteration = 0
- accumulated_sections = []
- continuation_context = None
-
- while iteration < max_iterations:
- iteration += 1
- logger.info(f"Continuation iteration {iteration}/{max_iterations}")
-
- # Build prompt for this iteration
- if continuation_context:
- iteration_prompt = self._buildContinuationIterationPrompt(
- enhanced_prompt, continuation_context, accumulated_sections
- )
- else:
- iteration_prompt = enhanced_prompt
-
- # Process documents for this iteration
- try:
- # Use the existing processing method
- result = await self.processDocumentsPerChunkJsonWithPrompt(
- documents, iteration_prompt, options
- )
-
- # Check if this is a valid JSON response
- if not isinstance(result, dict):
- logger.warning(f"Iteration {iteration}: Invalid result type, stopping")
- break
-
- # Extract sections from result
- sections = result.get("sections", [])
- if not sections:
- logger.warning(f"Iteration {iteration}: No sections found, stopping")
- break
-
- # Add sections to accumulated results
- for section in sections:
- # Update section order to maintain sequence
- section["order"] = len(accumulated_sections) + 1
- accumulated_sections.append(section)
-
- # Check if continuation is needed
- continue_flag = result.get("continue", False)
- continuation_context = result.get("continuation_context")
-
- logger.info(f"Iteration {iteration}: Added {len(sections)} sections, continue={continue_flag}")
-
- if not continue_flag:
- logger.info(f"Continuation complete after {iteration} iterations")
- break
-
- if not continuation_context:
- logger.warning(f"Iteration {iteration}: continue=true but no continuation_context, stopping")
- break
-
- except Exception as e:
- logger.error(f"Iteration {iteration} failed: {str(e)}")
- break
-
- if iteration >= max_iterations:
- logger.warning(f"Continuation stopped after maximum iterations ({max_iterations})")
-
- # Build final result
- final_result = {
- "metadata": {
- "title": "Generated Document",
- "total_sections": len(accumulated_sections),
- "iterations": iteration,
- "continuation_used": iteration > 1
- },
- "sections": accumulated_sections,
- "continue": False
- }
-
- logger.info(f"Final result: {len(accumulated_sections)} sections from {iteration} iterations")
- return final_result
-
- def _buildContinuationIterationPrompt(
- self,
- base_prompt: str,
- continuation_context: Dict[str, Any],
- accumulated_sections: List[Dict[str, Any]]
- ) -> str:
- """
- Build a prompt for continuation iteration with context.
- """
- last_section_id = continuation_context.get("last_section_id", "")
- last_element_index = continuation_context.get("last_element_index", 0)
- remaining_requirements = continuation_context.get("remaining_requirements", "")
-
- # Build context of what's already been generated
- context_summary = "PREVIOUSLY GENERATED CONTENT:\n"
- for i, section in enumerate(accumulated_sections[-3:]): # Show last 3 sections for context
- context_summary += f"Section {i+1}: {section.get('id', 'unknown')}\n"
- if 'elements' in section and section['elements']:
- first_element = section['elements'][0]
- if 'text' in first_element:
- preview = first_element['text'][:100] + "..." if len(first_element['text']) > 100 else first_element['text']
- context_summary += f" Preview: {preview}\n"
-
- continuation_prompt = f"""
-{base_prompt}
-
-{context_summary}
-
-CONTINUATION INSTRUCTIONS:
-- Continue from where you left off
-- Last completed section: {last_section_id}
-- Last completed element index: {last_element_index}
-- Remaining requirements: {remaining_requirements}
-- Generate the next part of the content
-- Maintain consistency with previously generated content
-- Use the same JSON format as before
-- Set "continue": true if more content is needed, false if complete
-"""
-
- return continuation_prompt
-
async def callAiText(
self,
prompt: str,
@@ -513,8 +155,8 @@ CONTINUATION INSTRUCTIONS:
import asyncio
# Collect all parts that need processing
- parts_to_process = []
- part_index = 0
+ partsToProcess = []
+ partIndex = 0
for ec in extractionResult:
for part in ec.parts:
@@ -524,24 +166,24 @@ CONTINUATION INSTRUCTIONS:
logger.debug(f"Skipping empty container part: mimeType={part.mimeType}")
continue
- parts_to_process.append({
+ partsToProcess.append({
'part': part,
- 'part_index': part_index,
+ 'part_index': partIndex,
'document_id': ec.id
})
- part_index += 1
+ partIndex += 1
- logger.info(f"Processing {len(parts_to_process)} parts with model-aware chunking")
+ logger.info(f"Processing {len(partsToProcess)} parts with model-aware chunking")
- total_parts = len(parts_to_process)
+ totalParts = len(partsToProcess)
# Process parts in parallel
- processed_count = [0] # Use list to allow modification in nested function
+ processedCount = [0] # Use list to allow modification in nested function
- async def process_single_part(part_info: Dict) -> PartResult:
- part = part_info['part']
- part_index = part_info['part_index']
- document_id = part_info['document_id']
+ async def processSinglePart(partInfo: Dict) -> PartResult:
+ part = partInfo['part']
+ part_index = partInfo['part_index']
+ documentId = partInfo['document_id']
start_time = time.time()
@@ -556,10 +198,10 @@ CONTINUATION INSTRUCTIONS:
)
# Update progress before AI call
- if operationId and total_parts > 0:
- processed_count[0] += 1
- progress = 0.3 + (processed_count[0] / total_parts * 0.6) # Progress from 0.3 to 0.9
- self.services.workflow.progressLogUpdate(operationId, progress, f"Processing part {processed_count[0]}/{total_parts}")
+ if operationId and totalParts > 0:
+ processedCount[0] += 1
+ progress = 0.3 + (processedCount[0] / totalParts * 0.6) # Progress from 0.3 to 0.9
+ self.services.workflow.progressLogUpdate(operationId, progress, f"Processing part {processedCount[0]}/{totalParts}")
# Call AI with model-aware chunking
response = await self.aiObjects.call(request)
@@ -570,7 +212,7 @@ CONTINUATION INSTRUCTIONS:
originalPart=part,
aiResult=response.content,
partIndex=part_index,
- documentId=document_id,
+ documentId=documentId,
processingTime=processing_time,
metadata={
"success": True,
@@ -590,7 +232,7 @@ CONTINUATION INSTRUCTIONS:
originalPart=part,
aiResult=f"[Error processing part: {str(e)}]",
partIndex=part_index,
- documentId=document_id,
+ documentId=documentId,
processingTime=processing_time,
metadata={
"success": False,
@@ -601,25 +243,25 @@ CONTINUATION INSTRUCTIONS:
)
# Process parts with concurrency control
- max_concurrent = 5
+ maxConcurrent = 5
if options and hasattr(options, 'maxConcurrentParts'):
- max_concurrent = options.maxConcurrentParts
+ maxConcurrent = options.maxConcurrentParts
- semaphore = asyncio.Semaphore(max_concurrent)
+ semaphore = asyncio.Semaphore(maxConcurrent)
- async def process_with_semaphore(part_info):
+ async def processWithSemaphore(partInfo):
async with semaphore:
- return await process_single_part(part_info)
+ return await processSinglePart(partInfo)
- tasks = [process_with_semaphore(part_info) for part_info in parts_to_process]
- part_results = await asyncio.gather(*tasks, return_exceptions=True)
+ tasks = [processWithSemaphore(part_info) for part_info in partsToProcess]
+ partResults = await asyncio.gather(*tasks, return_exceptions=True)
# Handle exceptions
- processed_results = []
- for i, result in enumerate(part_results):
+ processedResults = []
+ for i, result in enumerate(partResults):
if isinstance(result, Exception):
- part_info = parts_to_process[i]
- processed_results.append(PartResult(
+ part_info = partsToProcess[i]
+ processedResults.append(PartResult(
originalPart=part_info['part'],
aiResult=f"[Error in parallel processing: {str(result)}]",
partIndex=part_info['part_index'],
@@ -628,412 +270,10 @@ CONTINUATION INSTRUCTIONS:
metadata={"success": False, "error": str(result)}
))
elif result is not None:
- processed_results.append(result)
+ processedResults.append(result)
- logger.info(f"Completed processing {len(processed_results)} parts")
- return processed_results
-
- async def _processChunksWithMapping(
- self,
- extractionResult: List[ContentExtracted],
- prompt: str,
- options: Optional[AiCallOptions] = None,
- generate_json: bool = False
- ) -> List[ChunkResult]:
- """Process chunks with proper mapping to preserve relationships."""
- from modules.datamodels.datamodelExtraction import ChunkResult
- import asyncio
-
- # Collect all chunks that need processing with proper indexing
- chunks_to_process = []
- chunk_index = 0
-
- for ec in extractionResult:
- # Get document MIME type from metadata
- document_mime_type = None
- for part in ec.parts:
- if part.metadata and 'documentMimeType' in part.metadata:
- document_mime_type = part.metadata['documentMimeType']
- break
-
- for part in ec.parts:
- if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
- # Skip empty container chunks (they're just metadata containers)
- if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
- logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
- continue
-
- chunks_to_process.append({
- 'part': part,
- 'chunk_index': chunk_index,
- 'document_id': ec.id,
- 'document_mime_type': document_mime_type
- })
- chunk_index += 1
-
- logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping")
-
- # Process chunks in parallel with proper mapping
- async def process_single_chunk(chunk_info: Dict) -> ChunkResult:
- part = chunk_info['part']
- chunk_index = chunk_info['chunk_index']
- document_id = chunk_info['document_id']
- document_mime_type = chunk_info.get('document_mime_type', part.mimeType)
-
- start_time = time.time()
-
- try:
- # FIXED: Check MIME type first, then fallback to typeGroup
- is_image = (
- (document_mime_type and document_mime_type.startswith('image/')) or
- (part.mimeType and part.mimeType.startswith('image/')) or
- (part.typeGroup == "image")
- )
-
- # Debug logging
- self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}")
-
- if is_image:
- # Use the same extraction prompt for image analysis (contains table JSON format)
- self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
-
- # Check if image data is available
- if not part.data:
- error_msg = f"No image data available for chunk {chunk_index}"
- logger.warning(error_msg)
- ai_result = f"Error: {error_msg}"
- else:
- try:
- # Import here to avoid circular imports
- from modules.services.serviceAi.subCoreAi import SubCoreAi
- core_ai = SubCoreAi(self.services, self.aiObjects)
-
- ai_result = await core_ai.readImage(
- prompt=prompt,
- imageData=part.data,
- mimeType=part.mimeType,
- options=options
- )
-
- self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE")
- # Save image extraction response to debug file
- self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}_response")
-
- # Check if result is empty or None
- if not ai_result or not ai_result.strip():
- logger.warning(f"Image chunk {chunk_index} returned empty response from AI")
- ai_result = "No content detected in image"
-
- except Exception as e:
- logger.error(f"Error processing image chunk {chunk_index}: {str(e)}")
- ai_result = f"Error analyzing image: {str(e)}"
-
- # If generating JSON, clean image analysis result
- if generate_json:
- try:
-
- # Clean the response - remove markdown code blocks if present
- cleaned_result = ai_result.strip()
-
- # Remove various markdown patterns
- if cleaned_result.startswith('```json'):
- cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
- elif cleaned_result.startswith('```'):
- cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
-
- # Remove any leading/trailing text that's not JSON
- # Look for the first { and last } to extract JSON
- first_brace = cleaned_result.find('{')
- last_brace = cleaned_result.rfind('}')
-
- if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- cleaned_result = cleaned_result[first_brace:last_brace + 1]
-
- # Additional cleaning for common AI response issues
- cleaned_result = cleaned_result.strip()
-
- # Validate JSON
- json.loads(cleaned_result)
- ai_result = cleaned_result # Use cleaned version
- self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE")
-
- except json.JSONDecodeError as e:
- logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}")
- logger.warning(f"Raw response was: '{ai_result[:500]}...'")
-
- # Create fallback JSON with the actual response content (not the error message)
- # Use the original AI response content, not the error message
- fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected"
-
- self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE")
-
- ai_result = json.dumps({
- "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"},
- "sections": [{
- "id": f"image_section_{chunk_index}",
- "content_type": "paragraph",
- "elements": [{"text": fallback_content}]
- }]
- })
- self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE")
- elif part.typeGroup in ("container", "binary"):
- # Handle ALL container and binary content generically - let AI process any document type
- self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
-
- # Skip empty container chunks (they're just metadata containers)
- if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
- self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
- # Skip processing this chunk
- pass
- elif part.mimeType and part.data and len(part.data.strip()) > 0:
- # Process any document container as text content
- request_options = options if options is not None else AiCallOptions()
- request_options.operationType = OperationTypeEnum.DATA_EXTRACT
- self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
-
- # Log extraction prompt and context
- self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE")
- self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
-
- # Strengthen prompt to forbid fabrication for text/container extraction
- augmented_prompt = (
- f"{prompt}\n\n"
- "CRITICAL RULES (NO FABRICATION):\n"
- "- Use ONLY content present in the provided CONTEXT.\n"
- "- Do NOT create, infer, or guess values not explicitly in the context.\n"
- "- If a value is missing, leave the cell empty or omit the row.\n"
- )
- request = AiCallRequest(
- prompt=augmented_prompt,
- context=part.data,
- options=request_options
- )
- response = await self.aiObjects.call(request)
- ai_result = response.content
-
- # Log extraction response
- self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
-
- # Save extraction prompt and response to debug
- self.services.utils.writeDebugFile(augmented_prompt, f"extraction_chunk_{chunk_index}_prompt")
- self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response")
-
- # If generating JSON, validate the response
- if generate_json:
- try:
-
- # Clean the response - remove markdown code blocks if present
- cleaned_result = ai_result.strip()
-
- # Remove various markdown patterns
- if cleaned_result.startswith('```json'):
- cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
- elif cleaned_result.startswith('```'):
- cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
-
- # Remove any leading/trailing text that's not JSON
- # Look for the first { and last } to extract JSON
- first_brace = cleaned_result.find('{')
- last_brace = cleaned_result.rfind('}')
-
- if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- cleaned_result = cleaned_result[first_brace:last_brace + 1]
-
- # Additional cleaning for common AI response issues
- cleaned_result = cleaned_result.strip()
-
- # Validate JSON
- json.loads(cleaned_result)
- ai_result = cleaned_result # Use cleaned version
-
- except json.JSONDecodeError as e:
- logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}")
- logger.warning(f"Raw response was: '{ai_result[:500]}...'")
-
- # Create fallback JSON with the actual response content (not the error message)
- # Use the original AI response content, not the error message
- fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected"
-
- self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE")
-
- ai_result = json.dumps({
- "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"},
- "sections": [{
- "id": f"analysis_section_{chunk_index}",
- "content_type": "paragraph",
- "elements": [{"text": fallback_content}]
- }]
- })
- self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE")
- else:
- # Skip empty or invalid container/binary content - don't create a result
- self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
- # Return None to indicate this chunk should be completely skipped
- return None
- else:
- # Ensure options is not None and set correct operation type for text
- request_options = options if options is not None else AiCallOptions()
- # FIXED: Set operation type to general for text processing
- request_options.operationType = OperationTypeEnum.DATA_EXTRACT
- self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
-
- # Log extraction context length
- self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
-
- # Debug: Log the actual prompt being sent to AI
- logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...")
- logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...")
-
- # Strengthen prompt to forbid fabrication for text extraction
- augmented_prompt_text = (
- f"{prompt}\n\n"
- "CRITICAL RULES (NO FABRICATION):\n"
- "- Use ONLY content present in the provided CONTEXT.\n"
- "- Do NOT create, infer, or guess values not explicitly in the context.\n"
- "- If a value is missing, leave the cell empty or omit the row.\n"
- )
- request = AiCallRequest(
- prompt=augmented_prompt_text,
- context=part.data,
- options=request_options
- )
- response = await self.aiObjects.call(request)
-
- # Debug: Log what AI actually returned
- logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...")
- ai_result = response.content
-
- # Log extraction response length
- self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
-
- # Save extraction prompt and response to debug
- self.services.utils.writeDebugFile(augmented_prompt_text, f"extraction_chunk_{chunk_index}_prompt")
- self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response")
-
- # If generating JSON, validate the response
- if generate_json:
- try:
-
- # Clean the response - remove markdown code blocks and extra formatting
- cleaned_result = ai_result.strip()
-
- # Remove any markdown code block markers (```json, ```, etc.)
- cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE)
- cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE)
-
- # Remove any remaining ``` markers anywhere in the text
- cleaned_result = re.sub(r'```', '', cleaned_result)
-
- # Try to extract JSON from the response if it's embedded in other text
- json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL)
- if json_match:
- cleaned_result = json_match.group(0)
-
- # Validate JSON
- json.loads(cleaned_result)
- ai_result = cleaned_result # Use cleaned version
-
- except json.JSONDecodeError as e:
- logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}")
- # Create fallback JSON
- ai_result = json.dumps({
- "metadata": {"title": "Error Section"},
- "sections": [{
- "id": f"error_section_{chunk_index}",
- "content_type": "paragraph",
- "elements": [{"text": f"Error parsing JSON: {str(e)}"}]
- }]
- })
-
- processing_time = time.time() - start_time
-
- logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s")
-
- return ChunkResult(
- originalChunk=part,
- aiResult=ai_result,
- chunkIndex=chunk_index,
- documentId=document_id,
- processingTime=processing_time,
- metadata={
- "success": True,
- "chunkSize": len(part.data) if part.data else 0,
- "resultSize": len(ai_result),
- "typeGroup": part.typeGroup
- }
- )
-
- except Exception as e:
- processing_time = time.time() - start_time
- logger.warning(f"Error processing chunk {chunk_index}: {str(e)}")
-
- return ChunkResult(
- originalChunk=part,
- aiResult=f"[Error processing chunk: {str(e)}]",
- chunkIndex=chunk_index,
- documentId=document_id,
- processingTime=processing_time,
- metadata={
- "success": False,
- "error": str(e),
- "chunkSize": len(part.data) if part.data else 0,
- "typeGroup": part.typeGroup
- }
- )
-
- # Process chunks with concurrency control
- max_concurrent = 5 # Default concurrency
- if options and hasattr(options, 'maxConcurrentChunks'):
- max_concurrent = options.maxConcurrentChunks
- elif options and hasattr(options, 'maxParallelChunks'):
- max_concurrent = options.maxParallelChunks
-
- logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}")
- self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE")
- for i, chunk_info in enumerate(chunks_to_process):
- self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE")
-
- # Create semaphore for concurrency control
- semaphore = asyncio.Semaphore(max_concurrent)
-
- async def process_with_semaphore(chunk_info):
- async with semaphore:
- return await process_single_chunk(chunk_info)
-
- # Process all chunks in parallel with concurrency control
- tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process]
- self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE")
- chunk_results = await asyncio.gather(*tasks, return_exceptions=True)
- self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE")
-
- # Handle any exceptions in the gather itself
- processed_results = []
- for i, result in enumerate(chunk_results):
- if isinstance(result, Exception):
- # Create error ChunkResult
- chunk_info = chunks_to_process[i]
- processed_results.append(ChunkResult(
- originalChunk=chunk_info['part'],
- aiResult=f"[Error in parallel processing: {str(result)}]",
- chunkIndex=chunk_info['chunk_index'],
- documentId=chunk_info['document_id'],
- processingTime=0.0,
- metadata={"success": False, "error": str(result)}
- ))
- elif result is not None:
- # Only add non-None results (skip empty containers)
- processed_results.append(result)
-
- logger.info(f"Completed processing {len(processed_results)} chunks")
- return processed_results
+ logger.info(f"Completed processing {len(processedResults)} parts")
+ return processedResults
def _mergePartResults(
self,
@@ -1221,253 +461,3 @@ CONTINUATION INSTRUCTIONS:
logger.info(f"Converted {len(partResults)} parts to JSON format using existing sophisticated merging system")
return merged_document
-
- def _mergeChunkResults(
- self,
- chunkResults: List[ChunkResult],
- options: Optional[AiCallOptions] = None
- ) -> str:
- """Merge chunk results using existing sophisticated merging system."""
- if not chunkResults:
- return ""
-
- # Convert ChunkResults back to ContentParts for existing merger system
- from modules.datamodels.datamodelExtraction import ContentPart
- content_parts = []
- for chunk_result in chunkResults:
- # Create ContentPart from ChunkResult with proper typeGroup
- content_part = ContentPart(
- id=chunk_result.originalChunk.id,
- parentId=chunk_result.originalChunk.parentId,
- label=chunk_result.originalChunk.label,
- typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
- mimeType=chunk_result.originalChunk.mimeType,
- data=chunk_result.aiResult, # Use AI result as data
- metadata={
- **chunk_result.originalChunk.metadata,
- "aiResult": True,
- "chunk": True,
- "chunkIndex": chunk_result.chunkIndex,
- "documentId": chunk_result.documentId,
- "processingTime": chunk_result.processingTime,
- "success": chunk_result.metadata.get("success", False)
- }
- )
- content_parts.append(content_part)
-
- # Use existing merging strategy from options
- merge_strategy = MergeStrategy(
- useIntelligentMerging=True,
- groupBy="documentId", # Group by document
- orderBy="chunkIndex", # Order by chunk index
- mergeType="concatenate"
- )
-
-
- # Apply existing merging logic using the sophisticated merging system
- from modules.services.serviceExtraction.subPipeline import _applyMerging
- merged_parts = _applyMerging(content_parts, merge_strategy)
-
- # Convert merged parts back to final string
- final_content = "\n\n".join([part.data for part in merged_parts])
-
- logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system")
- return final_content.strip()
-
- def _mergeChunkResultsClean(
- self,
- chunkResults: List[ChunkResult],
- options: Optional[AiCallOptions] = None
- ) -> str:
- """Merge chunk results in CLEAN mode using existing sophisticated merging system."""
- if not chunkResults:
- return ""
-
- # Convert ChunkResults back to ContentParts for existing merger system
- from modules.datamodels.datamodelExtraction import ContentPart
- content_parts = []
- for chunk_result in chunkResults:
- # Skip empty or error chunks in clean mode
- if not chunk_result.metadata.get("success", False):
- continue
- if not chunk_result.aiResult or not chunk_result.aiResult.strip():
- continue
- # Skip container/binary chunks in clean mode
- if chunk_result.aiResult.startswith("[Skipped ") and "content:" in chunk_result.aiResult:
- continue
-
- # Create ContentPart from ChunkResult with proper typeGroup
- content_part = ContentPart(
- id=chunk_result.originalChunk.id,
- parentId=chunk_result.originalChunk.parentId,
- label=chunk_result.originalChunk.label,
- typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
- mimeType=chunk_result.originalChunk.mimeType,
- data=chunk_result.aiResult, # Use AI result as data
- metadata={
- **chunk_result.originalChunk.metadata,
- "aiResult": True,
- "chunk": True,
- "chunkIndex": chunk_result.chunkIndex,
- "documentId": chunk_result.documentId,
- "processingTime": chunk_result.processingTime,
- "success": chunk_result.metadata.get("success", False)
- }
- )
- content_parts.append(content_part)
-
- # Use existing merging strategy for clean mode
- merge_strategy = MergeStrategy(
- useIntelligentMerging=True,
- groupBy="documentId", # Group by document
- orderBy="chunkIndex", # Order by chunk index
- mergeType="concatenate"
- )
-
-
- # Apply existing merging logic using the sophisticated merging system
- from modules.services.serviceExtraction.subPipeline import _applyMerging
- merged_parts = _applyMerging(content_parts, merge_strategy)
-
- # Convert merged parts back to final string
- final_content = "\n\n".join([part.data for part in merged_parts])
-
- logger.info(f"Merged {len(content_parts)} chunks in clean mode using existing sophisticated merging system")
- return final_content.strip()
-
- def _mergeChunkResultsJson(
- self,
- chunkResults: List[ChunkResult],
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """Merge chunk results in JSON mode using existing sophisticated merging system."""
- if not chunkResults:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- # Convert ChunkResults back to ContentParts for existing merger system
- from modules.datamodels.datamodelExtraction import ContentPart
- content_parts = []
- for chunk_result in chunkResults:
- # Create ContentPart from ChunkResult with proper typeGroup
- content_part = ContentPart(
- id=chunk_result.originalChunk.id,
- parentId=chunk_result.originalChunk.parentId,
- label=chunk_result.originalChunk.label,
- typeGroup=chunk_result.originalChunk.typeGroup, # Use original typeGroup
- mimeType=chunk_result.originalChunk.mimeType,
- data=chunk_result.aiResult, # Use AI result as data
- metadata={
- **chunk_result.originalChunk.metadata,
- "aiResult": True,
- "chunk": True,
- "chunkIndex": chunk_result.chunkIndex,
- "documentId": chunk_result.documentId,
- "processingTime": chunk_result.processingTime,
- "success": chunk_result.metadata.get("success", False)
- }
- )
- content_parts.append(content_part)
-
- # Use existing merging strategy for JSON mode
- merge_strategy = MergeStrategy(
- useIntelligentMerging=True,
- groupBy="documentId", # Group by document
- orderBy="chunkIndex", # Order by chunk index
- mergeType="concatenate"
- )
-
-
- # Apply existing merging logic using the sophisticated merging system
- from modules.services.serviceExtraction.subPipeline import _applyMerging
- merged_parts = _applyMerging(content_parts, merge_strategy)
-
- # Convert merged parts to JSON format
- all_sections = []
- document_titles = []
-
- for part in merged_parts:
- if part.metadata.get("success", False):
- try:
- # Parse JSON from AI result
- chunk_json = json.loads(part.data)
-
- # Check if this is a multi-file response (has "documents" key)
- if isinstance(chunk_json, dict) and "documents" in chunk_json:
- # This is a multi-file response - merge all documents
- logger.debug(f"Processing multi-file response from part {part.id} with {len(chunk_json['documents'])} documents")
-
- # Return multi-file response directly
- return {
- "metadata": chunk_json.get("metadata", {"title": "Merged Document"}),
- "documents": chunk_json["documents"]
- }
-
- # Extract sections from single-file response
- elif isinstance(chunk_json, dict) and "sections" in chunk_json:
- for section in chunk_json["sections"]:
- # Add part context to section
- section["metadata"] = section.get("metadata", {})
- section["metadata"]["source_part"] = part.id
- section["metadata"]["source_document"] = part.metadata.get("documentId", "unknown")
- section["metadata"]["chunk_index"] = part.metadata.get("chunkIndex", 0)
- all_sections.append(section)
-
- # Extract document title
- if isinstance(chunk_json, dict) and "metadata" in chunk_json:
- title = chunk_json["metadata"].get("title", "")
- if title and title not in document_titles:
- document_titles.append(title)
-
- except json.JSONDecodeError as e:
- logger.warning(f"Failed to parse JSON from part {part.id}: {str(e)}")
- # Create a fallback section for invalid JSON
- fallback_section = {
- "id": f"error_section_{part.id}",
- "title": "Error Section",
- "content_type": "paragraph",
- "elements": [{
- "text": f"Error parsing part {part.id}: {str(e)}"
- }],
- "order": part.metadata.get("chunkIndex", 0),
- "metadata": {
- "source_document": part.metadata.get("documentId", "unknown"),
- "part_id": part.id,
- "error": str(e)
- }
- }
- all_sections.append(fallback_section)
- else:
- # Handle error parts
- error_section = {
- "id": f"error_section_{part.id}",
- "title": "Error Section",
- "content_type": "paragraph",
- "elements": [{
- "text": f"Error in part {part.id}: {part.metadata.get('error', 'Unknown error')}"
- }],
- "order": part.metadata.get("chunkIndex", 0),
- "metadata": {
- "source_document": part.metadata.get("documentId", "unknown"),
- "part_id": part.id,
- "error": part.metadata.get('error', 'Unknown error')
- }
- }
- all_sections.append(error_section)
-
- # Sort sections by order
- all_sections.sort(key=lambda x: x.get("order", 0))
-
- # Create merged document with sections
- merged_document = {
- "metadata": {
- "title": document_titles[0] if document_titles else "Merged Document",
- "extraction_method": "ai_json_extraction_with_merging",
- "version": "2.0"
- },
- "sections": all_sections,
- "summary": f"Merged document using sophisticated merging system",
- "tags": ["merged", "ai_generated", "sophisticated_merging"]
- }
-
- logger.info(f"Merged {len(chunkResults)} chunks using existing sophisticated merging system (JSON mode)")
- return merged_document
diff --git a/modules/services/serviceAi/subSharedAiUtils.py b/modules/services/serviceAi/subSharedAiUtils.py
deleted file mode 100644
index 1dcf6c41..00000000
--- a/modules/services/serviceAi/subSharedAiUtils.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""
-Shared utilities for AI services to eliminate code duplication.
-
-This module contains common functions used across multiple AI service modules
-to maintain DRY principles and ensure consistency.
-"""
-
-import re
-import logging
-from typing import Dict, Any, List, Optional, Union
-
-logger = logging.getLogger(__name__)
-
-
-def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
- """
- Build full prompt by replacing placeholders with their content.
- Uses the new {{KEY:placeholder}} format.
-
- Args:
- prompt: The base prompt template
- placeholders: Dictionary of placeholder key-value pairs
-
- Returns:
- Prompt with placeholders replaced
- """
- if not placeholders:
- return prompt
-
- full_prompt = prompt
- for placeholder, content in placeholders.items():
- # Skip if content is None or empty
- if content is None:
- continue
- # Replace {{KEY:placeholder}}
- full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
-
- return full_prompt
-
-
-def sanitizePromptContent(content: str, contentType: str = "text") -> str:
- """
- Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
-
- This is the single source of truth for all prompt sanitization across the system.
- Replaces all scattered sanitization functions with a unified approach.
-
- Args:
- content: The content to sanitize
- contentType: Type of content ("text", "userinput", "json", "document")
-
- Returns:
- Safely sanitized content ready for AI prompt insertion
- """
- if not content:
- return ""
-
- try:
- # Convert to string if not already
- content_str = str(content)
-
- # Remove null bytes and control characters (except newlines and tabs)
- sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
-
- # Handle different content types with appropriate sanitization
- if contentType == "userinput":
- # Extra security for user-controlled content
- # Escape curly braces to prevent placeholder injection
- sanitized = sanitized.replace('{', '{{').replace('}', '}}')
- # Escape quotes and wrap in single quotes
- sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
- return f"'{sanitized}'"
-
- elif contentType == "json":
- # For JSON content, escape quotes and backslashes
- sanitized = sanitized.replace('\\', '\\\\')
- sanitized = sanitized.replace('"', '\\"')
- sanitized = sanitized.replace('\n', '\\n')
- sanitized = sanitized.replace('\r', '\\r')
- sanitized = sanitized.replace('\t', '\\t')
-
- elif contentType == "document":
- # For document content, escape special characters
- sanitized = sanitized.replace('\\', '\\\\')
- sanitized = sanitized.replace('"', '\\"')
- sanitized = sanitized.replace("'", "\\'")
- sanitized = sanitized.replace('\n', '\\n')
- sanitized = sanitized.replace('\r', '\\r')
- sanitized = sanitized.replace('\t', '\\t')
-
- else: # contentType == "text" or default
- # Basic text sanitization
- sanitized = sanitized.replace('\\', '\\\\')
- sanitized = sanitized.replace('"', '\\"')
- sanitized = sanitized.replace("'", "\\'")
- sanitized = sanitized.replace('\n', '\\n')
- sanitized = sanitized.replace('\r', '\\r')
- sanitized = sanitized.replace('\t', '\\t')
-
- return sanitized
-
- except Exception as e:
- logger.error(f"Error sanitizing prompt content: {str(e)}")
- # Return a safe fallback
- return "[ERROR: Content could not be safely sanitized]"
-
-
-def extractTextFromContentParts(extracted_content) -> str:
- """
- Extract text content from ExtractionService ContentPart objects.
-
- Args:
- extracted_content: ContentExtracted object with parts
-
- Returns:
- Concatenated text content from all text/table/structure parts
- """
- if not extracted_content or not hasattr(extracted_content, 'parts'):
- return ""
-
- text_parts = []
- for part in extracted_content.parts:
- if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
- if hasattr(part, 'data') and part.data:
- text_parts.append(part.data)
-
- return "\n\n".join(text_parts)
-
-
-def reduceText(text: str, reduction_factor: float) -> str:
- """
- Reduce text size by the specified factor.
-
- Args:
- text: Text to reduce
- reduction_factor: Factor by which to reduce (0.0 to 1.0)
-
- Returns:
- Reduced text with truncation indicator
- """
- if reduction_factor >= 1.0:
- return text
-
- target_length = int(len(text) * reduction_factor)
- return text[:target_length] + "... [reduced]"
-
-
-def determineCallType(documents: Optional[List], operation_type: str) -> str:
- """
- Determine call type based on documents and operation type.
-
- Args:
- documents: List of ChatDocument objects
- operation_type: Type of operation being performed
-
- Returns:
- Call type: "plan" or "text"
- """
- has_documents = documents is not None and len(documents) > 0
- is_planning_operation = operation_type == "plan"
-
- if not has_documents and is_planning_operation:
- return "plan"
- else:
- return "text"
diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py
index 62931565..1e0c1d21 100644
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -19,6 +19,16 @@ class ExtractionService:
self.services = services
self._extractorRegistry = ExtractorRegistry()
self._chunkerRegistry = ChunkerRegistry()
+ # Ensure AI connectors are discovered so pricing models are available
+ try:
+ # If internal model is missing, trigger discovery and registration
+ if modelRegistry.getModel("internal-extractor") is None:
+ discovered = modelRegistry.discoverConnectors()
+ for connector in discovered:
+ modelRegistry.registerConnector(connector)
+ except Exception:
+ # Propagate actual errors during use; init should be fast and side-effect free otherwise
+ pass
def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
"""
@@ -82,12 +92,12 @@ class ExtractionService:
p.metadata["documentMimeType"] = documentData["mimeType"]
# Log chunking information
- chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
- if chunked_parts:
+ chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
+ if chunkedParts:
logger.debug(f"=== CHUNKING RESULTS ===")
logger.debug(f"Total parts: {len(ec.parts)}")
- logger.debug(f"Chunked parts: {len(chunked_parts)}")
- for chunk in chunked_parts:
+ logger.debug(f"Chunked parts: {len(chunkedParts)}")
+ for chunk in chunkedParts:
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
else:
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
@@ -101,8 +111,11 @@ class ExtractionService:
# Emit stats for extraction operation
# Use internal extraction model for pricing
- modelName = "internal_extraction"
+ modelName = "internal-extractor"
model = modelRegistry.getModel(modelName)
+ # Hard fail if model is missing; caller must ensure connectors are registered
+ if model is None or model.calculatePriceUsd is None:
+ raise RuntimeError(f"Pricing model not available: {modelName}")
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
# Create AiCallResponse with real calculation
diff --git a/modules/services/serviceExtraction/subMerger.py b/modules/services/serviceExtraction/subMerger.py
index aa9bf1f5..9e8120d0 100644
--- a/modules/services/serviceExtraction/subMerger.py
+++ b/modules/services/serviceExtraction/subMerger.py
@@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger:
4. Minimize total number of AI calls
"""
- def __init__(self, model_capabilities: Dict[str, Any]):
- self.max_tokens = model_capabilities.get("maxTokens", 4000)
- self.safety_margin = model_capabilities.get("safetyMargin", 0.1)
- self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin))
- self.chars_per_token = model_capabilities.get("charsPerToken", 4) # Rough estimation
+ def __init__(self, modelCapabilities: Dict[str, Any]):
+ self.maxTokens = modelCapabilities.get("maxTokens", 4000)
+ self.safetyMargin = modelCapabilities.get("safetyMargin", 0.1)
+ self.effectiveMaxTokens = int(self.maxTokens * (1 - self.safetyMargin))
+ self.charsPerToken = modelCapabilities.get("charsPerToken", 4) # Rough estimation
- def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
+ def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
"""
Merge chunks intelligently based on token limits.
@@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger:
if not chunks:
return chunks
- logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}")
+ logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}")
# Calculate tokens for prompt
- prompt_tokens = self._estimate_tokens(prompt)
- available_tokens = self.effective_max_tokens - prompt_tokens
+ promptTokens = self._estimateTokens(prompt)
+ availableTokens = self.effectiveMaxTokens - promptTokens
- logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}")
+ logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}")
# Group chunks by document and type for semantic coherence
- grouped_chunks = self._group_chunks_by_document_and_type(chunks)
+ groupedChunks = self._groupChunksByDocumentAndType(chunks)
- merged_parts = []
+ mergedParts = []
- for group_key, group_chunks in grouped_chunks.items():
- logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)")
+ for groupKey, groupChunks in groupedChunks.items():
+ logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)")
# Merge chunks within this group optimally
- group_merged = self._merge_group_optimally(group_chunks, available_tokens)
- merged_parts.extend(group_merged)
+ groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens)
+ mergedParts.extend(groupMerged)
- logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts")
- return merged_parts
+ logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(mergedParts)} parts")
+ return mergedParts
- def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
+ def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
"""Group chunks by document and type for semantic coherence."""
groups = {}
for chunk in chunks:
# Create group key: document_id + type_group
- doc_id = chunk.metadata.get("documentId", "unknown")
- type_group = chunk.typeGroup
- group_key = f"{doc_id}_{type_group}"
-
- if group_key not in groups:
- groups[group_key] = []
- groups[group_key].append(chunk)
+ docId = chunk.metadata.get("documentId", "unknown")
+ typeGroup = chunk.typeGroup
+ groupKey = f"{docId}_{typeGroup}"
+ if groupKey not in groups:
+ groups[groupKey] = []
+ groups[groupKey].append(chunk)
+
return groups
- def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]:
+ def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]:
"""Merge chunks within a group optimally to minimize AI calls."""
if not chunks:
return []
# Sort chunks by size (smallest first for better packing)
- sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data))
+ sortedChunks = sorted(chunks, key=lambda c: self._estimateTokens(c.data))
- merged_parts = []
- current_group = []
- current_tokens = 0
+ mergedParts = []
+ currentGroup = []
+ currentTokens = 0
- for chunk in sorted_chunks:
- chunk_tokens = self._estimate_tokens(chunk.data)
+ for chunk in sortedChunks:
+ chunkTokens = self._estimateTokens(chunk.data)
# Special case: If single chunk is already at max size, process it alone
- if chunk_tokens >= available_tokens * 0.9: # 90% of available tokens
+ if chunkTokens >= availableTokens * 0.9: # 90% of available tokens
# Finalize current group if it exists
- if current_group:
- merged_part = self._create_merged_part(current_group, current_tokens)
- merged_parts.append(merged_part)
- current_group = []
- current_tokens = 0
+ if currentGroup:
+ mergedPart = self._createMergedPart(currentGroup, currentTokens)
+ mergedParts.append(mergedPart)
+ currentGroup = []
+ currentTokens = 0
# Process large chunk individually
- merged_parts.append(chunk)
- logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens")
+ mergedParts.append(chunk)
+ logger.debug(f"🔍 Large chunk processed individually: {chunkTokens} tokens")
continue
# If adding this chunk would exceed limit, finalize current group
- if current_tokens + chunk_tokens > available_tokens and current_group:
- merged_part = self._create_merged_part(current_group, current_tokens)
- merged_parts.append(merged_part)
- current_group = [chunk]
- current_tokens = chunk_tokens
+ if currentTokens + chunkTokens > availableTokens and currentGroup:
+ mergedPart = self._createMergedPart(currentGroup, currentTokens)
+ mergedParts.append(mergedPart)
+ currentGroup = [chunk]
+ currentTokens = chunkTokens
else:
- current_group.append(chunk)
- current_tokens += chunk_tokens
+ currentGroup.append(chunk)
+ currentTokens += chunkTokens
# Finalize remaining group
- if current_group:
- merged_part = self._create_merged_part(current_group, current_tokens)
- merged_parts.append(merged_part)
+ if currentGroup:
+ mergedPart = self._createMergedPart(currentGroup, currentTokens)
+ mergedParts.append(mergedPart)
- logger.info(f"📦 Group merged: {len(chunks)} → {len(merged_parts)} parts")
- return merged_parts
+ logger.info(f"📦 Group merged: {len(chunks)} → {len(mergedParts)} parts")
+ return mergedParts
- def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart:
+ def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart:
"""Create a merged ContentPart from multiple chunks."""
if len(chunks) == 1:
return chunks[0] # No need to merge single chunk
# Combine data with semantic separators
- combined_data = self._combine_chunk_data(chunks)
+ combinedData = self._combineChunkData(chunks)
# Use metadata from first chunk as base
- base_chunk = chunks[0]
- merged_metadata = base_chunk.metadata.copy()
- merged_metadata.update({
+ baseChunk = chunks[0]
+ mergedMetadata = baseChunk.metadata.copy()
+ mergedMetadata.update({
"merged": True,
"originalChunkCount": len(chunks),
- "totalTokens": total_tokens,
+ "totalTokens": totalTokens,
"originalChunkIds": [c.id for c in chunks],
- "size": len(combined_data.encode('utf-8'))
+ "size": len(combinedData.encode('utf-8'))
})
- merged_part = ContentPart(
+ mergedPart = ContentPart(
id=makeId(),
- parentId=base_chunk.parentId,
+ parentId=baseChunk.parentId,
label=f"merged_{len(chunks)}_chunks",
- typeGroup=base_chunk.typeGroup,
- mimeType=base_chunk.mimeType,
- data=combined_data,
- metadata=merged_metadata
+ typeGroup=baseChunk.typeGroup,
+ mimeType=baseChunk.mimeType,
+ data=combinedData,
+ metadata=mergedMetadata
)
- logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens")
- return merged_part
+ logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens")
+ return mergedPart
- def _combine_chunk_data(self, chunks: List[ContentPart]) -> str:
+ def _combineChunkData(self, chunks: List[ContentPart]) -> str:
"""Combine chunk data with appropriate separators."""
if not chunks:
return ""
@@ -173,37 +173,37 @@ class IntelligentTokenAwareMerger:
return separator.join([chunk.data for chunk in chunks])
- def _estimate_tokens(self, text: str) -> int:
+ def _estimateTokens(self, text: str) -> int:
"""Estimate token count for text."""
if not text:
return 0
- return len(text) // self.chars_per_token
+ return len(text) // self.charsPerToken
- def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]:
+ def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]:
"""Calculate optimization statistics with detailed analysis."""
- original_calls = len(original_chunks)
- optimized_calls = len(merged_parts)
- reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0
+ originalCalls = len(originalChunks)
+ optimizedCalls = len(mergedParts)
+ reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0
# Analyze chunk sizes
- large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9]
- small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9]
+ largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9]
+ smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9]
# Calculate theoretical maximum optimization (if all small chunks could be merged)
- theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3) # Assume 3 small chunks per call
- theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0
+ theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3) # Assume 3 small chunks per call
+ theoreticalReduction = ((originalCalls - theoreticalMinCalls) / originalCalls * 100) if originalCalls > 0 else 0
return {
- "original_ai_calls": original_calls,
- "optimized_ai_calls": optimized_calls,
- "reduction_percent": round(reduction_percent, 1),
- "cost_savings": f"{reduction_percent:.1f}%",
- "efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "∞",
+ "original_ai_calls": originalCalls,
+ "optimized_ai_calls": optimizedCalls,
+ "reduction_percent": round(reductionPercent, 1),
+ "cost_savings": f"{reductionPercent:.1f}%",
+ "efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "∞",
"analysis": {
- "large_chunks": len(large_chunks),
- "small_chunks": len(small_chunks),
- "theoretical_min_calls": theoretical_min_calls,
- "theoretical_reduction": round(theoretical_reduction, 1),
- "optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low"
+ "large_chunks": len(largeChunks),
+ "small_chunks": len(smallChunks),
+ "theoretical_min_calls": theoreticalMinCalls,
+ "theoretical_reduction": round(theoreticalReduction, 1),
+ "optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low"
}
}
diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py
index e935f3c3..f36afe8e 100644
--- a/modules/services/serviceExtraction/subPipeline.py
+++ b/modules/services/serviceExtraction/subPipeline.py
@@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[Con
subMerger = IntelligentTokenAwareMerger(model_capabilities)
# Use intelligent merging for all parts
- merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "")
+ merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")
# Calculate and log optimization stats
- stats = subMerger.calculate_optimization_stats(parts, merged)
+ stats = subMerger.calculateOptimizationStats(parts, merged)
logger.info(f"🧠 Intelligent merging stats: {stats}")
logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
index 5b887482..a796ea3b 100644
--- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py
+++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
@@ -101,7 +101,7 @@ async def buildExtractionPrompt(
# Build base prompt
adaptive_prompt = f"""
-{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
+{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index 41bce06d..9dddb49d 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -37,13 +37,13 @@ class GenerationService:
return []
# Process each document from the AI action result
- processed_documents = []
+ processedDocuments = []
for doc in documents:
- processed_doc = self.processSingleDocument(doc, action)
- if processed_doc:
- processed_documents.append(processed_doc)
+ processedDoc = self.processSingleDocument(doc, action)
+ if processedDoc:
+ processedDocuments.append(processedDoc)
- return processed_documents
+ return processedDocuments
except Exception as e:
logger.error(f"Error processing action result documents: {str(e)}")
return []
@@ -77,20 +77,20 @@ class GenerationService:
try:
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
- created_documents = []
+ createdDocuments = []
for i, doc_data in enumerate(processed_docs):
try:
- document_name = doc_data['fileName']
- document_data = doc_data['content']
- mime_type = doc_data['mimeType']
+ documentName = doc_data['fileName']
+ documentData = doc_data['content']
+ mimeType = doc_data['mimeType']
# Convert document data to string content
- content = convertDocumentDataToString(document_data, getFileExtension(document_name))
+ content = convertDocumentDataToString(documentData, getFileExtension(documentName))
# Skip empty or minimal content
- minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
- if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
- logger.warning(f"Empty or minimal content for document {document_name}, skipping")
+ minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
+ if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
+ logger.warning(f"Empty or minimal content for document {documentName}, skipping")
continue
# Normalize file extension based on mime type if missing or incorrect
@@ -105,35 +105,35 @@ class GenerationService:
"text/plain": ".txt",
"application/json": ".json",
}
- expected_ext = mime_to_ext.get(mime_type)
- if expected_ext:
- if not document_name.lower().endswith(expected_ext):
+ expectedExt = mime_to_ext.get(mimeType)
+ if expectedExt:
+ if not documentName.lower().endswith(expectedExt):
# Append/replace extension to match mime type
- if "." in document_name:
- document_name = document_name.rsplit(".", 1)[0] + expected_ext
+ if "." in documentName:
+ documentName = documentName.rsplit(".", 1)[0] + expectedExt
else:
- document_name = document_name + expected_ext
+ documentName = documentName + expectedExt
except Exception:
pass
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
base64encoded = False
try:
- binary_mime_types = {
+ binaryMimeTypes = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf",
}
- if isinstance(document_data, str) and mime_type in binary_mime_types:
+ if isinstance(documentData, str) and mimeType in binaryMimeTypes:
base64encoded = True
except Exception:
base64encoded = False
# Create document with file in one step using interfaces directly
document = self._createDocument(
- fileName=document_name,
- mimeType=mime_type,
+ fileName=documentName,
+ mimeType=mimeType,
content=content,
base64encoded=base64encoded,
messageId=message_id
@@ -141,14 +141,14 @@ class GenerationService:
if document:
# Set workflow context on the document if possible
self._setDocumentWorkflowContext(document, action, workflow)
- created_documents.append(document)
+ createdDocuments.append(document)
else:
- logger.error(f"Failed to create ChatDocument object for {document_name}")
+ logger.error(f"Failed to create ChatDocument object for {documentName}")
except Exception as e:
logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
continue
- return created_documents
+ return createdDocuments
except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}")
return []
@@ -157,28 +157,28 @@ class GenerationService:
"""Set workflow context on a document for proper routing and labeling"""
try:
# Get current workflow context directly from workflow object
- workflow_context = self._getWorkflowContext(workflow)
- workflow_stats = self._getWorkflowStats(workflow)
+ workflowContext = self._getWorkflowContext(workflow)
+ workflowStats = self._getWorkflowStats(workflow)
- current_round = workflow_context.get('currentRound', 0)
- current_task = workflow_context.get('currentTask', 0)
- current_action = workflow_context.get('currentAction', 0)
+ currentRound = workflowContext.get('currentRound', 0)
+ currentTask = workflowContext.get('currentTask', 0)
+ currentAction = workflowContext.get('currentAction', 0)
# Try to set workflow context attributes if they exist
if hasattr(document, 'roundNumber'):
- document.roundNumber = current_round
+ document.roundNumber = currentRound
if hasattr(document, 'taskNumber'):
- document.taskNumber = current_task
+ document.taskNumber = currentTask
if hasattr(document, 'actionNumber'):
- document.actionNumber = current_action
+ document.actionNumber = currentAction
if hasattr(document, 'actionId'):
document.actionId = action.id if hasattr(action, 'id') else None
# Set additional workflow metadata if available
if hasattr(document, 'workflowId'):
- document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
+ document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
if hasattr(document, 'workflowStatus'):
- document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
+ document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
except Exception as e:
@@ -355,17 +355,17 @@ class GenerationService:
def _getFormatRenderer(self, output_format: str):
"""Get the appropriate renderer for the specified format using auto-discovery."""
try:
- from .renderers.registry import get_renderer
- renderer = get_renderer(output_format, services=self.services)
+ from .renderers.registry import getRenderer
+ renderer = getRenderer(output_format, services=self.services)
if renderer:
return renderer
# Fallback to text renderer if no specific renderer found
logger.warning(f"No renderer found for format {output_format}, falling back to text")
- fallback_renderer = get_renderer('text', services=self.services)
- if fallback_renderer:
- return fallback_renderer
+ fallbackRenderer = getRenderer('text', services=self.services)
+ if fallbackRenderer:
+ return fallbackRenderer
logger.error("Even text renderer fallback failed")
return None
diff --git a/modules/services/serviceGeneration/renderers/registry.py b/modules/services/serviceGeneration/renderers/registry.py
index bb890a82..5065424e 100644
--- a/modules/services/serviceGeneration/renderers/registry.py
+++ b/modules/services/serviceGeneration/renderers/registry.py
@@ -17,7 +17,7 @@ class RendererRegistry:
self._format_mappings: Dict[str, str] = {}
self._discovered = False
- def discover_renderers(self) -> None:
+ def discoverRenderers(self) -> None:
"""Automatically discover and register all renderers by scanning files."""
if self._discovered:
return
@@ -28,38 +28,38 @@ class RendererRegistry:
from pathlib import Path
# Get the directory containing this registry file
- current_dir = Path(__file__).parent
- renderers_dir = current_dir
+ currentDir = Path(__file__).parent
+ renderersDir = currentDir
# Get the package name dynamically
- package_name = __name__.rsplit('.', 1)[0]
+ packageName = __name__.rsplit('.', 1)[0]
# Scan all Python files in the renderers directory
- for file_path in renderers_dir.glob("*.py"):
- if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
+ for filePath in renderersDir.glob("*.py"):
+ if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
continue
# Extract module name from filename
- module_name = file_path.stem
+ moduleName = filePath.stem
try:
# Import the module dynamically
- full_module_name = f"{package_name}.{module_name}"
- module = importlib.import_module(full_module_name)
+ fullModuleName = f"{packageName}.{moduleName}"
+ module = importlib.import_module(fullModuleName)
# Look for renderer classes in the module
- for attr_name in dir(module):
- attr = getattr(module, attr_name)
+ for attrName in dir(module):
+ attr = getattr(module, attrName)
if (isinstance(attr, type) and
issubclass(attr, BaseRenderer) and
attr != BaseRenderer and
- hasattr(attr, 'get_supported_formats')):
+ hasattr(attr, 'getSupportedFormats')):
# Register the renderer
- self._register_renderer_class(attr)
+ self._registerRendererClass(attr)
except Exception as e:
- logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
+ logger.warning(f"Could not load renderer from {moduleName}: {str(e)}")
continue
self._discovered = True
@@ -68,72 +68,72 @@ class RendererRegistry:
logger.error(f"Error during renderer discovery: {str(e)}")
self._discovered = True # Mark as discovered to avoid repeated attempts
- def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
+ def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None:
"""Register a renderer class with its supported formats."""
try:
# Get supported formats from the renderer class
- supported_formats = renderer_class.get_supported_formats()
+ supportedFormats = rendererClass.getSupportedFormats()
- for format_name in supported_formats:
+ for formatName in supportedFormats:
# Register primary format
- self._renderers[format_name.lower()] = renderer_class
+ self._renderers[formatName.lower()] = rendererClass
# Register aliases if any
- if hasattr(renderer_class, 'get_format_aliases'):
- aliases = renderer_class.get_format_aliases()
+ if hasattr(rendererClass, 'getFormatAliases'):
+ aliases = rendererClass.getFormatAliases()
for alias in aliases:
- self._format_mappings[alias.lower()] = format_name.lower()
+ self._format_mappings[alias.lower()] = formatName.lower()
- logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")
+ logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
except Exception as e:
- logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
+ logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")
- def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]:
+ def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format."""
if not self._discovered:
- self.discover_renderers()
+ self.discoverRenderers()
# Normalize format name
- format_name = output_format.lower().strip()
+ formatName = outputFormat.lower().strip()
# Check for aliases first
- if format_name in self._format_mappings:
- format_name = self._format_mappings[format_name]
+ if formatName in self._format_mappings:
+ formatName = self._format_mappings[formatName]
# Get renderer class
- renderer_class = self._renderers.get(format_name)
+ rendererClass = self._renderers.get(formatName)
- if renderer_class:
+ if rendererClass:
try:
- return renderer_class(services=services)
+ return rendererClass(services=services)
except Exception as e:
- logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
+ logger.error(f"Error creating renderer instance for {formatName}: {str(e)}")
return None
- logger.warning(f"No renderer found for format: {output_format}")
+ logger.warning(f"No renderer found for format: {outputFormat}")
return None
- def get_supported_formats(self) -> List[str]:
+ def getSupportedFormats(self) -> List[str]:
"""Get list of all supported formats."""
if not self._discovered:
- self.discover_renderers()
+ self.discoverRenderers()
formats = list(self._renderers.keys())
formats.extend(self._format_mappings.keys())
return sorted(set(formats))
- def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
+ def getRendererInfo(self) -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers."""
if not self._discovered:
- self.discover_renderers()
+ self.discoverRenderers()
info = {}
- for format_name, renderer_class in self._renderers.items():
- info[format_name] = {
- 'class_name': renderer_class.__name__,
- 'module': renderer_class.__module__,
- 'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
+ for formatName, rendererClass in self._renderers.items():
+ info[formatName] = {
+ 'class_name': rendererClass.__name__,
+ 'module': rendererClass.__module__,
+ 'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description'
}
return info
@@ -141,14 +141,14 @@ class RendererRegistry:
# Global registry instance
_registry = RendererRegistry()
-def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]:
+def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format."""
- return _registry.get_renderer(output_format, services)
+ return _registry.getRenderer(outputFormat, services)
-def get_supported_formats() -> List[str]:
+def getSupportedFormats() -> List[str]:
"""Get list of all supported formats."""
- return _registry.get_supported_formats()
+ return _registry.getSupportedFormats()
-def get_renderer_info() -> Dict[str, Dict[str, str]]:
+def getRendererInfo() -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers."""
- return _registry.get_renderer_info()
+ return _registry.getRendererInfo()
diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
index 5444525a..566c7765 100644
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@@ -4,6 +4,7 @@ Base renderer class for all format renderers.
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelJson import supportedSectionTypes
import json
import logging
import re
@@ -23,7 +24,7 @@ class BaseRenderer(ABC):
self.services = services # Add services attribute
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""
Return list of supported format names for this renderer.
Override this method in subclasses to specify supported formats.
@@ -31,7 +32,7 @@ class BaseRenderer(ABC):
return []
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""
Return list of format aliases for this renderer.
Override this method in subclasses to specify format aliases.
@@ -39,7 +40,7 @@ class BaseRenderer(ABC):
return []
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""
Return priority for this renderer (higher number = higher priority).
Used when multiple renderers support the same format.
@@ -47,43 +48,43 @@ class BaseRenderer(ABC):
return 0
@abstractmethod
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""
Render extracted JSON content to the target format.
Args:
- extracted_content: Structured JSON content with sections and metadata
+ extractedContent: Structured JSON content with sections and metadata
title: Report title
- user_prompt: Original user prompt for context
- ai_service: AI service instance for additional processing
+ userPrompt: Original user prompt for context
+ aiService: AI service instance for additional processing
Returns:
- tuple: (rendered_content, mime_type)
+ tuple: (renderedContent, mimeType)
"""
pass
- def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
+ def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract sections from report data."""
- return report_data.get('sections', [])
+ return reportData.get('sections', [])
- def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
+ def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data."""
- return report_data.get('metadata', {})
+ return reportData.get('metadata', {})
- def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
+ def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
"""Get title from report data or use fallback."""
- metadata = report_data.get('metadata', {})
- return metadata.get('title', fallback_title)
+ metadata = reportData.get('metadata', {})
+ return metadata.get('title', fallbackTitle)
- def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
+ def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
"""Validate that JSON content has the expected structure."""
- if not isinstance(json_content, dict):
+ if not isinstance(jsonContent, dict):
return False
- if "sections" not in json_content:
+ if "sections" not in jsonContent:
return False
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
if not isinstance(sections, list):
return False
@@ -96,14 +97,14 @@ class BaseRenderer(ABC):
return True
- def _get_section_type(self, section: Dict[str, Any]) -> str:
+ def _getSectionType(self, section: Dict[str, Any]) -> str:
"""Get the type of a section; default to 'paragraph' for non-dict inputs."""
if isinstance(section, dict):
return section.get("content_type", "paragraph")
# If section is a list or any other type, treat as paragraph elements
return "paragraph"
- def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
+ def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Get the elements of a section; if a list is provided directly, return it."""
if isinstance(section, dict):
return section.get("elements", [])
@@ -111,21 +112,30 @@ class BaseRenderer(ABC):
return section
return []
- def _get_section_id(self, section: Dict[str, Any]) -> str:
+ def _getSectionId(self, section: Dict[str, Any]) -> str:
"""Get the ID of a section (if available)."""
if isinstance(section, dict):
return section.get("id", "unknown")
return "unknown"
- def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
+ def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
"""Extract table headers and rows from section data."""
- headers = section_data.get("headers", [])
- rows = section_data.get("rows", [])
+ # Normalize when elements array was passed in
+ if isinstance(sectionData, list) and sectionData:
+ candidate = sectionData[0]
+ sectionData = candidate if isinstance(candidate, dict) else {}
+ headers = sectionData.get("headers", [])
+ rows = sectionData.get("rows", [])
return headers, rows
- def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
+ def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]:
"""Extract bullet list items from section data."""
- items = section_data.get("items", [])
+ # Normalize when elements array or raw list was passed in
+ if isinstance(sectionData, list):
+ # Already a list of items (strings or dicts)
+ items = sectionData
+ else:
+ items = sectionData.get("items", [])
result = []
for item in items:
if isinstance(item, str):
@@ -134,29 +144,47 @@ class BaseRenderer(ABC):
result.append(item["text"])
return result
- def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
+ def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
"""Extract heading level and text from section data."""
- level = section_data.get("level", 1)
- text = section_data.get("text", "")
+ # Normalize when elements array was passed in
+ if isinstance(sectionData, list) and sectionData:
+ sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
+ level = sectionData.get("level", 1)
+ text = sectionData.get("text", "")
return level, text
- def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
+ def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str:
"""Extract paragraph text from section data."""
- return section_data.get("text", "")
+ if isinstance(sectionData, list):
+ # Join multiple paragraph elements if provided as a list
+ texts = []
+ for el in sectionData:
+ if isinstance(el, dict) and "text" in el:
+ texts.append(el["text"])
+ elif isinstance(el, str):
+ texts.append(el)
+ return "\n".join(texts)
+ return sectionData.get("text", "")
- def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
+ def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract code and language from section data."""
- code = section_data.get("code", "")
- language = section_data.get("language", "")
+ # Normalize when elements array was passed in
+ if isinstance(sectionData, list) and sectionData:
+ sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
+ code = sectionData.get("code", "")
+ language = sectionData.get("language", "")
return code, language
- def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
+ def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract base64 data and alt text from section data."""
- base64_data = section_data.get("base64Data", "")
- alt_text = section_data.get("altText", "Image")
- return base64_data, alt_text
+ # Normalize when elements array was passed in
+ if isinstance(sectionData, list) and sectionData:
+ sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
+ base64Data = sectionData.get("base64Data", "")
+ altText = sectionData.get("altText", "Image")
+ return base64Data, altText
- def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
+ def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
"""
Render an image section. This is a base implementation that should be overridden
by format-specific renderers.
@@ -168,47 +196,47 @@ class BaseRenderer(ABC):
Returns:
Format-specific image representation
"""
- section_data = self._get_section_data(section)
- base64_data, alt_text = self._extract_image_data(section_data)
+ sectionData = self._getSectionData(section)
+ base64Data, altText = self._extractImageData(sectionData)
# Base implementation returns a simple dict
# Format-specific renderers should override this method
return {
"content_type": "image",
- "base64Data": base64_data,
- "altText": alt_text,
- "width": section_data.get("width", None),
- "height": section_data.get("height", None),
- "caption": section_data.get("caption", "")
+ "base64Data": base64Data,
+ "altText": altText,
+ "width": sectionData.get("width", None),
+ "height": sectionData.get("height", None),
+ "caption": sectionData.get("caption", "")
}
- def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
+ def _validateImageData(self, base64Data: str, altText: str) -> bool:
"""Validate image data."""
- if not base64_data:
+ if not base64Data:
self.logger.warning("Image section has no base64 data")
return False
- if not alt_text:
+ if not altText:
self.logger.warning("Image section has no alt text")
return False
# Basic base64 validation
try:
- base64.b64decode(base64_data, validate=True)
+ base64.b64decode(base64Data, validate=True)
return True
except Exception as e:
self.logger.warning(f"Invalid base64 image data: {str(e)}")
return False
- def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
+ def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]:
"""
Get image dimensions from base64 data.
This is a helper method that format-specific renderers can use.
"""
try:
# Decode base64 data
- image_data = base64.b64decode(base64_data)
- image = Image.open(io.BytesIO(image_data))
+ imageData = base64.b64decode(base64Data)
+ image = Image.open(io.BytesIO(imageData))
return image.size # Returns (width, height)
@@ -216,89 +244,89 @@ class BaseRenderer(ABC):
self.logger.warning(f"Could not determine image dimensions: {str(e)}")
return (0, 0)
- def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
+ def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str:
"""
Resize image if it exceeds maximum dimensions.
Returns the resized image as base64 string.
"""
try:
# Decode base64 data
- image_data = base64.b64decode(base64_data)
- image = Image.open(io.BytesIO(image_data))
+ imageData = base64.b64decode(base64Data)
+ image = Image.open(io.BytesIO(imageData))
# Check if resizing is needed
width, height = image.size
- if width <= max_width and height <= max_height:
- return base64_data # No resizing needed
+ if width <= maxWidth and height <= maxHeight:
+ return base64Data # No resizing needed
# Calculate new dimensions maintaining aspect ratio
- ratio = min(max_width / width, max_height / height)
- new_width = int(width * ratio)
- new_height = int(height * ratio)
+ ratio = min(maxWidth / width, maxHeight / height)
+ newWidth = int(width * ratio)
+ newHeight = int(height * ratio)
# Resize image
- resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+ resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)
# Convert back to base64
buffer = io.BytesIO()
- resized_image.save(buffer, format=image.format or 'PNG')
- resized_data = buffer.getvalue()
+ resizedImage.save(buffer, format=image.format or 'PNG')
+ resizedData = buffer.getvalue()
- return base64.b64encode(resized_data).decode('utf-8')
+ return base64.b64encode(resizedData).decode('utf-8')
except Exception as e:
self.logger.warning(f"Could not resize image: {str(e)}")
- return base64_data # Return original if resize fails
+ return base64Data # Return original if resize fails
- def _get_supported_section_types(self) -> List[str]:
- """Return list of supported section types."""
- return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
+ def _getSupportedSectionTypes(self) -> List[str]:
+ """Return list of supported section types (from unified schema)."""
+ return supportedSectionTypes
- def _is_valid_section_type(self, section_type: str) -> bool:
+ def _isValidSectionType(self, sectionType: str) -> bool:
"""Check if a section type is valid."""
- return section_type in self._get_supported_section_types()
+ return sectionType in self._getSupportedSectionTypes()
- def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
+ def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]:
"""Process a section and return structured data based on its type."""
- section_type = self._get_section_type(section)
- section_data = self._get_section_data(section)
+ sectionType = self._getSectionType(section)
+ sectionData = self._getSectionData(section)
- if section_type == "table":
- headers, rows = self._extract_table_data(section_data)
+ if sectionType == "table":
+ headers, rows = self._extractTableData(sectionData)
return {"content_type": "table", "headers": headers, "rows": rows}
- elif section_type == "bullet_list":
- items = self._extract_bullet_list_items(section_data)
+ elif sectionType == "bullet_list":
+ items = self._extractBulletListItems(sectionData)
return {"content_type": "bullet_list", "items": items}
- elif section_type == "heading":
- level, text = self._extract_heading_data(section_data)
+ elif sectionType == "heading":
+ level, text = self._extractHeadingData(sectionData)
return {"content_type": "heading", "level": level, "text": text}
- elif section_type == "paragraph":
- text = self._extract_paragraph_text(section_data)
+ elif sectionType == "paragraph":
+ text = self._extractParagraphText(sectionData)
return {"content_type": "paragraph", "text": text}
- elif section_type == "code_block":
- code, language = self._extract_code_block_data(section_data)
+ elif sectionType == "code_block":
+ code, language = self._extractCodeBlockData(sectionData)
return {"content_type": "code_block", "code": code, "language": language}
- elif section_type == "image":
- base64_data, alt_text = self._extract_image_data(section_data)
+ elif sectionType == "image":
+ base64Data, altText = self._extractImageData(sectionData)
# Validate image data
- if self._validate_image_data(base64_data, alt_text):
+ if self._validateImageData(base64Data, altText):
return {
"content_type": "image",
- "base64Data": base64_data,
- "altText": alt_text,
- "width": section_data.get("width"),
- "height": section_data.get("height"),
- "caption": section_data.get("caption", "")
+ "base64Data": base64Data,
+ "altText": altText,
+ "width": sectionData.get("width") if isinstance(sectionData, dict) else None,
+ "height": sectionData.get("height") if isinstance(sectionData, dict) else None,
+ "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else ""
}
else:
# Return placeholder if image data is invalid
- return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
+ return {"content_type": "paragraph", "text": f"[Image: {altText}]"}
else:
# Fallback to paragraph
- text = self._extract_paragraph_text(section_data)
+ text = self._extractParagraphText(sectionData)
return {"content_type": "paragraph", "text": text}
- def _format_timestamp(self, timestamp: str = None) -> str:
+ def _formatTimestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display."""
if timestamp:
return timestamp
@@ -306,38 +334,38 @@ class BaseRenderer(ABC):
# ===== GENERIC AI STYLING HELPERS =====
- async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+ async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""
Generic AI styling method that can be used by all renderers.
Args:
- ai_service: AI service instance
- style_template: Format-specific style template
- default_styles: Default styles to fall back to
+ aiService: AI service instance
+ styleTemplate: Format-specific style template
+ defaultStyles: Default styles to fall back to
Returns:
Dict with styling definitions
"""
# DEBUG: Show which renderer is calling this method
- if not ai_service:
- return default_styles
+ if not aiService:
+ return defaultStyles
try:
- request_options = AiCallOptions()
- request_options.operationType = OperationTypeEnum.DATA_GENERATE
+ requestOptions = AiCallOptions()
+ requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
- request = AiCallRequest(prompt=style_template, context="", options=request_options)
+ request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
# DEBUG: Show the actual prompt being sent to AI
self.logger.debug(f"AI Style Template Prompt:")
- self.logger.debug(f"{style_template}")
+ self.logger.debug(f"{styleTemplate}")
- response = await ai_service.aiObjects.call(request)
+ response = await aiService.aiObjects.call(request)
# Save styling prompt and response to debug
- self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt")
+ self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
# Clean and parse JSON
@@ -346,12 +374,12 @@ class BaseRenderer(ABC):
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
- return default_styles
+ return defaultStyles
# Extract JSON from markdown if present
- json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
- if json_match:
- result = json_match.group(1).strip()
+ jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
+ if jsonMatch:
+ result = jsonMatch.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
@@ -362,8 +390,8 @@ class BaseRenderer(ABC):
# Try to parse JSON
try:
styles = json.loads(result)
- except json.JSONDecodeError as json_error:
- self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
+ except json.JSONDecodeError as jsonError:
+ self.logger.warning(f"AI styling returned invalid JSON: {jsonError}")
# Use print instead of logger to avoid truncation
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
@@ -372,88 +400,88 @@ class BaseRenderer(ABC):
self.logger.warning(f"Raw content that failed to parse: {result}")
# Try to fix incomplete JSON by adding missing closing braces
- open_braces = result.count('{')
- close_braces = result.count('}')
+ openBraces = result.count('{')
+ closeBraces = result.count('}')
- if open_braces > close_braces:
+ if openBraces > closeBraces:
# JSON is incomplete, add missing closing braces
- missing_braces = open_braces - close_braces
- result = result + '}' * missing_braces
- self.logger.info(f"Added {missing_braces} missing closing brace(s)")
+ missingBraces = openBraces - closeBraces
+ result = result + '}' * missingBraces
+ self.logger.info(f"Added {missingBraces} missing closing brace(s)")
self.logger.debug(f"Fixed JSON: {result}")
# Try parsing the fixed JSON
try:
styles = json.loads(result)
self.logger.info("Successfully fixed incomplete JSON")
- except json.JSONDecodeError as fix_error:
- self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
+ except json.JSONDecodeError as fixError:
+ self.logger.warning(f"Fixed JSON still invalid: {fixError}")
self.logger.warning(f"Fixed JSON content: {result}")
# Try to extract just the JSON part if it's embedded in text
- json_start = result.find('{')
- json_end = result.rfind('}')
- if json_start != -1 and json_end != -1 and json_end > json_start:
- json_part = result[json_start:json_end+1]
+ jsonStart = result.find('{')
+ jsonEnd = result.rfind('}')
+ if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
+ jsonPart = result[jsonStart:jsonEnd+1]
try:
- styles = json.loads(json_part)
+ styles = json.loads(jsonPart)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
- return default_styles
+ return defaultStyles
else:
- return default_styles
+ return defaultStyles
else:
# Try to extract just the JSON part if it's embedded in text
- json_start = result.find('{')
- json_end = result.rfind('}')
- if json_start != -1 and json_end != -1 and json_end > json_start:
- json_part = result[json_start:json_end+1]
+ jsonStart = result.find('{')
+ jsonEnd = result.rfind('}')
+ if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
+ jsonPart = result[jsonStart:jsonEnd+1]
try:
- styles = json.loads(json_part)
+ styles = json.loads(jsonPart)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
- return default_styles
+ return defaultStyles
else:
- return default_styles
+ return defaultStyles
# Convert colors to appropriate format
- styles = self._convert_colors_format(styles)
+ styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
- return default_styles
+ return defaultStyles
- def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""
Convert colors to appropriate format based on renderer type.
Override this method in subclasses for format-specific color handling.
"""
return styles
- def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
+ def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str:
"""
Create a standardized AI style template for any format.
Args:
- format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
- user_prompt: User's original prompt
- style_schema: Format-specific style schema
+ formatName: Name of the format (e.g., "docx", "xlsx", "pptx")
+ userPrompt: User's original prompt
+ styleSchema: Format-specific style schema
Returns:
Formatted prompt string
"""
- schema_json = json.dumps(style_schema, indent=4)
+ schemaJson = json.dumps(styleSchema, indent=4)
# DEBUG: Show the schema being sent
- return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.
+ return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents.
Use this schema as a template and customize the values for professional document styling:
-{schema_json}
+{schemaJson}
Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations)
diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py
index 5ad2d4bc..3e1ef3d5 100644
--- a/modules/services/serviceGeneration/renderers/rendererCsv.py
+++ b/modules/services/serviceGeneration/renderers/rendererCsv.py
@@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported CSV formats."""
return ['csv']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['spreadsheet', 'table']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for CSV renderer."""
return 70
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to CSV format."""
try:
# Generate CSV directly from JSON (no styling needed for CSV)
- csv_content = await self._generate_csv_from_json(extracted_content, title)
+ csvContent = await self._generateCsvFromJson(extractedContent, title)
- return csv_content, "text/csv"
+ return csvContent, "text/csv"
except Exception as e:
self.logger.error(f"Error rendering CSV: {str(e)}")
# Return minimal CSV fallback
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
- async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
+ async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate CSV content from structured JSON document."""
try:
# Validate JSON structure
- if not isinstance(json_content, dict):
+ if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
- if "sections" not in json_content:
+ if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
- document_title = json_content.get("metadata", {}).get("title", title)
+ documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Generate CSV content
- csv_rows = []
+ csvRows = []
# Add title row
- if document_title:
- csv_rows.append([document_title])
- csv_rows.append([]) # Empty row
+ if documentTitle:
+ csvRows.append([documentTitle])
+ csvRows.append([]) # Empty row
# Process each section in order
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
for section in sections:
- section_csv = self._render_json_section_to_csv(section)
- if section_csv:
- csv_rows.extend(section_csv)
- csv_rows.append([]) # Empty row between sections
+ sectionCsv = self._renderJsonSectionToCsv(section)
+ if sectionCsv:
+ csvRows.extend(sectionCsv)
+ csvRows.append([]) # Empty row between sections
# Convert to CSV string
- csv_content = self._convert_rows_to_csv(csv_rows)
+ csvContent = self._convertRowsToCsv(csvRows)
- return csv_content
+ return csvContent
except Exception as e:
self.logger.error(f"Error generating CSV from JSON: {str(e)}")
raise Exception(f"CSV generation failed: {str(e)}")
- def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
+ def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]:
"""Render a single JSON section to CSV rows."""
try:
- section_type = section.get("content_type", "paragraph")
+ sectionType = section.get("content_type", "paragraph")
elements = section.get("elements", [])
- csv_rows = []
+ csvRows = []
# Add section title if available
- section_title = section.get("title")
- if section_title:
- csv_rows.append([f"# {section_title}"])
+ sectionTitle = section.get("title")
+ if sectionTitle:
+ csvRows.append([f"# {sectionTitle}"])
# Process each element in the section
for element in elements:
- if section_type == "table":
- csv_rows.extend(self._render_json_table_to_csv(element))
- elif section_type == "list":
- csv_rows.extend(self._render_json_list_to_csv(element))
- elif section_type == "heading":
- csv_rows.extend(self._render_json_heading_to_csv(element))
- elif section_type == "paragraph":
- csv_rows.extend(self._render_json_paragraph_to_csv(element))
- elif section_type == "code":
- csv_rows.extend(self._render_json_code_to_csv(element))
+ if sectionType == "table":
+ csvRows.extend(self._renderJsonTableToCsv(element))
+ elif sectionType == "list":
+ csvRows.extend(self._renderJsonListToCsv(element))
+ elif sectionType == "heading":
+ csvRows.extend(self._renderJsonHeadingToCsv(element))
+ elif sectionType == "paragraph":
+ csvRows.extend(self._renderJsonParagraphToCsv(element))
+ elif sectionType == "code":
+ csvRows.extend(self._renderJsonCodeToCsv(element))
else:
# Fallback to paragraph for unknown types
- csv_rows.extend(self._render_json_paragraph_to_csv(element))
+ csvRows.extend(self._renderJsonParagraphToCsv(element))
- return csv_rows
+ return csvRows
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
return [["[Error rendering section]"]]
- def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
+ def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON table to CSV rows."""
try:
- headers = table_data.get("headers", [])
- rows = table_data.get("rows", [])
+ headers = tableData.get("headers", [])
+ rows = tableData.get("rows", [])
- csv_rows = []
+ csvRows = []
if headers:
- csv_rows.append(headers)
+ csvRows.append(headers)
if rows:
- csv_rows.extend(rows)
+ csvRows.extend(rows)
- return csv_rows
+ return csvRows
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return [["[Error rendering table]"]]
- def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
+ def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON list to CSV rows."""
try:
- items = list_data.get("items", [])
- csv_rows = []
+ items = listData.get("items", [])
+ csvRows = []
for item in items:
if isinstance(item, dict):
text = item.get("text", "")
subitems = item.get("subitems", [])
- csv_rows.append([text])
+ csvRows.append([text])
# Add subitems as indented rows
for subitem in subitems:
if isinstance(subitem, dict):
- csv_rows.append([f" - {subitem.get('text', '')}"])
+ csvRows.append([f" - {subitem.get('text', '')}"])
else:
- csv_rows.append([f" - {subitem}"])
+ csvRows.append([f" - {subitem}"])
else:
- csv_rows.append([str(item)])
+ csvRows.append([str(item)])
- return csv_rows
+ return csvRows
except Exception as e:
self.logger.warning(f"Error rendering list: {str(e)}")
return [["[Error rendering list]"]]
- def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
+ def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON heading to CSV rows."""
try:
- text = heading_data.get("text", "")
- level = heading_data.get("level", 1)
+ text = headingData.get("text", "")
+ level = headingData.get("level", 1)
if text:
# Use # symbols for heading levels
- heading_text = f"{'#' * level} {text}"
- return [[heading_text]]
+ headingText = f"{'#' * level} {text}"
+ return [[headingText]]
return []
@@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return [["[Error rendering heading]"]]
- def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
+ def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON paragraph to CSV rows."""
try:
- text = paragraph_data.get("text", "")
+ text = paragraphData.get("text", "")
if text:
# Split long paragraphs into multiple rows if needed
if len(text) > 100:
words = text.split()
rows = []
- current_row = []
- current_length = 0
+ currentRow = []
+ currentLength = 0
for word in words:
- if current_length + len(word) > 100 and current_row:
- rows.append([" ".join(current_row)])
- current_row = [word]
- current_length = len(word)
+ if currentLength + len(word) > 100 and currentRow:
+ rows.append([" ".join(currentRow)])
+ currentRow = [word]
+ currentLength = len(word)
else:
- current_row.append(word)
- current_length += len(word) + 1
+ currentRow.append(word)
+ currentLength += len(word) + 1
- if current_row:
- rows.append([" ".join(current_row)])
+ if currentRow:
+ rows.append([" ".join(currentRow)])
return rows
else:
@@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return [["[Error rendering paragraph]"]]
- def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
+ def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON code block to CSV rows."""
try:
- code = code_data.get("code", "")
- language = code_data.get("language", "")
+ code = codeData.get("code", "")
+ language = codeData.get("language", "")
- csv_rows = []
+ csvRows = []
if language:
- csv_rows.append([f"Code ({language}):"])
+ csvRows.append([f"Code ({language}):"])
if code:
# Split code into lines
- code_lines = code.split('\n')
- for line in code_lines:
- csv_rows.append([f" {line}"])
+ codeLines = code.split('\n')
+ for line in codeLines:
+ csvRows.append([f" {line}"])
- return csv_rows
+ return csvRows
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return [["[Error rendering code block]"]]
- def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
+ def _convertRowsToCsv(self, rows: List[List[str]]) -> str:
"""Convert rows to CSV string."""
import csv
import io
@@ -245,7 +245,7 @@ class RendererCsv(BaseRenderer):
return output.getvalue()
- def _clean_csv_content(self, content: str, title: str) -> str:
+ def _cleanCsvContent(self, content: str, title: str) -> str:
"""Clean and validate CSV content from AI."""
content = content.strip()
diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py
index 42bb71f3..6db48c32 100644
--- a/modules/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/services/serviceGeneration/renderers/rendererDocx.py
@@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer):
"""Renders content to DOCX format using python-docx."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported DOCX formats."""
return ['docx', 'doc']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['word', 'document']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for DOCX renderer."""
return 115
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
- self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER")
+ self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try:
if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml
- html_renderer = RendererHtml()
- html_content, _ = await html_renderer.render(extracted_content, title)
- return html_content, "text/html"
+ htmlRenderer = RendererHtml()
+ htmlContent, _ = await htmlRenderer.render(extractedContent, title)
+ return htmlContent, "text/html"
# Generate DOCX using AI-analyzed styling
- docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service)
+ docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
@@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer):
# Return minimal fallback
return f"DOCX Generation Error: {str(e)}", "text/plain"
- async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+ async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate DOCX content from structured JSON document using AI-generated styling."""
try:
# Create new document
doc = Document()
# Get AI-generated styling definitions
- self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
- styles = await self._get_docx_styles(user_prompt, ai_service)
+ self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...")
+ styles = await self._getDocxStyles(userPrompt, aiService)
# Apply basic document setup
- self._setup_basic_document_styles(doc)
+ self._setupBasicDocumentStyles(doc)
# Validate JSON structure
if not isinstance(json_content, dict):
@@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer):
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
raise Exception(f"DOCX generation failed: {str(e)}")
- async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+ async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get DOCX styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
- style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
- styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
+ style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema)
+ styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles())
# Validate and fix contrast issues
- return self._validate_styles_contrast(styles)
+ return self._validateStylesContrast(styles)
- def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
@@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
- return self._get_default_styles()
+ return self._getDefaultStyles()
- def _get_default_styles(self) -> Dict[str, Any]:
+ def _getDefaultStyles(self) -> Dict[str, Any]:
"""Default DOCX styles."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
- def _setup_basic_document_styles(self, doc: Document) -> None:
+ def _setupBasicDocumentStyles(self, doc: Document) -> None:
"""Set up basic document styles."""
try:
# Set default font
@@ -189,7 +189,7 @@ class RendererDocx(BaseRenderer):
- def _clear_template_content(self, doc: Document) -> None:
+ def _clearTemplateContent(self, doc: Document) -> None:
"""Clear template content while preserving styles."""
try:
# Remove all paragraphs except keep the styles
@@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not clear template content: {str(e)}")
- def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a single JSON section to DOCX using AI-generated styles."""
try:
section_type = section.get("content_type", "paragraph")
@@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer):
# Process each element in the section
for element in elements:
if section_type == "table":
- self._render_json_table(doc, element, styles)
+ self._renderJsonTable(doc, element, styles)
elif section_type == "bullet_list":
- self._render_json_bullet_list(doc, element, styles)
+ self._renderJsonBulletList(doc, element, styles)
elif section_type == "heading":
- self._render_json_heading(doc, element, styles)
+ self._renderJsonHeading(doc, element, styles)
elif section_type == "paragraph":
- self._render_json_paragraph(doc, element, styles)
+ self._renderJsonParagraph(doc, element, styles)
elif section_type == "code_block":
- self._render_json_code_block(doc, element, styles)
+ self._renderJsonCodeBlock(doc, element, styles)
elif section_type == "image":
- self._render_json_image(doc, element, styles)
+ self._renderJsonImage(doc, element, styles)
else:
# Fallback to paragraph for unknown types
- self._render_json_paragraph(doc, element, styles)
+ self._renderJsonParagraph(doc, element, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
# Add error paragraph as fallback
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
- def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON table to DOCX using AI-generated styles."""
try:
headers = table_data.get("headers", [])
@@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer):
# Apply table borders based on AI style
border_style = styles["table_border"]["style"]
if border_style == "horizontal_only":
- self._apply_horizontal_borders_only(table)
+ self._applyHorizontalBordersOnly(table)
elif border_style == "grid":
table.style = 'Table Grid'
# else: no borders
@@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer):
# Apply background color
bg_color = header_style["background"].lstrip('#')
- self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
+ self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
# Apply text styling
for paragraph in cell.paragraphs:
@@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
- def _apply_horizontal_borders_only(self, table) -> None:
+ def _applyHorizontalBordersOnly(self, table) -> None:
"""Apply only horizontal borders to the table (no vertical borders)."""
try:
from docx.oxml.shared import OxmlElement, qn
@@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
- def _set_cell_background(self, cell, color: RGBColor) -> None:
+ def _setCellBackground(self, cell, color: RGBColor) -> None:
"""Set the background color of a table cell."""
try:
from docx.oxml.shared import OxmlElement, qn
@@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Could not set cell background: {str(e)}")
- def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles."""
try:
items = list_data.get("items", [])
@@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
- def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON heading to DOCX using AI-generated styles."""
try:
level = heading_data.get("level", 1)
@@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
- def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON paragraph to DOCX using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
@@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
- def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles."""
try:
code = code_data.get("code", "")
@@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
- def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON image to DOCX."""
try:
base64_data = image_data.get("base64Data", "")
@@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Error rendering image: {str(e)}")
doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
- def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]:
+ def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
"""Extract document structure from user prompt."""
structure = {
'title': title,
@@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer):
'format': 'standard'
}
- if not user_prompt:
+ if not userPrompt:
return structure
# Extract title from prompt if not provided
if not title or title == "Generated Document":
# Look for "create a ... document" or "generate a ... report"
import re
- title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower())
+ title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
if title_match:
structure['title'] = title_match.group(1).strip().title()
# Extract sections from numbered lists in prompt
import re
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
- sections = re.findall(section_pattern, user_prompt)
+ sections = re.findall(section_pattern, userPrompt)
for num, section_text in sections:
structure['sections'].append({
@@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer):
# If no numbered sections found, try to extract from "including:" patterns
if not structure['sections']:
- including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL)
+ including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
if including_match:
including_text = including_match.group(1)
# Split by common separators
@@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer):
if not structure['sections']:
# Look for bullet points or dashes
bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
- bullets = re.findall(bullet_pattern, user_prompt)
+ bullets = re.findall(bullet_pattern, userPrompt)
for i, bullet in enumerate(bullets, 1):
bullet = bullet.strip()
if bullet and len(bullet) > 3:
@@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer):
# If still no sections, extract from sentence structure
if not structure['sections']:
# Split prompt into sentences and use as sections
- sentences = re.split(r'[.!?]\s+', user_prompt)
+ sentences = re.split(r'[.!?]\s+', userPrompt)
for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
sentence = sentence.strip()
if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
@@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer):
action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
found_actions = []
for action in action_words:
- if action in user_prompt.lower():
+ if action in userPrompt.lower():
found_actions.append(action.title())
if found_actions:
@@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer):
return structure
- def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]):
+ def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
"""Generate DOCX content based on extracted structure."""
# Add sections based on prompt structure
for section in structure['sections']:
@@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer):
# Add AI-generated content for this section
# Try to extract relevant content for this section from the AI response
- section_content = self._extract_section_content(content, section['title'])
+ section_content = self._extractSectionContent(content, section['title'])
if section_content:
doc.add_paragraph(section_content)
@@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer):
doc.add_heading("Complete Analysis", level=1)
doc.add_paragraph(content)
- def _extract_section_content(self, content: str, section_title: str) -> str:
+ def _extractSectionContent(self, content: str, section_title: str) -> str:
"""Extract relevant content for a specific section from AI response."""
if not content or not section_title:
return ""
@@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer):
return ""
- def _setup_document_styles(self, doc):
+ def _setupDocumentStyles(self, doc):
"""Set up document styles."""
try:
# Set default font
@@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not set up document styles: {str(e)}")
- def _process_section(self, doc, lines: list):
+ def _processSection(self, doc, lines: list):
"""Process a section of content into DOCX elements."""
for line in lines:
if not line.strip():
@@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer):
# Check for tables (lines with |)
if '|' in line and not line.startswith('|'):
# This might be part of a table, process as table
- table_data = self._extract_table_data(lines)
+ table_data = self._extractTableData(lines)
if table_data:
- self._add_table(doc, table_data)
+ self._addTable(doc, table_data)
return
# Check for lists
@@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer):
# Regular paragraph
doc.add_paragraph(line)
- def _extract_table_data(self, lines: list) -> list:
+ def _extractTableData(self, lines: list) -> list:
"""Extract table data from lines."""
table_data = []
in_table = False
@@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer):
return table_data if len(table_data) > 1 else []
- def _add_table(self, doc, table_data: list):
+ def _addTable(self, doc, table_data: list):
"""Add a table to the document."""
try:
if not table_data:
@@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer):
table.rows[row_idx].cells[col_idx].text = cell_data
# Style the table
- self._style_table(table)
+ self._styleTable(table)
except Exception as e:
self.logger.warning(f"Could not add table: {str(e)}")
- def _style_table(self, table):
+ def _styleTable(self, table):
"""Apply styling to the table."""
try:
# Style header row
@@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not style table: {str(e)}")
- def _process_table_row(self, doc, line: str):
+ def _processTableRow(self, doc, line: str):
"""Process a table row and add it to the document."""
if not line.strip():
return
@@ -745,7 +745,7 @@ class RendererDocx(BaseRenderer):
# Not a table row, treat as regular text
doc.add_paragraph(line)
- def _clean_ai_content(self, content: str) -> str:
+ def _cleanAiContent(self, content: str) -> str:
"""Clean AI-generated content by removing debug information and duplicates."""
if not content:
return ""
@@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer):
return '\n\n'.join(unique_sections)
- def _process_tables(self, doc, content: str) -> str:
+ def _processTables(self, doc, content: str) -> str:
"""
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
Returns the content with tables replaced by placeholders.
@@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer):
return '\n'.join(processed_lines)
- def _parse_and_format_content(self, doc, content: str, title: str):
+ def _parseAndFormatContent(self, doc, content: str, title: str):
"""Parse AI-generated content in standardized format and apply proper DOCX formatting."""
if not content:
return
# Process tables and replace them with placeholders
- content = self._process_tables(doc, content)
+ content = self._processTables(doc, content)
# Parse content line by line in exact sequence
lines = content.split('\n')
@@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer):
# Regular paragraph
else:
- self._add_paragraph_to_doc(doc, line)
+ self._addParagraphToDoc(doc, line)
- def _add_paragraph_to_doc(self, doc, text: str):
+ def _addParagraphToDoc(self, doc, text: str):
"""Add a paragraph to the document with proper formatting."""
if not text.strip():
return
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 1b202886..660a16c2 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported HTML formats."""
return ['html', 'htm']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['web', 'webpage']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for HTML renderer."""
return 100
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
try:
# Generate HTML using AI-analyzed styling
- html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
+ htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
- return html_content, "text/html"
+ return htmlContent, "text/html"
except Exception as e:
self.logger.error(f"Error rendering HTML: {str(e)}")
# Return minimal HTML fallback
return f"{title}{title}
Error rendering report: {str(e)}
", "text/html"
- async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+ async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
- styles = await self._get_html_styles(user_prompt, ai_service)
+ styles = await self._getHtmlStyles(userPrompt, aiService)
# Validate JSON structure
- if not isinstance(json_content, dict):
+ if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
- if "sections" not in json_content:
+ if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
- document_title = json_content.get("metadata", {}).get("title", title)
+ documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build HTML document
- html_parts = []
+ htmlParts = []
# HTML document structure
- html_parts.append('')
- html_parts.append('')
- html_parts.append('')
- html_parts.append('')
- html_parts.append('')
- html_parts.append(f'{document_title}')
- html_parts.append('')
- html_parts.append('')
- html_parts.append('')
+ htmlParts.append('')
+ htmlParts.append('')
+ htmlParts.append('')
+ htmlParts.append('')
+ htmlParts.append('')
+ htmlParts.append(f'{documentTitle}')
+ htmlParts.append('')
+ htmlParts.append('')
+ htmlParts.append('')
# Document header
- html_parts.append(f'')
+ htmlParts.append(f'')
# Main content
- html_parts.append('')
+ htmlParts.append('')
# Process each section
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
for section in sections:
- section_html = self._render_json_section(section, styles)
- if section_html:
- html_parts.append(section_html)
+ sectionHtml = self._renderJsonSection(section, styles)
+ if sectionHtml:
+ htmlParts.append(sectionHtml)
- html_parts.append('')
+ htmlParts.append('')
# Footer
- html_parts.append('')
+ htmlParts.append('')
- html_parts.append('')
- html_parts.append('')
+ htmlParts.append('')
+ htmlParts.append('')
- return '\n'.join(html_parts)
+ return '\n'.join(htmlParts)
except Exception as e:
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
raise Exception(f"HTML generation failed: {str(e)}")
- async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+ async def _getHtmlStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get HTML styling definitions using base template AI styling."""
- style_schema = {
+ styleSchema = {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
@@ -113,40 +113,40 @@ class RendererHtml(BaseRenderer):
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
- style_template = self._create_ai_style_template("html", user_prompt, style_schema)
- styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles())
+ styleTemplate = self._createAiStyleTemplate("html", userPrompt, styleSchema)
+ styles = await self._getAiStyles(aiService, styleTemplate, self._getDefaultHtmlStyles())
# Validate and fix contrast issues
- return self._validate_html_styles_contrast(styles)
+ return self._validateHtmlStylesContrast(styles)
- def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _validateHtmlStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
- bg_color = header.get("background", "#FFFFFF")
- text_color = header.get("color", "#000000")
+ bgColor = header.get("background", "#FFFFFF")
+ textColor = header.get("color", "#000000")
# If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
+ if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
+ elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
- bg_color = cell.get("background", "#FFFFFF")
- text_color = cell.get("color", "#000000")
+ bgColor = cell.get("background", "#FFFFFF")
+ textColor = cell.get("color", "#000000")
# If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
+ if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
+ elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
@@ -154,10 +154,10 @@ class RendererHtml(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
- return self._get_default_html_styles()
+ return self._getDefaultHtmlStyles()
- def _get_default_html_styles(self) -> Dict[str, Any]:
+ def _getDefaultHtmlStyles(self) -> Dict[str, Any]:
"""Default HTML styles."""
return {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
@@ -173,7 +173,7 @@ class RendererHtml(BaseRenderer):
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
- def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
+ def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions."""
css_parts = []
@@ -271,109 +271,109 @@ class RendererHtml(BaseRenderer):
return '\n'.join(css_parts)
- def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles."""
try:
- section_type = self._get_section_type(section)
- section_data = self._get_section_data(section)
+ sectionType = self._getSectionType(section)
+ sectionData = self._getSectionData(section)
- if section_type == "table":
+ if sectionType == "table":
# Process the section data to extract table structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_table(processed_data, styles)
- elif section_type == "bullet_list":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonTable(processedData, styles)
+ elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_bullet_list(processed_data, styles)
- elif section_type == "heading":
- return self._render_json_heading(section_data, styles)
- elif section_type == "paragraph":
- return self._render_json_paragraph(section_data, styles)
- elif section_type == "code_block":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonBulletList(processedData, styles)
+ elif sectionType == "heading":
+ return self._renderJsonHeading(sectionData, styles)
+ elif sectionType == "paragraph":
+ return self._renderJsonParagraph(sectionData, styles)
+ elif sectionType == "code_block":
# Process the section data to extract code block structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_code_block(processed_data, styles)
- elif section_type == "image":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonCodeBlock(processedData, styles)
+ elif sectionType == "image":
# Process the section data to extract image structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_image(processed_data, styles)
+ processedData = self._processSectionByType(section)
+ return self._renderJsonImage(processedData, styles)
else:
# Fallback to paragraph for unknown types
- return self._render_json_paragraph(section_data, styles)
+ return self._renderJsonParagraph(sectionData, styles)
except Exception as e:
- self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
+ self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f'[Error rendering section: {str(e)}]
'
- def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON table to HTML using AI-generated styles."""
try:
- headers = table_data.get("headers", [])
- rows = table_data.get("rows", [])
+ headers = tableData.get("headers", [])
+ rows = tableData.get("rows", [])
if not headers or not rows:
return ""
- html_parts = ['']
+ htmlParts = ['']
# Table header
- html_parts.append('')
+ htmlParts.append('')
for header in headers:
- html_parts.append(f'| {header} | ')
- html_parts.append('
')
+ htmlParts.append(f'{header} | ')
+ htmlParts.append('
')
# Table body
- html_parts.append('')
+ htmlParts.append('')
for row in rows:
- html_parts.append('')
- for cell_data in row:
- html_parts.append(f'| {cell_data} | ')
- html_parts.append('
')
- html_parts.append('')
+ htmlParts.append('')
+ for cellData in row:
+ htmlParts.append(f'| {cellData} | ')
+ htmlParts.append('
')
+ htmlParts.append('')
- html_parts.append('
')
- return '\n'.join(html_parts)
+ htmlParts.append('
')
+ return '\n'.join(htmlParts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
- def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON bullet list to HTML using AI-generated styles."""
try:
- items = list_data.get("items", [])
+ items = listData.get("items", [])
if not items:
return ""
- html_parts = ['']
+ htmlParts = ['']
for item in items:
if isinstance(item, str):
- html_parts.append(f'- {item}
')
+ htmlParts.append(f'- {item}
')
elif isinstance(item, dict) and "text" in item:
- html_parts.append(f'- {item["text"]}
')
- html_parts.append('
')
+ htmlParts.append(f'- {item["text"]}
')
+ htmlParts.append('
')
- return '\n'.join(html_parts)
+ return '\n'.join(htmlParts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
- def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
- if isinstance(heading_data, str):
- heading_data = {"text": heading_data, "level": 2}
- elif isinstance(heading_data, list):
+ if isinstance(headingData, str):
+ headingData = {"text": headingData, "level": 2}
+ elif isinstance(headingData, list):
# Render a list as bullet list under a default heading label
- return self._render_json_bullet_list({"items": heading_data}, styles)
- elif not isinstance(heading_data, dict):
+ return self._renderJsonBulletList({"items": headingData}, styles)
+ elif not isinstance(headingData, dict):
return ""
- level = heading_data.get("level", 1)
- text = heading_data.get("text", "")
+ level = headingData.get("level", 1)
+ text = headingData.get("text", "")
if text:
level = max(1, min(6, level))
@@ -385,19 +385,19 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
- def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
- if isinstance(paragraph_data, str):
- paragraph_data = {"text": paragraph_data}
- elif isinstance(paragraph_data, list):
+ if isinstance(paragraphData, str):
+ paragraphData = {"text": paragraphData}
+ elif isinstance(paragraphData, list):
# Treat list as bullet list paragraph
- return self._render_json_bullet_list({"items": paragraph_data}, styles)
- elif not isinstance(paragraph_data, dict):
+ return self._renderJsonBulletList({"items": paragraphData}, styles)
+ elif not isinstance(paragraphData, dict):
return ""
- text = paragraph_data.get("text", "")
+ text = paragraphData.get("text", "")
if text:
return f'{text}
'
@@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
- def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON code block to HTML using AI-generated styles."""
try:
- code = code_data.get("code", "")
- language = code_data.get("language", "")
+ code = codeData.get("code", "")
+ language = codeData.get("language", "")
if code:
if language:
@@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
- def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
+ def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON image to HTML."""
try:
- base64_data = image_data.get("base64Data", "")
- alt_text = image_data.get("altText", "Image")
+ base64Data = imageData.get("base64Data", "")
+ altText = imageData.get("altText", "Image")
- if base64_data:
- return f'
'
+ if base64Data:
+ return f'
'
return ""
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
- return f'[Image: {image_data.get("altText", "Image")}]
'
+ return f'[Image: {imageData.get("altText", "Image")}]
'
diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py
index 4db495dc..bfc89927 100644
--- a/modules/services/serviceGeneration/renderers/rendererImage.py
+++ b/modules/services/serviceGeneration/renderers/rendererImage.py
@@ -12,154 +12,156 @@ class RendererImage(BaseRenderer):
"""Renders content to image format using AI image generation."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported image formats."""
return ['png', 'jpg', 'jpeg', 'image']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['img', 'picture', 'photo', 'graphic']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for image renderer."""
return 90
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to image format using AI image generation."""
try:
# Generate AI image from content
- image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
+ imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
- return image_content, "image/png"
+ return imageContent, "image/png"
except Exception as e:
self.logger.error(f"Error rendering image: {str(e)}")
# Re-raise the exception instead of using fallback
raise Exception(f"Image rendering failed: {str(e)}")
- async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+ async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate AI image from extracted content."""
try:
- if not ai_service:
+ if not aiService:
raise ValueError("AI service is required for image generation")
# Validate JSON structure
- if not isinstance(extracted_content, dict):
+ if not isinstance(extractedContent, dict):
raise ValueError("Extracted content must be a dictionary")
- if "sections" not in extracted_content:
+ if "sections" not in extractedContent:
raise ValueError("Extracted content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
- document_title = extracted_content.get("metadata", {}).get("title", title)
+ documentTitle = extractedContent.get("metadata", {}).get("title", title)
# Create AI prompt for image generation
- image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service)
+ imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)
# Save image generation prompt to debug
- ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt")
+ aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")
# Generate image using AI
- image_result = await ai_service.aiObjects.generateImage(
- prompt=image_prompt,
+ imageResult = await aiService.aiObjects.generateImage(
+ prompt=imagePrompt,
size="1024x1024",
quality="standard",
style="vivid"
)
# Save image generation response to debug
- ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response")
+ aiService.services.utils.writeDebugFile(str(imageResult), "image_generation_response")
# Extract base64 image data from result
- if image_result and image_result.get("success", False):
- image_data = image_result.get("image_data", "")
- if image_data:
- return image_data
+ if imageResult and imageResult.get("success", False):
+ imageData = imageResult.get("image_data", "")
+ if imageData:
+ return imageData
else:
raise ValueError("No image data returned from AI")
else:
- error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
- raise ValueError(f"AI image generation failed: {error_msg}")
+ errorMsg = imageResult.get("error", "Unknown error") if imageResult else "No result"
+ raise ValueError(f"AI image generation failed: {errorMsg}")
except Exception as e:
self.logger.error(f"Error generating AI image: {str(e)}")
raise Exception(f"AI image generation failed: {str(e)}")
- async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+ async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Create a detailed prompt for AI image generation based on the content."""
try:
# Start with base prompt
- prompt_parts = []
+ promptParts = []
# Add user's original intent if available
- if user_prompt:
- prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}")
+ if userPrompt:
+ sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
+ promptParts.append(f"User Request: {sanitized_prompt}")
# Add document title
- prompt_parts.append(f"Document Title: {title}")
+ promptParts.append(f"Document Title: {title}")
# Analyze content and create visual description
- sections = extracted_content.get("sections", [])
- content_description = self._analyze_content_for_visual_description(sections)
+ sections = extractedContent.get("sections", [])
+ contentDescription = self._analyzeContentForVisualDescription(sections)
- if content_description:
- prompt_parts.append(f"Content to Visualize: {content_description}")
+ if contentDescription:
+ promptParts.append(f"Content to Visualize: {contentDescription}")
# Add style guidance
- style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
- if style_guidance:
- prompt_parts.append(f"Visual Style: {style_guidance}")
+ styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
+ if styleGuidance:
+ promptParts.append(f"Visual Style: {styleGuidance}")
# Combine all parts
- full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)
+ fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)
# Add technical requirements
- full_prompt += "\n\nTechnical Requirements:"
- full_prompt += "\n- High quality, professional appearance"
- full_prompt += "\n- Clear, readable text if any text is included"
- full_prompt += "\n- Appropriate colors and layout"
- full_prompt += "\n- Suitable for business/professional use"
+ fullPrompt += "\n\nTechnical Requirements:"
+ fullPrompt += "\n- High quality, professional appearance"
+ fullPrompt += "\n- Clear, readable text if any text is included"
+ fullPrompt += "\n- Appropriate colors and layout"
+ fullPrompt += "\n- Suitable for business/professional use"
# Truncate prompt if it exceeds DALL-E's 4000 character limit
- if len(full_prompt) > 4000:
+ if len(fullPrompt) > 4000:
# Use AI to compress the prompt intelligently
- compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
- if compressed_prompt and len(compressed_prompt) <= 4000:
- return compressed_prompt
+ compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
+ if compressedPrompt and len(compressedPrompt) <= 4000:
+ return compressedPrompt
# Fallback to minimal prompt if AI compression fails or is still too long
- minimal_prompt = f"Create a professional image representing: {title}"
- if user_prompt:
- minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}"
+ minimalPrompt = f"Create a professional image representing: {title}"
+ if userPrompt:
+ sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
+ minimalPrompt += f" - {sanitized_prompt}"
# If even the minimal prompt is too long, truncate it
- if len(minimal_prompt) > 4000:
- minimal_prompt = minimal_prompt[:3997] + "..."
+ if len(minimalPrompt) > 4000:
+ minimalPrompt = minimalPrompt[:3997] + "..."
- return minimal_prompt
+ return minimalPrompt
- return full_prompt
+ return fullPrompt
except Exception as e:
self.logger.warning(f"Error creating image prompt: {str(e)}")
# Fallback to simple prompt
return f"Create a professional image representing: {title}"
- async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
+ async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str:
"""Use AI to intelligently compress a long prompt while preserving key information."""
try:
- if not ai_service:
+ if not aiService:
return None
- compression_prompt = f"""
+ compressionPrompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.
The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
-Original prompt ({len(long_prompt)} characters):
-{long_prompt}
+Original prompt ({len(longPrompt)} characters):
+{longPrompt}
Please create a compressed version that:
1. Keeps the most important visual elements and requirements
@@ -176,7 +178,7 @@ Return only the compressed prompt, no explanations.
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request = AiCallRequest(
- prompt=compression_prompt,
+ prompt=compressionPrompt,
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
maxTokens=None, # Let the model use its full context length
@@ -184,12 +186,12 @@ Return only the compressed prompt, no explanations.
)
)
- response = await ai_service.aiObjects.call(request)
+ response = await aiService.aiObjects.call(request)
compressed = response.content.strip()
# Validate the compressed prompt
if compressed and len(compressed) <= 4000 and len(compressed) > 50:
- self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
+ self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
return compressed
else:
self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
@@ -199,42 +201,42 @@ Return only the compressed prompt, no explanations.
self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
return None
- def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
+ def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
"""Analyze content sections and create a visual description for AI."""
try:
descriptions = []
for section in sections:
- section_type = self._get_section_type(section)
- section_data = self._get_section_data(section)
+ sectionType = self._getSectionType(section)
+ sectionData = self._getSectionData(section)
- if section_type == "table":
- headers = section_data.get("headers", [])
- rows = section_data.get("rows", [])
+ if sectionType == "table":
+ headers = sectionData.get("headers", [])
+ rows = sectionData.get("rows", [])
if headers and rows:
descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
- elif section_type == "bullet_list":
- items = section_data.get("items", [])
+ elif sectionType == "bullet_list":
+ items = sectionData.get("items", [])
if items:
descriptions.append(f"List with {len(items)} items")
- elif section_type == "heading":
- text = section_data.get("text", "")
- level = section_data.get("level", 1)
+ elif sectionType == "heading":
+ text = sectionData.get("text", "")
+ level = sectionData.get("level", 1)
if text:
descriptions.append(f"Heading {level}: {text}")
- elif section_type == "paragraph":
- text = section_data.get("text", "")
+ elif sectionType == "paragraph":
+ text = sectionData.get("text", "")
if text and len(text) > 10: # Only include substantial paragraphs
# Truncate long text
truncated = text[:100] + "..." if len(text) > 100 else text
descriptions.append(f"Text content: {truncated}")
- elif section_type == "code_block":
- code = section_data.get("code", "")
- language = section_data.get("language", "")
+ elif sectionType == "code_block":
+ code = sectionData.get("code", "")
+ language = sectionData.get("language", "")
if code:
descriptions.append(f"Code block ({language}): {code[:50]}...")
@@ -244,42 +246,42 @@ Return only the compressed prompt, no explanations.
self.logger.warning(f"Error analyzing content: {str(e)}")
return "Document content"
- def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
+ def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str:
"""Determine visual style guidance based on content and user prompt."""
try:
- style_elements = []
+ styleElements = []
# Analyze user prompt for style hints
- if user_prompt:
- prompt_lower = user_prompt.lower()
+ if userPrompt:
+ promptLower = userPrompt.lower()
- if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
- style_elements.append("modern, clean design")
- elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
- style_elements.append("classic, formal design")
- elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
- style_elements.append("creative, artistic design")
- elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
- style_elements.append("corporate, professional design")
+ if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
+ styleElements.append("modern, clean design")
+ elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
+ styleElements.append("classic, formal design")
+ elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
+ styleElements.append("creative, artistic design")
+ elif any(word in promptLower for word in ["corporate", "business", "professional"]):
+ styleElements.append("corporate, professional design")
# Analyze content type for additional style hints
- sections = extracted_content.get("sections", [])
- has_tables = any(self._get_section_type(s) == "table" for s in sections)
- has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
- has_code = any(self._get_section_type(s) == "code_block" for s in sections)
+ sections = extractedContent.get("sections", [])
+ hasTables = any(self._getSectionType(s) == "table" for s in sections)
+ hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
+ hasCode = any(self._getSectionType(s) == "code_block" for s in sections)
- if has_tables:
- style_elements.append("data-focused layout")
- if has_lists:
- style_elements.append("organized, structured presentation")
- if has_code:
- style_elements.append("technical, developer-friendly")
+ if hasTables:
+ styleElements.append("data-focused layout")
+ if hasLists:
+ styleElements.append("organized, structured presentation")
+ if hasCode:
+ styleElements.append("technical, developer-friendly")
# Default style if no specific guidance
- if not style_elements:
- style_elements.append("professional, clean design")
+ if not styleElements:
+ styleElements.append("professional, clean design")
- return ", ".join(style_elements)
+ return ", ".join(styleElements)
except Exception as e:
self.logger.warning(f"Error determining style guidance: {str(e)}")
diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py b/modules/services/serviceGeneration/renderers/rendererJson.py
index 2ff07ad6..d688da85 100644
--- a/modules/services/serviceGeneration/renderers/rendererJson.py
+++ b/modules/services/serviceGeneration/renderers/rendererJson.py
@@ -10,40 +10,40 @@ class RendererJson(BaseRenderer):
"""Renders content to JSON format with format-specific extraction."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported JSON formats."""
return ['json']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['data']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for JSON renderer."""
return 80
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to JSON format."""
try:
# The extracted content should already be JSON from the AI
# Just validate and format it
- json_content = self._clean_json_content(extracted_content, title)
+ jsonContent = self._cleanJsonContent(extractedContent, title)
- return json_content, "application/json"
+ return jsonContent, "application/json"
except Exception as e:
self.logger.error(f"Error rendering JSON: {str(e)}")
# Return minimal JSON fallback
- fallback_data = {
+ fallbackData = {
"title": title,
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
"metadata": {"error": str(e)}
}
- return json.dumps(fallback_data, indent=2), "application/json"
+ return json.dumps(fallbackData, indent=2), "application/json"
- def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
+ def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
"""Clean and validate JSON content from AI."""
try:
# Validate JSON structure
@@ -72,8 +72,8 @@ class RendererJson(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error cleaning JSON content: {str(e)}")
# Return minimal valid JSON
- fallback_data = {
+ fallbackData = {
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
"metadata": {"title": title, "error": str(e)}
}
- return json.dumps(fallback_data, indent=2, ensure_ascii=False)
+ return json.dumps(fallbackData, indent=2, ensure_ascii=False)
diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
index 59806d4c..5a91c667 100644
--- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py
+++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
@@ -9,161 +9,161 @@ class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported Markdown formats."""
return ['md', 'markdown']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['mdown', 'mkd']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for markdown renderer."""
return 95
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to Markdown format."""
try:
# Generate markdown from JSON structure
- markdown_content = self._generate_markdown_from_json(extracted_content, title)
+ markdownContent = self._generateMarkdownFromJson(extractedContent, title)
- return markdown_content, "text/markdown"
+ return markdownContent, "text/markdown"
except Exception as e:
self.logger.error(f"Error rendering markdown: {str(e)}")
# Return minimal markdown fallback
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
- def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
+ def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document."""
try:
# Validate JSON structure
- if not isinstance(json_content, dict):
+ if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
- if "sections" not in json_content:
+ if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
- document_title = json_content.get("metadata", {}).get("title", title)
+ documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build markdown content
- markdown_parts = []
+ markdownParts = []
# Document title
- markdown_parts.append(f"# {document_title}")
- markdown_parts.append("")
+ markdownParts.append(f"# {documentTitle}")
+ markdownParts.append("")
# Process each section
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
for section in sections:
- section_markdown = self._render_json_section(section)
- if section_markdown:
- markdown_parts.append(section_markdown)
- markdown_parts.append("") # Add spacing between sections
+ sectionMarkdown = self._renderJsonSection(section)
+ if sectionMarkdown:
+ markdownParts.append(sectionMarkdown)
+ markdownParts.append("") # Add spacing between sections
# Add generation info
- markdown_parts.append("---")
- markdown_parts.append(f"*Generated: {self._format_timestamp()}*")
+ markdownParts.append("---")
+ markdownParts.append(f"*Generated: {self._formatTimestamp()}*")
- return '\n'.join(markdown_parts)
+ return '\n'.join(markdownParts)
except Exception as e:
self.logger.error(f"Error generating markdown from JSON: {str(e)}")
raise Exception(f"Markdown generation failed: {str(e)}")
- def _render_json_section(self, section: Dict[str, Any]) -> str:
+ def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to markdown."""
try:
- section_type = self._get_section_type(section)
- section_data = self._get_section_data(section)
+ sectionType = self._getSectionType(section)
+ sectionData = self._getSectionData(section)
- if section_type == "table":
+ if sectionType == "table":
# Process the section data to extract table structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_table(processed_data)
- elif section_type == "bullet_list":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonTable(processedData)
+ elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_bullet_list(processed_data)
- elif section_type == "heading":
- return self._render_json_heading(section_data)
- elif section_type == "paragraph":
- return self._render_json_paragraph(section_data)
- elif section_type == "code_block":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonBulletList(processedData)
+ elif sectionType == "heading":
+ return self._renderJsonHeading(sectionData)
+ elif sectionType == "paragraph":
+ return self._renderJsonParagraph(sectionData)
+ elif sectionType == "code_block":
# Process the section data to extract code block structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_code_block(processed_data)
- elif section_type == "image":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonCodeBlock(processedData)
+ elif sectionType == "image":
# Process the section data to extract image structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_image(processed_data)
+ processedData = self._processSectionByType(section)
+ return self._renderJsonImage(processedData)
else:
# Fallback to paragraph for unknown types
- return self._render_json_paragraph(section_data)
+ return self._renderJsonParagraph(sectionData)
except Exception as e:
- self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
+ self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f"*[Error rendering section: {str(e)}]*"
- def _render_json_table(self, table_data: Dict[str, Any]) -> str:
+ def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to markdown."""
try:
- headers = table_data.get("headers", [])
- rows = table_data.get("rows", [])
+ headers = tableData.get("headers", [])
+ rows = tableData.get("rows", [])
if not headers or not rows:
return ""
- markdown_parts = []
+ markdownParts = []
# Create table header
- header_line = " | ".join(str(header) for header in headers)
- markdown_parts.append(header_line)
+ headerLine = " | ".join(str(header) for header in headers)
+ markdownParts.append(headerLine)
# Add separator line
- separator_line = " | ".join("---" for _ in headers)
- markdown_parts.append(separator_line)
+ separatorLine = " | ".join("---" for _ in headers)
+ markdownParts.append(separatorLine)
# Add data rows
for row in rows:
- row_line = " | ".join(str(cell_data) for cell_data in row)
- markdown_parts.append(row_line)
+ rowLine = " | ".join(str(cellData) for cellData in row)
+ markdownParts.append(rowLine)
- return '\n'.join(markdown_parts)
+ return '\n'.join(markdownParts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
- def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
+ def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to markdown."""
try:
- items = list_data.get("items", [])
+ items = listData.get("items", [])
if not items:
return ""
- markdown_parts = []
+ markdownParts = []
for item in items:
if isinstance(item, str):
- markdown_parts.append(f"- {item}")
+ markdownParts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item:
- markdown_parts.append(f"- {item['text']}")
+ markdownParts.append(f"- {item['text']}")
- return '\n'.join(markdown_parts)
+ return '\n'.join(markdownParts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
- def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
+ def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to markdown."""
try:
- level = heading_data.get("level", 1)
- text = heading_data.get("text", "")
+ level = headingData.get("level", 1)
+ text = headingData.get("text", "")
if text:
level = max(1, min(6, level))
@@ -175,21 +175,21 @@ class RendererMarkdown(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
- def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
+ def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
"""Render a JSON paragraph to markdown."""
try:
- text = paragraph_data.get("text", "")
+ text = paragraphData.get("text", "")
return text if text else ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
- def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
+ def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to markdown."""
try:
- code = code_data.get("code", "")
- language = code_data.get("language", "")
+ code = codeData.get("code", "")
+ language = codeData.get("language", "")
if code:
if language:
@@ -203,19 +203,19 @@ class RendererMarkdown(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
- def _render_json_image(self, image_data: Dict[str, Any]) -> str:
+ def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to markdown."""
try:
- alt_text = image_data.get("altText", "Image")
- base64_data = image_data.get("base64Data", "")
+ altText = imageData.get("altText", "Image")
+ base64Data = imageData.get("base64Data", "")
- if base64_data:
+ if base64Data:
# For base64 images, we can't embed them directly in markdown
# So we'll use a placeholder with the alt text
- return f""
+ return f""
else:
- return f""
+ return f""
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
- return f""
+ return f""
diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py
index b80e6197..f2b15e46 100644
--- a/modules/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/services/serviceGeneration/renderers/rendererPdf.py
@@ -22,32 +22,32 @@ class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported PDF formats."""
return ['pdf']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['document', 'print']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for PDF renderer."""
return 120
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
- html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
+ html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
return html_content, "text/html"
# Generate PDF using AI-analyzed styling
- pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
+ pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
return pdf_content, "application/pdf"
@@ -56,11 +56,11 @@ class RendererPdf(BaseRenderer):
# Return minimal fallback
return f"PDF Generation Error: {str(e)}", "text/plain"
- async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+ async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
- styles = await self._get_pdf_styles(user_prompt, ai_service)
+ styles = await self._getPdfStyles(userPrompt, aiService)
# Validate JSON structure
if not isinstance(json_content, dict):
@@ -93,10 +93,10 @@ class RendererPdf(BaseRenderer):
story = []
# Title page
- title_style = self._create_title_style(styles)
+ title_style = self._createTitleStyle(styles)
story.append(Paragraph(document_title, title_style))
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
- story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
+ story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._createNormalStyle(styles)))
story.append(Spacer(1, 30)) # Add spacing before page break
story.append(PageBreak())
@@ -105,7 +105,7 @@ class RendererPdf(BaseRenderer):
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
for i, section in enumerate(sections):
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
- section_elements = self._render_json_section(section, styles)
+ section_elements = self._renderJsonSection(section, styles)
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
story.extend(section_elements)
@@ -123,7 +123,7 @@ class RendererPdf(BaseRenderer):
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
raise Exception(f"PDF generation failed: {str(e)}")
- async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+ async def _getPdfStyles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get PDF styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
@@ -136,21 +136,21 @@ class RendererPdf(BaseRenderer):
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
- style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
+ style_template = self._createAiStyleTemplate("pdf", user_prompt, style_schema)
# Use base template method like DOCX does (this works!)
- styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())
+ styles = await self._getAiStyles(ai_service, style_template, self._getDefaultPdfStyles())
if styles is None:
- return self._get_default_pdf_styles()
+ return self._getDefaultPdfStyles()
# Convert colors to PDF format after getting styles
- styles = self._convert_colors_format(styles)
+ styles = self._convertColorsFormat(styles)
# Validate and fix contrast issues
- return self._validate_pdf_styles_contrast(styles)
+ return self._validatePdfStylesContrast(styles)
- async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+ async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PDF color conversion."""
if not ai_service:
return default_styles
@@ -279,7 +279,7 @@ class RendererPdf(BaseRenderer):
return default_styles
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
- styles = self._convert_colors_format(styles)
+ styles = self._convertColorsFormat(styles)
return styles
@@ -287,7 +287,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
- def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert colors to proper format for PDF compatibility."""
try:
for style_name, style_config in styles.items():
@@ -304,7 +304,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
- def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
+ def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
"""Get a safe hex color value for PDF."""
if isinstance(color_value, str) and color_value.startswith('#'):
if len(color_value) == 7:
@@ -313,7 +313,7 @@ class RendererPdf(BaseRenderer):
return color_value
return default
- def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _validatePdfStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
@@ -348,9 +348,9 @@ class RendererPdf(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
- return self._get_default_pdf_styles()
+ return self._getDefaultPdfStyles()
- def _get_default_pdf_styles(self) -> Dict[str, Any]:
+ def _getDefaultPdfStyles(self) -> Dict[str, Any]:
"""Default PDF styles."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
@@ -363,27 +363,27 @@ class RendererPdf(BaseRenderer):
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
- def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
+ def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create title style from style definitions."""
title_style_def = styles.get("title", {})
# DEBUG: Show what color and spacing is being used for title
title_color = title_style_def.get("color", "#1F4E79")
title_space_after = title_style_def.get("space_after", 30)
- self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER")
+ self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
return ParagraphStyle(
'CustomTitle',
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
spaceAfter=title_style_def.get("space_after", 30),
- alignment=self._get_alignment(title_style_def.get("align", "center")),
- textColor=self._hex_to_color(title_color),
+ alignment=self._getAlignment(title_style_def.get("align", "center")),
+ textColor=self._hexToColor(title_color),
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
spaceBefore=0 # Ensure no space before title
)
- def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
+ def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
"""Create heading style from style definitions."""
heading_key = f"heading{level}"
heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
@@ -393,11 +393,11 @@ class RendererPdf(BaseRenderer):
fontSize=heading_style_def.get("font_size", 18 - level * 2),
spaceAfter=heading_style_def.get("space_after", 12),
spaceBefore=heading_style_def.get("space_before", 12),
- alignment=self._get_alignment(heading_style_def.get("align", "left")),
- textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
+ alignment=self._getAlignment(heading_style_def.get("align", "left")),
+ textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F"))
)
- def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
+ def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create normal paragraph style from style definitions."""
paragraph_style_def = styles.get("paragraph", {})
@@ -405,12 +405,12 @@ class RendererPdf(BaseRenderer):
'CustomNormal',
fontSize=paragraph_style_def.get("font_size", 11),
spaceAfter=paragraph_style_def.get("space_after", 6),
- alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
- textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
+ alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
+ textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
)
- def _get_alignment(self, align: str) -> int:
+ def _getAlignment(self, align: str) -> int:
"""Convert alignment string to reportlab alignment constant."""
if not align or not isinstance(align, str):
return TA_LEFT
@@ -426,7 +426,7 @@ class RendererPdf(BaseRenderer):
}
return align_map.get(align.lower().strip(), TA_LEFT)
- def _get_table_alignment(self, align: str) -> str:
+ def _getTableAlignment(self, align: str) -> str:
"""Convert alignment string to ReportLab table alignment string."""
if not align or not isinstance(align, str):
return 'LEFT'
@@ -442,7 +442,7 @@ class RendererPdf(BaseRenderer):
}
return align_map.get(align.lower().strip(), 'LEFT')
- def _hex_to_color(self, hex_color: str) -> colors.Color:
+ def _hexToColor(self, hex_color: str) -> colors.Color:
"""Convert hex color to reportlab color."""
try:
hex_color = hex_color.lstrip('#')
@@ -464,38 +464,38 @@ class RendererPdf(BaseRenderer):
except:
return colors.black
- def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles."""
try:
- section_type = self._get_section_type(section)
- elements = self._get_section_data(section)
+ section_type = self._getSectionType(section)
+ elements = self._getSectionData(section)
# Process each element in the section
all_elements = []
for element in elements:
if section_type == "table":
- all_elements.extend(self._render_json_table(element, styles))
+ all_elements.extend(self._renderJsonTable(element, styles))
elif section_type == "bullet_list":
- all_elements.extend(self._render_json_bullet_list(element, styles))
+ all_elements.extend(self._renderJsonBulletList(element, styles))
elif section_type == "heading":
- all_elements.extend(self._render_json_heading(element, styles))
+ all_elements.extend(self._renderJsonHeading(element, styles))
elif section_type == "paragraph":
- all_elements.extend(self._render_json_paragraph(element, styles))
+ all_elements.extend(self._renderJsonParagraph(element, styles))
elif section_type == "code_block":
- all_elements.extend(self._render_json_code_block(element, styles))
+ all_elements.extend(self._renderJsonCodeBlock(element, styles))
elif section_type == "image":
- all_elements.extend(self._render_json_image(element, styles))
+ all_elements.extend(self._renderJsonImage(element, styles))
else:
# Fallback to paragraph for unknown types
- all_elements.extend(self._render_json_paragraph(element, styles))
+ all_elements.extend(self._renderJsonParagraph(element, styles))
return all_elements
except Exception as e:
- self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
+ self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
- def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles."""
try:
headers = table_data.get("headers", [])
@@ -517,7 +517,7 @@ class RendererPdf(BaseRenderer):
table_style = [
('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
- ('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))),
+ ('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
@@ -534,7 +534,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering table: {str(e)}")
return []
- def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
try:
items = list_data.get("items", [])
@@ -556,7 +556,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return []
- def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON heading to PDF elements using AI-generated styles."""
try:
level = heading_data.get("level", 1)
@@ -564,7 +564,7 @@ class RendererPdf(BaseRenderer):
if text:
level = max(1, min(6, level))
- heading_style = self._create_heading_style(styles, level)
+ heading_style = self._createHeadingStyle(styles, level)
return [Paragraph(text, heading_style)]
return []
@@ -573,13 +573,13 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return []
- def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
- return [Paragraph(text, self._create_normal_style(styles))]
+ return [Paragraph(text, self._createNormalStyle(styles))]
return []
@@ -587,7 +587,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return []
- def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON code block to PDF elements using AI-generated styles."""
try:
code = code_data.get("code", "")
@@ -601,7 +601,7 @@ class RendererPdf(BaseRenderer):
lang_style = ParagraphStyle(
'CodeLanguage',
fontSize=code_style_def.get("font_size", 9),
- textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
+ textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName='Helvetica-Bold'
)
elements.append(Paragraph(f"Code ({language}):", lang_style))
@@ -609,9 +609,9 @@ class RendererPdf(BaseRenderer):
code_style = ParagraphStyle(
'CodeBlock',
fontSize=code_style_def.get("font_size", 9),
- textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
+ textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName=code_style_def.get("font", "Courier"),
- backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
+ backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
spaceAfter=code_style_def.get("space_after", 6)
)
elements.append(Paragraph(code, code_style))
@@ -624,7 +624,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return []
- def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+ def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements."""
try:
base64_data = image_data.get("base64Data", "")
@@ -632,10 +632,10 @@ class RendererPdf(BaseRenderer):
if base64_data:
# For now, just add a placeholder since reportlab image handling is complex
- return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]
+ return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
- return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]
\ No newline at end of file
+ return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))]
\ No newline at end of file
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index 5c6de723..701030d7 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -12,23 +12,23 @@ class RendererPptx(BaseRenderer):
def __init__(self):
super().__init__()
- self.supported_formats = ["pptx", "ppt"]
- self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+ self.supportedFormats = ["pptx", "ppt"]
+ self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
@classmethod
- def get_supported_formats(cls) -> list:
+ def getSupportedFormats(cls) -> list:
"""Get list of supported output formats."""
return ["pptx", "ppt"]
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""
Render content as PowerPoint presentation from JSON data.
Args:
- extracted_content: JSON content to render as presentation
+ extractedContent: JSON content to render as presentation
title: Title for the presentation
- user_prompt: User prompt for AI styling
- ai_service: AI service for styling
+ userPrompt: User prompt for AI styling
+ aiService: AI service for styling
**kwargs: Additional rendering options
Returns:
@@ -43,7 +43,7 @@ class RendererPptx(BaseRenderer):
import re
# Get AI-generated styling definitions first
- styles = await self._get_pptx_styles(user_prompt, ai_service)
+ styles = await self._getPptxStyles(userPrompt, aiService)
# Create new presentation
prs = Presentation()
@@ -58,13 +58,13 @@ class RendererPptx(BaseRenderer):
prs.slide_height = Inches(7.5)
# Generate slides from JSON content
- slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
- logger.info(f"Parsed {len(slides_data)} slides from JSON content")
+ slidesData = await self._parseJsonToSlides(extractedContent, title, styles)
+ logger.info(f"Parsed {len(slidesData)} slides from JSON content")
# Debug: Show first 200 chars of content
- logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")
+ logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")
- for i, slide_data in enumerate(slides_data):
+ for i, slide_data in enumerate(slidesData):
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
# Debug: Show slide content preview
slide_content = slide_data.get('content', '')
@@ -74,8 +74,8 @@ class RendererPptx(BaseRenderer):
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
# Create slide with appropriate layout based on content
- slide_layout_index = self._get_slide_layout_index(slide_data, styles)
- slide_layout = prs.slide_layouts[slide_layout_index]
+ slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
+ slide_layout = prs.slide_layouts[slideLayoutIndex]
slide = prs.slides.add_slide(slide_layout)
# Set title with AI-generated styling
@@ -153,7 +153,7 @@ class RendererPptx(BaseRenderer):
p.alignment = PP_ALIGN.LEFT
# If no slides were created, create a default slide
- if not slides_data:
+ if not slidesData:
slide_layout = prs.slide_layouts[0] # Title slide layout
slide = prs.slides.add_slide(slide_layout)
@@ -198,7 +198,7 @@ class RendererPptx(BaseRenderer):
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
- def _parse_content_to_slides(self, content: str, title: str) -> list:
+ def _parseContentToSlides(self, content: str, title: str) -> list:
"""
Parse content into slide data structure.
@@ -212,7 +212,7 @@ class RendererPptx(BaseRenderer):
slides = []
# Split content by slide markers or headers
- slide_sections = self._split_content_into_slides(content)
+ slide_sections = self._splitContentIntoSlides(content)
for i, section in enumerate(slide_sections):
if section.strip():
@@ -239,7 +239,7 @@ class RendererPptx(BaseRenderer):
return slides
- def _split_content_into_slides(self, content: str) -> list:
+ def _splitContentIntoSlides(self, content: str) -> list:
"""
Split content into individual slides based on headers and structure.
@@ -299,11 +299,11 @@ class RendererPptx(BaseRenderer):
return [content.strip()]
- def get_output_mime_type(self) -> str:
+ def getOutputMimeType(self) -> str:
"""Get MIME type for rendered output."""
- return self.output_mime_type
+ return self.outputMimeType
- async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+ async def _getPptxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get PowerPoint styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
@@ -323,21 +323,21 @@ class RendererPptx(BaseRenderer):
"executive_ready": True
}
- style_template = self._create_professional_pptx_template(user_prompt, style_schema)
- # Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion
- styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles())
+ style_template = self._createProfessionalPptxTemplate(userPrompt, style_schema)
+ # Use our own _getAiStylesWithPptxColors method to ensure proper color conversion
+ styles = await self._getAiStylesWithPptxColors(aiService, style_template, self._getDefaultPptxStyles())
# Validate PowerPoint-specific requirements
- return self._validate_pptx_styles_readability(styles)
+ return self._validatePptxStylesReadability(styles)
- def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
+ def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
import json
schema_json = json.dumps(style_schema, indent=4)
return f"""Customize the JSON below for professional PowerPoint slides.
-User Request: {user_prompt or "Create professional corporate slides"}
+User Request: {userPrompt or "Create professional corporate slides"}
Rules:
- Use professional colors (blues, grays, deep greens)
@@ -351,9 +351,9 @@ Return ONLY this JSON with your changes:
JSON ONLY. NO OTHER TEXT."""
- async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+ async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PowerPoint color conversion."""
- if not ai_service:
+ if not aiService:
return default_styles
try:
@@ -365,11 +365,11 @@ JSON ONLY. NO OTHER TEXT."""
request = AiCallRequest(prompt=style_template, context="", options=request_options)
# Check if AI service is properly configured
- if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
+ if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
- response = await ai_service.aiObjects.call(request)
+ response = await aiService.aiObjects.call(request)
# Check if response is valid
if not response:
@@ -445,7 +445,7 @@ JSON ONLY. NO OTHER TEXT."""
return default_styles
# Convert colors to PowerPoint RGB format
- styles = self._convert_colors_format(styles)
+ styles = self._convertColorsFormat(styles)
return styles
@@ -453,7 +453,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
- def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to RGB format for PowerPoint compatibility."""
try:
for style_name, style_config in styles.items():
@@ -477,7 +477,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
- def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
+ def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
"""Get a safe RGB color tuple for PowerPoint."""
if isinstance(color_value, tuple) and len(color_value) == 3:
return color_value
@@ -495,7 +495,7 @@ JSON ONLY. NO OTHER TEXT."""
return (r, g, b)
return default
- def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _validatePptxStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles."""
try:
# Ensure minimum font sizes for PowerPoint readability
@@ -519,9 +519,9 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.warning(f"Style validation failed: {str(e)}")
- return self._get_default_pptx_styles()
+ return self._getDefaultPptxStyles()
- def _get_default_pptx_styles(self) -> Dict[str, Any]:
+ def _getDefaultPptxStyles(self) -> Dict[str, Any]:
"""Default PowerPoint styles with corporate professional color scheme."""
return {
"title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
@@ -541,7 +541,7 @@ JSON ONLY. NO OTHER TEXT."""
"executive_ready": True
}
- async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
+ async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Parse JSON content into slide data structure.
@@ -569,12 +569,12 @@ JSON ONLY. NO OTHER TEXT."""
# Create title slide
slides.append({
"title": document_title,
- "content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
+ "content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
})
# Process sections into slides based on content and user intent
sections = json_content.get("sections", [])
- slides.extend(self._create_slides_from_sections(sections, styles))
+ slides.extend(self._createSlidesFromSections(sections, styles))
# If no content slides were created, create a default content slide
if len(slides) == 1: # Only title slide
@@ -595,7 +595,7 @@ JSON ONLY. NO OTHER TEXT."""
}
]
- def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create a slide from a JSON section."""
try:
# Get section title from data or use default
@@ -616,15 +616,15 @@ JSON ONLY. NO OTHER TEXT."""
content_parts = []
if content_type == "table":
- content_parts.append(self._format_table_for_slide(elements))
+ content_parts.append(self._formatTableForSlide(elements))
elif content_type == "list":
- content_parts.append(self._format_list_for_slide(elements))
+ content_parts.append(self._formatListForSlide(elements))
elif content_type == "heading":
- content_parts.append(self._format_heading_for_slide(elements))
+ content_parts.append(self._formatHeadingForSlide(elements))
elif content_type == "paragraph":
- content_parts.append(self._format_paragraph_for_slide(elements))
+ content_parts.append(self._formatParagraphForSlide(elements))
elif content_type == "code":
- content_parts.append(self._format_code_for_slide(elements))
+ content_parts.append(self._formatCodeForSlide(elements))
else:
content_parts.append(self._format_paragraph_for_slide(elements))
@@ -640,7 +640,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slide from section: {str(e)}")
return None
- def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str:
+ def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str:
"""Format table data for slide presentation."""
try:
# Extract table data from elements array
@@ -681,7 +681,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting table for slide: {str(e)}")
return ""
- def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
+ def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
"""Format list data for slide presentation."""
try:
items = list_data.get("items", [])
@@ -713,7 +713,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting list for slide: {str(e)}")
return ""
- def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
+ def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
"""Format heading data for slide presentation."""
try:
text = heading_data.get("text", "")
@@ -728,7 +728,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting heading for slide: {str(e)}")
return ""
- def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
+ def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
"""Format paragraph data for slide presentation."""
try:
text = paragraph_data.get("text", "")
@@ -747,7 +747,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
return ""
- def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
+ def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
"""Format code data for slide presentation."""
try:
code = code_data.get("code", "")
@@ -770,7 +770,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting code for slide: {str(e)}")
return ""
- def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
+ def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
"""Determine the best professional slide layout based on content."""
try:
content = slide_data.get("content", "")
@@ -804,7 +804,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error determining slide layout: {str(e)}")
return 1 # Default to title and content layout
- def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
+ def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent."""
try:
slides = []
@@ -834,7 +834,7 @@ JSON ONLY. NO OTHER TEXT."""
break
else:
# Add content to current slide
- formatted_content = self._format_section_content(section)
+ formatted_content = self._formatSectionContent(section)
if formatted_content:
current_slide_content.append(formatted_content)
@@ -851,7 +851,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slides from sections: {str(e)}")
return []
- def _format_section_content(self, section: Dict[str, Any]) -> str:
+ def _formatSectionContent(self, section: Dict[str, Any]) -> str:
"""Format section content for slide presentation."""
try:
content_type = section.get("content_type", "paragraph")
@@ -861,15 +861,15 @@ JSON ONLY. NO OTHER TEXT."""
content_parts = []
for element in elements:
if content_type == "table":
- content_parts.append(self._format_table_for_slide([element]))
+ content_parts.append(self._formatTableForSlide([element]))
elif content_type == "list":
- content_parts.append(self._format_list_for_slide([element]))
+ content_parts.append(self._formatListForSlide([element]))
elif content_type == "heading":
- content_parts.append(self._format_heading_for_slide([element]))
+ content_parts.append(self._formatHeadingForSlide([element]))
elif content_type == "paragraph":
- content_parts.append(self._format_paragraph_for_slide([element]))
+ content_parts.append(self._formatParagraphForSlide([element]))
elif content_type == "code":
- content_parts.append(self._format_code_for_slide([element]))
+ content_parts.append(self._formatCodeForSlide([element]))
else:
content_parts.append(self._format_paragraph_for_slide([element]))
@@ -879,7 +879,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting section content: {str(e)}")
return ""
- def _format_timestamp(self) -> str:
+ def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py
index 68ccfdbe..f24afa95 100644
--- a/modules/services/serviceGeneration/renderers/rendererText.py
+++ b/modules/services/serviceGeneration/renderers/rendererText.py
@@ -9,7 +9,7 @@ class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported text formats (excluding formats with dedicated renderers)."""
return [
'txt', 'text', 'plain',
@@ -32,7 +32,7 @@ class RendererText(BaseRenderer):
]
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return [
'ascii', 'utf8', 'utf-8', 'code', 'source',
@@ -41,166 +41,166 @@ class RendererText(BaseRenderer):
]
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for text renderer."""
return 90
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to plain text format."""
try:
# Generate text from JSON structure
- text_content = self._generate_text_from_json(extracted_content, title)
+ textContent = self._generateTextFromJson(extractedContent, title)
- return text_content, "text/plain"
+ return textContent, "text/plain"
except Exception as e:
self.logger.error(f"Error rendering text: {str(e)}")
# Return minimal text fallback
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
- def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
+ def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate text content from structured JSON document."""
try:
# Validate JSON structure
- if not isinstance(json_content, dict):
+ if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
- if "sections" not in json_content:
+ if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
- document_title = json_content.get("metadata", {}).get("title", title)
+ documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build text content
- text_parts = []
+ textParts = []
# Document title
- text_parts.append(document_title)
- text_parts.append("=" * len(document_title))
- text_parts.append("")
+ textParts.append(documentTitle)
+ textParts.append("=" * len(documentTitle))
+ textParts.append("")
# Process each section
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
for section in sections:
- section_text = self._render_json_section(section)
- if section_text:
- text_parts.append(section_text)
- text_parts.append("") # Add spacing between sections
+ sectionText = self._renderJsonSection(section)
+ if sectionText:
+ textParts.append(sectionText)
+ textParts.append("") # Add spacing between sections
# Add generation info
- text_parts.append("")
- text_parts.append(f"Generated: {self._format_timestamp()}")
+ textParts.append("")
+ textParts.append(f"Generated: {self._formatTimestamp()}")
- return '\n'.join(text_parts)
+ return '\n'.join(textParts)
except Exception as e:
self.logger.error(f"Error generating text from JSON: {str(e)}")
raise Exception(f"Text generation failed: {str(e)}")
- def _render_json_section(self, section: Dict[str, Any]) -> str:
+ def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to text."""
try:
- section_type = self._get_section_type(section)
- section_data = self._get_section_data(section)
+ sectionType = self._getSectionType(section)
+ sectionData = self._getSectionData(section)
- if section_type == "table":
+ if sectionType == "table":
# Process the section data to extract table structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_table(processed_data)
- elif section_type == "bullet_list":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonTable(processedData)
+ elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_bullet_list(processed_data)
- elif section_type == "heading":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonBulletList(processedData)
+ elif sectionType == "heading":
# Render each heading element in the elements array
- # section_data is already the elements array from _get_section_data
- rendered_elements = []
- for element in section_data:
- rendered_elements.append(self._render_json_heading(element))
- return "\n".join(rendered_elements)
- elif section_type == "paragraph":
+ # sectionData is already the elements array from _getSectionData
+ renderedElements = []
+ for element in sectionData:
+ renderedElements.append(self._renderJsonHeading(element))
+ return "\n".join(renderedElements)
+ elif sectionType == "paragraph":
# Render each paragraph element in the elements array
- # section_data is already the elements array from _get_section_data
- rendered_elements = []
- for element in section_data:
- rendered_elements.append(self._render_json_paragraph(element))
- return "\n".join(rendered_elements)
- elif section_type == "code_block":
+ # sectionData is already the elements array from _getSectionData
+ renderedElements = []
+ for element in sectionData:
+ renderedElements.append(self._renderJsonParagraph(element))
+ return "\n".join(renderedElements)
+ elif sectionType == "code_block":
# Process the section data to extract code block structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_code_block(processed_data)
- elif section_type == "image":
+ processedData = self._processSectionByType(section)
+ return self._renderJsonCodeBlock(processedData)
+ elif sectionType == "image":
# Process the section data to extract image structure
- processed_data = self._process_section_by_type(section)
- return self._render_json_image(processed_data)
+ processedData = self._processSectionByType(section)
+ return self._renderJsonImage(processedData)
else:
# Fallback to paragraph for unknown types - render each element
- # section_data is already the elements array from _get_section_data
- rendered_elements = []
- for element in section_data:
- rendered_elements.append(self._render_json_paragraph(element))
- return "\n".join(rendered_elements)
+ # sectionData is already the elements array from _getSectionData
+ renderedElements = []
+ for element in sectionData:
+ renderedElements.append(self._renderJsonParagraph(element))
+ return "\n".join(renderedElements)
except Exception as e:
- self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
+ self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f"[Error rendering section: {str(e)}]"
- def _render_json_table(self, table_data: Dict[str, Any]) -> str:
+ def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to text."""
try:
- headers = table_data.get("headers", [])
- rows = table_data.get("rows", [])
+ headers = tableData.get("headers", [])
+ rows = tableData.get("rows", [])
if not headers or not rows:
return ""
- text_parts = []
+ textParts = []
# Create table header
- header_line = " | ".join(str(header) for header in headers)
- text_parts.append(header_line)
+ headerLine = " | ".join(str(header) for header in headers)
+ textParts.append(headerLine)
# Add separator line
- separator_line = " | ".join("-" * len(str(header)) for header in headers)
- text_parts.append(separator_line)
+ separatorLine = " | ".join("-" * len(str(header)) for header in headers)
+ textParts.append(separatorLine)
# Add data rows
for row in rows:
- row_line = " | ".join(str(cell_data) for cell_data in row)
- text_parts.append(row_line)
+ rowLine = " | ".join(str(cellData) for cellData in row)
+ textParts.append(rowLine)
- return '\n'.join(text_parts)
+ return '\n'.join(textParts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
- def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
+ def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to text."""
try:
- items = list_data.get("items", [])
+ items = listData.get("items", [])
if not items:
return ""
- text_parts = []
+ textParts = []
for item in items:
if isinstance(item, str):
- text_parts.append(f"- {item}")
+ textParts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item:
- text_parts.append(f"- {item['text']}")
+ textParts.append(f"- {item['text']}")
- return '\n'.join(text_parts)
+ return '\n'.join(textParts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
- def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
+ def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to text."""
try:
- level = heading_data.get("level", 1)
- text = heading_data.get("text", "")
+ level = headingData.get("level", 1)
+ text = headingData.get("text", "")
if text:
level = max(1, min(6, level))
@@ -217,21 +217,21 @@ class RendererText(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
- def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
+ def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
"""Render a JSON paragraph to text."""
try:
- text = paragraph_data.get("text", "")
+ text = paragraphData.get("text", "")
return text if text else ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
- def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
+ def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to text."""
try:
- code = code_data.get("code", "")
- language = code_data.get("language", "")
+ code = codeData.get("code", "")
+ language = codeData.get("language", "")
if code:
if language:
@@ -245,12 +245,12 @@ class RendererText(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
- def _render_json_image(self, image_data: Dict[str, Any]) -> str:
+ def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to text."""
try:
- alt_text = image_data.get("altText", "Image")
- return f"[Image: {alt_text}]"
+ altText = imageData.get("altText", "Image")
+ return f"[Image: {altText}]"
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
- return f"[Image: {image_data.get('altText', 'Image')}]"
+ return f"[Image: {imageData.get('altText', 'Image')}]"
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index 19b36a52..49bd2749 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -21,41 +21,41 @@ class RendererXlsx(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@classmethod
- def get_supported_formats(cls) -> List[str]:
+ def getSupportedFormats(cls) -> List[str]:
"""Return supported Excel formats."""
return ['xlsx', 'xls', 'excel']
@classmethod
- def get_format_aliases(cls) -> List[str]:
+ def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['spreadsheet', 'workbook']
@classmethod
- def get_priority(cls) -> int:
+ def getPriority(cls) -> int:
"""Return priority for Excel renderer."""
return 110
- async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv
- csv_renderer = RendererCsv()
- csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
- return csv_content, "text/csv"
+ csvRenderer = RendererCsv()
+ csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
+ return csvContent, "text/csv"
# Generate Excel using AI-analyzed styling
- excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
+ excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
- return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+ return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
except Exception as e:
self.logger.error(f"Error rendering Excel: {str(e)}")
# Return CSV fallback
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
- def _generate_excel(self, content: str, title: str) -> str:
+ def _generateExcel(self, content: str, title: str) -> str:
"""Generate Excel content using openpyxl."""
try:
# Create workbook
@@ -65,14 +65,14 @@ class RendererXlsx(BaseRenderer):
wb.remove(wb.active)
# Create sheets
- summary_sheet = wb.create_sheet("Summary", 0)
- data_sheet = wb.create_sheet("Data", 1)
- analysis_sheet = wb.create_sheet("Analysis", 2)
+ summarySheet = wb.create_sheet("Summary", 0)
+ dataSheet = wb.create_sheet("Data", 1)
+ analysisSheet = wb.create_sheet("Analysis", 2)
# Add content to sheets
- self._populate_summary_sheet(summary_sheet, title)
- self._populate_data_sheet(data_sheet, content)
- self._populate_analysis_sheet(analysis_sheet, content)
+ self._populateSummarySheet(summarySheet, title)
+ self._populateDataSheet(dataSheet, content)
+ self._populateAnalysisSheet(analysisSheet, content)
# Save to buffer
buffer = io.BytesIO()
@@ -80,16 +80,16 @@ class RendererXlsx(BaseRenderer):
buffer.seek(0)
# Convert to base64
- excel_bytes = buffer.getvalue()
- excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
+ excelBytes = buffer.getvalue()
+ excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
- return excel_base64
+ return excelBase64
except Exception as e:
self.logger.error(f"Error generating Excel: {str(e)}")
raise
- def _populate_summary_sheet(self, sheet, title: str):
+ def _populateSummarySheet(self, sheet, title: str):
"""Populate the summary sheet."""
try:
# Title
@@ -99,7 +99,7 @@ class RendererXlsx(BaseRenderer):
# Generation info
sheet['A3'] = "Generated:"
- sheet['B3'] = self._format_timestamp()
+ sheet['B3'] = self._formatTimestamp()
sheet['A4'] = "Status:"
sheet['B4'] = "Generated Successfully"
@@ -116,7 +116,7 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
- def _populate_data_sheet(self, sheet, content: str):
+ def _populateDataSheet(self, sheet, content: str):
"""Populate the data sheet."""
try:
# Headers
@@ -138,8 +138,8 @@ class RendererXlsx(BaseRenderer):
# Check for table data (lines with |)
if '|' in line:
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
- for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
- sheet.cell(row=row, column=col, value=cell_data)
+ for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns
+ sheet.cell(row=row, column=col, value=cellData)
row += 1
else:
# Regular content
@@ -153,7 +153,7 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate data sheet: {str(e)}")
- def _populate_analysis_sheet(self, sheet, content: str):
+ def _populateAnalysisSheet(self, sheet, content: str):
"""Populate the analysis sheet."""
try:
# Title
@@ -169,17 +169,17 @@ class RendererXlsx(BaseRenderer):
row += 1
# Count different types of content
- table_lines = sum(1 for line in lines if '|' in line)
- list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
- text_lines = len(lines) - table_lines - list_lines
+ tableLines = sum(1 for line in lines if '|' in line)
+ listLines = sum(1 for line in lines if line.startswith(('- ', '* ')))
+ textLines = len(lines) - tableLines - listLines
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
row += 1
- sheet[f'A{row}'] = f"Table Rows: {table_lines}"
+ sheet[f'A{row}'] = f"Table Rows: {tableLines}"
row += 1
- sheet[f'A{row}'] = f"List Items: {list_lines}"
+ sheet[f'A{row}'] = f"List Items: {listLines}"
row += 1
- sheet[f'A{row}'] = f"Text Lines: {text_lines}"
+ sheet[f'A{row}'] = f"Text Lines: {textLines}"
row += 2
# Recommendations
@@ -198,35 +198,35 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
- async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+ async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
# Debug output
- self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER")
- self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER")
+ self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
+ self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
# Get AI-generated styling definitions
- styles = await self._get_excel_styles(user_prompt, ai_service)
+ styles = await self._getExcelStyles(userPrompt, aiService)
# Validate JSON structure
- if not isinstance(json_content, dict):
+ if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
- if "sections" not in json_content:
+ if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
- document_title = json_content.get("metadata", {}).get("title", title)
+ document_title = jsonContent.get("metadata", {}).get("title", title)
# Create workbook
wb = Workbook()
# Create sheets based on content
- sheets = self._create_excel_sheets(wb, json_content, styles)
+ sheets = self._createExcelSheets(wb, jsonContent, styles)
self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
# Populate sheets with content
- self._populate_excel_sheets(sheets, json_content, styles)
+ self._populateExcelSheets(sheets, jsonContent, styles)
# Save to buffer
buffer = io.BytesIO()
@@ -234,24 +234,24 @@ class RendererXlsx(BaseRenderer):
buffer.seek(0)
# Convert to base64
- excel_bytes = buffer.getvalue()
- self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER")
+ excelBytes = buffer.getvalue()
+ self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")
try:
- excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
- self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER")
+ excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
+ self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
except Exception as b64_error:
self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
raise
- return excel_base64
+ return excelBase64
except Exception as e:
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
raise Exception(f"Excel generation failed: {str(e)}")
- async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+ async def _getExcelStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get Excel styling definitions using base template AI styling."""
- style_schema = {
+ styleSchema = {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
@@ -261,26 +261,26 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
- style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
- # Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion
- styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles())
+ styleTemplate = self._createAiStyleTemplate("xlsx", userPrompt, styleSchema)
+ # Use our own _getAiStylesWithExcelColors method to ensure proper color conversion
+ styles = await self._getAiStylesWithExcelColors(aiService, styleTemplate, self._getDefaultExcelStyles())
# Validate and fix contrast issues
- return self._validate_excel_styles_contrast(styles)
+ return self._validateExcelStylesContrast(styles)
- async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+ async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper Excel color conversion."""
- if not ai_service:
- return default_styles
+ if not aiService:
+ return defaultStyles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
- request_options = AiCallOptions()
- request_options.operationType = OperationTypeEnum.DATA_GENERATE
+ requestOptions = AiCallOptions()
+ requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
- request = AiCallRequest(prompt=style_template, context="", options=request_options)
- response = await ai_service.aiObjects.call(request)
+ request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
+ response = await aiService.aiObjects.call(request)
import json
import re
@@ -291,7 +291,7 @@ class RendererXlsx(BaseRenderer):
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
- return default_styles
+ return defaultStyles
# Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
@@ -312,46 +312,46 @@ class RendererXlsx(BaseRenderer):
styles = json.loads(result)
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
- return default_styles
+ return defaultStyles
# Convert colors to Excel aRGB format
- styles = self._convert_colors_format(styles)
+ styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
- return default_styles
+ return defaultStyles
- def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str:
+ def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix)."""
- if not isinstance(color_value, str):
+ if not isinstance(colorValue, str):
return default
# Remove # prefix if present
- if color_value.startswith('#'):
- color_value = color_value[1:]
+ if colorValue.startswith('#'):
+ colorValue = colorValue[1:]
- if len(color_value) == 6:
+ if len(colorValue) == 6:
# Convert RRGGBB to AARRGGBB
- return f"FF{color_value}"
- elif len(color_value) == 8:
+ return f"FF{colorValue}"
+ elif len(colorValue) == 8:
# Already aRGB format
- return color_value
+ return colorValue
else:
# Unexpected format, return default
return default
- def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility."""
try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
- for style_name, style_config in styles.items():
- if isinstance(style_config, dict):
- for prop, value in style_config.items():
+ for styleName, styleConfig in styles.items():
+ if isinstance(styleConfig, dict):
+ for prop, value in styleConfig.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
- styles[style_name][prop] = f"FF{value[1:]}"
+ styles[styleName][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
pass # Already aRGB format
elif isinstance(value, str) and value.startswith('#'):
@@ -360,34 +360,34 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
return styles
- def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _validateExcelStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
- bg_color = header.get("background", "#FFFFFF")
- text_color = header.get("text_color", "#000000")
+ bgColor = header.get("background", "#FFFFFF")
+ textColor = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
+ if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
+ elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
- bg_color = cell.get("background", "#FFFFFF")
- text_color = cell.get("text_color", "#000000")
+ bgColor = cell.get("background", "#FFFFFF")
+ textColor = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
- if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
+ if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
- elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
+ elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
@@ -395,9 +395,9 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
- return self._get_default_excel_styles()
+ return self._getDefaultExcelStyles()
- def _get_default_excel_styles(self) -> Dict[str, Any]:
+ def _getDefaultExcelStyles(self) -> Dict[str, Any]:
"""Default Excel styles with aRGB color format."""
return {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
@@ -409,104 +409,104 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
- def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
+ def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create Excel sheets based on content structure and user intent."""
sheets = {}
# Get sheet names from AI styles or generate based on content
- sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
- self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER")
+ sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent))
+ self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")
# Create sheets
- for i, sheet_name in enumerate(sheet_names):
+ for i, sheetName in enumerate(sheetNames):
if i == 0:
# Use the default sheet for the first sheet
sheet = wb.active
- sheet.title = sheet_name
+ sheet.title = sheetName
else:
# Create additional sheets
- sheet = wb.create_sheet(sheet_name, i)
- sheets[sheet_name.lower()] = sheet
+ sheet = wb.create_sheet(sheetName, i)
+ sheets[sheetName.lower()] = sheet
return sheets
- def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
+ def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure."""
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Generate sheet names based on content structure
- sheet_names = []
+ sheetNames = []
# Check if we have multiple table sections
- table_sections = [s for s in sections if s.get("content_type") == "table"]
+ tableSections = [s for s in sections if s.get("content_type") == "table"]
- if len(table_sections) > 1:
+ if len(tableSections) > 1:
# Create separate sheets for each table
- for i, section in enumerate(table_sections, 1):
- section_title = section.get("title", f"Table {i}")
- sheet_names.append(section_title[:31]) # Excel sheet name limit
+ for i, section in enumerate(tableSections, 1):
+ sectionTitle = section.get("title", f"Table {i}")
+ sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
else:
# Single table or mixed content - create main sheet
- document_title = json_content.get("metadata", {}).get("title", "Document")
- sheet_names.append(document_title[:31]) # Excel sheet name limit
+ documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
+ sheetNames.append(documentTitle[:31]) # Excel sheet name limit
# Add additional sheets for other content types
- content_types = set()
+ contentTypes = set()
for section in sections:
- content_type = section.get("content_type", "paragraph")
- content_types.add(content_type)
+ contentType = section.get("content_type", "paragraph")
+ contentTypes.add(contentType)
- if "table" in content_types and len(table_sections) == 1:
- sheet_names.append("Table Data")
- if "list" in content_types:
- sheet_names.append("Lists")
- if "paragraph" in content_types or "heading" in content_types:
- sheet_names.append("Text")
+ if "table" in contentTypes and len(tableSections) == 1:
+ sheetNames.append("Table Data")
+ if "list" in contentTypes:
+ sheetNames.append("Lists")
+ if "paragraph" in contentTypes or "heading" in contentTypes:
+ sheetNames.append("Text")
# Limit to 4 sheets maximum
- return sheet_names[:4]
+ return sheetNames[:4]
- def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
+ def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names."""
try:
# Get the actual sheet names that were created
- sheet_names = list(sheets.keys())
+ sheetNames = list(sheets.keys())
- if not sheet_names:
+ if not sheetNames:
return
- sections = json_content.get("sections", [])
- table_sections = [s for s in sections if s.get("content_type") == "table"]
+ sections = jsonContent.get("sections", [])
+ tableSections = [s for s in sections if s.get("content_type") == "table"]
- if len(table_sections) > 1:
+ if len(tableSections) > 1:
# Multiple tables - populate each sheet with its corresponding table
- for i, section in enumerate(table_sections):
- if i < len(sheet_names):
- sheet_name = sheet_names[i]
- sheet = sheets[sheet_name]
- self._populate_table_sheet(sheet, section, styles, f"Table {i+1}")
+ for i, section in enumerate(tableSections):
+ if i < len(sheetNames):
+ sheetName = sheetNames[i]
+ sheet = sheets[sheetName]
+ self._populateTableSheet(sheet, section, styles, f"Table {i+1}")
else:
# Single table or mixed content - use original logic
- first_sheet_name = sheet_names[0]
- self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
+ firstSheetName = sheetNames[0]
+ self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
# If we have multiple sheets, distribute content by type
- if len(sheet_names) > 1:
- self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
+ if len(sheetNames) > 1:
+ self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
- def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str):
+ def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
"""Populate a sheet with a single table section."""
try:
# Sheet title
- sheet['A1'] = sheet_title
- sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
+ sheet['A1'] = sheetTitle
+ sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
sheet['A1'].alignment = Alignment(horizontal="center")
# Get table data from elements (canonical JSON format)
@@ -528,9 +528,9 @@ class RendererXlsx(BaseRenderer):
for col, header in enumerate(headers, 1):
cell = sheet.cell(row=3, column=col, value=header)
if header_style.get("bold"):
- cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
+ cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
if header_style.get("background"):
- cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
+ cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
# Add rows
cell_style = styles.get("table_cell", {})
@@ -538,7 +538,7 @@ class RendererXlsx(BaseRenderer):
for col_idx, cell_value in enumerate(row_data, 1):
cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
if cell_style.get("text_color"):
- cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
+ cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
# Auto-adjust column widths
for col in range(1, len(headers) + 1):
@@ -547,17 +547,17 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate table sheet: {str(e)}")
- def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
+ def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
"""Populate the main sheet with document overview and all content."""
try:
# Document title
- document_title = json_content.get("metadata", {}).get("title", "Generated Report")
- sheet['A1'] = document_title
+ documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
+ sheet['A1'] = documentTitle
# Safety check for title style
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
try:
- safe_color = self._get_safe_color(title_style["color"])
+ safe_color = self._getSafeColor(title_style["color"])
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
except Exception as font_error:
@@ -567,12 +567,12 @@ class RendererXlsx(BaseRenderer):
# Generation info
sheet['A3'] = "Generated:"
- sheet['B3'] = self._format_timestamp()
+ sheet['B3'] = self._formatTimestamp()
sheet['A4'] = "Status:"
sheet['B4'] = "Generated Successfully"
# Document metadata
- metadata = json_content.get("metadata", {})
+ metadata = jsonContent.get("metadata", {})
if metadata:
sheet['A6'] = "Document Information:"
sheet['A6'].font = Font(bold=True)
@@ -585,7 +585,7 @@ class RendererXlsx(BaseRenderer):
row += 1
# Content overview
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
sheet[f'A{row + 1}'] = "Content Overview:"
sheet[f'A{row + 1}'].font = Font(bold=True)
@@ -605,7 +605,7 @@ class RendererXlsx(BaseRenderer):
# Add all content to this sheet
row += 2
for section in sections:
- row = self._add_section_to_sheet(sheet, section, styles, row)
+ row = self._addSectionToSheet(sheet, section, styles, row)
row += 1 # Empty row between sections
# Auto-adjust column widths
@@ -615,34 +615,34 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate main sheet: {str(e)}")
- def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
+ def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
"""Populate additional sheets based on content types."""
try:
- sections = json_content.get("sections", [])
+ sections = jsonContent.get("sections", [])
- for sheet_name in sheet_names:
- if sheet_name not in sheets:
+ for sheetName in sheetNames:
+ if sheetName not in sheets:
continue
- sheet = sheets[sheet_name]
- sheet_title = sheet_name.title()
- sheet['A1'] = sheet_title
+ sheet = sheets[sheetName]
+ sheetTitle = sheetName.title()
+ sheet['A1'] = sheetTitle
sheet['A1'].font = Font(size=16, bold=True)
row = 3
# Filter sections by content type
- if sheet_name == "tables":
+ if sheetName == "tables":
filtered_sections = [s for s in sections if s.get("content_type") == "table"]
- elif sheet_name == "lists":
+ elif sheetName == "lists":
filtered_sections = [s for s in sections if s.get("content_type") == "list"]
- elif sheet_name == "text":
+ elif sheetName == "text":
filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
else:
filtered_sections = sections
for section in filtered_sections:
- row = self._add_section_to_sheet(sheet, section, styles, row)
+ row = self._addSectionToSheet(sheet, section, styles, row)
row += 1 # Empty row between sections
# Auto-adjust column widths
@@ -652,15 +652,15 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate content type sheets: {str(e)}")
- def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+ def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a section to a sheet and return the next row."""
try:
# Add section title
section_title = section.get("title")
if section_title:
- sheet[f'A{start_row}'] = f"# {section_title}"
- sheet[f'A{start_row}'].font = Font(bold=True)
- start_row += 1
+ sheet[f'A{startRow}'] = f"# {section_title}"
+ sheet[f'A{startRow}'].font = Font(bold=True)
+ startRow += 1
# Process section based on type
section_type = section.get("content_type", "paragraph")
@@ -669,23 +669,23 @@ class RendererXlsx(BaseRenderer):
elements = section.get("elements", [])
for element in elements:
if section_type == "table":
- start_row = self._add_table_to_excel(sheet, element, styles, start_row)
+ startRow = self._addTableToExcel(sheet, element, styles, startRow)
elif section_type == "list":
- start_row = self._add_list_to_excel(sheet, element, styles, start_row)
+ startRow = self._addListToExcel(sheet, element, styles, startRow)
elif section_type == "paragraph":
- start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
+ startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
elif section_type == "heading":
- start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
+ startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
else:
- start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
+ startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
- return start_row
+ return startRow
except Exception as e:
self.logger.warning(f"Could not add section to sheet: {str(e)}")
- return start_row + 1
+ return startRow + 1
- def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+ def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a table element to Excel sheet."""
try:
# In canonical JSON format, table elements have headers and rows directly
@@ -693,99 +693,99 @@ class RendererXlsx(BaseRenderer):
rows = element.get("rows", [])
if not headers and not rows:
- return start_row
+ return startRow
# Add headers
header_style = styles.get("table_header", {})
for col, header in enumerate(headers, 1):
- cell = sheet.cell(row=start_row, column=col, value=header)
+ cell = sheet.cell(row=startRow, column=col, value=header)
if header_style.get("bold"):
- cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
+ cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
if header_style.get("background"):
- cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
+ cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
- start_row += 1
+ startRow += 1
# Add rows
cell_style = styles.get("table_cell", {})
for row_data in rows:
for col, cell_value in enumerate(row_data, 1):
- cell = sheet.cell(row=start_row, column=col, value=cell_value)
+ cell = sheet.cell(row=startRow, column=col, value=cell_value)
if cell_style.get("text_color"):
- cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
- start_row += 1
+ cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
+ startRow += 1
- return start_row
+ return startRow
except Exception as e:
self.logger.warning(f"Could not add table to Excel: {str(e)}")
- return start_row + 1
+ return startRow + 1
- def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+ def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a list element to Excel sheet."""
try:
list_items = element.get("items", [])
list_style = styles.get("bullet_list", {})
for item in list_items:
- sheet.cell(row=start_row, column=1, value=f"• {item}")
+ sheet.cell(row=startRow, column=1, value=f"• {item}")
if list_style.get("color"):
- sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"]))
- start_row += 1
+ sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
+ startRow += 1
- return start_row
+ return startRow
except Exception as e:
self.logger.warning(f"Could not add list to Excel: {str(e)}")
- return start_row + 1
+ return startRow + 1
- def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+ def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a paragraph element to Excel sheet."""
try:
text = element.get("text", "")
if text:
- sheet.cell(row=start_row, column=1, value=text)
+ sheet.cell(row=startRow, column=1, value=text)
paragraph_style = styles.get("paragraph", {})
if paragraph_style.get("color"):
- sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"]))
+ sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
- start_row += 1
+ startRow += 1
- return start_row
+ return startRow
except Exception as e:
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
- return start_row + 1
+ return startRow + 1
- def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+ def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a heading element to Excel sheet."""
try:
text = element.get("text", "")
level = element.get("level", 1)
if text:
- sheet.cell(row=start_row, column=1, value=text)
+ sheet.cell(row=startRow, column=1, value=text)
heading_style = styles.get("heading", {})
font_size = heading_style.get("font_size", 14)
if level > 1:
font_size = max(10, font_size - (level - 1) * 2)
- sheet.cell(row=start_row, column=1).font = Font(
+ sheet.cell(row=startRow, column=1).font = Font(
size=font_size,
bold=True,
- color=self._get_safe_color(heading_style.get("color", "FF000000"))
+ color=self._getSafeColor(heading_style.get("color", "FF000000"))
)
- start_row += 1
+ startRow += 1
- return start_row
+ return startRow
except Exception as e:
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
- return start_row + 1
+ return startRow + 1
- def _format_timestamp(self) -> str:
+ def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
diff --git a/modules/services/serviceGeneration/subJsonSchema.py b/modules/services/serviceGeneration/subJsonSchema.py
index 72c722b1..07812bd2 100644
--- a/modules/services/serviceGeneration/subJsonSchema.py
+++ b/modules/services/serviceGeneration/subJsonSchema.py
@@ -1,25 +1,32 @@
"""
-JSON Schema definitions for AI-generated document structures.
-This module provides schemas that guide AI to generate structured JSON output.
+JSON Schema definitions for AI-generated document structures (unified).
+This module provides schemas that guide AI to generate structured JSON output
+that matches the master template in modules.datamodels.datamodelJson.
"""
from typing import Dict, Any
-def get_multi_document_subJsonSchema() -> Dict[str, Any]:
- """Get the JSON schema for multi-document generation."""
+def getMultiDocumentSchema() -> Dict[str, Any]:
+ """Get the JSON schema for multi-document generation (unified)."""
return {
"type": "object",
"required": ["metadata", "documents"],
"properties": {
"metadata": {
"type": "object",
- "required": ["title", "split_strategy"],
+ "required": ["split_strategy"],
"properties": {
- "title": {"type": "string", "description": "Document title"},
"split_strategy": {
"type": "string",
- "enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
+ "enum": [
+ "single_document",
+ "per_entity",
+ "by_section",
+ "by_criteria",
+ "by_data_type",
+ "custom"
+ ],
"description": "Strategy for splitting content into multiple files"
},
"splitCriteria": {
@@ -30,7 +37,6 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"type": "string",
"description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
},
- "author": {"type": "string", "description": "Document author (optional)"},
"source_documents": {
"type": "array",
"items": {"type": "string"},
@@ -38,7 +44,7 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
},
"extraction_method": {
"type": "string",
- "default": "ai_extraction",
+ "default": "ai_generation",
"description": "Method used for extraction"
}
}
@@ -64,7 +70,15 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"title": {"type": "string", "description": "Section title (optional)"},
"content_type": {
"type": "string",
- "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
+ "enum": [
+ "table",
+ "bullet_list",
+ "paragraph",
+ "heading",
+ "code_block",
+ "image",
+ "mixed"
+ ],
"description": "Primary content type of this section"
},
"elements": {
@@ -76,7 +90,8 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
{"$ref": "#/definitions/bullet_list"},
{"$ref": "#/definitions/paragraph"},
{"$ref": "#/definitions/heading"},
- {"$ref": "#/definitions/code_block"}
+ {"$ref": "#/definitions/code_block"},
+ {"$ref": "#/definitions/image"}
]
}
},
@@ -191,11 +206,20 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"code": {"type": "string", "description": "Code content"},
"language": {"type": "string", "description": "Programming language (optional)"}
}
+ },
+ "image": {
+ "type": "object",
+ "required": ["url"],
+ "properties": {
+ "url": {"type": "string", "description": "Image URL or data URI"},
+ "caption": {"type": "string", "description": "Image caption (optional)"},
+ "alt": {"type": "string", "description": "Alt text (optional)"}
+ }
}
}
}
-def get_document_subJsonSchema() -> Dict[str, Any]:
+def getDocumentSchema() -> Dict[str, Any]:
"""Get the JSON schema for structured document generation (single document)."""
return {
"type": "object",
@@ -206,7 +230,6 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"required": ["title"],
"properties": {
"title": {"type": "string", "description": "Document title"},
- "author": {"type": "string", "description": "Document author (optional)"},
"source_documents": {
"type": "array",
"items": {"type": "string"},
@@ -214,7 +237,7 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
},
"extraction_method": {
"type": "string",
- "default": "ai_extraction",
+ "default": "ai_generation",
"description": "Method used for extraction"
}
}
@@ -230,7 +253,15 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"title": {"type": "string", "description": "Section title (optional)"},
"content_type": {
"type": "string",
- "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
+ "enum": [
+ "table",
+ "bullet_list",
+ "paragraph",
+ "heading",
+ "code_block",
+ "image",
+ "mixed"
+ ],
"description": "Primary content type of this section"
},
"elements": {
@@ -242,7 +273,8 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
{"$ref": "#/definitions/bullet_list"},
{"$ref": "#/definitions/paragraph"},
{"$ref": "#/definitions/heading"},
- {"$ref": "#/definitions/code_block"}
+ {"$ref": "#/definitions/code_block"},
+ {"$ref": "#/definitions/image"}
]
}
},
@@ -359,12 +391,21 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"code": {"type": "string", "description": "Code content"},
"language": {"type": "string", "description": "Programming language (optional)"}
}
+ },
+ "image": {
+ "type": "object",
+ "required": ["url"],
+ "properties": {
+ "url": {"type": "string", "description": "Image URL or data URI"},
+ "caption": {"type": "string", "description": "Image caption (optional)"},
+ "alt": {"type": "string", "description": "Alt text (optional)"}
+ }
}
}
}
-def get_extraction_prompt_template() -> str:
+def getExtractionPromptTemplate() -> str:
"""Get the template for AI extraction prompts that request JSON output."""
return """
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.
@@ -390,7 +431,7 @@ Return only the JSON structure following the schema. Do not include any text bef
"""
-def get_generation_prompt_template() -> str:
+def getGenerationPromptTemplate() -> str:
"""Get the template for AI generation prompts that work with JSON input."""
return """
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.
@@ -416,31 +457,31 @@ Return only the enhanced JSON structure following the schema. Do not include any
"""
-def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
+def getAdaptiveJsonSchema(promptAnalysis: Dict[str, Any] = None) -> Dict[str, Any]:
"""Automatically select appropriate schema based on prompt analysis."""
- if prompt_analysis and prompt_analysis.get("is_multi_file", False):
- return get_multi_document_subJsonSchema()
+ if promptAnalysis and promptAnalysis.get("is_multi_file", False):
+ return getMultiDocumentSchema()
else:
- return get_document_subJsonSchema()
+ return getDocumentSchema()
-def validate_json_document(json_data: Dict[str, Any]) -> bool:
- """Validate that the JSON data follows the document schema."""
+def validateJsonDocument(jsonData: Dict[str, Any]) -> bool:
+ """Validate that the JSON data follows the unified document schema."""
try:
# Basic validation - check required fields
- if not isinstance(json_data, dict):
+ if not isinstance(jsonData, dict):
return False
# Check if it's multi-document or single-document structure
- if "documents" in json_data:
+ if "documents" in jsonData:
# Multi-document structure
- if "metadata" not in json_data:
+ if "metadata" not in jsonData:
return False
- metadata = json_data["metadata"]
- if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata:
+ metadata = jsonData["metadata"]
+ if not isinstance(metadata, dict) or "split_strategy" not in metadata:
return False
- documents = json_data["documents"]
+ documents = jsonData["documents"]
if not isinstance(documents, list):
return False
@@ -469,7 +510,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
return False
# Validate content_type
- valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
+ valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
if section["content_type"] not in valid_types:
return False
@@ -477,16 +518,16 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
if not isinstance(section["elements"], list):
return False
- elif "sections" in json_data:
+ elif "sections" in jsonData:
# Single-document structure (existing validation)
- if "metadata" not in json_data:
+ if "metadata" not in jsonData:
return False
- metadata = json_data["metadata"]
+ metadata = jsonData["metadata"]
if not isinstance(metadata, dict) or "title" not in metadata:
return False
- sections = json_data["sections"]
+ sections = jsonData["sections"]
if not isinstance(sections, list):
return False
@@ -501,7 +542,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
return False
# Validate content_type
- valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
+ valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
if section["content_type"] not in valid_types:
return False
diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
index ae744664..055bf397 100644
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@@ -5,83 +5,10 @@ This module builds prompts for generating documents from extracted content.
import logging
from typing import Dict, Any
+from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__)
-# Centralized JSON structure template for document generation
-# Includes examples for all content types so AI knows the structure patterns
-TEMPLATE_JSON_DOCUMENT_GENERATION = """{
- "metadata": {
- "split_strategy": "single_document",
- "source_documents": [],
- "extraction_method": "ai_generation"
- },
- "documents": [
- {
- "id": "doc_1",
- "title": "{{DOCUMENT_TITLE}}",
- "filename": "document.json",
- "sections": [
- {
- "id": "section_heading_example",
- "content_type": "heading",
- "elements": [
- {"level": 1, "text": "Heading Text"}
- ],
- "order": 0
- },
- {
- "id": "section_paragraph_example",
- "content_type": "paragraph",
- "elements": [
- {"text": "Paragraph text content"}
- ],
- "order": 0
- },
- {
- "id": "section_list_example",
- "content_type": "list",
- "elements": [
- {
- "items": [
- {"text": "Item 1"},
- {"text": "Item 2"}
- ],
- "list_type": "numbered"
- }
- ],
- "order": 0
- },
- {
- "id": "section_table_example",
- "content_type": "table",
- "elements": [
- {
- "headers": ["Column 1", "Column 2"],
- "rows": [
- ["Row 1 Col 1", "Row 1 Col 2"],
- ["Row 2 Col 1", "Row 2 Col 2"]
- ],
- "caption": "Table caption"
- }
- ],
- "order": 0
- },
- {
- "id": "section_code_example",
- "content_type": "code",
- "elements": [
- {
- "code": "function example() { return true; }",
- "language": "javascript"
- }
- ],
- "order": 0
- }
- ]
- }
- ]
-}"""
async def buildGenerationPrompt(
@@ -106,99 +33,101 @@ async def buildGenerationPrompt(
Complete generation prompt string
"""
# Create a template - let AI generate title if not provided
- title_value = title if title else "Generated Document"
- json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)
+ titleValue = title if title else "Generated Document"
+ jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)
# Build prompt based on whether this is a continuation or first call
# Check if we have valid continuation context with actual JSON fragment
- has_continuation = (
+ hasContinuation = (
continuationContext
and continuationContext.get("section_count", 0) > 0
and continuationContext.get("last_raw_json", "")
and continuationContext.get("last_raw_json", "").strip() != "{}"
)
- if has_continuation:
+ if hasContinuation:
# CONTINUATION PROMPT - user already received first part, continue from where it stopped
- last_raw_json = continuationContext.get("last_raw_json", "")
- last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
- last_items_from_fragment = continuationContext.get("last_items_from_fragment", "")
- total_items_count = continuationContext.get("total_items_count", 0)
+ lastRawJson = continuationContext.get("last_raw_json", "")
+ lastItemObject = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
+ lastItemsFromFragment = continuationContext.get("last_items_from_fragment", "")
+ totalItemsCount = continuationContext.get("total_items_count", 0)
# Show the last few items to indicate where to continue (limit fragment size)
# Extract just the ending portion of the JSON to show where it cut off
- fragment_snippet = ""
- if last_raw_json:
+ fragmentSnippet = ""
+ if lastRawJson:
# Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
- fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json
+ fragmentSnippet = lastRawJson[-1500:] if len(lastRawJson) > 1500 else lastRawJson
# Add ellipsis if truncated
- if len(last_raw_json) > 1500:
- fragment_snippet = "..." + fragment_snippet
+ if len(lastRawJson) > 1500:
+ fragmentSnippet = "..." + fragmentSnippet
# Build clear continuation guidance
- continuation_guidance = []
+ continuationGuidance = []
- if total_items_count > 0:
- continuation_guidance.append(f"You have already generated {total_items_count} items.")
+ if totalItemsCount > 0:
+ continuationGuidance.append(f"You have already generated {totalItemsCount} items.")
# Show the last complete item object (full object format)
- if last_item_object:
- continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")
+ if lastItemObject:
+ continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")
- continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."
+ continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped."
- generation_prompt = f"""User request: "{userPrompt}"
+ generationPrompt = f"""User request: "{userPrompt}"
The user already received part of the response. Continue generating the remaining content.
-{continuation_text}
+{continuationText}
Previous response ended here (JSON was cut off at this point):
```json
-{fragment_snippet if fragment_snippet else "(No fragment available)"}
+{fragmentSnippet if fragmentSnippet else "(No fragment available)"}
```
JSON structure template:
-{json_template}
+{jsonTemplate}
Instructions:
-- Return full JSON structure (metadata + documents + sections)
-- Continue from where it stopped - add NEW items only, do not repeat old items
-- Use the element structures shown in the template
-- Generate all remaining content needed to complete the user request
-- Fill with actual content (no comments, no "Add more..." text, no placeholders)
-- When fully complete, add "complete_response": true at root level
-- Return only valid JSON (no comments, no markdown blocks)
+- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
+- Arrays must contain ONLY JSON values; do not include comments or ellipses.
+- Use ONLY the element structures shown in the template.
+- Continue from where it stopped — add NEW items only; do not repeat existing items.
+- Generate all remaining content needed to complete the user request.
+- Fill with actual content (no placeholders or instructional text such as "Add more...").
+- When fully complete, add "complete_response": true at root level.
+- Output JSON only; no markdown fences or extra text before/after.
Continue generating:
"""
else:
# FIRST CALL - initial generation
- generation_prompt = f"""User request: "{userPrompt}"
+ generationPrompt = f"""User request: "{userPrompt}"
-Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
+Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
-JSON structure template (reference only - shows the pattern):
-{json_template}
+JSON structure template:
+{jsonTemplate}
Instructions:
-- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
-- Do NOT continue from the template examples above - create your own sections
-- Generate complete content based on the user request
-- Use the element structures shown in the template (heading, paragraph, list, table, code)
-- Create your own section IDs (do not use the example IDs like "section_heading_example")
-- When fully complete, add "complete_response": true at root level
-- Return only valid JSON (no comments, no markdown blocks, no text before/after)
+- Start your response with {{"metadata": ...}} — return COMPLETE, STRICT JSON.
+- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
+- Arrays must contain ONLY JSON values; do not include comments or ellipses.
+- Do NOT reuse the example section IDs from the template; create your own.
+- Use ONLY the element structures shown in the template.
+- Generate complete content based on the user request.
+- When fully complete, add "complete_response": true at root level.
+- Output JSON only; no markdown fences or any additional text.
Generate your complete response starting from {{"metadata": ...}}:
"""
# If we have extracted content, prepend it to the prompt
if extracted_content:
- generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
+ generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
-{generation_prompt}"""
+{generationPrompt}"""
- return generation_prompt.strip()
+ return generationPrompt.strip()
diff --git a/modules/services/serviceNeutralization/mainServiceNeutralization.py b/modules/services/serviceNeutralization/mainServiceNeutralization.py
index c48939f6..c6aa717a 100644
--- a/modules/services/serviceNeutralization/mainServiceNeutralization.py
+++ b/modules/services/serviceNeutralization/mainServiceNeutralization.py
@@ -152,11 +152,11 @@ class NeutralizationService:
try:
# Auto-detect content type if not provided
if textType is None:
- textType = self.commonUtils.detect_content_type(text)
+ textType = self.commonUtils.detectContentType(text)
# Check if content is binary data
- if self.binaryProcessor.is_binary_content(text):
- data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
+ if self.binaryProcessor.isBinaryContent(text):
+ data, mapping, replaced_fields, processed_info = self.binaryProcessor.processBinaryContent(text)
neutralized_text = text if isinstance(data, str) else str(data)
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
return NeutralizationResult(
@@ -169,13 +169,13 @@ class NeutralizationService:
# Inline former _processData routing
if textType in ['csv', 'json', 'xml']:
if textType == 'csv':
- data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
+ data, mapping, replaced_fields, processed_info = self.listProcessor.processCsvContent(text)
elif textType == 'json':
- data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
+ data, mapping, replaced_fields, processed_info = self.listProcessor.processJsonContent(text)
else: # xml
- data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
+ data, mapping, replaced_fields, processed_info = self.listProcessor.processXmlContent(text)
else:
- data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
+ data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
# Stringify data consistently
if textType == 'csv':
try:
diff --git a/modules/services/serviceNeutralization/subParseString.py b/modules/services/serviceNeutralization/subParseString.py
index 5c92e110..85235da9 100644
--- a/modules/services/serviceNeutralization/subParseString.py
+++ b/modules/services/serviceNeutralization/subParseString.py
@@ -6,7 +6,7 @@ Handles pattern matching and replacement for emails, phones, addresses, IDs and
import re
import uuid
from typing import Dict, List, Tuple, Any
-from modules.services.serviceNeutralization.subPatterns import DataPatterns, find_patterns_in_text
+from modules.services.serviceNeutralization.subPatterns import DataPatterns, findPatternsInText
class StringParser:
"""Handles string parsing and replacement operations"""
@@ -22,7 +22,7 @@ class StringParser:
self.NamesToParse = NamesToParse or []
self.mapping = {}
- def is_placeholder(self, text: str) -> bool:
+ def _isPlaceholder(self, text: str) -> bool:
"""
Check if text is already a placeholder in format [tag.uuid]
@@ -34,7 +34,7 @@ class StringParser:
"""
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
- def replace_pattern_matches(self, text: str) -> str:
+ def _replacePatternMatches(self, text: str) -> str:
"""
Replace pattern-based matches (emails, phones, etc.) in text
@@ -44,37 +44,37 @@ class StringParser:
Returns:
str: Text with pattern matches replaced
"""
- pattern_matches = find_patterns_in_text(text, self.data_patterns)
+ patternMatches = findPatternsInText(text, self.data_patterns)
# Process pattern matches from right to left to avoid position shifts
- for pattern_name, matched_text, start, end in reversed(pattern_matches):
+ for patternName, matchedText, start, end in reversed(patternMatches):
# Skip if already a placeholder
- if self.is_placeholder(matched_text):
+ if self._isPlaceholder(matchedText):
continue
# Skip if contains placeholder characters
- if '[' in matched_text or ']' in matched_text:
+ if '[' in matchedText or ']' in matchedText:
continue
- if matched_text not in self.mapping:
+ if matchedText not in self.mapping:
# Generate a UUID for the placeholder
- placeholder_id = str(uuid.uuid4())
+ placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
- type_mapping = {
+ typeMapping = {
'email': 'email',
'phone': 'phone',
'address': 'address',
'id': 'id'
}
- placeholder_type = type_mapping.get(pattern_name, 'data')
- self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
+ placeholderType = typeMapping.get(patternName, 'data')
+ self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]"
- replacement = self.mapping[matched_text]
+ replacement = self.mapping[matchedText]
text = text[:start] + replacement + text[end:]
return text
- def replace_custom_names(self, text: str) -> str:
+ def _replaceCustomNames(self, text: str) -> str:
"""
Replace custom names from the user list in text
@@ -96,19 +96,19 @@ class StringParser:
# Replace each match with a placeholder
for match in reversed(matches): # Process from right to left to avoid position shifts
- matched_text = match.group()
- if matched_text not in self.mapping:
+ matchedText = match.group()
+ if matchedText not in self.mapping:
# Generate a UUID for the placeholder
- placeholder_id = str(uuid.uuid4())
- self.mapping[matched_text] = f"[name.{placeholder_id}]"
+ placeholderId = str(uuid.uuid4())
+ self.mapping[matchedText] = f"[name.{placeholderId}]"
- replacement = self.mapping[matched_text]
+ replacement = self.mapping[matchedText]
start, end = match.span()
text = text[:start] + replacement + text[end:]
return text
- def process_string(self, text: str) -> str:
+ def processString(self, text: str) -> str:
"""
Process a string by replacing patterns first, then custom names
@@ -118,18 +118,18 @@ class StringParser:
Returns:
str: Processed text with replacements
"""
- if self.is_placeholder(text):
+ if self._isPlaceholder(text):
return text
# Step 1: Replace pattern-based matches FIRST
- text = self.replace_pattern_matches(text)
+ text = self._replacePatternMatches(text)
# Step 2: Replace custom names SECOND
- text = self.replace_custom_names(text)
+ text = self._replaceCustomNames(text)
return text
- def process_json_value(self, value: Any) -> Any:
+ def processJsonValue(self, value: Any) -> Any:
"""
Process a JSON value for anonymization
@@ -140,15 +140,15 @@ class StringParser:
Any: Processed value
"""
if isinstance(value, str):
- return self.process_string(value)
+ return self.processString(value)
elif isinstance(value, dict):
- return {k: self.process_json_value(v) for k, v in value.items()}
+ return {k: self.processJsonValue(v) for k, v in value.items()}
elif isinstance(value, list):
- return [self.process_json_value(item) for item in value]
+ return [self.processJsonValue(item) for item in value]
else:
return value
- def get_mapping(self) -> Dict[str, str]:
+ def getMapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
@@ -157,6 +157,6 @@ class StringParser:
"""
return self.mapping.copy()
- def clear_mapping(self):
+ def clearMapping(self):
"""Clear the current mapping"""
self.mapping.clear()
diff --git a/modules/services/serviceNeutralization/subPatterns.py b/modules/services/serviceNeutralization/subPatterns.py
index 175a690f..57334ef2 100644
--- a/modules/services/serviceNeutralization/subPatterns.py
+++ b/modules/services/serviceNeutralization/subPatterns.py
@@ -316,7 +316,7 @@ class TextTablePatterns:
"""Patterns for identifying table-like structures in text"""
@staticmethod
- def get_patterns() -> List[Tuple[str, str]]:
+ def getPatterns() -> List[Tuple[str, str]]:
return [
# key: value pattern (with optional whitespace)
(r'^([^:]+):\s*(.+)$', ':'),
@@ -329,15 +329,15 @@ class TextTablePatterns:
]
@staticmethod
- def is_table_line(line: str) -> bool:
+ def _isTableLine(line: str) -> bool:
"""Check if a line matches any table pattern"""
- patterns = TextTablePatterns.get_patterns()
+ patterns = TextTablePatterns.getPatterns()
return any(re.match(pattern[0], line.strip()) for pattern in patterns)
@staticmethod
- def extract_key_value(line: str) -> Optional[Tuple[str, str]]:
+ def extractKeyValue(line: str) -> Optional[Tuple[str, str]]:
"""Extract key and value from a table line"""
- patterns = TextTablePatterns.get_patterns()
+ patterns = TextTablePatterns.getPatterns()
for pattern, separator in patterns:
match = re.match(pattern, line.strip())
if match:
@@ -346,7 +346,7 @@ class TextTablePatterns:
return key, value
return None
-def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
+def getPatternForHeader(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
"""
Find matching pattern for a header
@@ -368,7 +368,7 @@ def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pat
return pattern
return None
-def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]:
+def findPatternsInText(text: str, patterns: List[Pattern]) -> List[tuple]:
"""
Find all pattern matches in text
diff --git a/modules/services/serviceNeutralization/subProcessBinary.py b/modules/services/serviceNeutralization/subProcessBinary.py
index 67c73bc1..3b61b635 100644
--- a/modules/services/serviceNeutralization/subProcessBinary.py
+++ b/modules/services/serviceNeutralization/subProcessBinary.py
@@ -27,7 +27,7 @@ class BinaryProcessor:
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
}
- def detect_binary_type(self, content: str) -> str:
+ def _detectBinaryType(self, content: str) -> str:
"""
Detect if content is binary data and determine type
@@ -54,7 +54,7 @@ class BinaryProcessor:
return 'text'
- def is_binary_content(self, content: str) -> bool:
+ def isBinaryContent(self, content: str) -> bool:
"""
Check if content is binary data
@@ -64,9 +64,9 @@ class BinaryProcessor:
Returns:
bool: True if content is binary
"""
- return self.detect_binary_type(content) == 'binary'
+ return self._detectBinaryType(content) == 'binary'
- def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
+ def processBinaryContent(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
"""
Process binary content for anonymization
@@ -83,15 +83,15 @@ class BinaryProcessor:
# 3. Handling metadata and embedded content
# 4. Preserving binary integrity while removing sensitive data
- processed_info = {
+ processedInfo = {
'type': 'binary',
'status': 'not_implemented',
'message': 'Binary data neutralization not yet implemented'
}
- return content, {}, [], processed_info
+ return content, {}, [], processedInfo
- def get_supported_types(self) -> Dict[str, list]:
+ def getSupportedTypes(self) -> Dict[str, list]:
"""
Get list of supported binary file types
diff --git a/modules/services/serviceNeutralization/subProcessCommon.py b/modules/services/serviceNeutralization/subProcessCommon.py
index de3fa290..01e6cb25 100644
--- a/modules/services/serviceNeutralization/subProcessCommon.py
+++ b/modules/services/serviceNeutralization/subProcessCommon.py
@@ -33,7 +33,7 @@ class CommonUtils:
"""Common utility functions for data processing"""
@staticmethod
- def normalize_whitespace(text: str) -> str:
+ def normalizeWhitespace(text: str) -> str:
"""
Normalize whitespace in text
@@ -48,7 +48,7 @@ class CommonUtils:
return text.strip()
@staticmethod
- def is_table_line(line: str) -> bool:
+ def _isTableLine(line: str) -> bool:
"""
Check if a line represents a table row
@@ -62,7 +62,7 @@ class CommonUtils:
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
@staticmethod
- def detect_content_type(content: str) -> str:
+ def detectContentType(content: str) -> str:
"""
Detect the type of content based on its structure
@@ -98,7 +98,7 @@ class CommonUtils:
return 'text'
@staticmethod
- def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
+ def mergeMappings(*mappings: Dict[str, str]) -> Dict[str, str]:
"""
Merge multiple mapping dictionaries
@@ -114,21 +114,21 @@ class CommonUtils:
return merged
@staticmethod
- def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
+ def createPlaceholder(placeholderType: str, placeholderId: str) -> str:
"""
Create a placeholder string in the format [type.uuid]
Args:
- placeholder_type: Type of placeholder (email, phone, name, etc.)
- placeholder_id: Unique identifier for the placeholder
+ placeholderType: Type of placeholder (email, phone, name, etc.)
+ placeholderId: Unique identifier for the placeholder
Returns:
str: Formatted placeholder string
"""
- return f"[{placeholder_type}.{placeholder_id}]"
+ return f"[{placeholderType}.{placeholderId}]"
@staticmethod
- def validate_placeholder(placeholder: str) -> bool:
+ def validatePlaceholder(placeholder: str) -> bool:
"""
Validate if a string is a valid placeholder
@@ -141,7 +141,7 @@ class CommonUtils:
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
@staticmethod
- def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
+ def extractPlaceholderInfo(placeholder: str) -> Optional[tuple]:
"""
Extract type and ID from a placeholder
diff --git a/modules/services/serviceNeutralization/subProcessList.py b/modules/services/serviceNeutralization/subProcessList.py
index 3d5e7900..1d39ab9e 100644
--- a/modules/services/serviceNeutralization/subProcessList.py
+++ b/modules/services/serviceNeutralization/subProcessList.py
@@ -10,7 +10,7 @@ from typing import Dict, List, Any, Union
from dataclasses import dataclass
from io import StringIO
from modules.services.serviceNeutralization.subParseString import StringParser
-from modules.services.serviceNeutralization.subPatterns import get_pattern_for_header, HeaderPatterns
+from modules.services.serviceNeutralization.subPatterns import getPatternForHeader, HeaderPatterns
@dataclass
class TableData:
@@ -32,7 +32,7 @@ class ListProcessor:
self.string_parser = StringParser(NamesToParse)
self.header_patterns = HeaderPatterns.patterns
- def anonymize_table(self, table: TableData) -> TableData:
+ def _anonymizeTable(self, table: TableData) -> TableData:
"""
Anonymize table data based on headers
@@ -42,28 +42,28 @@ class ListProcessor:
Returns:
TableData: Anonymized table
"""
- anonymized_table = TableData(
+ anonymizedTable = TableData(
headers=table.headers.copy(),
rows=[row.copy() for row in table.rows],
source_type=table.source_type
)
- for i, header in enumerate(anonymized_table.headers):
- pattern = get_pattern_for_header(header, self.header_patterns)
+ for i, header in enumerate(anonymizedTable.headers):
+ pattern = getPatternForHeader(header, self.header_patterns)
if pattern:
- for row in anonymized_table.rows:
+ for row in anonymizedTable.rows:
if row[i] is not None:
original = str(row[i])
if original not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
- placeholder_id = str(uuid.uuid4())
+ placeholderId = str(uuid.uuid4())
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
row[i] = self.string_parser.mapping[original]
- return anonymized_table
+ return anonymizedTable
- def process_csv_content(self, content: str) -> tuple:
+ def processCsvContent(self, content: str) -> tuple:
"""
Process CSV content
@@ -81,29 +81,29 @@ class ListProcessor:
)
if not table.rows:
- return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
+ return None, self.string_parser.getMapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
- anonymized_table = self.anonymize_table(table)
+ anonymizedTable = self._anonymizeTable(table)
# Track replaced fields
- replaced_fields = []
- for i, header in enumerate(anonymized_table.headers):
- for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
- if anon_row[i] != orig_row[i]:
- replaced_fields.append(header)
+ replacedFields = []
+ for i, header in enumerate(anonymizedTable.headers):
+ for origRow, anonRow in zip(table.rows, anonymizedTable.rows):
+ if anonRow[i] != origRow[i]:
+ replacedFields.append(header)
# Convert back to DataFrame
- result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
+ result = pd.DataFrame(anonymizedTable.rows, columns=anonymizedTable.headers)
- processed_info = {
+ processedInfo = {
'type': 'table',
'headers': table.headers,
'row_count': len(table.rows)
}
- return result, self.string_parser.get_mapping(), replaced_fields, processed_info
+ return result, self.string_parser.getMapping(), replacedFields, processedInfo
- def process_json_content(self, content: str) -> tuple:
+ def processJsonContent(self, content: str) -> tuple:
"""
Process JSON content
@@ -116,13 +116,13 @@ class ListProcessor:
data = json.loads(content)
# Process JSON recursively using string parser
- result = self.string_parser.process_json_value(data)
+ result = self.string_parser.processJsonValue(data)
- processed_info = {'type': 'json'}
+ processedInfo = {'type': 'json'}
- return result, self.string_parser.get_mapping(), [], processed_info
+ return result, self.string_parser.getMapping(), [], processedInfo
- def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
+ def _anonymizeXmlElement(self, element: ET.Element, indent: str = '') -> str:
"""
Recursively process XML element and return formatted string
@@ -134,69 +134,69 @@ class ListProcessor:
Formatted XML string
"""
# Process attributes
- processed_attrs = {}
- for attr_name, attr_value in element.attrib.items():
+ processedAttrs = {}
+ for attrName, attrValue in element.attrib.items():
# Check if attribute name matches any header patterns
- pattern = get_pattern_for_header(attr_name, self.header_patterns)
+ pattern = getPatternForHeader(attrName, self.header_patterns)
if pattern:
- if attr_value not in self.string_parser.mapping:
+ if attrValue not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
- placeholder_id = str(uuid.uuid4())
+ placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
- type_mapping = {
+ typeMapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
- placeholder_type = type_mapping.get(pattern.name, 'data')
- self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
- processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
+ placeholderType = typeMapping.get(pattern.name, 'data')
+ self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
+ processedAttrs[attrName] = self.string_parser.mapping[attrValue]
else:
# Check if attribute value matches any data patterns
- from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
- matches = find_patterns_in_text(attr_value, DataPatterns.patterns)
+ from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
+ matches = findPatternsInText(attrValue, DataPatterns.patterns)
if matches:
- pattern_name = matches[0][0]
- pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
+ patternName = matches[0][0]
+ pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
if pattern:
- if attr_value not in self.string_parser.mapping:
+ if attrValue not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
- placeholder_id = str(uuid.uuid4())
+ placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
- type_mapping = {
+ typeMapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
- placeholder_type = type_mapping.get(pattern_name, 'data')
- self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
- processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
+ placeholderType = typeMapping.get(patternName, 'data')
+ self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
+ processedAttrs[attrName] = self.string_parser.mapping[attrValue]
else:
- processed_attrs[attr_name] = attr_value
+ processedAttrs[attrName] = attrValue
else:
- processed_attrs[attr_name] = attr_value
+ processedAttrs[attrName] = attrValue
- attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
+ attrs = ' '.join(f'{k}="{v}"' for k, v in processedAttrs.items())
attrs = f' {attrs}' if attrs else ''
# Process text content
text = element.text.strip() if element.text and element.text.strip() else ''
if text:
# Skip if already a placeholder
- if not self.string_parser.is_placeholder(text):
+ if not self.string_parser._isPlaceholder(text):
# Check if text matches any patterns
- from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
- pattern_matches = find_patterns_in_text(text, DataPatterns.patterns)
+ from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
+ patternMatches = findPatternsInText(text, DataPatterns.patterns)
- if pattern_matches:
- pattern_name = pattern_matches[0][0]
- pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
+ if patternMatches:
+ patternName = patternMatches[0][0]
+ pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
if pattern:
if text not in self.string_parser.mapping:
# Generate a UUID for the placeholder
@@ -210,8 +210,8 @@ class ListProcessor:
'address': 'address',
'id': 'id'
}
- placeholder_type = type_mapping.get(pattern_name, 'data')
- self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
+ placeholderType = typeMapping.get(patternName, 'data')
+ self.string_parser.mapping[text] = f"[{placeholderType}.{placeholderId}]"
text = self.string_parser.mapping[text]
else:
# Check if text matches any custom names from the user list
@@ -230,8 +230,8 @@ class ListProcessor:
# Process child elements
children = []
for child in element:
- child_str = self.anonymize_xml_element(child, indent + ' ')
- children.append(child_str)
+ childStr = self._anonymizeXmlElement(child, indent + ' ')
+ children.append(childStr)
# Build element string
if not children and not text:
@@ -246,7 +246,7 @@ class ListProcessor:
result.append(f"{indent}{element.tag}>")
return '\n'.join(result)
- def process_xml_content(self, content: str) -> tuple:
+ def processXmlContent(self, content: str) -> tuple:
"""
Process XML content
@@ -259,21 +259,21 @@ class ListProcessor:
root = ET.fromstring(content)
# Process XML recursively with proper formatting
- result = self.anonymize_xml_element(root)
+ result = self._anonymizeXmlElement(root)
- processed_info = {'type': 'xml'}
+ processedInfo = {'type': 'xml'}
- return result, self.string_parser.get_mapping(), [], processed_info
+ return result, self.string_parser.getMapping(), [], processedInfo
- def get_mapping(self) -> Dict[str, str]:
+ def getMapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
- return self.string_parser.get_mapping()
+ return self.string_parser.getMapping()
- def clear_mapping(self):
+ def clearMapping(self):
"""Clear the current mapping"""
- self.string_parser.clear_mapping()
+ self.string_parser.clearMapping()
diff --git a/modules/services/serviceNeutralization/subProcessText.py b/modules/services/serviceNeutralization/subProcessText.py
index 98d8cbde..2c183828 100644
--- a/modules/services/serviceNeutralization/subProcessText.py
+++ b/modules/services/serviceNeutralization/subProcessText.py
@@ -25,7 +25,7 @@ class TextProcessor:
"""
self.string_parser = StringParser(NamesToParse)
- def extract_tables_from_text(self, content: str) -> tuple:
+ def _extractTablesFromText(self, content: str) -> tuple:
"""
Extract tables and plain text from content
@@ -38,11 +38,11 @@ class TextProcessor:
# For now, process the entire content as plain text
# This can be extended later to detect table-like structures
tables = []
- plain_texts = [PlainText(content=content, source_type='text_plain')]
+ plainTexts = [PlainText(content=content, source_type='text_plain')]
- return tables, plain_texts
+ return tables, plainTexts
- def anonymize_plain_text(self, text: PlainText) -> PlainText:
+ def _anonymizePlainText(self, text: PlainText) -> PlainText:
"""
Anonymize plain text content
@@ -53,11 +53,11 @@ class TextProcessor:
PlainText: Anonymized text
"""
# Use the string parser to process the content
- anonymized_content = self.string_parser.process_string(text.content)
+ anonymizedContent = self.string_parser.processString(text.content)
- return PlainText(content=anonymized_content, source_type=text.source_type)
+ return PlainText(content=anonymizedContent, source_type=text.source_type)
- def process_text_content(self, content: str) -> tuple:
+ def processTextContent(self, content: str) -> tuple:
"""
Process text content and return anonymized data
@@ -68,35 +68,35 @@ class TextProcessor:
Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
"""
# Extract tables and plain text sections
- tables, plain_texts = self.extract_tables_from_text(content)
+ tables, plainTexts = self._extractTablesFromText(content)
# Process plain text sections
- anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]
+ anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts]
# Combine all processed content
result = content
- for text, anonymized_text in zip(plain_texts, anonymized_texts):
- if text.content != anonymized_text.content:
- result = result.replace(text.content, anonymized_text.content)
+ for text, anonymizedText in zip(plainTexts, anonymizedTexts):
+ if text.content != anonymizedText.content:
+ result = result.replace(text.content, anonymizedText.content)
# Get processing information
- processed_info = {
+ processedInfo = {
'type': 'text',
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
if tables else [])
}
- return result, self.string_parser.get_mapping(), [], processed_info
+ return result, self.string_parser.getMapping(), [], processedInfo
- def get_mapping(self) -> Dict[str, str]:
+ def getMapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
- return self.string_parser.get_mapping()
+ return self.string_parser.getMapping()
- def clear_mapping(self):
+ def clearMapping(self):
"""Clear the current mapping"""
- self.string_parser.clear_mapping()
+ self.string_parser.clearMapping()
diff --git a/modules/services/serviceSharepoint/mainServiceSharepoint.py b/modules/services/serviceSharepoint/mainServiceSharepoint.py
index ec05f881..481c6818 100644
--- a/modules/services/serviceSharepoint/mainServiceSharepoint.py
+++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py
@@ -20,8 +20,8 @@ class SharepointService:
Use setAccessTokenFromConnection() method to configure the access token before making API calls.
"""
self.services = serviceCenter
- self.access_token = None
- self.base_url = "https://graph.microsoft.com/v1.0"
+ self.accessToken = None
+ self.baseUrl = "https://graph.microsoft.com/v1.0"
def setAccessTokenFromConnection(self, userConnection) -> bool:
"""Set access token from UserConnection.
@@ -52,21 +52,21 @@ class SharepointService:
logger.error(f"Error setting access token: {str(e)}")
return False
- async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
+ async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
"""Make a Microsoft Graph API call with proper error handling."""
try:
- if self.access_token is None:
+ if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."}
headers = {
- "Authorization": f"Bearer {self.access_token}",
+ "Authorization": f"Bearer {self.accessToken}",
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
}
# Remove leading slash from endpoint to avoid double slash
- clean_endpoint = endpoint.lstrip('/')
- url = f"{self.base_url}/{clean_endpoint}"
+ cleanEndpoint = endpoint.lstrip('/')
+ url = f"{self.baseUrl}/{cleanEndpoint}"
logger.debug(f"Making Graph API call: {method} {url}")
timeout = aiohttp.ClientTimeout(total=30)
@@ -106,10 +106,10 @@ class SharepointService:
logger.error(f"Error making Graph API call: {str(e)}")
return {"error": f"Error making Graph API call: {str(e)}"}
- async def discover_sites(self) -> List[Dict[str, Any]]:
+ async def discoverSites(self) -> List[Dict[str, Any]]:
"""Discover all SharePoint sites accessible to the user."""
try:
- result = await self._make_graph_api_call("sites?search=*")
+ result = await self._makeGraphApiCall("sites?search=*")
if "error" in result:
logger.error(f"Error discovering SharePoint sites: {result['error']}")
@@ -118,9 +118,9 @@ class SharepointService:
sites = result.get("value", [])
logger.info(f"Discovered {len(sites)} SharePoint sites")
- processed_sites = []
+ processedSites = []
for site in sites:
- site_info = {
+ siteInfo = {
"id": site.get("id"),
"displayName": site.get("displayName"),
"name": site.get("name"),
@@ -129,24 +129,24 @@ class SharepointService:
"createdDateTime": site.get("createdDateTime"),
"lastModifiedDateTime": site.get("lastModifiedDateTime")
}
- processed_sites.append(site_info)
- logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
+ processedSites.append(siteInfo)
+ logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
- return processed_sites
+ return processedSites
except Exception as e:
logger.error(f"Error discovering SharePoint sites: {str(e)}")
return []
- async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
+ async def findSiteByName(self, siteName: str) -> Optional[Dict[str, Any]]:
"""Find a specific SharePoint site by name using direct Graph API call."""
try:
# Try to get the site directly by name using Graph API
- endpoint = f"sites/{site_name}"
- result = await self._make_graph_api_call(endpoint)
+ endpoint = f"sites/{siteName}"
+ result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result:
- site_info = {
+ siteInfo = {
"id": result.get("id"),
"displayName": result.get("displayName"),
"name": result.get("name"),
@@ -155,15 +155,15 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime")
}
- logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
- return site_info
+ logger.info(f"Found site directly: {siteInfo['displayName']} - {siteInfo['webUrl']}")
+ return siteInfo
except Exception as e:
- logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")
+ logger.debug(f"Direct site lookup failed for '{siteName}': {str(e)}")
# Fallback to discovery if direct lookup fails
- logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
- sites = await self.discover_sites()
+ logger.info(f"Direct lookup failed, trying discovery for site: {siteName}")
+ sites = await self.discoverSites()
if not sites:
logger.warning("No sites discovered")
return None
@@ -174,46 +174,46 @@ class SharepointService:
# Try exact match first
for site in sites:
- if site.get("displayName", "").strip().lower() == site_name.strip().lower():
+ if site.get("displayName", "").strip().lower() == siteName.strip().lower():
logger.info(f"Found exact match: {site.get('displayName')}")
return site
# Try partial match
for site in sites:
- if site_name.lower() in site.get("displayName", "").lower():
+ if siteName.lower() in site.get("displayName", "").lower():
logger.info(f"Found partial match: {site.get('displayName')}")
return site
- logger.warning(f"No site found matching: {site_name}")
+ logger.warning(f"No site found matching: {siteName}")
return None
- async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
+ async def findSiteByWebUrl(self, webUrl: str) -> Optional[Dict[str, Any]]:
"""Find a SharePoint site using its web URL (useful for guest sites)."""
try:
# Use the web URL format: sites/{hostname}:/sites/{site-path}
# Extract hostname and site path from the web URL
- if not web_url.startswith("https://"):
- web_url = f"https://{web_url}"
+ if not webUrl.startswith("https://"):
+ webUrl = f"https://{webUrl}"
# Parse the URL to extract hostname and site path
from urllib.parse import urlparse
- parsed = urlparse(web_url)
+ parsed = urlparse(webUrl)
hostname = parsed.hostname
- path_parts = parsed.path.strip('/').split('/')
+ pathParts = parsed.path.strip('/').split('/')
- if len(path_parts) >= 2 and path_parts[0] == 'sites':
- site_path = '/'.join(path_parts[1:]) # Everything after 'sites/'
+ if len(pathParts) >= 2 and pathParts[0] == 'sites':
+ sitePath = '/'.join(pathParts[1:]) # Everything after 'sites/'
else:
- logger.error(f"Invalid SharePoint URL format: {web_url}")
+ logger.error(f"Invalid SharePoint URL format: {webUrl}")
return None
- endpoint = f"sites/{hostname}:/sites/{site_path}"
+ endpoint = f"sites/{hostname}:/sites/{sitePath}"
logger.debug(f"Trying web URL format: {endpoint}")
- result = await self._make_graph_api_call(endpoint)
+ result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result:
- site_info = {
+ siteInfo = {
"id": result.get("id"),
"displayName": result.get("displayName"),
"name": result.get("name"),
@@ -222,33 +222,33 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime")
}
- logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
- return site_info
+ logger.info(f"Found site by web URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
+ return siteInfo
else:
- logger.warning(f"Site not found using web URL: {web_url}")
+ logger.warning(f"Site not found using web URL: {webUrl}")
return None
except Exception as e:
logger.error(f"Error finding site by web URL: {str(e)}")
return None
- async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
+ async def findSiteByUrl(self, hostname: str, sitePath: str) -> Optional[Dict[str, Any]]:
"""Find a SharePoint site using the site URL format."""
try:
# For guest sites, try different URL formats
- url_formats = [
- f"sites/{hostname}:/sites/{site_path}", # Standard format
- f"sites/{hostname}:/sites/{site_path}/", # With trailing slash
- f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase
- f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash
+ urlFormats = [
+ f"sites/{hostname}:/sites/{sitePath}", # Standard format
+ f"sites/{hostname}:/sites/{sitePath}/", # With trailing slash
+ f"sites/{hostname}:/sites/{sitePath.lower()}", # Lowercase
+ f"sites/{hostname}:/sites/{sitePath.lower()}/", # Lowercase with slash
]
- for endpoint in url_formats:
+ for endpoint in urlFormats:
logger.debug(f"Trying URL format: {endpoint}")
- result = await self._make_graph_api_call(endpoint)
+ result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result:
- site_info = {
+ siteInfo = {
"id": result.get("id"),
"displayName": result.get("displayName"),
"name": result.get("name"),
@@ -257,29 +257,29 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime")
}
- logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
- return site_info
+ logger.info(f"Found site by URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
+ return siteInfo
else:
logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")
- logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
+ logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{sitePath}")
return None
except Exception as e:
logger.error(f"Error finding site by URL: {str(e)}")
return None
- async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
+ async def getFolderByPath(self, siteId: str, folderPath: str) -> Optional[Dict[str, Any]]:
"""Get folder information by path within a site."""
try:
# Clean the path
- clean_path = folder_path.lstrip('/')
- endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
+ cleanPath = folderPath.lstrip('/')
+ endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
- result = await self._make_graph_api_call(endpoint)
+ result = await self._makeGraphApiCall(endpoint)
if "error" in result:
- logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
+ logger.warning(f"Folder not found at path {folderPath}: {result['error']}")
return None
return result
@@ -288,43 +288,43 @@ class SharepointService:
logger.error(f"Error getting folder by path: {str(e)}")
return None
- async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
+ async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
"""Upload a file to SharePoint."""
try:
# Clean the path
- clean_path = folder_path.lstrip('/')
- upload_path = f"{clean_path.rstrip('/')}/{file_name}"
- endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
+ cleanPath = folderPath.lstrip('/')
+ uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
+ endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
logger.info(f"Uploading file to: {endpoint}")
- result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
+ result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
if "error" in result:
logger.error(f"Upload failed: {result['error']}")
return result
- logger.info(f"File uploaded successfully: {file_name}")
+ logger.info(f"File uploaded successfully: {fileName}")
return result
except Exception as e:
logger.error(f"Error uploading file: {str(e)}")
return {"error": f"Error uploading file: {str(e)}"}
- async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
+ async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
"""Download a file from SharePoint."""
try:
- if self.access_token is None:
+ if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return None
- endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
+ endpoint = f"sites/{siteId}/drive/items/{fileId}/content"
- headers = {"Authorization": f"Bearer {self.access_token}"}
+ headers = {"Authorization": f"Bearer {self.accessToken}"}
timeout = aiohttp.ClientTimeout(total=30)
async with aiohttp.ClientSession(timeout=timeout) as session:
- async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response:
+ async with session.get(f"{self.baseUrl}/{endpoint}", headers=headers) as response:
if response.status == 200:
return await response.read()
else:
@@ -335,32 +335,32 @@ class SharepointService:
logger.error(f"Error downloading file: {str(e)}")
return None
- async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
+ async def listFolderContents(self, siteId: str, folderPath: str = "") -> List[Dict[str, Any]]:
"""List contents of a folder."""
try:
- if not folder_path or folder_path == "/":
- endpoint = f"sites/{site_id}/drive/root/children"
+ if not folderPath or folderPath == "/":
+ endpoint = f"sites/{siteId}/drive/root/children"
else:
- clean_path = folder_path.lstrip('/')
- endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
+ cleanPath = folderPath.lstrip('/')
+ endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"
- result = await self._make_graph_api_call(endpoint)
+ result = await self._makeGraphApiCall(endpoint)
if "error" in result:
logger.warning(f"Failed to list folder contents: {result['error']}")
return None
items = result.get("value", [])
- processed_items = []
+ processedItems = []
for item in items:
# Determine if it's a folder or file
- is_folder = 'folder' in item
+ isFolder = 'folder' in item
- item_info = {
+ itemInfo = {
"id": item.get("id"),
"name": item.get("name"),
- "type": "folder" if is_folder else "file",
+ "type": "folder" if isFolder else "file",
"size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
@@ -368,42 +368,42 @@ class SharepointService:
}
if "file" in item:
- item_info["mimeType"] = item["file"].get("mimeType")
- item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
+ itemInfo["mimeType"] = item["file"].get("mimeType")
+ itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
if "folder" in item:
- item_info["childCount"] = item["folder"].get("childCount", 0)
+ itemInfo["childCount"] = item["folder"].get("childCount", 0)
- processed_items.append(item_info)
+ processedItems.append(itemInfo)
- return processed_items
+ return processedItems
except Exception as e:
logger.error(f"Error listing folder contents: {str(e)}")
return []
- async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
+ async def searchFiles(self, siteId: str, query: str) -> List[Dict[str, Any]]:
"""Search for files in a site."""
try:
- search_query = query.replace("'", "''") # Escape single quotes for OData
- endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+ searchQuery = query.replace("'", "''") # Escape single quotes for OData
+ endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"
- result = await self._make_graph_api_call(endpoint)
+ result = await self._makeGraphApiCall(endpoint)
if "error" in result:
logger.warning(f"Search failed: {result['error']}")
return []
items = result.get("value", [])
- processed_items = []
+ processedItems = []
for item in items:
- is_folder = 'folder' in item
+ isFolder = 'folder' in item
- item_info = {
+ itemInfo = {
"id": item.get("id"),
"name": item.get("name"),
- "type": "folder" if is_folder else "file",
+ "type": "folder" if isFolder else "file",
"size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
@@ -412,64 +412,64 @@ class SharepointService:
}
if "file" in item:
- item_info["mimeType"] = item["file"].get("mimeType")
- item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
+ itemInfo["mimeType"] = item["file"].get("mimeType")
+ itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
- processed_items.append(item_info)
+ processedItems.append(itemInfo)
- return processed_items
+ return processedItems
except Exception as e:
logger.error(f"Error searching files: {str(e)}")
return []
- async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
+ async def copyFileAsync(self, siteId: str, sourceFolder: str, sourceFile: str, destFolder: str, destFile: str) -> None:
"""Copy a file from source to destination folder (like original synchronizer)."""
try:
# First, download the source file
- source_path = f"{source_folder}/{source_file}"
- file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
+ sourcePath = f"{sourceFolder}/{sourceFile}"
+ fileContent = await self.downloadFileByPath(siteId=siteId, filePath=sourcePath)
- if not file_content:
- raise Exception(f"Failed to download source file: {source_path}")
+ if not fileContent:
+ raise Exception(f"Failed to download source file: {sourcePath}")
# Upload to destination
- await self.upload_file(
- site_id=site_id,
- folder_path=dest_folder,
- file_name=dest_file,
- content=file_content
+ await self.uploadFile(
+ siteId=siteId,
+ folderPath=destFolder,
+ fileName=destFile,
+ content=fileContent
)
- logger.info(f"File copied: {source_file} -> {dest_file}")
+ logger.info(f"File copied: {sourceFile} -> {destFile}")
except Exception as e:
# Provide more specific error information
- error_msg = str(e)
- if "itemNotFound" in error_msg or "404" in error_msg:
- raise Exception(f"Source file not found (404): {source_path} - {error_msg}")
+ errorMsg = str(e)
+ if "itemNotFound" in errorMsg or "404" in errorMsg:
+ raise Exception(f"Source file not found (404): {sourcePath} - {errorMsg}")
else:
- raise Exception(f"Error copying file: {error_msg}")
+ raise Exception(f"Error copying file: {errorMsg}")
- async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
+ async def downloadFileByPath(self, siteId: str, filePath: str) -> Optional[bytes]:
"""Download a file by its path within a site."""
try:
- if self.access_token is None:
+ if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return None
# Clean the path
- clean_path = file_path.strip('/')
- endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content"
+ cleanPath = filePath.strip('/')
+ endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content"
# Use direct HTTP call for file downloads (binary content)
headers = {
- "Authorization": f"Bearer {self.access_token}",
+ "Authorization": f"Bearer {self.accessToken}",
}
# Remove leading slash from endpoint to avoid double slash
- clean_endpoint = endpoint.lstrip('/')
- url = f"{self.base_url}/{clean_endpoint}"
+ cleanEndpoint = endpoint.lstrip('/')
+ url = f"{self.baseUrl}/{cleanEndpoint}"
logger.debug(f"Downloading file: GET {url}")
timeout = aiohttp.ClientTimeout(total=30)
diff --git a/modules/services/serviceUtils/mainServiceUtils.py b/modules/services/serviceUtils/mainServiceUtils.py
index 1824eab7..c8a78fea 100644
--- a/modules/services/serviceUtils/mainServiceUtils.py
+++ b/modules/services/serviceUtils/mainServiceUtils.py
@@ -7,7 +7,7 @@ import logging
from typing import Any, Optional, Dict, Callable, List
from modules.shared.configuration import APP_CONFIG
from modules.shared.eventManagement import eventManager
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared import jsonUtils
logger = logging.getLogger(__name__)
@@ -122,7 +122,7 @@ class UtilsService:
float: Current UTC timestamp in seconds
"""
try:
- return get_utc_timestamp()
+ return getUtcTimestamp()
except Exception as e:
logger.error(f"Error getting UTC timestamp: {str(e)}")
return 0.0
@@ -185,6 +185,75 @@ class UtilsService:
# Silent fail to never break main flow
pass
+ # ===== Prompt sanitization =====
+
+ def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
+ """
+ Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
+
+ This is the single source of truth for all prompt sanitization across the system.
+ Replaces all scattered sanitization functions with a unified approach.
+
+ Args:
+ content: The content to sanitize
+ contentType: Type of content ("text", "userinput", "json", "document")
+
+ Returns:
+ Safely sanitized content ready for AI prompt insertion
+ """
+ if not content:
+ return ""
+
+ try:
+ import re
+ # Convert to string if not already
+ content_str = str(content)
+
+ # Remove null bytes and control characters (except newlines and tabs)
+ sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
+
+ # Handle different content types with appropriate sanitization
+ if contentType == "userinput":
+ # Extra security for user-controlled content
+ # Escape curly braces to prevent placeholder injection
+ sanitized = sanitized.replace('{', '{{').replace('}', '}}')
+ # Escape quotes and wrap in single quotes
+ sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
+ return f"'{sanitized}'"
+
+ elif contentType == "json":
+ # For JSON content, escape quotes and backslashes
+ sanitized = sanitized.replace('\\', '\\\\')
+ sanitized = sanitized.replace('"', '\\"')
+ sanitized = sanitized.replace('\n', '\\n')
+ sanitized = sanitized.replace('\r', '\\r')
+ sanitized = sanitized.replace('\t', '\\t')
+
+ elif contentType == "document":
+ # For document content, escape special characters
+ sanitized = sanitized.replace('\\', '\\\\')
+ sanitized = sanitized.replace('"', '\\"')
+ sanitized = sanitized.replace("'", "\\'")
+ sanitized = sanitized.replace('\n', '\\n')
+ sanitized = sanitized.replace('\r', '\\r')
+ sanitized = sanitized.replace('\t', '\\t')
+
+ else: # contentType == "text" or default
+ # Basic text sanitization
+ sanitized = sanitized.replace('\\', '\\\\')
+ sanitized = sanitized.replace('"', '\\"')
+ sanitized = sanitized.replace("'", "\\'")
+ sanitized = sanitized.replace('\n', '\\n')
+ sanitized = sanitized.replace('\r', '\\r')
+ sanitized = sanitized.replace('\t', '\\t')
+
+ return sanitized
+
+ except Exception as e:
+ logger.error(f"Error sanitizing prompt content: {str(e)}")
+ # Return a safe fallback
+ return "[ERROR: Content could not be safely sanitized]"
+
# ===== JSON utility wrappers =====
def jsonStripCodeFences(self, text: str) -> str:
diff --git a/modules/shared/attributeUtils.py b/modules/shared/attributeUtils.py
index 6c56e855..90f3a766 100644
--- a/modules/shared/attributeUtils.py
+++ b/modules/shared/attributeUtils.py
@@ -34,54 +34,54 @@ class AttributeDefinition(BaseModel):
MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {}
-def register_model_labels(model_name: str, model_label: Dict[str, str], labels: Dict[str, Dict[str, str]]):
+def registerModelLabels(modelName: str, modelLabel: Dict[str, str], labels: Dict[str, Dict[str, str]]):
"""
Register labels for a model's attributes and the model itself.
Args:
- model_name: Name of the model class
- model_label: Dictionary mapping language codes to model labels
+ modelName: Name of the model class
+ modelLabel: Dictionary mapping language codes to model labels
e.g. {"en": "Prompt", "fr": "Invite"}
labels: Dictionary mapping attribute names to their translations
e.g. {"name": {"en": "Name", "fr": "Nom"}}
"""
- MODEL_LABELS[model_name] = {"model": model_label, "attributes": labels}
+ MODEL_LABELS[modelName] = {"model": modelLabel, "attributes": labels}
-def get_model_labels(model_name: str, language: str = "en") -> Dict[str, str]:
+def getModelLabels(modelName: str, language: str = "en") -> Dict[str, str]:
"""
Get labels for a model's attributes in the specified language.
Args:
- model_name: Name of the model class
+ modelName: Name of the model class
language: Language code (default: "en")
Returns:
Dictionary mapping attribute names to their labels in the specified language
"""
- model_data = MODEL_LABELS.get(model_name, {})
- attribute_labels = model_data.get("attributes", {})
+ modelData = MODEL_LABELS.get(modelName, {})
+ attributeLabels = modelData.get("attributes", {})
return {
attr: translations.get(language, translations.get("en", attr))
- for attr, translations in attribute_labels.items()
+ for attr, translations in attributeLabels.items()
}
-def get_model_label(model_name: str, language: str = "en") -> str:
+def getModelLabel(modelName: str, language: str = "en") -> str:
"""
Get the label for a model in the specified language.
Args:
- model_name: Name of the model class
+ modelName: Name of the model class
language: Language code (default: "en")
Returns:
Model label in the specified language, or model name if no label exists
"""
- model_data = MODEL_LABELS.get(model_name, {})
- model_label = model_data.get("model", {})
- return model_label.get(language, model_label.get("en", model_name))
+ modelData = MODEL_LABELS.get(modelName, {})
+ modelLabel = modelData.get("model", {})
+ return modelLabel.get(language, modelLabel.get("en", modelName))
def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]:
@@ -100,8 +100,8 @@ def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguag
attributes = []
model_name = modelClass.__name__
- labels = get_model_labels(model_name, userLanguage)
- model_label = get_model_label(model_name, userLanguage)
+ labels = getModelLabels(model_name, userLanguage)
+ model_label = getModelLabel(model_name, userLanguage)
# Pydantic v2 only
fields = modelClass.model_fields
diff --git a/modules/shared/auditLogger.py b/modules/shared/auditLogger.py
index dab32fa9..3155f681 100644
--- a/modules/shared/auditLogger.py
+++ b/modules/shared/auditLogger.py
@@ -19,42 +19,42 @@ class DailyRotatingFileHandler(RotatingFileHandler):
The log file name includes the current date and switches at midnight.
"""
- def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
- self.log_dir = log_dir
- self.filename_prefix = filename_prefix
- self.current_date = None
- self.current_file = None
+ def __init__(self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs):
+ self.logDir = logDir
+ self.filenamePrefix = filenamePrefix
+ self.currentDate = None
+ self.currentFile = None
# Initialize with today's file
- self._update_file_if_needed()
+ self._updateFileIfNeeded()
# Call parent constructor with current file
- super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
+ super().__init__(self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs)
- def _update_file_if_needed(self):
+ def _updateFileIfNeeded(self):
"""Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d")
- if self.current_date != today:
- self.current_date = today
- new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
+ if self.currentDate != today:
+ self.currentDate = today
+ newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
- if self.current_file != new_file:
- self.current_file = new_file
+ if self.currentFile != newFile:
+ self.currentFile = newFile
return True
return False
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
- if self._update_file_if_needed():
+ if self._updateFileIfNeeded():
# Close current file and open new one
if self.stream:
self.stream.close()
self.stream = None
# Update the baseFilename for the parent class
- self.baseFilename = self.current_file
+ self.baseFilename = self.currentFile
# Reopen the stream
if not self.delay:
self.stream = self._open()
@@ -68,9 +68,9 @@ class AuditLogger:
def __init__(self):
self.logger = None
- self._setup_audit_logger()
+ self._setupAuditLogger()
- def _setup_audit_logger(self):
+ def _setupAuditLogger(self):
"""Setup the audit logger with daily file rotation"""
try:
# Get log directory from config
@@ -96,10 +96,10 @@ class AuditLogger:
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler(
- log_dir=logDir,
- filename_prefix="log_audit",
- max_bytes=rotationSize,
- backup_count=backupCount
+ logDir=logDir,
+ filenamePrefix="log_audit",
+ maxBytes=rotationSize,
+ backupCount=backupCount
)
# Create formatter for audit log
@@ -120,9 +120,9 @@ class AuditLogger:
self.logger = logging.getLogger(__name__)
self.logger.error(f"Failed to setup audit logger: {str(e)}")
- def log_event(self,
- user_id: str,
- mandate_id: str,
+ def logEvent(self,
+ userId: str,
+ mandateId: str,
category: str,
action: str,
details: str = "",
@@ -131,8 +131,8 @@ class AuditLogger:
Log an audit event
Args:
- user_id: User identifier
- mandate_id: Mandate identifier (can be empty if not applicable)
+ userId: User identifier
+ mandateId: Mandate identifier (can be empty if not applicable)
category: Event category (e.g., 'key', 'access', 'data')
action: Specific action (e.g., 'decode', 'login', 'logout')
details: Additional details about the event
@@ -148,50 +148,50 @@ class AuditLogger:
# Format the audit log entry
# Format: timestamp | userid | mandateid | category | action | details
- audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"
+ auditEntry = f"{userId} | {mandateId} | {category} | {action} | {details}"
# Log the event
- self.logger.info(audit_entry)
+ self.logger.info(auditEntry)
except Exception as e:
# Use standard logger as fallback
logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
- def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
+ def logKeyAccess(self, userId: str, mandateId: str, keyName: str, action: str) -> None:
"""Log key access events (decode/encode)"""
- self.log_event(
- user_id=user_id,
- mandate_id=mandate_id,
+ self.logEvent(
+ userId=userId,
+ mandateId=mandateId,
category="key",
action=action,
- details=key_name
+ details=keyName
)
- def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
+ def logUserAccess(self, userId: str, mandateId: str, action: str, successInfo: str = "") -> None:
"""Log user access events (login/logout)"""
- self.log_event(
- user_id=user_id,
- mandate_id=mandate_id,
+ self.logEvent(
+ userId=userId,
+ mandateId=mandateId,
category="access",
action=action,
- details=success_info
+ details=successInfo
)
- def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
+ def logDataAccess(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
"""Log data access events"""
- self.log_event(
- user_id=user_id,
- mandate_id=mandate_id,
+ self.logEvent(
+ userId=userId,
+ mandateId=mandateId,
category="data",
action=action,
details=details
)
- def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
+ def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
"""Log security-related events"""
- self.log_event(
- user_id=user_id,
- mandate_id=mandate_id,
+ self.logEvent(
+ userId=userId,
+ mandateId=mandateId,
category="security",
action=action,
details=details
diff --git a/modules/shared/configuration.py b/modules/shared/configuration.py
index c8b77773..19f01a5c 100644
--- a/modules/shared/configuration.py
+++ b/modules/shared/configuration.py
@@ -199,10 +199,10 @@ class Configuration:
# Log audit event for secret key access
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_key_access(
- user_id=user_id,
- mandate_id="system",
- key_name=key,
+ audit_logger.logKeyAccess(
+ userId=user_id,
+ mandateId="system",
+ keyName=key,
action="decode"
)
except Exception:
@@ -211,9 +211,9 @@ class Configuration:
if value.startswith("{") and value.endswith("}"):
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
- return handleSecretJson(value, user_id, key)
+ return handleSecretJson(value, userId=user_id, keyName=key)
else:
- return handleSecretText(value, user_id, key)
+ return handleSecretText(value, userId=user_id, keyName=key)
return value
return default
@@ -235,31 +235,31 @@ class Configuration:
"""Set a configuration value (for testing/overrides)"""
self._data[key] = value
-def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
+def handleSecretText(value: str, userId: str = "system", keyName: str = "unknown") -> str:
"""
Handle secret values with encryption/decryption support.
Args:
value: The secret value to handle (may be encrypted)
- user_id: The user ID making the request (default: "system")
- key_name: The name of the key being decrypted (default: "unknown")
+ userId: The user ID making the request (default: "system")
+ keyName: The name of the key being decrypted (default: "unknown")
Returns:
str: Processed secret value (decrypted if encrypted)
"""
- if _is_encrypted_value(value):
- return decrypt_value(value, user_id, key_name)
+ if _isEncryptedValue(value):
+ return decryptValue(value, userId, keyName)
return value
-def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
+def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown") -> str:
"""
Handle JSON secret values (like Google service account keys) with encryption/decryption support.
Validates that the value is valid JSON after decryption.
Args:
value: The JSON secret value to handle (may be encrypted)
- user_id: The user ID making the request (default: "system")
- key_name: The name of the key being decrypted (default: "unknown")
+ userId: The user ID making the request (default: "system")
+ keyName: The name of the key being decrypted (default: "unknown")
Returns:
str: Processed JSON secret value (decrypted if encrypted)
@@ -268,15 +268,15 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
ValueError: If the value is not valid JSON after decryption
"""
# Decrypt if encrypted
- if _is_encrypted_value(value):
- decrypted_value = decrypt_value(value, user_id, key_name)
+ if _isEncryptedValue(value):
+ decryptedValue = decryptValue(value, userId, keyName)
else:
- decrypted_value = value
+ decryptedValue = value
try:
# Validate that it's valid JSON
- json.loads(decrypted_value)
- return decrypted_value
+ json.loads(decryptedValue)
+ return decryptedValue
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON in secret value: {e}")
@@ -284,12 +284,12 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
# Structure: {user_id: {key_name: [timestamps]}}
_decryption_attempts = {}
-def _get_master_key(env_type: str = None) -> bytes:
+def _getMasterKey(envType: str = None) -> bytes:
"""
Get the master key for the specified environment.
Args:
- env_type: The environment type (dev, int, prod, etc.). If None, uses current config.
+ envType: The environment type (dev, int, prod, etc.). If None, uses current config.
Returns:
bytes: The master key for encryption/decryption
@@ -298,24 +298,24 @@ def _get_master_key(env_type: str = None) -> bytes:
ValueError: If no master key is found
"""
# Get the key location from config
- key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
- if env_type is None:
- env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+ keyLocation = APP_CONFIG.get('APP_KEY_SYSVAR')
+ if envType is None:
+ envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
- if not key_location:
+ if not keyLocation:
raise ValueError("APP_KEY_SYSVAR not configured")
# First try to get from environment variable
- master_key = os.environ.get(key_location)
+ masterKey = os.environ.get(keyLocation)
- if master_key:
+ if masterKey:
# If found in environment, use it directly
- return master_key.encode('utf-8')
+ return masterKey.encode('utf-8')
# If not in environment, try to read from file
- if os.path.exists(key_location):
+ if os.path.exists(keyLocation):
try:
- with open(key_location, 'r') as f:
+ with open(keyLocation, 'r') as f:
content = f.read().strip()
# Parse the key file format: env = key
@@ -326,26 +326,26 @@ def _get_master_key(env_type: str = None) -> bytes:
continue
if '=' in line:
- key_env, key_value = line.split('=', 1)
- key_env = key_env.strip()
- key_value = key_value.strip()
+ keyEnv, keyValue = line.split('=', 1)
+ keyEnv = keyEnv.strip()
+ keyValue = keyValue.strip()
- if key_env == env_type:
- return key_value.encode('utf-8')
+ if keyEnv == envType:
+ return keyValue.encode('utf-8')
- raise ValueError(f"No key found for environment '{env_type}' in {key_location}")
+ raise ValueError(f"No key found for environment '{envType}' in {keyLocation}")
except Exception as e:
- raise ValueError(f"Error reading key file {key_location}: {e}")
+ raise ValueError(f"Error reading key file {keyLocation}: {e}")
- raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")
+ raise ValueError(f"Master key not found. Checked environment variable '{keyLocation}' and file path")
-def _derive_encryption_key(master_key: bytes) -> bytes:
+def _deriveEncryptionKey(masterKey: bytes) -> bytes:
"""
Derive a 32-byte encryption key from the master key using PBKDF2.
Args:
- master_key: The master key bytes
+ masterKey: The master key bytes
Returns:
bytes: 32-byte derived key suitable for Fernet
@@ -360,9 +360,9 @@ def _derive_encryption_key(master_key: bytes) -> bytes:
iterations=100000,
)
- return base64.urlsafe_b64encode(kdf.derive(master_key))
+ return base64.urlsafe_b64encode(kdf.derive(masterKey))
-def _is_encrypted_value(value: str) -> bool:
+def _isEncryptedValue(value: str) -> bool:
"""
Check if a value is encrypted (starts with environment-specific prefix).
@@ -382,64 +382,64 @@ def _is_encrypted_value(value: str) -> bool:
value.startswith('TEST_ENC:') or
value.startswith('STAGING_ENC:'))
-def _get_encryption_prefix(env_type: str) -> str:
+def _getEncryptionPrefix(envType: str) -> str:
"""
Get the encryption prefix for the given environment type.
Args:
- env_type: The environment type (dev, int, prod, etc.)
+ envType: The environment type (dev, int, prod, etc.)
Returns:
str: The encryption prefix
"""
- return f"{env_type.upper()}_ENC:"
+ return f"{envType.upper()}_ENC:"
-def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
+def _checkDecryptionRateLimit(userId: str, keyName: str, maxPerSecond: int = 10) -> bool:
"""
Check if decryption is allowed based on rate limiting (max 10 per second per user per key).
Args:
- user_id: The user ID making the request
- key_name: The name of the key being decrypted
- max_per_second: Maximum decryptions per second (default: 10)
+ userId: The user ID making the request
+ keyName: The name of the key being decrypted
+ maxPerSecond: Maximum decryptions per second (default: 10)
Returns:
bool: True if allowed, False if rate limited
"""
- current_time = time.time()
+ currentTime = time.time()
# Initialize tracking for this user if not exists
- if user_id not in _decryption_attempts:
- _decryption_attempts[user_id] = {}
+ if userId not in _decryption_attempts:
+ _decryption_attempts[userId] = {}
# Initialize tracking for this key if not exists
- if key_name not in _decryption_attempts[user_id]:
- _decryption_attempts[user_id][key_name] = []
+ if keyName not in _decryption_attempts[userId]:
+ _decryption_attempts[userId][keyName] = []
# Clean old attempts (older than 1 second)
- _decryption_attempts[user_id][key_name] = [
- timestamp for timestamp in _decryption_attempts[user_id][key_name]
- if current_time - timestamp < 1.0
+ _decryption_attempts[userId][keyName] = [
+ timestamp for timestamp in _decryption_attempts[userId][keyName]
+ if currentTime - timestamp < 1.0
]
# Check if we're within rate limit
- if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
- logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
+ if len(_decryption_attempts[userId][keyName]) >= maxPerSecond:
+ logger.warning(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' ({maxPerSecond}/sec)")
return False
# Record this attempt
- _decryption_attempts[user_id][key_name].append(current_time)
+ _decryption_attempts[userId][keyName].append(currentTime)
return True
-def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
+def encryptValue(value: str, envType: str = None, userId: str = "system", keyName: str = "unknown") -> str:
"""
Encrypt a value using the master key for the specified environment.
Args:
value: The plain text value to encrypt
- env_type: The environment type (dev, int, prod). If None, uses current environment.
- user_id: The user ID making the request (default: "system")
- key_name: The name of the key being encrypted (default: "unknown")
+ envType: The environment type (dev, int, prod). If None, uses current environment.
+ userId: The user ID making the request (default: "system")
+ keyName: The name of the key being encrypted (default: "unknown")
Returns:
str: The encrypted value with prefix
@@ -447,48 +447,48 @@ def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key
Raises:
ValueError: If encryption fails
"""
- if env_type is None:
- env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+ if envType is None:
+ envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
try:
- master_key = _get_master_key(env_type)
- derived_key = _derive_encryption_key(master_key)
- fernet = Fernet(derived_key)
+ masterKey = _getMasterKey(envType)
+ derivedKey = _deriveEncryptionKey(masterKey)
+ fernet = Fernet(derivedKey)
# Encrypt the value
- encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
- encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')
+ encryptedBytes = fernet.encrypt(value.encode('utf-8'))
+ encryptedB64 = base64.urlsafe_b64encode(encryptedBytes).decode('utf-8')
# Add environment prefix
- prefix = _get_encryption_prefix(env_type)
- encrypted_value = f"{prefix}{encrypted_b64}"
+ prefix = _getEncryptionPrefix(envType)
+ encryptedValue = f"{prefix}{encryptedB64}"
# Log audit event for encryption
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_key_access(
- user_id=user_id,
- mandate_id="system",
- key_name=key_name,
+ audit_logger.logKeyAccess(
+ userId=userId,
+ mandateId="system",
+ keyName=keyName,
action="encrypt"
)
except Exception:
# Don't fail if audit logging fails
pass
- return encrypted_value
+ return encryptedValue
except Exception as e:
raise ValueError(f"Encryption failed: {e}")
-def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
+def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str:
"""
Decrypt a value using the master key for the current environment.
Args:
- encrypted_value: The encrypted value with prefix
- user_id: The user ID making the request (default: "system")
- key_name: The name of the key being decrypted (default: "unknown")
+ encryptedValue: The encrypted value with prefix
+ userId: The user ID making the request (default: "system")
+ keyName: The name of the key being decrypted (default: "unknown")
Returns:
str: The decrypted plain text value
@@ -496,59 +496,59 @@ def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str =
Raises:
ValueError: If decryption fails
"""
- if not _is_encrypted_value(encrypted_value):
- return encrypted_value # Return as-is if not encrypted
+ if not _isEncryptedValue(encryptedValue):
+ return encryptedValue # Return as-is if not encrypted
# Check rate limiting (10 per second per user per key)
- if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
- raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")
+ if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
+ raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
try:
# Extract environment type from prefix
- if encrypted_value.startswith('DEV_ENC:'):
- env_type = 'dev'
+ if encryptedValue.startswith('DEV_ENC:'):
+ envType = 'dev'
prefix = 'DEV_ENC:'
- elif encrypted_value.startswith('INT_ENC:'):
- env_type = 'int'
+ elif encryptedValue.startswith('INT_ENC:'):
+ envType = 'int'
prefix = 'INT_ENC:'
- elif encrypted_value.startswith('PROD_ENC:'):
- env_type = 'prod'
+ elif encryptedValue.startswith('PROD_ENC:'):
+ envType = 'prod'
prefix = 'PROD_ENC:'
- elif encrypted_value.startswith('TEST_ENC:'):
- env_type = 'test'
+ elif encryptedValue.startswith('TEST_ENC:'):
+ envType = 'test'
prefix = 'TEST_ENC:'
- elif encrypted_value.startswith('STAGING_ENC:'):
- env_type = 'staging'
+ elif encryptedValue.startswith('STAGING_ENC:'):
+ envType = 'staging'
prefix = 'STAGING_ENC:'
else:
raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:")
- encrypted_part = encrypted_value[len(prefix):]
+ encryptedPart = encryptedValue[len(prefix):]
# Get master key for the specific environment and derive encryption key
- master_key = _get_master_key(env_type)
- derived_key = _derive_encryption_key(master_key)
- fernet = Fernet(derived_key)
+ masterKey = _getMasterKey(envType)
+ derivedKey = _deriveEncryptionKey(masterKey)
+ fernet = Fernet(derivedKey)
# Decode and decrypt
- encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
- decrypted_bytes = fernet.decrypt(encrypted_bytes)
- decrypted_value = decrypted_bytes.decode('utf-8')
+ encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8'))
+ decryptedBytes = fernet.decrypt(encryptedBytes)
+ decryptedValue = decryptedBytes.decode('utf-8')
# Log audit event for decryption
try:
from modules.shared.auditLogger import audit_logger
- audit_logger.log_key_access(
- user_id=user_id,
- mandate_id="system",
- key_name=key_name,
+ audit_logger.logKeyAccess(
+ userId=userId,
+ mandateId="system",
+ keyName=keyName,
action="decrypt"
)
except Exception:
# Don't fail if audit logging fails
pass
- return decrypted_value
+ return decryptedValue
except Exception as e:
raise ValueError(f"Decryption failed: {e}")
diff --git a/modules/shared/debugLogger.py b/modules/shared/debugLogger.py
index a4fd0032..69e2f39c 100644
--- a/modules/shared/debugLogger.py
+++ b/modules/shared/debugLogger.py
@@ -120,8 +120,8 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
debug_file = os.path.join(debug_dir, "debug_workflow.log")
# Format the debug entry
- from modules.shared.timezoneUtils import get_utc_timestamp
- timestamp = get_utc_timestamp()
+ from modules.shared.timezoneUtils import getUtcTimestamp
+ timestamp = getUtcTimestamp()
debug_entry = f"[{timestamp}] [{context}] {message}\n"
# Write to debug file
diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py
index 12b044f1..71b807b4 100644
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@@ -102,7 +102,7 @@ def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
return obj
-def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
+def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
"""
Generic merger for root-level lists: take first dict as base; for each subsequent part:
- if value is list and same key exists as list, extend it
@@ -112,7 +112,7 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
"""
base: Optional[Dict[str, Any]] = None
parsed: List[Dict[str, Any]] = []
- for part in json_parts:
+ for part in jsonParts:
if isinstance(part, (dict, list)):
obj = part
else:
@@ -146,61 +146,61 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
# Strategy 1: Try to extract sections from the entire text first
# This handles cases where the JSON structure is broken but content is intact
- extracted_sections = _extractSectionsRegex(text)
- if extracted_sections:
- logger.info(f"Extracted {len(extracted_sections)} sections using regex")
+ extractedSections = _extractSectionsRegex(text)
+ if extractedSections:
+ logger.info(f"Extracted {len(extractedSections)} sections using regex")
return {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
- "documents": [{"sections": extracted_sections}]
+ "documents": [{"sections": extractedSections}]
}
# Strategy 2: Progressive parsing - try to find longest valid prefix
- best_result = None
- best_valid_length = 0
+ bestResult = None
+ bestValidLength = 0
# Try different step sizes to find the best valid JSON
- for step_size in [100, 50, 10, 1]:
- for i in range(len(text), 0, -step_size):
- test_str = text[:i]
- closed_str = _closeJsonStructures(test_str)
- obj, err, _ = tryParseJson(closed_str)
+ for stepSize in [100, 50, 10, 1]:
+ for i in range(len(text), 0, -stepSize):
+ testStr = text[:i]
+ closedStr = _closeJsonStructures(testStr)
+ obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
- best_result = obj
- best_valid_length = i
- logger.debug(f"Progressive parsing success at length {i} (step: {step_size})")
+ bestResult = obj
+ bestValidLength = i
+ logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
break
- if best_result:
+ if bestResult:
break
- if best_result:
- logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})")
+ if bestResult:
+ logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
# Check if we have sections in the result
- sections = extractSectionsFromDocument(best_result)
+ sections = extractSectionsFromDocument(bestResult)
if sections:
logger.info(f"Progressive parsing found {len(sections)} sections")
- return best_result
+ return bestResult
else:
# No sections found in progressive parsing, try to extract from broken part
logger.info("Progressive parsing found no sections, trying to extract from broken part")
- extracted_sections = _extractSectionsRegex(text[best_valid_length:])
- if extracted_sections:
- logger.info(f"Extracted {len(extracted_sections)} sections from broken part")
+ extractedSections = _extractSectionsRegex(text[bestValidLength:])
+ if extractedSections:
+ logger.info(f"Extracted {len(extractedSections)} sections from broken part")
# Merge with the valid part
- if "documents" not in best_result:
- best_result["documents"] = []
- if not best_result["documents"]:
- best_result["documents"] = [{"sections": []}]
- best_result["documents"][0]["sections"].extend(extracted_sections)
- return best_result
+ if "documents" not in bestResult:
+ bestResult["documents"] = []
+ if not bestResult["documents"]:
+ bestResult["documents"] = [{"sections": []}]
+ bestResult["documents"][0]["sections"].extend(extractedSections)
+ return bestResult
# Strategy 3: Structure closing - close incomplete structures
- closed_str = _closeJsonStructures(text)
- obj, err, _ = tryParseJson(closed_str)
+ closedStr = _closeJsonStructures(text)
+ obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
logger.info("Repaired JSON using structure closing")
return obj
@@ -217,16 +217,16 @@ def _closeJsonStructures(text: str) -> str:
return text
# Count open/close brackets and braces
- open_braces = text.count('{')
- close_braces = text.count('}')
- open_brackets = text.count('[')
- close_brackets = text.count(']')
+ openBraces = text.count('{')
+ closeBraces = text.count('}')
+ openBrackets = text.count('[')
+ closeBrackets = text.count(']')
# Close incomplete structures
result = text
- for _ in range(open_braces - close_braces):
+ for _ in range(openBraces - closeBraces):
result += '}'
- for _ in range(open_brackets - close_brackets):
+ for _ in range(openBrackets - closeBrackets):
result += ']'
return result
@@ -242,32 +242,32 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
sections = []
# Pattern to find section objects
- section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
+ sectionPattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
- for match in re.finditer(section_pattern, text, re.IGNORECASE):
- section_id = match.group(1)
- content_type = match.group(2)
+ for match in re.finditer(sectionPattern, text, re.IGNORECASE):
+ sectionId = match.group(1)
+ contentType = match.group(2)
order = int(match.group(3))
# Try to extract elements array - look for the elements array after this section
- elements_match = re.search(
+ elementsMatch = re.search(
r'"elements"\s*:\s*\[(.*?)\]',
text[match.end():match.end()+5000] # Look ahead for elements (large range)
)
elements = []
- if elements_match:
+ if elementsMatch:
try:
- elements_str = '[' + elements_match.group(1) + ']'
- elements = json.loads(elements_str)
+ elementsStr = '[' + elementsMatch.group(1) + ']'
+ elements = json.loads(elementsStr)
except:
# If JSON parsing fails, try to extract individual items manually
- elements_text = elements_match.group(1)
- elements = _extractElementsFromText(elements_text, content_type)
+ elementsText = elementsMatch.group(1)
+ elements = _extractElementsFromText(elementsText, contentType)
sections.append({
- "id": section_id,
- "content_type": content_type,
+ "id": sectionId,
+ "content_type": contentType,
"elements": elements,
"order": order
})
@@ -279,7 +279,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
return sections
-def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]:
+def _extractElementsFromText(elementsText: str, contentType: str) -> List[Dict[str, Any]]:
"""
Extract elements from text when JSON parsing fails.
Generic approach that works for any content type.
@@ -290,11 +290,11 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
elements = []
- if content_type == "list":
+ if contentType == "list":
# Look for {"text": "..."} patterns, including incomplete ones
- text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
+ text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
# Also look for incomplete patterns like {"text": "36
- incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+ incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
# Combine both complete and incomplete items
all_items = text_items + incomplete_items
@@ -303,41 +303,41 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
- unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+ unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items]
- elif content_type == "paragraph":
+ elif contentType == "paragraph":
# Look for {"text": "..."} patterns, including incomplete ones
- text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
- incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+ text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
+ incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = text_items + incomplete_items
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
- unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+ unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items]
- elif content_type == "heading":
+ elif contentType == "heading":
# Look for {"level": X, "text": "..."} patterns, including incomplete ones
- heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text)
- incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+ heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elementsText)
+ incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = heading_items + incomplete_heading_items
unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
- unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+ unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"level": level, "text": text} for level, text in unique_items]
- elif content_type == "table":
+ elif contentType == "table":
# Look for table patterns
- table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text)
+ table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elementsText)
for headers_str, rows_str, caption in table_items:
# Extract headers
headers = re.findall(r'"([^"]+)"', headers_str)
@@ -354,31 +354,31 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
"caption": caption
})
- elif content_type == "code":
+ elif contentType == "code":
# Look for {"code": "...", "language": "..."} patterns, including incomplete ones
- code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text)
- incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+ code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elementsText)
+ incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
- unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+ unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"code": code, "language": lang} for code, lang in unique_items]
else:
# Generic fallback - look for any text content, including incomplete
- text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text)
- incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+ text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elementsText)
+ incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = text_items + incomplete_text_items
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
- unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+ unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items]
diff --git a/modules/shared/timezoneUtils.py b/modules/shared/timezoneUtils.py
index 1656f0f6..4e2141b7 100644
--- a/modules/shared/timezoneUtils.py
+++ b/modules/shared/timezoneUtils.py
@@ -6,7 +6,7 @@ Ensures all timestamps are properly handled as UTC.
from datetime import datetime, timezone
import time
-def get_utc_now() -> datetime:
+def getUtcNow() -> datetime:
"""
Get current time in UTC with timezone info.
@@ -15,7 +15,7 @@ def get_utc_now() -> datetime:
"""
return datetime.now(timezone.utc)
-def get_utc_timestamp() -> float:
+def getUtcTimestamp() -> float:
"""
Get current UTC timestamp (seconds since epoch with millisecond precision).
@@ -24,14 +24,14 @@ def get_utc_timestamp() -> float:
"""
return time.time()
-def create_expiration_timestamp(expires_in_seconds: int) -> float:
+def createExpirationTimestamp(expiresInSeconds: int) -> float:
"""
Create a new expiration timestamp from seconds until expiration.
Args:
- expires_in_seconds (int): Seconds until expiration
+ expiresInSeconds (int): Seconds until expiration
Returns:
float: UTC timestamp in seconds
"""
- return get_utc_timestamp() + expires_in_seconds
+ return getUtcTimestamp() + expiresInSeconds
\ No newline at end of file
diff --git a/modules/workflows/processing/adaptive/adaptiveLearningEngine.py b/modules/workflows/processing/adaptive/adaptiveLearningEngine.py
index 107821e7..5573e93b 100644
--- a/modules/workflows/processing/adaptive/adaptiveLearningEngine.py
+++ b/modules/workflows/processing/adaptive/adaptiveLearningEngine.py
@@ -22,13 +22,11 @@ class AdaptiveLearningEngine:
workflowId: str, attemptNumber: int):
"""Record validation result and learn from it"""
try:
- actionType = actionContext.get('actionType', 'unknown')
actionName = actionContext.get('actionName', 'unknown')
# Store validation history
validationEntry = {
'workflowId': workflowId,
- 'actionType': actionType,
'actionName': actionName,
'attemptNumber': attemptNumber,
'validationResult': validationResult,
@@ -42,17 +40,17 @@ class AdaptiveLearningEngine:
# Track patterns
if validationResult.get('overallSuccess', False):
- self.successPatterns[actionType].append(validationEntry)
+ self.successPatterns[actionName].append(validationEntry)
else:
- self.failurePatterns[actionType].append(validationEntry)
+ self.failurePatterns[actionName].append(validationEntry)
# Update attempt count
- self.actionAttempts[f"{workflowId}:{actionType}"] += 1
+ self.actionAttempts[f"{workflowId}:{actionName}"] += 1
# Generate learning insights
- self._generateLearningInsights(workflowId, actionType)
+ self._generateLearningInsights(workflowId, actionName)
- logger.info(f"Recorded validation for {actionType} (attempt {attemptNumber}): "
+ logger.info(f"Recorded validation for {actionName} (attempt {attemptNumber}): "
f"Success={validationResult.get('overallSuccess', False)}, "
f"Quality={validationResult.get('qualityScore', 0.0)}")
@@ -86,21 +84,21 @@ class AdaptiveLearningEngine:
logger.error(f"Error generating adaptive context: {str(e)}")
return {}
- def getAdaptiveContextForParameters(self, workflowId: str, actionType: str,
+ def getAdaptiveContextForParameters(self, workflowId: str, actionName: str,
parametersContext: str) -> Dict[str, Any]:
"""Generate adaptive context for parameter selection prompt"""
try:
- # Get validation history for this specific action type
+ # Get validation history for this specific action name
actionValidations = [
v for v in self.validationHistory
- if v['workflowId'] == workflowId and v['actionType'] == actionType
+ if v['workflowId'] == workflowId and v['actionName'] == actionName
][-3:] # Last 3 attempts for this action
# Analyze what went wrong in previous attempts
failureAnalysis = self._analyzeParameterFailures(actionValidations)
# Generate specific parameter guidance
- parameterGuidance = self._generateParameterGuidance(actionType, parametersContext, failureAnalysis)
+ parameterGuidance = self._generateParameterGuidance(actionName, parametersContext, failureAnalysis)
return {
'actionValidations': actionValidations,
@@ -206,36 +204,28 @@ class AdaptiveLearningEngine:
return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available."
- def _generateParameterGuidance(self, actionType: str, parametersContext: str,
+ def _generateParameterGuidance(self, actionName: str, parametersContext: str,
failureAnalysis: Dict[str, Any]) -> str:
- """Generate specific parameter guidance based on previous failures"""
+ """Generate generic parameter guidance based on previous failures (no app-specific logic)."""
if not failureAnalysis.get('hasFailures', False):
return "No previous parameter failures. Use standard parameter values."
- guidance_parts = []
+ guidanceParts = []
- # Add attempt awareness
+ # Attempt awareness
attemptNumber = failureAnalysis.get('attemptNumber', 1)
- if attemptNumber >= 3:
- guidance_parts.append(f"ATTEMPT #{attemptNumber}: Previous attempts failed. Adjust parameters based on validation feedback.")
+ if attemptNumber and attemptNumber >= 3:
+ guidanceParts.append(f"Attempt #{attemptNumber}: Adjust parameters based on validation feedback.")
- # Add specific parameter guidance based on action type
- if actionType == "outlook.composeAndSendEmailWithContext":
- guidance_parts.append("EMAIL PARAMETER GUIDANCE:")
- guidance_parts.append("- context: Be very specific about account (valueon), appointment time (Friday), and requirements")
- guidance_parts.append("- emailStyle: Use 'formal' for business emails")
- guidance_parts.append("- maxLength: Set to 2000+ for detailed emails with summaries")
-
- # Add specific guidance based on common failures
- commonIssues = failureAnalysis.get('commonIssues', {})
- if any("account" in str(issue).lower() for issue in commonIssues.keys()):
- guidance_parts.append("- context: MUST specify 'from valueon account' explicitly")
- if any("attachment" in str(issue).lower() for issue in commonIssues.keys()):
- guidance_parts.append("- documentList: Ensure PDF is properly referenced")
- if any("summary" in str(issue).lower() for issue in commonIssues.keys()):
- guidance_parts.append("- context: MUST request '10-12 sentence German summary' explicitly")
+ # Generic issues summary
+ commonIssues = failureAnalysis.get('commonIssues', {}) or {}
+ if commonIssues:
+ guidanceParts.append("Address the following parameter issues:")
+ for issueKey, issueDesc in commonIssues.items():
+ guidanceParts.append(f"- {issueKey}: {issueDesc}")
- return "\n".join(guidance_parts) if guidance_parts else "Use standard parameter values."
+ # Keep guidance format stable
+ return "\n".join(guidanceParts) if guidanceParts else "Use standard parameter values."
def _getEscalationLevel(self, workflowId: str) -> str:
"""Determine escalation level based on failure patterns"""
@@ -251,7 +241,7 @@ class AdaptiveLearningEngine:
else:
return "low"
- def _generateLearningInsights(self, workflowId: str, actionType: str):
+ def _generateLearningInsights(self, workflowId: str, actionName: str):
"""Generate learning insights for a workflow"""
if workflowId not in self.learningInsights:
self.learningInsights[workflowId] = {}
@@ -263,7 +253,7 @@ class AdaptiveLearningEngine:
'totalAttempts': len(workflowValidations),
'successfulAttempts': len([v for v in workflowValidations if v['success']]),
'failedAttempts': len([v for v in workflowValidations if not v['success']]),
- 'lastActionType': actionType,
+ 'lastActionName': actionName,
'escalationLevel': self._getEscalationLevel(workflowId)
}
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 3ef531d5..6ac81df8 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -26,14 +26,14 @@ class ContentValidator:
if isinstance(data, dict) and 'content' in data:
content = data['content']
# For large content, check size before converting to string
- if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
+ if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
# For very large content, return a size indicator instead
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content)
else:
content = data
# For large content, check size before converting to string
- if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
+ if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content)
return ""
diff --git a/modules/workflows/processing/adaptive/intentAnalyzer.py b/modules/workflows/processing/adaptive/intentAnalyzer.py
index 324549b3..2dd99a94 100644
--- a/modules/workflows/processing/adaptive/intentAnalyzer.py
+++ b/modules/workflows/processing/adaptive/intentAnalyzer.py
@@ -30,7 +30,7 @@ class IntentAnalyzer:
analysisPrompt = f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
-USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')}
+USER REQUEST: {self.services.utils.sanitizePromptContent(userPrompt, 'userinput')}
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
diff --git a/modules/workflows/processing/modes/modeActionplan.py b/modules/workflows/processing/modes/modeActionplan.py
index 9a54c43e..06a8ae4b 100644
--- a/modules/workflows/processing/modes/modeActionplan.py
+++ b/modules/workflows/processing/modes/modeActionplan.py
@@ -571,7 +571,7 @@ class ActionplanMode(BaseMode):
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
- simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+ simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@@ -715,7 +715,7 @@ class ActionplanMode(BaseMode):
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
- simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+ simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
diff --git a/modules/workflows/processing/modes/modeReact.py b/modules/workflows/processing/modes/modeReact.py
index 0dc580bf..6aa6505a 100644
--- a/modules/workflows/processing/modes/modeReact.py
+++ b/modules/workflows/processing/modes/modeReact.py
@@ -98,7 +98,12 @@ class ReactMode(BaseMode):
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
- validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
+ # Validate ONLY the produced JSON (structured content), not rendered files
+ from types import SimpleNamespace
+ validationDocs = []
+ if hasattr(result, 'content') and result.content:
+ validationDocs.append(SimpleNamespace(documentName='generated.json', documentData={'content': result.content}))
+ validationResult = await self.contentValidator.validateContent(validationDocs, self.workflowIntent)
observation['contentValidation'] = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
@@ -106,9 +111,9 @@ class ReactMode(BaseMode):
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
# NEW: Record validation result for adaptive learning
+ actionValue = selection.get('action', 'unknown')
actionContext = {
- 'actionType': selection.get('action', {}).get('action', 'unknown'),
- 'actionName': selection.get('action', {}).get('action', 'unknown'),
+ 'actionName': actionValue,
'workflowId': context.workflow_id
}
@@ -747,7 +752,7 @@ Return only the user-friendly message, no technical details."""
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
- simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+ simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@@ -838,7 +843,7 @@ Return only the user-friendly message, no technical details."""
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
- simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+ simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py
index 9bfc6070..9996b455 100644
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@@ -215,7 +215,7 @@ class WorkflowManager:
" }\n"
" ]\n"
"}\n\n"
- f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}"
+ f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
)
# Call AI analyzer (planning call - will use static parameters)
diff --git a/naming_violations_report.csv b/naming_violations_report.csv
new file mode 100644
index 00000000..a80259f6
--- /dev/null
+++ b/naming_violations_report.csv
@@ -0,0 +1,107 @@
+Module,Function Names,Parameter Names,Variable Names,Total
+modules/workflows/methods/methodSharepoint.py,0,2,211,213
+modules/workflows/methods/methodOutlook.py,0,3,131,134
+modules/services/serviceAi/subDocumentProcessing.py,0,0,104,104
+modules/features/syncDelta/mainSyncDelta.py,1,10,88,99
+modules/shared/jsonUtils.py,0,3,88,91
+modules/services/serviceGeneration/renderers/rendererDocx.py,3,8,79,90
+modules/services/serviceWorkflow/mainServiceWorkflow.py,0,3,85,88
+modules/services/serviceGeneration/renderers/rendererPptx.py,2,7,73,82
+modules/services/serviceGeneration/renderers/rendererPdf.py,3,8,50,61
+modules/connectors/connectorVoiceGoogle.py,1,2,52,55
+modules/services/serviceGeneration/renderers/rendererHtml.py,3,6,46,55
+modules/services/serviceGeneration/renderers/rendererBaseTemplate.py,3,21,27,51
+modules/shared/configuration.py,2,17,30,49
+modules/services/serviceExtraction/subMerger.py,2,5,31,38
+modules/connectors/connectorDbPostgre.py,0,14,20,34
+modules/interfaces/interfaceDbAppObjects.py,0,8,26,34
+modules/routes/routeSecurityGoogle.py,0,0,32,32
+modules/shared/attributeUtils.py,3,4,25,32
+modules/interfaces/interfaceDbChatObjects.py,0,4,27,31
+modules/routes/routeSecurityAdmin.py,0,2,28,30
+modules/services/serviceNeutralization/subProcessList.py,7,0,22,29
+modules/services/serviceGeneration/renderers/rendererText.py,3,7,19,29
+modules/routes/routeSecurityMsft.py,0,0,27,27
+modules/services/serviceGeneration/renderers/rendererMarkdown.py,3,7,17,27
+modules/services/serviceGeneration/renderers/rendererXlsx.py,3,0,24,27
+modules/services/serviceGeneration/renderers/rendererImage.py,3,2,21,26
+modules/security/tokenManager.py,4,7,14,25
+modules/workflows/workflowManager.py,0,0,25,25
+modules/services/serviceGeneration/renderers/rendererCsv.py,3,5,17,25
+modules/shared/auditLogger.py,5,16,3,24
+modules/shared/debugLogger.py,0,0,24,24
+modules/workflows/processing/shared/placeholderFactory.py,0,0,24,24
+modules/interfaces/interfaceDbAppAccess.py,0,2,21,23
+modules/connectors/connectorTicketsJira.py,0,0,22,22
+modules/services/serviceGeneration/renderers/registry.py,7,3,12,22
+modules/routes/routeDataConnections.py,1,1,19,21
+modules/security/tokenRefreshService.py,0,2,19,21
+modules/services/serviceExtraction/extractors/extractorPptx.py,0,1,16,17
+modules/routes/routeSecurityLocal.py,0,0,16,16
+modules/workflows/methods/methodBase.py,0,4,12,16
+modules/services/serviceGeneration/mainServiceGeneration.py,0,4,11,15
+modules/services/serviceUtils/mainServiceUtils.py,0,14,1,15
+modules/features/neutralizePlayground/mainNeutralizePlayground.py,8,5,2,15
+modules/interfaces/interfaceTicketObjects.py,0,5,9,14
+modules/services/serviceNeutralization/subParseString.py,7,0,6,13
+modules/workflows/processing/modes/modeReact.py,0,1,11,12
+modules/interfaces/interfaceDbComponentAccess.py,0,2,9,11
+modules/services/serviceAi/subCoreAi.py,0,0,11,11
+modules/services/serviceExtraction/subRegistry.py,0,0,11,11
+modules/services/serviceNeutralization/mainServiceNeutralization.py,0,2,9,11
+modules/interfaces/interfaceAiObjects.py,0,0,10,10
+modules/services/serviceAi/subSharedAiUtils.py,0,3,7,10
+modules/connectors/connectorDbJson.py,0,3,6,9
+modules/workflows/methods/methodAi.py,0,0,9,9
+modules/services/serviceExtraction/subPromptBuilderExtraction.py,0,0,9,9
+modules/services/serviceGeneration/subDocumentUtility.py,0,3,6,9
+modules/services/serviceNeutralization/subProcessCommon.py,7,2,0,9
+modules/services/serviceNeutralization/subProcessText.py,5,0,4,9
+modules/interfaces/interfaceDbChatAccess.py,0,2,6,8
+modules/security/auth.py,0,1,7,8
+modules/aicore/aicorePluginAnthropic.py,0,0,7,7
+modules/security/tokenRefreshMiddleware.py,0,2,4,6
+modules/services/serviceGeneration/renderers/rendererJson.py,3,0,3,6
+analyze_naming_violations.py,5,0,0,5
+modules/aicore/aicorePluginOpenai.py,0,0,5,5
+modules/routes/routeVoiceGoogle.py,0,0,5,5
+modules/shared/eventManagement.py,2,3,0,5
+modules/workflows/processing/adaptive/intentAnalyzer.py,0,0,5,5
+modules/workflows/processing/shared/executionState.py,0,5,0,5
+modules/services/serviceGeneration/subJsonSchema.py,0,0,5,5
+modules/services/serviceNeutralization/subPatterns.py,5,0,0,5
+modules/services/serviceNeutralization/subProcessBinary.py,4,0,1,5
+modules/services/serviceExtraction/extractors/extractorXlsx.py,0,0,5,5
+modules/interfaces/interfaceDbComponentObjects.py,0,3,1,4
+modules/routes/routeDataNeutralization.py,0,0,4,4
+modules/routes/routeWorkflows.py,0,0,4,4
+modules/shared/timezoneUtils.py,3,1,0,4
+modules/workflows/processing/adaptive/contentValidator.py,0,0,4,4
+modules/workflows/processing/core/messageCreator.py,0,0,4,4
+modules/services/serviceSharepoint/mainServiceSharepoint.py,0,0,4,4
+modules/routes/routeDataUsers.py,0,0,3,3
+modules/services/serviceExtraction/subPipeline.py,0,0,3,3
+app.py,0,0,2,2
+modules/datamodels/datamodelChat.py,0,1,1,2
+modules/routes/routeAttributes.py,0,0,2,2
+modules/routes/routeDataPrompts.py,0,0,2,2
+modules/security/csrf.py,0,1,1,2
+modules/security/jwtService.py,0,0,2,2
+modules/workflows/processing/adaptive/learningEngine.py,0,0,2,2
+modules/workflows/processing/modes/modeActionplan.py,0,0,2,2
+modules/workflows/processing/shared/methodDiscovery.py,0,0,2,2
+modules/services/serviceNormalization/mainServiceNormalization.py,0,0,2,2
+modules/services/serviceExtraction/extractors/extractorImage.py,0,0,2,2
+modules/aicore/aicoreBase.py,0,0,1,1
+modules/aicore/aicoreModelSelector.py,0,0,1,1
+modules/connectors/connectorTicketsClickup.py,0,0,1,1
+modules/datamodels/datamodelDocument.py,0,1,0,1
+modules/datamodels/datamodelSecurity.py,0,0,1,1
+modules/routes/routeAdmin.py,0,0,1,1
+modules/routes/routeDataFiles.py,0,0,1,1
+modules/workflows/processing/workflowProcessor.py,0,0,1,1
+modules/workflows/processing/adaptive/adaptiveLearningEngine.py,0,0,1,1
+modules/workflows/processing/core/actionExecutor.py,0,0,1,1
+modules/workflows/processing/core/taskPlanner.py,0,0,1,1
+modules/workflows/processing/modes/modeBase.py,0,0,1,1
+modules/services/serviceAi/subDocumentGeneration.py,0,0,1,1
diff --git a/processDocumentsWithContinuation_usage_analysis.md b/processDocumentsWithContinuation_usage_analysis.md
new file mode 100644
index 00000000..3dedc405
--- /dev/null
+++ b/processDocumentsWithContinuation_usage_analysis.md
@@ -0,0 +1,184 @@
+# Analysis: `processDocumentsWithContinuation` and Subfunctions Usage
+
+## Executive Summary
+
+**FINDING**: The function `processDocumentsWithContinuation` in `subDocumentProcessing.py` is **NOT USED** anywhere in the active codebase. The continuation chain was only referenced by the deleted `subDocumentGeneration.py` module.
+
+---
+
+## Main Function: `processDocumentsWithContinuation`
+
+**Location**: `gateway/modules/services/serviceAi/subDocumentProcessing.py:303`
+
+**Status**: ❌ **NOT USED**
+
+### Usage Search Results
+
+- ❌ No actual code calls to `.processDocumentsWithContinuation(`
+- ⚠️ Only mentioned in documentation files:
+ - `wiki/poweron/appdoc/doc_system_function_relationship_ai.md` (documentation)
+ - `gateway/callAiWithDocumentGeneration_usage_analysis.md` (previous analysis - noted it was called by deleted code)
+
+### Why It's Not Used
+
+The only caller was `subDocumentGeneration._processDocumentsUnified()` which we just deleted. The current active codebase uses `subCoreAi.callAiDocuments()` which has its own continuation logic via `_callAiWithLooping()`.
+
+---
+
+## Function Call Chain Analysis
+
+```
+processDocumentsWithContinuation (line 303) - ❌ NOT USED
+ ├─> _buildContinuationPrompt (line 319, 324) - ❌ ONLY USED HERE
+ └─> _processWithContinuationLoop (line 322, 373) - ❌ ONLY USED HERE
+ ├─> _buildContinuationIterationPrompt (line 393, 459) - ❌ ONLY USED HERE
+ └─> processDocumentsPerChunkJsonWithPrompt (line 402) - ✅ USED ELSEWHERE
+```
+
+---
+
+## Subfunction Analysis
+
+### 1. `_buildContinuationPrompt`
+**Location**: Line 324-371
+**Status**: ✅ **USED** (but only internally)
+**Called by**: `processDocumentsWithContinuation` (line 319)
+**Effectively**: ❌ **UNUSED** (because parent function is unused)
+
+**Internal Usage**:
+- Called from `processDocumentsWithContinuation` at line 319
+
+**Functionality**:
+- Builds a prompt with continuation instructions
+- Adds JSON structure requirements with `"continue": true/false` flag
+- Adds `continuation_context` field specification
+
+**Note**: This uses a different continuation pattern than `SubCoreAi._callAiWithLooping()`:
+- This uses `"continue": true/false + "continuation_context"` for document sections
+- SubCoreAi uses `buildContinuationContext()` with `last_raw_json`
+
+---
+
+### 2. `_processWithContinuationLoop`
+**Location**: Line 373-457
+**Status**: ✅ **USED** (but only internally)
+**Called by**: `processDocumentsWithContinuation` (line 322)
+**Effectively**: ❌ **UNUSED** (because parent function is unused)
+
+**Internal Usage**:
+- Called from `processDocumentsWithContinuation` at line 322
+
+**External Dependencies**:
+- Calls `self._buildContinuationIterationPrompt()` (line 393)
+- Calls `self.processDocumentsPerChunkJsonWithPrompt()` (line 402)
+
+**Functionality**:
+- Implements continuation loop (max 10 iterations)
+- Accumulates sections across iterations
+- Checks `continue` flag and `continuation_context` to determine if more iterations needed
+- Builds final result with accumulated sections
+
+---
+
+### 3. `_buildContinuationIterationPrompt`
+**Location**: Line 459-498
+**Status**: ✅ **USED** (but only internally)
+**Called by**: `_processWithContinuationLoop` (line 393)
+**Effectively**: ❌ **UNUSED** (because parent chain is unused)
+
+**Internal Usage**:
+- Called from `_processWithContinuationLoop` at line 393 (in loop, conditionally)
+
+**Functionality**:
+- Builds a prompt for continuation iteration with context
+- Includes summary of previously generated content (last 3 sections)
+- Includes continuation instructions with last section ID, element index, remaining requirements
+
+---
+
+### 4. `processDocumentsPerChunkJsonWithPrompt`
+**Location**: Line 219-301
+**Status**: ✅ **USED ELSEWHERE**
+**Called by**:
+ - `_processWithContinuationLoop` (line 402)
+ - Also referenced in backup files (not active code)
+
+**Internal Usage**:
+- Called from `_processWithContinuationLoop` at line 402
+
+**External Usage Search**:
+- ✅ Used internally by continuation loop
+- ⚠️ Referenced in `local/backup/backup_mainServiceAi.py.txt` (backup file, not active)
+- ❌ Not used by any other active code
+
+**Functionality**:
+- Processes documents with per-chunk AI calls
+- Uses a custom prompt instead of default extraction prompt
+- Returns merged JSON document
+
+**Note**: This function itself is only used by the continuation loop. However, it's a more general function that could be useful, so it's not "dead code" - it's just currently only used by unused code.
+
+---
+
+## Summary Table
+
+| Function | Line | Status | Called By | Effectively Used? |
+|----------|------|--------|-----------|-------------------|
+| `processDocumentsWithContinuation` | 303 | ❌ Not used | (external) | ❌ No |
+| `_buildContinuationPrompt` | 324 | ✅ Used internally | `processDocumentsWithContinuation:319` | ❌ No |
+| `_processWithContinuationLoop` | 373 | ✅ Used internally | `processDocumentsWithContinuation:322` | ❌ No |
+| `_buildContinuationIterationPrompt` | 459 | ✅ Used internally | `_processWithContinuationLoop:393` | ❌ No |
+| `processDocumentsPerChunkJsonWithPrompt` | 219 | ✅ Used internally | `_processWithContinuationLoop:402` | ⚠️ **ONLY USED BY UNUSED CODE** |
+
+---
+
+## Current Active Implementation
+
+The active continuation logic is in `subCoreAi.callAiDocuments()` → `_callAiWithLooping()`:
+- Uses `buildGenerationPrompt()` with `continuationContext` parameter
+- Uses `buildContinuationContext()` to build context from sections
+- Different continuation pattern (uses `last_raw_json` instead of `continuation_context`)
+
+---
+
+## Dead Code Identification
+
+**Completely Unused Chain** (can be safely removed):
+1. ✅ `processDocumentsWithContinuation` - entry point, not called
+2. ✅ `_buildContinuationPrompt` - only used by #1
+3. ✅ `_processWithContinuationLoop` - only used by #1
+4. ✅ `_buildContinuationIterationPrompt` - only used by #3
+
+**Potentially Unused** (only used by dead code):
+- ⚠️ `processDocumentsPerChunkJsonWithPrompt` - only caller is dead code, but function is general-purpose
+
+---
+
+## Recommendations
+
+1. **Remove Dead Code Chain**: All four functions (`processDocumentsWithContinuation`, `_buildContinuationPrompt`, `_processWithContinuationLoop`, `_buildContinuationIterationPrompt`) can be safely removed.
+
+2. **For `processDocumentsPerChunkJsonWithPrompt`**:
+ - **Option A**: Remove if not needed (it's only used by the dead continuation chain)
+ - **Option B**: Keep if it might be useful for future custom prompt processing
+ - **Recommendation**: Since it's a general-purpose function that could be useful, keep it but note that it's currently unused.
+
+3. **If Keeping**: Document why this continuation logic exists but is unused, or mark as deprecated/legacy alternative to `_callAiWithLooping()`.
+
+---
+
+## Verification Commands
+
+To verify these findings:
+
+```bash
+# Search for actual function calls (should return no results for the main function)
+grep -r "\.processDocumentsWithContinuation(" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup
+
+# Search for _buildContinuationPrompt usage (should only find the definition)
+grep -r "_buildContinuationPrompt" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
+
+# Search for _processWithContinuationLoop usage (should only find the definition)
+grep -r "_processWithContinuationLoop" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
+```
+
diff --git a/tool_security_encrypt_all_env_files.py b/tool_security_encrypt_all_env_files.py
index df9cb71a..7e39e720 100644
--- a/tool_security_encrypt_all_env_files.py
+++ b/tool_security_encrypt_all_env_files.py
@@ -39,7 +39,7 @@ else:
# Import encryption functions
try:
- from modules.shared.configuration import encrypt_value
+ from modules.shared.configuration import encryptValue
except ImportError as e:
print(f"Error: Could not import encryption functions from shared.configuration: {e}")
print(f"Make sure you're running this script from the gateway directory")
@@ -254,7 +254,7 @@ def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_b
print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the environment type from the file
- encrypted_value = encrypt_value(value, file_env_type)
+ encrypted_value = encryptValue(value, file_env_type)
# Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n"
diff --git a/tool_security_encrypt_config_value.py b/tool_security_encrypt_config_value.py
index c807db66..ced8c7cc 100644
--- a/tool_security_encrypt_config_value.py
+++ b/tool_security_encrypt_config_value.py
@@ -30,7 +30,7 @@ from datetime import datetime
# Add the modules directory to the Python path
sys.path.insert(0, str(Path(__file__).parent / 'modules'))
-from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
+from modules.shared.configuration import encryptValue, decryptValue, isEncryptedValue
def get_env_type_from_file(file_path: Path) -> str:
"""
@@ -247,7 +247,7 @@ def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool =
print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the environment type from the file
- encrypted_value = encrypt_value(value, file_env_type)
+ encrypted_value = encryptValue(value, file_env_type)
# Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n"
@@ -360,8 +360,8 @@ def main():
# Handle decryption
if args.decrypt:
- if _is_encrypted_value(args.decrypt):
- decrypted = decrypt_value(args.decrypt)
+ if isEncryptedValue(args.decrypt):
+ decrypted = decryptValue(args.decrypt)
print(f"Decrypted value: {decrypted}")
else:
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
@@ -411,7 +411,7 @@ def main():
return
# Encrypt the value
- encrypted_value = encrypt_value(value_to_encrypt, args.env)
+ encrypted_value = encryptValue(value_to_encrypt, args.env)
print(f"\n✓ Encryption successful!")
print(f"Environment: {args.env or 'current'}")