refactored whole codebase for camelCase part 1 of 2

This commit is contained in:
ValueOn AG 2025-10-31 00:05:39 +01:00
parent 26b2109844
commit c44fc92568
86 changed files with 3969 additions and 5005 deletions

View file

@ -0,0 +1,242 @@
"""
Script to analyze codebase for snake_case naming violations that should be camelStyle.
Excludes routes (decorated endpoint functions) and JSON field names.
"""
import ast
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple
import csv
# Regular expressions that shouldExcludeName() searches for (case-insensitively)
# in the optional ``context`` string; any match excludes the name from analysis.
EXCLUDE_PATTERNS = [
    # FastAPI route decorators
    r'@.*\.(get|post|put|delete|patch|options|head)',
    # Database ORM attributes
    r'self\.(db|db_|model|orm)',
    # ORM methods
    r'\.(objects|query|filter|get|all)',
    # FastAPI params
    r'(request|response|response_model|status_code)',
    # String literals
    r'(snake_case|kebab-case)',
]

# Names of external libraries whose attributes should not be renamed.
# NOTE(review): nothing in the visible part of this script reads this set.
EXTERNAL_LIB_ATTRIBUTES = {
    'pydantic',
    'fastapi',
    'sqlalchemy',
    'psycopg',
    'requests',
    'aiohttp',
    'azure',
    'google',
    'openai',
    'anthropic',
    'reportlab',
    'docx',
    'pptx',
    'openpyxl',
    'json',
    'logging',
    'datetime',
    'typing',
}
def isRouteFile(filePath: str) -> bool:
    """Tell whether a path looks like it belongs to a route module.

    A path qualifies when the substring 'routes' appears anywhere in it
    (case-sensitive), or when its base name contains 'route' in any casing.
    """
    if 'routes' in filePath:
        return True
    baseName = os.path.basename(filePath)
    return 'route' in baseName.lower()
def shouldExcludeName(name: str, context: str = "") -> bool:
    """Decide whether a name is exempt from the naming analysis.

    Args:
        name: Identifier under inspection.
        context: Optional surrounding source text; it is searched
            case-insensitively for each entry of EXCLUDE_PATTERNS.

    Returns:
        True for dunder names (``__x__``) and whenever any exclusion pattern
        matches ``context``; False otherwise.
    """
    # Dunder names are never reported.
    isDunder = name.startswith('__') and name.endswith('__')
    if isDunder:
        return True
    # Any hit in the context means the name is treated as external usage.
    return any(
        re.search(excludePattern, context, re.IGNORECASE)
        for excludePattern in EXCLUDE_PATTERNS
    )
def isSnakeCase(name: str) -> bool:
    """Report whether a name counts as snake_case for this analysis.

    Empty names and names with a leading underscore are never snake_case.
    Otherwise a name qualifies when it contains an underscore and is not
    written entirely in upper case (ALL_CAPS constants are allowed).
    """
    if not name:
        return False
    if name.startswith('_'):
        return False
    if '_' not in name:
        return False
    return not name.isupper()
def analyzeFile(filePath: str) -> Dict[str, List[str]]:
    """Analyze a Python file for snake_case naming violations.

    Function names, parameter names and variable names bound by assignments,
    ``for`` loops and ``with`` statements are checked, for both synchronous
    and ``async`` constructs. Names of functions decorated like
    ``@x.get`` / ``@x.post(...)`` (route endpoints) are not reported, but
    their parameters and bodies are still analyzed.

    Args:
        filePath: Path of the Python source file to parse (read as UTF-8).

    Returns:
        A dict with the keys 'functions', 'parameters' and 'variables', each
        holding a list of entries such as ``"my_name (line 12)"``. All three
        lists are empty when the file cannot be decoded or parsed.
    """
    violations: Dict[str, List[str]] = {
        'functions': [],
        'parameters': [],
        'variables': []
    }
    try:
        with open(filePath, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content, filename=filePath)
    except (SyntaxError, ValueError):
        # ValueError also covers UnicodeDecodeError (a subclass) and the
        # "null bytes" error that ast.parse raises on older interpreters.
        return violations

    routeMethods = {'get', 'post', 'put', 'delete', 'patch', 'options', 'head'}

    def isRouteDecorator(decorator) -> bool:
        """Return True for decorators shaped like @x.get or @x.get(...)."""
        target = decorator.func if isinstance(decorator, ast.Call) else decorator
        return isinstance(target, ast.Attribute) and target.attr in routeMethods

    def iterTargetNames(target):
        """Yield every plain name bound by an assignment/loop/with target."""
        if isinstance(target, ast.Name):
            yield target.id
        elif isinstance(target, (ast.Tuple, ast.List)):
            # Tuple/list unpacking: a_b, c_d = ...
            for element in target.elts:
                yield from iterTargetNames(element)
        elif isinstance(target, ast.Starred):
            yield from iterTargetNames(target.value)
        # Attribute and subscript targets bind no new local name.

    class NamingAnalyzer(ast.NodeVisitor):
        """Walks the tree and appends findings to ``violations``."""

        def checkTarget(self, target, lineno):
            """Record every snake_case name bound by ``target``."""
            for varName in iterTargetNames(target):
                if isSnakeCase(varName) and not shouldExcludeName(varName):
                    violations['variables'].append(f"{varName} (line {lineno})")

        def visit_FunctionDef(self, node):
            funcName = node.name
            # Route endpoint names are skipped, but their parameters and
            # local variables are still checked below.
            isRouteEndpoint = any(isRouteDecorator(d) for d in node.decorator_list)
            if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName):
                violations['functions'].append(f"{funcName} (line {node.lineno})")

            # Gather every kind of parameter, not just plain positional ones.
            arguments = node.args
            allParams = list(getattr(arguments, 'posonlyargs', []))
            allParams.extend(arguments.args)
            if arguments.vararg is not None:
                allParams.append(arguments.vararg)
            allParams.extend(arguments.kwonlyargs)
            if arguments.kwarg is not None:
                allParams.append(arguments.kwarg)
            for param in allParams:
                paramName = param.arg
                if isSnakeCase(paramName) and not shouldExcludeName(paramName):
                    violations['parameters'].append(
                        f"{paramName} in {funcName} (line {node.lineno})"
                    )

            # Only the body is walked; decorators and defaults bind no names.
            for stmt in node.body:
                self.visit(stmt)

        # ``async def`` shares the same node layout as ``def``.
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Assign(self, node):
            for target in node.targets:
                self.checkTarget(target, node.lineno)

        def visit_For(self, node):
            self.checkTarget(node.target, node.lineno)
            self.generic_visit(node)

        visit_AsyncFor = visit_For

        def visit_With(self, node):
            for item in node.items:
                if item.optional_vars is not None:
                    self.checkTarget(item.optional_vars, node.lineno)
            self.generic_visit(node)

        visit_AsyncWith = visit_With

    NamingAnalyzer().visit(tree)
    return violations
def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]:
    """Scan all Python files below a directory and tally naming violations.

    Args:
        rootDir: Directory to scan. When it does not exist, the same name is
            tried below the resolved current directory, and finally the
            current directory itself is scanned.

    Returns:
        A defaultdict keyed by the file path relative to the scanned root
        (with forward slashes). Each value holds the counts 'functions',
        'parameters' and 'variables' plus the raw analyzeFile() result under
        'details'. Only files with at least one violation get an entry.
    """
    def emptyEntry():
        return {
            'functions': 0,
            'parameters': 0,
            'variables': 0,
            'details': {
                'functions': [],
                'parameters': [],
                'variables': []
            }
        }

    results = defaultdict(emptyEntry)

    # Resolve the directory to scan, falling back step by step.
    rootPath = Path(rootDir)
    if not rootPath.exists():
        fallback = Path('.').resolve() / rootDir
        rootPath = fallback if fallback.exists() else Path('.')

    categories = ('functions', 'parameters', 'variables')
    for pyFile in rootPath.rglob('*.py'):
        relPath = str(pyFile.relative_to(rootPath))
        # Test files, tool_ scripts and bytecode caches are left out.
        isSkipped = (
            'test' in relPath.lower()
            or 'tool_' in relPath
            or '__pycache__' in relPath
        )
        if isSkipped:
            continue

        found = analyzeFile(str(pyFile))
        counts = {category: len(found[category]) for category in categories}
        if sum(counts.values()) > 0:
            entry = results[relPath.replace('\\', '/')]
            entry.update(counts)
            entry['details'] = found
    return results
def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'):
    """Write the per-module violation counts to a CSV file and print a summary.

    Args:
        results: Mapping of module name to a dict with the integer counts
            'functions', 'parameters' and 'variables' (extra keys are ignored).
        outputFile: Path of the CSV file to create or overwrite.

    The CSV has the header ``Module, Function Names, Parameter Names,
    Variable Names, Total`` followed by one row per module with at least one
    violation, sorted by total in descending order.
    """
    def totalOf(stats) -> int:
        return stats['functions'] + stats['parameters'] + stats['variables']

    # Sort by total violations, worst module first.
    sortedResults = sorted(
        results.items(),
        key=lambda item: totalOf(item[1]),
        reverse=True
    )

    rowsWritten = 0
    with open(outputFile, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total'])
        for module, stats in sortedResults:
            total = totalOf(stats)
            if total > 0:
                writer.writerow([
                    module,
                    stats['functions'],
                    stats['parameters'],
                    stats['variables'],
                    total
                ])
                rowsWritten += 1

    # Every module with a positive total gets a row, so zero rows means that
    # no violations exist at all; say so instead of raising a false alarm.
    if rowsWritten == 0:
        print("No naming violations found; CSV contains only the header row.")

    print(f"CSV report generated: {outputFile}")
    # NOTE: this counts the entries of ``results``; analyzeCodebase() only
    # adds modules that have violations, not every scanned file.
    print(f"Total modules analyzed: {len(results)}")

    # Print summary
    totalFuncs = sum(r['functions'] for r in results.values())
    totalParams = sum(r['parameters'] for r in results.values())
    totalVars = sum(r['variables'] for r in results.values())
    print("\nSummary:")
    print(f" Function names: {totalFuncs}")
    print(f" Parameter names: {totalParams}")
    print(f" Variable names: {totalVars}")
    print(f" Total violations: {totalFuncs + totalParams + totalVars}")
if __name__ == '__main__':
    # Script entry point: analyze the 'gateway' tree and write the CSV report.
    print("Analyzing codebase for naming violations...")
    results = analyzeCodebase('gateway')
    # Write CSV to the gateway directory. analyzeCodebase() falls back to the
    # current directory when 'gateway' does not exist (e.g. when the script is
    # run from inside it), so the report location must fall back the same way
    # or opening the output file would fail.
    outputDir = Path('gateway')
    if not outputDir.is_dir():
        outputDir = Path('.')
    outputPath = outputDir / 'naming_violations_report.csv'
    generateCSV(results, str(outputPath))

62
app.py
View file

@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler):
""" """
def __init__( def __init__(
self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs
): ):
self.log_dir = log_dir self.logDir = logDir
self.filename_prefix = filename_prefix self.filenamePrefix = filenamePrefix
self.current_date = None self.currentDate = None
self.current_file = None self.currentFile = None
# Initialize with today's file # Initialize with today's file
self._update_file_if_needed() self._updateFileIfNeeded()
# Call parent constructor with current file # Call parent constructor with current file
super().__init__( super().__init__(
self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs
) )
def _update_file_if_needed(self): def _updateFileIfNeeded(self):
"""Update the log file if the date has changed""" """Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d") today = datetime.now().strftime("%Y%m%d")
if self.current_date != today: if self.currentDate != today:
self.current_date = today self.currentDate = today
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log") newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
if self.current_file != new_file: if self.currentFile != newFile:
self.current_file = new_file self.currentFile = newFile
return True return True
return False return False
def emit(self, record): def emit(self, record):
"""Emit a log record, switching files if date has changed""" """Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file # Check if we need to switch to a new file
if self._update_file_if_needed(): if self._updateFileIfNeeded():
# Close current file and open new one # Close current file and open new one
if self.stream: if self.stream:
self.stream.close() self.stream.close()
self.stream = None self.stream = None
# Update the baseFilename for the parent class # Update the baseFilename for the parent class
self.baseFilename = self.current_file self.baseFilename = self.currentFile
# Reopen the stream # Reopen the stream
if not self.delay: if not self.delay:
self.stream = self._open() self.stream = self._open()
@ -200,10 +200,10 @@ def initLogging():
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5)) backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler( fileHandler = DailyRotatingFileHandler(
log_dir=logDir, logDir=logDir,
filename_prefix="log_app", filenamePrefix="log_app",
max_bytes=rotationSize, maxBytes=rotationSize,
backup_count=backupCount, backupCount=backupCount,
encoding="utf-8", encoding="utf-8",
) )
fileHandler.setFormatter(fileFormatter) fileHandler.setFormatter(fileFormatter)
@ -252,7 +252,7 @@ def initLogging():
) )
def make_sqlalchemy_db_url() -> str: def makeSqlalchemyDbUrl() -> str:
host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost") host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost")
port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432") port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432")
db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway") db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway")
@ -299,17 +299,17 @@ app = FastAPI(
# Configure OpenAPI security scheme for Swagger UI # Configure OpenAPI security scheme for Swagger UI
# This adds the "Authorize" button to the /docs page # This adds the "Authorize" button to the /docs page
security_scheme = HTTPBearer() securityScheme = HTTPBearer()
app.openapi_schema = None # Reset schema to regenerate with security app.openapi_schema = None # Reset schema to regenerate with security
def custom_openapi(): def customOpenapi():
if app.openapi_schema: if app.openapi_schema:
return app.openapi_schema return app.openapi_schema
from fastapi.openapi.utils import get_openapi from fastapi.openapi.utils import get_openapi
openapi_schema = get_openapi( openapiSchema = get_openapi(
title=app.title, title=app.title,
version="1.0.0", version="1.0.0",
description=app.description, description=app.description,
@ -317,7 +317,7 @@ def custom_openapi():
) )
# Add security scheme definition # Add security scheme definition
openapi_schema["components"]["securitySchemes"] = { openapiSchema["components"]["securitySchemes"] = {
"BearerAuth": { "BearerAuth": {
"type": "http", "type": "http",
"scheme": "bearer", "scheme": "bearer",
@ -328,20 +328,20 @@ def custom_openapi():
# Apply security globally to all endpoints # Apply security globally to all endpoints
# Individual endpoints can override this if needed # Individual endpoints can override this if needed
openapi_schema["security"] = [{"BearerAuth": []}] openapiSchema["security"] = [{"BearerAuth": []}]
app.openapi_schema = openapi_schema app.openapi_schema = openapiSchema
return app.openapi_schema return app.openapi_schema
app.openapi = custom_openapi app.openapi = customOpenapi
# Parse CORS origins from environment variable # Parse CORS origins from environment variable
def get_allowed_origins(): def getAllowedOrigins():
origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
# Split by comma and strip whitespace # Split by comma and strip whitespace
origins = [origin.strip() for origin in origins_str.split(",")] origins = [origin.strip() for origin in originsStr.split(",")]
logger.info(f"CORS allowed origins: {origins}") logger.info(f"CORS allowed origins: {origins}")
return origins return origins
@ -349,7 +349,7 @@ def get_allowed_origins():
# CORS configuration using environment variables # CORS configuration using environment variables
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=get_allowed_origins(), allow_origins=getAllowedOrigins(),
allow_credentials=True, allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
allow_headers=["*"], allow_headers=["*"],

View file

@ -7,7 +7,7 @@ from pydantic import BaseModel
import threading import threading
import time import time
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -232,7 +232,7 @@ class DatabaseConnector:
raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})") raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})")
# Add metadata # Add metadata
currentTime = get_utc_timestamp() currentTime = getUtcTimestamp()
if "_createdAt" not in record: if "_createdAt" not in record:
record["_createdAt"] = currentTime record["_createdAt"] = currentTime
record["_createdBy"] = self.userId record["_createdBy"] = self.userId

View file

@ -6,7 +6,7 @@ import uuid
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
import threading import threading
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -287,7 +287,7 @@ class DatabaseConnector:
INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt") INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
VALUES (%s, %s, %s) VALUES (%s, %s, %s)
""", """,
(table_name, initial_id, get_utc_timestamp()), (table_name, initial_id, getUtcTimestamp()),
) )
self.connection.commit() self.connection.commit()
@ -611,7 +611,7 @@ class DatabaseConnector:
raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}") raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")
# Add metadata # Add metadata
currentTime = get_utc_timestamp() currentTime = getUtcTimestamp()
if "_createdAt" not in record: if "_createdAt" not in record:
record["_createdAt"] = currentTime record["_createdAt"] = currentTime
record["_createdBy"] = self.userId record["_createdBy"] = self.userId

View file

@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase):
"Content-Type": "application/json", "Content-Type": "application/json",
} }
async def read_attributes(self) -> list[TicketFieldAttribute]: async def readAttributes(self) -> list[TicketFieldAttribute]:
"""Fetch field attributes. Uses list custom fields if listId provided; else basic fields.""" """Fetch field attributes. Uses list custom fields if listId provided; else basic fields."""
attributes: list[TicketFieldAttribute] = [] attributes: list[TicketFieldAttribute] = []
try: try:
@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase):
logger.error(f"ClickUp read_attributes error: {e}") logger.error(f"ClickUp read_attributes error: {e}")
return attributes return attributes
async def read_tasks(self, *, limit: int = 0) -> list[dict]: async def readTasks(self, *, limit: int = 0) -> list[dict]:
"""Read tasks from ClickUp, always returning full task records. """Read tasks from ClickUp, always returning full task records.
If list_id is set, read from that list; otherwise read from team. If list_id is set, read from that list; otherwise read from team.
""" """
@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase):
logger.error(f"ClickUp read_tasks error: {e}") logger.error(f"ClickUp read_tasks error: {e}")
return tasks return tasks
async def write_tasks(self, tasklist: list[dict]) -> None: async def writeTasks(self, tasklist: list[dict]) -> None:
"""Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}""" """Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}"""
try: try:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:

View file

@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase):
self.ticketType = ticketType self.ticketType = ticketType
async def read_attributes(self) -> list[TicketFieldAttribute]: async def readAttributes(self) -> list[TicketFieldAttribute]:
""" """
Read field attributes from Jira by querying for a single issue Read field attributes from Jira by querying for a single issue
and extracting the field mappings. and extracting the field mappings.
@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase):
logger.error(f"Error while calling fields API: {str(e)}") logger.error(f"Error while calling fields API: {str(e)}")
return [] return []
async def read_tasks(self, *, limit: int = 0) -> list[dict]: async def readTasks(self, *, limit: int = 0) -> list[dict]:
""" """
Read tasks from Jira with pagination support. Read tasks from Jira with pagination support.
@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase):
logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}") logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}")
raise raise
async def write_tasks(self, tasklist: list[dict]) -> None: async def writeTasks(self, tasklist: list[dict]) -> None:
""" """
Write/update tasks to Jira. Write/update tasks to Jira.

View file

@ -26,18 +26,18 @@ class ConnectorGoogleSpeech:
""" """
try: try:
# Get JSON key from config.ini # Get JSON key from config.ini
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE": if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key") raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
# Parse the JSON key and set up authentication # Parse the JSON key and set up authentication
try: try:
credentials_info = json.loads(api_key) credentialsInfo = json.loads(apiKey)
# Create credentials object directly (no file needed!) # Create credentials object directly (no file needed!)
from google.oauth2 import service_account from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_info(credentials_info) credentials = service_account.Credentials.from_service_account_info(credentialsInfo)
logger.info("✅ Using Google Speech credentials from config.ini") logger.info("✅ Using Google Speech credentials from config.ini")
@ -55,8 +55,8 @@ class ConnectorGoogleSpeech:
logger.error(f"❌ Failed to initialize Google Cloud clients: {e}") logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
raise raise
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE", async def speech_to_text(self, audioContent: bytes, language: str = "de-DE",
sample_rate: int = None, channels: int = None) -> Dict: sampleRate: int = None, channels: int = None) -> Dict:
""" """
Convert speech to text using Google Cloud Speech-to-Text API. Convert speech to text using Google Cloud Speech-to-Text API.
@ -71,8 +71,8 @@ class ConnectorGoogleSpeech:
""" """
try: try:
# Auto-detect audio format if not provided # Auto-detect audio format if not provided
if sample_rate is None or channels is None: if sampleRate is None or channels is None:
validation = self.validate_audio_format(audio_content) validation = self.validate_audio_format(audioContent)
if not validation["valid"]: if not validation["valid"]:
return { return {
"success": False, "success": False,
@ -80,59 +80,59 @@ class ConnectorGoogleSpeech:
"confidence": 0.0, "confidence": 0.0,
"error": f"Invalid audio format: {validation.get('error', 'Unknown error')}" "error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
} }
sample_rate = validation["sample_rate"] sampleRate = validation["sample_rate"]
channels = validation["channels"] channels = validation["channels"]
audio_format = validation["format"] audioFormat = validation["format"]
logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch") logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
logger.info(f"Processing audio with Google Cloud Speech-to-Text") logger.info(f"Processing audio with Google Cloud Speech-to-Text")
logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch") logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
# Configure audio settings # Configure audio settings
audio = speech.RecognitionAudio(content=audio_content) audio = speech.RecognitionAudio(content=audioContent)
# Determine encoding based on detected format # Determine encoding based on detected format
# Google Cloud Speech API has specific requirements for different formats # Google Cloud Speech API has specific requirements for different formats
if audio_format == "webm_opus": if audioFormat == "webm_opus":
# For WEBM OPUS, we need to ensure proper format # For WEBM OPUS, we need to ensure proper format
encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
# WEBM_OPUS requires specific sample rate handling - must match header # WEBM_OPUS requires specific sample rate handling - must match header
if sample_rate != 48000: if sampleRate != 48000:
logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000") logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000")
sample_rate = 48000 sampleRate = 48000
# For WEBM_OPUS, don't specify sample_rate_hertz in config # For WEBM_OPUS, don't specify sample_rate_hertz in config
# Google Cloud will read it from the WEBM header # Google Cloud will read it from the WEBM header
use_sample_rate = False useSampleRate = False
elif audio_format == "linear16": elif audioFormat == "linear16":
# For LINEAR16 format (PCM) # For LINEAR16 format (PCM)
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
# Ensure sample rate is reasonable # Ensure sample rate is reasonable
if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]: if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000") logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000")
sample_rate = 16000 sampleRate = 16000
use_sample_rate = True useSampleRate = True
elif audio_format == "mp3": elif audioFormat == "mp3":
# For MP3 format # For MP3 format
encoding = speech.RecognitionConfig.AudioEncoding.MP3 encoding = speech.RecognitionConfig.AudioEncoding.MP3
use_sample_rate = True useSampleRate = True
elif audio_format == "flac": elif audioFormat == "flac":
# For FLAC format # For FLAC format
encoding = speech.RecognitionConfig.AudioEncoding.FLAC encoding = speech.RecognitionConfig.AudioEncoding.FLAC
use_sample_rate = True useSampleRate = True
elif audio_format == "wav": elif audioFormat == "wav":
# For WAV format # For WAV format
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
use_sample_rate = True useSampleRate = True
else: else:
# For unknown formats, try LINEAR16 as fallback # For unknown formats, try LINEAR16 as fallback
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16 encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
sample_rate = 16000 # Use standard sample rate sampleRate = 16000 # Use standard sample rate
channels = 1 # Use mono channels = 1 # Use mono
use_sample_rate = True useSampleRate = True
logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz") logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz")
# Build config based on format requirements # Build config based on format requirements
config_params = { configParams = {
"encoding": encoding, "encoding": encoding,
"audio_channel_count": channels, "audio_channel_count": channels,
"language_code": language, "language_code": language,
@ -145,13 +145,13 @@ class ConnectorGoogleSpeech:
} }
# Only add sample_rate_hertz if needed (not for WEBM_OPUS) # Only add sample_rate_hertz if needed (not for WEBM_OPUS)
if use_sample_rate: if useSampleRate:
config_params["sample_rate_hertz"] = sample_rate configParams["sample_rate_hertz"] = sampleRate
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}") logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}")
else: else:
logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}") logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")
config = speech.RecognitionConfig(**config_params) config = speech.RecognitionConfig(**configParams)
# Perform speech recognition # Perform speech recognition
logger.info("Sending audio to Google Cloud Speech-to-Text...") logger.info("Sending audio to Google Cloud Speech-to-Text...")
@ -162,12 +162,12 @@ class ConnectorGoogleSpeech:
response = self.speech_client.recognize(config=config, audio=audio) response = self.speech_client.recognize(config=config, audio=audio)
logger.debug(f"Google Cloud response: {response}") logger.debug(f"Google Cloud response: {response}")
except Exception as api_error: except Exception as apiError:
logger.error(f"Google Cloud API error: {api_error}") logger.error(f"Google Cloud API error: {apiError}")
# Try with different encoding as fallback # Try with different encoding as fallback
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16: if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
logger.info("Trying fallback with LINEAR16 encoding...") logger.info("Trying fallback with LINEAR16 encoding...")
fallback_config = speech.RecognitionConfig( fallbackConfig = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000, # Use standard sample rate sample_rate_hertz=16000, # Use standard sample rate
audio_channel_count=1, audio_channel_count=1,
@ -177,13 +177,13 @@ class ConnectorGoogleSpeech:
) )
try: try:
response = self.speech_client.recognize(config=fallback_config, audio=audio) response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
logger.debug(f"Google Cloud fallback response: {response}") logger.debug(f"Google Cloud fallback response: {response}")
except Exception as fallback_error: except Exception as fallbackError:
logger.error(f"Google Cloud fallback error: {fallback_error}") logger.error(f"Google Cloud fallback error: {fallbackError}")
raise api_error raise apiError
else: else:
raise api_error raise apiError
# Process results # Process results
if response.results: if response.results:
@ -234,18 +234,18 @@ class ConnectorGoogleSpeech:
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16: if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
# For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts # For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
if audio_format != "webm_opus": if audioFormat != "webm_opus":
# Try LINEAR16 with detected sample rate for non-WEBM formats # Try LINEAR16 with detected sample rate for non-WEBM formats
fallback_configs.append({ fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": sample_rate, "sample_rate": sampleRate,
"channels": channels, "channels": channels,
"use_sample_rate": True, "use_sample_rate": True,
"description": f"LINEAR16 with {sample_rate}Hz" "description": f"LINEAR16 with {sampleRate}Hz"
}) })
# For WEBM_OPUS, only try compatible sample rates or skip sample rate specification # For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
if audio_format == "webm_opus": if audioFormat == "webm_opus":
# Try WEBM_OPUS without sample rate specification (let Google read from header) # Try WEBM_OPUS without sample rate specification (let Google read from header)
fallback_configs.append({ fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS, "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
@ -273,7 +273,7 @@ class ConnectorGoogleSpeech:
else: else:
# For other formats, try standard sample rates # For other formats, try standard sample rates
for std_rate in [16000, 8000, 22050, 44100]: for std_rate in [16000, 8000, 22050, 44100]:
if std_rate != sample_rate: if std_rate != sampleRate:
fallback_configs.append({ fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16, "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": std_rate, "sample_rate": std_rate,

View file

@ -3,8 +3,8 @@
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from enum import Enum from enum import Enum
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
import uuid import uuid
@ -26,7 +26,7 @@ class ChatStat(BaseModel):
priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation") priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation")
register_model_labels( registerModelLabels(
"ChatStat", "ChatStat",
{"en": "Chat Statistics", "fr": "Statistiques de chat"}, {"en": "Chat Statistics", "fr": "Statistiques de chat"},
{ {
@ -51,7 +51,7 @@ class ChatLog(BaseModel):
message: str = Field(description="Log message") message: str = Field(description="Log message")
type: str = Field(description="Log type (info, warning, error, etc.)") type: str = Field(description="Log type (info, warning, error, etc.)")
timestamp: float = Field( timestamp: float = Field(
default_factory=get_utc_timestamp, default_factory=getUtcTimestamp,
description="When the log entry was created (UTC timestamp in seconds)", description="When the log entry was created (UTC timestamp in seconds)",
) )
status: Optional[str] = Field(None, description="Status of the log entry") status: Optional[str] = Field(None, description="Status of the log entry")
@ -63,7 +63,7 @@ class ChatLog(BaseModel):
) )
register_model_labels( registerModelLabels(
"ChatLog", "ChatLog",
{"en": "Chat Log", "fr": "Journal de chat"}, {"en": "Chat Log", "fr": "Journal de chat"},
{ {
@ -96,7 +96,7 @@ class ChatDocument(BaseModel):
) )
register_model_labels( registerModelLabels(
"ChatDocument", "ChatDocument",
{"en": "Chat Document", "fr": "Document de chat"}, {"en": "Chat Document", "fr": "Document de chat"},
{ {
@ -133,7 +133,7 @@ class ContentMetadata(BaseModel):
base64Encoded: bool = Field(description="Whether the data is base64 encoded") base64Encoded: bool = Field(description="Whether the data is base64 encoded")
register_model_labels( registerModelLabels(
"ContentMetadata", "ContentMetadata",
{"en": "Content Metadata", "fr": "Métadonnées du contenu"}, {"en": "Content Metadata", "fr": "Métadonnées du contenu"},
{ {
@ -157,7 +157,7 @@ class ContentItem(BaseModel):
metadata: ContentMetadata = Field(description="Content metadata") metadata: ContentMetadata = Field(description="Content metadata")
register_model_labels( registerModelLabels(
"ContentItem", "ContentItem",
{"en": "Content Item", "fr": "Élément de contenu"}, {"en": "Content Item", "fr": "Élément de contenu"},
{ {
@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel):
) )
register_model_labels( registerModelLabels(
"ChatContentExtracted", "ChatContentExtracted",
{"en": "Extracted Content", "fr": "Contenu extrait"}, {"en": "Extracted Content", "fr": "Contenu extrait"},
{ {
@ -209,7 +209,7 @@ class ChatMessage(BaseModel):
description="Sequence number of the message (set automatically)" description="Sequence number of the message (set automatically)"
) )
publishedAt: float = Field( publishedAt: float = Field(
default_factory=get_utc_timestamp, default_factory=getUtcTimestamp,
description="When the message was published (UTC timestamp in seconds)", description="When the message was published (UTC timestamp in seconds)",
) )
success: Optional[bool] = Field( success: Optional[bool] = Field(
@ -235,7 +235,7 @@ class ChatMessage(BaseModel):
) )
register_model_labels( registerModelLabels(
"ChatMessage", "ChatMessage",
{"en": "Chat Message", "fr": "Message de chat"}, {"en": "Chat Message", "fr": "Message de chat"},
{ {
@ -331,14 +331,14 @@ class ChatWorkflow(BaseModel):
frontend_required=False, frontend_required=False,
) )
lastActivity: float = Field( lastActivity: float = Field(
default_factory=get_utc_timestamp, default_factory=getUtcTimestamp,
description="Timestamp of last activity (UTC timestamp in seconds)", description="Timestamp of last activity (UTC timestamp in seconds)",
frontend_type="timestamp", frontend_type="timestamp",
frontend_readonly=True, frontend_readonly=True,
frontend_required=False, frontend_required=False,
) )
startedAt: float = Field( startedAt: float = Field(
default_factory=get_utc_timestamp, default_factory=getUtcTimestamp,
description="When the workflow started (UTC timestamp in seconds)", description="When the workflow started (UTC timestamp in seconds)",
frontend_type="timestamp", frontend_type="timestamp",
frontend_readonly=True, frontend_readonly=True,
@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel):
) )
register_model_labels( registerModelLabels(
"ChatWorkflow", "ChatWorkflow",
{"en": "Chat Workflow", "fr": "Flux de travail de chat"}, {"en": "Chat Workflow", "fr": "Flux de travail de chat"},
{ {
@ -426,7 +426,7 @@ class UserInputRequest(BaseModel):
userLanguage: str = Field(default="en", description="User's preferred language") userLanguage: str = Field(default="en", description="User's preferred language")
register_model_labels( registerModelLabels(
"UserInputRequest", "UserInputRequest",
{"en": "User Input Request", "fr": "Demande de saisie utilisateur"}, {"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
{ {
@ -445,7 +445,7 @@ class ActionDocument(BaseModel):
mimeType: str = Field(description="MIME type of the document") mimeType: str = Field(description="MIME type of the document")
register_model_labels( registerModelLabels(
"ActionDocument", "ActionDocument",
{"en": "Action Document", "fr": "Document d'action"}, {"en": "Action Document", "fr": "Document d'action"},
{ {
@ -485,7 +485,7 @@ class ActionResult(BaseModel):
return cls(success=False, documents=documents or [], error=error) return cls(success=False, documents=documents or [], error=error)
register_model_labels( registerModelLabels(
"ActionResult", "ActionResult",
{"en": "Action Result", "fr": "Résultat de l'action"}, {"en": "Action Result", "fr": "Résultat de l'action"},
{ {
@ -504,7 +504,7 @@ class ActionSelection(BaseModel):
) )
register_model_labels( registerModelLabels(
"ActionSelection", "ActionSelection",
{"en": "Action Selection", "fr": "Sélection d'action"}, {"en": "Action Selection", "fr": "Sélection d'action"},
{ {
@ -520,7 +520,7 @@ class ActionParameters(BaseModel):
) )
register_model_labels( registerModelLabels(
"ActionParameters", "ActionParameters",
{"en": "Action Parameters", "fr": "Paramètres d'action"}, {"en": "Action Parameters", "fr": "Paramètres d'action"},
{ {
@ -535,7 +535,7 @@ class ObservationPreview(BaseModel):
snippet: str = Field(description="Short snippet or summary") snippet: str = Field(description="Short snippet or summary")
register_model_labels( registerModelLabels(
"ObservationPreview", "ObservationPreview",
{"en": "Observation Preview", "fr": "Aperçu d'observation"}, {"en": "Observation Preview", "fr": "Aperçu d'observation"},
{ {
@ -558,7 +558,7 @@ class Observation(BaseModel):
) )
register_model_labels( registerModelLabels(
"Observation", "Observation",
{"en": "Observation", "fr": "Observation"}, {"en": "Observation", "fr": "Observation"},
{ {
@ -579,7 +579,7 @@ class TaskStatus(str, Enum):
CANCELLED = "cancelled" CANCELLED = "cancelled"
register_model_labels( registerModelLabels(
"TaskStatus", "TaskStatus",
{"en": "Task Status", "fr": "Statut de la tâche"}, {"en": "Task Status", "fr": "Statut de la tâche"},
{ {
@ -599,7 +599,7 @@ class DocumentExchange(BaseModel):
) )
register_model_labels( registerModelLabels(
"DocumentExchange", "DocumentExchange",
{"en": "Document Exchange", "fr": "Échange de documents"}, {"en": "Document Exchange", "fr": "Échange de documents"},
{ {
@ -650,7 +650,7 @@ class ActionItem(BaseModel):
self.error = error_message self.error = error_message
register_model_labels( registerModelLabels(
"ActionItem", "ActionItem",
{"en": "Task Action", "fr": "Action de tâche"}, {"en": "Task Action", "fr": "Action de tâche"},
{ {
@ -683,7 +683,7 @@ class TaskResult(BaseModel):
error: Optional[str] = Field(None, description="Error message if task failed") error: Optional[str] = Field(None, description="Error message if task failed")
register_model_labels( registerModelLabels(
"TaskResult", "TaskResult",
{"en": "Task Result", "fr": "Résultat de tâche"}, {"en": "Task Result", "fr": "Résultat de tâche"},
{ {
@ -728,7 +728,7 @@ class TaskItem(BaseModel):
) )
register_model_labels( registerModelLabels(
"TaskItem", "TaskItem",
{"en": "Task", "fr": "Tâche"}, {"en": "Task", "fr": "Tâche"},
{ {
@ -758,7 +758,7 @@ class TaskStep(BaseModel):
) )
register_model_labels( registerModelLabels(
"TaskStep", "TaskStep",
{"en": "Task Step", "fr": "Étape de tâche"}, {"en": "Task Step", "fr": "Étape de tâche"},
{ {
@ -805,7 +805,7 @@ class TaskHandover(BaseModel):
) )
register_model_labels( registerModelLabels(
"TaskHandover", "TaskHandover",
{"en": "Task Handover", "fr": "Transfert de tâche"}, {"en": "Task Handover", "fr": "Transfert de tâche"},
{ {
@ -879,7 +879,7 @@ class ReviewResult(BaseModel):
) )
register_model_labels( registerModelLabels(
"ReviewResult", "ReviewResult",
{"en": "Review Result", "fr": "Résultat de l'évaluation"}, {"en": "Review Result", "fr": "Résultat de l'évaluation"},
{ {
@ -904,7 +904,7 @@ class TaskPlan(BaseModel):
) )
register_model_labels( registerModelLabels(
"TaskPlan", "TaskPlan",
{"en": "Task Plan", "fr": "Plan de tâches"}, {"en": "Task Plan", "fr": "Plan de tâches"},
{ {
@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel):
) )
register_model_labels( registerModelLabels(
"PromptPlaceholder", "PromptPlaceholder",
{"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"}, {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
{ {
@ -943,7 +943,7 @@ class PromptBundle(BaseModel):
placeholders: List[PromptPlaceholder] = Field(default_factory=list) placeholders: List[PromptPlaceholder] = Field(default_factory=list)
register_model_labels( registerModelLabels(
"PromptBundle", "PromptBundle",
{"en": "Prompt Bundle", "fr": "Lot d'invite"}, {"en": "Prompt Bundle", "fr": "Lot d'invite"},
{ {

View file

@ -81,11 +81,11 @@ class StructuredDocument(BaseModel):
summary: Optional[str] = Field(default=None, description="Document summary") summary: Optional[str] = Field(default=None, description="Document summary")
tags: List[str] = Field(default_factory=list, description="Document tags") tags: List[str] = Field(default_factory=list, description="Document tags")
def get_sections_by_type(self, content_type: str) -> List[DocumentSection]: def getSectionsByType(self, content_type: str) -> List[DocumentSection]:
"""Get all sections of a specific content type.""" """Get all sections of a specific content type."""
return [section for section in self.sections if section.content_type == content_type] return [section for section in self.sections if section.content_type == content_type]
def get_all_tables(self) -> List[TableData]: def getAllTables(self) -> List[TableData]:
"""Get all table data from the document.""" """Get all table data from the document."""
tables = [] tables = []
for section in self.sections: for section in self.sections:
@ -94,7 +94,7 @@ class StructuredDocument(BaseModel):
tables.append(element) tables.append(element)
return tables return tables
def get_all_lists(self) -> List[BulletList]: def getAllLists(self) -> List[BulletList]:
"""Get all lists from the document.""" """Get all lists from the document."""
lists = [] lists = []
for section in self.sections: for section in self.sections:

View file

@ -2,8 +2,8 @@
from typing import Dict, Any, Optional, Union from typing import Dict, Any, Optional, Union
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
import uuid import uuid
import base64 import base64
@ -15,9 +15,9 @@ class FileItem(BaseModel):
mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False) fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False) fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False)
creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels( registerModelLabels(
"FileItem", "FileItem",
{"en": "File Item", "fr": "Élément de fichier"}, {"en": "File Item", "fr": "Élément de fichier"},
{ {
@ -45,7 +45,7 @@ class FilePreview(BaseModel):
if isinstance(data.get("content"), bytes): if isinstance(data.get("content"), bytes):
data["content"] = base64.b64encode(data["content"]).decode("utf-8") data["content"] = base64.b64encode(data["content"]).decode("utf-8")
return data return data
register_model_labels( registerModelLabels(
"FilePreview", "FilePreview",
{"en": "File Preview", "fr": "Aperçu du fichier"}, {"en": "File Preview", "fr": "Aperçu du fichier"},
{ {
@ -62,7 +62,7 @@ class FileData(BaseModel):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
data: str = Field(description="File data content") data: str = Field(description="File data content")
base64Encoded: bool = Field(description="Whether the data is base64 encoded") base64Encoded: bool = Field(description="Whether the data is base64 encoded")
register_model_labels( registerModelLabels(
"FileData", "FileData",
{"en": "File Data", "fr": "Données de fichier"}, {"en": "File Data", "fr": "Données de fichier"},
{ {

View file

@ -0,0 +1,90 @@
"""
Unified JSON document schema and helpers used by both generation prompts and renderers.
This defines a single canonical template and the supported section types.
"""
from typing import List
# Canonical list of supported section types across the system.
# These strings match the "content_type" values used by the example sections
# in jsonTemplateDocument (defined further down in this module).
# NOTE(review): "image" is listed here, but jsonTemplateDocument contains no
# example section with that content_type — confirm the omission is intended.
supportedSectionTypes: List[str] = [
"table",
"bullet_list",
"heading",
"paragraph",
"code_block",
"image",
]
# Canonical JSON template used for AI generation (documents array + sections).
# Rendering pipelines can select the first document and read its sections.
#
# The value is JSON *text* held in a plain string, not a parsed object.
# Layout: a top-level "metadata" object plus a "documents" array holding one
# document whose "sections" array has one example each for the content types
# "heading", "paragraph", "bullet_list", "table" and "code_block".
# Every example section carries "order": 0.
# "{{DOCUMENT_TITLE}}" is a placeholder inside the document title —
# presumably substituted by the caller before use; verify against callers.
# NOTE(review): the text is full of literal JSON braces, so it cannot be passed
# through str.format as-is.
jsonTemplateDocument: str = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [
{
"id": "doc_1",
"title": "{{DOCUMENT_TITLE}}",
"filename": "document.json",
"sections": [
{
"id": "section_heading_example",
"content_type": "heading",
"elements": [
{"level": 1, "text": "Heading Text"}
],
"order": 0
},
{
"id": "section_paragraph_example",
"content_type": "paragraph",
"elements": [
{"text": "Paragraph text content"}
],
"order": 0
},
{
"id": "section_bullet_list_example",
"content_type": "bullet_list",
"elements": [
{
"items": ["Item 1", "Item 2"]
}
],
"order": 0
},
{
"id": "section_table_example",
"content_type": "table",
"elements": [
{
"headers": ["Column 1", "Column 2"],
"rows": [
["Row 1 Col 1", "Row 1 Col 2"],
["Row 2 Col 1", "Row 2 Col 2"]
],
"caption": "Table caption"
}
],
"order": 0
},
{
"id": "section_code_example",
"content_type": "code_block",
"elements": [
{
"code": "function example() { return true; }",
"language": "javascript"
}
],
"order": 0
}
]
}
]
}"""

View file

@ -3,7 +3,7 @@
import uuid import uuid
from typing import Optional from typing import Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
class DataNeutraliserConfig(BaseModel): class DataNeutraliserConfig(BaseModel):
@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel):
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False) namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False) sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False) sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
register_model_labels( registerModelLabels(
"DataNeutraliserConfig", "DataNeutraliserConfig",
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"}, {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
{ {
@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel):
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True) originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True) patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
register_model_labels( registerModelLabels(
"DataNeutralizerAttributes", "DataNeutralizerAttributes",
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"}, {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
{ {

View file

@ -2,8 +2,8 @@
from typing import Optional from typing import Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
from .datamodelUam import AuthAuthority from .datamodelUam import AuthAuthority
from enum import Enum from enum import Enum
import uuid import uuid
@ -51,7 +51,7 @@ class Token(BaseModel):
use_enum_values = True use_enum_values = True
register_model_labels( registerModelLabels(
"Token", "Token",
{"en": "Token", "fr": "Jeton"}, {"en": "Token", "fr": "Jeton"},
{ {
@ -95,7 +95,7 @@ class AuthEvent(BaseModel):
frontend_required=True, frontend_required=True,
) )
timestamp: float = Field( timestamp: float = Field(
default_factory=get_utc_timestamp, default_factory=getUtcTimestamp,
description="Unix timestamp when the event occurred", description="Unix timestamp when the event occurred",
frontend_type="datetime", frontend_type="datetime",
frontend_readonly=True, frontend_readonly=True,
@ -131,7 +131,7 @@ class AuthEvent(BaseModel):
) )
register_model_labels( registerModelLabels(
"AuthEvent", "AuthEvent",
{"en": "Authentication Event", "fr": "Événement d'authentification"}, {"en": "Authentication Event", "fr": "Événement d'authentification"},
{ {

View file

@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel):
class TicketBase(ABC): class TicketBase(ABC):
@abstractmethod @abstractmethod
async def read_attributes(self) -> list[TicketFieldAttribute]: ... async def readAttributes(self) -> list[TicketFieldAttribute]: ...
@abstractmethod @abstractmethod
async def read_tasks(self, *, limit: int = 0) -> list[dict]: ... async def readTasks(self, *, limit: int = 0) -> list[dict]: ...
@abstractmethod @abstractmethod
async def write_tasks(self, tasklist: list[dict]) -> None: ... async def writeTasks(self, tasklist: list[dict]) -> None: ...

View file

@ -4,8 +4,8 @@ import uuid
from typing import Optional from typing import Optional
from enum import Enum from enum import Enum
from pydantic import BaseModel, Field, EmailStr from pydantic import BaseModel, Field, EmailStr
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
class AuthAuthority(str, Enum): class AuthAuthority(str, Enum):
@ -34,7 +34,7 @@ class Mandate(BaseModel):
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
]) ])
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
register_model_labels( registerModelLabels(
"Mandate", "Mandate",
{"en": "Mandate", "fr": "Mandat"}, {"en": "Mandate", "fr": "Mandat"},
{ {
@ -62,8 +62,8 @@ class UserConnection(BaseModel):
{"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}}, {"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}},
{"value": "pending", "label": {"en": "Pending", "fr": "En attente"}}, {"value": "pending", "label": {"en": "Pending", "fr": "En attente"}},
]) ])
connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[ tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[
{"value": "active", "label": {"en": "Active", "fr": "Actif"}}, {"value": "active", "label": {"en": "Active", "fr": "Actif"}},
@ -71,7 +71,7 @@ class UserConnection(BaseModel):
{"value": "none", "label": {"en": "None", "fr": "Aucun"}}, {"value": "none", "label": {"en": "None", "fr": "Aucun"}},
]) ])
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels( registerModelLabels(
"UserConnection", "UserConnection",
{"en": "User Connection", "fr": "Connexion utilisateur"}, {"en": "User Connection", "fr": "Connexion utilisateur"},
{ {
@ -113,7 +113,7 @@ class User(BaseModel):
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}}, {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
]) ])
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
register_model_labels( registerModelLabels(
"User", "User",
{"en": "User", "fr": "Utilisateur"}, {"en": "User", "fr": "Utilisateur"},
{ {
@ -131,7 +131,7 @@ register_model_labels(
class UserInDB(User): class UserInDB(User):
hashedPassword: Optional[str] = Field(None, description="Hash of the user password") hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
register_model_labels( registerModelLabels(
"UserInDB", "UserInDB",
{"en": "User Access", "fr": "Accès de l'utilisateur"}, {"en": "User Access", "fr": "Accès de l'utilisateur"},
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}}, {"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},

View file

@ -1,7 +1,7 @@
"""Utility datamodels: Prompt.""" """Utility datamodels: Prompt."""
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
import uuid import uuid
@ -10,7 +10,7 @@ class Prompt(BaseModel):
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True) content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True) name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
register_model_labels( registerModelLabels(
"Prompt", "Prompt",
{"en": "Prompt", "fr": "Invite"}, {"en": "Prompt", "fr": "Invite"},
{ {

View file

@ -1,8 +1,8 @@
"""Voice settings datamodel.""" """Voice settings datamodel."""
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
import uuid import uuid
@ -15,11 +15,11 @@ class VoiceSettings(BaseModel):
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True) ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True)
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False) targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False)
creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
lastModified: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels( registerModelLabels(
"VoiceSettings", "VoiceSettings",
{"en": "Voice Settings", "fr": "Paramètres vocaux"}, {"en": "Voice Settings", "fr": "Paramètres vocaux"},
{ {

View file

@ -43,10 +43,6 @@ class NeutralizationPlayground:
'errors': errors, 'errors': errors,
} }
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
processor = SharepointProcessor(self.currentUser, self.services)
return await processor.processSharepointFiles(sourcePath, targetPath)
# Cleanup attributes # Cleanup attributes
def cleanAttributes(self, fileId: str) -> bool: def cleanAttributes(self, fileId: str) -> bool:
@ -77,49 +73,51 @@ class NeutralizationPlayground:
} }
# Additional methods needed by the route # Additional methods needed by the route
def get_config(self) -> Optional[DataNeutraliserConfig]: def getConfig(self) -> Optional[DataNeutraliserConfig]:
"""Get neutralization configuration""" """Get neutralization configuration"""
return self.services.neutralization.getConfig() return self.services.neutralization.getConfig()
def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig: def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig:
"""Save neutralization configuration""" """Save neutralization configuration"""
return self.services.neutralization.saveConfig(config_data) return self.services.neutralization.saveConfig(configData)
def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]: def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]:
"""Neutralize text content""" """Neutralize text content"""
return self.services.neutralization.processText(text) return self.services.neutralization.processText(text)
def resolve_text(self, text: str) -> str: def resolveText(self, text: str) -> str:
"""Resolve UIDs in neutralized text back to original text""" """Resolve UIDs in neutralized text back to original text"""
return self.services.neutralization.resolveText(text) return self.services.neutralization.resolveText(text)
def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]: def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID""" """Get neutralization attributes, optionally filtered by file ID"""
try: try:
all_attributes = self.services.neutralization.getAttributes() allAttributes = self.services.neutralization.getAttributes()
if file_id: if fileId:
return [attr for attr in all_attributes if attr.fileId == file_id] return [attr for attr in allAttributes if attr.fileId == fileId]
return all_attributes return allAttributes
except Exception as e: except Exception as e:
logger.error(f"Error getting attributes: {str(e)}") logger.error(f"Error getting attributes: {str(e)}")
return [] return []
async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]: async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
"""Process files from SharePoint source path and store neutralized files in target path""" """Process files from SharePoint source path and store neutralized files in target path"""
return await self.processSharepointFiles(source_path, target_path) from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
processor = SharepointProcessor(self.currentUser, self.services)
return await processor.processSharepointFiles(sourcePath, targetPath)
def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]: def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Process multiple files for neutralization""" """Process multiple files for neutralization"""
file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')] fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')]
return self.processFiles(file_ids) return self.processFiles(fileIds)
def get_processing_stats(self) -> Dict[str, Any]: def getProcessingStats(self) -> Dict[str, Any]:
"""Get neutralization processing statistics""" """Get neutralization processing statistics"""
return self.getStats() return self.getStats()
def cleanup_file_attributes(self, file_id: str) -> bool: def cleanupFileAttributes(self, fileId: str) -> bool:
"""Clean up neutralization attributes for a specific file""" """Clean up neutralization attributes for a specific file"""
return self.cleanAttributes(file_id) return self.cleanAttributes(fileId)
# Internal SharePoint helper module separated to keep feature logic tidy # Internal SharePoint helper module separated to keep feature logic tidy
@ -208,7 +206,7 @@ class SharepointProcessor:
siteUrl, _ = self._parseSharepointPath(sharepointPath) siteUrl, _ = self._parseSharepointPath(sharepointPath)
if not siteUrl: if not siteUrl:
return False return False
siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl) siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl)
return siteInfo is not None return siteInfo is not None
except Exception: except Exception:
return False return False
@ -219,17 +217,17 @@ class SharepointProcessor:
targetSite, targetFolder = self._parseSharepointPath(targetPath) targetSite, targetFolder = self._parseSharepointPath(targetPath)
if not sourceSite or not targetSite: if not sourceSite or not targetSite:
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']} return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite) sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite)
if not sourceSiteInfo: if not sourceSiteInfo:
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']} return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite) targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite)
if not targetSiteInfo: if not targetSiteInfo:
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']} return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}") logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder) files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder)
if not files: if not files:
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder") logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '') files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '')
if files: if files:
folders = [f for f in files if f.get('type') == 'folder'] folders = [f for f in files if f.get('type') == 'folder']
folderNames = [f.get('name') for f in folders] folderNames = [f.get('name') for f in folders]
@ -251,7 +249,7 @@ class SharepointProcessor:
async def _processSingle(fileInfo: Dict[str, Any]): async def _processSingle(fileInfo: Dict[str, Any]):
try: try:
fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id']) fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id'])
if not fileContent: if not fileContent:
return {'error': f"Failed to download file: {fileInfo['name']}"} return {'error': f"Failed to download file: {fileInfo['name']}"}
try: try:
@ -260,7 +258,7 @@ class SharepointProcessor:
textContent = fileContent.decode('latin-1') textContent = fileContent.decode('latin-1')
result = self.services.neutralization.processText(textContent) result = self.services.neutralization.processText(textContent)
neutralizedFilename = f"neutralized_{fileInfo['name']}" neutralizedFilename = f"neutralized_{fileInfo['name']}"
uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8')) uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
if 'error' in uploadResult: if 'error' in uploadResult:
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"} return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
return { return {

View file

@ -204,9 +204,9 @@ class ManagerSyncDelta:
logger.info( logger.info(
f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}" f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
) )
resolved = await self.services.sharepoint.find_site_by_url( resolved = await self.services.sharepoint.findSiteByUrl(
hostname=self.SHAREPOINT_HOSTNAME, hostname=self.SHAREPOINT_HOSTNAME,
site_path=self.SHAREPOINT_SITE_PATH sitePath=self.SHAREPOINT_SITE_PATH
) )
if not resolved: if not resolved:
@ -223,9 +223,9 @@ class ManagerSyncDelta:
# Test site access by listing root of the drive # Test site access by listing root of the drive
logger.info("Testing site access using resolved site ID...") logger.info("Testing site access using resolved site ID...")
test_result = await self.services.sharepoint.list_folder_contents( test_result = await self.services.sharepoint.listFolderContents(
site_id=self.targetSite["id"], siteId=self.targetSite["id"],
folder_path="" folderPath=""
) )
if test_result is not None: if test_result is not None:
@ -293,8 +293,8 @@ class ManagerSyncDelta:
existing_headers = {"header1": "Header 1", "header2": "Header 2"} existing_headers = {"header1": "Header 1", "header2": "Header 2"}
try: try:
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}" file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
excel_content = await self.services.sharepoint.download_file_by_path( excel_content = await self.services.sharepoint.downloadFileByPath(
site_id=self.targetSite['id'], file_path=file_path siteId=self.targetSite['id'], filePath=file_path
) )
existing_data, existing_headers = self.parseExcelContent(excel_content) existing_data, existing_headers = self.parseExcelContent(excel_content)
except Exception: except Exception:
@ -307,16 +307,16 @@ class ManagerSyncDelta:
await self.backupSharepointFile(filename=sync_file_name) await self.backupSharepointFile(filename=sync_file_name)
excel_bytes = self.createExcelContent(merged_data, existing_headers) excel_bytes = self.createExcelContent(merged_data, existing_headers)
await self.services.sharepoint.upload_file( await self.services.sharepoint.uploadFile(
site_id=self.targetSite['id'], siteId=self.targetSite['id'],
folder_path=self.SHAREPOINT_MAIN_FOLDER, folderPath=self.SHAREPOINT_MAIN_FOLDER,
file_name=sync_file_name, fileName=sync_file_name,
content=excel_bytes, content=excel_bytes,
) )
# Import back to tickets # Import back to tickets
try: try:
excel_content = await self.services.sharepoint.download_file_by_path( excel_content = await self.services.sharepoint.downloadFileByPath(
site_id=self.targetSite['id'], file_path=file_path siteId=self.targetSite['id'], filePath=file_path
) )
excel_rows, _ = self.parseExcelContent(excel_content) excel_rows, _ = self.parseExcelContent(excel_content)
self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets") self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets")
@ -333,8 +333,8 @@ class ManagerSyncDelta:
existing_data: list[dict] = [] existing_data: list[dict] = []
try: try:
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}" file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
csv_content = await self.services.sharepoint.download_file_by_path( csv_content = await self.services.sharepoint.downloadFileByPath(
site_id=self.targetSite['id'], file_path=file_path siteId=self.targetSite['id'], filePath=file_path
) )
csv_lines = csv_content.decode('utf-8').split('\n') csv_lines = csv_content.decode('utf-8').split('\n')
if len(csv_lines) >= 2: if len(csv_lines) >= 2:
@ -348,16 +348,16 @@ class ManagerSyncDelta:
await self.backupSharepointFile(filename=sync_file_name) await self.backupSharepointFile(filename=sync_file_name)
merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data) merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data)
csv_bytes = self.createCsvContent(merged_data, existing_headers) csv_bytes = self.createCsvContent(merged_data, existing_headers)
await self.services.sharepoint.upload_file( await self.services.sharepoint.uploadFile(
site_id=self.targetSite['id'], siteId=self.targetSite['id'],
folder_path=self.SHAREPOINT_MAIN_FOLDER, folderPath=self.SHAREPOINT_MAIN_FOLDER,
file_name=sync_file_name, fileName=sync_file_name,
content=csv_bytes, content=csv_bytes,
) )
# Import from CSV # Import from CSV
try: try:
csv_content = await self.services.sharepoint.download_file_by_path( csv_content = await self.services.sharepoint.downloadFileByPath(
site_id=self.targetSite['id'], file_path=file_path siteId=self.targetSite['id'], filePath=file_path
) )
df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python') df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python')
csv_rows = df.to_dict('records') csv_rows = df.to_dict('records')
@ -388,12 +388,12 @@ class ManagerSyncDelta:
try: try:
timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S") timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S")
backup_filename = f"backup_{timestamp}_{filename}" backup_filename = f"backup_{timestamp}_{filename}"
await self.services.sharepoint.copy_file_async( await self.services.sharepoint.copyFileAsync(
site_id=self.targetSite['id'], siteId=self.targetSite['id'],
source_folder=self.SHAREPOINT_MAIN_FOLDER, sourceFolder=self.SHAREPOINT_MAIN_FOLDER,
source_file=filename, sourceFile=filename,
dest_folder=self.SHAREPOINT_BACKUP_FOLDER, destFolder=self.SHAREPOINT_BACKUP_FOLDER,
dest_file=backup_filename, destFile=backup_filename,
) )
self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}") self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}")
return True return True
@ -679,7 +679,7 @@ class ManagerSyncDelta:
connectorType=connectorType, connectorType=connectorType,
connectorParams=connectorParams, connectorParams=connectorParams,
) )
attributes = await ticket_interface.connector_ticket.read_attributes() attributes = await ticket_interface.connector_ticket.readAttributes()
if not attributes: if not attributes:
logger.warning("No ticket attributes returned; nothing to write.") logger.warning("No ticket attributes returned; nothing to write.")
return False return False
@ -713,7 +713,7 @@ class ManagerSyncDelta:
connectorType=connectorType, connectorType=connectorType,
connectorParams=connectorParams, connectorParams=connectorParams,
) )
tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit) tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit)
if not tickets: if not tickets:
logger.warning("No tickets returned; nothing to write.") logger.warning("No tickets returned; nothing to write.")
return False return False

View file

@ -54,8 +54,6 @@ class AiObjects:
# No need to manually create connectors - they're auto-discovered # No need to manually create connectors - they're auto-discovered
return cls() return cls()
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str: def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
"""Select the best model using dynamic model selection system.""" """Select the best model using dynamic model selection system."""
# Get available models from the dynamic registry # Get available models from the dynamic registry

View file

@ -10,7 +10,7 @@ import uuid
from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
from modules.interfaces.interfaceDbAppAccess import AppAccess from modules.interfaces.interfaceDbAppAccess import AppAccess
from modules.datamodels.datamodelUam import ( from modules.datamodels.datamodelUam import (
User, User,
@ -604,8 +604,8 @@ class AppObjects:
externalUsername=externalUsername, externalUsername=externalUsername,
externalEmail=externalEmail, externalEmail=externalEmail,
status=status, status=status,
connectedAt=get_utc_timestamp(), connectedAt=getUtcTimestamp(),
lastChecked=get_utc_timestamp(), lastChecked=getUtcTimestamp(),
expiresAt=None, # Optional field, set to None by default expiresAt=None, # Optional field, set to None by default
) )
@ -755,7 +755,7 @@ class AppObjects:
if not token.id: if not token.id:
token.id = str(uuid.uuid4()) token.id = str(uuid.uuid4())
if not token.createdAt: if not token.createdAt:
token.createdAt = get_utc_timestamp() token.createdAt = getUtcTimestamp()
# If replace_existing is True, delete old access tokens for this user and authority first # If replace_existing is True, delete old access tokens for this user and authority first
if replace_existing: if replace_existing:
@ -822,7 +822,7 @@ class AppObjects:
if not token.id: if not token.id:
token.id = str(uuid.uuid4()) token.id = str(uuid.uuid4())
if not token.createdAt: if not token.createdAt:
token.createdAt = get_utc_timestamp() token.createdAt = getUtcTimestamp()
# Convert to dict and ensure all fields are properly set # Convert to dict and ensure all fields are properly set
token_dict = token.model_dump() token_dict = token.model_dump()
@ -932,7 +932,7 @@ class AppObjects:
return True return True
tokenUpdate = { tokenUpdate = {
"status": TokenStatus.REVOKED, "status": TokenStatus.REVOKED,
"revokedAt": get_utc_timestamp(), "revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy, "revokedBy": revokedBy,
"reason": reason or "revoked", "reason": reason or "revoked",
} }
@ -970,7 +970,7 @@ class AppObjects:
t["id"], t["id"],
{ {
"status": TokenStatus.REVOKED, "status": TokenStatus.REVOKED,
"revokedAt": get_utc_timestamp(), "revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy, "revokedBy": revokedBy,
"reason": reason or "session logout", "reason": reason or "session logout",
}, },
@ -1008,7 +1008,7 @@ class AppObjects:
t["id"], t["id"],
{ {
"status": TokenStatus.REVOKED, "status": TokenStatus.REVOKED,
"revokedAt": get_utc_timestamp(), "revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy, "revokedBy": revokedBy,
"reason": reason or "admin revoke", "reason": reason or "admin revoke",
}, },
@ -1022,7 +1022,7 @@ class AppObjects:
def cleanupExpiredTokens(self) -> int: def cleanupExpiredTokens(self) -> int:
"""Clean up expired tokens for all connections, returns count of cleaned tokens""" """Clean up expired tokens for all connections, returns count of cleaned tokens"""
try: try:
current_time = get_utc_timestamp() current_time = getUtcTimestamp()
cleaned_count = 0 cleaned_count = 0
# Get all tokens # Get all tokens
@ -1100,7 +1100,7 @@ class AppObjects:
# Update existing config # Update existing config
update_data = existing_config.model_dump() update_data = existing_config.model_dump()
update_data.update(config_data) update_data.update(config_data)
update_data["updatedAt"] = get_utc_timestamp() update_data["updatedAt"] = getUtcTimestamp()
updated_config = DataNeutraliserConfig(**update_data) updated_config = DataNeutraliserConfig(**update_data)
self.db.recordModify( self.db.recordModify(

View file

@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User
# DYNAMIC PART: Connectors to the Interface # DYNAMIC PART: Connectors to the Interface
from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
# Basic Configurations # Basic Configurations
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
@ -66,56 +66,56 @@ class ChatObjects:
# ===== Generic Utility Methods ===== # ===== Generic Utility Methods =====
def _is_object_field(self, field_type) -> bool: def _isObjectField(self, fieldType) -> bool:
"""Check if a field type represents a complex object (not a simple type).""" """Check if a field type represents a complex object (not a simple type)."""
# Simple scalar types # Simple scalar types
if field_type in (str, int, float, bool, type(None)): if fieldType in (str, int, float, bool, type(None)):
return False return False
# Everything else is an object # Everything else is an object
return True return True
def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
"""Separate simple fields from object fields based on Pydantic model structure.""" """Separate simple fields from object fields based on Pydantic model structure."""
simple_fields = {} simpleFields = {}
object_fields = {} objectFields = {}
# Get field information from the Pydantic model # Get field information from the Pydantic model
model_fields = model_class.model_fields modelFields = model_class.model_fields
for field_name, value in data.items(): for fieldName, value in data.items():
# Check if this field should be stored as JSONB in the database # Check if this field should be stored as JSONB in the database
if field_name in model_fields: if fieldName in modelFields:
field_info = model_fields[field_name] fieldInfo = modelFields[fieldName]
# Pydantic v2 only # Pydantic v2 only
field_type = field_info.annotation fieldType = fieldInfo.annotation
# Always route relational/object fields to object_fields for separate handling # Always route relational/object fields to object_fields for separate handling
if field_name in ['documents', 'stats']: if fieldName in ['documents', 'stats']:
object_fields[field_name] = value objectFields[fieldName] = value
continue continue
# Check if this is a JSONB field (Dict, List, or complex types) # Check if this is a JSONB field (Dict, List, or complex types)
if (field_type == dict or if (fieldType == dict or
field_type == list or fieldType == list or
(hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or
field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']): fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
# Store as JSONB - include in simple_fields for database storage # Store as JSONB - include in simple_fields for database storage
simple_fields[field_name] = value simpleFields[fieldName] = value
elif isinstance(value, (str, int, float, bool, type(None))): elif isinstance(value, (str, int, float, bool, type(None))):
# Simple scalar types # Simple scalar types
simple_fields[field_name] = value simpleFields[fieldName] = value
else: else:
# Complex objects that should be filtered out # Complex objects that should be filtered out
object_fields[field_name] = value objectFields[fieldName] = value
else: else:
# Field not in model - treat as scalar if simple, otherwise filter out # Field not in model - treat as scalar if simple, otherwise filter out
if isinstance(value, (str, int, float, bool, type(None))): if isinstance(value, (str, int, float, bool, type(None))):
simple_fields[field_name] = value simpleFields[fieldName] = value
else: else:
object_fields[field_name] = value objectFields[fieldName] = value
return simple_fields, object_fields return simpleFields, objectFields
def _initializeServices(self): def _initializeServices(self):
pass pass
@ -240,8 +240,8 @@ class ChatObjects:
currentAction=workflow.get("currentAction", 0), currentAction=workflow.get("currentAction", 0),
totalTasks=workflow.get("totalTasks", 0), totalTasks=workflow.get("totalTasks", 0),
totalActions=workflow.get("totalActions", 0), totalActions=workflow.get("totalActions", 0),
lastActivity=workflow.get("lastActivity", get_utc_timestamp()), lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
startedAt=workflow.get("startedAt", get_utc_timestamp()), startedAt=workflow.get("startedAt", getUtcTimestamp()),
logs=logs, logs=logs,
messages=messages, messages=messages,
stats=stats, stats=stats,
@ -257,7 +257,7 @@ class ChatObjects:
raise PermissionError("No permission to create workflows") raise PermissionError("No permission to create workflows")
# Set timestamp if not present # Set timestamp if not present
currentTime = get_utc_timestamp() currentTime = getUtcTimestamp()
if "startedAt" not in workflowData: if "startedAt" not in workflowData:
workflowData["startedAt"] = currentTime workflowData["startedAt"] = currentTime
@ -265,10 +265,10 @@ class ChatObjects:
workflowData["lastActivity"] = currentTime workflowData["lastActivity"] = currentTime
# Use generic field separation based on ChatWorkflow model # Use generic field separation based on ChatWorkflow model
simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
# Create workflow in database # Create workflow in database
created = self.db.recordCreate(ChatWorkflow, simple_fields) created = self.db.recordCreate(ChatWorkflow, simpleFields)
# Convert to ChatWorkflow model (empty related data for new workflow) # Convert to ChatWorkflow model (empty related data for new workflow)
@ -302,13 +302,13 @@ class ChatObjects:
raise PermissionError(f"No permission to update workflow {workflowId}") raise PermissionError(f"No permission to update workflow {workflowId}")
# Use generic field separation based on ChatWorkflow model # Use generic field separation based on ChatWorkflow model
simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
# Set update time for main workflow # Set update time for main workflow
simple_fields["lastActivity"] = get_utc_timestamp() simpleFields["lastActivity"] = getUtcTimestamp()
# Update main workflow in database # Update main workflow in database
updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields) updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields)
# Removed cascade writes for logs/messages/stats during workflow update. # Removed cascade writes for logs/messages/stats during workflow update.
# CUD for child entities must be executed via dedicated service methods. # CUD for child entities must be executed via dedicated service methods.
@ -423,7 +423,7 @@ class ChatObjects:
role=msg.get("role", "assistant"), role=msg.get("role", "assistant"),
status=msg.get("status", "step"), status=msg.get("status", "step"),
sequenceNr=msg.get("sequenceNr", 0), sequenceNr=msg.get("sequenceNr", 0),
publishedAt=msg.get("publishedAt", get_utc_timestamp()), publishedAt=msg.get("publishedAt", getUtcTimestamp()),
success=msg.get("success"), success=msg.get("success"),
actionId=msg.get("actionId"), actionId=msg.get("actionId"),
actionMethod=msg.get("actionMethod"), actionMethod=msg.get("actionMethod"),
@ -490,20 +490,30 @@ class ChatObjects:
messageData["actionNumber"] = workflow.currentAction messageData["actionNumber"] = workflow.currentAction
# Use generic field separation based on ChatMessage model # Use generic field separation based on ChatMessage model
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
# Handle documents separately - they will be stored in normalized documents table # Handle documents separately - they will be stored in normalized documents table
documents_to_create = object_fields.get("documents", []) documents_to_create = objectFields.get("documents", [])
# Create message in normalized table using only simple fields # Create message in normalized table using only simple fields
createdMessage = self.db.recordCreate(ChatMessage, simple_fields) createdMessage = self.db.recordCreate(ChatMessage, simpleFields)
# Create documents in normalized documents table # Create documents in normalized documents table
created_documents = [] created_documents = []
for doc_data in documents_to_create: for doc_data in documents_to_create:
# Use the document data directly # Normalize to plain dict before assignment
doc_dict = doc_data if isinstance(doc_data, ChatDocument):
doc_dict = doc_data.model_dump()
elif isinstance(doc_data, dict):
doc_dict = dict(doc_data)
else:
# Attempt to coerce to ChatDocument then dump
try:
doc_dict = ChatDocument(**doc_data).model_dump()
except Exception:
logger.error("Invalid document data type for message creation")
continue
doc_dict["messageId"] = createdMessage["id"] doc_dict["messageId"] = createdMessage["id"]
created_doc = self.createDocument(doc_dict) created_doc = self.createDocument(doc_dict)
@ -522,8 +532,8 @@ class ChatObjects:
role=createdMessage.get("role", "assistant"), role=createdMessage.get("role", "assistant"),
status=createdMessage.get("status", "step"), status=createdMessage.get("status", "step"),
sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number
publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()), publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()),
stats=object_fields.get("stats"), # Use stats from object_fields stats=objectFields.get("stats"), # Use stats from objectFields
roundNumber=createdMessage.get("roundNumber"), roundNumber=createdMessage.get("roundNumber"),
taskNumber=createdMessage.get("taskNumber"), taskNumber=createdMessage.get("taskNumber"),
actionNumber=createdMessage.get("actionNumber"), actionNumber=createdMessage.get("actionNumber"),
@ -588,31 +598,41 @@ class ChatObjects:
raise PermissionError(f"No permission to modify workflow {workflowId}") raise PermissionError(f"No permission to modify workflow {workflowId}")
# Use generic field separation based on ChatMessage model # Use generic field separation based on ChatMessage model
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
# Ensure required fields present # Ensure required fields present
for key in ["role", "agentName"]: for key in ["role", "agentName"]:
if key not in simple_fields and key not in existingMessage: if key not in simpleFields and key not in existingMessage:
simple_fields[key] = "assistant" if key == "role" else "" simpleFields[key] = "assistant" if key == "role" else ""
# Ensure ID is in the dataset # Ensure ID is in the dataset
if 'id' not in simple_fields: if 'id' not in simpleFields:
simple_fields['id'] = messageId simpleFields['id'] = messageId
# Convert createdAt to startedAt if needed # Convert createdAt to startedAt if needed
if "createdAt" in simple_fields and "startedAt" not in simple_fields: if "createdAt" in simpleFields and "startedAt" not in simpleFields:
simple_fields["startedAt"] = simple_fields["createdAt"] simpleFields["startedAt"] = simpleFields["createdAt"]
del simple_fields["createdAt"] del simpleFields["createdAt"]
# Update the message with simple fields only # Update the message with simple fields only
updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields) updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields)
# Handle object field updates (documents, stats) inline # Handle object field updates (documents, stats) inline
if 'documents' in object_fields: if 'documents' in objectFields:
documents_data = object_fields['documents'] documents_data = objectFields['documents']
try: try:
for doc_data in documents_data: for doc_data in documents_data:
doc_dict = doc_data # Normalize to dict before mutation
if isinstance(doc_data, ChatDocument):
doc_dict = doc_data.model_dump()
elif isinstance(doc_data, dict):
doc_dict = dict(doc_data)
else:
try:
doc_dict = ChatDocument(**doc_data).model_dump()
except Exception:
logger.error("Invalid document data type for message update")
continue
doc_dict["messageId"] = messageId doc_dict["messageId"] = messageId
self.createDocument(doc_dict) self.createDocument(doc_dict)
except Exception as e: except Exception as e:
@ -732,11 +752,9 @@ class ChatObjects:
def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument: def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
"""Creates a document for a message in normalized table.""" """Creates a document for a message in normalized table."""
try: try:
# Validate document data # Validate and normalize document data to dict
document = ChatDocument(**documentData) document = ChatDocument(**documentData)
created = self.db.recordCreate(ChatDocument, document.model_dump())
# Create document in normalized table
created = self.db.recordCreate(ChatDocument, document)
return ChatDocument(**created) return ChatDocument(**created)
@ -785,7 +803,7 @@ class ChatObjects:
# Make sure required fields are present # Make sure required fields are present
if "timestamp" not in logData: if "timestamp" not in logData:
logData["timestamp"] = get_utc_timestamp() logData["timestamp"] = getUtcTimestamp()
# Add status information if not present # Add status information if not present
if "status" not in logData and "type" in logData: if "status" not in logData and "type" in logData:
@ -882,7 +900,7 @@ class ChatObjects:
messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
for msg in messages: for msg in messages:
# Apply timestamp filtering in Python # Apply timestamp filtering in Python
msg_timestamp = msg.get("publishedAt", get_utc_timestamp()) msg_timestamp = msg.get("publishedAt", getUtcTimestamp())
if afterTimestamp is not None and msg_timestamp <= afterTimestamp: if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
continue continue
@ -900,7 +918,7 @@ class ChatObjects:
role=msg.get("role", "assistant"), role=msg.get("role", "assistant"),
status=msg.get("status", "step"), status=msg.get("status", "step"),
sequenceNr=msg.get("sequenceNr", 0), sequenceNr=msg.get("sequenceNr", 0),
publishedAt=msg.get("publishedAt", get_utc_timestamp()), publishedAt=msg.get("publishedAt", getUtcTimestamp()),
success=msg.get("success"), success=msg.get("success"),
actionId=msg.get("actionId"), actionId=msg.get("actionId"),
actionMethod=msg.get("actionMethod"), actionMethod=msg.get("actionMethod"),
@ -923,7 +941,7 @@ class ChatObjects:
logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
for log in logs: for log in logs:
# Apply timestamp filtering in Python # Apply timestamp filtering in Python
log_timestamp = log.get("timestamp", get_utc_timestamp()) log_timestamp = log.get("timestamp", getUtcTimestamp())
if afterTimestamp is not None and log_timestamp <= afterTimestamp: if afterTimestamp is not None and log_timestamp <= afterTimestamp:
continue continue
@ -938,7 +956,7 @@ class ChatObjects:
stats = self.getStats(workflowId) stats = self.getStats(workflowId)
for stat in stats: for stat in stats:
# Apply timestamp filtering in Python # Apply timestamp filtering in Python
stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp() stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp()
if afterTimestamp is not None and stat_timestamp <= afterTimestamp: if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
continue continue

View file

@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt
from modules.datamodels.datamodelVoice import VoiceSettings from modules.datamodels.datamodelVoice import VoiceSettings
from modules.datamodels.datamodelUam import User, Mandate from modules.datamodels.datamodelUam import User, Mandate
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -466,7 +466,7 @@ class ComponentObjects:
# Ensure proper values, use defaults for invalid data # Ensure proper values, use defaults for invalid data
creationDate = file.get("creationDate") creationDate = file.get("creationDate")
if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0: if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0:
creationDate = get_utc_timestamp() creationDate = getUtcTimestamp()
fileName = file.get("fileName") fileName = file.get("fileName")
if not fileName or fileName == "None": if not fileName or fileName == "None":
@ -503,7 +503,7 @@ class ComponentObjects:
# Get creation date from record or use current time # Get creation date from record or use current time
creationDate = file.get("creationDate") creationDate = file.get("creationDate")
if not creationDate: if not creationDate:
creationDate = get_utc_timestamp() creationDate = getUtcTimestamp()
return FileItem( return FileItem(
id=file.get("id"), id=file.get("id"),
@ -881,9 +881,9 @@ class ComponentObjects:
# Ensure timestamps are set for validation # Ensure timestamps are set for validation
settings_data = filteredSettings[0] settings_data = filteredSettings[0]
if not settings_data.get("creationDate"): if not settings_data.get("creationDate"):
settings_data["creationDate"] = get_utc_timestamp() settings_data["creationDate"] = getUtcTimestamp()
if not settings_data.get("lastModified"): if not settings_data.get("lastModified"):
settings_data["lastModified"] = get_utc_timestamp() settings_data["lastModified"] = getUtcTimestamp()
return VoiceSettings(**settings_data) return VoiceSettings(**settings_data)
@ -931,7 +931,7 @@ class ComponentObjects:
raise ValueError(f"Voice settings not found for user {userId}") raise ValueError(f"Voice settings not found for user {userId}")
# Update lastModified timestamp # Update lastModified timestamp
updateData["lastModified"] = get_utc_timestamp() updateData["lastModified"] = getUtcTimestamp()
# Update voice settings record # Update voice settings record
success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData) success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData)

View file

@ -31,7 +31,7 @@ class TicketInterface:
self.task_sync_definition = task_sync_definition self.task_sync_definition = task_sync_definition
async def exportTicketsAsList(self) -> list[dict]: async def exportTicketsAsList(self) -> list[dict]:
tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0) tickets: list[dict] = await self.connector_ticket.readTasks(limit=0)
transformed_tasks = self._transformTicketRecords(tickets, includePut=True) transformed_tasks = self._transformTicketRecords(tickets, includePut=True)
# Return plain dictionaries filtered by presence of ID # Return plain dictionaries filtered by presence of ID
rows: list[dict] = [] rows: list[dict] = []
@ -57,7 +57,7 @@ class TicketInterface:
if fields: if fields:
updates.append({"ID": task_id, "fields": fields}) updates.append({"ID": task_id, "fields": fields})
if updates: if updates:
await self.connector_ticket.write_tasks(updates) await self.connector_ticket.writeTasks(updates)
def _transformTicketRecords( def _transformTicketRecords(
self, tasks: list[dict], includePut: bool = False self, tasks: list[dict], includePut: bool = False

View file

@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List
from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech
from modules.datamodels.datamodelVoice import VoiceSettings from modules.datamodels.datamodelVoice import VoiceSettings
from modules.datamodels.datamodelUam import User from modules.datamodels.datamodelUam import User
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -269,7 +269,7 @@ class VoiceObjects:
logger.info(f"Creating voice settings: {settingsData}") logger.info(f"Creating voice settings: {settingsData}")
# Add timestamps # Add timestamps
currentTime = get_utc_timestamp() currentTime = getUtcTimestamp()
settingsData["creationDate"] = currentTime settingsData["creationDate"] = currentTime
settingsData["lastModified"] = currentTime settingsData["lastModified"] = currentTime
@ -298,7 +298,7 @@ class VoiceObjects:
logger.info(f"Updating voice settings for user {userId}: {settingsData}") logger.info(f"Updating voice settings for user {userId}: {settingsData}")
# Add last modified timestamp # Add last modified timestamp
settingsData["lastModified"] = get_utc_timestamp() settingsData["lastModified"] = getUtcTimestamp()
# Create updated VoiceSettings object # Create updated VoiceSettings object
voiceSettings = VoiceSettings(**settingsData) voiceSettings = VoiceSettings(**settingsData)

View file

@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority,
from modules.datamodels.datamodelSecurity import Token from modules.datamodels.datamodelSecurity import Token
from modules.security.auth import getCurrentUser, limiter from modules.security.auth import getCurrentUser, limiter
from modules.interfaces.interfaceDbAppObjects import getInterface from modules.interfaces.interfaceDbAppObjects import getInterface
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -64,7 +64,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str,
if not expires_at: if not expires_at:
return "none", None return "none", None
current_time = get_utc_timestamp() current_time = getUtcTimestamp()
# Add 5 minute buffer for proactive refresh # Add 5 minute buffer for proactive refresh
buffer_time = 5 * 60 # 5 minutes in seconds buffer_time = 5 * 60 # 5 minutes in seconds
@ -247,7 +247,7 @@ async def update_connection(
setattr(connection, field, value) setattr(connection, field, value)
# Update lastChecked timestamp using UTC timestamp # Update lastChecked timestamp using UTC timestamp
connection.lastChecked = get_utc_timestamp() connection.lastChecked = getUtcTimestamp()
# Update connection - models now handle timestamp serialization automatically # Update connection - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connectionId, connection.model_dump()) interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
@ -382,7 +382,7 @@ async def disconnect_service(
# Update connection status # Update connection status
connection.status = ConnectionStatus.INACTIVE connection.status = ConnectionStatus.INACTIVE
connection.lastChecked = get_utc_timestamp() connection.lastChecked = getUtcTimestamp()
# Update connection record - models now handle timestamp serialization automatically # Update connection record - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connectionId, connection.model_dump()) interface.db.recordModify(UserConnection, connectionId, connection.model_dump())

View file

@ -35,7 +35,7 @@ async def get_neutralization_config(
"""Get data neutralization configuration""" """Get data neutralization configuration"""
try: try:
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
config = service.get_config() config = service.getConfig()
if not config: if not config:
# Return default config instead of 404 # Return default config instead of 404
@ -69,7 +69,7 @@ async def save_neutralization_config(
"""Save or update data neutralization configuration""" """Save or update data neutralization configuration"""
try: try:
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
config = service.save_config(config_data) config = service.saveConfig(config_data)
return config return config
@ -99,7 +99,7 @@ async def neutralize_text(
) )
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
result = service.neutralize_text(text, file_id) result = service.neutralizeText(text, file_id)
return result return result
@ -130,7 +130,7 @@ async def resolve_text(
) )
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
resolved_text = service.resolve_text(text) resolved_text = service.resolveText(text)
return {"resolved_text": resolved_text} return {"resolved_text": resolved_text}
@ -153,7 +153,7 @@ async def get_neutralization_attributes(
"""Get neutralization attributes, optionally filtered by file ID""" """Get neutralization attributes, optionally filtered by file ID"""
try: try:
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
attributes = service.get_attributes(fileId) attributes = service.getAttributes(fileId)
return attributes return attributes
@ -183,7 +183,7 @@ async def process_sharepoint_files(
) )
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
result = await service.process_sharepoint_files(source_path, target_path) result = await service.processSharepointFiles(source_path, target_path)
return result return result
@ -212,7 +212,7 @@ async def batch_process_files(
) )
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
result = service.batch_neutralize_files(files_data) result = service.batchNeutralizeFiles(files_data)
return result return result
@ -234,7 +234,7 @@ async def get_neutralization_stats(
"""Get neutralization processing statistics""" """Get neutralization processing statistics"""
try: try:
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
stats = service.get_processing_stats() stats = service.getProcessingStats()
return stats return stats
@ -255,7 +255,7 @@ async def cleanup_file_attributes(
"""Clean up neutralization attributes for a specific file""" """Clean up neutralization attributes for a specific file"""
try: try:
service = NeutralizationPlayground(currentUser) service = NeutralizationPlayground(currentUser)
success = service.cleanup_file_attributes(fileId) success = service.cleanupFileAttributes(fileId)
if success: if success:
return {"message": f"Successfully cleaned up attributes for file {fileId}"} return {"message": f"Successfully cleaned up attributes for file {fileId}"}

View file

@ -181,9 +181,9 @@ async def reset_user_password(
# Log password reset # Log password reset
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_security_event( audit_logger.logSecurityEvent(
user_id=str(currentUser.id), userId=str(currentUser.id),
mandate_id=str(currentUser.mandateId), mandateId=str(currentUser.mandateId),
action="password_reset", action="password_reset",
details=f"Reset password for user {userId}" details=f"Reset password for user {userId}"
) )
@ -257,9 +257,9 @@ async def change_password(
# Log password change # Log password change
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_security_event( audit_logger.logSecurityEvent(
user_id=str(currentUser.id), userId=str(currentUser.id),
mandate_id=str(currentUser.mandateId), mandateId=str(currentUser.mandateId),
action="password_change", action="password_change",
details="User changed their own password" details="User changed their own password"
) )

View file

@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG
from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
from modules.security.auth import getCurrentUser, limiter from modules.security.auth import getCurrentUser, limiter
from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenRefresh=token_response.get("refresh_token", ""), tokenRefresh=token_response.get("refresh_token", ""),
tokenType="bearer", tokenType="bearer",
expiresAt=jwt_expires_at.timestamp(), expiresAt=jwt_expires_at.timestamp(),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
# Save access token (no connectionId) # Save access token (no connectionId)
@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
logger.info(f"Updating connection {connection_id} for user {user.username}") logger.info(f"Updating connection {connection_id} for user {user.username}")
# Update connection with external service details # Update connection with external service details
connection.status = ConnectionStatus.ACTIVE connection.status = ConnectionStatus.ACTIVE
connection.lastChecked = get_utc_timestamp() connection.lastChecked = getUtcTimestamp()
connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0) connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
connection.externalId = user_info.get("id") connection.externalId = user_info.get("id")
connection.externalUsername = user_info.get("email") connection.externalUsername = user_info.get("email")
connection.externalEmail = user_info.get("email") connection.externalEmail = user_info.get("email")
@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"], tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""), tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"), tokenType=token_response.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
interface.saveConnectionToken(token) interface.saveConnectionToken(token)
@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
id: '{connection.id}', id: '{connection.id}',
status: 'connected', status: 'connected',
type: 'google', type: 'google',
lastChecked: {get_utc_timestamp()}, lastChecked: {getUtcTimestamp()},
expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))} expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
}} }}
}}, '*'); }}, '*');
// Wait for message to be sent before closing // Wait for message to be sent before closing
@ -592,11 +592,11 @@ async def logout(
# Log successful logout # Log successful logout
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access( audit_logger.logUserAccess(
user_id=str(currentUser.id), userId=str(currentUser.id),
mandate_id=str(currentUser.mandateId), mandateId=str(currentUser.mandateId),
action="logout", action="logout",
success_info="google_auth_logout" successInfo="google_auth_logout"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails
@ -726,12 +726,12 @@ async def refresh_token(
# Update the connection status and timing # Update the connection status and timing
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
google_connection.lastChecked = get_utc_timestamp() google_connection.lastChecked = getUtcTimestamp()
google_connection.status = ConnectionStatus.ACTIVE google_connection.status = ConnectionStatus.ACTIVE
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump()) appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump())
# Calculate time until expiration # Calculate time until expiration
current_time = get_utc_timestamp() current_time = getUtcTimestamp()
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0 expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
return { return {

View file

@ -131,11 +131,11 @@ async def login(
# Log successful login # Log successful login
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access( audit_logger.logUserAccess(
user_id=str(user.id), userId=str(user.id),
mandate_id=str(user.mandateId), mandateId=str(user.mandateId),
action="login", action="login",
success_info="local_auth_success" successInfo="local_auth_success"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails
@ -159,11 +159,11 @@ async def login(
# Log failed login attempt # Log failed login attempt
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access( audit_logger.logUserAccess(
user_id="unknown", userId="unknown",
mandate_id="unknown", mandateId="unknown",
action="login", action="login",
success_info=f"failed: {error_msg}" successInfo=f"failed: {error_msg}"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails
@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
# Log successful logout # Log successful logout
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access( audit_logger.logUserAccess(
user_id=str(currentUser.id), userId=str(currentUser.id),
mandate_id=str(currentUser.mandateId), mandateId=str(currentUser.mandateId),
action="logout", action="logout",
success_info=f"revoked_tokens: {revoked}" successInfo=f"revoked_tokens: {revoked}"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails

View file

@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu
from modules.datamodels.datamodelSecurity import Token from modules.datamodels.datamodelSecurity import Token
from modules.security.auth import getCurrentUser, limiter from modules.security.auth import getCurrentUser, limiter
from modules.security.jwtService import createAccessToken from modules.security.jwtService import createAccessToken
from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"], tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""), tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"), tokenType=token_response.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
# Save access token (no connectionId) # Save access token (no connectionId)
@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=jwt_token, tokenAccess=jwt_token,
tokenType="bearer", tokenType="bearer",
expiresAt=jwt_expires_at.timestamp(), expiresAt=jwt_expires_at.timestamp(),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
# Save JWT access token # Save JWT access token
@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
logger.info(f"Updating connection {connection_id} for user {user.username}") logger.info(f"Updating connection {connection_id} for user {user.username}")
# Update connection with external service details # Update connection with external service details
connection.status = ConnectionStatus.ACTIVE connection.status = ConnectionStatus.ACTIVE
connection.lastChecked = get_utc_timestamp() connection.lastChecked = getUtcTimestamp()
connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0) connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
connection.externalId = user_info.get("id") connection.externalId = user_info.get("id")
connection.externalUsername = user_info.get("userPrincipalName") connection.externalUsername = user_info.get("userPrincipalName")
connection.externalEmail = user_info.get("mail") connection.externalEmail = user_info.get("mail")
@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"], tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""), tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"), tokenType=token_response.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)), expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
id: '{connection.id}', id: '{connection.id}',
status: 'connected', status: 'connected',
type: 'msft', type: 'msft',
lastChecked: {get_utc_timestamp()}, lastChecked: {getUtcTimestamp()},
expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))} expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
}} }}
}}, '*'); }}, '*');
// Wait for message to be sent before closing // Wait for message to be sent before closing
@ -467,11 +467,11 @@ async def logout(
# Log successful logout # Log successful logout
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access( audit_logger.logUserAccess(
user_id=str(currentUser.id), userId=str(currentUser.id),
mandate_id=str(currentUser.mandateId), mandateId=str(currentUser.mandateId),
action="logout", action="logout",
success_info="microsoft_auth_logout" successInfo="microsoft_auth_logout"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails
@ -575,27 +575,27 @@ async def refresh_token(
from modules.security.tokenManager import TokenManager from modules.security.tokenManager import TokenManager
token_manager = TokenManager() token_manager = TokenManager()
refreshed_token = token_manager.refresh_token(current_token) refreshedToken = token_manager.refreshToken(current_token)
if refreshed_token: if refreshedToken:
# Save the new connection token (which will automatically replace old ones) # Save the new connection token (which will automatically replace old ones)
appInterface.saveConnectionToken(refreshed_token) appInterface.saveConnectionToken(refreshedToken)
# Update the connection's expiration time # Update the connection's expiration time
msft_connection.expiresAt = float(refreshed_token.expiresAt) msft_connection.expiresAt = float(refreshedToken.expiresAt)
msft_connection.lastChecked = get_utc_timestamp() msft_connection.lastChecked = getUtcTimestamp()
msft_connection.status = ConnectionStatus.ACTIVE msft_connection.status = ConnectionStatus.ACTIVE
# Save updated connection # Save updated connection
appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump()) appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump())
# Calculate time until expiration # Calculate time until expiration
current_time = get_utc_timestamp() current_time = getUtcTimestamp()
expires_in = int(refreshed_token.expiresAt - current_time) expiresIn = int(refreshedToken.expiresAt - current_time)
return { return {
"message": "Token refreshed successfully", "message": "Token refreshed successfully",
"expires_at": refreshed_token.expiresAt, "expires_at": refreshedToken.expiresAt,
"expires_in_seconds": expires_in "expires_in_seconds": expiresIn
} }
else: else:
raise HTTPException( raise HTTPException(

View file

@ -18,26 +18,26 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"]) router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
# Store active WebSocket connections # Store active WebSocket connections
active_connections: Dict[str, WebSocket] = {} activeConnections: Dict[str, WebSocket] = {}
class ConnectionManager: class ConnectionManager:
def __init__(self): def __init__(self):
self.active_connections: List[WebSocket] = [] self.activeConnections: List[WebSocket] = []
async def connect(self, websocket: WebSocket, connection_id: str): async def connect(self, websocket: WebSocket, connectionId: str):
await websocket.accept() await websocket.accept()
self.active_connections.append(websocket) self.activeConnections.append(websocket)
active_connections[connection_id] = websocket activeConnections[connectionId] = websocket
logger.info(f"WebSocket connected: {connection_id}") logger.info(f"WebSocket connected: {connectionId}")
def disconnect(self, websocket: WebSocket, connection_id: str): def disconnect(self, websocket: WebSocket, connectionId: str):
if websocket in self.active_connections: if websocket in self.activeConnections:
self.active_connections.remove(websocket) self.activeConnections.remove(websocket)
if connection_id in active_connections: if connectionId in activeConnections:
del active_connections[connection_id] del activeConnections[connectionId]
logger.info(f"WebSocket disconnected: {connection_id}") logger.info(f"WebSocket disconnected: {connectionId}")
async def send_personal_message(self, message: dict, websocket: WebSocket): async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
try: try:
await websocket.send_text(json.dumps(message)) await websocket.send_text(json.dumps(message))
except Exception as e: except Exception as e:
@ -45,10 +45,10 @@ class ConnectionManager:
manager = ConnectionManager() manager = ConnectionManager()
def get_voice_interface(current_user: User) -> VoiceObjects: def _getVoiceInterface(currentUser: User) -> VoiceObjects:
"""Get voice interface instance with user context.""" """Get voice interface instance with user context."""
try: try:
return getVoiceInterface(current_user) return getVoiceInterface(currentUser)
except Exception as e: except Exception as e:
logger.error(f"Failed to initialize voice interface: {e}") logger.error(f"Failed to initialize voice interface: {e}")
raise HTTPException( raise HTTPException(
@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects:
@router.post("/speech-to-text") @router.post("/speech-to-text")
async def speech_to_text( async def speech_to_text(
audio_file: UploadFile = File(...), audioFile: UploadFile = File(...),
language: str = Form("de-DE"), language: str = Form("de-DE"),
current_user: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Convert speech to text using Google Cloud Speech-to-Text API.""" """Convert speech to text using Google Cloud Speech-to-Text API."""
try: try:
logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}") logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")
# Read audio file # Read audio file
audio_content = await audio_file.read() audioContent = await audioFile.read()
logger.info(f"📊 Audio file size: {len(audio_content)} bytes") logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
# Get voice interface # Get voice interface
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
# Validate audio format # Validate audio format
validation = voice_interface.validateAudioFormat(audio_content) validation = voiceInterface.validateAudioFormat(audioContent)
if not validation["valid"]: if not validation["valid"]:
raise HTTPException( raise HTTPException(
@ -83,8 +83,8 @@ async def speech_to_text(
) )
# Perform speech recognition # Perform speech recognition
result = await voice_interface.speechToText( result = await voiceInterface.speechToText(
audioContent=audio_content, audioContent=audioContent,
language=language language=language
) )
@ -95,7 +95,7 @@ async def speech_to_text(
"confidence": result["confidence"], "confidence": result["confidence"],
"language": result["language"], "language": result["language"],
"audio_info": { "audio_info": {
"size": len(audio_content), "size": len(audioContent),
"format": validation["format"], "format": validation["format"],
"estimated_duration": validation.get("estimated_duration", 0) "estimated_duration": validation.get("estimated_duration", 0)
} }
@ -118,13 +118,13 @@ async def speech_to_text(
@router.post("/translate") @router.post("/translate")
async def translate_text( async def translate_text(
text: str = Form(...), text: str = Form(...),
source_language: str = Form("de"), sourceLanguage: str = Form("de"),
target_language: str = Form("en"), targetLanguage: str = Form("en"),
current_user: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Translate text using Google Cloud Translation API.""" """Translate text using Google Cloud Translation API."""
try: try:
logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})") logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})")
if not text.strip(): if not text.strip():
raise HTTPException( raise HTTPException(
@ -133,13 +133,13 @@ async def translate_text(
) )
# Get voice interface # Get voice interface
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
# Perform translation # Perform translation
result = await voice_interface.translateText( result = await voiceInterface.translateText(
text=text, text=text,
sourceLanguage=source_language, sourceLanguage=sourceLanguage,
targetLanguage=target_language targetLanguage=targetLanguage
) )
if result["success"]: if result["success"]:
@ -167,21 +167,21 @@ async def translate_text(
@router.post("/realtime-interpreter") @router.post("/realtime-interpreter")
async def realtime_interpreter( async def realtime_interpreter(
audio_file: UploadFile = File(...), audioFile: UploadFile = File(...),
from_language: str = Form("de-DE"), fromLanguage: str = Form("de-DE"),
to_language: str = Form("en-US"), toLanguage: str = Form("en-US"),
connection_id: str = Form(None), connectionId: str = Form(None),
current_user: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Real-time interpreter: speech to translated text using Google Cloud APIs.""" """Real-time interpreter: speech to translated text using Google Cloud APIs."""
try: try:
logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}") logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
logger.info(f" From: {from_language} -> To: {to_language}") logger.info(f" From: {fromLanguage} -> To: {toLanguage}")
logger.info(f" MIME type: {audio_file.content_type}") logger.info(f" MIME type: {audioFile.content_type}")
# Read audio file # Read audio file
audio_content = await audio_file.read() audioContent = await audioFile.read()
logger.info(f"📊 Audio file size: {len(audio_content)} bytes") logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
# Save audio file for debugging with correct extension # Save audio file for debugging with correct extension
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav" # file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
@ -192,10 +192,10 @@ async def realtime_interpreter(
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}") # logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
# Get voice interface # Get voice interface
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
# Validate audio format # Validate audio format
validation = voice_interface.validateAudioFormat(audio_content) validation = voiceInterface.validateAudioFormat(audioContent)
if not validation["valid"]: if not validation["valid"]:
raise HTTPException( raise HTTPException(
@ -204,10 +204,10 @@ async def realtime_interpreter(
) )
# Perform complete pipeline: Speech-to-Text + Translation # Perform complete pipeline: Speech-to-Text + Translation
result = await voice_interface.speechToTranslatedText( result = await voiceInterface.speechToTranslatedText(
audioContent=audio_content, audioContent=audioContent,
fromLanguage=from_language, fromLanguage=fromLanguage,
toLanguage=to_language toLanguage=toLanguage
) )
if result["success"]: if result["success"]:
@ -223,7 +223,7 @@ async def realtime_interpreter(
"source_language": result["source_language"], "source_language": result["source_language"],
"target_language": result["target_language"], "target_language": result["target_language"],
"audio_info": { "audio_info": {
"size": len(audio_content), "size": len(audioContent),
"format": validation["format"], "format": validation["format"],
"estimated_duration": validation.get("estimated_duration", 0) "estimated_duration": validation.get("estimated_duration", 0)
} }
@ -249,7 +249,7 @@ async def text_to_speech(
text: str = Form(...), text: str = Form(...),
language: str = Form("de-DE"), language: str = Form("de-DE"),
voice: str = Form(None), voice: str = Form(None),
current_user: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Convert text to speech using Google Cloud Text-to-Speech.""" """Convert text to speech using Google Cloud Text-to-Speech."""
try: try:
@ -261,8 +261,8 @@ async def text_to_speech(
detail="Empty text provided for text-to-speech" detail="Empty text provided for text-to-speech"
) )
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
result = await voice_interface.textToSpeech( result = await voiceInterface.textToSpeech(
text=text, text=text,
languageCode=language, languageCode=language,
voiceName=voice voiceName=voice
@ -294,13 +294,13 @@ async def text_to_speech(
) )
@router.get("/languages") @router.get("/languages")
async def get_available_languages(current_user: User = Depends(getCurrentUser)): async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
"""Get available languages from Google Cloud Text-to-Speech.""" """Get available languages from Google Cloud Text-to-Speech."""
try: try:
logger.info("🌐 Getting available languages from Google Cloud TTS") logger.info("🌐 Getting available languages from Google Cloud TTS")
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
result = await voice_interface.getAvailableLanguages() result = await voiceInterface.getAvailableLanguages()
if result["success"]: if result["success"]:
return { return {
@ -324,21 +324,21 @@ async def get_available_languages(current_user: User = Depends(getCurrentUser)):
@router.get("/voices") @router.get("/voices")
async def get_available_voices( async def get_available_voices(
language_code: Optional[str] = None, languageCode: Optional[str] = None,
current_user: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Get available voices from Google Cloud Text-to-Speech.""" """Get available voices from Google Cloud Text-to-Speech."""
try: try:
logger.info(f"🎤 Getting available voices, language filter: {language_code}") logger.info(f"🎤 Getting available voices, language filter: {languageCode}")
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
result = await voice_interface.getAvailableVoices(languageCode=language_code) result = await voiceInterface.getAvailableVoices(languageCode=languageCode)
if result["success"]: if result["success"]:
return { return {
"success": True, "success": True,
"voices": result["voices"], "voices": result["voices"],
"language_filter": language_code "language_filter": languageCode
} }
else: else:
raise HTTPException( raise HTTPException(
@ -356,11 +356,11 @@ async def get_available_voices(
) )
@router.get("/health") @router.get("/health")
async def health_check(current_user: User = Depends(getCurrentUser)): async def health_check(currentUser: User = Depends(getCurrentUser)):
"""Health check for Google Cloud voice services.""" """Health check for Google Cloud voice services."""
try: try:
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
test_result = await voice_interface.healthCheck() test_result = await voiceInterface.healthCheck()
return test_result return test_result
@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)):
} }
@router.get("/settings") @router.get("/settings")
async def get_voice_settings(current_user: User = Depends(getCurrentUser)): async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
"""Get voice settings for the current user.""" """Get voice settings for the current user."""
try: try:
logger.info(f"Getting voice settings for user: {current_user.id}") logger.info(f"Getting voice settings for user: {currentUser.id}")
# Get voice interface # Get voice interface
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
# Get or create voice settings for the user # Get or create voice settings for the user
voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id) voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id)
if voice_settings: if voice_settings:
# Return user settings # Return user settings
@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
@router.post("/settings") @router.post("/settings")
async def save_voice_settings( async def save_voice_settings(
settings: Dict[str, Any] = Body(...), settings: Dict[str, Any] = Body(...),
current_user: User = Depends(getCurrentUser) currentUser: User = Depends(getCurrentUser)
): ):
"""Save voice settings for the current user.""" """Save voice settings for the current user."""
try: try:
logger.info(f"Saving voice settings for user: {current_user.id}") logger.info(f"Saving voice settings for user: {currentUser.id}")
logger.info(f"Settings: {settings}") logger.info(f"Settings: {settings}")
# Validate required settings # Validate required settings
required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"] requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
for field in required_fields: for field in requiredFields:
if field not in settings: if field not in settings:
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
@ -448,23 +448,23 @@ async def save_voice_settings(
settings["targetLanguage"] = "en-US" settings["targetLanguage"] = "en-US"
# Get voice interface # Get voice interface
voice_interface = get_voice_interface(current_user) voiceInterface = _getVoiceInterface(currentUser)
# Check if settings already exist for this user # Check if settings already exist for this user
existing_settings = voice_interface.getVoiceSettings(current_user.id) existing_settings = voiceInterface.getVoiceSettings(currentUser.id)
if existing_settings: if existing_settings:
# Update existing settings # Update existing settings
logger.info(f"Updating existing voice settings for user {current_user.id}") logger.info(f"Updating existing voice settings for user {currentUser.id}")
updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings) updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings)
logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}") logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}")
else: else:
# Create new settings # Create new settings
logger.info(f"Creating new voice settings for user {current_user.id}") logger.info(f"Creating new voice settings for user {currentUser.id}")
# Add userId to settings # Add userId to settings
settings["userId"] = current_user.id settings["userId"] = currentUser.id
created_settings = voice_interface.createVoiceSettings(settings) created_settings = voiceInterface.createVoiceSettings(settings)
logger.info(f"Voice settings created for user {current_user.id}: {created_settings}") logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}")
return { return {
"success": True, "success": True,
@ -486,25 +486,25 @@ async def save_voice_settings(
@router.websocket("/ws/realtime-interpreter") @router.websocket("/ws/realtime-interpreter")
async def websocket_realtime_interpreter( async def websocket_realtime_interpreter(
websocket: WebSocket, websocket: WebSocket,
user_id: str = "default", userId: str = "default",
from_language: str = "de-DE", fromLanguage: str = "de-DE",
to_language: str = "en-US" toLanguage: str = "en-US"
): ):
"""WebSocket endpoint for real-time voice interpretation""" """WebSocket endpoint for real-time voice interpretation"""
connection_id = f"realtime_{user_id}_{from_language}_{to_language}" connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}"
try: try:
await manager.connect(websocket, connection_id) await manager.connect(websocket, connectionId)
# Send connection confirmation # Send connection confirmation
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "connected", "type": "connected",
"connection_id": connection_id, "connection_id": connectionId,
"message": "Connected to real-time interpreter" "message": "Connected to real-time interpreter"
}, websocket) }, websocket)
# Initialize voice interface # Initialize voice interface
voice_interface = get_voice_interface(User(id=user_id)) voiceInterface = _getVoiceInterface(User(id=userId))
while True: while True:
# Receive message from client # Receive message from client
@ -515,7 +515,7 @@ async def websocket_realtime_interpreter(
# Process audio chunk # Process audio chunk
try: try:
# Decode base64 audio data # Decode base64 audio data
audio_data = base64.b64decode(message["data"]) audioData = base64.b64decode(message["data"])
# For now, just acknowledge receipt # For now, just acknowledge receipt
# In a full implementation, this would: # In a full implementation, this would:
@ -524,9 +524,9 @@ async def websocket_realtime_interpreter(
# 3. Send partial results back # 3. Send partial results back
# 4. Handle translation # 4. Handle translation
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "audio_received", "type": "audio_received",
"chunk_size": len(audio_data), "chunk_size": len(audioData),
"timestamp": message.get("timestamp") "timestamp": message.get("timestamp")
}, websocket) }, websocket)
@ -539,7 +539,7 @@ async def websocket_realtime_interpreter(
elif message["type"] == "ping": elif message["type"] == "ping":
# Respond to ping # Respond to ping
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "pong", "type": "pong",
"timestamp": message.get("timestamp") "timestamp": message.get("timestamp")
}, websocket) }, websocket)
@ -548,32 +548,32 @@ async def websocket_realtime_interpreter(
logger.warning(f"Unknown message type: {message['type']}") logger.warning(f"Unknown message type: {message['type']}")
except WebSocketDisconnect: except WebSocketDisconnect:
manager.disconnect(websocket, connection_id) manager.disconnect(websocket, connectionId)
logger.info(f"Client disconnected: {connection_id}") logger.info(f"Client disconnected: {connectionId}")
except Exception as e: except Exception as e:
logger.error(f"WebSocket error: {e}") logger.error(f"WebSocket error: {e}")
manager.disconnect(websocket, connection_id) manager.disconnect(websocket, connectionId)
@router.websocket("/ws/speech-to-text") @router.websocket("/ws/speech-to-text")
async def websocket_speech_to_text( async def websocket_speech_to_text(
websocket: WebSocket, websocket: WebSocket,
user_id: str = "default", userId: str = "default",
language: str = "de-DE" language: str = "de-DE"
): ):
"""WebSocket endpoint for real-time speech-to-text""" """WebSocket endpoint for real-time speech-to-text"""
connection_id = f"stt_{user_id}_{language}" connectionId = f"stt_{userId}_{language}"
try: try:
await manager.connect(websocket, connection_id) await manager.connect(websocket, connectionId)
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "connected", "type": "connected",
"connection_id": connection_id, "connection_id": connectionId,
"message": "Connected to speech-to-text" "message": "Connected to speech-to-text"
}, websocket) }, websocket)
# Initialize voice interface # Initialize voice interface
voice_interface = get_voice_interface(User(id=user_id)) voiceInterface = _getVoiceInterface(User(id=userId))
while True: while True:
data = await websocket.receive_text() data = await websocket.receive_text()
@ -581,12 +581,12 @@ async def websocket_speech_to_text(
if message["type"] == "audio_chunk": if message["type"] == "audio_chunk":
try: try:
audio_data = base64.b64decode(message["data"]) audioData = base64.b64decode(message["data"])
# Process audio chunk # Process audio chunk
# This would integrate with Google Cloud Speech-to-Text streaming API # This would integrate with Google Cloud Speech-to-Text streaming API
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "transcription_result", "type": "transcription_result",
"text": "Audio chunk received", # Placeholder "text": "Audio chunk received", # Placeholder
"confidence": 0.95, "confidence": 0.95,
@ -595,39 +595,39 @@ async def websocket_speech_to_text(
except Exception as e: except Exception as e:
logger.error(f"Error processing audio: {e}") logger.error(f"Error processing audio: {e}")
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "error", "type": "error",
"error": f"Failed to process audio: {str(e)}" "error": f"Failed to process audio: {str(e)}"
}, websocket) }, websocket)
elif message["type"] == "ping": elif message["type"] == "ping":
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "pong", "type": "pong",
"timestamp": message.get("timestamp") "timestamp": message.get("timestamp")
}, websocket) }, websocket)
except WebSocketDisconnect: except WebSocketDisconnect:
manager.disconnect(websocket, connection_id) manager.disconnect(websocket, connectionId)
except Exception as e: except Exception as e:
logger.error(f"WebSocket error: {e}") logger.error(f"WebSocket error: {e}")
manager.disconnect(websocket, connection_id) manager.disconnect(websocket, connectionId)
@router.websocket("/ws/text-to-speech") @router.websocket("/ws/text-to-speech")
async def websocket_text_to_speech( async def websocket_text_to_speech(
websocket: WebSocket, websocket: WebSocket,
user_id: str = "default", userId: str = "default",
language: str = "de-DE", language: str = "de-DE",
voice: str = "de-DE-Wavenet-A" voice: str = "de-DE-Wavenet-A"
): ):
"""WebSocket endpoint for real-time text-to-speech""" """WebSocket endpoint for real-time text-to-speech"""
connection_id = f"tts_{user_id}_{language}_{voice}" connectionId = f"tts_{userId}_{language}_{voice}"
try: try:
await manager.connect(websocket, connection_id) await manager.connect(websocket, connectionId)
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "connected", "type": "connected",
"connection_id": connection_id, "connection_id": connectionId,
"message": "Connected to text-to-speech" "message": "Connected to text-to-speech"
}, websocket) }, websocket)
@ -643,7 +643,7 @@ async def websocket_text_to_speech(
# This would integrate with Google Cloud Text-to-Speech API # This would integrate with Google Cloud Text-to-Speech API
# For now, send a placeholder response # For now, send a placeholder response
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "audio_data", "type": "audio_data",
"audio": "base64_encoded_audio_here", # Placeholder "audio": "base64_encoded_audio_here", # Placeholder
"format": "mp3" "format": "mp3"
@ -651,19 +651,19 @@ async def websocket_text_to_speech(
except Exception as e: except Exception as e:
logger.error(f"Error processing text-to-speech: {e}") logger.error(f"Error processing text-to-speech: {e}")
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "error", "type": "error",
"error": f"Failed to process text: {str(e)}" "error": f"Failed to process text: {str(e)}"
}, websocket) }, websocket)
elif message["type"] == "ping": elif message["type"] == "ping":
await manager.send_personal_message({ await manager.sendPersonalMessage({
"type": "pong", "type": "pong",
"timestamp": message.get("timestamp") "timestamp": message.get("timestamp")
}, websocket) }, websocket)
except WebSocketDisconnect: except WebSocketDisconnect:
manager.disconnect(websocket, connection_id) manager.disconnect(websocket, connectionId)
except Exception as e: except Exception as e:
logger.error(f"WebSocket error: {e}") logger.error(f"WebSocket error: {e}")
manager.disconnect(websocket, connection_id) manager.disconnect(websocket, connectionId)

View file

@ -9,7 +9,7 @@ from fastapi import Response
from jose import jwt from jose import jwt
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_now from modules.shared.timezoneUtils import getUtcNow
# Config # Config
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET") SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T
import uuid import uuid
toEncode["jti"] = str(uuid.uuid4()) toEncode["jti"] = str(uuid.uuid4())
expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)) expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
toEncode.update({"exp": expire}) toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM) encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire return encodedJwt, expire
@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> Tuple[str, "datetime"]:
toEncode["jti"] = str(uuid.uuid4()) toEncode["jti"] = str(uuid.uuid4())
toEncode["type"] = "refresh" toEncode["type"] = "refresh"
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
toEncode.update({"exp": expire}) toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM) encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire return encodedJwt, expire

View file

@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable
from modules.datamodels.datamodelSecurity import Token from modules.datamodels.datamodelSecurity import Token
from modules.datamodels.datamodelUam import AuthAuthority from modules.datamodels.datamodelUam import AuthAuthority
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -27,54 +27,54 @@ class TokenManager:
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID") self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID")
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET") self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]: def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Microsoft OAuth token using refresh token""" """Refresh Microsoft OAuth token using refresh token"""
try: try:
logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}") logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}")
logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}") logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
if not self.msft_client_id or not self.msft_client_secret: if not self.msft_client_id or not self.msft_client_secret:
logger.error("Microsoft OAuth configuration not found") logger.error("Microsoft OAuth configuration not found")
return None return None
# Microsoft token refresh endpoint # Microsoft token refresh endpoint
token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token" tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}") logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}")
# Prepare refresh request # Prepare refresh request
data = { data = {
"client_id": self.msft_client_id, "client_id": self.msft_client_id,
"client_secret": self.msft_client_secret, "client_secret": self.msft_client_secret,
"grant_type": "refresh_token", "grant_type": "refresh_token",
"refresh_token": refresh_token, "refresh_token": refreshToken,
"scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read" "scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read"
} }
logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})") logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
# Make refresh request # Make refresh request
with httpx.Client(timeout=30.0) as client: with httpx.Client(timeout=30.0) as client:
logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint") logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint")
response = client.post(token_url, data=data) response = client.post(tokenUrl, data=data)
logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}") logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}")
if response.status_code == 200: if response.status_code == 200:
token_data = response.json() tokenData = response.json()
logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token") logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token")
# Create new token # Create new token
new_token = Token( newToken = Token(
userId=user_id, userId=userId,
authority=AuthAuthority.MSFT, authority=AuthAuthority.MSFT,
connectionId=old_token.connectionId, # Preserve connection ID connectionId=oldToken.connectionId, # Preserve connection ID
tokenAccess=token_data["access_token"], tokenAccess=tokenData["access_token"],
tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided
tokenType=token_data.get("token_type", "bearer"), tokenType=tokenData.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)), expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}") logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}")
return new_token return newToken
else: else:
logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}") logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}")
return None return None
@ -83,70 +83,70 @@ class TokenManager:
logger.error(f"Error refreshing Microsoft token: {str(e)}") logger.error(f"Error refreshing Microsoft token: {str(e)}")
return None return None
def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]: def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Google OAuth token using refresh token""" """Refresh Google OAuth token using refresh token"""
try: try:
logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}") logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}")
logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}") logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
if not self.google_client_id or not self.google_client_secret: if not self.google_client_id or not self.google_client_secret:
logger.error("Google OAuth configuration not found") logger.error("Google OAuth configuration not found")
return None return None
# Google token refresh endpoint # Google token refresh endpoint
token_url = "https://oauth2.googleapis.com/token" tokenUrl = "https://oauth2.googleapis.com/token"
logger.debug(f"refresh_google_token: Using token URL: {token_url}") logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}")
# Prepare refresh request # Prepare refresh request
data = { data = {
"client_id": self.google_client_id, "client_id": self.google_client_id,
"client_secret": self.google_client_secret, "client_secret": self.google_client_secret,
"grant_type": "refresh_token", "grant_type": "refresh_token",
"refresh_token": refresh_token "refresh_token": refreshToken
} }
logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})") logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
# Make refresh request # Make refresh request
with httpx.Client(timeout=30.0) as client: with httpx.Client(timeout=30.0) as client:
logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint") logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint")
response = client.post(token_url, data=data) response = client.post(tokenUrl, data=data)
logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}") logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}")
if response.status_code == 200: if response.status_code == 200:
token_data = response.json() tokenData = response.json()
logger.debug(f"refresh_google_token: Token refresh successful, creating new token") logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token")
# Validate the response contains required fields # Validate the response contains required fields
if "access_token" not in token_data: if "access_token" not in tokenData:
logger.error("Google token refresh response missing access_token") logger.error("Google token refresh response missing access_token")
return None return None
# Create new token # Create new token
new_token = Token( newToken = Token(
userId=user_id, userId=userId,
authority=AuthAuthority.GOOGLE, authority=AuthAuthority.GOOGLE,
connectionId=old_token.connectionId, # Preserve connection ID connectionId=oldToken.connectionId, # Preserve connection ID
tokenAccess=token_data["access_token"], tokenAccess=tokenData["access_token"],
tokenRefresh=token_data.get("refresh_token", refresh_token), # Use new refresh token if provided tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided
tokenType=token_data.get("token_type", "bearer"), tokenType=tokenData.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)), expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
createdAt=get_utc_timestamp() createdAt=getUtcTimestamp()
) )
logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}") logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}")
return new_token return newToken
else: else:
error_details = response.text errorDetails = response.text
logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}") logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}")
# Handle specific error cases # Handle specific error cases
if response.status_code == 400: if response.status_code == 400:
try: try:
error_data = response.json() errorData = response.json()
error_code = error_data.get("error") errorCode = errorData.get("error")
if error_code == "invalid_grant": if errorCode == "invalid_grant":
logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate") logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate")
elif error_code == "invalid_client": elif errorCode == "invalid_client":
logger.error("Google OAuth client configuration is invalid") logger.error("Google OAuth client configuration is invalid")
except: except:
pass pass
@ -157,55 +157,55 @@ class TokenManager:
logger.error(f"Error refreshing Google token: {str(e)}") logger.error(f"Error refreshing Google token: {str(e)}")
return None return None
def refresh_token(self, old_token: Token) -> Optional[Token]: def refreshToken(self, oldToken: Token) -> Optional[Token]:
"""Refresh an expired token using the appropriate OAuth service""" """Refresh an expired token using the appropriate OAuth service"""
try: try:
logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}") logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}")
logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}") logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}")
# Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly # Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
# Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed # Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
try: try:
now_ts = get_utc_timestamp() nowTs = getUtcTimestamp()
created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0 createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0
seconds_since_last_refresh = now_ts - created_ts secondsSinceLastRefresh = nowTs - createdTs
if seconds_since_last_refresh < 10 * 60: if secondsSinceLastRefresh < 10 * 60:
logger.info( logger.info(
f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. " f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. "
f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)." f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)."
) )
# Return the existing token to avoid caller errors while preventing provider rate limits # Return the existing token to avoid caller errors while preventing provider rate limits
return old_token return oldToken
except Exception: except Exception:
# If any issue reading timestamps, proceed with normal refresh to be safe # If any issue reading timestamps, proceed with normal refresh to be safe
pass pass
if not old_token.tokenRefresh: if not oldToken.tokenRefresh:
logger.warning(f"No refresh token available for {old_token.authority}") logger.warning(f"No refresh token available for {oldToken.authority}")
return None return None
# Route to appropriate refresh method # Route to appropriate refresh method
if old_token.authority == AuthAuthority.MSFT: if oldToken.authority == AuthAuthority.MSFT:
logger.debug(f"refresh_token: Refreshing Microsoft token") logger.debug(f"refreshToken: Refreshing Microsoft token")
return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token) return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
elif old_token.authority == AuthAuthority.GOOGLE: elif oldToken.authority == AuthAuthority.GOOGLE:
logger.debug(f"refresh_token: Refreshing Google token") logger.debug(f"refreshToken: Refreshing Google token")
return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token) return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
else: else:
logger.warning(f"Unknown authority for token refresh: {old_token.authority}") logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
return None return None
except Exception as e: except Exception as e:
logger.error(f"Error refreshing token: {str(e)}") logger.error(f"Error refreshing token: {str(e)}")
return None return None
def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]: def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
"""Ensure a token is fresh; refresh if expiring within threshold. """Ensure a token is fresh; refresh if expiring within threshold.
Args: Args:
token: Existing token to validate/refresh. token: Existing token to validate/refresh.
seconds_before_expiry: Threshold window to proactively refresh. secondsBeforeExpiry: Threshold window to proactively refresh.
save_callback: Optional function to persist a refreshed token. saveCallback: Optional function to persist a refreshed token.
Returns: Returns:
A fresh token (refreshed or original) or None if refresh failed. A fresh token (refreshed or original) or None if refresh failed.
@ -214,31 +214,31 @@ class TokenManager:
if token is None: if token is None:
return None return None
now_ts = get_utc_timestamp() nowTs = getUtcTimestamp()
expires_at = token.expiresAt or 0 expiresAt = token.expiresAt or 0
# If token expires within the threshold, try to refresh # If token expires within the threshold, try to refresh
if expires_at and expires_at < (now_ts + seconds_before_expiry): if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry):
logger.info( logger.info(
f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon " f"ensureFreshToken: Token for connection {token.connectionId} expiring soon "
f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh." f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh."
) )
refreshed = self.refresh_token(token) refreshed = self.refreshToken(token)
if refreshed: if refreshed:
if save_callback is not None: if saveCallback is not None:
try: try:
save_callback(refreshed) saveCallback(refreshed)
except Exception as e: except Exception as e:
logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}") logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}")
return refreshed return refreshed
else: else:
logger.warning("ensure_fresh_token: Token refresh failed") logger.warning("ensureFreshToken: Token refresh failed")
return None return None
# Token is sufficiently fresh # Token is sufficiently fresh
return token return token
except Exception as e: except Exception as e:
logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}") logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
return None return None
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer # Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
@ -256,10 +256,10 @@ class TokenManager:
token = interfaceDbApp.getConnectionToken(connectionId) token = interfaceDbApp.getConnectionToken(connectionId)
if not token: if not token:
return None return None
return self.ensure_fresh_token( return self.ensureFreshToken(
token, token,
seconds_before_expiry=secondsBeforeExpiry, secondsBeforeExpiry=secondsBeforeExpiry,
save_callback=lambda t: interfaceDbApp.saveConnectionToken(t) saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
) )
except Exception as e: except Exception as e:
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}") logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")

View file

@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
from typing import Callable from typing import Callable
import asyncio import asyncio
from modules.security.tokenRefreshService import token_refresh_service from modules.security.tokenRefreshService import token_refresh_service
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -135,7 +135,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
try: try:
# Perform proactive refresh in background # Perform proactive refresh in background
asyncio.create_task(self._proactive_refresh_tokens(user_id)) asyncio.create_task(self._proactive_refresh_tokens(user_id))
self.last_check[user_id] = get_utc_timestamp() self.last_check[user_id] = getUtcTimestamp()
except Exception as e: except Exception as e:
logger.warning(f"Error scheduling proactive refresh: {str(e)}") logger.warning(f"Error scheduling proactive refresh: {str(e)}")
@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
Check if we should perform proactive refresh for this user Check if we should perform proactive refresh for this user
""" """
try: try:
current_time = get_utc_timestamp() current_time = getUtcTimestamp()
last_check = self.last_check.get(user_id, 0) last_check = self.last_check.get(user_id, 0)
# Check every 5 minutes # Check every 5 minutes

View file

@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues.
import logging import logging
from typing import Dict, Any from typing import Dict, Any
from modules.datamodels.datamodelUam import UserConnection, AuthAuthority from modules.datamodels.datamodelUam import UserConnection, AuthAuthority
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -24,7 +24,7 @@ class TokenRefreshService:
def _is_rate_limited(self, connection_id: str) -> bool: def _is_rate_limited(self, connection_id: str) -> bool:
"""Check if connection is rate limited for refresh attempts""" """Check if connection is rate limited for refresh attempts"""
now = get_utc_timestamp() now = getUtcTimestamp()
if connection_id not in self.rate_limit_map: if connection_id not in self.rate_limit_map:
return False return False
@ -39,7 +39,7 @@ class TokenRefreshService:
def _record_refresh_attempt(self, connection_id: str) -> None: def _record_refresh_attempt(self, connection_id: str) -> None:
"""Record a refresh attempt for rate limiting""" """Record a refresh attempt for rate limiting"""
now = get_utc_timestamp() now = getUtcTimestamp()
if connection_id not in self.rate_limit_map: if connection_id not in self.rate_limit_map:
self.rate_limit_map[connection_id] = [] self.rate_limit_map[connection_id] = []
self.rate_limit_map[connection_id].append(now) self.rate_limit_map[connection_id].append(now)
@ -60,14 +60,14 @@ class TokenRefreshService:
token_manager = TokenManager() token_manager = TokenManager()
# Attempt to refresh the token # Attempt to refresh the token
refreshed_token = token_manager.refresh_token(current_token) refreshedToken = token_manager.refreshToken(current_token)
if refreshed_token: if refreshedToken:
# Save the refreshed token # Save the refreshed token
interface.saveConnectionToken(refreshed_token) interface.saveConnectionToken(refreshedToken)
# Update connection status # Update connection status
interface.db.recordModify(UserConnection, connection.id, { interface.db.recordModify(UserConnection, connection.id, {
"lastChecked": get_utc_timestamp(), "lastChecked": getUtcTimestamp(),
"expiresAt": refreshed_token.expiresAt "expiresAt": refreshed_token.expiresAt
}) })
@ -75,9 +75,9 @@ class TokenRefreshService:
# Log audit event # Log audit event
try: try:
audit_logger.log_security_event( audit_logger.logSecurityEvent(
user_id=str(connection.userId), userId=str(connection.userId),
mandate_id="system", mandateId="system",
action="token_refresh", action="token_refresh",
details=f"Google token refreshed for connection {connection.id}" details=f"Google token refreshed for connection {connection.id}"
) )
@ -109,14 +109,14 @@ class TokenRefreshService:
token_manager = TokenManager() token_manager = TokenManager()
# Attempt to refresh the token # Attempt to refresh the token
refreshed_token = token_manager.refresh_token(current_token) refreshedToken = token_manager.refreshToken(current_token)
if refreshed_token: if refreshedToken:
# Save the refreshed token # Save the refreshed token
interface.saveConnectionToken(refreshed_token) interface.saveConnectionToken(refreshedToken)
# Update connection status # Update connection status
interface.db.recordModify(UserConnection, connection.id, { interface.db.recordModify(UserConnection, connection.id, {
"lastChecked": get_utc_timestamp(), "lastChecked": getUtcTimestamp(),
"expiresAt": refreshed_token.expiresAt "expiresAt": refreshed_token.expiresAt
}) })
@ -124,9 +124,9 @@ class TokenRefreshService:
# Log audit event # Log audit event
try: try:
audit_logger.log_security_event( audit_logger.logSecurityEvent(
user_id=str(connection.userId), userId=str(connection.userId),
mandate_id="system", mandateId="system",
action="token_refresh", action="token_refresh",
details=f"Microsoft token refreshed for connection {connection.id}" details=f"Microsoft token refreshed for connection {connection.id}"
) )
@ -234,7 +234,7 @@ class TokenRefreshService:
refreshed_count = 0 refreshed_count = 0
failed_count = 0 failed_count = 0
rate_limited_count = 0 rate_limited_count = 0
current_time = get_utc_timestamp() current_time = getUtcTimestamp()
five_minutes = 5 * 60 # 5 minutes in seconds five_minutes = 5 * 60 # 5 minutes in seconds
# Process each connection # Process each connection

View file

@ -11,18 +11,18 @@ class PublicService:
- Optional name_filter predicate for allow-list patterns - Optional name_filter predicate for allow-list patterns
""" """
def __init__(self, target: Any, functions_only: bool = True, name_filter=None): def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None):
self._target = target self._target = target
self._functions_only = functions_only self._functionsOnly = functionsOnly
self._name_filter = name_filter self._nameFilter = nameFilter
def __getattr__(self, name: str): def __getattr__(self, name: str):
if name.startswith('_'): if name.startswith('_'):
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private") raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
if self._name_filter and not self._name_filter(name): if self._nameFilter and not self._nameFilter(name):
raise AttributeError(f"'{name}' not exposed by policy") raise AttributeError(f"'{name}' not exposed by policy")
attr = getattr(self._target, name) attr = getattr(self._target, name)
if self._functions_only and not callable(attr): if self._functionsOnly and not callable(attr):
raise AttributeError(f"'{name}' is not a function") raise AttributeError(f"'{name}' is not a function")
return attr return attr
@ -30,8 +30,8 @@ class PublicService:
names = [ names = [
n for n in dir(self._target) n for n in dir(self._target)
if not n.startswith('_') if not n.startswith('_')
and (not self._functions_only or callable(getattr(self._target, n, None))) and (not self._functionsOnly or callable(getattr(self._target, n, None)))
and (self._name_filter(n) if self._name_filter else True) and (self._nameFilter(n) if self._nameFilter else True)
] ]
return sorted(names) return sorted(names)
@ -70,7 +70,7 @@ class Services:
self.sharepoint = PublicService(SharepointService(self)) self.sharepoint = PublicService(SharepointService(self))
from .serviceAi.mainServiceAi import AiService from .serviceAi.mainServiceAi import AiService
self.ai = PublicService(AiService(self)) self.ai = PublicService(AiService(self), functionsOnly=False)
from .serviceTicket.mainServiceTicket import TicketService from .serviceTicket.mainServiceTicket import TicketService
self.ticket = PublicService(TicketService(self)) self.ticket = PublicService(TicketService(self))

View file

@ -1,30 +1,26 @@
import json
import logging import logging
from typing import Dict, Any, List, Optional, Union import time
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.interfaces.interfaceAiObjects import AiObjects from modules.interfaces.interfaceAiObjects import AiObjects
from modules.services.serviceAi.subCoreAi import SubCoreAi
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration from modules.shared.jsonUtils import (
from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent extractJsonString,
repairBrokenJson,
extractSectionsFromDocument,
buildContinuationContext
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Rebuild the model to resolve forward references
AiCallRequest.model_rebuild()
class AiService: class AiService:
"""Lightweight AI service orchestrator that delegates to specialized sub-modules. """AI service with core operations integrated."""
Manager delegates to specialized sub-modules:
- SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
- SubDocumentProcessing: Document chunking, processing, and merging logic
- SubDocumentGeneration: Single-file and multi-file document generation
The main service acts as a coordinator:
1. Manages lazy initialization of sub-modules
2. Delegates operations to appropriate sub-modules
3. Maintains the same public API for backward compatibility
"""
def __init__(self, serviceCenter=None) -> None: def __init__(self, serviceCenter=None) -> None:
"""Initialize AI service with service center access. """Initialize AI service with service center access.
@ -34,64 +30,638 @@ class AiService:
""" """
self.services = serviceCenter self.services = serviceCenter
# Only depend on interfaces # Only depend on interfaces
self.aiObjects = None # Will be initialized in create() self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized()
self._extractionService = None # Lazy initialization # Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
self._coreAi = None # Lazy initialization self.extractionService = None
self._documentProcessor = None # Lazy initialization self.documentProcessor = None
self._documentGenerator = None # Lazy initialization
@property def _initializeSubmodules(self):
def extractionService(self): """Initialize all submodules after aiObjects is ready."""
"""Lazy initialization of extraction service.""" if self.aiObjects is None:
if self._extractionService is None: raise RuntimeError("aiObjects must be initialized before initializing submodules")
logger.info("Lazy initializing ExtractionService...")
self._extractionService = ExtractionService(self.services)
return self._extractionService
@property if self.extractionService is None:
def coreAi(self): logger.info("Initializing ExtractionService...")
"""Lazy initialization of core AI service.""" self.extractionService = ExtractionService(self.services)
if self._coreAi is None:
if self.aiObjects is None:
raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
logger.info("Lazy initializing SubCoreAi...")
self._coreAi = SubCoreAi(self.services, self.aiObjects)
return self._coreAi
@property if self.documentProcessor is None:
def documentProcessor(self): logger.info("Initializing SubDocumentProcessing...")
"""Lazy initialization of document processing service.""" self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
if self._documentProcessor is None:
logger.info("Lazy initializing SubDocumentProcessing...")
self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
return self._documentProcessor
@property
def documentGenerator(self):
"""Lazy initialization of document generation service."""
if self._documentGenerator is None:
logger.info("Lazy initializing SubDocumentGeneration...")
self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
return self._documentGenerator
async def _ensureAiObjectsInitialized(self): async def _ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized.""" """Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None: if self.aiObjects is None:
logger.info("Lazy initializing AiObjects...") logger.info("Lazy initializing AiObjects...")
self.aiObjects = await AiObjects.create() self.aiObjects = await AiObjects.create()
logger.info("AiObjects initialization completed") logger.info("AiObjects initialization completed")
# Initialize submodules after aiObjects is ready
self._initializeSubmodules()
@classmethod @classmethod
async def create(cls, serviceCenter=None) -> "AiService": async def create(cls, serviceCenter=None) -> "AiService":
"""Create AiService instance with all connectors initialized.""" """Create AiService instance with all connectors and submodules initialized."""
logger.info("AiService.create() called") logger.info("AiService.create() called")
instance = cls(serviceCenter) instance = cls(serviceCenter)
logger.info("AiService created, about to call AiObjects.create()...") logger.info("AiService created, about to call AiObjects.create()...")
instance.aiObjects = await AiObjects.create() instance.aiObjects = await AiObjects.create()
logger.info("AiObjects.create() completed") logger.info("AiObjects.create() completed")
# Initialize all submodules after aiObjects is ready
instance._initializeSubmodules()
logger.info("AiService submodules initialized")
return instance return instance
# Helper methods
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
    """
    Build the full prompt by substituting ``{{KEY:<name>}}`` markers.

    Args:
        prompt: The base prompt template.
        placeholders: Mapping of placeholder name to replacement content.
            Entries whose content is None are left untouched in the
            template; any other value (including an empty string) is
            converted with str() and substituted.

    Returns:
        The prompt with every matching marker replaced, or the prompt
        unchanged when no placeholders are given.
    """
    if not placeholders:
        return prompt

    fullPrompt = prompt
    for placeholder, content in placeholders.items():
        # Only None means "no value"; an empty string still clears the marker.
        if content is None:
            continue
        # Doubled braces are literal in an f-string, so the marker searched
        # for is {{KEY:<placeholder>}}. Replacements run one after another,
        # so text inserted for one key is visible to the keys that follow.
        fullPrompt = fullPrompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
    return fullPrompt
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
    """Ask the AI backend which AiCallOptions fit *prompt*.

    An analysis prompt listing the values of OperationTypeEnum, PriorityEnum
    and ProcessingModeEnum is sent through ``self.aiObjects.call``. The reply
    is expected to contain one JSON object with the keys ``operationType``,
    ``priority``, ``processingMode``, ``compressPrompt`` and
    ``compressContext``.

    Args:
        prompt: The user prompt to classify. It is embedded in the analysis
            prompt via ``self.services.utils.sanitizePromptContent``.

    Returns:
        Options built from the model's answer. If the call fails, the reply
        holds no JSON object, or the JSON cannot be mapped onto the enums,
        default options (DATA_ANALYSE / BALANCED / BASIC) are returned.
        This method logs a warning instead of raising.
    """
    try:
        # Offer the model the literal enum values so its answer can be fed
        # straight back into the enum constructors below.
        operationTypes = [e.value for e in OperationTypeEnum]
        priorities = [e.value for e in PriorityEnum]
        processingModes = [e.value for e in ProcessingModeEnum]

        # Create analysis prompt for AI to determine operation type and parameters
        analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
PROMPT TO ANALYZE:
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}
Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processingModes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)
Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""

        # The classification call itself uses fixed options.
        request = AiCallRequest(
            prompt=analysisPrompt,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=True,
                compressContext=False
            )
        )
        response = await self.aiObjects.call(request)

        # Parse AI response
        try:
            # Take the span from the first '{' to the last '}' so that any
            # text the model puts around the JSON object is ignored.
            jsonStart = response.content.find('{')
            jsonEnd = response.content.rfind('}') + 1
            if jsonStart != -1 and jsonEnd > jsonStart:
                analysis = json.loads(response.content[jsonStart:jsonEnd])

                # Map string values to enums; an unknown value raises
                # ValueError, which the handler below turns into the fallback.
                operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
                priority = PriorityEnum(analysis.get('priority', 'balanced'))
                processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))

                return AiCallOptions(
                    operationType=operationType,
                    priority=priority,
                    processingMode=processingMode,
                    compressPrompt=analysis.get('compressPrompt', True),
                    compressContext=analysis.get('compressContext', True)
                )
            # Previously this path fell through without any trace in the log.
            logger.warning("AI analysis response contained no JSON object, using default options")
        except Exception as e:
            logger.warning(f"Failed to parse AI analysis response: {e}")

    except Exception as e:
        logger.warning(f"Prompt analysis failed: {e}")

    # Fallback to default options
    return AiCallOptions(
        operationType=OperationTypeEnum.DATA_ANALYSE,
        priority=PriorityEnum.BALANCED,
        processingMode=ProcessingModeEnum.BASIC
    )
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call",
    promptBuilder: Optional[callable] = None,
    promptArgs: Optional[Dict[str, Any]] = None,
    operationId: Optional[str] = None
) -> str:
    """
    Call the AI repeatedly, accumulating document sections until the answer is complete.

    Every iteration sends one prompt, extracts sections from the (possibly broken)
    JSON answer via _extractSectionsFromResponse and asks _shouldContinueGeneration
    whether another round is needed. A continuation prompt is only rebuilt when
    sections already exist and both promptBuilder and promptArgs are given;
    otherwise the original prompt is sent again.

    Args:
        prompt: The prompt sent on the first iteration
        options: AI call configuration options, passed unchanged to every request
        debugPrefix: Prefix for debug file names and workflow stat labels
        promptBuilder: Optional awaitable prompt factory, invoked as
            promptBuilder(**promptArgs, continuationContext=...) for continuations
        promptArgs: Optional keyword arguments for promptBuilder
        operationId: Optional operation ID for progress tracking

    Returns:
        The string produced by _buildFinalResultFromSections from all sections
        accumulated across iterations
    """
    maxIterations = 50  # Prevent infinite loops
    iteration = 0
    allSections = []  # Accumulate all sections across iterations
    lastRawResponse = None  # Last raw AI answer, fed into the continuation context

    while iteration < maxIterations:
        iteration += 1

        # Update progress for iteration start
        if operationId:
            if iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
            else:
                # For continuation iterations, show progress incrementally (0.5 to 0.9 over maxIterations)
                baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4)
                self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})")

        # Build iteration prompt
        if allSections and promptBuilder and promptArgs:
            # Continuation: hand the accumulated sections and the last raw answer to the prompt builder
            continuationContext = buildContinuationContext(allSections, lastRawResponse)
            if not lastRawResponse:
                logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
            iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
        else:
            # First iteration (or no builder available) - use original prompt
            iterationPrompt = prompt

        # Make AI call
        try:
            if operationId and iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")

            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )

            # Write the ACTUAL prompt sent to AI
            if iteration == 1:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

            response = await self.aiObjects.call(request)
            result = response.content

            # Update progress after AI call
            if operationId:
                if iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
                else:
                    progress = 0.6 + (min(iteration - 1, 10) * 0.03)
                    self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")

            # Write raw AI response to debug file
            if iteration == 1:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
            else:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")

            # Emit stats for this iteration
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.call.{debugPrefix}.iteration_{iteration}"
            )

            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break

            # Store raw response for continuation (even if broken).
            # The "complete_response" flag is evaluated in _shouldContinueGeneration below.
            lastRawResponse = result

            # Extract sections from response (handles both valid and broken JSON)
            extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

            # Update progress after parsing
            if operationId:
                if extractedSections:
                    self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")

            if not extractedSections:
                # In continuation mode with incomplete JSON, don't stop - allow another attempt
                if iteration > 1 and not wasJsonComplete:
                    logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                    continue
                # Otherwise, stop if no sections
                logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                break

            # Add new sections to accumulator
            allSections.extend(extractedSections)

            # Check if we should continue (completion detection)
            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                continue

            # Done - the final result is built after the loop
            if operationId:
                self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
            break
        except Exception as e:
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            break
    else:
        # Reached only when the loop condition failed, i.e. the iteration cap was
        # exhausted without any break (normal completion always leaves via break).
        logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")

    # Build final result from accumulated sections
    finalResult = self._buildFinalResultFromSections(allSections)

    # Write final result to debug file
    self.services.utils.writeDebugFile(finalResult, f"{debugPrefix}_final_result")
    return finalResult
def _extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool]:
    """
    Pull document sections out of one raw AI response.

    The response is first parsed as regular JSON; if that fails with a
    JSONDecodeError the raw text is written to a debug file and handed to
    repairBrokenJson.

    Returns:
        (sections, wasJsonComplete). wasJsonComplete is True when the JSON parsed
        cleanly and either carried "complete_response": true, contained sections,
        or was an empty first-iteration answer. It is False for repaired JSON,
        failed repairs, unexpected errors, and section-less continuation fragments.
    """
    try:
        parsedDocument = json.loads(extractJsonString(result))

        # Explicit completion marker set by the AI
        markedComplete = parsedDocument.get("complete_response", False) == True
        foundSections = extractSectionsFromDocument(parsedDocument)

        if markedComplete:
            return foundSections, True

        # A continuation fragment (iteration > 1) without sections is treated as
        # incomplete so the caller's loop asks again; every other cleanly parsed
        # answer counts as complete, including an empty first-iteration answer.
        isEmptyContinuation = len(foundSections) == 0 and iteration > 1
        return foundSections, not isEmptyContinuation
    except json.JSONDecodeError:
        # Broken JSON is expected during iterative generation - keep a copy, then repair
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
        repairedDocument = repairBrokenJson(result)
        if not repairedDocument:
            logger.error(f"Iteration {iteration}: All repair strategies failed")
            return [], False
        # Repaired JSON is always reported as incomplete
        return extractSectionsFromDocument(repairedDocument), False
    except Exception as e:
        logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
        return [], False
def _shouldContinueGeneration(
self,
allSections: List[Dict[str, Any]],
iteration: int,
wasJsonComplete: bool,
rawResponse: str = None
) -> bool:
"""
Determine if generation should continue based on JSON completeness and complete_response flag.
Returns True if we should continue, False if done.
"""
if len(allSections) == 0:
return True # No sections yet, continue
# Check for complete_response flag in raw response
if rawResponse:
import re
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
return False
# If JSON was complete (and no complete_response flag), we're done
# If JSON was broken and repaired, continue to get more content
if wasJsonComplete:
return False
else:
return True
def _buildFinalResultFromSections(
self,
allSections: List[Dict[str, Any]]
) -> str:
"""
Build final JSON result from accumulated sections.
"""
if not allSections:
return ""
# Build documents structure
# Assuming single document for now
documents = [{
"id": "doc_1",
"title": "Generated Document", # This should come from prompt
"filename": "document.json",
"sections": allSections
}]
result = {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": documents
}
return json.dumps(result, indent=2)
# Public API Methods
# Planning AI Call
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
    """
    Run a single-shot planning call (task planning, action planning, action selection, ...).

    The call options are fixed: PLAN operation, QUALITY priority, DETAILED
    processing, and no prompt or context compression.

    Args:
        prompt: The planning prompt
        placeholders: Optional placeholders; each label/content pair is passed to
            _buildPromptWithPlaceholders together with the prompt

    Returns:
        The raw response content, or "" when the response content is empty
    """
    await self._ensureAiObjectsInitialized()

    # Static parameters - planning never derives its options from the prompt
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )

    # Substitute placeholders when provided
    fullPrompt = prompt
    if placeholders:
        contentByLabel = {}
        for placeholder in placeholders:
            contentByLabel[placeholder.label] = placeholder.content
        fullPrompt = self._buildPromptWithPlaceholders(prompt, contentByLabel)

    # Planning must return raw single-shot JSON, so the section-based looping path is not used
    planningRequest = AiCallRequest(
        prompt=fullPrompt,
        context="",
        options=planningOptions
    )

    # Persist prompt and response for later analysis
    self.services.utils.writeDebugFile(fullPrompt, "plan_prompt")
    planningResponse = await self.aiObjects.call(planningRequest)
    planText = planningResponse.content or ""
    self.services.utils.writeDebugFile(planText, "plan_response")
    return planText
# Document Generation AI Call
async def callAiDocuments(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]] = None,
    options: Optional[AiCallOptions] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
    """
    Entry point for every non-planning AI call: text, image and document generation.

    Routing, in order:
      1. Image request (IMAGE_GENERATE operation or an image-like outputFormat):
         delegated to generateImage.
      2. No outputFormat: plain text call, through documentProcessor.callAiText
         when documents are given, otherwise through _callAiWithLooping.
      3. outputFormat given: sections are generated as JSON, parsed and rendered
         with GenerationService.renderReport.

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration; derived from the prompt when None or when
            its operationType is None
        outputFormat: Optional output format for document generation
        title: Optional title for generated documents

    Returns:
        The text result, the image response, or a dict describing the rendered
        document. JSON parsing and rendering failures are reported as
        {"success": False, "error": ...} instead of raising.

    Raises:
        Any other exception is logged, the progress log is closed as failed and
        the exception is re-raised.
    """
    await self._ensureAiObjectsInitialized()

    # Separate operation id for detailed progress tracking
    if self.services.currentWorkflow:
        workflowId = self.services.currentWorkflow.id
    else:
        workflowId = f"no-workflow-{int(time.time())}"
    aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"

    self.services.workflow.progressLogStart(
        aiOperationId,
        "AI call with documents",
        "Document Generation",
        f"Format: {outputFormat or 'text'}"
    )

    try:
        # Make sure the AI connectors are ready before delegating to documentProcessor/generator
        if hasattr(self.services, 'ai') and hasattr(self.services.ai, '_ensureAiObjectsInitialized'):
            await self.services.ai._ensureAiObjectsInitialized()

        # Let the AI pick the parameters only when the caller supplied none
        needsAnalysis = options is None or (hasattr(options, 'operationType') and options.operationType is None)
        if needsAnalysis:
            self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
            options = await self._analyzePromptAndCreateOptions(prompt)

        # Image requests go straight to the image pipeline and bypass the JSON loop
        imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"}
        opType = getattr(options, "operationType", None)
        if outputFormat:
            fmt = outputFormat.lower()
        else:
            fmt = None
        if (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats):
            self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
            imageResponse = await self.generateImage(prompt, options=options)
            self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated")
            self.services.workflow.progressLogFinish(aiOperationId, True)
            return imageResponse

        # Text calls (no output format specified)
        if not outputFormat:
            self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
            if documents:
                textResult = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
            else:
                textResult = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)
            self.services.workflow.progressLogFinish(aiOperationId, True)
            return textResult

        # Document generation with structured output.
        # CRITICAL: never compress here - truncating the JSON template would confuse the AI.
        if not options:
            options = AiCallOptions()
        options.compressPrompt = False
        options.compressContext = False

        if documents and len(documents) > 0:
            self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
            extractedContent = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
        else:
            self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
            extractedContent = None

        self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
        from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt

        # The first prompt carries no continuation context
        generationPrompt = await buildGenerationPrompt(outputFormat, prompt, title, extractedContent, None)

        # Keyword arguments used to rebuild the prompt for continuation rounds
        promptArgs = {
            "outputFormat": outputFormat,
            "userPrompt": prompt,
            "title": title,
            "extracted_content": extractedContent
        }

        self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
        generatedJson = await self._callAiWithLooping(
            generationPrompt,
            options,
            "document_generation",
            buildGenerationPrompt,
            promptArgs,
            aiOperationId
        )

        self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")

        # Pull fenced/embedded JSON out of the answer before parsing it
        try:
            extractedJson = self.services.utils.jsonExtractString(generatedJson)
            generatedData = json.loads(extractedJson)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse generated JSON: {str(e)}")
            logger.error(f"JSON content length: {len(generatedJson)}")
            logger.error(f"JSON content preview (last 200 chars): ...{generatedJson[-200:]}")
            logger.error(f"JSON content around error position: {generatedJson[max(0, e.pos-50):e.pos+50]}")
            # Keep the problematic JSON for inspection
            self.services.utils.writeDebugFile(generatedJson, "failed_json_parsing")
            self.services.workflow.progressLogFinish(aiOperationId, False)
            return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}

        self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")

        # Render to the final format with the existing renderer
        try:
            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
            generationService = GenerationService(self.services)
            renderedContent, mimeType = await generationService.renderReport(
                generatedData, outputFormat, title or "Generated Document", prompt, self
            )

            renderedDocument = {
                "documentName": f"generated.{outputFormat}",
                "documentData": renderedContent,
                "mimeType": mimeType,
                "title": title or "Generated Document"
            }
            generationResult = {
                "success": True,
                "content": generatedData,
                "documents": [renderedDocument],
                "is_multi_file": False,
                "format": outputFormat,
                "title": title,
                "split_strategy": "single",
                "total_documents": 1,
                "processed_documents": 1
            }

            # Log AI response for debugging
            self.services.utils.writeDebugFile(str(generationResult), "document_generation_response", documents)
            self.services.workflow.progressLogFinish(aiOperationId, True)
            return generationResult
        except Exception as e:
            logger.error(f"Error rendering document: {str(e)}")
            self.services.workflow.progressLogFinish(aiOperationId, False)
            return {"success": False, "error": f"Rendering failed: {str(e)}"}
    except Exception as e:
        logger.error(f"Error in callAiDocuments: {str(e)}")
        self.services.workflow.progressLogFinish(aiOperationId, False)
        raise
# AI Image Analysis # AI Image Analysis
async def readImage( async def readImage(
self, self,
@ -102,7 +672,64 @@ class AiService:
) -> str: ) -> str:
"""Call AI for image analysis using interface.call() with contentParts.""" """Call AI for image analysis using interface.call() with contentParts."""
await self._ensureAiObjectsInitialized() await self._ensureAiObjectsInitialized()
return await self.coreAi.readImage(prompt, imageData, mimeType, options)
try:
# Check if imageData is valid
if not imageData:
error_msg = "No image data provided"
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
# Always use IMAGE_ANALYSE operation type for image processing
if options is None:
options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
else:
# Override the operation type to ensure image analysis
options.operationType = OperationTypeEnum.IMAGE_ANALYSE
# Create content parts with image data
from modules.datamodels.datamodelExtraction import ContentPart
import base64
# ContentPart.data must be a string - convert bytes to base64 if needed
if isinstance(imageData, bytes):
imageDataStr = base64.b64encode(imageData).decode('utf-8')
else:
# Already a base64 string
imageDataStr = imageData
imagePart = ContentPart(
id="image_0",
parentId=None,
label="Image",
typeGroup="image",
mimeType=mimeType or "image/jpeg",
data=imageDataStr, # Must be a string (base64 encoded)
metadata={"imageAnalysis": True}
)
# Create request with content parts
request = AiCallRequest(
prompt=prompt,
context="",
options=options,
contentParts=[imagePart]
)
response = await self.aiObjects.call(request)
result = response.content
# Check if result is valid
if not result or (isinstance(result, str) and not result.strip()):
error_msg = f"No response from AI image analysis (result: {repr(result)})"
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
return result
except Exception as e:
logger.error(f"Error in AI image analysis: {str(e)}")
return f"Error: {str(e)}"
# AI Image Generation # AI Image Generation
async def generateImage( async def generateImage(
@ -115,34 +742,19 @@ class AiService:
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Generate an image using AI using interface.generateImage().""" """Generate an image using AI using interface.generateImage()."""
await self._ensureAiObjectsInitialized() await self._ensureAiObjectsInitialized()
return await self.coreAi.generateImage(prompt, size, quality, style, options)
try:
response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
# Core AI Methods - Delegating to SubCoreAi # Emit stats for image generation
async def callAiPlanning( self.services.workflow.storeWorkflowStat(
self, self.services.currentWorkflow,
prompt: str, response,
placeholders: Optional[List[PromptPlaceholder]] = None f"ai.generate.image"
) -> str: )
"""Planning AI call for task planning, action planning, action selection, etc."""
await self._ensureAiObjectsInitialized()
# Always use "json" for planning calls since they return JSON
return await self.coreAi.callAiPlanning(prompt, placeholders)
async def callAiDocuments(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
options: Optional[AiCallOptions] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
"""Document generation AI call for all non-planning calls."""
await self._ensureAiObjectsInitialized()
return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
"""Sanitize prompt content to prevent injection attacks and ensure safe presentation."""
return sanitizePromptContent(content, contentType)
return response
except Exception as e:
logger.error(f"Error in AI image generation: {str(e)}")
return {"success": False, "error": str(e)}

View file

@ -1,687 +0,0 @@
import json
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.services.serviceAi.subSharedAiUtils import (
buildPromptWithPlaceholders,
extractTextFromContentParts,
reduceText,
determineCallType
)
from modules.shared.jsonUtils import (
extractJsonString,
repairBrokenJson,
extractSectionsFromDocument,
buildContinuationContext
)
# Module-level logger named after this module
logger = logging.getLogger(__name__)
# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT
# Sections are accumulated across iterations and the repair mechanism handles broken JSON automatically
# Rebuild the AiCallRequest model at import time to resolve its forward references
AiCallRequest.model_rebuild()
class SubCoreAi:
"""Core AI operations including image analysis, text generation, and planning calls."""
def __init__(self, services, aiObjects):
"""Initialize core AI operations.
Args:
services: Service center instance for accessing other services
aiObjects: Initialized AiObjects instance
"""
self.services = services
self.aiObjects = aiObjects
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
    """
    Ask the AI which AiCallOptions fit the given prompt.

    A short analysis prompt listing the allowed enum values is sent with fixed
    options; the JSON object found in the reply is mapped onto the enums.

    Args:
        prompt: The user prompt to analyze (sanitized before being embedded)

    Returns:
        Options built from the AI's reply, or the default options
        (DATA_ANALYSE / BALANCED / BASIC) when the call fails, the reply holds
        no JSON object, or the reply cannot be parsed or mapped to the enums.
    """
    try:
        # Get dynamic enum values from Pydantic models
        operationTypes = [e.value for e in OperationTypeEnum]
        priorities = [e.value for e in PriorityEnum]
        processingModes = [e.value for e in ProcessingModeEnum]

        # Create analysis prompt for AI to determine operation type and parameters
        analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
PROMPT TO ANALYZE:
{self.services.ai.sanitizePromptContent(prompt, 'userinput')}
Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processingModes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)
Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""

        # Use AI to analyze the prompt
        request = AiCallRequest(
            prompt=analysisPrompt,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=True,
                compressContext=False
            )
        )
        response = await self.aiObjects.call(request)

        # Parse AI response: take everything between the first '{' and the last '}'
        try:
            jsonStart = response.content.find('{')
            jsonEnd = response.content.rfind('}') + 1
            if jsonStart != -1 and jsonEnd > jsonStart:
                analysis = json.loads(response.content[jsonStart:jsonEnd])

                # Map string values to enums (an unknown value raises and triggers the fallback)
                operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
                priority = PriorityEnum(analysis.get('priority', 'balanced'))
                processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))

                return AiCallOptions(
                    operationType=operationType,
                    priority=priority,
                    processingMode=processingMode,
                    compressPrompt=analysis.get('compressPrompt', True),
                    compressContext=analysis.get('compressContext', True)
                )
            # Previously this case fell through to the defaults without any trace
            logger.warning("AI analysis response contained no JSON object, using default options")
        except Exception as e:
            logger.warning(f"Failed to parse AI analysis response: {e}")
    except Exception as e:
        logger.warning(f"Prompt analysis failed: {e}")

    # Fallback to default options
    return AiCallOptions(
        operationType=OperationTypeEnum.DATA_ANALYSE,
        priority=PriorityEnum.BALANCED,
        processingMode=ProcessingModeEnum.BASIC
    )
# Shared Core Function for AI Calls with Looping and Repair
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call",
    promptBuilder: Optional[callable] = None,
    promptArgs: Optional[Dict[str, Any]] = None,
    operationId: Optional[str] = None
) -> str:
    """
    Call the AI repeatedly, accumulating document sections until the answer is complete.

    Every iteration sends one prompt, extracts sections from the (possibly broken)
    JSON answer via _extractSectionsFromResponse and asks _shouldContinueGeneration
    whether another round is needed. A continuation prompt is only rebuilt when
    sections already exist and both promptBuilder and promptArgs are given;
    otherwise the original prompt is sent again.

    Args:
        prompt: The prompt sent on the first iteration
        options: AI call configuration options, passed unchanged to every request
        debugPrefix: Prefix for debug file names and workflow stat labels
        promptBuilder: Optional awaitable prompt factory, invoked as
            promptBuilder(**promptArgs, continuationContext=...) for continuations
        promptArgs: Optional keyword arguments for promptBuilder
        operationId: Optional operation ID for progress tracking

    Returns:
        The string produced by _buildFinalResultFromSections from all sections
        accumulated across iterations
    """
    maxIterations = 50  # Prevent infinite loops
    iteration = 0
    allSections = []  # Accumulate all sections across iterations
    lastRawResponse = None  # Last raw AI answer, fed into the continuation context

    while iteration < maxIterations:
        iteration += 1

        # Update progress for iteration start
        if operationId:
            if iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
            else:
                # For continuation iterations, show progress incrementally (0.5 to 0.9 over maxIterations)
                baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4)
                self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})")

        # Build iteration prompt
        if allSections and promptBuilder and promptArgs:
            # Continuation: hand the accumulated sections and the last raw answer to the prompt builder
            continuationContext = buildContinuationContext(allSections, lastRawResponse)
            if not lastRawResponse:
                logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
            iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
        else:
            # First iteration (or no builder available) - use original prompt
            iterationPrompt = prompt

        # Make AI call
        try:
            if operationId and iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")

            # AiCallRequest comes from the module-level import; no per-iteration re-import needed
            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )

            # Write the ACTUAL prompt sent to AI
            if iteration == 1:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

            response = await self.aiObjects.call(request)
            result = response.content

            # Update progress after AI call
            if operationId:
                if iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
                else:
                    progress = 0.6 + (min(iteration - 1, 10) * 0.03)
                    self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")

            # Write raw AI response to debug file
            if iteration == 1:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
            else:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")

            # Emit stats for this iteration
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.call.{debugPrefix}.iteration_{iteration}"
            )

            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break

            # Store raw response for continuation (even if broken).
            # The "complete_response" flag is evaluated in _shouldContinueGeneration below.
            lastRawResponse = result

            # Extract sections from response (handles both valid and broken JSON)
            extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

            # Update progress after parsing
            if operationId:
                if extractedSections:
                    self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")

            if not extractedSections:
                # In continuation mode with incomplete JSON, don't stop - allow another attempt
                if iteration > 1 and not wasJsonComplete:
                    logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                    continue
                # Otherwise, stop if no sections
                logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                break

            # Add new sections to accumulator
            allSections.extend(extractedSections)

            # Check if we should continue (completion detection)
            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                continue

            # Done - the final result is built after the loop
            if operationId:
                self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
            break
        except Exception as e:
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            break
    else:
        # Reached only when the loop condition failed, i.e. the iteration cap was
        # exhausted without any break (normal completion always leaves via break).
        logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")

    # Build final result from accumulated sections
    finalResult = self._buildFinalResultFromSections(allSections)

    # Write final result to debug file
    self.services.utils.writeDebugFile(finalResult, f"{debugPrefix}_final_result")
    return finalResult
def _extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool]:
    """
    Pull document sections out of one raw AI response.

    The response is first parsed as regular JSON; if that fails with a
    JSONDecodeError the raw text is written to a debug file and handed to
    repairBrokenJson.

    Returns:
        (sections, wasJsonComplete). wasJsonComplete is True when the JSON parsed
        cleanly and either carried "complete_response": true, contained sections,
        or was an empty first-iteration answer. It is False for repaired JSON,
        failed repairs, unexpected errors, and section-less continuation fragments.
    """
    try:
        parsedDocument = json.loads(extractJsonString(result))

        # Explicit completion marker set by the AI
        markedComplete = parsedDocument.get("complete_response", False) == True
        foundSections = extractSectionsFromDocument(parsedDocument)

        if markedComplete:
            return foundSections, True

        # A continuation fragment (iteration > 1) without sections is treated as
        # incomplete so the caller's loop asks again; every other cleanly parsed
        # answer counts as complete, including an empty first-iteration answer.
        isEmptyContinuation = len(foundSections) == 0 and iteration > 1
        return foundSections, not isEmptyContinuation
    except json.JSONDecodeError:
        # Broken JSON is expected during iterative generation - keep a copy, then repair
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
        repairedDocument = repairBrokenJson(result)
        if not repairedDocument:
            logger.error(f"Iteration {iteration}: All repair strategies failed")
            return [], False
        # Repaired JSON is always reported as incomplete
        return extractSectionsFromDocument(repairedDocument), False
    except Exception as e:
        logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
        return [], False
def _shouldContinueGeneration(
self,
allSections: List[Dict[str, Any]],
iteration: int,
wasJsonComplete: bool,
rawResponse: str = None
) -> bool:
"""
Determine if generation should continue based on JSON completeness and complete_response flag.
Returns True if we should continue, False if done.
"""
if len(allSections) == 0:
return True # No sections yet, continue
# Check for complete_response flag in raw response
if rawResponse:
import re
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
return False
# If JSON was complete (and no complete_response flag), we're done
# If JSON was broken and repaired, continue to get more content
if wasJsonComplete:
return False
else:
return True
def _buildFinalResultFromSections(
self,
allSections: List[Dict[str, Any]]
) -> str:
"""
Build final JSON result from accumulated sections.
"""
if not allSections:
return ""
# Build documents structure
# Assuming single document for now
documents = [{
"id": "doc_1",
"title": "Generated Document", # This should come from prompt
"filename": "document.json",
"sections": allSections
}]
result = {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": documents
}
return json.dumps(result, indent=2)
# Old _buildContinuationPrompt and _mergeJsonContent methods removed
# Now handled by repair mechanism in jsonUtils.py and section accumulation
# Planning AI Call
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
    """
    Run a planning AI call (task planning, action planning, action selection, etc.).

    The call options are fixed for planning and are never taken from the caller.

    Args:
        prompt: The planning prompt
        placeholders: Optional list of placeholder replacements

    Returns:
        Planning JSON response
    """
    # Static options for every planning call
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )

    # Substitute placeholders only when some were supplied
    resolvedPrompt = prompt
    if placeholders:
        replacements = {}
        for entry in placeholders:
            replacements[entry.label] = entry.content
        resolvedPrompt = buildPromptWithPlaceholders(prompt, replacements)

    # Shared looping core, tagged with the planning debug prefix
    return await self._callAiWithLooping(resolvedPrompt, planningOptions, "plan")
# Document Generation AI Call
async def callAiDocuments(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
options: Optional[AiCallOptions] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
"""
Document generation AI call for all non-planning calls.
Uses the current unified path with extraction and generation.
Progress is reported through services.workflow.progressLogStart/Update/Finish
under a dedicated operation id built from the workflow id and the current time.
Args:
prompt: The main prompt for the AI call
documents: Optional list of documents to process
options: AI call configuration options; when None (or when operationType is None)
they are derived from the prompt via _analyzePromptAndCreateOptions
outputFormat: Optional output format for document generation
title: Optional title for generated documents
Returns:
AI response as string, or dict with documents if outputFormat is specified.
When outputFormat is set and the generated JSON cannot be parsed or rendering
fails, a dict {"success": False, "error": ...} is returned instead of raising.
Raises:
Any other exception is logged, the progress operation is finished as failed,
and the exception is re-raised.
"""
# Create separate operationId for detailed progress tracking
import time
# NOTE(review): uuid is imported but not used anywhere in this method
import uuid
# Fall back to a synthetic workflow id when no workflow is active
workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"
# Start progress tracking for this operation
self.services.workflow.progressLogStart(
aiOperationId,
"AI call with documents",
"Document Generation",
f"Format: {outputFormat or 'text'}"
)
try:
if options is None or (hasattr(options, 'operationType') and options.operationType is None):
# Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
options = await self._analyzePromptAndCreateOptions(prompt)
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
# Compressing would truncate the template structure and confuse the AI
if outputFormat: # Document generation with structured output
# Defensive fallback in case prompt analysis returned no options
if not options:
options = AiCallOptions()
options.compressPrompt = False # JSON templates must NOT be truncated
options.compressContext = False # Context also should not be compressed
# Handle document generation with specific output format using unified approach
if outputFormat:
# Use unified generation method for all document generation
if documents and len(documents) > 0:
self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
else:
self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
extracted_content = None
self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
# First call without continuation context
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
# Prepare prompt builder arguments for continuation
# (passed together with buildGenerationPrompt to _callAiWithLooping below)
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": extracted_content
}
self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId
)
self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
# Parse the generated JSON (extract fenced/embedded JSON first)
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
# Parse failure is reported to the caller as an error dict, not raised
logger.error(f"Failed to parse generated JSON: {str(e)}")
logger.error(f"JSON content length: {len(generated_json)}")
logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
# NOTE(review): e.pos refers to the extracted JSON string, but the slice is taken
# from the unextracted generated_json - the window may be offset; confirm
logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
# Write the problematic JSON to debug file
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
# Render to final format using the existing renderer
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, title or "Generated Document", prompt, self
)
# Build result in the expected format
result = {
"success": True,
"content": generated_data,
"documents": [{
"documentName": f"generated.{outputFormat}",
"documentData": rendered_content,
"mimeType": mime_type,
"title": title or "Generated Document"
}],
"is_multi_file": False,
"format": outputFormat,
"title": title,
"split_strategy": "single",
"total_documents": 1,
"processed_documents": 1
}
# Log AI response for debugging
self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
self.services.workflow.progressLogFinish(aiOperationId, True)
return result
except Exception as e:
# Rendering failure is also reported as an error dict, not raised
logger.error(f"Error rendering document: {str(e)}")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": f"Rendering failed: {str(e)}"}
# Handle text calls (no output format specified)
self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
if documents:
# Use document processing for text calls with documents
result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
else:
# Use shared core function for direct text calls
result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)
self.services.workflow.progressLogFinish(aiOperationId, True)
return result
except Exception as e:
# Anything not handled above: mark the operation failed and propagate
logger.error(f"Error in callAiDocuments: {str(e)}")
self.services.workflow.progressLogFinish(aiOperationId, False)
raise
# AI Image Analysis
async def readImage(
    self,
    prompt: str,
    imageData: Union[str, bytes],
    mimeType: str = None,
    options: Optional[AiCallOptions] = None,
) -> str:
    """
    Analyse an image with the AI via interface.call() and a single image content part.

    Args:
        prompt: Instruction for the image analysis
        imageData: Image as raw bytes or as an already base64-encoded string
        mimeType: Image MIME type; "image/jpeg" is used when not given
        options: Optional call options; the operation type is always forced
            to IMAGE_ANALYSE (on the passed object itself)

    Returns:
        The AI response content, or a string starting with "Error: " when no
        image was given, the response was empty, or an exception occurred.
    """
    try:
        # Reject missing image data up front
        if not imageData:
            error_msg = "No image data provided"
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"

        # Image processing always runs as IMAGE_ANALYSE
        if options is not None:
            options.operationType = OperationTypeEnum.IMAGE_ANALYSE
        else:
            options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)

        from modules.datamodels.datamodelExtraction import ContentPart
        import base64

        # ContentPart.data must be a string: bytes are base64-encoded,
        # strings are taken as already encoded
        encodedImage = (
            base64.b64encode(imageData).decode('utf-8')
            if isinstance(imageData, bytes)
            else imageData
        )
        imagePart = ContentPart(
            id="image_0",
            parentId=None,
            label="Image",
            typeGroup="image",
            mimeType=mimeType or "image/jpeg",
            data=encodedImage,
            metadata={"imageAnalysis": True}
        )

        from modules.datamodels.datamodelAi import AiCallRequest
        analysisRequest = AiCallRequest(
            prompt=prompt,
            context="",
            options=options,
            contentParts=[imagePart]
        )
        response = await self.aiObjects.call(analysisRequest)
        result = response.content

        # An empty or whitespace-only answer is reported as an error
        isBlankText = isinstance(result, str) and not result.strip()
        if not result or isBlankText:
            error_msg = f"No response from AI image analysis (result: {repr(result)})"
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"
        return result
    except Exception as e:
        logger.error(f"Error in AI image analysis: {str(e)}")
        return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
    self,
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid",
    options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
    """
    Generate an image through interface.generateImage() and record a workflow stat.

    Args:
        prompt: Description of the image to generate
        size: Requested image size
        quality: Requested quality level
        style: Requested style
        options: Optional AI call options passed through to the interface

    Returns:
        A dict with success/content/modelName/priceUsd/processingTime when the
        response exposes a content attribute, otherwise the response as-is.
        On any exception: {"success": False, "error": <message>}.
    """
    try:
        response = await self.aiObjects.generateImage(prompt, size, quality, style, options)

        # Emit stats for image generation
        currentWorkflow = self.services.currentWorkflow
        self.services.workflow.storeWorkflowStat(currentWorkflow, response, f"ai.generate.image")

        # Responses without a content attribute are handed back untouched
        if not hasattr(response, 'content'):
            return response

        # Dict format kept for backward compatibility
        return {
            "success": True,
            "content": response.content,
            "modelName": response.modelName,
            "priceUsd": response.priceUsd,
            "processingTime": response.processingTime
        }
    except Exception as e:
        logger.error(f"Error in AI image generation: {str(e)}")
        return {"success": False, "error": str(e)}

View file

@ -1,500 +0,0 @@
import re
import json
import logging
import time
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions
logger = logging.getLogger(__name__)
class SubDocumentGeneration:
"""Document generation operations including single-file and multi-file generation."""
def __init__(self, services, aiObjects, documentProcessor):
"""Initialize document generation service.
Args:
services: Service center instance for accessing other services
aiObjects: Initialized AiObjects instance
documentProcessor: Document processing service instance
"""
self.services = services
self.aiObjects = aiObjects
self.documentProcessor = documentProcessor
async def callAiWithDocumentGeneration(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions,
    outputFormat: str,
    title: Optional[str]
) -> Dict[str, Any]:
    """
    Generate documents through the unified pipeline (extraction prompt,
    unified processing, unified result).

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration options
        outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
        title: Optional title for generated documents

    Returns:
        Dict with generated documents and metadata in unified structure; on
        any exception the unified error structure is returned instead.
    """
    try:
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService

        # Step 1: extraction prompt adapted to the output format
        promptSource = GenerationService(self.services)
        extractionPrompt = await promptSource.getAdaptiveExtractionPrompt(
            outputFormat=outputFormat,
            userPrompt=prompt,
            title=title,
            aiService=self
        )

        # Step 2: unified processing (always multi-file approach)
        unifiedResponse = await self._processDocumentsUnified(documents, extractionPrompt, options)

        # Step 3: unified result structure
        return await self._buildUnifiedResult(unifiedResponse, outputFormat, title)
    except Exception as e:
        logger.error(f"Error in unified document generation: {str(e)}")
        return self._buildErrorResult(str(e), outputFormat, title)
async def _processDocumentsUnified(
    self,
    documents: Optional[List[ChatDocument]],
    extractionPrompt: str,
    options: AiCallOptions
) -> Dict[str, Any]:
    """
    Run the unified document pipeline and return the validated AI response.

    Handles both single and multi-file cases; the response is always treated
    as a multi-file structure internally.

    Args:
        documents: Optional list of documents to process
        extractionPrompt: Extraction prompt sent with the documents
        options: AI call configuration options

    Returns:
        The AI response after it passed _validateUnifiedResponseStructure.

    Raises:
        Exception: when the response is not a valid unified document
            structure; any other failure is logged, the progress operation is
            finished as failed, and the exception is re-raised.
    """
    # Init progress logger. Without an active workflow a synthetic id is used,
    # so building the operation id cannot fail before progress tracking starts.
    workflow = self.services.currentWorkflow
    workflowId = workflow.id if workflow else f"no-workflow-{int(time.time())}"
    operationId = f"docGenUnified_{workflowId}_{int(time.time())}"
    try:
        # Start progress tracking
        self.services.workflow.progressLogStart(
            operationId,
            "Generate",
            "Unified Document Generation",
            f"Processing {len(documents) if documents else 0} documents"
        )
        # Update progress - generating extraction prompt
        self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt")
        # Write prompt to debug file
        self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents)
        # Process with unified JSON pipeline using continuation logic
        aiResponse = await self.documentProcessor.processDocumentsWithContinuation(
            documents, extractionPrompt, options
        )
        # Update progress - AI processing completed
        self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done")
        # Write AI response to debug file
        if isinstance(aiResponse, dict):
            response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False)
        else:
            response_json = str(aiResponse)
        self.services.utils.writeDebugFile(response_json, "ai_response", documents)
        # Validate response structure
        if not self._validateUnifiedResponseStructure(aiResponse):
            raise Exception("AI response is not valid unified document structure")
        # Emit raw extracted data as a chat message attachment (best effort)
        try:
            await self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified")
        except Exception:
            # Non-fatal, but keep the traceback so the failure can be diagnosed
            logger.warning("Failed to emit raw extraction chat message (unified)", exc_info=True)
        # Complete progress tracking
        self.services.workflow.progressLogFinish(operationId, True)
        return aiResponse
    except Exception as e:
        logger.error(f"Error in unified document processing: {str(e)}")
        self.services.workflow.progressLogFinish(operationId, False)
        raise
def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool:
    """
    Check that an AI response carries a usable document structure.

    Accepts the multi-file form (a non-empty "documents" list with at least
    one valid document) and the single-file form (a non-empty "sections"
    list). A single-file response is converted in place: a "documents" entry
    wrapping the sections is added to the response.

    Returns:
        True when the structure is usable, False otherwise (including when an
        exception occurs during validation).
    """
    try:
        if not isinstance(response, dict):
            logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
            return False

        # Structure probes (also reported in the failure log below)
        hasDocuments = "documents" in response
        isDocumentsList = isinstance(response.get("documents"), list)
        hasSections = "sections" in response
        isSectionsList = isinstance(response.get("sections"), list)

        # Multi-file structure
        if hasDocuments and isDocumentsList:
            documents = response.get("documents", [])
            if not documents:
                logger.warning("Unified validation failed: documents array is empty")
                return False
            # Each document is judged on its own; one valid document is enough
            validDocuments = 0
            for position, candidate in enumerate(documents):
                if not self._validateDocumentStructure(candidate, position):
                    logger.warning(f"Document {position} failed validation, but continuing with others")
                    continue
                validDocuments += 1
            if validDocuments == 0:
                logger.error("Unified validation failed: no valid documents found")
                return False
            logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid")
            return True

        # Single-file structure - convert to multi-file format
        if hasSections and isSectionsList:
            logger.info("Converting single-file structure to multi-file format")
            sections = response.get("sections", [])
            if not sections:
                logger.warning("Unified validation failed: sections array is empty")
                return False
            wrappedDocument = {
                "id": "document_1",
                "title": response.get("metadata", {}).get("title", "Generated Document"),
                "filename": "document_1",
                "sections": sections
            }
            response["documents"] = [wrappedDocument]
            logger.info("Successfully converted single-file structure to multi-file format")
            return True

        # No valid structure found - fail with clear error details
        logger.error("Unified validation failed: No valid structure found")
        logger.error(f"Response type: {type(response)}")
        logger.error(f"Available keys: {list(response.keys()) if isinstance(response, dict) else 'Not a dict'}")
        logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}")
        logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}")
        logger.error(f"Full response: {response}")
        return False
    except Exception as e:
        logger.warning(f"Unified response validation failed with exception: {str(e)}")
        return False
def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool:
    """
    Validate one document of a unified response.

    A document is valid when it is a dict with a "title" key and a non-empty
    "sections" list. Problems are logged and reported through the return
    value only; nothing is raised.

    Args:
        document: The document to check
        documentIndex: Position of the document, used in log messages

    Returns:
        True if the document is valid, False otherwise.
    """
    try:
        if not isinstance(document, dict):
            logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}")
            logger.error(f"Document {documentIndex} content: {document}")
            return False

        # Required fields
        hasTitle = "title" in document
        hasSections = "sections" in document
        isSectionsList = isinstance(document.get("sections"), list)

        logger.debug(f"Document {documentIndex} structure check:")
        logger.debug(f" - hasTitle: {hasTitle}")
        logger.debug(f" - hasSections: {hasSections}")
        logger.debug(f" - isSectionsList: {isSectionsList}")
        logger.debug(f" - available keys: {list(document.keys())}")

        structureOk = hasTitle and hasSections and isSectionsList
        if not structureOk:
            logger.error(f"Document {documentIndex} validation failed:")
            logger.error(f" - title present: {hasTitle}")
            logger.error(f" - sections present: {hasSections}")
            logger.error(f" - sections is list: {isSectionsList}")
            logger.error(f" - document content: {document}")
            return False

        # A document without any section is rejected as well
        if not document.get("sections", []):
            logger.error(f"Document {documentIndex} validation failed: sections array is empty")
            logger.error(f" - document content: {document}")
            return False

        logger.info(f"Document {documentIndex} validation passed")
        return True
    except Exception as e:
        logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}")
        logger.error(f" - document content: {document}")
        return False
async def _buildUnifiedResult(
    self,
    aiResponse: Dict[str, Any],
    outputFormat: str,
    title: str
) -> Dict[str, Any]:
    """
    Process every document of the AI response and assemble the unified result.

    Documents that fail processing are skipped with a warning. If none can
    be processed, or anything else goes wrong, the unified error structure
    is returned.

    Args:
        aiResponse: Validated AI response with a "documents" list
        outputFormat: Target output format
        title: Title passed on to document processing and stored in the result

    Returns:
        Array-based result dict; "content" always holds the multi-document structure.
    """
    try:
        generatedDocuments = []
        for position, docData in enumerate(aiResponse.get("documents", [])):
            try:
                processed = await self._processDocument(docData, outputFormat, title, position)
            except Exception as e:
                logger.warning(f"Failed to process document {position}: {str(e)}, skipping")
            else:
                generatedDocuments.append(processed)

        if not generatedDocuments:
            raise Exception("No documents could be processed successfully")

        documentCount = len(generatedDocuments)
        return {
            "success": True,
            "content": aiResponse,  # Always multi-document structure
            "documents": generatedDocuments,  # Always array
            "is_multi_file": documentCount > 1,
            "format": outputFormat,
            "title": title,
            "total_documents": documentCount,
            "processed_documents": documentCount
        }
    except Exception as e:
        logger.error(f"Error building unified result: {str(e)}")
        return self._buildErrorResult(str(e), outputFormat, title)
async def _processDocument(
self,
docData: Dict[str, Any],
outputFormat: str,
title: str,
documentIndex: int
) -> Dict[str, Any]:
"""
Process individual document with content enhancement and rendering.
If the document has sections, an AI call is made to enhance the extracted JSON;
whenever that enhancement fails (exception, empty response, or invalid JSON) the
original docData is rendered instead. Rendering errors are logged and re-raised.
Args:
docData: Extracted document data (dict with "sections", optionally "title"/"filename")
outputFormat: Target output format; also used as the file extension
title: Fallback title when docData has no "title"
documentIndex: Position of the document, used for logging and the default filename
Returns:
Dict with documentName, documentData, mimeType, title and documentIndex.
"""
try:
# Get generation service
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
# Use AI generation to enhance the extracted JSON before rendering
enhancedContent = docData # Default to original
if docData.get("sections"):
try:
# Get generation prompt directly
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
# NOTE(review): title is passed as userPrompt here - confirm this is intended
generationPrompt = await buildGenerationPrompt(
outputFormat=outputFormat,
userPrompt=title,
title=docData.get("title", title)
)
# Prepare the AI call
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
requestOptions = AiCallOptions()
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
# Create context with the extracted JSON content
context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}"
request = AiCallRequest(
prompt=generationPrompt,
context=context,
options=requestOptions
)
# Write document generation prompt to debug file
self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt")
# Call AI to enhance the content
response = await self.aiObjects.call(request)
# Write document generation response to debug file
# (response.content is read here before the emptiness check below)
self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response")
if response and response.content:
# Parse the AI response as JSON
try:
result = response.content.strip()
# Extract JSON from markdown if present
jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if jsonMatch:
result = jsonMatch.group(1).strip()
elif result.startswith('```json'):
# Fenced block that did not match the full pattern: strip the fence markers
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to parse JSON
enhancedContent = json.loads(result)
logger.info(f"AI enhanced JSON content successfully for document {documentIndex}")
except json.JSONDecodeError as e:
logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content")
enhancedContent = docData
else:
logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content")
enhancedContent = docData
except Exception as e:
# Enhancement is best effort: any failure falls back to the extracted content
logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content")
enhancedContent = docData
# Render the enhanced JSON content
renderedContent, mimeType = await generationService.renderReport(
extractedContent=enhancedContent,
outputFormat=outputFormat,
title=docData.get("title", title),
userPrompt=title,
aiService=self
)
# Generate proper filename
baseFilename = docData.get("filename", f"document_{documentIndex + 1}")
# Drop an existing extension (everything after the last dot)
if '.' in baseFilename:
baseFilename = baseFilename.rsplit('.', 1)[0]
# Add proper extension based on output format
# (docx/pdf/html are matched case-insensitively; any other format is appended as given)
if outputFormat.lower() == "docx":
filename = f"{baseFilename}.docx"
elif outputFormat.lower() == "pdf":
filename = f"{baseFilename}.pdf"
elif outputFormat.lower() == "html":
filename = f"{baseFilename}.html"
else:
filename = f"{baseFilename}.{outputFormat}"
return {
"documentName": filename,
"documentData": renderedContent,
"mimeType": mimeType,
"title": docData.get("title", title),
"documentIndex": documentIndex
}
except Exception as e:
logger.error(f"Error processing document {documentIndex}: {str(e)}")
raise
def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]:
"""
Build error result with unified structure.
"""
return {
"success": False,
"error": errorMessage,
"content": {},
"documents": [],
"is_multi_file": False,
"format": outputFormat,
"title": title,
"split_strategy": "error",
"total_documents": 0,
"processed_documents": 0
}
async def _callAiJson(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions
) -> Dict[str, Any]:
    """
    Run an AI call over documents and return a structured JSON document
    instead of text.

    Delegates to documentProcessor.processDocumentsPerChunkJson (documents
    first, then prompt and options).
    """
    mergedJson = await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
    return mergedJson
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
    """
    Create a ChatMessage with the extracted raw JSON attached as a file so the user
    has access to the data even if downstream processing fails.

    This is best effort: any failure is logged as a warning and never
    propagated to the caller.

    Args:
        payload: JSON-serializable data to store
        label: Prefix of the stored file name and the message's documentsLabel
    """
    try:
        services = self.services
        workflow = services.currentWorkflow
        # Serialize payload
        ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
        content_text = json.dumps(payload, ensure_ascii=False, indent=2)
        content_bytes = content_text.encode('utf-8')
        # Store as file via component storage
        file_name = f"{label}_{ts}.json"
        file_item = services.interfaceDbComponent.createFile(
            name=file_name,
            mimeType="application/json",
            content=content_bytes
        )
        services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
        # Lookup file info for ChatDocument; a missing lookup result falls back to local values
        file_info = services.workflow.getFileInfo(file_item.id) or {}
        doc = ChatDocument(
            messageId="",  # set after message creation
            fileId=file_item.id,
            fileName=file_info.get("fileName", file_name),
            fileSize=file_info.get("size", len(content_bytes)),
            mimeType=file_info.get("mimeType", "application/json")
        )
        # Message referencing the file; the document itself is passed separately below
        messageData = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": "Raw extraction data saved",
            "status": "data",
            "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
            "publishedAt": services.utils.timestampGetUtc(),
            "documentsLabel": label,
            "documents": []
        }
        # NOTE(review): the message is stored against services.workflow.workflow while
        # workflowId/sequenceNr come from services.currentWorkflow - confirm both refer
        # to the same workflow
        services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc])
    except Exception as e:
        # Non-fatal: report the failure instead of dropping it silently
        logger.warning(f"Failed to store raw data chat message '{label}': {str(e)}")

File diff suppressed because it is too large Load diff

View file

@ -1,165 +0,0 @@
"""
Shared utilities for AI services to eliminate code duplication.
This module contains common functions used across multiple AI service modules
to maintain DRY principles and ensure consistency.
"""
import re
import logging
from typing import Dict, Any, List, Optional, Union
logger = logging.getLogger(__name__)
def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
    """
    Fill {{KEY:name}} markers in a prompt template.

    Args:
        prompt: The base prompt template
        placeholders: Mapping of placeholder name to replacement content

    Returns:
        The prompt with every {{KEY:name}} marker replaced by str(content).
        Entries whose content is None are left untouched; an empty string
        still replaces its marker.
    """
    if not placeholders:
        return prompt

    rendered = prompt
    for key, value in placeholders.items():
        if value is not None:
            rendered = rendered.replace(f"{{{{KEY:{key}}}}}", str(value))
    return rendered
def sanitizePromptContent(content: str, contentType: str = "text") -> str:
    """
    Centralized prompt content sanitization to prevent injection attacks and
    ensure safe presentation.

    Control characters (except newline, carriage return and tab) are always
    removed. Further escaping depends on contentType:

    - "userinput": curly braces are doubled, backslashes and both quote
      characters are backslash-escaped, and the result is wrapped in single
      quotes.
    - "json": backslashes, double quotes, newlines, carriage returns and
      tabs are backslash-escaped.
    - "document", "text" and any other value: as "json", plus single quotes.

    Args:
        content: The content to sanitize (converted with str() if needed)
        contentType: Type of content ("text", "userinput", "json", "document")

    Returns:
        Sanitized content ready for AI prompt insertion; "" for empty/falsy
        content; a fixed error marker if sanitization itself fails.
    """
    if not content:
        return ""
    try:
        # Remove null bytes and control characters (except newlines and tabs)
        sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', str(content))

        if contentType == "userinput":
            # Extra security for user-controlled content:
            # escape curly braces to prevent placeholder injection
            sanitized = sanitized.replace('{', '{{').replace('}', '}}')
            # Backslashes must be escaped BEFORE the quotes; otherwise an input
            # such as \' would become \\' and the quote would end the wrapper early
            sanitized = sanitized.replace('\\', '\\\\')
            sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
            return f"'{sanitized}'"

        # Shared escaping for json / document / text content
        sanitized = sanitized.replace('\\', '\\\\').replace('"', '\\"')
        if contentType != "json":
            # JSON content keeps single quotes as they are
            sanitized = sanitized.replace("'", "\\'")
        return sanitized.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
    except Exception as e:
        logger.error(f"Error sanitizing prompt content: {str(e)}")
        # Return a safe fallback
        return "[ERROR: Content could not be safely sanitized]"
def extractTextFromContentParts(extracted_content) -> str:
    """
    Collect the textual data of an extraction result.

    Args:
        extracted_content: Object exposing a `parts` iterable (e.g. ContentExtracted)

    Returns:
        The non-empty `data` of all parts whose typeGroup is text, table or
        structure, joined with blank lines; "" when there is nothing to read.
    """
    if not extracted_content or not hasattr(extracted_content, 'parts'):
        return ""

    textualGroups = ['text', 'table', 'structure']
    collected = [
        part.data
        for part in extracted_content.parts
        if hasattr(part, 'typeGroup')
        and part.typeGroup in textualGroups
        and hasattr(part, 'data')
        and part.data
    ]
    return "\n\n".join(collected)
def reduceText(text: str, reduction_factor: float) -> str:
    """
    Reduce text size by the specified factor.

    Args:
        text: Text to reduce
        reduction_factor: Fraction of the text to keep (0.0 to 1.0). Values
            of 1.0 or more return the text unchanged; negative values are
            treated as 0.0.

    Returns:
        The leading part of the text followed by the truncation indicator
        "... [reduced]".
    """
    if reduction_factor >= 1.0:
        return text
    # Clamp at zero: a negative length would slice from the end of the text
    # and keep most of it instead of reducing it
    target_length = max(0, int(len(text) * reduction_factor))
    return text[:target_length] + "... [reduced]"
def determineCallType(documents: Optional[List], operation_type: str) -> str:
    """
    Pick the call type for an AI request.

    Args:
        documents: List of ChatDocument objects (may be None or empty)
        operation_type: Type of operation being performed

    Returns:
        "plan" for a planning operation without documents, otherwise "text".
    """
    no_documents = documents is None or len(documents) == 0
    return "plan" if no_documents and operation_type == "plan" else "text"

View file

@ -19,6 +19,16 @@ class ExtractionService:
self.services = services self.services = services
self._extractorRegistry = ExtractorRegistry() self._extractorRegistry = ExtractorRegistry()
self._chunkerRegistry = ChunkerRegistry() self._chunkerRegistry = ChunkerRegistry()
# Ensure AI connectors are discovered so pricing models are available
try:
# If internal model is missing, trigger discovery and registration
if modelRegistry.getModel("internal-extractor") is None:
discovered = modelRegistry.discoverConnectors()
for connector in discovered:
modelRegistry.registerConnector(connector)
except Exception:
# Propagate actual errors during use; init should be fast and side-effect free otherwise
pass
def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]: def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
""" """
@ -82,12 +92,12 @@ class ExtractionService:
p.metadata["documentMimeType"] = documentData["mimeType"] p.metadata["documentMimeType"] = documentData["mimeType"]
# Log chunking information # Log chunking information
chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)] chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
if chunked_parts: if chunkedParts:
logger.debug(f"=== CHUNKING RESULTS ===") logger.debug(f"=== CHUNKING RESULTS ===")
logger.debug(f"Total parts: {len(ec.parts)}") logger.debug(f"Total parts: {len(ec.parts)}")
logger.debug(f"Chunked parts: {len(chunked_parts)}") logger.debug(f"Chunked parts: {len(chunkedParts)}")
for chunk in chunked_parts: for chunk in chunkedParts:
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})") logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
else: else:
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits") logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
@ -101,8 +111,11 @@ class ExtractionService:
# Emit stats for extraction operation # Emit stats for extraction operation
# Use internal extraction model for pricing # Use internal extraction model for pricing
modelName = "internal_extraction" modelName = "internal-extractor"
model = modelRegistry.getModel(modelName) model = modelRegistry.getModel(modelName)
# Hard fail if model is missing; caller must ensure connectors are registered
if model is None or model.calculatePriceUsd is None:
raise RuntimeError(f"Pricing model not available: {modelName}")
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived) priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
# Create AiCallResponse with real calculation # Create AiCallResponse with real calculation

View file

@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger:
4. Minimize total number of AI calls 4. Minimize total number of AI calls
""" """
def __init__(self, model_capabilities: Dict[str, Any]): def __init__(self, modelCapabilities: Dict[str, Any]):
self.max_tokens = model_capabilities.get("maxTokens", 4000) self.maxTokens = modelCapabilities.get("maxTokens", 4000)
self.safety_margin = model_capabilities.get("safetyMargin", 0.1) self.safetyMargin = modelCapabilities.get("safetyMargin", 0.1)
self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin)) self.effectiveMaxTokens = int(self.maxTokens * (1 - self.safetyMargin))
self.chars_per_token = model_capabilities.get("charsPerToken", 4) # Rough estimation self.charsPerToken = modelCapabilities.get("charsPerToken", 4) # Rough estimation
def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]: def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
""" """
Merge chunks intelligently based on token limits. Merge chunks intelligently based on token limits.
@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger:
if not chunks: if not chunks:
return chunks return chunks
logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}") logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}")
# Calculate tokens for prompt # Calculate tokens for prompt
prompt_tokens = self._estimate_tokens(prompt) promptTokens = self._estimateTokens(prompt)
available_tokens = self.effective_max_tokens - prompt_tokens availableTokens = self.effectiveMaxTokens - promptTokens
logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}") logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}")
# Group chunks by document and type for semantic coherence # Group chunks by document and type for semantic coherence
grouped_chunks = self._group_chunks_by_document_and_type(chunks) groupedChunks = self._groupChunksByDocumentAndType(chunks)
merged_parts = [] mergedParts = []
for group_key, group_chunks in grouped_chunks.items(): for groupKey, groupChunks in groupedChunks.items():
logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)") logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)")
# Merge chunks within this group optimally # Merge chunks within this group optimally
group_merged = self._merge_group_optimally(group_chunks, available_tokens) groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens)
merged_parts.extend(group_merged) mergedParts.extend(groupMerged)
logger.info(f"✅ Intelligent merging complete: {len(chunks)}{len(merged_parts)} parts") logger.info(f"✅ Intelligent merging complete: {len(chunks)}{len(mergedParts)} parts")
return merged_parts return mergedParts
def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]: def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
"""Group chunks by document and type for semantic coherence.""" """Group chunks by document and type for semantic coherence."""
groups = {} groups = {}
for chunk in chunks: for chunk in chunks:
# Create group key: document_id + type_group # Create group key: document_id + type_group
doc_id = chunk.metadata.get("documentId", "unknown") docId = chunk.metadata.get("documentId", "unknown")
type_group = chunk.typeGroup typeGroup = chunk.typeGroup
group_key = f"{doc_id}_{type_group}" groupKey = f"{docId}_{typeGroup}"
if group_key not in groups: if groupKey not in groups:
groups[group_key] = [] groups[groupKey] = []
groups[group_key].append(chunk) groups[groupKey].append(chunk)
return groups return groups
def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]: def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]:
"""Merge chunks within a group optimally to minimize AI calls.""" """Merge chunks within a group optimally to minimize AI calls."""
if not chunks: if not chunks:
return [] return []
# Sort chunks by size (smallest first for better packing) # Sort chunks by size (smallest first for better packing)
sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data)) sortedChunks = sorted(chunks, key=lambda c: self._estimateTokens(c.data))
merged_parts = [] mergedParts = []
current_group = [] currentGroup = []
current_tokens = 0 currentTokens = 0
for chunk in sorted_chunks: for chunk in sortedChunks:
chunk_tokens = self._estimate_tokens(chunk.data) chunkTokens = self._estimateTokens(chunk.data)
# Special case: If single chunk is already at max size, process it alone # Special case: If single chunk is already at max size, process it alone
if chunk_tokens >= available_tokens * 0.9: # 90% of available tokens if chunkTokens >= availableTokens * 0.9: # 90% of available tokens
# Finalize current group if it exists # Finalize current group if it exists
if current_group: if currentGroup:
merged_part = self._create_merged_part(current_group, current_tokens) mergedPart = self._createMergedPart(currentGroup, currentTokens)
merged_parts.append(merged_part) mergedParts.append(mergedPart)
current_group = [] currentGroup = []
current_tokens = 0 currentTokens = 0
# Process large chunk individually # Process large chunk individually
merged_parts.append(chunk) mergedParts.append(chunk)
logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens") logger.debug(f"🔍 Large chunk processed individually: {chunkTokens} tokens")
continue continue
# If adding this chunk would exceed limit, finalize current group # If adding this chunk would exceed limit, finalize current group
if current_tokens + chunk_tokens > available_tokens and current_group: if currentTokens + chunkTokens > availableTokens and currentGroup:
merged_part = self._create_merged_part(current_group, current_tokens) mergedPart = self._createMergedPart(currentGroup, currentTokens)
merged_parts.append(merged_part) mergedParts.append(mergedPart)
current_group = [chunk] currentGroup = [chunk]
current_tokens = chunk_tokens currentTokens = chunkTokens
else: else:
current_group.append(chunk) currentGroup.append(chunk)
current_tokens += chunk_tokens currentTokens += chunkTokens
# Finalize remaining group # Finalize remaining group
if current_group: if currentGroup:
merged_part = self._create_merged_part(current_group, current_tokens) mergedPart = self._createMergedPart(currentGroup, currentTokens)
merged_parts.append(merged_part) mergedParts.append(mergedPart)
logger.info(f"📦 Group merged: {len(chunks)}{len(merged_parts)} parts") logger.info(f"📦 Group merged: {len(chunks)}{len(mergedParts)} parts")
return merged_parts return mergedParts
def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart: def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart:
"""Create a merged ContentPart from multiple chunks.""" """Create a merged ContentPart from multiple chunks."""
if len(chunks) == 1: if len(chunks) == 1:
return chunks[0] # No need to merge single chunk return chunks[0] # No need to merge single chunk
# Combine data with semantic separators # Combine data with semantic separators
combined_data = self._combine_chunk_data(chunks) combinedData = self._combineChunkData(chunks)
# Use metadata from first chunk as base # Use metadata from first chunk as base
base_chunk = chunks[0] baseChunk = chunks[0]
merged_metadata = base_chunk.metadata.copy() mergedMetadata = baseChunk.metadata.copy()
merged_metadata.update({ mergedMetadata.update({
"merged": True, "merged": True,
"originalChunkCount": len(chunks), "originalChunkCount": len(chunks),
"totalTokens": total_tokens, "totalTokens": totalTokens,
"originalChunkIds": [c.id for c in chunks], "originalChunkIds": [c.id for c in chunks],
"size": len(combined_data.encode('utf-8')) "size": len(combinedData.encode('utf-8'))
}) })
merged_part = ContentPart( mergedPart = ContentPart(
id=makeId(), id=makeId(),
parentId=base_chunk.parentId, parentId=baseChunk.parentId,
label=f"merged_{len(chunks)}_chunks", label=f"merged_{len(chunks)}_chunks",
typeGroup=base_chunk.typeGroup, typeGroup=baseChunk.typeGroup,
mimeType=base_chunk.mimeType, mimeType=baseChunk.mimeType,
data=combined_data, data=combinedData,
metadata=merged_metadata metadata=mergedMetadata
) )
logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens") logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens")
return merged_part return mergedPart
def _combine_chunk_data(self, chunks: List[ContentPart]) -> str: def _combineChunkData(self, chunks: List[ContentPart]) -> str:
"""Combine chunk data with appropriate separators.""" """Combine chunk data with appropriate separators."""
if not chunks: if not chunks:
return "" return ""
@ -173,37 +173,37 @@ class IntelligentTokenAwareMerger:
return separator.join([chunk.data for chunk in chunks]) return separator.join([chunk.data for chunk in chunks])
def _estimate_tokens(self, text: str) -> int: def _estimateTokens(self, text: str) -> int:
"""Estimate token count for text.""" """Estimate token count for text."""
if not text: if not text:
return 0 return 0
return len(text) // self.chars_per_token return len(text) // self.charsPerToken
def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]: def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]:
"""Calculate optimization statistics with detailed analysis.""" """Calculate optimization statistics with detailed analysis."""
original_calls = len(original_chunks) originalCalls = len(originalChunks)
optimized_calls = len(merged_parts) optimizedCalls = len(mergedParts)
reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0 reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0
# Analyze chunk sizes # Analyze chunk sizes
large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9] largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9]
small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9] smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9]
# Calculate theoretical maximum optimization (if all small chunks could be merged) # Calculate theoretical maximum optimization (if all small chunks could be merged)
theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3) # Assume 3 small chunks per call theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3) # Assume 3 small chunks per call
theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0 theoreticalReduction = ((originalCalls - theoreticalMinCalls) / originalCalls * 100) if originalCalls > 0 else 0
return { return {
"original_ai_calls": original_calls, "original_ai_calls": originalCalls,
"optimized_ai_calls": optimized_calls, "optimized_ai_calls": optimizedCalls,
"reduction_percent": round(reduction_percent, 1), "reduction_percent": round(reductionPercent, 1),
"cost_savings": f"{reduction_percent:.1f}%", "cost_savings": f"{reductionPercent:.1f}%",
"efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "", "efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "",
"analysis": { "analysis": {
"large_chunks": len(large_chunks), "large_chunks": len(largeChunks),
"small_chunks": len(small_chunks), "small_chunks": len(smallChunks),
"theoretical_min_calls": theoretical_min_calls, "theoretical_min_calls": theoreticalMinCalls,
"theoretical_reduction": round(theoretical_reduction, 1), "theoretical_reduction": round(theoreticalReduction, 1),
"optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low" "optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low"
} }
} }

View file

@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[Con
subMerger = IntelligentTokenAwareMerger(model_capabilities) subMerger = IntelligentTokenAwareMerger(model_capabilities)
# Use intelligent merging for all parts # Use intelligent merging for all parts
merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "") merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")
# Calculate and log optimization stats # Calculate and log optimization stats
stats = subMerger.calculate_optimization_stats(parts, merged) stats = subMerger.calculateOptimizationStats(parts, merged)
logger.info(f"🧠 Intelligent merging stats: {stats}") logger.info(f"🧠 Intelligent merging stats: {stats}")
logger.debug(f"Intelligent merging: {stats['original_ai_calls']}{stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)") logger.debug(f"Intelligent merging: {stats['original_ai_calls']}{stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")

View file

@ -101,7 +101,7 @@ async def buildExtractionPrompt(
# Build base prompt # Build base prompt
adaptive_prompt = f""" adaptive_prompt = f"""
{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} {services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output. You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

View file

@ -37,13 +37,13 @@ class GenerationService:
return [] return []
# Process each document from the AI action result # Process each document from the AI action result
processed_documents = [] processedDocuments = []
for doc in documents: for doc in documents:
processed_doc = self.processSingleDocument(doc, action) processedDoc = self.processSingleDocument(doc, action)
if processed_doc: if processedDoc:
processed_documents.append(processed_doc) processedDocuments.append(processedDoc)
return processed_documents return processedDocuments
except Exception as e: except Exception as e:
logger.error(f"Error processing action result documents: {str(e)}") logger.error(f"Error processing action result documents: {str(e)}")
return [] return []
@ -77,20 +77,20 @@ class GenerationService:
try: try:
processed_docs = self.processActionResultDocuments(action_result, action, workflow) processed_docs = self.processActionResultDocuments(action_result, action, workflow)
created_documents = [] createdDocuments = []
for i, doc_data in enumerate(processed_docs): for i, doc_data in enumerate(processed_docs):
try: try:
document_name = doc_data['fileName'] documentName = doc_data['fileName']
document_data = doc_data['content'] documentData = doc_data['content']
mime_type = doc_data['mimeType'] mimeType = doc_data['mimeType']
# Convert document data to string content # Convert document data to string content
content = convertDocumentDataToString(document_data, getFileExtension(document_name)) content = convertDocumentDataToString(documentData, getFileExtension(documentName))
# Skip empty or minimal content # Skip empty or minimal content
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"] minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimal_content_patterns: if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
logger.warning(f"Empty or minimal content for document {document_name}, skipping") logger.warning(f"Empty or minimal content for document {documentName}, skipping")
continue continue
# Normalize file extension based on mime type if missing or incorrect # Normalize file extension based on mime type if missing or incorrect
@ -105,35 +105,35 @@ class GenerationService:
"text/plain": ".txt", "text/plain": ".txt",
"application/json": ".json", "application/json": ".json",
} }
expected_ext = mime_to_ext.get(mime_type) expectedExt = mime_to_ext.get(mimeType)
if expected_ext: if expectedExt:
if not document_name.lower().endswith(expected_ext): if not documentName.lower().endswith(expectedExt):
# Append/replace extension to match mime type # Append/replace extension to match mime type
if "." in document_name: if "." in documentName:
document_name = document_name.rsplit(".", 1)[0] + expected_ext documentName = documentName.rsplit(".", 1)[0] + expectedExt
else: else:
document_name = document_name + expected_ext documentName = documentName + expectedExt
except Exception: except Exception:
pass pass
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
base64encoded = False base64encoded = False
try: try:
binary_mime_types = { binaryMimeTypes = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation", "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf", "application/pdf",
} }
if isinstance(document_data, str) and mime_type in binary_mime_types: if isinstance(documentData, str) and mimeType in binaryMimeTypes:
base64encoded = True base64encoded = True
except Exception: except Exception:
base64encoded = False base64encoded = False
# Create document with file in one step using interfaces directly # Create document with file in one step using interfaces directly
document = self._createDocument( document = self._createDocument(
fileName=document_name, fileName=documentName,
mimeType=mime_type, mimeType=mimeType,
content=content, content=content,
base64encoded=base64encoded, base64encoded=base64encoded,
messageId=message_id messageId=message_id
@ -141,14 +141,14 @@ class GenerationService:
if document: if document:
# Set workflow context on the document if possible # Set workflow context on the document if possible
self._setDocumentWorkflowContext(document, action, workflow) self._setDocumentWorkflowContext(document, action, workflow)
created_documents.append(document) createdDocuments.append(document)
else: else:
logger.error(f"Failed to create ChatDocument object for {document_name}") logger.error(f"Failed to create ChatDocument object for {documentName}")
except Exception as e: except Exception as e:
logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}") logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
continue continue
return created_documents return createdDocuments
except Exception as e: except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}") logger.error(f"Error creating documents from action result: {str(e)}")
return [] return []
@ -157,28 +157,28 @@ class GenerationService:
"""Set workflow context on a document for proper routing and labeling""" """Set workflow context on a document for proper routing and labeling"""
try: try:
# Get current workflow context directly from workflow object # Get current workflow context directly from workflow object
workflow_context = self._getWorkflowContext(workflow) workflowContext = self._getWorkflowContext(workflow)
workflow_stats = self._getWorkflowStats(workflow) workflowStats = self._getWorkflowStats(workflow)
current_round = workflow_context.get('currentRound', 0) currentRound = workflowContext.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0) currentTask = workflowContext.get('currentTask', 0)
current_action = workflow_context.get('currentAction', 0) currentAction = workflowContext.get('currentAction', 0)
# Try to set workflow context attributes if they exist # Try to set workflow context attributes if they exist
if hasattr(document, 'roundNumber'): if hasattr(document, 'roundNumber'):
document.roundNumber = current_round document.roundNumber = currentRound
if hasattr(document, 'taskNumber'): if hasattr(document, 'taskNumber'):
document.taskNumber = current_task document.taskNumber = currentTask
if hasattr(document, 'actionNumber'): if hasattr(document, 'actionNumber'):
document.actionNumber = current_action document.actionNumber = currentAction
if hasattr(document, 'actionId'): if hasattr(document, 'actionId'):
document.actionId = action.id if hasattr(action, 'id') else None document.actionId = action.id if hasattr(action, 'id') else None
# Set additional workflow metadata if available # Set additional workflow metadata if available
if hasattr(document, 'workflowId'): if hasattr(document, 'workflowId'):
document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None) document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
if hasattr(document, 'workflowStatus'): if hasattr(document, 'workflowStatus'):
document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown') document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
except Exception as e: except Exception as e:
@ -355,17 +355,17 @@ class GenerationService:
def _getFormatRenderer(self, output_format: str): def _getFormatRenderer(self, output_format: str):
"""Get the appropriate renderer for the specified format using auto-discovery.""" """Get the appropriate renderer for the specified format using auto-discovery."""
try: try:
from .renderers.registry import get_renderer from .renderers.registry import getRenderer
renderer = get_renderer(output_format, services=self.services) renderer = getRenderer(output_format, services=self.services)
if renderer: if renderer:
return renderer return renderer
# Fallback to text renderer if no specific renderer found # Fallback to text renderer if no specific renderer found
logger.warning(f"No renderer found for format {output_format}, falling back to text") logger.warning(f"No renderer found for format {output_format}, falling back to text")
fallback_renderer = get_renderer('text', services=self.services) fallbackRenderer = getRenderer('text', services=self.services)
if fallback_renderer: if fallbackRenderer:
return fallback_renderer return fallbackRenderer
logger.error("Even text renderer fallback failed") logger.error("Even text renderer fallback failed")
return None return None

View file

@ -17,7 +17,7 @@ class RendererRegistry:
self._format_mappings: Dict[str, str] = {} self._format_mappings: Dict[str, str] = {}
self._discovered = False self._discovered = False
def discover_renderers(self) -> None: def discoverRenderers(self) -> None:
"""Automatically discover and register all renderers by scanning files.""" """Automatically discover and register all renderers by scanning files."""
if self._discovered: if self._discovered:
return return
@ -28,38 +28,38 @@ class RendererRegistry:
from pathlib import Path from pathlib import Path
# Get the directory containing this registry file # Get the directory containing this registry file
current_dir = Path(__file__).parent currentDir = Path(__file__).parent
renderers_dir = current_dir renderersDir = currentDir
# Get the package name dynamically # Get the package name dynamically
package_name = __name__.rsplit('.', 1)[0] packageName = __name__.rsplit('.', 1)[0]
# Scan all Python files in the renderers directory # Scan all Python files in the renderers directory
for file_path in renderers_dir.glob("*.py"): for filePath in renderersDir.glob("*.py"):
if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']: if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
continue continue
# Extract module name from filename # Extract module name from filename
module_name = file_path.stem moduleName = filePath.stem
try: try:
# Import the module dynamically # Import the module dynamically
full_module_name = f"{package_name}.{module_name}" fullModuleName = f"{packageName}.{moduleName}"
module = importlib.import_module(full_module_name) module = importlib.import_module(fullModuleName)
# Look for renderer classes in the module # Look for renderer classes in the module
for attr_name in dir(module): for attrName in dir(module):
attr = getattr(module, attr_name) attr = getattr(module, attrName)
if (isinstance(attr, type) and if (isinstance(attr, type) and
issubclass(attr, BaseRenderer) and issubclass(attr, BaseRenderer) and
attr != BaseRenderer and attr != BaseRenderer and
hasattr(attr, 'get_supported_formats')): hasattr(attr, 'getSupportedFormats')):
# Register the renderer # Register the renderer
self._register_renderer_class(attr) self._registerRendererClass(attr)
except Exception as e: except Exception as e:
logger.warning(f"Could not load renderer from {module_name}: {str(e)}") logger.warning(f"Could not load renderer from {moduleName}: {str(e)}")
continue continue
self._discovered = True self._discovered = True
@ -68,72 +68,72 @@ class RendererRegistry:
logger.error(f"Error during renderer discovery: {str(e)}") logger.error(f"Error during renderer discovery: {str(e)}")
self._discovered = True # Mark as discovered to avoid repeated attempts self._discovered = True # Mark as discovered to avoid repeated attempts
def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None: def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None:
"""Register a renderer class with its supported formats.""" """Register a renderer class with its supported formats."""
try: try:
# Get supported formats from the renderer class # Get supported formats from the renderer class
supported_formats = renderer_class.get_supported_formats() supportedFormats = rendererClass.getSupportedFormats()
for format_name in supported_formats: for formatName in supportedFormats:
# Register primary format # Register primary format
self._renderers[format_name.lower()] = renderer_class self._renderers[formatName.lower()] = rendererClass
# Register aliases if any # Register aliases if any
if hasattr(renderer_class, 'get_format_aliases'): if hasattr(rendererClass, 'getFormatAliases'):
aliases = renderer_class.get_format_aliases() aliases = rendererClass.getFormatAliases()
for alias in aliases: for alias in aliases:
self._format_mappings[alias.lower()] = format_name.lower() self._format_mappings[alias.lower()] = formatName.lower()
logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}") logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
except Exception as e: except Exception as e:
logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}") logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")
def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]: def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format.""" """Get a renderer instance for the specified format."""
if not self._discovered: if not self._discovered:
self.discover_renderers() self.discoverRenderers()
# Normalize format name # Normalize format name
format_name = output_format.lower().strip() formatName = outputFormat.lower().strip()
# Check for aliases first # Check for aliases first
if format_name in self._format_mappings: if formatName in self._format_mappings:
format_name = self._format_mappings[format_name] formatName = self._format_mappings[formatName]
# Get renderer class # Get renderer class
renderer_class = self._renderers.get(format_name) rendererClass = self._renderers.get(formatName)
if renderer_class: if rendererClass:
try: try:
return renderer_class(services=services) return rendererClass(services=services)
except Exception as e: except Exception as e:
logger.error(f"Error creating renderer instance for {format_name}: {str(e)}") logger.error(f"Error creating renderer instance for {formatName}: {str(e)}")
return None return None
logger.warning(f"No renderer found for format: {output_format}") logger.warning(f"No renderer found for format: {outputFormat}")
return None return None
def get_supported_formats(self) -> List[str]: def getSupportedFormats(self) -> List[str]:
"""Get list of all supported formats.""" """Get list of all supported formats."""
if not self._discovered: if not self._discovered:
self.discover_renderers() self.discoverRenderers()
formats = list(self._renderers.keys()) formats = list(self._renderers.keys())
formats.extend(self._format_mappings.keys()) formats.extend(self._format_mappings.keys())
return sorted(set(formats)) return sorted(set(formats))
def get_renderer_info(self) -> Dict[str, Dict[str, str]]: def getRendererInfo(self) -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers.""" """Get information about all registered renderers."""
if not self._discovered: if not self._discovered:
self.discover_renderers() self.discoverRenderers()
info = {} info = {}
for format_name, renderer_class in self._renderers.items(): for formatName, rendererClass in self._renderers.items():
info[format_name] = { info[formatName] = {
'class_name': renderer_class.__name__, 'class_name': rendererClass.__name__,
'module': renderer_class.__module__, 'module': rendererClass.__module__,
'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description' 'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description'
} }
return info return info
@ -141,14 +141,14 @@ class RendererRegistry:
# Global registry instance # Global registry instance
_registry = RendererRegistry() _registry = RendererRegistry()
def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]: def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format.""" """Get a renderer instance for the specified format."""
return _registry.get_renderer(output_format, services) return _registry.getRenderer(outputFormat, services)
def get_supported_formats() -> List[str]: def getSupportedFormats() -> List[str]:
"""Get list of all supported formats.""" """Get list of all supported formats."""
return _registry.get_supported_formats() return _registry.getSupportedFormats()
def get_renderer_info() -> Dict[str, Dict[str, str]]: def getRendererInfo() -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers.""" """Get information about all registered renderers."""
return _registry.get_renderer_info() return _registry.getRendererInfo()

View file

@ -4,6 +4,7 @@ Base renderer class for all format renderers.
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List from typing import Dict, Any, Tuple, List
from modules.datamodels.datamodelJson import supportedSectionTypes
import json import json
import logging import logging
import re import re
@ -23,7 +24,7 @@ class BaseRenderer(ABC):
self.services = services # Add services attribute self.services = services # Add services attribute
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
""" """
Return list of supported format names for this renderer. Return list of supported format names for this renderer.
Override this method in subclasses to specify supported formats. Override this method in subclasses to specify supported formats.
@ -31,7 +32,7 @@ class BaseRenderer(ABC):
return [] return []
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
""" """
Return list of format aliases for this renderer. Return list of format aliases for this renderer.
Override this method in subclasses to specify format aliases. Override this method in subclasses to specify format aliases.
@ -39,7 +40,7 @@ class BaseRenderer(ABC):
return [] return []
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
""" """
Return priority for this renderer (higher number = higher priority). Return priority for this renderer (higher number = higher priority).
Used when multiple renderers support the same format. Used when multiple renderers support the same format.
@ -47,43 +48,43 @@ class BaseRenderer(ABC):
return 0 return 0
@abstractmethod @abstractmethod
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
""" """
Render extracted JSON content to the target format. Render extracted JSON content to the target format.
Args: Args:
extracted_content: Structured JSON content with sections and metadata extractedContent: Structured JSON content with sections and metadata
title: Report title title: Report title
user_prompt: Original user prompt for context userPrompt: Original user prompt for context
ai_service: AI service instance for additional processing aiService: AI service instance for additional processing
Returns: Returns:
tuple: (rendered_content, mime_type) tuple: (renderedContent, mimeType)
""" """
pass pass
def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]: def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract sections from report data.""" """Extract sections from report data."""
return report_data.get('sections', []) return reportData.get('sections', [])
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]: def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data.""" """Extract metadata from report data."""
return report_data.get('metadata', {}) return reportData.get('metadata', {})
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str: def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
"""Get title from report data or use fallback.""" """Get title from report data or use fallback."""
metadata = report_data.get('metadata', {}) metadata = reportData.get('metadata', {})
return metadata.get('title', fallback_title) return metadata.get('title', fallbackTitle)
def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool: def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
"""Validate that JSON content has the expected structure.""" """Validate that JSON content has the expected structure."""
if not isinstance(json_content, dict): if not isinstance(jsonContent, dict):
return False return False
if "sections" not in json_content: if "sections" not in jsonContent:
return False return False
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
if not isinstance(sections, list): if not isinstance(sections, list):
return False return False
@ -96,14 +97,14 @@ class BaseRenderer(ABC):
return True return True
def _get_section_type(self, section: Dict[str, Any]) -> str: def _getSectionType(self, section: Dict[str, Any]) -> str:
"""Get the type of a section; default to 'paragraph' for non-dict inputs.""" """Get the type of a section; default to 'paragraph' for non-dict inputs."""
if isinstance(section, dict): if isinstance(section, dict):
return section.get("content_type", "paragraph") return section.get("content_type", "paragraph")
# If section is a list or any other type, treat as paragraph elements # If section is a list or any other type, treat as paragraph elements
return "paragraph" return "paragraph"
def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]: def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Get the elements of a section; if a list is provided directly, return it.""" """Get the elements of a section; if a list is provided directly, return it."""
if isinstance(section, dict): if isinstance(section, dict):
return section.get("elements", []) return section.get("elements", [])
@ -111,21 +112,30 @@ class BaseRenderer(ABC):
return section return section
return [] return []
def _get_section_id(self, section: Dict[str, Any]) -> str: def _getSectionId(self, section: Dict[str, Any]) -> str:
"""Get the ID of a section (if available).""" """Get the ID of a section (if available)."""
if isinstance(section, dict): if isinstance(section, dict):
return section.get("id", "unknown") return section.get("id", "unknown")
return "unknown" return "unknown"
def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]: def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
"""Extract table headers and rows from section data.""" """Extract table headers and rows from section data."""
headers = section_data.get("headers", []) # Normalize when elements array was passed in
rows = section_data.get("rows", []) if isinstance(sectionData, list) and sectionData:
candidate = sectionData[0]
sectionData = candidate if isinstance(candidate, dict) else {}
headers = sectionData.get("headers", [])
rows = sectionData.get("rows", [])
return headers, rows return headers, rows
def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]: def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]:
"""Extract bullet list items from section data.""" """Extract bullet list items from section data."""
items = section_data.get("items", []) # Normalize when elements array or raw list was passed in
if isinstance(sectionData, list):
# Already a list of items (strings or dicts)
items = sectionData
else:
items = sectionData.get("items", [])
result = [] result = []
for item in items: for item in items:
if isinstance(item, str): if isinstance(item, str):
@ -134,29 +144,47 @@ class BaseRenderer(ABC):
result.append(item["text"]) result.append(item["text"])
return result return result
def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]: def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
"""Extract heading level and text from section data.""" """Extract heading level and text from section data."""
level = section_data.get("level", 1) # Normalize when elements array was passed in
text = section_data.get("text", "") if isinstance(sectionData, list) and sectionData:
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
level = sectionData.get("level", 1)
text = sectionData.get("text", "")
return level, text return level, text
def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str: def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str:
"""Extract paragraph text from section data.""" """Extract paragraph text from section data."""
return section_data.get("text", "") if isinstance(sectionData, list):
# Join multiple paragraph elements if provided as a list
texts = []
for el in sectionData:
if isinstance(el, dict) and "text" in el:
texts.append(el["text"])
elif isinstance(el, str):
texts.append(el)
return "\n".join(texts)
return sectionData.get("text", "")
def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract code and language from section data.""" """Extract code and language from section data."""
code = section_data.get("code", "") # Normalize when elements array was passed in
language = section_data.get("language", "") if isinstance(sectionData, list) and sectionData:
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
code = sectionData.get("code", "")
language = sectionData.get("language", "")
return code, language return code, language
def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]: def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract base64 data and alt text from section data.""" """Extract base64 data and alt text from section data."""
base64_data = section_data.get("base64Data", "") # Normalize when elements array was passed in
alt_text = section_data.get("altText", "Image") if isinstance(sectionData, list) and sectionData:
return base64_data, alt_text sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
base64Data = sectionData.get("base64Data", "")
altText = sectionData.get("altText", "Image")
return base64Data, altText
def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any: def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
""" """
Render an image section. This is a base implementation that should be overridden Render an image section. This is a base implementation that should be overridden
by format-specific renderers. by format-specific renderers.
@ -168,47 +196,47 @@ class BaseRenderer(ABC):
Returns: Returns:
Format-specific image representation Format-specific image representation
""" """
section_data = self._get_section_data(section) sectionData = self._getSectionData(section)
base64_data, alt_text = self._extract_image_data(section_data) base64Data, altText = self._extractImageData(sectionData)
# Base implementation returns a simple dict # Base implementation returns a simple dict
# Format-specific renderers should override this method # Format-specific renderers should override this method
return { return {
"content_type": "image", "content_type": "image",
"base64Data": base64_data, "base64Data": base64Data,
"altText": alt_text, "altText": altText,
"width": section_data.get("width", None), "width": sectionData.get("width", None),
"height": section_data.get("height", None), "height": sectionData.get("height", None),
"caption": section_data.get("caption", "") "caption": sectionData.get("caption", "")
} }
def _validate_image_data(self, base64_data: str, alt_text: str) -> bool: def _validateImageData(self, base64Data: str, altText: str) -> bool:
"""Validate image data.""" """Validate image data."""
if not base64_data: if not base64Data:
self.logger.warning("Image section has no base64 data") self.logger.warning("Image section has no base64 data")
return False return False
if not alt_text: if not altText:
self.logger.warning("Image section has no alt text") self.logger.warning("Image section has no alt text")
return False return False
# Basic base64 validation # Basic base64 validation
try: try:
base64.b64decode(base64_data, validate=True) base64.b64decode(base64Data, validate=True)
return True return True
except Exception as e: except Exception as e:
self.logger.warning(f"Invalid base64 image data: {str(e)}") self.logger.warning(f"Invalid base64 image data: {str(e)}")
return False return False
def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]: def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]:
""" """
Get image dimensions from base64 data. Get image dimensions from base64 data.
This is a helper method that format-specific renderers can use. This is a helper method that format-specific renderers can use.
""" """
try: try:
# Decode base64 data # Decode base64 data
image_data = base64.b64decode(base64_data) imageData = base64.b64decode(base64Data)
image = Image.open(io.BytesIO(image_data)) image = Image.open(io.BytesIO(imageData))
return image.size # Returns (width, height) return image.size # Returns (width, height)
@ -216,89 +244,89 @@ class BaseRenderer(ABC):
self.logger.warning(f"Could not determine image dimensions: {str(e)}") self.logger.warning(f"Could not determine image dimensions: {str(e)}")
return (0, 0) return (0, 0)
def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str: def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str:
""" """
Resize image if it exceeds maximum dimensions. Resize image if it exceeds maximum dimensions.
Returns the resized image as base64 string. Returns the resized image as base64 string.
""" """
try: try:
# Decode base64 data # Decode base64 data
image_data = base64.b64decode(base64_data) imageData = base64.b64decode(base64Data)
image = Image.open(io.BytesIO(image_data)) image = Image.open(io.BytesIO(imageData))
# Check if resizing is needed # Check if resizing is needed
width, height = image.size width, height = image.size
if width <= max_width and height <= max_height: if width <= maxWidth and height <= maxHeight:
return base64_data # No resizing needed return base64Data # No resizing needed
# Calculate new dimensions maintaining aspect ratio # Calculate new dimensions maintaining aspect ratio
ratio = min(max_width / width, max_height / height) ratio = min(maxWidth / width, maxHeight / height)
new_width = int(width * ratio) newWidth = int(width * ratio)
new_height = int(height * ratio) newHeight = int(height * ratio)
# Resize image # Resize image
resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)
# Convert back to base64 # Convert back to base64
buffer = io.BytesIO() buffer = io.BytesIO()
resized_image.save(buffer, format=image.format or 'PNG') resizedImage.save(buffer, format=image.format or 'PNG')
resized_data = buffer.getvalue() resizedData = buffer.getvalue()
return base64.b64encode(resized_data).decode('utf-8') return base64.b64encode(resizedData).decode('utf-8')
except Exception as e: except Exception as e:
self.logger.warning(f"Could not resize image: {str(e)}") self.logger.warning(f"Could not resize image: {str(e)}")
return base64_data # Return original if resize fails return base64Data # Return original if resize fails
def _get_supported_section_types(self) -> List[str]: def _getSupportedSectionTypes(self) -> List[str]:
"""Return list of supported section types.""" """Return list of supported section types (from unified schema)."""
return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"] return supportedSectionTypes
def _is_valid_section_type(self, section_type: str) -> bool: def _isValidSectionType(self, sectionType: str) -> bool:
"""Check if a section type is valid.""" """Check if a section type is valid."""
return section_type in self._get_supported_section_types() return sectionType in self._getSupportedSectionTypes()
def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]: def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]:
"""Process a section and return structured data based on its type.""" """Process a section and return structured data based on its type."""
section_type = self._get_section_type(section) sectionType = self._getSectionType(section)
section_data = self._get_section_data(section) sectionData = self._getSectionData(section)
if section_type == "table": if sectionType == "table":
headers, rows = self._extract_table_data(section_data) headers, rows = self._extractTableData(sectionData)
return {"content_type": "table", "headers": headers, "rows": rows} return {"content_type": "table", "headers": headers, "rows": rows}
elif section_type == "bullet_list": elif sectionType == "bullet_list":
items = self._extract_bullet_list_items(section_data) items = self._extractBulletListItems(sectionData)
return {"content_type": "bullet_list", "items": items} return {"content_type": "bullet_list", "items": items}
elif section_type == "heading": elif sectionType == "heading":
level, text = self._extract_heading_data(section_data) level, text = self._extractHeadingData(sectionData)
return {"content_type": "heading", "level": level, "text": text} return {"content_type": "heading", "level": level, "text": text}
elif section_type == "paragraph": elif sectionType == "paragraph":
text = self._extract_paragraph_text(section_data) text = self._extractParagraphText(sectionData)
return {"content_type": "paragraph", "text": text} return {"content_type": "paragraph", "text": text}
elif section_type == "code_block": elif sectionType == "code_block":
code, language = self._extract_code_block_data(section_data) code, language = self._extractCodeBlockData(sectionData)
return {"content_type": "code_block", "code": code, "language": language} return {"content_type": "code_block", "code": code, "language": language}
elif section_type == "image": elif sectionType == "image":
base64_data, alt_text = self._extract_image_data(section_data) base64Data, altText = self._extractImageData(sectionData)
# Validate image data # Validate image data
if self._validate_image_data(base64_data, alt_text): if self._validateImageData(base64Data, altText):
return { return {
"content_type": "image", "content_type": "image",
"base64Data": base64_data, "base64Data": base64Data,
"altText": alt_text, "altText": altText,
"width": section_data.get("width"), "width": sectionData.get("width") if isinstance(sectionData, dict) else None,
"height": section_data.get("height"), "height": sectionData.get("height") if isinstance(sectionData, dict) else None,
"caption": section_data.get("caption", "") "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else ""
} }
else: else:
# Return placeholder if image data is invalid # Return placeholder if image data is invalid
return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"} return {"content_type": "paragraph", "text": f"[Image: {altText}]"}
else: else:
# Fallback to paragraph # Fallback to paragraph
text = self._extract_paragraph_text(section_data) text = self._extractParagraphText(sectionData)
return {"content_type": "paragraph", "text": text} return {"content_type": "paragraph", "text": text}
def _format_timestamp(self, timestamp: str = None) -> str: def _formatTimestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display.""" """Format timestamp for display."""
if timestamp: if timestamp:
return timestamp return timestamp
@ -306,38 +334,38 @@ class BaseRenderer(ABC):
# ===== GENERIC AI STYLING HELPERS ===== # ===== GENERIC AI STYLING HELPERS =====
async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
""" """
Generic AI styling method that can be used by all renderers. Generic AI styling method that can be used by all renderers.
Args: Args:
ai_service: AI service instance aiService: AI service instance
style_template: Format-specific style template styleTemplate: Format-specific style template
default_styles: Default styles to fall back to defaultStyles: Default styles to fall back to
Returns: Returns:
Dict with styling definitions Dict with styling definitions
""" """
# DEBUG: Show which renderer is calling this method # DEBUG: Show which renderer is calling this method
if not ai_service: if not aiService:
return default_styles return defaultStyles
try: try:
request_options = AiCallOptions() requestOptions = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options) request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
# DEBUG: Show the actual prompt being sent to AI # DEBUG: Show the actual prompt being sent to AI
self.logger.debug(f"AI Style Template Prompt:") self.logger.debug(f"AI Style Template Prompt:")
self.logger.debug(f"{style_template}") self.logger.debug(f"{styleTemplate}")
response = await ai_service.aiObjects.call(request) response = await aiService.aiObjects.call(request)
# Save styling prompt and response to debug # Save styling prompt and response to debug
self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt") self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response") self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
# Clean and parse JSON # Clean and parse JSON
@ -346,12 +374,12 @@ class BaseRenderer(ABC):
# Check if result is empty # Check if result is empty
if not result: if not result:
self.logger.warning("AI styling returned empty response, using defaults") self.logger.warning("AI styling returned empty response, using defaults")
return default_styles return defaultStyles
# Extract JSON from markdown if present # Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match: if jsonMatch:
result = json_match.group(1).strip() result = jsonMatch.group(1).strip()
elif result.startswith('```json'): elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result) result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result) result = re.sub(r'\s*```$', '', result)
@ -362,8 +390,8 @@ class BaseRenderer(ABC):
# Try to parse JSON # Try to parse JSON
try: try:
styles = json.loads(result) styles = json.loads(result)
except json.JSONDecodeError as json_error: except json.JSONDecodeError as jsonError:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}") self.logger.warning(f"AI styling returned invalid JSON: {jsonError}")
# Use print instead of logger to avoid truncation # Use print instead of logger to avoid truncation
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER") self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
@ -372,88 +400,88 @@ class BaseRenderer(ABC):
self.logger.warning(f"Raw content that failed to parse: {result}") self.logger.warning(f"Raw content that failed to parse: {result}")
# Try to fix incomplete JSON by adding missing closing braces # Try to fix incomplete JSON by adding missing closing braces
open_braces = result.count('{') openBraces = result.count('{')
close_braces = result.count('}') closeBraces = result.count('}')
if open_braces > close_braces: if openBraces > closeBraces:
# JSON is incomplete, add missing closing braces # JSON is incomplete, add missing closing braces
missing_braces = open_braces - close_braces missingBraces = openBraces - closeBraces
result = result + '}' * missing_braces result = result + '}' * missingBraces
self.logger.info(f"Added {missing_braces} missing closing brace(s)") self.logger.info(f"Added {missingBraces} missing closing brace(s)")
self.logger.debug(f"Fixed JSON: {result}") self.logger.debug(f"Fixed JSON: {result}")
# Try parsing the fixed JSON # Try parsing the fixed JSON
try: try:
styles = json.loads(result) styles = json.loads(result)
self.logger.info("Successfully fixed incomplete JSON") self.logger.info("Successfully fixed incomplete JSON")
except json.JSONDecodeError as fix_error: except json.JSONDecodeError as fixError:
self.logger.warning(f"Fixed JSON still invalid: {fix_error}") self.logger.warning(f"Fixed JSON still invalid: {fixError}")
self.logger.warning(f"Fixed JSON content: {result}") self.logger.warning(f"Fixed JSON content: {result}")
# Try to extract just the JSON part if it's embedded in text # Try to extract just the JSON part if it's embedded in text
json_start = result.find('{') jsonStart = result.find('{')
json_end = result.rfind('}') jsonEnd = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start: if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
json_part = result[json_start:json_end+1] jsonPart = result[jsonStart:jsonEnd+1]
try: try:
styles = json.loads(json_part) styles = json.loads(jsonPart)
self.logger.info("Successfully extracted JSON from explanatory text") self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError: except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults") self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles return defaultStyles
else: else:
return default_styles return defaultStyles
else: else:
# Try to extract just the JSON part if it's embedded in text # Try to extract just the JSON part if it's embedded in text
json_start = result.find('{') jsonStart = result.find('{')
json_end = result.rfind('}') jsonEnd = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start: if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
json_part = result[json_start:json_end+1] jsonPart = result[jsonStart:jsonEnd+1]
try: try:
styles = json.loads(json_part) styles = json.loads(jsonPart)
self.logger.info("Successfully extracted JSON from explanatory text") self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError: except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults") self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles return defaultStyles
else: else:
return default_styles return defaultStyles
# Convert colors to appropriate format # Convert colors to appropriate format
styles = self._convert_colors_format(styles) styles = self._convertColorsFormat(styles)
return styles return styles
except Exception as e: except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults") self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles return defaultStyles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
""" """
Convert colors to appropriate format based on renderer type. Convert colors to appropriate format based on renderer type.
Override this method in subclasses for format-specific color handling. Override this method in subclasses for format-specific color handling.
""" """
return styles return styles
def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str: def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str:
""" """
Create a standardized AI style template for any format. Create a standardized AI style template for any format.
Args: Args:
format_name: Name of the format (e.g., "docx", "xlsx", "pptx") formatName: Name of the format (e.g., "docx", "xlsx", "pptx")
user_prompt: User's original prompt userPrompt: User's original prompt
style_schema: Format-specific style schema styleSchema: Format-specific style schema
Returns: Returns:
Formatted prompt string Formatted prompt string
""" """
schema_json = json.dumps(style_schema, indent=4) schemaJson = json.dumps(styleSchema, indent=4)
# DEBUG: Show the schema being sent # DEBUG: Show the schema being sent
return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents. return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents.
Use this schema as a template and customize the values for professional document styling: Use this schema as a template and customize the values for professional document styling:
{schema_json} {schemaJson}
Requirements: Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations) - Return ONLY the complete JSON object (no markdown, no explanations)

View file

@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction.""" """Renders content to CSV format with format-specific extraction."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported CSV formats.""" """Return supported CSV formats."""
return ['csv'] return ['csv']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['spreadsheet', 'table'] return ['spreadsheet', 'table']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for CSV renderer.""" """Return priority for CSV renderer."""
return 70 return 70
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to CSV format.""" """Render extracted JSON content to CSV format."""
try: try:
# Generate CSV directly from JSON (no styling needed for CSV) # Generate CSV directly from JSON (no styling needed for CSV)
csv_content = await self._generate_csv_from_json(extracted_content, title) csvContent = await self._generateCsvFromJson(extractedContent, title)
return csv_content, "text/csv" return csvContent, "text/csv"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering CSV: {str(e)}") self.logger.error(f"Error rendering CSV: {str(e)}")
# Return minimal CSV fallback # Return minimal CSV fallback
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv" return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str: async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate CSV content from structured JSON document.""" """Generate CSV content from structured JSON document."""
try: try:
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary") raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content: if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field") raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title # Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title) documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Generate CSV content # Generate CSV content
csv_rows = [] csvRows = []
# Add title row # Add title row
if document_title: if documentTitle:
csv_rows.append([document_title]) csvRows.append([documentTitle])
csv_rows.append([]) # Empty row csvRows.append([]) # Empty row
# Process each section in order # Process each section in order
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
for section in sections: for section in sections:
section_csv = self._render_json_section_to_csv(section) sectionCsv = self._renderJsonSectionToCsv(section)
if section_csv: if sectionCsv:
csv_rows.extend(section_csv) csvRows.extend(sectionCsv)
csv_rows.append([]) # Empty row between sections csvRows.append([]) # Empty row between sections
# Convert to CSV string # Convert to CSV string
csv_content = self._convert_rows_to_csv(csv_rows) csvContent = self._convertRowsToCsv(csvRows)
return csv_content return csvContent
except Exception as e: except Exception as e:
self.logger.error(f"Error generating CSV from JSON: {str(e)}") self.logger.error(f"Error generating CSV from JSON: {str(e)}")
raise Exception(f"CSV generation failed: {str(e)}") raise Exception(f"CSV generation failed: {str(e)}")
def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]: def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]:
"""Render a single JSON section to CSV rows.""" """Render a single JSON section to CSV rows."""
try: try:
section_type = section.get("content_type", "paragraph") sectionType = section.get("content_type", "paragraph")
elements = section.get("elements", []) elements = section.get("elements", [])
csv_rows = [] csvRows = []
# Add section title if available # Add section title if available
section_title = section.get("title") sectionTitle = section.get("title")
if section_title: if sectionTitle:
csv_rows.append([f"# {section_title}"]) csvRows.append([f"# {sectionTitle}"])
# Process each element in the section # Process each element in the section
for element in elements: for element in elements:
if section_type == "table": if sectionType == "table":
csv_rows.extend(self._render_json_table_to_csv(element)) csvRows.extend(self._renderJsonTableToCsv(element))
elif section_type == "list": elif sectionType == "list":
csv_rows.extend(self._render_json_list_to_csv(element)) csvRows.extend(self._renderJsonListToCsv(element))
elif section_type == "heading": elif sectionType == "heading":
csv_rows.extend(self._render_json_heading_to_csv(element)) csvRows.extend(self._renderJsonHeadingToCsv(element))
elif section_type == "paragraph": elif sectionType == "paragraph":
csv_rows.extend(self._render_json_paragraph_to_csv(element)) csvRows.extend(self._renderJsonParagraphToCsv(element))
elif section_type == "code": elif sectionType == "code":
csv_rows.extend(self._render_json_code_to_csv(element)) csvRows.extend(self._renderJsonCodeToCsv(element))
else: else:
# Fallback to paragraph for unknown types # Fallback to paragraph for unknown types
csv_rows.extend(self._render_json_paragraph_to_csv(element)) csvRows.extend(self._renderJsonParagraphToCsv(element))
return csv_rows return csvRows
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
return [["[Error rendering section]"]] return [["[Error rendering section]"]]
def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]: def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON table to CSV rows.""" """Render a JSON table to CSV rows."""
try: try:
headers = table_data.get("headers", []) headers = tableData.get("headers", [])
rows = table_data.get("rows", []) rows = tableData.get("rows", [])
csv_rows = [] csvRows = []
if headers: if headers:
csv_rows.append(headers) csvRows.append(headers)
if rows: if rows:
csv_rows.extend(rows) csvRows.extend(rows)
return csv_rows return csvRows
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
return [["[Error rendering table]"]] return [["[Error rendering table]"]]
def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]: def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON list to CSV rows.""" """Render a JSON list to CSV rows."""
try: try:
items = list_data.get("items", []) items = listData.get("items", [])
csv_rows = [] csvRows = []
for item in items: for item in items:
if isinstance(item, dict): if isinstance(item, dict):
text = item.get("text", "") text = item.get("text", "")
subitems = item.get("subitems", []) subitems = item.get("subitems", [])
csv_rows.append([text]) csvRows.append([text])
# Add subitems as indented rows # Add subitems as indented rows
for subitem in subitems: for subitem in subitems:
if isinstance(subitem, dict): if isinstance(subitem, dict):
csv_rows.append([f" - {subitem.get('text', '')}"]) csvRows.append([f" - {subitem.get('text', '')}"])
else: else:
csv_rows.append([f" - {subitem}"]) csvRows.append([f" - {subitem}"])
else: else:
csv_rows.append([str(item)]) csvRows.append([str(item)])
return csv_rows return csvRows
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering list: {str(e)}") self.logger.warning(f"Error rendering list: {str(e)}")
return [["[Error rendering list]"]] return [["[Error rendering list]"]]
def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]: def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON heading to CSV rows.""" """Render a JSON heading to CSV rows."""
try: try:
text = heading_data.get("text", "") text = headingData.get("text", "")
level = heading_data.get("level", 1) level = headingData.get("level", 1)
if text: if text:
# Use # symbols for heading levels # Use # symbols for heading levels
heading_text = f"{'#' * level} {text}" headingText = f"{'#' * level} {text}"
return [[heading_text]] return [[headingText]]
return [] return []
@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}") self.logger.warning(f"Error rendering heading: {str(e)}")
return [["[Error rendering heading]"]] return [["[Error rendering heading]"]]
def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]: def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON paragraph to CSV rows.""" """Render a JSON paragraph to CSV rows."""
try: try:
text = paragraph_data.get("text", "") text = paragraphData.get("text", "")
if text: if text:
# Split long paragraphs into multiple rows if needed # Split long paragraphs into multiple rows if needed
if len(text) > 100: if len(text) > 100:
words = text.split() words = text.split()
rows = [] rows = []
current_row = [] currentRow = []
current_length = 0 currentLength = 0
for word in words: for word in words:
if current_length + len(word) > 100 and current_row: if currentLength + len(word) > 100 and currentRow:
rows.append([" ".join(current_row)]) rows.append([" ".join(currentRow)])
current_row = [word] currentRow = [word]
current_length = len(word) currentLength = len(word)
else: else:
current_row.append(word) currentRow.append(word)
current_length += len(word) + 1 currentLength += len(word) + 1
if current_row: if currentRow:
rows.append([" ".join(current_row)]) rows.append([" ".join(currentRow)])
return rows return rows
else: else:
@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
return [["[Error rendering paragraph]"]] return [["[Error rendering paragraph]"]]
def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]: def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON code block to CSV rows.""" """Render a JSON code block to CSV rows."""
try: try:
code = code_data.get("code", "") code = codeData.get("code", "")
language = code_data.get("language", "") language = codeData.get("language", "")
csv_rows = [] csvRows = []
if language: if language:
csv_rows.append([f"Code ({language}):"]) csvRows.append([f"Code ({language}):"])
if code: if code:
# Split code into lines # Split code into lines
code_lines = code.split('\n') codeLines = code.split('\n')
for line in code_lines: for line in codeLines:
csv_rows.append([f" {line}"]) csvRows.append([f" {line}"])
return csv_rows return csvRows
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
return [["[Error rendering code block]"]] return [["[Error rendering code block]"]]
def _convert_rows_to_csv(self, rows: List[List[str]]) -> str: def _convertRowsToCsv(self, rows: List[List[str]]) -> str:
"""Convert rows to CSV string.""" """Convert rows to CSV string."""
import csv import csv
import io import io
@ -245,7 +245,7 @@ class RendererCsv(BaseRenderer):
return output.getvalue() return output.getvalue()
def _clean_csv_content(self, content: str, title: str) -> str: def _cleanCsvContent(self, content: str, title: str) -> str:
"""Clean and validate CSV content from AI.""" """Clean and validate CSV content from AI."""
content = content.strip() content = content.strip()

View file

@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer):
"""Renders content to DOCX format using python-docx.""" """Renders content to DOCX format using python-docx."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported DOCX formats.""" """Return supported DOCX formats."""
return ['docx', 'doc'] return ['docx', 'doc']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['word', 'document'] return ['word', 'document']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for DOCX renderer.""" """Return priority for DOCX renderer."""
return 115 return 115
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling.""" """Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER") self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try: try:
if not DOCX_AVAILABLE: if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available # Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml from .rendererHtml import RendererHtml
html_renderer = RendererHtml() htmlRenderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title) htmlContent, _ = await htmlRenderer.render(extractedContent, title)
return html_content, "text/html" return htmlContent, "text/html"
# Generate DOCX using AI-analyzed styling # Generate DOCX using AI-analyzed styling
docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service) docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document" return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer):
# Return minimal fallback # Return minimal fallback
return f"DOCX Generation Error: {str(e)}", "text/plain" return f"DOCX Generation Error: {str(e)}", "text/plain"
async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate DOCX content from structured JSON document using AI-generated styling.""" """Generate DOCX content from structured JSON document using AI-generated styling."""
try: try:
# Create new document # Create new document
doc = Document() doc = Document()
# Get AI-generated styling definitions # Get AI-generated styling definitions
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...") self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...")
styles = await self._get_docx_styles(user_prompt, ai_service) styles = await self._getDocxStyles(userPrompt, aiService)
# Apply basic document setup # Apply basic document setup
self._setup_basic_document_styles(doc) self._setupBasicDocumentStyles(doc)
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(json_content, dict):
@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer):
self.logger.error(f"Error generating DOCX from JSON: {str(e)}") self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
raise Exception(f"DOCX generation failed: {str(e)}") raise Exception(f"DOCX generation failed: {str(e)}")
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get DOCX styling definitions using base template AI styling.""" """Get DOCX styling definitions using base template AI styling."""
style_schema = { style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
} }
style_template = self._create_ai_style_template("docx", user_prompt, style_schema) style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles()) styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles())
# Validate and fix contrast issues # Validate and fix contrast issues
return self._validate_styles_contrast(styles) return self._validateStylesContrast(styles)
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles.""" """Validate and fix contrast issues in AI-generated styles."""
try: try:
# Fix table header contrast # Fix table header contrast
@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}") self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_styles() return self._getDefaultStyles()
def _get_default_styles(self) -> Dict[str, Any]: def _getDefaultStyles(self) -> Dict[str, Any]:
"""Default DOCX styles.""" """Default DOCX styles."""
return { return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"}, "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"} "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
} }
def _setup_basic_document_styles(self, doc: Document) -> None: def _setupBasicDocumentStyles(self, doc: Document) -> None:
"""Set up basic document styles.""" """Set up basic document styles."""
try: try:
# Set default font # Set default font
@ -189,7 +189,7 @@ class RendererDocx(BaseRenderer):
def _clear_template_content(self, doc: Document) -> None: def _clearTemplateContent(self, doc: Document) -> None:
"""Clear template content while preserving styles.""" """Clear template content while preserving styles."""
try: try:
# Remove all paragraphs except keep the styles # Remove all paragraphs except keep the styles
@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not clear template content: {str(e)}") self.logger.warning(f"Could not clear template content: {str(e)}")
def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a single JSON section to DOCX using AI-generated styles.""" """Render a single JSON section to DOCX using AI-generated styles."""
try: try:
section_type = section.get("content_type", "paragraph") section_type = section.get("content_type", "paragraph")
@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer):
# Process each element in the section # Process each element in the section
for element in elements: for element in elements:
if section_type == "table": if section_type == "table":
self._render_json_table(doc, element, styles) self._renderJsonTable(doc, element, styles)
elif section_type == "bullet_list": elif section_type == "bullet_list":
self._render_json_bullet_list(doc, element, styles) self._renderJsonBulletList(doc, element, styles)
elif section_type == "heading": elif section_type == "heading":
self._render_json_heading(doc, element, styles) self._renderJsonHeading(doc, element, styles)
elif section_type == "paragraph": elif section_type == "paragraph":
self._render_json_paragraph(doc, element, styles) self._renderJsonParagraph(doc, element, styles)
elif section_type == "code_block": elif section_type == "code_block":
self._render_json_code_block(doc, element, styles) self._renderJsonCodeBlock(doc, element, styles)
elif section_type == "image": elif section_type == "image":
self._render_json_image(doc, element, styles) self._renderJsonImage(doc, element, styles)
else: else:
# Fallback to paragraph for unknown types # Fallback to paragraph for unknown types
self._render_json_paragraph(doc, element, styles) self._renderJsonParagraph(doc, element, styles)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}") self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
# Add error paragraph as fallback # Add error paragraph as fallback
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]") error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON table to DOCX using AI-generated styles.""" """Render a JSON table to DOCX using AI-generated styles."""
try: try:
headers = table_data.get("headers", []) headers = table_data.get("headers", [])
@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer):
# Apply table borders based on AI style # Apply table borders based on AI style
border_style = styles["table_border"]["style"] border_style = styles["table_border"]["style"]
if border_style == "horizontal_only": if border_style == "horizontal_only":
self._apply_horizontal_borders_only(table) self._applyHorizontalBordersOnly(table)
elif border_style == "grid": elif border_style == "grid":
table.style = 'Table Grid' table.style = 'Table Grid'
# else: no borders # else: no borders
@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer):
# Apply background color # Apply background color
bg_color = header_style["background"].lstrip('#') bg_color = header_style["background"].lstrip('#')
self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16))) self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
# Apply text styling # Apply text styling
for paragraph in cell.paragraphs: for paragraph in cell.paragraphs:
@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
def _apply_horizontal_borders_only(self, table) -> None: def _applyHorizontalBordersOnly(self, table) -> None:
"""Apply only horizontal borders to the table (no vertical borders).""" """Apply only horizontal borders to the table (no vertical borders)."""
try: try:
from docx.oxml.shared import OxmlElement, qn from docx.oxml.shared import OxmlElement, qn
@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not apply horizontal borders: {str(e)}") self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
def _set_cell_background(self, cell, color: RGBColor) -> None: def _setCellBackground(self, cell, color: RGBColor) -> None:
"""Set the background color of a table cell.""" """Set the background color of a table cell."""
try: try:
from docx.oxml.shared import OxmlElement, qn from docx.oxml.shared import OxmlElement, qn
@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Could not set cell background: {str(e)}") self.logger.warning(f"Could not set cell background: {str(e)}")
def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles.""" """Render a JSON bullet list to DOCX using AI-generated styles."""
try: try:
items = list_data.get("items", []) items = list_data.get("items", [])
@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON heading to DOCX using AI-generated styles.""" """Render a JSON heading to DOCX using AI-generated styles."""
try: try:
level = heading_data.get("level", 1) level = heading_data.get("level", 1)
@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}") self.logger.warning(f"Error rendering heading: {str(e)}")
def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON paragraph to DOCX using AI-generated styles.""" """Render a JSON paragraph to DOCX using AI-generated styles."""
try: try:
text = paragraph_data.get("text", "") text = paragraph_data.get("text", "")
@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles.""" """Render a JSON code block to DOCX using AI-generated styles."""
try: try:
code = code_data.get("code", "") code = code_data.get("code", "")
@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None: def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON image to DOCX.""" """Render a JSON image to DOCX."""
try: try:
base64_data = image_data.get("base64Data", "") base64_data = image_data.get("base64Data", "")
@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Error rendering image: {str(e)}") self.logger.warning(f"Error rendering image: {str(e)}")
doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]") doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]: def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
"""Extract document structure from user prompt.""" """Extract document structure from user prompt."""
structure = { structure = {
'title': title, 'title': title,
@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer):
'format': 'standard' 'format': 'standard'
} }
if not user_prompt: if not userPrompt:
return structure return structure
# Extract title from prompt if not provided # Extract title from prompt if not provided
if not title or title == "Generated Document": if not title or title == "Generated Document":
# Look for "create a ... document" or "generate a ... report" # Look for "create a ... document" or "generate a ... report"
import re import re
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower()) title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
if title_match: if title_match:
structure['title'] = title_match.group(1).strip().title() structure['title'] = title_match.group(1).strip().title()
# Extract sections from numbered lists in prompt # Extract sections from numbered lists in prompt
import re import re
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)' section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
sections = re.findall(section_pattern, user_prompt) sections = re.findall(section_pattern, userPrompt)
for num, section_text in sections: for num, section_text in sections:
structure['sections'].append({ structure['sections'].append({
@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer):
# If no numbered sections found, try to extract from "including:" patterns # If no numbered sections found, try to extract from "including:" patterns
if not structure['sections']: if not structure['sections']:
including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL) including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
if including_match: if including_match:
including_text = including_match.group(1) including_text = including_match.group(1)
# Split by common separators # Split by common separators
@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer):
if not structure['sections']: if not structure['sections']:
# Look for bullet points or dashes # Look for bullet points or dashes
bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)' bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
bullets = re.findall(bullet_pattern, user_prompt) bullets = re.findall(bullet_pattern, userPrompt)
for i, bullet in enumerate(bullets, 1): for i, bullet in enumerate(bullets, 1):
bullet = bullet.strip() bullet = bullet.strip()
if bullet and len(bullet) > 3: if bullet and len(bullet) > 3:
@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer):
# If still no sections, extract from sentence structure # If still no sections, extract from sentence structure
if not structure['sections']: if not structure['sections']:
# Split prompt into sentences and use as sections # Split prompt into sentences and use as sections
sentences = re.split(r'[.!?]\s+', user_prompt) sentences = re.split(r'[.!?]\s+', userPrompt)
for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
sentence = sentence.strip() sentence = sentence.strip()
if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')): if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer):
action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate'] action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
found_actions = [] found_actions = []
for action in action_words: for action in action_words:
if action in user_prompt.lower(): if action in userPrompt.lower():
found_actions.append(action.title()) found_actions.append(action.title())
if found_actions: if found_actions:
@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer):
return structure return structure
def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]): def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
"""Generate DOCX content based on extracted structure.""" """Generate DOCX content based on extracted structure."""
# Add sections based on prompt structure # Add sections based on prompt structure
for section in structure['sections']: for section in structure['sections']:
@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer):
# Add AI-generated content for this section # Add AI-generated content for this section
# Try to extract relevant content for this section from the AI response # Try to extract relevant content for this section from the AI response
section_content = self._extract_section_content(content, section['title']) section_content = self._extractSectionContent(content, section['title'])
if section_content: if section_content:
doc.add_paragraph(section_content) doc.add_paragraph(section_content)
@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer):
doc.add_heading("Complete Analysis", level=1) doc.add_heading("Complete Analysis", level=1)
doc.add_paragraph(content) doc.add_paragraph(content)
def _extract_section_content(self, content: str, section_title: str) -> str: def _extractSectionContent(self, content: str, section_title: str) -> str:
"""Extract relevant content for a specific section from AI response.""" """Extract relevant content for a specific section from AI response."""
if not content or not section_title: if not content or not section_title:
return "" return ""
@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer):
return "" return ""
def _setup_document_styles(self, doc): def _setupDocumentStyles(self, doc):
"""Set up document styles.""" """Set up document styles."""
try: try:
# Set default font # Set default font
@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not set up document styles: {str(e)}") self.logger.warning(f"Could not set up document styles: {str(e)}")
def _process_section(self, doc, lines: list): def _processSection(self, doc, lines: list):
"""Process a section of content into DOCX elements.""" """Process a section of content into DOCX elements."""
for line in lines: for line in lines:
if not line.strip(): if not line.strip():
@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer):
# Check for tables (lines with |) # Check for tables (lines with |)
if '|' in line and not line.startswith('|'): if '|' in line and not line.startswith('|'):
# This might be part of a table, process as table # This might be part of a table, process as table
table_data = self._extract_table_data(lines) table_data = self._extractTableData(lines)
if table_data: if table_data:
self._add_table(doc, table_data) self._addTable(doc, table_data)
return return
# Check for lists # Check for lists
@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer):
# Regular paragraph # Regular paragraph
doc.add_paragraph(line) doc.add_paragraph(line)
def _extract_table_data(self, lines: list) -> list: def _extractTableData(self, lines: list) -> list:
"""Extract table data from lines.""" """Extract table data from lines."""
table_data = [] table_data = []
in_table = False in_table = False
@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer):
return table_data if len(table_data) > 1 else [] return table_data if len(table_data) > 1 else []
def _add_table(self, doc, table_data: list): def _addTable(self, doc, table_data: list):
"""Add a table to the document.""" """Add a table to the document."""
try: try:
if not table_data: if not table_data:
@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer):
table.rows[row_idx].cells[col_idx].text = cell_data table.rows[row_idx].cells[col_idx].text = cell_data
# Style the table # Style the table
self._style_table(table) self._styleTable(table)
except Exception as e: except Exception as e:
self.logger.warning(f"Could not add table: {str(e)}") self.logger.warning(f"Could not add table: {str(e)}")
def _style_table(self, table): def _styleTable(self, table):
"""Apply styling to the table.""" """Apply styling to the table."""
try: try:
# Style header row # Style header row
@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not style table: {str(e)}") self.logger.warning(f"Could not style table: {str(e)}")
def _process_table_row(self, doc, line: str): def _processTableRow(self, doc, line: str):
"""Process a table row and add it to the document.""" """Process a table row and add it to the document."""
if not line.strip(): if not line.strip():
return return
@ -745,7 +745,7 @@ class RendererDocx(BaseRenderer):
# Not a table row, treat as regular text # Not a table row, treat as regular text
doc.add_paragraph(line) doc.add_paragraph(line)
def _clean_ai_content(self, content: str) -> str: def _cleanAiContent(self, content: str) -> str:
"""Clean AI-generated content by removing debug information and duplicates.""" """Clean AI-generated content by removing debug information and duplicates."""
if not content: if not content:
return "" return ""
@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer):
return '\n\n'.join(unique_sections) return '\n\n'.join(unique_sections)
def _process_tables(self, doc, content: str) -> str: def _processTables(self, doc, content: str) -> str:
""" """
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
Returns the content with tables replaced by placeholders. Returns the content with tables replaced by placeholders.
@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer):
return '\n'.join(processed_lines) return '\n'.join(processed_lines)
def _parse_and_format_content(self, doc, content: str, title: str): def _parseAndFormatContent(self, doc, content: str, title: str):
"""Parse AI-generated content in standardized format and apply proper DOCX formatting.""" """Parse AI-generated content in standardized format and apply proper DOCX formatting."""
if not content: if not content:
return return
# Process tables and replace them with placeholders # Process tables and replace them with placeholders
content = self._process_tables(doc, content) content = self._processTables(doc, content)
# Parse content line by line in exact sequence # Parse content line by line in exact sequence
lines = content.split('\n') lines = content.split('\n')
@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer):
# Regular paragraph # Regular paragraph
else: else:
self._add_paragraph_to_doc(doc, line) self._addParagraphToDoc(doc, line)
def _add_paragraph_to_doc(self, doc, text: str): def _addParagraphToDoc(self, doc, text: str):
"""Add a paragraph to the document with proper formatting.""" """Add a paragraph to the document with proper formatting."""
if not text.strip(): if not text.strip():
return return

View file

@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction.""" """Renders content to HTML format with format-specific extraction."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported HTML formats.""" """Return supported HTML formats."""
return ['html', 'htm'] return ['html', 'htm']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['web', 'webpage'] return ['web', 'webpage']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for HTML renderer.""" """Return priority for HTML renderer."""
return 100 return 100
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to HTML format using AI-analyzed styling.""" """Render extracted JSON content to HTML format using AI-analyzed styling."""
try: try:
# Generate HTML using AI-analyzed styling # Generate HTML using AI-analyzed styling
html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service) htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
return html_content, "text/html" return htmlContent, "text/html"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering HTML: {str(e)}") self.logger.error(f"Error rendering HTML: {str(e)}")
# Return minimal HTML fallback # Return minimal HTML fallback
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html" return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling.""" """Generate HTML content from structured JSON document using AI-generated styling."""
try: try:
# Get AI-generated styling definitions # Get AI-generated styling definitions
styles = await self._get_html_styles(user_prompt, ai_service) styles = await self._getHtmlStyles(userPrompt, aiService)
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary") raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content: if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field") raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title # Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title) documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build HTML document # Build HTML document
html_parts = [] htmlParts = []
# HTML document structure # HTML document structure
html_parts.append('<!DOCTYPE html>') htmlParts.append('<!DOCTYPE html>')
html_parts.append('<html lang="en">') htmlParts.append('<html lang="en">')
html_parts.append('<head>') htmlParts.append('<head>')
html_parts.append('<meta charset="UTF-8">') htmlParts.append('<meta charset="UTF-8">')
html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">') htmlParts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
html_parts.append(f'<title>{document_title}</title>') htmlParts.append(f'<title>{documentTitle}</title>')
html_parts.append('<style>') htmlParts.append('<style>')
html_parts.append(self._generate_css_styles(styles)) htmlParts.append(self._generateCssStyles(styles))
html_parts.append('</style>') htmlParts.append('</style>')
html_parts.append('</head>') htmlParts.append('</head>')
html_parts.append('<body>') htmlParts.append('<body>')
# Document header # Document header
html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>') htmlParts.append(f'<header><h1 class="document-title">{documentTitle}</h1></header>')
# Main content # Main content
html_parts.append('<main>') htmlParts.append('<main>')
# Process each section # Process each section
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
for section in sections: for section in sections:
section_html = self._render_json_section(section, styles) sectionHtml = self._renderJsonSection(section, styles)
if section_html: if sectionHtml:
html_parts.append(section_html) htmlParts.append(sectionHtml)
html_parts.append('</main>') htmlParts.append('</main>')
# Footer # Footer
html_parts.append('<footer>') htmlParts.append('<footer>')
html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>') htmlParts.append(f'<p class="generated-info">Generated: {self._formatTimestamp()}</p>')
html_parts.append('</footer>') htmlParts.append('</footer>')
html_parts.append('</body>') htmlParts.append('</body>')
html_parts.append('</html>') htmlParts.append('</html>')
return '\n'.join(html_parts) return '\n'.join(htmlParts)
except Exception as e: except Exception as e:
self.logger.error(f"Error generating HTML from JSON: {str(e)}") self.logger.error(f"Error generating HTML from JSON: {str(e)}")
raise Exception(f"HTML generation failed: {str(e)}") raise Exception(f"HTML generation failed: {str(e)}")
async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: async def _getHtmlStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get HTML styling definitions using base template AI styling.""" """Get HTML styling definitions using base template AI styling."""
style_schema = { styleSchema = {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}, "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"}, "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"}, "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
@ -113,40 +113,40 @@ class RendererHtml(BaseRenderer):
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"} "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
} }
style_template = self._create_ai_style_template("html", user_prompt, style_schema) styleTemplate = self._createAiStyleTemplate("html", userPrompt, styleSchema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles()) styles = await self._getAiStyles(aiService, styleTemplate, self._getDefaultHtmlStyles())
# Validate and fix contrast issues # Validate and fix contrast issues
return self._validate_html_styles_contrast(styles) return self._validateHtmlStylesContrast(styles)
def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _validateHtmlStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles.""" """Validate and fix contrast issues in AI-generated styles."""
try: try:
# Fix table header contrast # Fix table header contrast
if "table_header" in styles: if "table_header" in styles:
header = styles["table_header"] header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF") bgColor = header.get("background", "#FFFFFF")
text_color = header.get("color", "#000000") textColor = header.get("color", "#000000")
# If both are white or both are dark, fix it # If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#4F4F4F" header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF" header["color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#4F4F4F" header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF" header["color"] = "#FFFFFF"
# Fix table cell contrast # Fix table cell contrast
if "table_cell" in styles: if "table_cell" in styles:
cell = styles["table_cell"] cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF") bgColor = cell.get("background", "#FFFFFF")
text_color = cell.get("color", "#000000") textColor = cell.get("color", "#000000")
# If both are white or both are dark, fix it # If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF" cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F" cell["color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFF" cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F" cell["color"] = "#2F2F2F"
@ -154,10 +154,10 @@ class RendererHtml(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}") self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_html_styles() return self._getDefaultHtmlStyles()
def _get_default_html_styles(self) -> Dict[str, Any]: def _getDefaultHtmlStyles(self) -> Dict[str, Any]:
"""Default HTML styles.""" """Default HTML styles."""
return { return {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"}, "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
@ -173,7 +173,7 @@ class RendererHtml(BaseRenderer):
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"} "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
} }
def _generate_css_styles(self, styles: Dict[str, Any]) -> str: def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions.""" """Generate CSS from style definitions."""
css_parts = [] css_parts = []
@ -271,109 +271,109 @@ class RendererHtml(BaseRenderer):
return '\n'.join(css_parts) return '\n'.join(css_parts)
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles.""" """Render a single JSON section to HTML using AI-generated styles."""
try: try:
section_type = self._get_section_type(section) sectionType = self._getSectionType(section)
section_data = self._get_section_data(section) sectionData = self._getSectionData(section)
if section_type == "table": if sectionType == "table":
# Process the section data to extract table structure # Process the section data to extract table structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_table(processed_data, styles) return self._renderJsonTable(processedData, styles)
elif section_type == "bullet_list": elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure # Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_bullet_list(processed_data, styles) return self._renderJsonBulletList(processedData, styles)
elif section_type == "heading": elif sectionType == "heading":
return self._render_json_heading(section_data, styles) return self._renderJsonHeading(sectionData, styles)
elif section_type == "paragraph": elif sectionType == "paragraph":
return self._render_json_paragraph(section_data, styles) return self._renderJsonParagraph(sectionData, styles)
elif section_type == "code_block": elif sectionType == "code_block":
# Process the section data to extract code block structure # Process the section data to extract code block structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_code_block(processed_data, styles) return self._renderJsonCodeBlock(processedData, styles)
elif section_type == "image": elif sectionType == "image":
# Process the section data to extract image structure # Process the section data to extract image structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_image(processed_data, styles) return self._renderJsonImage(processedData, styles)
else: else:
# Fallback to paragraph for unknown types # Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data, styles) return self._renderJsonParagraph(sectionData, styles)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f'<div class="error">[Error rendering section: {str(e)}]</div>' return f'<div class="error">[Error rendering section: {str(e)}]</div>'
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON table to HTML using AI-generated styles.""" """Render a JSON table to HTML using AI-generated styles."""
try: try:
headers = table_data.get("headers", []) headers = tableData.get("headers", [])
rows = table_data.get("rows", []) rows = tableData.get("rows", [])
if not headers or not rows: if not headers or not rows:
return "" return ""
html_parts = ['<table>'] htmlParts = ['<table>']
# Table header # Table header
html_parts.append('<thead><tr>') htmlParts.append('<thead><tr>')
for header in headers: for header in headers:
html_parts.append(f'<th>{header}</th>') htmlParts.append(f'<th>{header}</th>')
html_parts.append('</tr></thead>') htmlParts.append('</tr></thead>')
# Table body # Table body
html_parts.append('<tbody>') htmlParts.append('<tbody>')
for row in rows: for row in rows:
html_parts.append('<tr>') htmlParts.append('<tr>')
for cell_data in row: for cellData in row:
html_parts.append(f'<td>{cell_data}</td>') htmlParts.append(f'<td>{cellData}</td>')
html_parts.append('</tr>') htmlParts.append('</tr>')
html_parts.append('</tbody>') htmlParts.append('</tbody>')
html_parts.append('</table>') htmlParts.append('</table>')
return '\n'.join(html_parts) return '\n'.join(htmlParts)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
return "" return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON bullet list to HTML using AI-generated styles.""" """Render a JSON bullet list to HTML using AI-generated styles."""
try: try:
items = list_data.get("items", []) items = listData.get("items", [])
if not items: if not items:
return "" return ""
html_parts = ['<ul>'] htmlParts = ['<ul>']
for item in items: for item in items:
if isinstance(item, str): if isinstance(item, str):
html_parts.append(f'<li>{item}</li>') htmlParts.append(f'<li>{item}</li>')
elif isinstance(item, dict) and "text" in item: elif isinstance(item, dict) and "text" in item:
html_parts.append(f'<li>{item["text"]}</li>') htmlParts.append(f'<li>{item["text"]}</li>')
html_parts.append('</ul>') htmlParts.append('</ul>')
return '\n'.join(html_parts) return '\n'.join(htmlParts)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
return "" return ""
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles.""" """Render a JSON heading to HTML using AI-generated styles."""
try: try:
# Normalize non-dict inputs # Normalize non-dict inputs
if isinstance(heading_data, str): if isinstance(headingData, str):
heading_data = {"text": heading_data, "level": 2} headingData = {"text": headingData, "level": 2}
elif isinstance(heading_data, list): elif isinstance(headingData, list):
# Render a list as bullet list under a default heading label # Render a list as bullet list under a default heading label
return self._render_json_bullet_list({"items": heading_data}, styles) return self._renderJsonBulletList({"items": headingData}, styles)
elif not isinstance(heading_data, dict): elif not isinstance(headingData, dict):
return "" return ""
level = heading_data.get("level", 1) level = headingData.get("level", 1)
text = heading_data.get("text", "") text = headingData.get("text", "")
if text: if text:
level = max(1, min(6, level)) level = max(1, min(6, level))
@ -385,19 +385,19 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}") self.logger.warning(f"Error rendering heading: {str(e)}")
return "" return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles.""" """Render a JSON paragraph to HTML using AI-generated styles."""
try: try:
# Normalize non-dict inputs # Normalize non-dict inputs
if isinstance(paragraph_data, str): if isinstance(paragraphData, str):
paragraph_data = {"text": paragraph_data} paragraphData = {"text": paragraphData}
elif isinstance(paragraph_data, list): elif isinstance(paragraphData, list):
# Treat list as bullet list paragraph # Treat list as bullet list paragraph
return self._render_json_bullet_list({"items": paragraph_data}, styles) return self._renderJsonBulletList({"items": paragraphData}, styles)
elif not isinstance(paragraph_data, dict): elif not isinstance(paragraphData, dict):
return "" return ""
text = paragraph_data.get("text", "") text = paragraphData.get("text", "")
if text: if text:
return f'<p>{text}</p>' return f'<p>{text}</p>'
@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
return "" return ""
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON code block to HTML using AI-generated styles.""" """Render a JSON code block to HTML using AI-generated styles."""
try: try:
code = code_data.get("code", "") code = codeData.get("code", "")
language = code_data.get("language", "") language = codeData.get("language", "")
if code: if code:
if language: if language:
@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
return "" return ""
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str: def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON image to HTML.""" """Render a JSON image to HTML."""
try: try:
base64_data = image_data.get("base64Data", "") base64Data = imageData.get("base64Data", "")
alt_text = image_data.get("altText", "Image") altText = imageData.get("altText", "Image")
if base64_data: if base64Data:
return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">' return f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
return "" return ""
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}") self.logger.warning(f"Error rendering image: {str(e)}")
return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>' return f'<div class="error">[Image: {imageData.get("altText", "Image")}]</div>'

View file

@ -12,154 +12,156 @@ class RendererImage(BaseRenderer):
"""Renders content to image format using AI image generation.""" """Renders content to image format using AI image generation."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported image formats.""" """Return supported image formats."""
return ['png', 'jpg', 'jpeg', 'image'] return ['png', 'jpg', 'jpeg', 'image']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['img', 'picture', 'photo', 'graphic'] return ['img', 'picture', 'photo', 'graphic']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for image renderer.""" """Return priority for image renderer."""
return 90 return 90
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to image format using AI image generation.""" """Render extracted JSON content to image format using AI image generation."""
try: try:
# Generate AI image from content # Generate AI image from content
image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service) imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
return image_content, "image/png" return imageContent, "image/png"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering image: {str(e)}") self.logger.error(f"Error rendering image: {str(e)}")
# Re-raise the exception instead of using fallback # Re-raise the exception instead of using fallback
raise Exception(f"Image rendering failed: {str(e)}") raise Exception(f"Image rendering failed: {str(e)}")
async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate AI image from extracted content.""" """Generate AI image from extracted content."""
try: try:
if not ai_service: if not aiService:
raise ValueError("AI service is required for image generation") raise ValueError("AI service is required for image generation")
# Validate JSON structure # Validate JSON structure
if not isinstance(extracted_content, dict): if not isinstance(extractedContent, dict):
raise ValueError("Extracted content must be a dictionary") raise ValueError("Extracted content must be a dictionary")
if "sections" not in extracted_content: if "sections" not in extractedContent:
raise ValueError("Extracted content must contain 'sections' field") raise ValueError("Extracted content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title # Use title from JSON metadata if available, otherwise use provided title
document_title = extracted_content.get("metadata", {}).get("title", title) documentTitle = extractedContent.get("metadata", {}).get("title", title)
# Create AI prompt for image generation # Create AI prompt for image generation
image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service) imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)
# Save image generation prompt to debug # Save image generation prompt to debug
ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt") aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")
# Generate image using AI # Generate image using AI
image_result = await ai_service.aiObjects.generateImage( imageResult = await aiService.aiObjects.generateImage(
prompt=image_prompt, prompt=imagePrompt,
size="1024x1024", size="1024x1024",
quality="standard", quality="standard",
style="vivid" style="vivid"
) )
# Save image generation response to debug # Save image generation response to debug
ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response") aiService.services.utils.writeDebugFile(str(imageResult), "image_generation_response")
# Extract base64 image data from result # Extract base64 image data from result
if image_result and image_result.get("success", False): if imageResult and imageResult.get("success", False):
image_data = image_result.get("image_data", "") imageData = imageResult.get("image_data", "")
if image_data: if imageData:
return image_data return imageData
else: else:
raise ValueError("No image data returned from AI") raise ValueError("No image data returned from AI")
else: else:
error_msg = image_result.get("error", "Unknown error") if image_result else "No result" errorMsg = imageResult.get("error", "Unknown error") if imageResult else "No result"
raise ValueError(f"AI image generation failed: {error_msg}") raise ValueError(f"AI image generation failed: {errorMsg}")
except Exception as e: except Exception as e:
self.logger.error(f"Error generating AI image: {str(e)}") self.logger.error(f"Error generating AI image: {str(e)}")
raise Exception(f"AI image generation failed: {str(e)}") raise Exception(f"AI image generation failed: {str(e)}")
async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Create a detailed prompt for AI image generation based on the content.""" """Create a detailed prompt for AI image generation based on the content."""
try: try:
# Start with base prompt # Start with base prompt
prompt_parts = [] promptParts = []
# Add user's original intent if available # Add user's original intent if available
if user_prompt: if userPrompt:
prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}") sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
promptParts.append(f"User Request: {sanitized_prompt}")
# Add document title # Add document title
prompt_parts.append(f"Document Title: {title}") promptParts.append(f"Document Title: {title}")
# Analyze content and create visual description # Analyze content and create visual description
sections = extracted_content.get("sections", []) sections = extractedContent.get("sections", [])
content_description = self._analyze_content_for_visual_description(sections) contentDescription = self._analyzeContentForVisualDescription(sections)
if content_description: if contentDescription:
prompt_parts.append(f"Content to Visualize: {content_description}") promptParts.append(f"Content to Visualize: {contentDescription}")
# Add style guidance # Add style guidance
style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt) styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
if style_guidance: if styleGuidance:
prompt_parts.append(f"Visual Style: {style_guidance}") promptParts.append(f"Visual Style: {styleGuidance}")
# Combine all parts # Combine all parts
full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts) fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)
# Add technical requirements # Add technical requirements
full_prompt += "\n\nTechnical Requirements:" fullPrompt += "\n\nTechnical Requirements:"
full_prompt += "\n- High quality, professional appearance" fullPrompt += "\n- High quality, professional appearance"
full_prompt += "\n- Clear, readable text if any text is included" fullPrompt += "\n- Clear, readable text if any text is included"
full_prompt += "\n- Appropriate colors and layout" fullPrompt += "\n- Appropriate colors and layout"
full_prompt += "\n- Suitable for business/professional use" fullPrompt += "\n- Suitable for business/professional use"
# Truncate prompt if it exceeds DALL-E's 4000 character limit # Truncate prompt if it exceeds DALL-E's 4000 character limit
if len(full_prompt) > 4000: if len(fullPrompt) > 4000:
# Use AI to compress the prompt intelligently # Use AI to compress the prompt intelligently
compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service) compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
if compressed_prompt and len(compressed_prompt) <= 4000: if compressedPrompt and len(compressedPrompt) <= 4000:
return compressed_prompt return compressedPrompt
# Fallback to minimal prompt if AI compression fails or is still too long # Fallback to minimal prompt if AI compression fails or is still too long
minimal_prompt = f"Create a professional image representing: {title}" minimalPrompt = f"Create a professional image representing: {title}"
if user_prompt: if userPrompt:
minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}" sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
minimalPrompt += f" - {sanitized_prompt}"
# If even the minimal prompt is too long, truncate it # If even the minimal prompt is too long, truncate it
if len(minimal_prompt) > 4000: if len(minimalPrompt) > 4000:
minimal_prompt = minimal_prompt[:3997] + "..." minimalPrompt = minimalPrompt[:3997] + "..."
return minimal_prompt return minimalPrompt
return full_prompt return fullPrompt
except Exception as e: except Exception as e:
self.logger.warning(f"Error creating image prompt: {str(e)}") self.logger.warning(f"Error creating image prompt: {str(e)}")
# Fallback to simple prompt # Fallback to simple prompt
return f"Create a professional image representing: {title}" return f"Create a professional image representing: {title}"
async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str: async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str:
"""Use AI to intelligently compress a long prompt while preserving key information.""" """Use AI to intelligently compress a long prompt while preserving key information."""
try: try:
if not ai_service: if not aiService:
return None return None
compression_prompt = f""" compressionPrompt = f"""
You are an expert at creating concise, effective prompts for AI image generation. You are an expert at creating concise, effective prompts for AI image generation.
The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information. The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
Original prompt ({len(long_prompt)} characters): Original prompt ({len(longPrompt)} characters):
{long_prompt} {longPrompt}
Please create a compressed version that: Please create a compressed version that:
1. Keeps the most important visual elements and requirements 1. Keeps the most important visual elements and requirements
@ -176,7 +178,7 @@ Return only the compressed prompt, no explanations.
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request = AiCallRequest( request = AiCallRequest(
prompt=compression_prompt, prompt=compressionPrompt,
options=AiCallOptions( options=AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE, operationType=OperationTypeEnum.DATA_GENERATE,
maxTokens=None, # Let the model use its full context length maxTokens=None, # Let the model use its full context length
@ -184,12 +186,12 @@ Return only the compressed prompt, no explanations.
) )
) )
response = await ai_service.aiObjects.call(request) response = await aiService.aiObjects.call(request)
compressed = response.content.strip() compressed = response.content.strip()
# Validate the compressed prompt # Validate the compressed prompt
if compressed and len(compressed) <= 4000 and len(compressed) > 50: if compressed and len(compressed) <= 4000 and len(compressed) > 50:
self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters") self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
return compressed return compressed
else: else:
self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars") self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
@ -199,42 +201,42 @@ Return only the compressed prompt, no explanations.
self.logger.warning(f"Error compressing prompt with AI: {str(e)}") self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
return None return None
def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str: def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
"""Analyze content sections and create a visual description for AI.""" """Analyze content sections and create a visual description for AI."""
try: try:
descriptions = [] descriptions = []
for section in sections: for section in sections:
section_type = self._get_section_type(section) sectionType = self._getSectionType(section)
section_data = self._get_section_data(section) sectionData = self._getSectionData(section)
if section_type == "table": if sectionType == "table":
headers = section_data.get("headers", []) headers = sectionData.get("headers", [])
rows = section_data.get("rows", []) rows = sectionData.get("rows", [])
if headers and rows: if headers and rows:
descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}") descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
elif section_type == "bullet_list": elif sectionType == "bullet_list":
items = section_data.get("items", []) items = sectionData.get("items", [])
if items: if items:
descriptions.append(f"List with {len(items)} items") descriptions.append(f"List with {len(items)} items")
elif section_type == "heading": elif sectionType == "heading":
text = section_data.get("text", "") text = sectionData.get("text", "")
level = section_data.get("level", 1) level = sectionData.get("level", 1)
if text: if text:
descriptions.append(f"Heading {level}: {text}") descriptions.append(f"Heading {level}: {text}")
elif section_type == "paragraph": elif sectionType == "paragraph":
text = section_data.get("text", "") text = sectionData.get("text", "")
if text and len(text) > 10: # Only include substantial paragraphs if text and len(text) > 10: # Only include substantial paragraphs
# Truncate long text # Truncate long text
truncated = text[:100] + "..." if len(text) > 100 else text truncated = text[:100] + "..." if len(text) > 100 else text
descriptions.append(f"Text content: {truncated}") descriptions.append(f"Text content: {truncated}")
elif section_type == "code_block": elif sectionType == "code_block":
code = section_data.get("code", "") code = sectionData.get("code", "")
language = section_data.get("language", "") language = sectionData.get("language", "")
if code: if code:
descriptions.append(f"Code block ({language}): {code[:50]}...") descriptions.append(f"Code block ({language}): {code[:50]}...")
@ -244,42 +246,42 @@ Return only the compressed prompt, no explanations.
self.logger.warning(f"Error analyzing content: {str(e)}") self.logger.warning(f"Error analyzing content: {str(e)}")
return "Document content" return "Document content"
def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str: def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str:
"""Determine visual style guidance based on content and user prompt.""" """Determine visual style guidance based on content and user prompt."""
try: try:
style_elements = [] styleElements = []
# Analyze user prompt for style hints # Analyze user prompt for style hints
if user_prompt: if userPrompt:
prompt_lower = user_prompt.lower() promptLower = userPrompt.lower()
if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]): if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
style_elements.append("modern, clean design") styleElements.append("modern, clean design")
elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]): elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
style_elements.append("classic, formal design") styleElements.append("classic, formal design")
elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]): elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
style_elements.append("creative, artistic design") styleElements.append("creative, artistic design")
elif any(word in prompt_lower for word in ["corporate", "business", "professional"]): elif any(word in promptLower for word in ["corporate", "business", "professional"]):
style_elements.append("corporate, professional design") styleElements.append("corporate, professional design")
# Analyze content type for additional style hints # Analyze content type for additional style hints
sections = extracted_content.get("sections", []) sections = extractedContent.get("sections", [])
has_tables = any(self._get_section_type(s) == "table" for s in sections) hasTables = any(self._getSectionType(s) == "table" for s in sections)
has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections) hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
has_code = any(self._get_section_type(s) == "code_block" for s in sections) hasCode = any(self._getSectionType(s) == "code_block" for s in sections)
if has_tables: if hasTables:
style_elements.append("data-focused layout") styleElements.append("data-focused layout")
if has_lists: if hasLists:
style_elements.append("organized, structured presentation") styleElements.append("organized, structured presentation")
if has_code: if hasCode:
style_elements.append("technical, developer-friendly") styleElements.append("technical, developer-friendly")
# Default style if no specific guidance # Default style if no specific guidance
if not style_elements: if not styleElements:
style_elements.append("professional, clean design") styleElements.append("professional, clean design")
return ", ".join(style_elements) return ", ".join(styleElements)
except Exception as e: except Exception as e:
self.logger.warning(f"Error determining style guidance: {str(e)}") self.logger.warning(f"Error determining style guidance: {str(e)}")

View file

@ -10,40 +10,40 @@ class RendererJson(BaseRenderer):
"""Renders content to JSON format with format-specific extraction.""" """Renders content to JSON format with format-specific extraction."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported JSON formats.""" """Return supported JSON formats."""
return ['json'] return ['json']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['data'] return ['data']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for JSON renderer.""" """Return priority for JSON renderer."""
return 80 return 80
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to JSON format.""" """Render extracted JSON content to JSON format."""
try: try:
# The extracted content should already be JSON from the AI # The extracted content should already be JSON from the AI
# Just validate and format it # Just validate and format it
json_content = self._clean_json_content(extracted_content, title) jsonContent = self._cleanJsonContent(extractedContent, title)
return json_content, "application/json" return jsonContent, "application/json"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering JSON: {str(e)}") self.logger.error(f"Error rendering JSON: {str(e)}")
# Return minimal JSON fallback # Return minimal JSON fallback
fallback_data = { fallbackData = {
"title": title, "title": title,
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}], "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
"metadata": {"error": str(e)} "metadata": {"error": str(e)}
} }
return json.dumps(fallback_data, indent=2), "application/json" return json.dumps(fallbackData, indent=2), "application/json"
def _clean_json_content(self, content: Dict[str, Any], title: str) -> str: def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
"""Clean and validate JSON content from AI.""" """Clean and validate JSON content from AI."""
try: try:
# Validate JSON structure # Validate JSON structure
@ -72,8 +72,8 @@ class RendererJson(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Error cleaning JSON content: {str(e)}") self.logger.warning(f"Error cleaning JSON content: {str(e)}")
# Return minimal valid JSON # Return minimal valid JSON
fallback_data = { fallbackData = {
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}], "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
"metadata": {"title": title, "error": str(e)} "metadata": {"title": title, "error": str(e)}
} }
return json.dumps(fallback_data, indent=2, ensure_ascii=False) return json.dumps(fallbackData, indent=2, ensure_ascii=False)

View file

@ -9,161 +9,161 @@ class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction.""" """Renders content to Markdown format with format-specific extraction."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported Markdown formats.""" """Return supported Markdown formats."""
return ['md', 'markdown'] return ['md', 'markdown']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['mdown', 'mkd'] return ['mdown', 'mkd']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for markdown renderer.""" """Return priority for markdown renderer."""
return 95 return 95
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to Markdown format.""" """Render extracted JSON content to Markdown format."""
try: try:
# Generate markdown from JSON structure # Generate markdown from JSON structure
markdown_content = self._generate_markdown_from_json(extracted_content, title) markdownContent = self._generateMarkdownFromJson(extractedContent, title)
return markdown_content, "text/markdown" return markdownContent, "text/markdown"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering markdown: {str(e)}") self.logger.error(f"Error rendering markdown: {str(e)}")
# Return minimal markdown fallback # Return minimal markdown fallback
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown" return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str: def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document.""" """Generate markdown content from structured JSON document."""
try: try:
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary") raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content: if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field") raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title # Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title) documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build markdown content # Build markdown content
markdown_parts = [] markdownParts = []
# Document title # Document title
markdown_parts.append(f"# {document_title}") markdownParts.append(f"# {documentTitle}")
markdown_parts.append("") markdownParts.append("")
# Process each section # Process each section
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
for section in sections: for section in sections:
section_markdown = self._render_json_section(section) sectionMarkdown = self._renderJsonSection(section)
if section_markdown: if sectionMarkdown:
markdown_parts.append(section_markdown) markdownParts.append(sectionMarkdown)
markdown_parts.append("") # Add spacing between sections markdownParts.append("") # Add spacing between sections
# Add generation info # Add generation info
markdown_parts.append("---") markdownParts.append("---")
markdown_parts.append(f"*Generated: {self._format_timestamp()}*") markdownParts.append(f"*Generated: {self._formatTimestamp()}*")
return '\n'.join(markdown_parts) return '\n'.join(markdownParts)
except Exception as e: except Exception as e:
self.logger.error(f"Error generating markdown from JSON: {str(e)}") self.logger.error(f"Error generating markdown from JSON: {str(e)}")
raise Exception(f"Markdown generation failed: {str(e)}") raise Exception(f"Markdown generation failed: {str(e)}")
def _render_json_section(self, section: Dict[str, Any]) -> str: def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to markdown.""" """Render a single JSON section to markdown."""
try: try:
section_type = self._get_section_type(section) sectionType = self._getSectionType(section)
section_data = self._get_section_data(section) sectionData = self._getSectionData(section)
if section_type == "table": if sectionType == "table":
# Process the section data to extract table structure # Process the section data to extract table structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_table(processed_data) return self._renderJsonTable(processedData)
elif section_type == "bullet_list": elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure # Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_bullet_list(processed_data) return self._renderJsonBulletList(processedData)
elif section_type == "heading": elif sectionType == "heading":
return self._render_json_heading(section_data) return self._renderJsonHeading(sectionData)
elif section_type == "paragraph": elif sectionType == "paragraph":
return self._render_json_paragraph(section_data) return self._renderJsonParagraph(sectionData)
elif section_type == "code_block": elif sectionType == "code_block":
# Process the section data to extract code block structure # Process the section data to extract code block structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_code_block(processed_data) return self._renderJsonCodeBlock(processedData)
elif section_type == "image": elif sectionType == "image":
# Process the section data to extract image structure # Process the section data to extract image structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_image(processed_data) return self._renderJsonImage(processedData)
else: else:
# Fallback to paragraph for unknown types # Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data) return self._renderJsonParagraph(sectionData)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f"*[Error rendering section: {str(e)}]*" return f"*[Error rendering section: {str(e)}]*"
def _render_json_table(self, table_data: Dict[str, Any]) -> str: def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to markdown.""" """Render a JSON table to markdown."""
try: try:
headers = table_data.get("headers", []) headers = tableData.get("headers", [])
rows = table_data.get("rows", []) rows = tableData.get("rows", [])
if not headers or not rows: if not headers or not rows:
return "" return ""
markdown_parts = [] markdownParts = []
# Create table header # Create table header
header_line = " | ".join(str(header) for header in headers) headerLine = " | ".join(str(header) for header in headers)
markdown_parts.append(header_line) markdownParts.append(headerLine)
# Add separator line # Add separator line
separator_line = " | ".join("---" for _ in headers) separatorLine = " | ".join("---" for _ in headers)
markdown_parts.append(separator_line) markdownParts.append(separatorLine)
# Add data rows # Add data rows
for row in rows: for row in rows:
row_line = " | ".join(str(cell_data) for cell_data in row) rowLine = " | ".join(str(cellData) for cellData in row)
markdown_parts.append(row_line) markdownParts.append(rowLine)
return '\n'.join(markdown_parts) return '\n'.join(markdownParts)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
return "" return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str: def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to markdown.""" """Render a JSON bullet list to markdown."""
try: try:
items = list_data.get("items", []) items = listData.get("items", [])
if not items: if not items:
return "" return ""
markdown_parts = [] markdownParts = []
for item in items: for item in items:
if isinstance(item, str): if isinstance(item, str):
markdown_parts.append(f"- {item}") markdownParts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item: elif isinstance(item, dict) and "text" in item:
markdown_parts.append(f"- {item['text']}") markdownParts.append(f"- {item['text']}")
return '\n'.join(markdown_parts) return '\n'.join(markdownParts)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
return "" return ""
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str: def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to markdown.""" """Render a JSON heading to markdown."""
try: try:
level = heading_data.get("level", 1) level = headingData.get("level", 1)
text = heading_data.get("text", "") text = headingData.get("text", "")
if text: if text:
level = max(1, min(6, level)) level = max(1, min(6, level))
@ -175,21 +175,21 @@ class RendererMarkdown(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}") self.logger.warning(f"Error rendering heading: {str(e)}")
return "" return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str: def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
"""Render a JSON paragraph to markdown.""" """Render a JSON paragraph to markdown."""
try: try:
text = paragraph_data.get("text", "") text = paragraphData.get("text", "")
return text if text else "" return text if text else ""
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
return "" return ""
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str: def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to markdown.""" """Render a JSON code block to markdown."""
try: try:
code = code_data.get("code", "") code = codeData.get("code", "")
language = code_data.get("language", "") language = codeData.get("language", "")
if code: if code:
if language: if language:
@ -203,19 +203,19 @@ class RendererMarkdown(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
return "" return ""
def _render_json_image(self, image_data: Dict[str, Any]) -> str: def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to markdown.""" """Render a JSON image to markdown."""
try: try:
alt_text = image_data.get("altText", "Image") altText = imageData.get("altText", "Image")
base64_data = image_data.get("base64Data", "") base64Data = imageData.get("base64Data", "")
if base64_data: if base64Data:
# For base64 images, we can't embed them directly in markdown # For base64 images, we can't embed them directly in markdown
# So we'll use a placeholder with the alt text # So we'll use a placeholder with the alt text
return f"![{alt_text}](data:image/png;base64,{base64_data[:50]}...)" return f"![{altText}](data:image/png;base64,{base64Data[:50]}...)"
else: else:
return f"![{alt_text}](image-placeholder)" return f"![{altText}](image-placeholder)"
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}") self.logger.warning(f"Error rendering image: {str(e)}")
return f"![{image_data.get('altText', 'Image')}](image-error)" return f"![{imageData.get('altText', 'Image')}](image-error)"

View file

@ -22,32 +22,32 @@ class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab.""" """Renders content to PDF format using reportlab."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported PDF formats.""" """Return supported PDF formats."""
return ['pdf'] return ['pdf']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['document', 'print'] return ['document', 'print']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for PDF renderer.""" """Return priority for PDF renderer."""
return 120 return 120
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to PDF format using AI-analyzed styling.""" """Render extracted JSON content to PDF format using AI-analyzed styling."""
try: try:
if not REPORTLAB_AVAILABLE: if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available # Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml from .rendererHtml import RendererHtml
html_renderer = RendererHtml() html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service) html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
return html_content, "text/html" return html_content, "text/html"
# Generate PDF using AI-analyzed styling # Generate PDF using AI-analyzed styling
pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service) pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
return pdf_content, "application/pdf" return pdf_content, "application/pdf"
@ -56,11 +56,11 @@ class RendererPdf(BaseRenderer):
# Return minimal fallback # Return minimal fallback
return f"PDF Generation Error: {str(e)}", "text/plain" return f"PDF Generation Error: {str(e)}", "text/plain"
async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling.""" """Generate PDF content from structured JSON document using AI-generated styling."""
try: try:
# Get AI-generated styling definitions # Get AI-generated styling definitions
styles = await self._get_pdf_styles(user_prompt, ai_service) styles = await self._getPdfStyles(userPrompt, aiService)
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(json_content, dict):
@ -93,10 +93,10 @@ class RendererPdf(BaseRenderer):
story = [] story = []
# Title page # Title page
title_style = self._create_title_style(styles) title_style = self._createTitleStyle(styles)
story.append(Paragraph(document_title, title_style)) story.append(Paragraph(document_title, title_style))
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles))) story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._createNormalStyle(styles)))
story.append(Spacer(1, 30)) # Add spacing before page break story.append(Spacer(1, 30)) # Add spacing before page break
story.append(PageBreak()) story.append(PageBreak())
@ -105,7 +105,7 @@ class RendererPdf(BaseRenderer):
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER") self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
for i, section in enumerate(sections): for i, section in enumerate(sections):
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER") self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
section_elements = self._render_json_section(section, styles) section_elements = self._renderJsonSection(section, styles)
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER") self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
story.extend(section_elements) story.extend(section_elements)
@ -123,7 +123,7 @@ class RendererPdf(BaseRenderer):
self.logger.error(f"Error generating PDF from JSON: {str(e)}") self.logger.error(f"Error generating PDF from JSON: {str(e)}")
raise Exception(f"PDF generation failed: {str(e)}") raise Exception(f"PDF generation failed: {str(e)}")
async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: async def _getPdfStyles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get PDF styling definitions using base template AI styling.""" """Get PDF styling definitions using base template AI styling."""
style_schema = { style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
@ -136,21 +136,21 @@ class RendererPdf(BaseRenderer):
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6} "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
} }
style_template = self._create_ai_style_template("pdf", user_prompt, style_schema) style_template = self._createAiStyleTemplate("pdf", user_prompt, style_schema)
# Use base template method like DOCX does (this works!) # Use base template method like DOCX does (this works!)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles()) styles = await self._getAiStyles(ai_service, style_template, self._getDefaultPdfStyles())
if styles is None: if styles is None:
return self._get_default_pdf_styles() return self._getDefaultPdfStyles()
# Convert colors to PDF format after getting styles # Convert colors to PDF format after getting styles
styles = self._convert_colors_format(styles) styles = self._convertColorsFormat(styles)
# Validate and fix contrast issues # Validate and fix contrast issues
return self._validate_pdf_styles_contrast(styles) return self._validatePdfStylesContrast(styles)
async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PDF color conversion.""" """Get AI styles with proper PDF color conversion."""
if not ai_service: if not ai_service:
return default_styles return default_styles
@ -279,7 +279,7 @@ class RendererPdf(BaseRenderer):
return default_styles return default_styles
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them) # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
styles = self._convert_colors_format(styles) styles = self._convertColorsFormat(styles)
return styles return styles
@ -287,7 +287,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"AI styling failed: {str(e)}, using defaults") self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert colors to proper format for PDF compatibility.""" """Convert colors to proper format for PDF compatibility."""
try: try:
for style_name, style_config in styles.items(): for style_name, style_config in styles.items():
@ -304,7 +304,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Color conversion failed: {str(e)}") self.logger.warning(f"Color conversion failed: {str(e)}")
return styles return styles
def _get_safe_color(self, color_value: str, default: str = "#000000") -> str: def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
"""Get a safe hex color value for PDF.""" """Get a safe hex color value for PDF."""
if isinstance(color_value, str) and color_value.startswith('#'): if isinstance(color_value, str) and color_value.startswith('#'):
if len(color_value) == 7: if len(color_value) == 7:
@ -313,7 +313,7 @@ class RendererPdf(BaseRenderer):
return color_value return color_value
return default return default
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _validatePdfStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles.""" """Validate and fix contrast issues in AI-generated styles."""
try: try:
# Fix table header contrast # Fix table header contrast
@ -348,9 +348,9 @@ class RendererPdf(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}") self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pdf_styles() return self._getDefaultPdfStyles()
def _get_default_pdf_styles(self) -> Dict[str, Any]: def _getDefaultPdfStyles(self) -> Dict[str, Any]:
"""Default PDF styles.""" """Default PDF styles."""
return { return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30}, "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
@ -363,27 +363,27 @@ class RendererPdf(BaseRenderer):
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6} "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
} }
def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle: def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create title style from style definitions.""" """Create title style from style definitions."""
title_style_def = styles.get("title", {}) title_style_def = styles.get("title", {})
# DEBUG: Show what color and spacing is being used for title # DEBUG: Show what color and spacing is being used for title
title_color = title_style_def.get("color", "#1F4E79") title_color = title_style_def.get("color", "#1F4E79")
title_space_after = title_style_def.get("space_after", 30) title_space_after = title_style_def.get("space_after", 30)
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER") self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER") self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
return ParagraphStyle( return ParagraphStyle(
'CustomTitle', 'CustomTitle',
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20 fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
spaceAfter=title_style_def.get("space_after", 30), spaceAfter=title_style_def.get("space_after", 30),
alignment=self._get_alignment(title_style_def.get("align", "center")), alignment=self._getAlignment(title_style_def.get("align", "center")),
textColor=self._hex_to_color(title_color), textColor=self._hexToColor(title_color),
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
spaceBefore=0 # Ensure no space before title spaceBefore=0 # Ensure no space before title
) )
def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle: def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
"""Create heading style from style definitions.""" """Create heading style from style definitions."""
heading_key = f"heading{level}" heading_key = f"heading{level}"
heading_style_def = styles.get(heading_key, styles.get("heading1", {})) heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
@ -393,11 +393,11 @@ class RendererPdf(BaseRenderer):
fontSize=heading_style_def.get("font_size", 18 - level * 2), fontSize=heading_style_def.get("font_size", 18 - level * 2),
spaceAfter=heading_style_def.get("space_after", 12), spaceAfter=heading_style_def.get("space_after", 12),
spaceBefore=heading_style_def.get("space_before", 12), spaceBefore=heading_style_def.get("space_before", 12),
alignment=self._get_alignment(heading_style_def.get("align", "left")), alignment=self._getAlignment(heading_style_def.get("align", "left")),
textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F")) textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F"))
) )
def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle: def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create normal paragraph style from style definitions.""" """Create normal paragraph style from style definitions."""
paragraph_style_def = styles.get("paragraph", {}) paragraph_style_def = styles.get("paragraph", {})
@ -405,12 +405,12 @@ class RendererPdf(BaseRenderer):
'CustomNormal', 'CustomNormal',
fontSize=paragraph_style_def.get("font_size", 11), fontSize=paragraph_style_def.get("font_size", 11),
spaceAfter=paragraph_style_def.get("space_after", 6), spaceAfter=paragraph_style_def.get("space_after", 6),
alignment=self._get_alignment(paragraph_style_def.get("align", "left")), alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")), textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11) leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
) )
def _get_alignment(self, align: str) -> int: def _getAlignment(self, align: str) -> int:
"""Convert alignment string to reportlab alignment constant.""" """Convert alignment string to reportlab alignment constant."""
if not align or not isinstance(align, str): if not align or not isinstance(align, str):
return TA_LEFT return TA_LEFT
@ -426,7 +426,7 @@ class RendererPdf(BaseRenderer):
} }
return align_map.get(align.lower().strip(), TA_LEFT) return align_map.get(align.lower().strip(), TA_LEFT)
def _get_table_alignment(self, align: str) -> str: def _getTableAlignment(self, align: str) -> str:
"""Convert alignment string to ReportLab table alignment string.""" """Convert alignment string to ReportLab table alignment string."""
if not align or not isinstance(align, str): if not align or not isinstance(align, str):
return 'LEFT' return 'LEFT'
@ -442,7 +442,7 @@ class RendererPdf(BaseRenderer):
} }
return align_map.get(align.lower().strip(), 'LEFT') return align_map.get(align.lower().strip(), 'LEFT')
def _hex_to_color(self, hex_color: str) -> colors.Color: def _hexToColor(self, hex_color: str) -> colors.Color:
"""Convert hex color to reportlab color.""" """Convert hex color to reportlab color."""
try: try:
hex_color = hex_color.lstrip('#') hex_color = hex_color.lstrip('#')
@ -464,38 +464,38 @@ class RendererPdf(BaseRenderer):
except: except:
return colors.black return colors.black
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles.""" """Render a single JSON section to PDF elements using AI-generated styles."""
try: try:
section_type = self._get_section_type(section) section_type = self._getSectionType(section)
elements = self._get_section_data(section) elements = self._getSectionData(section)
# Process each element in the section # Process each element in the section
all_elements = [] all_elements = []
for element in elements: for element in elements:
if section_type == "table": if section_type == "table":
all_elements.extend(self._render_json_table(element, styles)) all_elements.extend(self._renderJsonTable(element, styles))
elif section_type == "bullet_list": elif section_type == "bullet_list":
all_elements.extend(self._render_json_bullet_list(element, styles)) all_elements.extend(self._renderJsonBulletList(element, styles))
elif section_type == "heading": elif section_type == "heading":
all_elements.extend(self._render_json_heading(element, styles)) all_elements.extend(self._renderJsonHeading(element, styles))
elif section_type == "paragraph": elif section_type == "paragraph":
all_elements.extend(self._render_json_paragraph(element, styles)) all_elements.extend(self._renderJsonParagraph(element, styles))
elif section_type == "code_block": elif section_type == "code_block":
all_elements.extend(self._render_json_code_block(element, styles)) all_elements.extend(self._renderJsonCodeBlock(element, styles))
elif section_type == "image": elif section_type == "image":
all_elements.extend(self._render_json_image(element, styles)) all_elements.extend(self._renderJsonImage(element, styles))
else: else:
# Fallback to paragraph for unknown types # Fallback to paragraph for unknown types
all_elements.extend(self._render_json_paragraph(element, styles)) all_elements.extend(self._renderJsonParagraph(element, styles))
return all_elements return all_elements
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))] return [Paragraph(f"[Error rendering section: {str(e)}]", self._createNormalStyle(styles))]
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles.""" """Render a JSON table to PDF elements using AI-generated styles."""
try: try:
headers = table_data.get("headers", []) headers = table_data.get("headers", [])
@ -517,7 +517,7 @@ class RendererPdf(BaseRenderer):
table_style = [ table_style = [
('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))), ('BACKGROUND', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))), ('TEXTCOLOR', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("text_color", "#FFFFFF"))),
('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))), ('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)), ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
('BOTTOMPADDING', (0, 0), (-1, 0), 12), ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
@ -534,7 +534,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
return [] return []
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles.""" """Render a JSON bullet list to PDF elements using AI-generated styles."""
try: try:
items = list_data.get("items", []) items = list_data.get("items", [])
@ -556,7 +556,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
return [] return []
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON heading to PDF elements using AI-generated styles.""" """Render a JSON heading to PDF elements using AI-generated styles."""
try: try:
level = heading_data.get("level", 1) level = heading_data.get("level", 1)
@ -564,7 +564,7 @@ class RendererPdf(BaseRenderer):
if text: if text:
level = max(1, min(6, level)) level = max(1, min(6, level))
heading_style = self._create_heading_style(styles, level) heading_style = self._createHeadingStyle(styles, level)
return [Paragraph(text, heading_style)] return [Paragraph(text, heading_style)]
return [] return []
@ -573,13 +573,13 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}") self.logger.warning(f"Error rendering heading: {str(e)}")
return [] return []
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles.""" """Render a JSON paragraph to PDF elements using AI-generated styles."""
try: try:
text = paragraph_data.get("text", "") text = paragraph_data.get("text", "")
if text: if text:
return [Paragraph(text, self._create_normal_style(styles))] return [Paragraph(text, self._createNormalStyle(styles))]
return [] return []
@ -587,7 +587,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
return [] return []
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON code block to PDF elements using AI-generated styles.""" """Render a JSON code block to PDF elements using AI-generated styles."""
try: try:
code = code_data.get("code", "") code = code_data.get("code", "")
@ -601,7 +601,7 @@ class RendererPdf(BaseRenderer):
lang_style = ParagraphStyle( lang_style = ParagraphStyle(
'CodeLanguage', 'CodeLanguage',
fontSize=code_style_def.get("font_size", 9), fontSize=code_style_def.get("font_size", 9),
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")), textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName='Helvetica-Bold' fontName='Helvetica-Bold'
) )
elements.append(Paragraph(f"Code ({language}):", lang_style)) elements.append(Paragraph(f"Code ({language}):", lang_style))
@ -609,9 +609,9 @@ class RendererPdf(BaseRenderer):
code_style = ParagraphStyle( code_style = ParagraphStyle(
'CodeBlock', 'CodeBlock',
fontSize=code_style_def.get("font_size", 9), fontSize=code_style_def.get("font_size", 9),
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")), textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName=code_style_def.get("font", "Courier"), fontName=code_style_def.get("font", "Courier"),
backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")), backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
spaceAfter=code_style_def.get("space_after", 6) spaceAfter=code_style_def.get("space_after", 6)
) )
elements.append(Paragraph(code, code_style)) elements.append(Paragraph(code, code_style))
@ -624,7 +624,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
return [] return []
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements.""" """Render a JSON image to PDF elements."""
try: try:
base64_data = image_data.get("base64Data", "") base64_data = image_data.get("base64Data", "")
@ -632,10 +632,10 @@ class RendererPdf(BaseRenderer):
if base64_data: if base64_data:
# For now, just add a placeholder since reportlab image handling is complex # For now, just add a placeholder since reportlab image handling is complex
return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))] return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
return [] return []
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}") self.logger.warning(f"Error rendering image: {str(e)}")
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))] return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))]

View file

@ -12,23 +12,23 @@ class RendererPptx(BaseRenderer):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.supported_formats = ["pptx", "ppt"] self.supportedFormats = ["pptx", "ppt"]
self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation" self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
@classmethod @classmethod
def get_supported_formats(cls) -> list: def getSupportedFormats(cls) -> list:
"""Get list of supported output formats.""" """Get list of supported output formats."""
return ["pptx", "ppt"] return ["pptx", "ppt"]
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
""" """
Render content as PowerPoint presentation from JSON data. Render content as PowerPoint presentation from JSON data.
Args: Args:
extracted_content: JSON content to render as presentation extractedContent: JSON content to render as presentation
title: Title for the presentation title: Title for the presentation
user_prompt: User prompt for AI styling userPrompt: User prompt for AI styling
ai_service: AI service for styling aiService: AI service for styling
**kwargs: Additional rendering options **kwargs: Additional rendering options
Returns: Returns:
@ -43,7 +43,7 @@ class RendererPptx(BaseRenderer):
import re import re
# Get AI-generated styling definitions first # Get AI-generated styling definitions first
styles = await self._get_pptx_styles(user_prompt, ai_service) styles = await self._getPptxStyles(userPrompt, aiService)
# Create new presentation # Create new presentation
prs = Presentation() prs = Presentation()
@ -58,13 +58,13 @@ class RendererPptx(BaseRenderer):
prs.slide_height = Inches(7.5) prs.slide_height = Inches(7.5)
# Generate slides from JSON content # Generate slides from JSON content
slides_data = await self._parse_json_to_slides(extracted_content, title, styles) slidesData = await self._parseJsonToSlides(extractedContent, title, styles)
logger.info(f"Parsed {len(slides_data)} slides from JSON content") logger.info(f"Parsed {len(slidesData)} slides from JSON content")
# Debug: Show first 200 chars of content # Debug: Show first 200 chars of content
logger.info(f"JSON content preview: {str(extracted_content)[:200]}...") logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")
for i, slide_data in enumerate(slides_data): for i, slide_data in enumerate(slidesData):
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars") logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
# Debug: Show slide content preview # Debug: Show slide content preview
slide_content = slide_data.get('content', '') slide_content = slide_data.get('content', '')
@ -74,8 +74,8 @@ class RendererPptx(BaseRenderer):
logger.warning(f" ⚠️ Slide {i+1} has NO content!") logger.warning(f" ⚠️ Slide {i+1} has NO content!")
# Create slide with appropriate layout based on content # Create slide with appropriate layout based on content
slide_layout_index = self._get_slide_layout_index(slide_data, styles) slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
slide_layout = prs.slide_layouts[slide_layout_index] slide_layout = prs.slide_layouts[slideLayoutIndex]
slide = prs.slides.add_slide(slide_layout) slide = prs.slides.add_slide(slide_layout)
# Set title with AI-generated styling # Set title with AI-generated styling
@ -153,7 +153,7 @@ class RendererPptx(BaseRenderer):
p.alignment = PP_ALIGN.LEFT p.alignment = PP_ALIGN.LEFT
# If no slides were created, create a default slide # If no slides were created, create a default slide
if not slides_data: if not slidesData:
slide_layout = prs.slide_layouts[0] # Title slide layout slide_layout = prs.slide_layouts[0] # Title slide layout
slide = prs.slides.add_slide(slide_layout) slide = prs.slides.add_slide(slide_layout)
@ -198,7 +198,7 @@ class RendererPptx(BaseRenderer):
logger.error(f"Error rendering PowerPoint presentation: {str(e)}") logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain" return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
def _parse_content_to_slides(self, content: str, title: str) -> list: def _parseContentToSlides(self, content: str, title: str) -> list:
""" """
Parse content into slide data structure. Parse content into slide data structure.
@ -212,7 +212,7 @@ class RendererPptx(BaseRenderer):
slides = [] slides = []
# Split content by slide markers or headers # Split content by slide markers or headers
slide_sections = self._split_content_into_slides(content) slide_sections = self._splitContentIntoSlides(content)
for i, section in enumerate(slide_sections): for i, section in enumerate(slide_sections):
if section.strip(): if section.strip():
@ -239,7 +239,7 @@ class RendererPptx(BaseRenderer):
return slides return slides
def _split_content_into_slides(self, content: str) -> list: def _splitContentIntoSlides(self, content: str) -> list:
""" """
Split content into individual slides based on headers and structure. Split content into individual slides based on headers and structure.
@ -299,11 +299,11 @@ class RendererPptx(BaseRenderer):
return [content.strip()] return [content.strip()]
def get_output_mime_type(self) -> str: def getOutputMimeType(self) -> str:
"""Get MIME type for rendered output.""" """Get MIME type for rendered output."""
return self.output_mime_type return self.outputMimeType
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: async def _getPptxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get PowerPoint styling definitions using base template AI styling.""" """Get PowerPoint styling definitions using base template AI styling."""
style_schema = { style_schema = {
"title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"}, "title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
@ -323,21 +323,21 @@ class RendererPptx(BaseRenderer):
"executive_ready": True "executive_ready": True
} }
style_template = self._create_professional_pptx_template(user_prompt, style_schema) style_template = self._createProfessionalPptxTemplate(userPrompt, style_schema)
# Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion # Use our own _getAiStylesWithPptxColors method to ensure proper color conversion
styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles()) styles = await self._getAiStylesWithPptxColors(aiService, style_template, self._getDefaultPptxStyles())
# Validate PowerPoint-specific requirements # Validate PowerPoint-specific requirements
return self._validate_pptx_styles_readability(styles) return self._validatePptxStylesReadability(styles)
def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str: def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" """Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
import json import json
schema_json = json.dumps(style_schema, indent=4) schema_json = json.dumps(style_schema, indent=4)
return f"""Customize the JSON below for professional PowerPoint slides. return f"""Customize the JSON below for professional PowerPoint slides.
User Request: {user_prompt or "Create professional corporate slides"} User Request: {userPrompt or "Create professional corporate slides"}
Rules: Rules:
- Use professional colors (blues, grays, deep greens) - Use professional colors (blues, grays, deep greens)
@ -351,9 +351,9 @@ Return ONLY this JSON with your changes:
JSON ONLY. NO OTHER TEXT.""" JSON ONLY. NO OTHER TEXT."""
async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PowerPoint color conversion.""" """Get AI styles with proper PowerPoint color conversion."""
if not ai_service: if not aiService:
return default_styles return default_styles
try: try:
@ -365,11 +365,11 @@ JSON ONLY. NO OTHER TEXT."""
request = AiCallRequest(prompt=style_template, context="", options=request_options) request = AiCallRequest(prompt=style_template, context="", options=request_options)
# Check if AI service is properly configured # Check if AI service is properly configured
if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects: if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
self.logger.warning("AI service not properly configured, using defaults") self.logger.warning("AI service not properly configured, using defaults")
return default_styles return default_styles
response = await ai_service.aiObjects.call(request) response = await aiService.aiObjects.call(request)
# Check if response is valid # Check if response is valid
if not response: if not response:
@ -445,7 +445,7 @@ JSON ONLY. NO OTHER TEXT."""
return default_styles return default_styles
# Convert colors to PowerPoint RGB format # Convert colors to PowerPoint RGB format
styles = self._convert_colors_format(styles) styles = self._convertColorsFormat(styles)
return styles return styles
@ -453,7 +453,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning(f"AI styling failed: {str(e)}, using defaults") self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to RGB format for PowerPoint compatibility.""" """Convert hex colors to RGB format for PowerPoint compatibility."""
try: try:
for style_name, style_config in styles.items(): for style_name, style_config in styles.items():
@ -477,7 +477,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning(f"Color conversion failed: {str(e)}") self.logger.warning(f"Color conversion failed: {str(e)}")
return styles return styles
def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple: def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
"""Get a safe RGB color tuple for PowerPoint.""" """Get a safe RGB color tuple for PowerPoint."""
if isinstance(color_value, tuple) and len(color_value) == 3: if isinstance(color_value, tuple) and len(color_value) == 3:
return color_value return color_value
@ -495,7 +495,7 @@ JSON ONLY. NO OTHER TEXT."""
return (r, g, b) return (r, g, b)
return default return default
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _validatePptxStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles.""" """Validate and fix readability issues in AI-generated styles."""
try: try:
# Ensure minimum font sizes for PowerPoint readability # Ensure minimum font sizes for PowerPoint readability
@ -519,9 +519,9 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e: except Exception as e:
logger.warning(f"Style validation failed: {str(e)}") logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pptx_styles() return self._getDefaultPptxStyles()
def _get_default_pptx_styles(self) -> Dict[str, Any]: def _getDefaultPptxStyles(self) -> Dict[str, Any]:
"""Default PowerPoint styles with corporate professional color scheme.""" """Default PowerPoint styles with corporate professional color scheme."""
return { return {
"title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"}, "title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
@ -541,7 +541,7 @@ JSON ONLY. NO OTHER TEXT."""
"executive_ready": True "executive_ready": True
} }
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]: async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
""" """
Parse JSON content into slide data structure. Parse JSON content into slide data structure.
@ -569,12 +569,12 @@ JSON ONLY. NO OTHER TEXT."""
# Create title slide # Create title slide
slides.append({ slides.append({
"title": document_title, "title": document_title,
"content": "Generated by PowerOn AI System\n\n" + self._format_timestamp() "content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
}) })
# Process sections into slides based on content and user intent # Process sections into slides based on content and user intent
sections = json_content.get("sections", []) sections = json_content.get("sections", [])
slides.extend(self._create_slides_from_sections(sections, styles)) slides.extend(self._createSlidesFromSections(sections, styles))
# If no content slides were created, create a default content slide # If no content slides were created, create a default content slide
if len(slides) == 1: # Only title slide if len(slides) == 1: # Only title slide
@ -595,7 +595,7 @@ JSON ONLY. NO OTHER TEXT."""
} }
] ]
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create a slide from a JSON section.""" """Create a slide from a JSON section."""
try: try:
# Get section title from data or use default # Get section title from data or use default
@ -616,15 +616,15 @@ JSON ONLY. NO OTHER TEXT."""
content_parts = [] content_parts = []
if content_type == "table": if content_type == "table":
content_parts.append(self._format_table_for_slide(elements)) content_parts.append(self._formatTableForSlide(elements))
elif content_type == "list": elif content_type == "list":
content_parts.append(self._format_list_for_slide(elements)) content_parts.append(self._formatListForSlide(elements))
elif content_type == "heading": elif content_type == "heading":
content_parts.append(self._format_heading_for_slide(elements)) content_parts.append(self._formatHeadingForSlide(elements))
elif content_type == "paragraph": elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide(elements)) content_parts.append(self._formatParagraphForSlide(elements))
elif content_type == "code": elif content_type == "code":
content_parts.append(self._format_code_for_slide(elements)) content_parts.append(self._formatCodeForSlide(elements))
else: else:
content_parts.append(self._format_paragraph_for_slide(elements)) content_parts.append(self._format_paragraph_for_slide(elements))
@ -640,7 +640,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slide from section: {str(e)}") logger.warning(f"Error creating slide from section: {str(e)}")
return None return None
def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str: def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str:
"""Format table data for slide presentation.""" """Format table data for slide presentation."""
try: try:
# Extract table data from elements array # Extract table data from elements array
@ -681,7 +681,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting table for slide: {str(e)}") logger.warning(f"Error formatting table for slide: {str(e)}")
return "" return ""
def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str: def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
"""Format list data for slide presentation.""" """Format list data for slide presentation."""
try: try:
items = list_data.get("items", []) items = list_data.get("items", [])
@ -713,7 +713,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting list for slide: {str(e)}") logger.warning(f"Error formatting list for slide: {str(e)}")
return "" return ""
def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str: def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
"""Format heading data for slide presentation.""" """Format heading data for slide presentation."""
try: try:
text = heading_data.get("text", "") text = heading_data.get("text", "")
@ -728,7 +728,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting heading for slide: {str(e)}") logger.warning(f"Error formatting heading for slide: {str(e)}")
return "" return ""
def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str: def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
"""Format paragraph data for slide presentation.""" """Format paragraph data for slide presentation."""
try: try:
text = paragraph_data.get("text", "") text = paragraph_data.get("text", "")
@ -747,7 +747,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting paragraph for slide: {str(e)}") logger.warning(f"Error formatting paragraph for slide: {str(e)}")
return "" return ""
def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str: def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
"""Format code data for slide presentation.""" """Format code data for slide presentation."""
try: try:
code = code_data.get("code", "") code = code_data.get("code", "")
@ -770,7 +770,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting code for slide: {str(e)}") logger.warning(f"Error formatting code for slide: {str(e)}")
return "" return ""
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int: def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
"""Determine the best professional slide layout based on content.""" """Determine the best professional slide layout based on content."""
try: try:
content = slide_data.get("content", "") content = slide_data.get("content", "")
@ -804,7 +804,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error determining slide layout: {str(e)}") logger.warning(f"Error determining slide layout: {str(e)}")
return 1 # Default to title and content layout return 1 # Default to title and content layout
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent.""" """Create slides from sections based on content density and user intent."""
try: try:
slides = [] slides = []
@ -834,7 +834,7 @@ JSON ONLY. NO OTHER TEXT."""
break break
else: else:
# Add content to current slide # Add content to current slide
formatted_content = self._format_section_content(section) formatted_content = self._formatSectionContent(section)
if formatted_content: if formatted_content:
current_slide_content.append(formatted_content) current_slide_content.append(formatted_content)
@ -851,7 +851,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slides from sections: {str(e)}") logger.warning(f"Error creating slides from sections: {str(e)}")
return [] return []
def _format_section_content(self, section: Dict[str, Any]) -> str: def _formatSectionContent(self, section: Dict[str, Any]) -> str:
"""Format section content for slide presentation.""" """Format section content for slide presentation."""
try: try:
content_type = section.get("content_type", "paragraph") content_type = section.get("content_type", "paragraph")
@ -861,15 +861,15 @@ JSON ONLY. NO OTHER TEXT."""
content_parts = [] content_parts = []
for element in elements: for element in elements:
if content_type == "table": if content_type == "table":
content_parts.append(self._format_table_for_slide([element])) content_parts.append(self._formatTableForSlide([element]))
elif content_type == "list": elif content_type == "list":
content_parts.append(self._format_list_for_slide([element])) content_parts.append(self._formatListForSlide([element]))
elif content_type == "heading": elif content_type == "heading":
content_parts.append(self._format_heading_for_slide([element])) content_parts.append(self._formatHeadingForSlide([element]))
elif content_type == "paragraph": elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide([element])) content_parts.append(self._formatParagraphForSlide([element]))
elif content_type == "code": elif content_type == "code":
content_parts.append(self._format_code_for_slide([element])) content_parts.append(self._formatCodeForSlide([element]))
else: else:
content_parts.append(self._format_paragraph_for_slide([element])) content_parts.append(self._format_paragraph_for_slide([element]))
@ -879,7 +879,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting section content: {str(e)}") logger.warning(f"Error formatting section content: {str(e)}")
return "" return ""
def _format_timestamp(self) -> str: def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation.""" """Format current timestamp for presentation generation."""
from datetime import datetime, UTC from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -9,7 +9,7 @@ class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction.""" """Renders content to plain text format with format-specific extraction."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported text formats (excluding formats with dedicated renderers).""" """Return supported text formats (excluding formats with dedicated renderers)."""
return [ return [
'txt', 'text', 'plain', 'txt', 'text', 'plain',
@ -32,7 +32,7 @@ class RendererText(BaseRenderer):
] ]
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return [ return [
'ascii', 'utf8', 'utf-8', 'code', 'source', 'ascii', 'utf8', 'utf-8', 'code', 'source',
@ -41,166 +41,166 @@ class RendererText(BaseRenderer):
] ]
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for text renderer.""" """Return priority for text renderer."""
return 90 return 90
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to plain text format.""" """Render extracted JSON content to plain text format."""
try: try:
# Generate text from JSON structure # Generate text from JSON structure
text_content = self._generate_text_from_json(extracted_content, title) textContent = self._generateTextFromJson(extractedContent, title)
return text_content, "text/plain" return textContent, "text/plain"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering text: {str(e)}") self.logger.error(f"Error rendering text: {str(e)}")
# Return minimal text fallback # Return minimal text fallback
return f"{title}\n\nError rendering report: {str(e)}", "text/plain" return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str: def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate text content from structured JSON document.""" """Generate text content from structured JSON document."""
try: try:
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary") raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content: if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field") raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title # Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title) documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build text content # Build text content
text_parts = [] textParts = []
# Document title # Document title
text_parts.append(document_title) textParts.append(documentTitle)
text_parts.append("=" * len(document_title)) textParts.append("=" * len(documentTitle))
text_parts.append("") textParts.append("")
# Process each section # Process each section
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
for section in sections: for section in sections:
section_text = self._render_json_section(section) sectionText = self._renderJsonSection(section)
if section_text: if sectionText:
text_parts.append(section_text) textParts.append(sectionText)
text_parts.append("") # Add spacing between sections textParts.append("") # Add spacing between sections
# Add generation info # Add generation info
text_parts.append("") textParts.append("")
text_parts.append(f"Generated: {self._format_timestamp()}") textParts.append(f"Generated: {self._formatTimestamp()}")
return '\n'.join(text_parts) return '\n'.join(textParts)
except Exception as e: except Exception as e:
self.logger.error(f"Error generating text from JSON: {str(e)}") self.logger.error(f"Error generating text from JSON: {str(e)}")
raise Exception(f"Text generation failed: {str(e)}") raise Exception(f"Text generation failed: {str(e)}")
def _render_json_section(self, section: Dict[str, Any]) -> str: def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to text.""" """Render a single JSON section to text."""
try: try:
section_type = self._get_section_type(section) sectionType = self._getSectionType(section)
section_data = self._get_section_data(section) sectionData = self._getSectionData(section)
if section_type == "table": if sectionType == "table":
# Process the section data to extract table structure # Process the section data to extract table structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_table(processed_data) return self._renderJsonTable(processedData)
elif section_type == "bullet_list": elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure # Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_bullet_list(processed_data) return self._renderJsonBulletList(processedData)
elif section_type == "heading": elif sectionType == "heading":
# Render each heading element in the elements array # Render each heading element in the elements array
# section_data is already the elements array from _get_section_data # sectionData is already the elements array from _getSectionData
rendered_elements = [] renderedElements = []
for element in section_data: for element in sectionData:
rendered_elements.append(self._render_json_heading(element)) renderedElements.append(self._renderJsonHeading(element))
return "\n".join(rendered_elements) return "\n".join(renderedElements)
elif section_type == "paragraph": elif sectionType == "paragraph":
# Render each paragraph element in the elements array # Render each paragraph element in the elements array
# section_data is already the elements array from _get_section_data # sectionData is already the elements array from _getSectionData
rendered_elements = [] renderedElements = []
for element in section_data: for element in sectionData:
rendered_elements.append(self._render_json_paragraph(element)) renderedElements.append(self._renderJsonParagraph(element))
return "\n".join(rendered_elements) return "\n".join(renderedElements)
elif section_type == "code_block": elif sectionType == "code_block":
# Process the section data to extract code block structure # Process the section data to extract code block structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_code_block(processed_data) return self._renderJsonCodeBlock(processedData)
elif section_type == "image": elif sectionType == "image":
# Process the section data to extract image structure # Process the section data to extract image structure
processed_data = self._process_section_by_type(section) processedData = self._processSectionByType(section)
return self._render_json_image(processed_data) return self._renderJsonImage(processedData)
else: else:
# Fallback to paragraph for unknown types - render each element # Fallback to paragraph for unknown types - render each element
# section_data is already the elements array from _get_section_data # sectionData is already the elements array from _getSectionData
rendered_elements = [] renderedElements = []
for element in section_data: for element in sectionData:
rendered_elements.append(self._render_json_paragraph(element)) renderedElements.append(self._renderJsonParagraph(element))
return "\n".join(rendered_elements) return "\n".join(renderedElements)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}") self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f"[Error rendering section: {str(e)}]" return f"[Error rendering section: {str(e)}]"
def _render_json_table(self, table_data: Dict[str, Any]) -> str: def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to text.""" """Render a JSON table to text."""
try: try:
headers = table_data.get("headers", []) headers = tableData.get("headers", [])
rows = table_data.get("rows", []) rows = tableData.get("rows", [])
if not headers or not rows: if not headers or not rows:
return "" return ""
text_parts = [] textParts = []
# Create table header # Create table header
header_line = " | ".join(str(header) for header in headers) headerLine = " | ".join(str(header) for header in headers)
text_parts.append(header_line) textParts.append(headerLine)
# Add separator line # Add separator line
separator_line = " | ".join("-" * len(str(header)) for header in headers) separatorLine = " | ".join("-" * len(str(header)) for header in headers)
text_parts.append(separator_line) textParts.append(separatorLine)
# Add data rows # Add data rows
for row in rows: for row in rows:
row_line = " | ".join(str(cell_data) for cell_data in row) rowLine = " | ".join(str(cellData) for cellData in row)
text_parts.append(row_line) textParts.append(rowLine)
return '\n'.join(text_parts) return '\n'.join(textParts)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}") self.logger.warning(f"Error rendering table: {str(e)}")
return "" return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str: def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to text.""" """Render a JSON bullet list to text."""
try: try:
items = list_data.get("items", []) items = listData.get("items", [])
if not items: if not items:
return "" return ""
text_parts = [] textParts = []
for item in items: for item in items:
if isinstance(item, str): if isinstance(item, str):
text_parts.append(f"- {item}") textParts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item: elif isinstance(item, dict) and "text" in item:
text_parts.append(f"- {item['text']}") textParts.append(f"- {item['text']}")
return '\n'.join(text_parts) return '\n'.join(textParts)
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}") self.logger.warning(f"Error rendering bullet list: {str(e)}")
return "" return ""
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str: def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to text.""" """Render a JSON heading to text."""
try: try:
level = heading_data.get("level", 1) level = headingData.get("level", 1)
text = heading_data.get("text", "") text = headingData.get("text", "")
if text: if text:
level = max(1, min(6, level)) level = max(1, min(6, level))
@ -217,21 +217,21 @@ class RendererText(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}") self.logger.warning(f"Error rendering heading: {str(e)}")
return "" return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str: def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
"""Render a JSON paragraph to text.""" """Render a JSON paragraph to text."""
try: try:
text = paragraph_data.get("text", "") text = paragraphData.get("text", "")
return text if text else "" return text if text else ""
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}") self.logger.warning(f"Error rendering paragraph: {str(e)}")
return "" return ""
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str: def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to text.""" """Render a JSON code block to text."""
try: try:
code = code_data.get("code", "") code = codeData.get("code", "")
language = code_data.get("language", "") language = codeData.get("language", "")
if code: if code:
if language: if language:
@ -245,12 +245,12 @@ class RendererText(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}") self.logger.warning(f"Error rendering code block: {str(e)}")
return "" return ""
def _render_json_image(self, image_data: Dict[str, Any]) -> str: def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to text.""" """Render a JSON image to text."""
try: try:
alt_text = image_data.get("altText", "Image") altText = imageData.get("altText", "Image")
return f"[Image: {alt_text}]" return f"[Image: {altText}]"
except Exception as e: except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}") self.logger.warning(f"Error rendering image: {str(e)}")
return f"[Image: {image_data.get('altText', 'Image')}]" return f"[Image: {imageData.get('altText', 'Image')}]"

View file

@ -21,41 +21,41 @@ class RendererXlsx(BaseRenderer):
"""Renders content to Excel format using openpyxl.""" """Renders content to Excel format using openpyxl."""
@classmethod @classmethod
def get_supported_formats(cls) -> List[str]: def getSupportedFormats(cls) -> List[str]:
"""Return supported Excel formats.""" """Return supported Excel formats."""
return ['xlsx', 'xls', 'excel'] return ['xlsx', 'xls', 'excel']
@classmethod @classmethod
def get_format_aliases(cls) -> List[str]: def getFormatAliases(cls) -> List[str]:
"""Return format aliases.""" """Return format aliases."""
return ['spreadsheet', 'workbook'] return ['spreadsheet', 'workbook']
@classmethod @classmethod
def get_priority(cls) -> int: def getPriority(cls) -> int:
"""Return priority for Excel renderer.""" """Return priority for Excel renderer."""
return 110 return 110
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to Excel format using AI-analyzed styling.""" """Render extracted JSON content to Excel format using AI-analyzed styling."""
try: try:
if not OPENPYXL_AVAILABLE: if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available # Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv from .rendererCsv import RendererCsv
csv_renderer = RendererCsv() csvRenderer = RendererCsv()
csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service) csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
return csv_content, "text/csv" return csvContent, "text/csv"
# Generate Excel using AI-analyzed styling # Generate Excel using AI-analyzed styling
excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service) excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering Excel: {str(e)}") self.logger.error(f"Error rendering Excel: {str(e)}")
# Return CSV fallback # Return CSV fallback
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv" return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
def _generate_excel(self, content: str, title: str) -> str: def _generateExcel(self, content: str, title: str) -> str:
"""Generate Excel content using openpyxl.""" """Generate Excel content using openpyxl."""
try: try:
# Create workbook # Create workbook
@ -65,14 +65,14 @@ class RendererXlsx(BaseRenderer):
wb.remove(wb.active) wb.remove(wb.active)
# Create sheets # Create sheets
summary_sheet = wb.create_sheet("Summary", 0) summarySheet = wb.create_sheet("Summary", 0)
data_sheet = wb.create_sheet("Data", 1) dataSheet = wb.create_sheet("Data", 1)
analysis_sheet = wb.create_sheet("Analysis", 2) analysisSheet = wb.create_sheet("Analysis", 2)
# Add content to sheets # Add content to sheets
self._populate_summary_sheet(summary_sheet, title) self._populateSummarySheet(summarySheet, title)
self._populate_data_sheet(data_sheet, content) self._populateDataSheet(dataSheet, content)
self._populate_analysis_sheet(analysis_sheet, content) self._populateAnalysisSheet(analysisSheet, content)
# Save to buffer # Save to buffer
buffer = io.BytesIO() buffer = io.BytesIO()
@ -80,16 +80,16 @@ class RendererXlsx(BaseRenderer):
buffer.seek(0) buffer.seek(0)
# Convert to base64 # Convert to base64
excel_bytes = buffer.getvalue() excelBytes = buffer.getvalue()
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8') excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
return excel_base64 return excelBase64
except Exception as e: except Exception as e:
self.logger.error(f"Error generating Excel: {str(e)}") self.logger.error(f"Error generating Excel: {str(e)}")
raise raise
def _populate_summary_sheet(self, sheet, title: str): def _populateSummarySheet(self, sheet, title: str):
"""Populate the summary sheet.""" """Populate the summary sheet."""
try: try:
# Title # Title
@ -99,7 +99,7 @@ class RendererXlsx(BaseRenderer):
# Generation info # Generation info
sheet['A3'] = "Generated:" sheet['A3'] = "Generated:"
sheet['B3'] = self._format_timestamp() sheet['B3'] = self._formatTimestamp()
sheet['A4'] = "Status:" sheet['A4'] = "Status:"
sheet['B4'] = "Generated Successfully" sheet['B4'] = "Generated Successfully"
@ -116,7 +116,7 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate summary sheet: {str(e)}") self.logger.warning(f"Could not populate summary sheet: {str(e)}")
def _populate_data_sheet(self, sheet, content: str): def _populateDataSheet(self, sheet, content: str):
"""Populate the data sheet.""" """Populate the data sheet."""
try: try:
# Headers # Headers
@ -138,8 +138,8 @@ class RendererXlsx(BaseRenderer):
# Check for table data (lines with |) # Check for table data (lines with |)
if '|' in line: if '|' in line:
cells = [cell.strip() for cell in line.split('|') if cell.strip()] cells = [cell.strip() for cell in line.split('|') if cell.strip()]
for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns
sheet.cell(row=row, column=col, value=cell_data) sheet.cell(row=row, column=col, value=cellData)
row += 1 row += 1
else: else:
# Regular content # Regular content
@ -153,7 +153,7 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate data sheet: {str(e)}") self.logger.warning(f"Could not populate data sheet: {str(e)}")
def _populate_analysis_sheet(self, sheet, content: str): def _populateAnalysisSheet(self, sheet, content: str):
"""Populate the analysis sheet.""" """Populate the analysis sheet."""
try: try:
# Title # Title
@ -169,17 +169,17 @@ class RendererXlsx(BaseRenderer):
row += 1 row += 1
# Count different types of content # Count different types of content
table_lines = sum(1 for line in lines if '|' in line) tableLines = sum(1 for line in lines if '|' in line)
list_lines = sum(1 for line in lines if line.startswith(('- ', '* '))) listLines = sum(1 for line in lines if line.startswith(('- ', '* ')))
text_lines = len(lines) - table_lines - list_lines textLines = len(lines) - tableLines - listLines
sheet[f'A{row}'] = f"Total Lines: {len(lines)}" sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
row += 1 row += 1
sheet[f'A{row}'] = f"Table Rows: {table_lines}" sheet[f'A{row}'] = f"Table Rows: {tableLines}"
row += 1 row += 1
sheet[f'A{row}'] = f"List Items: {list_lines}" sheet[f'A{row}'] = f"List Items: {listLines}"
row += 1 row += 1
sheet[f'A{row}'] = f"Text Lines: {text_lines}" sheet[f'A{row}'] = f"Text Lines: {textLines}"
row += 2 row += 2
# Recommendations # Recommendations
@ -198,35 +198,35 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate analysis sheet: {str(e)}") self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str: async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling.""" """Generate Excel content from structured JSON document using AI-generated styling."""
try: try:
# Debug output # Debug output
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
# Get AI-generated styling definitions # Get AI-generated styling definitions
styles = await self._get_excel_styles(user_prompt, ai_service) styles = await self._getExcelStyles(userPrompt, aiService)
# Validate JSON structure # Validate JSON structure
if not isinstance(json_content, dict): if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary") raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content: if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field") raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title # Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title) document_title = jsonContent.get("metadata", {}).get("title", title)
# Create workbook # Create workbook
wb = Workbook() wb = Workbook()
# Create sheets based on content # Create sheets based on content
sheets = self._create_excel_sheets(wb, json_content, styles) sheets = self._createExcelSheets(wb, jsonContent, styles)
self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
# Populate sheets with content # Populate sheets with content
self._populate_excel_sheets(sheets, json_content, styles) self._populateExcelSheets(sheets, jsonContent, styles)
# Save to buffer # Save to buffer
buffer = io.BytesIO() buffer = io.BytesIO()
@ -234,24 +234,24 @@ class RendererXlsx(BaseRenderer):
buffer.seek(0) buffer.seek(0)
# Convert to base64 # Convert to base64
excel_bytes = buffer.getvalue() excelBytes = buffer.getvalue()
self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")
try: try:
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8') excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
except Exception as b64_error: except Exception as b64_error:
self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
raise raise
return excel_base64 return excelBase64
except Exception as e: except Exception as e:
self.logger.error(f"Error generating Excel from JSON: {str(e)}") self.logger.error(f"Error generating Excel from JSON: {str(e)}")
raise Exception(f"Excel generation failed: {str(e)}") raise Exception(f"Excel generation failed: {str(e)}")
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]: async def _getExcelStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get Excel styling definitions using base template AI styling.""" """Get Excel styling definitions using base template AI styling."""
style_schema = { styleSchema = {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"}, "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"}, "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"}, "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
@ -261,26 +261,26 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
} }
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema) styleTemplate = self._createAiStyleTemplate("xlsx", userPrompt, styleSchema)
# Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion # Use our own _getAiStylesWithExcelColors method to ensure proper color conversion
styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles()) styles = await self._getAiStylesWithExcelColors(aiService, styleTemplate, self._getDefaultExcelStyles())
# Validate and fix contrast issues # Validate and fix contrast issues
return self._validate_excel_styles_contrast(styles) return self._validateExcelStylesContrast(styles)
async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]: async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper Excel color conversion.""" """Get AI styles with proper Excel color conversion."""
if not ai_service: if not aiService:
return default_styles return defaultStyles
try: try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request_options = AiCallOptions() requestOptions = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options) request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
response = await ai_service.aiObjects.call(request) response = await aiService.aiObjects.call(request)
import json import json
import re import re
@ -291,7 +291,7 @@ class RendererXlsx(BaseRenderer):
# Check if result is empty # Check if result is empty
if not result: if not result:
self.logger.warning("AI styling returned empty response, using defaults") self.logger.warning("AI styling returned empty response, using defaults")
return default_styles return defaultStyles
# Extract JSON from markdown if present # Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL) json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
@ -312,46 +312,46 @@ class RendererXlsx(BaseRenderer):
styles = json.loads(result) styles = json.loads(result)
except json.JSONDecodeError as json_error: except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults") self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
return default_styles return defaultStyles
# Convert colors to Excel aRGB format # Convert colors to Excel aRGB format
styles = self._convert_colors_format(styles) styles = self._convertColorsFormat(styles)
return styles return styles
except Exception as e: except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults") self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles return defaultStyles
def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str: def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix).""" """Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(color_value, str): if not isinstance(colorValue, str):
return default return default
# Remove # prefix if present # Remove # prefix if present
if color_value.startswith('#'): if colorValue.startswith('#'):
color_value = color_value[1:] colorValue = colorValue[1:]
if len(color_value) == 6: if len(colorValue) == 6:
# Convert RRGGBB to AARRGGBB # Convert RRGGBB to AARRGGBB
return f"FF{color_value}" return f"FF{colorValue}"
elif len(color_value) == 8: elif len(colorValue) == 8:
# Already aRGB format # Already aRGB format
return color_value return colorValue
else: else:
# Unexpected format, return default # Unexpected format, return default
return default return default
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility.""" """Convert hex colors to aRGB format for Excel compatibility."""
try: try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
for style_name, style_config in styles.items(): for styleName, styleConfig in styles.items():
if isinstance(style_config, dict): if isinstance(styleConfig, dict):
for prop, value in style_config.items(): for prop, value in styleConfig.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7: if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel) # Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
styles[style_name][prop] = f"FF{value[1:]}" styles[styleName][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9: elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
pass # Already aRGB format pass # Already aRGB format
elif isinstance(value, str) and value.startswith('#'): elif isinstance(value, str) and value.startswith('#'):
@ -360,34 +360,34 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
return styles return styles
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]: def _validateExcelStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles.""" """Validate and fix contrast issues in AI-generated styles."""
try: try:
# Fix table header contrast # Fix table header contrast
if "table_header" in styles: if "table_header" in styles:
header = styles["table_header"] header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF") bgColor = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000") textColor = header.get("text_color", "#000000")
# If both are white or both are dark, fix it # If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#4F4F4F" header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF" header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#4F4F4F" header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF" header["text_color"] = "#FFFFFF"
# Fix table cell contrast # Fix table cell contrast
if "table_cell" in styles: if "table_cell" in styles:
cell = styles["table_cell"] cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF") bgColor = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000") textColor = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it # If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF": if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF" cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F" cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000": elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFF" cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F" cell["text_color"] = "#2F2F2F"
@ -395,9 +395,9 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}") self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_excel_styles() return self._getDefaultExcelStyles()
def _get_default_excel_styles(self) -> Dict[str, Any]: def _getDefaultExcelStyles(self) -> Dict[str, Any]:
"""Default Excel styles with aRGB color format.""" """Default Excel styles with aRGB color format."""
return { return {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"}, "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
@ -409,104 +409,104 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"} "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
} }
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]: def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create Excel sheets based on content structure and user intent.""" """Create Excel sheets based on content structure and user intent."""
sheets = {} sheets = {}
# Get sheet names from AI styles or generate based on content # Get sheet names from AI styles or generate based on content
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content)) sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent))
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER") self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")
# Create sheets # Create sheets
for i, sheet_name in enumerate(sheet_names): for i, sheetName in enumerate(sheetNames):
if i == 0: if i == 0:
# Use the default sheet for the first sheet # Use the default sheet for the first sheet
sheet = wb.active sheet = wb.active
sheet.title = sheet_name sheet.title = sheetName
else: else:
# Create additional sheets # Create additional sheets
sheet = wb.create_sheet(sheet_name, i) sheet = wb.create_sheet(sheetName, i)
sheets[sheet_name.lower()] = sheet sheets[sheetName.lower()] = sheet
return sheets return sheets
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]: def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure.""" """Generate sheet names based on actual content structure."""
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
# If no sections, create a single sheet # If no sections, create a single sheet
if not sections: if not sections:
return ["Content"] return ["Content"]
# Generate sheet names based on content structure # Generate sheet names based on content structure
sheet_names = [] sheetNames = []
# Check if we have multiple table sections # Check if we have multiple table sections
table_sections = [s for s in sections if s.get("content_type") == "table"] tableSections = [s for s in sections if s.get("content_type") == "table"]
if len(table_sections) > 1: if len(tableSections) > 1:
# Create separate sheets for each table # Create separate sheets for each table
for i, section in enumerate(table_sections, 1): for i, section in enumerate(tableSections, 1):
section_title = section.get("title", f"Table {i}") sectionTitle = section.get("title", f"Table {i}")
sheet_names.append(section_title[:31]) # Excel sheet name limit sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
else: else:
# Single table or mixed content - create main sheet # Single table or mixed content - create main sheet
document_title = json_content.get("metadata", {}).get("title", "Document") documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheet_names.append(document_title[:31]) # Excel sheet name limit sheetNames.append(documentTitle[:31]) # Excel sheet name limit
# Add additional sheets for other content types # Add additional sheets for other content types
content_types = set() contentTypes = set()
for section in sections: for section in sections:
content_type = section.get("content_type", "paragraph") contentType = section.get("content_type", "paragraph")
content_types.add(content_type) contentTypes.add(contentType)
if "table" in content_types and len(table_sections) == 1: if "table" in contentTypes and len(tableSections) == 1:
sheet_names.append("Table Data") sheetNames.append("Table Data")
if "list" in content_types: if "list" in contentTypes:
sheet_names.append("Lists") sheetNames.append("Lists")
if "paragraph" in content_types or "heading" in content_types: if "paragraph" in contentTypes or "heading" in contentTypes:
sheet_names.append("Text") sheetNames.append("Text")
# Limit to 4 sheets maximum # Limit to 4 sheets maximum
return sheet_names[:4] return sheetNames[:4]
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None: def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names.""" """Populate Excel sheets with content from JSON based on actual sheet names."""
try: try:
# Get the actual sheet names that were created # Get the actual sheet names that were created
sheet_names = list(sheets.keys()) sheetNames = list(sheets.keys())
if not sheet_names: if not sheetNames:
return return
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
table_sections = [s for s in sections if s.get("content_type") == "table"] tableSections = [s for s in sections if s.get("content_type") == "table"]
if len(table_sections) > 1: if len(tableSections) > 1:
# Multiple tables - populate each sheet with its corresponding table # Multiple tables - populate each sheet with its corresponding table
for i, section in enumerate(table_sections): for i, section in enumerate(tableSections):
if i < len(sheet_names): if i < len(sheetNames):
sheet_name = sheet_names[i] sheetName = sheetNames[i]
sheet = sheets[sheet_name] sheet = sheets[sheetName]
self._populate_table_sheet(sheet, section, styles, f"Table {i+1}") self._populateTableSheet(sheet, section, styles, f"Table {i+1}")
else: else:
# Single table or mixed content - use original logic # Single table or mixed content - use original logic
first_sheet_name = sheet_names[0] firstSheetName = sheetNames[0]
self._populate_main_sheet(sheets[first_sheet_name], json_content, styles) self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
# If we have multiple sheets, distribute content by type # If we have multiple sheets, distribute content by type
if len(sheet_names) > 1: if len(sheetNames) > 1:
self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:]) self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}") self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str): def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
"""Populate a sheet with a single table section.""" """Populate a sheet with a single table section."""
try: try:
# Sheet title # Sheet title
sheet['A1'] = sheet_title sheet['A1'] = sheetTitle
sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79"))) sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
sheet['A1'].alignment = Alignment(horizontal="center") sheet['A1'].alignment = Alignment(horizontal="center")
# Get table data from elements (canonical JSON format) # Get table data from elements (canonical JSON format)
@ -528,9 +528,9 @@ class RendererXlsx(BaseRenderer):
for col, header in enumerate(headers, 1): for col, header in enumerate(headers, 1):
cell = sheet.cell(row=3, column=col, value=header) cell = sheet.cell(row=3, column=col, value=header)
if header_style.get("bold"): if header_style.get("bold"):
cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000"))) cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
if header_style.get("background"): if header_style.get("background"):
cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid") cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
# Add rows # Add rows
cell_style = styles.get("table_cell", {}) cell_style = styles.get("table_cell", {})
@ -538,7 +538,7 @@ class RendererXlsx(BaseRenderer):
for col_idx, cell_value in enumerate(row_data, 1): for col_idx, cell_value in enumerate(row_data, 1):
cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value) cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
if cell_style.get("text_color"): if cell_style.get("text_color"):
cell.font = Font(color=self._get_safe_color(cell_style["text_color"])) cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
# Auto-adjust column widths # Auto-adjust column widths
for col in range(1, len(headers) + 1): for col in range(1, len(headers) + 1):
@ -547,17 +547,17 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate table sheet: {str(e)}") self.logger.warning(f"Could not populate table sheet: {str(e)}")
def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]): def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
"""Populate the main sheet with document overview and all content.""" """Populate the main sheet with document overview and all content."""
try: try:
# Document title # Document title
document_title = json_content.get("metadata", {}).get("title", "Generated Report") documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = document_title sheet['A1'] = documentTitle
# Safety check for title style # Safety check for title style
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"}) title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
try: try:
safe_color = self._get_safe_color(title_style["color"]) safe_color = self._getSafeColor(title_style["color"])
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color) sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
except Exception as font_error: except Exception as font_error:
@ -567,12 +567,12 @@ class RendererXlsx(BaseRenderer):
# Generation info # Generation info
sheet['A3'] = "Generated:" sheet['A3'] = "Generated:"
sheet['B3'] = self._format_timestamp() sheet['B3'] = self._formatTimestamp()
sheet['A4'] = "Status:" sheet['A4'] = "Status:"
sheet['B4'] = "Generated Successfully" sheet['B4'] = "Generated Successfully"
# Document metadata # Document metadata
metadata = json_content.get("metadata", {}) metadata = jsonContent.get("metadata", {})
if metadata: if metadata:
sheet['A6'] = "Document Information:" sheet['A6'] = "Document Information:"
sheet['A6'].font = Font(bold=True) sheet['A6'].font = Font(bold=True)
@ -585,7 +585,7 @@ class RendererXlsx(BaseRenderer):
row += 1 row += 1
# Content overview # Content overview
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
sheet[f'A{row + 1}'] = "Content Overview:" sheet[f'A{row + 1}'] = "Content Overview:"
sheet[f'A{row + 1}'].font = Font(bold=True) sheet[f'A{row + 1}'].font = Font(bold=True)
@ -605,7 +605,7 @@ class RendererXlsx(BaseRenderer):
# Add all content to this sheet # Add all content to this sheet
row += 2 row += 2
for section in sections: for section in sections:
row = self._add_section_to_sheet(sheet, section, styles, row) row = self._addSectionToSheet(sheet, section, styles, row)
row += 1 # Empty row between sections row += 1 # Empty row between sections
# Auto-adjust column widths # Auto-adjust column widths
@ -615,34 +615,34 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate main sheet: {str(e)}") self.logger.warning(f"Could not populate main sheet: {str(e)}")
def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]): def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
"""Populate additional sheets based on content types.""" """Populate additional sheets based on content types."""
try: try:
sections = json_content.get("sections", []) sections = jsonContent.get("sections", [])
for sheet_name in sheet_names: for sheetName in sheetNames:
if sheet_name not in sheets: if sheetName not in sheets:
continue continue
sheet = sheets[sheet_name] sheet = sheets[sheetName]
sheet_title = sheet_name.title() sheetTitle = sheetName.title()
sheet['A1'] = sheet_title sheet['A1'] = sheetTitle
sheet['A1'].font = Font(size=16, bold=True) sheet['A1'].font = Font(size=16, bold=True)
row = 3 row = 3
# Filter sections by content type # Filter sections by content type
if sheet_name == "tables": if sheetName == "tables":
filtered_sections = [s for s in sections if s.get("content_type") == "table"] filtered_sections = [s for s in sections if s.get("content_type") == "table"]
elif sheet_name == "lists": elif sheetName == "lists":
filtered_sections = [s for s in sections if s.get("content_type") == "list"] filtered_sections = [s for s in sections if s.get("content_type") == "list"]
elif sheet_name == "text": elif sheetName == "text":
filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]] filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
else: else:
filtered_sections = sections filtered_sections = sections
for section in filtered_sections: for section in filtered_sections:
row = self._add_section_to_sheet(sheet, section, styles, row) row = self._addSectionToSheet(sheet, section, styles, row)
row += 1 # Empty row between sections row += 1 # Empty row between sections
# Auto-adjust column widths # Auto-adjust column widths
@ -652,15 +652,15 @@ class RendererXlsx(BaseRenderer):
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate content type sheets: {str(e)}") self.logger.warning(f"Could not populate content type sheets: {str(e)}")
def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a section to a sheet and return the next row.""" """Add a section to a sheet and return the next row."""
try: try:
# Add section title # Add section title
section_title = section.get("title") section_title = section.get("title")
if section_title: if section_title:
sheet[f'A{start_row}'] = f"# {section_title}" sheet[f'A{startRow}'] = f"# {section_title}"
sheet[f'A{start_row}'].font = Font(bold=True) sheet[f'A{startRow}'].font = Font(bold=True)
start_row += 1 startRow += 1
# Process section based on type # Process section based on type
section_type = section.get("content_type", "paragraph") section_type = section.get("content_type", "paragraph")
@ -669,23 +669,23 @@ class RendererXlsx(BaseRenderer):
elements = section.get("elements", []) elements = section.get("elements", [])
for element in elements: for element in elements:
if section_type == "table": if section_type == "table":
start_row = self._add_table_to_excel(sheet, element, styles, start_row) startRow = self._addTableToExcel(sheet, element, styles, startRow)
elif section_type == "list": elif section_type == "list":
start_row = self._add_list_to_excel(sheet, element, styles, start_row) startRow = self._addListToExcel(sheet, element, styles, startRow)
elif section_type == "paragraph": elif section_type == "paragraph":
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row) startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
elif section_type == "heading": elif section_type == "heading":
start_row = self._add_heading_to_excel(sheet, element, styles, start_row) startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
else: else:
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row) startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
return start_row return startRow
except Exception as e: except Exception as e:
self.logger.warning(f"Could not add section to sheet: {str(e)}") self.logger.warning(f"Could not add section to sheet: {str(e)}")
return start_row + 1 return startRow + 1
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a table element to Excel sheet.""" """Add a table element to Excel sheet."""
try: try:
# In canonical JSON format, table elements have headers and rows directly # In canonical JSON format, table elements have headers and rows directly
@ -693,99 +693,99 @@ class RendererXlsx(BaseRenderer):
rows = element.get("rows", []) rows = element.get("rows", [])
if not headers and not rows: if not headers and not rows:
return start_row return startRow
# Add headers # Add headers
header_style = styles.get("table_header", {}) header_style = styles.get("table_header", {})
for col, header in enumerate(headers, 1): for col, header in enumerate(headers, 1):
cell = sheet.cell(row=start_row, column=col, value=header) cell = sheet.cell(row=startRow, column=col, value=header)
if header_style.get("bold"): if header_style.get("bold"):
cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000"))) cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
if header_style.get("background"): if header_style.get("background"):
cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid") cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
start_row += 1 startRow += 1
# Add rows # Add rows
cell_style = styles.get("table_cell", {}) cell_style = styles.get("table_cell", {})
for row_data in rows: for row_data in rows:
for col, cell_value in enumerate(row_data, 1): for col, cell_value in enumerate(row_data, 1):
cell = sheet.cell(row=start_row, column=col, value=cell_value) cell = sheet.cell(row=startRow, column=col, value=cell_value)
if cell_style.get("text_color"): if cell_style.get("text_color"):
cell.font = Font(color=self._get_safe_color(cell_style["text_color"])) cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
start_row += 1 startRow += 1
return start_row return startRow
except Exception as e: except Exception as e:
self.logger.warning(f"Could not add table to Excel: {str(e)}") self.logger.warning(f"Could not add table to Excel: {str(e)}")
return start_row + 1 return startRow + 1
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a list element to Excel sheet.""" """Add a list element to Excel sheet."""
try: try:
list_items = element.get("items", []) list_items = element.get("items", [])
list_style = styles.get("bullet_list", {}) list_style = styles.get("bullet_list", {})
for item in list_items: for item in list_items:
sheet.cell(row=start_row, column=1, value=f"{item}") sheet.cell(row=startRow, column=1, value=f"{item}")
if list_style.get("color"): if list_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"])) sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
start_row += 1 startRow += 1
return start_row return startRow
except Exception as e: except Exception as e:
self.logger.warning(f"Could not add list to Excel: {str(e)}") self.logger.warning(f"Could not add list to Excel: {str(e)}")
return start_row + 1 return startRow + 1
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a paragraph element to Excel sheet.""" """Add a paragraph element to Excel sheet."""
try: try:
text = element.get("text", "") text = element.get("text", "")
if text: if text:
sheet.cell(row=start_row, column=1, value=text) sheet.cell(row=startRow, column=1, value=text)
paragraph_style = styles.get("paragraph", {}) paragraph_style = styles.get("paragraph", {})
if paragraph_style.get("color"): if paragraph_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"])) sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
start_row += 1 startRow += 1
return start_row return startRow
except Exception as e: except Exception as e:
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}") self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
return start_row + 1 return startRow + 1
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int: def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a heading element to Excel sheet.""" """Add a heading element to Excel sheet."""
try: try:
text = element.get("text", "") text = element.get("text", "")
level = element.get("level", 1) level = element.get("level", 1)
if text: if text:
sheet.cell(row=start_row, column=1, value=text) sheet.cell(row=startRow, column=1, value=text)
heading_style = styles.get("heading", {}) heading_style = styles.get("heading", {})
font_size = heading_style.get("font_size", 14) font_size = heading_style.get("font_size", 14)
if level > 1: if level > 1:
font_size = max(10, font_size - (level - 1) * 2) font_size = max(10, font_size - (level - 1) * 2)
sheet.cell(row=start_row, column=1).font = Font( sheet.cell(row=startRow, column=1).font = Font(
size=font_size, size=font_size,
bold=True, bold=True,
color=self._get_safe_color(heading_style.get("color", "FF000000")) color=self._getSafeColor(heading_style.get("color", "FF000000"))
) )
start_row += 1 startRow += 1
return start_row return startRow
except Exception as e: except Exception as e:
self.logger.warning(f"Could not add heading to Excel: {str(e)}") self.logger.warning(f"Could not add heading to Excel: {str(e)}")
return start_row + 1 return startRow + 1
def _format_timestamp(self) -> str: def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation.""" """Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -1,25 +1,32 @@
""" """
JSON Schema definitions for AI-generated document structures. JSON Schema definitions for AI-generated document structures (unified).
This module provides schemas that guide AI to generate structured JSON output. This module provides schemas that guide AI to generate structured JSON output
that matches the master template in modules.datamodels.datamodelJson.
""" """
from typing import Dict, Any from typing import Dict, Any
def get_multi_document_subJsonSchema() -> Dict[str, Any]: def getMultiDocumentSchema() -> Dict[str, Any]:
"""Get the JSON schema for multi-document generation.""" """Get the JSON schema for multi-document generation (unified)."""
return { return {
"type": "object", "type": "object",
"required": ["metadata", "documents"], "required": ["metadata", "documents"],
"properties": { "properties": {
"metadata": { "metadata": {
"type": "object", "type": "object",
"required": ["title", "split_strategy"], "required": ["split_strategy"],
"properties": { "properties": {
"title": {"type": "string", "description": "Document title"},
"split_strategy": { "split_strategy": {
"type": "string", "type": "string",
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"], "enum": [
"single_document",
"per_entity",
"by_section",
"by_criteria",
"by_data_type",
"custom"
],
"description": "Strategy for splitting content into multiple files" "description": "Strategy for splitting content into multiple files"
}, },
"splitCriteria": { "splitCriteria": {
@ -30,7 +37,6 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"type": "string", "type": "string",
"description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')" "description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
}, },
"author": {"type": "string", "description": "Document author (optional)"},
"source_documents": { "source_documents": {
"type": "array", "type": "array",
"items": {"type": "string"}, "items": {"type": "string"},
@ -38,7 +44,7 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
}, },
"extraction_method": { "extraction_method": {
"type": "string", "type": "string",
"default": "ai_extraction", "default": "ai_generation",
"description": "Method used for extraction" "description": "Method used for extraction"
} }
} }
@ -64,7 +70,15 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"title": {"type": "string", "description": "Section title (optional)"}, "title": {"type": "string", "description": "Section title (optional)"},
"content_type": { "content_type": {
"type": "string", "type": "string",
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"], "enum": [
"table",
"bullet_list",
"paragraph",
"heading",
"code_block",
"image",
"mixed"
],
"description": "Primary content type of this section" "description": "Primary content type of this section"
}, },
"elements": { "elements": {
@ -76,7 +90,8 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
{"$ref": "#/definitions/bullet_list"}, {"$ref": "#/definitions/bullet_list"},
{"$ref": "#/definitions/paragraph"}, {"$ref": "#/definitions/paragraph"},
{"$ref": "#/definitions/heading"}, {"$ref": "#/definitions/heading"},
{"$ref": "#/definitions/code_block"} {"$ref": "#/definitions/code_block"},
{"$ref": "#/definitions/image"}
] ]
} }
}, },
@ -191,11 +206,20 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"code": {"type": "string", "description": "Code content"}, "code": {"type": "string", "description": "Code content"},
"language": {"type": "string", "description": "Programming language (optional)"} "language": {"type": "string", "description": "Programming language (optional)"}
} }
},
"image": {
"type": "object",
"required": ["url"],
"properties": {
"url": {"type": "string", "description": "Image URL or data URI"},
"caption": {"type": "string", "description": "Image caption (optional)"},
"alt": {"type": "string", "description": "Alt text (optional)"}
}
} }
} }
} }
def get_document_subJsonSchema() -> Dict[str, Any]: def getDocumentSchema() -> Dict[str, Any]:
"""Get the JSON schema for structured document generation (single document).""" """Get the JSON schema for structured document generation (single document)."""
return { return {
"type": "object", "type": "object",
@ -206,7 +230,6 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"required": ["title"], "required": ["title"],
"properties": { "properties": {
"title": {"type": "string", "description": "Document title"}, "title": {"type": "string", "description": "Document title"},
"author": {"type": "string", "description": "Document author (optional)"},
"source_documents": { "source_documents": {
"type": "array", "type": "array",
"items": {"type": "string"}, "items": {"type": "string"},
@ -214,7 +237,7 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
}, },
"extraction_method": { "extraction_method": {
"type": "string", "type": "string",
"default": "ai_extraction", "default": "ai_generation",
"description": "Method used for extraction" "description": "Method used for extraction"
} }
} }
@ -230,7 +253,15 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"title": {"type": "string", "description": "Section title (optional)"}, "title": {"type": "string", "description": "Section title (optional)"},
"content_type": { "content_type": {
"type": "string", "type": "string",
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"], "enum": [
"table",
"bullet_list",
"paragraph",
"heading",
"code_block",
"image",
"mixed"
],
"description": "Primary content type of this section" "description": "Primary content type of this section"
}, },
"elements": { "elements": {
@ -242,7 +273,8 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
{"$ref": "#/definitions/bullet_list"}, {"$ref": "#/definitions/bullet_list"},
{"$ref": "#/definitions/paragraph"}, {"$ref": "#/definitions/paragraph"},
{"$ref": "#/definitions/heading"}, {"$ref": "#/definitions/heading"},
{"$ref": "#/definitions/code_block"} {"$ref": "#/definitions/code_block"},
{"$ref": "#/definitions/image"}
] ]
} }
}, },
@ -359,12 +391,21 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"code": {"type": "string", "description": "Code content"}, "code": {"type": "string", "description": "Code content"},
"language": {"type": "string", "description": "Programming language (optional)"} "language": {"type": "string", "description": "Programming language (optional)"}
} }
},
"image": {
"type": "object",
"required": ["url"],
"properties": {
"url": {"type": "string", "description": "Image URL or data URI"},
"caption": {"type": "string", "description": "Image caption (optional)"},
"alt": {"type": "string", "description": "Alt text (optional)"}
}
} }
} }
} }
def get_extraction_prompt_template() -> str: def getExtractionPromptTemplate() -> str:
"""Get the template for AI extraction prompts that request JSON output.""" """Get the template for AI extraction prompts that request JSON output."""
return """ return """
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document. You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.
@ -390,7 +431,7 @@ Return only the JSON structure following the schema. Do not include any text bef
""" """
def get_generation_prompt_template() -> str: def getGenerationPromptTemplate() -> str:
"""Get the template for AI generation prompts that work with JSON input.""" """Get the template for AI generation prompts that work with JSON input."""
return """ return """
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content. You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.
@ -416,31 +457,31 @@ Return only the enhanced JSON structure following the schema. Do not include any
""" """
def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]: def getAdaptiveJsonSchema(promptAnalysis: Dict[str, Any] = None) -> Dict[str, Any]:
"""Automatically select appropriate schema based on prompt analysis.""" """Automatically select appropriate schema based on prompt analysis."""
if prompt_analysis and prompt_analysis.get("is_multi_file", False): if promptAnalysis and promptAnalysis.get("is_multi_file", False):
return get_multi_document_subJsonSchema() return getMultiDocumentSchema()
else: else:
return get_document_subJsonSchema() return getDocumentSchema()
def validate_json_document(json_data: Dict[str, Any]) -> bool: def validateJsonDocument(jsonData: Dict[str, Any]) -> bool:
"""Validate that the JSON data follows the document schema.""" """Validate that the JSON data follows the unified document schema."""
try: try:
# Basic validation - check required fields # Basic validation - check required fields
if not isinstance(json_data, dict): if not isinstance(jsonData, dict):
return False return False
# Check if it's multi-document or single-document structure # Check if it's multi-document or single-document structure
if "documents" in json_data: if "documents" in jsonData:
# Multi-document structure # Multi-document structure
if "metadata" not in json_data: if "metadata" not in jsonData:
return False return False
metadata = json_data["metadata"] metadata = jsonData["metadata"]
if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata: if not isinstance(metadata, dict) or "split_strategy" not in metadata:
return False return False
documents = json_data["documents"] documents = jsonData["documents"]
if not isinstance(documents, list): if not isinstance(documents, list):
return False return False
@ -469,7 +510,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
return False return False
# Validate content_type # Validate content_type
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"] valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
if section["content_type"] not in valid_types: if section["content_type"] not in valid_types:
return False return False
@ -477,16 +518,16 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
if not isinstance(section["elements"], list): if not isinstance(section["elements"], list):
return False return False
elif "sections" in json_data: elif "sections" in jsonData:
# Single-document structure (existing validation) # Single-document structure (existing validation)
if "metadata" not in json_data: if "metadata" not in jsonData:
return False return False
metadata = json_data["metadata"] metadata = jsonData["metadata"]
if not isinstance(metadata, dict) or "title" not in metadata: if not isinstance(metadata, dict) or "title" not in metadata:
return False return False
sections = json_data["sections"] sections = jsonData["sections"]
if not isinstance(sections, list): if not isinstance(sections, list):
return False return False
@ -501,7 +542,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
return False return False
# Validate content_type # Validate content_type
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"] valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
if section["content_type"] not in valid_types: if section["content_type"] not in valid_types:
return False return False

View file

@ -5,83 +5,10 @@ This module builds prompts for generating documents from extracted content.
import logging import logging
from typing import Dict, Any from typing import Dict, Any
from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation
# Includes examples for all content types so AI knows the structure patterns
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [
{
"id": "doc_1",
"title": "{{DOCUMENT_TITLE}}",
"filename": "document.json",
"sections": [
{
"id": "section_heading_example",
"content_type": "heading",
"elements": [
{"level": 1, "text": "Heading Text"}
],
"order": 0
},
{
"id": "section_paragraph_example",
"content_type": "paragraph",
"elements": [
{"text": "Paragraph text content"}
],
"order": 0
},
{
"id": "section_list_example",
"content_type": "list",
"elements": [
{
"items": [
{"text": "Item 1"},
{"text": "Item 2"}
],
"list_type": "numbered"
}
],
"order": 0
},
{
"id": "section_table_example",
"content_type": "table",
"elements": [
{
"headers": ["Column 1", "Column 2"],
"rows": [
["Row 1 Col 1", "Row 1 Col 2"],
["Row 2 Col 1", "Row 2 Col 2"]
],
"caption": "Table caption"
}
],
"order": 0
},
{
"id": "section_code_example",
"content_type": "code",
"elements": [
{
"code": "function example() { return true; }",
"language": "javascript"
}
],
"order": 0
}
]
}
]
}"""
async def buildGenerationPrompt( async def buildGenerationPrompt(
@ -106,99 +33,101 @@ async def buildGenerationPrompt(
Complete generation prompt string Complete generation prompt string
""" """
# Create a template - let AI generate title if not provided # Create a template - let AI generate title if not provided
title_value = title if title else "Generated Document" titleValue = title if title else "Generated Document"
json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value) jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)
# Build prompt based on whether this is a continuation or first call # Build prompt based on whether this is a continuation or first call
# Check if we have valid continuation context with actual JSON fragment # Check if we have valid continuation context with actual JSON fragment
has_continuation = ( hasContinuation = (
continuationContext continuationContext
and continuationContext.get("section_count", 0) > 0 and continuationContext.get("section_count", 0) > 0
and continuationContext.get("last_raw_json", "") and continuationContext.get("last_raw_json", "")
and continuationContext.get("last_raw_json", "").strip() != "{}" and continuationContext.get("last_raw_json", "").strip() != "{}"
) )
if has_continuation: if hasContinuation:
# CONTINUATION PROMPT - user already received first part, continue from where it stopped # CONTINUATION PROMPT - user already received first part, continue from where it stopped
last_raw_json = continuationContext.get("last_raw_json", "") lastRawJson = continuationContext.get("last_raw_json", "")
last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"} lastItemObject = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
last_items_from_fragment = continuationContext.get("last_items_from_fragment", "") lastItemsFromFragment = continuationContext.get("last_items_from_fragment", "")
total_items_count = continuationContext.get("total_items_count", 0) totalItemsCount = continuationContext.get("total_items_count", 0)
# Show the last few items to indicate where to continue (limit fragment size) # Show the last few items to indicate where to continue (limit fragment size)
# Extract just the ending portion of the JSON to show where it cut off # Extract just the ending portion of the JSON to show where it cut off
fragment_snippet = "" fragmentSnippet = ""
if last_raw_json: if lastRawJson:
# Show last 1500 chars or the whole thing if shorter - just enough to show the cut point # Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json fragmentSnippet = lastRawJson[-1500:] if len(lastRawJson) > 1500 else lastRawJson
# Add ellipsis if truncated # Add ellipsis if truncated
if len(last_raw_json) > 1500: if len(lastRawJson) > 1500:
fragment_snippet = "..." + fragment_snippet fragmentSnippet = "..." + fragmentSnippet
# Build clear continuation guidance # Build clear continuation guidance
continuation_guidance = [] continuationGuidance = []
if total_items_count > 0: if totalItemsCount > 0:
continuation_guidance.append(f"You have already generated {total_items_count} items.") continuationGuidance.append(f"You have already generated {totalItemsCount} items.")
# Show the last complete item object (full object format) # Show the last complete item object (full object format)
if last_item_object: if lastItemObject:
continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.") continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")
continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped." continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped."
generation_prompt = f"""User request: "{userPrompt}" generationPrompt = f"""User request: "{userPrompt}"
The user already received part of the response. Continue generating the remaining content. The user already received part of the response. Continue generating the remaining content.
{continuation_text} {continuationText}
Previous response ended here (JSON was cut off at this point): Previous response ended here (JSON was cut off at this point):
```json ```json
{fragment_snippet if fragment_snippet else "(No fragment available)"} {fragmentSnippet if fragmentSnippet else "(No fragment available)"}
``` ```
JSON structure template: JSON structure template:
{json_template} {jsonTemplate}
Instructions: Instructions:
- Return full JSON structure (metadata + documents + sections) - Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Continue from where it stopped - add NEW items only, do not repeat old items - Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use the element structures shown in the template - Use ONLY the element structures shown in the template.
- Generate all remaining content needed to complete the user request - Continue from where it stopped — add NEW items only; do not repeat existing items.
- Fill with actual content (no comments, no "Add more..." text, no placeholders) - Generate all remaining content needed to complete the user request.
- When fully complete, add "complete_response": true at root level - Fill with actual content (no placeholders or instructional text such as "Add more...").
- Return only valid JSON (no comments, no markdown blocks) - When fully complete, add "complete_response": true at root level.
- Output JSON only; no markdown fences or extra text before/after.
Continue generating: Continue generating:
""" """
else: else:
# FIRST CALL - initial generation # FIRST CALL - initial generation
generation_prompt = f"""User request: "{userPrompt}" generationPrompt = f"""User request: "{userPrompt}"
Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning. Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
JSON structure template (reference only - shows the pattern): JSON structure template:
{json_template} {jsonTemplate}
Instructions: Instructions:
- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning - Start your response with {{"metadata": ...}} — return COMPLETE, STRICT JSON.
- Do NOT continue from the template examples above - create your own sections - Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Generate complete content based on the user request - Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use the element structures shown in the template (heading, paragraph, list, table, code) - Do NOT reuse the example section IDs from the template; create your own.
- Create your own section IDs (do not use the example IDs like "section_heading_example") - Use ONLY the element structures shown in the template.
- When fully complete, add "complete_response": true at root level - Generate complete content based on the user request.
- Return only valid JSON (no comments, no markdown blocks, no text before/after) - When fully complete, add "complete_response": true at root level.
- Output JSON only; no markdown fences or any additional text.
Generate your complete response starting from {{"metadata": ...}}: Generate your complete response starting from {{"metadata": ...}}:
""" """
# If we have extracted content, prepend it to the prompt # If we have extracted content, prepend it to the prompt
if extracted_content: if extracted_content:
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content} {extracted_content}
{generation_prompt}""" {generationPrompt}"""
return generation_prompt.strip() return generationPrompt.strip()

View file

@ -152,11 +152,11 @@ class NeutralizationService:
try: try:
# Auto-detect content type if not provided # Auto-detect content type if not provided
if textType is None: if textType is None:
textType = self.commonUtils.detect_content_type(text) textType = self.commonUtils.detectContentType(text)
# Check if content is binary data # Check if content is binary data
if self.binaryProcessor.is_binary_content(text): if self.binaryProcessor.isBinaryContent(text):
data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text) data, mapping, replaced_fields, processed_info = self.binaryProcessor.processBinaryContent(text)
neutralized_text = text if isinstance(data, str) else str(data) neutralized_text = text if isinstance(data, str) else str(data)
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()] attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
return NeutralizationResult( return NeutralizationResult(
@ -169,13 +169,13 @@ class NeutralizationService:
# Inline former _processData routing # Inline former _processData routing
if textType in ['csv', 'json', 'xml']: if textType in ['csv', 'json', 'xml']:
if textType == 'csv': if textType == 'csv':
data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text) data, mapping, replaced_fields, processed_info = self.listProcessor.processCsvContent(text)
elif textType == 'json': elif textType == 'json':
data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text) data, mapping, replaced_fields, processed_info = self.listProcessor.processJsonContent(text)
else: # xml else: # xml
data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text) data, mapping, replaced_fields, processed_info = self.listProcessor.processXmlContent(text)
else: else:
data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text) data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
# Stringify data consistently # Stringify data consistently
if textType == 'csv': if textType == 'csv':
try: try:

View file

@ -6,7 +6,7 @@ Handles pattern matching and replacement for emails, phones, addresses, IDs and
import re import re
import uuid import uuid
from typing import Dict, List, Tuple, Any from typing import Dict, List, Tuple, Any
from modules.services.serviceNeutralization.subPatterns import DataPatterns, find_patterns_in_text from modules.services.serviceNeutralization.subPatterns import DataPatterns, findPatternsInText
class StringParser: class StringParser:
"""Handles string parsing and replacement operations""" """Handles string parsing and replacement operations"""
@ -22,7 +22,7 @@ class StringParser:
self.NamesToParse = NamesToParse or [] self.NamesToParse = NamesToParse or []
self.mapping = {} self.mapping = {}
def is_placeholder(self, text: str) -> bool: def _isPlaceholder(self, text: str) -> bool:
""" """
Check if text is already a placeholder in format [tag.uuid] Check if text is already a placeholder in format [tag.uuid]
@ -34,7 +34,7 @@ class StringParser:
""" """
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text)) return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
def replace_pattern_matches(self, text: str) -> str: def _replacePatternMatches(self, text: str) -> str:
""" """
Replace pattern-based matches (emails, phones, etc.) in text Replace pattern-based matches (emails, phones, etc.) in text
@ -44,37 +44,37 @@ class StringParser:
Returns: Returns:
str: Text with pattern matches replaced str: Text with pattern matches replaced
""" """
pattern_matches = find_patterns_in_text(text, self.data_patterns) patternMatches = findPatternsInText(text, self.data_patterns)
# Process pattern matches from right to left to avoid position shifts # Process pattern matches from right to left to avoid position shifts
for pattern_name, matched_text, start, end in reversed(pattern_matches): for patternName, matchedText, start, end in reversed(patternMatches):
# Skip if already a placeholder # Skip if already a placeholder
if self.is_placeholder(matched_text): if self._isPlaceholder(matchedText):
continue continue
# Skip if contains placeholder characters # Skip if contains placeholder characters
if '[' in matched_text or ']' in matched_text: if '[' in matchedText or ']' in matchedText:
continue continue
if matched_text not in self.mapping: if matchedText not in self.mapping:
# Generate a UUID for the placeholder # Generate a UUID for the placeholder
placeholder_id = str(uuid.uuid4()) placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid] # Create placeholder in format [type.uuid]
type_mapping = { typeMapping = {
'email': 'email', 'email': 'email',
'phone': 'phone', 'phone': 'phone',
'address': 'address', 'address': 'address',
'id': 'id' 'id': 'id'
} }
placeholder_type = type_mapping.get(pattern_name, 'data') placeholderType = typeMapping.get(patternName, 'data')
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]" self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]"
replacement = self.mapping[matched_text] replacement = self.mapping[matchedText]
text = text[:start] + replacement + text[end:] text = text[:start] + replacement + text[end:]
return text return text
def replace_custom_names(self, text: str) -> str: def _replaceCustomNames(self, text: str) -> str:
""" """
Replace custom names from the user list in text Replace custom names from the user list in text
@ -96,19 +96,19 @@ class StringParser:
# Replace each match with a placeholder # Replace each match with a placeholder
for match in reversed(matches): # Process from right to left to avoid position shifts for match in reversed(matches): # Process from right to left to avoid position shifts
matched_text = match.group() matchedText = match.group()
if matched_text not in self.mapping: if matchedText not in self.mapping:
# Generate a UUID for the placeholder # Generate a UUID for the placeholder
placeholder_id = str(uuid.uuid4()) placeholderId = str(uuid.uuid4())
self.mapping[matched_text] = f"[name.{placeholder_id}]" self.mapping[matchedText] = f"[name.{placeholderId}]"
replacement = self.mapping[matched_text] replacement = self.mapping[matchedText]
start, end = match.span() start, end = match.span()
text = text[:start] + replacement + text[end:] text = text[:start] + replacement + text[end:]
return text return text
def process_string(self, text: str) -> str: def processString(self, text: str) -> str:
""" """
Process a string by replacing patterns first, then custom names Process a string by replacing patterns first, then custom names
@ -118,18 +118,18 @@ class StringParser:
Returns: Returns:
str: Processed text with replacements str: Processed text with replacements
""" """
if self.is_placeholder(text): if self._isPlaceholder(text):
return text return text
# Step 1: Replace pattern-based matches FIRST # Step 1: Replace pattern-based matches FIRST
text = self.replace_pattern_matches(text) text = self._replacePatternMatches(text)
# Step 2: Replace custom names SECOND # Step 2: Replace custom names SECOND
text = self.replace_custom_names(text) text = self._replaceCustomNames(text)
return text return text
def process_json_value(self, value: Any) -> Any: def processJsonValue(self, value: Any) -> Any:
""" """
Process a JSON value for anonymization Process a JSON value for anonymization
@ -140,15 +140,15 @@ class StringParser:
Any: Processed value Any: Processed value
""" """
if isinstance(value, str): if isinstance(value, str):
return self.process_string(value) return self.processString(value)
elif isinstance(value, dict): elif isinstance(value, dict):
return {k: self.process_json_value(v) for k, v in value.items()} return {k: self.processJsonValue(v) for k, v in value.items()}
elif isinstance(value, list): elif isinstance(value, list):
return [self.process_json_value(item) for item in value] return [self.processJsonValue(item) for item in value]
else: else:
return value return value
def get_mapping(self) -> Dict[str, str]: def getMapping(self) -> Dict[str, str]:
""" """
Get the current mapping of original values to placeholders Get the current mapping of original values to placeholders
@ -157,6 +157,6 @@ class StringParser:
""" """
return self.mapping.copy() return self.mapping.copy()
def clear_mapping(self): def clearMapping(self):
"""Clear the current mapping""" """Clear the current mapping"""
self.mapping.clear() self.mapping.clear()

View file

@ -316,7 +316,7 @@ class TextTablePatterns:
"""Patterns for identifying table-like structures in text""" """Patterns for identifying table-like structures in text"""
@staticmethod @staticmethod
def get_patterns() -> List[Tuple[str, str]]: def getPatterns() -> List[Tuple[str, str]]:
return [ return [
# key: value pattern (with optional whitespace) # key: value pattern (with optional whitespace)
(r'^([^:]+):\s*(.+)$', ':'), (r'^([^:]+):\s*(.+)$', ':'),
@ -329,15 +329,15 @@ class TextTablePatterns:
] ]
@staticmethod @staticmethod
def is_table_line(line: str) -> bool: def _isTableLine(line: str) -> bool:
"""Check if a line matches any table pattern""" """Check if a line matches any table pattern"""
patterns = TextTablePatterns.get_patterns() patterns = TextTablePatterns.getPatterns()
return any(re.match(pattern[0], line.strip()) for pattern in patterns) return any(re.match(pattern[0], line.strip()) for pattern in patterns)
@staticmethod @staticmethod
def extract_key_value(line: str) -> Optional[Tuple[str, str]]: def extractKeyValue(line: str) -> Optional[Tuple[str, str]]:
"""Extract key and value from a table line""" """Extract key and value from a table line"""
patterns = TextTablePatterns.get_patterns() patterns = TextTablePatterns.getPatterns()
for pattern, separator in patterns: for pattern, separator in patterns:
match = re.match(pattern, line.strip()) match = re.match(pattern, line.strip())
if match: if match:
@ -346,7 +346,7 @@ class TextTablePatterns:
return key, value return key, value
return None return None
def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]: def getPatternForHeader(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
""" """
Find matching pattern for a header Find matching pattern for a header
@ -368,7 +368,7 @@ def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pat
return pattern return pattern
return None return None
def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]: def findPatternsInText(text: str, patterns: List[Pattern]) -> List[tuple]:
""" """
Find all pattern matches in text Find all pattern matches in text

View file

@ -27,7 +27,7 @@ class BinaryProcessor:
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'] 'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
} }
def detect_binary_type(self, content: str) -> str: def _detectBinaryType(self, content: str) -> str:
""" """
Detect if content is binary data and determine type Detect if content is binary data and determine type
@ -54,7 +54,7 @@ class BinaryProcessor:
return 'text' return 'text'
def is_binary_content(self, content: str) -> bool: def isBinaryContent(self, content: str) -> bool:
""" """
Check if content is binary data Check if content is binary data
@ -64,9 +64,9 @@ class BinaryProcessor:
Returns: Returns:
bool: True if content is binary bool: True if content is binary
""" """
return self.detect_binary_type(content) == 'binary' return self._detectBinaryType(content) == 'binary'
def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]: def processBinaryContent(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
""" """
Process binary content for anonymization Process binary content for anonymization
@ -83,15 +83,15 @@ class BinaryProcessor:
# 3. Handling metadata and embedded content # 3. Handling metadata and embedded content
# 4. Preserving binary integrity while removing sensitive data # 4. Preserving binary integrity while removing sensitive data
processed_info = { processedInfo = {
'type': 'binary', 'type': 'binary',
'status': 'not_implemented', 'status': 'not_implemented',
'message': 'Binary data neutralization not yet implemented' 'message': 'Binary data neutralization not yet implemented'
} }
return content, {}, [], processed_info return content, {}, [], processedInfo
def get_supported_types(self) -> Dict[str, list]: def getSupportedTypes(self) -> Dict[str, list]:
""" """
Get list of supported binary file types Get list of supported binary file types

View file

@ -33,7 +33,7 @@ class CommonUtils:
"""Common utility functions for data processing""" """Common utility functions for data processing"""
@staticmethod @staticmethod
def normalize_whitespace(text: str) -> str: def normalizeWhitespace(text: str) -> str:
""" """
Normalize whitespace in text Normalize whitespace in text
@ -48,7 +48,7 @@ class CommonUtils:
return text.strip() return text.strip()
@staticmethod @staticmethod
def is_table_line(line: str) -> bool: def _isTableLine(line: str) -> bool:
""" """
Check if a line represents a table row Check if a line represents a table row
@ -62,7 +62,7 @@ class CommonUtils:
re.match(r'^\s*[^\t]+\t[^\t]+$', line)) re.match(r'^\s*[^\t]+\t[^\t]+$', line))
@staticmethod @staticmethod
def detect_content_type(content: str) -> str: def detectContentType(content: str) -> str:
""" """
Detect the type of content based on its structure Detect the type of content based on its structure
@ -98,7 +98,7 @@ class CommonUtils:
return 'text' return 'text'
@staticmethod @staticmethod
def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]: def mergeMappings(*mappings: Dict[str, str]) -> Dict[str, str]:
""" """
Merge multiple mapping dictionaries Merge multiple mapping dictionaries
@ -114,21 +114,21 @@ class CommonUtils:
return merged return merged
@staticmethod @staticmethod
def create_placeholder(placeholder_type: str, placeholder_id: str) -> str: def createPlaceholder(placeholderType: str, placeholderId: str) -> str:
""" """
Create a placeholder string in the format [type.uuid] Create a placeholder string in the format [type.uuid]
Args: Args:
placeholder_type: Type of placeholder (email, phone, name, etc.) placeholderType: Type of placeholder (email, phone, name, etc.)
placeholder_id: Unique identifier for the placeholder placeholderId: Unique identifier for the placeholder
Returns: Returns:
str: Formatted placeholder string str: Formatted placeholder string
""" """
return f"[{placeholder_type}.{placeholder_id}]" return f"[{placeholderType}.{placeholderId}]"
@staticmethod @staticmethod
def validate_placeholder(placeholder: str) -> bool: def validatePlaceholder(placeholder: str) -> bool:
""" """
Validate if a string is a valid placeholder Validate if a string is a valid placeholder
@ -141,7 +141,7 @@ class CommonUtils:
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder)) return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
@staticmethod @staticmethod
def extract_placeholder_info(placeholder: str) -> Optional[tuple]: def extractPlaceholderInfo(placeholder: str) -> Optional[tuple]:
""" """
Extract type and ID from a placeholder Extract type and ID from a placeholder

View file

@ -10,7 +10,7 @@ from typing import Dict, List, Any, Union
from dataclasses import dataclass from dataclasses import dataclass
from io import StringIO from io import StringIO
from modules.services.serviceNeutralization.subParseString import StringParser from modules.services.serviceNeutralization.subParseString import StringParser
from modules.services.serviceNeutralization.subPatterns import get_pattern_for_header, HeaderPatterns from modules.services.serviceNeutralization.subPatterns import getPatternForHeader, HeaderPatterns
@dataclass @dataclass
class TableData: class TableData:
@ -32,7 +32,7 @@ class ListProcessor:
self.string_parser = StringParser(NamesToParse) self.string_parser = StringParser(NamesToParse)
self.header_patterns = HeaderPatterns.patterns self.header_patterns = HeaderPatterns.patterns
def anonymize_table(self, table: TableData) -> TableData: def _anonymizeTable(self, table: TableData) -> TableData:
""" """
Anonymize table data based on headers Anonymize table data based on headers
@ -42,28 +42,28 @@ class ListProcessor:
Returns: Returns:
TableData: Anonymized table TableData: Anonymized table
""" """
anonymized_table = TableData( anonymizedTable = TableData(
headers=table.headers.copy(), headers=table.headers.copy(),
rows=[row.copy() for row in table.rows], rows=[row.copy() for row in table.rows],
source_type=table.source_type source_type=table.source_type
) )
for i, header in enumerate(anonymized_table.headers): for i, header in enumerate(anonymizedTable.headers):
pattern = get_pattern_for_header(header, self.header_patterns) pattern = getPatternForHeader(header, self.header_patterns)
if pattern: if pattern:
for row in anonymized_table.rows: for row in anonymizedTable.rows:
if row[i] is not None: if row[i] is not None:
original = str(row[i]) original = str(row[i])
if original not in self.string_parser.mapping: if original not in self.string_parser.mapping:
# Generate a UUID for the placeholder # Generate a UUID for the placeholder
import uuid import uuid
placeholder_id = str(uuid.uuid4()) placeholderId = str(uuid.uuid4())
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1) self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
row[i] = self.string_parser.mapping[original] row[i] = self.string_parser.mapping[original]
return anonymized_table return anonymizedTable
def process_csv_content(self, content: str) -> tuple: def processCsvContent(self, content: str) -> tuple:
""" """
Process CSV content Process CSV content
@ -81,29 +81,29 @@ class ListProcessor:
) )
if not table.rows: if not table.rows:
return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0} return None, self.string_parser.getMapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
anonymized_table = self.anonymize_table(table) anonymizedTable = self._anonymizeTable(table)
# Track replaced fields # Track replaced fields
replaced_fields = [] replacedFields = []
for i, header in enumerate(anonymized_table.headers): for i, header in enumerate(anonymizedTable.headers):
for orig_row, anon_row in zip(table.rows, anonymized_table.rows): for origRow, anonRow in zip(table.rows, anonymizedTable.rows):
if anon_row[i] != orig_row[i]: if anonRow[i] != origRow[i]:
replaced_fields.append(header) replacedFields.append(header)
# Convert back to DataFrame # Convert back to DataFrame
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers) result = pd.DataFrame(anonymizedTable.rows, columns=anonymizedTable.headers)
processed_info = { processedInfo = {
'type': 'table', 'type': 'table',
'headers': table.headers, 'headers': table.headers,
'row_count': len(table.rows) 'row_count': len(table.rows)
} }
return result, self.string_parser.get_mapping(), replaced_fields, processed_info return result, self.string_parser.getMapping(), replacedFields, processedInfo
def process_json_content(self, content: str) -> tuple: def processJsonContent(self, content: str) -> tuple:
""" """
Process JSON content Process JSON content
@ -116,13 +116,13 @@ class ListProcessor:
data = json.loads(content) data = json.loads(content)
# Process JSON recursively using string parser # Process JSON recursively using string parser
result = self.string_parser.process_json_value(data) result = self.string_parser.processJsonValue(data)
processed_info = {'type': 'json'} processedInfo = {'type': 'json'}
return result, self.string_parser.get_mapping(), [], processed_info return result, self.string_parser.getMapping(), [], processedInfo
def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str: def _anonymizeXmlElement(self, element: ET.Element, indent: str = '') -> str:
""" """
Recursively process XML element and return formatted string Recursively process XML element and return formatted string
@ -134,69 +134,69 @@ class ListProcessor:
Formatted XML string Formatted XML string
""" """
# Process attributes # Process attributes
processed_attrs = {} processedAttrs = {}
for attr_name, attr_value in element.attrib.items(): for attrName, attrValue in element.attrib.items():
# Check if attribute name matches any header patterns # Check if attribute name matches any header patterns
pattern = get_pattern_for_header(attr_name, self.header_patterns) pattern = getPatternForHeader(attrName, self.header_patterns)
if pattern: if pattern:
if attr_value not in self.string_parser.mapping: if attrValue not in self.string_parser.mapping:
# Generate a UUID for the placeholder # Generate a UUID for the placeholder
import uuid import uuid
placeholder_id = str(uuid.uuid4()) placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid] # Create placeholder in format [type.uuid]
type_mapping = { typeMapping = {
'email': 'email', 'email': 'email',
'phone': 'phone', 'phone': 'phone',
'name': 'name', 'name': 'name',
'address': 'address', 'address': 'address',
'id': 'id' 'id': 'id'
} }
placeholder_type = type_mapping.get(pattern.name, 'data') placeholderType = typeMapping.get(pattern.name, 'data')
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
processed_attrs[attr_name] = self.string_parser.mapping[attr_value] processedAttrs[attrName] = self.string_parser.mapping[attrValue]
else: else:
# Check if attribute value matches any data patterns # Check if attribute value matches any data patterns
from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
matches = find_patterns_in_text(attr_value, DataPatterns.patterns) matches = findPatternsInText(attrValue, DataPatterns.patterns)
if matches: if matches:
pattern_name = matches[0][0] patternName = matches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None) pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
if pattern: if pattern:
if attr_value not in self.string_parser.mapping: if attrValue not in self.string_parser.mapping:
# Generate a UUID for the placeholder # Generate a UUID for the placeholder
import uuid import uuid
placeholder_id = str(uuid.uuid4()) placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid] # Create placeholder in format [type.uuid]
type_mapping = { typeMapping = {
'email': 'email', 'email': 'email',
'phone': 'phone', 'phone': 'phone',
'name': 'name', 'name': 'name',
'address': 'address', 'address': 'address',
'id': 'id' 'id': 'id'
} }
placeholder_type = type_mapping.get(pattern_name, 'data') placeholderType = typeMapping.get(patternName, 'data')
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
processed_attrs[attr_name] = self.string_parser.mapping[attr_value] processedAttrs[attrName] = self.string_parser.mapping[attrValue]
else: else:
processed_attrs[attr_name] = attr_value processedAttrs[attrName] = attrValue
else: else:
processed_attrs[attr_name] = attr_value processedAttrs[attrName] = attrValue
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items()) attrs = ' '.join(f'{k}="{v}"' for k, v in processedAttrs.items())
attrs = f' {attrs}' if attrs else '' attrs = f' {attrs}' if attrs else ''
# Process text content # Process text content
text = element.text.strip() if element.text and element.text.strip() else '' text = element.text.strip() if element.text and element.text.strip() else ''
if text: if text:
# Skip if already a placeholder # Skip if already a placeholder
if not self.string_parser.is_placeholder(text): if not self.string_parser._isPlaceholder(text):
# Check if text matches any patterns # Check if text matches any patterns
from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
pattern_matches = find_patterns_in_text(text, DataPatterns.patterns) patternMatches = findPatternsInText(text, DataPatterns.patterns)
if pattern_matches: if patternMatches:
pattern_name = pattern_matches[0][0] patternName = patternMatches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None) pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
if pattern: if pattern:
if text not in self.string_parser.mapping: if text not in self.string_parser.mapping:
# Generate a UUID for the placeholder # Generate a UUID for the placeholder
@ -210,8 +210,8 @@ class ListProcessor:
'address': 'address', 'address': 'address',
'id': 'id' 'id': 'id'
} }
placeholder_type = type_mapping.get(pattern_name, 'data') placeholderType = typeMapping.get(patternName, 'data')
self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]" self.string_parser.mapping[text] = f"[{placeholderType}.{placeholderId}]"
text = self.string_parser.mapping[text] text = self.string_parser.mapping[text]
else: else:
# Check if text matches any custom names from the user list # Check if text matches any custom names from the user list
@ -230,8 +230,8 @@ class ListProcessor:
# Process child elements # Process child elements
children = [] children = []
for child in element: for child in element:
child_str = self.anonymize_xml_element(child, indent + ' ') childStr = self._anonymizeXmlElement(child, indent + ' ')
children.append(child_str) children.append(childStr)
# Build element string # Build element string
if not children and not text: if not children and not text:
@ -246,7 +246,7 @@ class ListProcessor:
result.append(f"{indent}</{element.tag}>") result.append(f"{indent}</{element.tag}>")
return '\n'.join(result) return '\n'.join(result)
def process_xml_content(self, content: str) -> tuple: def processXmlContent(self, content: str) -> tuple:
""" """
Process XML content Process XML content
@ -259,21 +259,21 @@ class ListProcessor:
root = ET.fromstring(content) root = ET.fromstring(content)
# Process XML recursively with proper formatting # Process XML recursively with proper formatting
result = self.anonymize_xml_element(root) result = self._anonymizeXmlElement(root)
processed_info = {'type': 'xml'} processedInfo = {'type': 'xml'}
return result, self.string_parser.get_mapping(), [], processed_info return result, self.string_parser.getMapping(), [], processedInfo
def get_mapping(self) -> Dict[str, str]: def getMapping(self) -> Dict[str, str]:
""" """
Get the current mapping of original values to placeholders Get the current mapping of original values to placeholders
Returns: Returns:
Dict[str, str]: Mapping dictionary Dict[str, str]: Mapping dictionary
""" """
return self.string_parser.get_mapping() return self.string_parser.getMapping()
def clear_mapping(self): def clearMapping(self):
"""Clear the current mapping""" """Clear the current mapping"""
self.string_parser.clear_mapping() self.string_parser.clearMapping()

View file

@ -25,7 +25,7 @@ class TextProcessor:
""" """
self.string_parser = StringParser(NamesToParse) self.string_parser = StringParser(NamesToParse)
def extract_tables_from_text(self, content: str) -> tuple: def _extractTablesFromText(self, content: str) -> tuple:
""" """
Extract tables and plain text from content Extract tables and plain text from content
@ -38,11 +38,11 @@ class TextProcessor:
# For now, process the entire content as plain text # For now, process the entire content as plain text
# This can be extended later to detect table-like structures # This can be extended later to detect table-like structures
tables = [] tables = []
plain_texts = [PlainText(content=content, source_type='text_plain')] plainTexts = [PlainText(content=content, source_type='text_plain')]
return tables, plain_texts return tables, plainTexts
def anonymize_plain_text(self, text: PlainText) -> PlainText: def _anonymizePlainText(self, text: PlainText) -> PlainText:
""" """
Anonymize plain text content Anonymize plain text content
@ -53,11 +53,11 @@ class TextProcessor:
PlainText: Anonymized text PlainText: Anonymized text
""" """
# Use the string parser to process the content # Use the string parser to process the content
anonymized_content = self.string_parser.process_string(text.content) anonymizedContent = self.string_parser.processString(text.content)
return PlainText(content=anonymized_content, source_type=text.source_type) return PlainText(content=anonymizedContent, source_type=text.source_type)
def process_text_content(self, content: str) -> tuple: def processTextContent(self, content: str) -> tuple:
""" """
Process text content and return anonymized data Process text content and return anonymized data
@ -68,35 +68,35 @@ class TextProcessor:
Tuple of (anonymized_content, mapping, replaced_fields, processed_info) Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
""" """
# Extract tables and plain text sections # Extract tables and plain text sections
tables, plain_texts = self.extract_tables_from_text(content) tables, plainTexts = self._extractTablesFromText(content)
# Process plain text sections # Process plain text sections
anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts] anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts]
# Combine all processed content # Combine all processed content
result = content result = content
for text, anonymized_text in zip(plain_texts, anonymized_texts): for text, anonymizedText in zip(plainTexts, anonymizedTexts):
if text.content != anonymized_text.content: if text.content != anonymizedText.content:
result = result.replace(text.content, anonymized_text.content) result = result.replace(text.content, anonymizedText.content)
# Get processing information # Get processing information
processed_info = { processedInfo = {
'type': 'text', 'type': 'text',
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables] 'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
if tables else []) if tables else [])
} }
return result, self.string_parser.get_mapping(), [], processed_info return result, self.string_parser.getMapping(), [], processedInfo
def get_mapping(self) -> Dict[str, str]: def getMapping(self) -> Dict[str, str]:
""" """
Get the current mapping of original values to placeholders Get the current mapping of original values to placeholders
Returns: Returns:
Dict[str, str]: Mapping dictionary Dict[str, str]: Mapping dictionary
""" """
return self.string_parser.get_mapping() return self.string_parser.getMapping()
def clear_mapping(self): def clearMapping(self):
"""Clear the current mapping""" """Clear the current mapping"""
self.string_parser.clear_mapping() self.string_parser.clearMapping()

View file

@ -20,8 +20,8 @@ class SharepointService:
Use setAccessTokenFromConnection() method to configure the access token before making API calls. Use setAccessTokenFromConnection() method to configure the access token before making API calls.
""" """
self.services = serviceCenter self.services = serviceCenter
self.access_token = None self.accessToken = None
self.base_url = "https://graph.microsoft.com/v1.0" self.baseUrl = "https://graph.microsoft.com/v1.0"
def setAccessTokenFromConnection(self, userConnection) -> bool: def setAccessTokenFromConnection(self, userConnection) -> bool:
"""Set access token from UserConnection. """Set access token from UserConnection.
@ -52,21 +52,21 @@ class SharepointService:
logger.error(f"Error setting access token: {str(e)}") logger.error(f"Error setting access token: {str(e)}")
return False return False
async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]: async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
"""Make a Microsoft Graph API call with proper error handling.""" """Make a Microsoft Graph API call with proper error handling."""
try: try:
if self.access_token is None: if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.") logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."} return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."}
headers = { headers = {
"Authorization": f"Bearer {self.access_token}", "Authorization": f"Bearer {self.accessToken}",
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json" "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
} }
# Remove leading slash from endpoint to avoid double slash # Remove leading slash from endpoint to avoid double slash
clean_endpoint = endpoint.lstrip('/') cleanEndpoint = endpoint.lstrip('/')
url = f"{self.base_url}/{clean_endpoint}" url = f"{self.baseUrl}/{cleanEndpoint}"
logger.debug(f"Making Graph API call: {method} {url}") logger.debug(f"Making Graph API call: {method} {url}")
timeout = aiohttp.ClientTimeout(total=30) timeout = aiohttp.ClientTimeout(total=30)
@ -106,10 +106,10 @@ class SharepointService:
logger.error(f"Error making Graph API call: {str(e)}") logger.error(f"Error making Graph API call: {str(e)}")
return {"error": f"Error making Graph API call: {str(e)}"} return {"error": f"Error making Graph API call: {str(e)}"}
async def discover_sites(self) -> List[Dict[str, Any]]: async def discoverSites(self) -> List[Dict[str, Any]]:
"""Discover all SharePoint sites accessible to the user.""" """Discover all SharePoint sites accessible to the user."""
try: try:
result = await self._make_graph_api_call("sites?search=*") result = await self._makeGraphApiCall("sites?search=*")
if "error" in result: if "error" in result:
logger.error(f"Error discovering SharePoint sites: {result['error']}") logger.error(f"Error discovering SharePoint sites: {result['error']}")
@ -118,9 +118,9 @@ class SharepointService:
sites = result.get("value", []) sites = result.get("value", [])
logger.info(f"Discovered {len(sites)} SharePoint sites") logger.info(f"Discovered {len(sites)} SharePoint sites")
processed_sites = [] processedSites = []
for site in sites: for site in sites:
site_info = { siteInfo = {
"id": site.get("id"), "id": site.get("id"),
"displayName": site.get("displayName"), "displayName": site.get("displayName"),
"name": site.get("name"), "name": site.get("name"),
@ -129,24 +129,24 @@ class SharepointService:
"createdDateTime": site.get("createdDateTime"), "createdDateTime": site.get("createdDateTime"),
"lastModifiedDateTime": site.get("lastModifiedDateTime") "lastModifiedDateTime": site.get("lastModifiedDateTime")
} }
processed_sites.append(site_info) processedSites.append(siteInfo)
logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}") logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
return processed_sites return processedSites
except Exception as e: except Exception as e:
logger.error(f"Error discovering SharePoint sites: {str(e)}") logger.error(f"Error discovering SharePoint sites: {str(e)}")
return [] return []
async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]: async def findSiteByName(self, siteName: str) -> Optional[Dict[str, Any]]:
"""Find a specific SharePoint site by name using direct Graph API call.""" """Find a specific SharePoint site by name using direct Graph API call."""
try: try:
# Try to get the site directly by name using Graph API # Try to get the site directly by name using Graph API
endpoint = f"sites/{site_name}" endpoint = f"sites/{siteName}"
result = await self._make_graph_api_call(endpoint) result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result: if result and "error" not in result:
site_info = { siteInfo = {
"id": result.get("id"), "id": result.get("id"),
"displayName": result.get("displayName"), "displayName": result.get("displayName"),
"name": result.get("name"), "name": result.get("name"),
@ -155,15 +155,15 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"), "createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime") "lastModifiedDateTime": result.get("lastModifiedDateTime")
} }
logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}") logger.info(f"Found site directly: {siteInfo['displayName']} - {siteInfo['webUrl']}")
return site_info return siteInfo
except Exception as e: except Exception as e:
logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}") logger.debug(f"Direct site lookup failed for '{siteName}': {str(e)}")
# Fallback to discovery if direct lookup fails # Fallback to discovery if direct lookup fails
logger.info(f"Direct lookup failed, trying discovery for site: {site_name}") logger.info(f"Direct lookup failed, trying discovery for site: {siteName}")
sites = await self.discover_sites() sites = await self.discoverSites()
if not sites: if not sites:
logger.warning("No sites discovered") logger.warning("No sites discovered")
return None return None
@ -174,46 +174,46 @@ class SharepointService:
# Try exact match first # Try exact match first
for site in sites: for site in sites:
if site.get("displayName", "").strip().lower() == site_name.strip().lower(): if site.get("displayName", "").strip().lower() == siteName.strip().lower():
logger.info(f"Found exact match: {site.get('displayName')}") logger.info(f"Found exact match: {site.get('displayName')}")
return site return site
# Try partial match # Try partial match
for site in sites: for site in sites:
if site_name.lower() in site.get("displayName", "").lower(): if siteName.lower() in site.get("displayName", "").lower():
logger.info(f"Found partial match: {site.get('displayName')}") logger.info(f"Found partial match: {site.get('displayName')}")
return site return site
logger.warning(f"No site found matching: {site_name}") logger.warning(f"No site found matching: {siteName}")
return None return None
async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]: async def findSiteByWebUrl(self, webUrl: str) -> Optional[Dict[str, Any]]:
"""Find a SharePoint site using its web URL (useful for guest sites).""" """Find a SharePoint site using its web URL (useful for guest sites)."""
try: try:
# Use the web URL format: sites/{hostname}:/sites/{site-path} # Use the web URL format: sites/{hostname}:/sites/{site-path}
# Extract hostname and site path from the web URL # Extract hostname and site path from the web URL
if not web_url.startswith("https://"): if not webUrl.startswith("https://"):
web_url = f"https://{web_url}" webUrl = f"https://{webUrl}"
# Parse the URL to extract hostname and site path # Parse the URL to extract hostname and site path
from urllib.parse import urlparse from urllib.parse import urlparse
parsed = urlparse(web_url) parsed = urlparse(webUrl)
hostname = parsed.hostname hostname = parsed.hostname
path_parts = parsed.path.strip('/').split('/') pathParts = parsed.path.strip('/').split('/')
if len(path_parts) >= 2 and path_parts[0] == 'sites': if len(pathParts) >= 2 and pathParts[0] == 'sites':
site_path = '/'.join(path_parts[1:]) # Everything after 'sites/' sitePath = '/'.join(pathParts[1:]) # Everything after 'sites/'
else: else:
logger.error(f"Invalid SharePoint URL format: {web_url}") logger.error(f"Invalid SharePoint URL format: {webUrl}")
return None return None
endpoint = f"sites/{hostname}:/sites/{site_path}" endpoint = f"sites/{hostname}:/sites/{sitePath}"
logger.debug(f"Trying web URL format: {endpoint}") logger.debug(f"Trying web URL format: {endpoint}")
result = await self._make_graph_api_call(endpoint) result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result: if result and "error" not in result:
site_info = { siteInfo = {
"id": result.get("id"), "id": result.get("id"),
"displayName": result.get("displayName"), "displayName": result.get("displayName"),
"name": result.get("name"), "name": result.get("name"),
@ -222,33 +222,33 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"), "createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime") "lastModifiedDateTime": result.get("lastModifiedDateTime")
} }
logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") logger.info(f"Found site by web URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
return site_info return siteInfo
else: else:
logger.warning(f"Site not found using web URL: {web_url}") logger.warning(f"Site not found using web URL: {webUrl}")
return None return None
except Exception as e: except Exception as e:
logger.error(f"Error finding site by web URL: {str(e)}") logger.error(f"Error finding site by web URL: {str(e)}")
return None return None
async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]: async def findSiteByUrl(self, hostname: str, sitePath: str) -> Optional[Dict[str, Any]]:
"""Find a SharePoint site using the site URL format.""" """Find a SharePoint site using the site URL format."""
try: try:
# For guest sites, try different URL formats # For guest sites, try different URL formats
url_formats = [ urlFormats = [
f"sites/{hostname}:/sites/{site_path}", # Standard format f"sites/{hostname}:/sites/{sitePath}", # Standard format
f"sites/{hostname}:/sites/{site_path}/", # With trailing slash f"sites/{hostname}:/sites/{sitePath}/", # With trailing slash
f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase f"sites/{hostname}:/sites/{sitePath.lower()}", # Lowercase
f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash f"sites/{hostname}:/sites/{sitePath.lower()}/", # Lowercase with slash
] ]
for endpoint in url_formats: for endpoint in urlFormats:
logger.debug(f"Trying URL format: {endpoint}") logger.debug(f"Trying URL format: {endpoint}")
result = await self._make_graph_api_call(endpoint) result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result: if result and "error" not in result:
site_info = { siteInfo = {
"id": result.get("id"), "id": result.get("id"),
"displayName": result.get("displayName"), "displayName": result.get("displayName"),
"name": result.get("name"), "name": result.get("name"),
@ -257,29 +257,29 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"), "createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime") "lastModifiedDateTime": result.get("lastModifiedDateTime")
} }
logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") logger.info(f"Found site by URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
return site_info return siteInfo
else: else:
logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}") logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")
logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}") logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{sitePath}")
return None return None
except Exception as e: except Exception as e:
logger.error(f"Error finding site by URL: {str(e)}") logger.error(f"Error finding site by URL: {str(e)}")
return None return None
async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]: async def getFolderByPath(self, siteId: str, folderPath: str) -> Optional[Dict[str, Any]]:
"""Get folder information by path within a site.""" """Get folder information by path within a site."""
try: try:
# Clean the path # Clean the path
clean_path = folder_path.lstrip('/') cleanPath = folderPath.lstrip('/')
endpoint = f"sites/{site_id}/drive/root:/{clean_path}" endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
result = await self._make_graph_api_call(endpoint) result = await self._makeGraphApiCall(endpoint)
if "error" in result: if "error" in result:
logger.warning(f"Folder not found at path {folder_path}: {result['error']}") logger.warning(f"Folder not found at path {folderPath}: {result['error']}")
return None return None
return result return result
@ -288,43 +288,43 @@ class SharepointService:
logger.error(f"Error getting folder by path: {str(e)}") logger.error(f"Error getting folder by path: {str(e)}")
return None return None
async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
    """Upload a file into a folder of a SharePoint site's drive.

    Args:
        siteId: Site identifier inserted into the ``sites/{siteId}`` endpoint.
        folderPath: Target folder relative to the drive root. Leading and
            trailing slashes are ignored; "" or "/" addresses the root folder.
        fileName: Name of the file within that folder.
        content: Raw bytes sent as the body of the PUT request.

    Returns:
        The dict returned by the Graph API call. On failure this is a dict
        with an "error" key (either passed through from the API call or
        built here when an exception is caught).
    """
    try:
        # Strip slashes on both sides so a root upload ("" or "/") does not
        # produce an empty path segment ("root://name:/content").
        folder = folderPath.strip('/')
        uploadPath = f"{folder}/{fileName}" if folder else fileName
        endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"

        logger.info(f"Uploading file to: {endpoint}")

        result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)

        if "error" in result:
            logger.error(f"Upload failed: {result['error']}")
            return result

        logger.info(f"File uploaded successfully: {fileName}")
        return result

    except Exception as e:
        logger.error(f"Error uploading file: {str(e)}")
        return {"error": f"Error uploading file: {str(e)}"}
async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]: async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
"""Download a file from SharePoint.""" """Download a file from SharePoint."""
try: try:
if self.access_token is None: if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.") logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return None return None
endpoint = f"sites/{site_id}/drive/items/{file_id}/content" endpoint = f"sites/{siteId}/drive/items/{fileId}/content"
headers = {"Authorization": f"Bearer {self.access_token}"} headers = {"Authorization": f"Bearer {self.accessToken}"}
timeout = aiohttp.ClientTimeout(total=30) timeout = aiohttp.ClientTimeout(total=30)
async with aiohttp.ClientSession(timeout=timeout) as session: async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response: async with session.get(f"{self.baseUrl}/{endpoint}", headers=headers) as response:
if response.status == 200: if response.status == 200:
return await response.read() return await response.read()
else: else:
@ -335,32 +335,32 @@ class SharepointService:
logger.error(f"Error downloading file: {str(e)}") logger.error(f"Error downloading file: {str(e)}")
return None return None
async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]: async def listFolderContents(self, siteId: str, folderPath: str = "") -> List[Dict[str, Any]]:
"""List contents of a folder.""" """List contents of a folder."""
try: try:
if not folder_path or folder_path == "/": if not folderPath or folderPath == "/":
endpoint = f"sites/{site_id}/drive/root/children" endpoint = f"sites/{siteId}/drive/root/children"
else: else:
clean_path = folder_path.lstrip('/') cleanPath = folderPath.lstrip('/')
endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children" endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"
result = await self._make_graph_api_call(endpoint) result = await self._makeGraphApiCall(endpoint)
if "error" in result: if "error" in result:
logger.warning(f"Failed to list folder contents: {result['error']}") logger.warning(f"Failed to list folder contents: {result['error']}")
return None return None
items = result.get("value", []) items = result.get("value", [])
processed_items = [] processedItems = []
for item in items: for item in items:
# Determine if it's a folder or file # Determine if it's a folder or file
is_folder = 'folder' in item isFolder = 'folder' in item
item_info = { itemInfo = {
"id": item.get("id"), "id": item.get("id"),
"name": item.get("name"), "name": item.get("name"),
"type": "folder" if is_folder else "file", "type": "folder" if isFolder else "file",
"size": item.get("size", 0), "size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"), "createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"), "lastModifiedDateTime": item.get("lastModifiedDateTime"),
@ -368,42 +368,42 @@ class SharepointService:
} }
if "file" in item: if "file" in item:
item_info["mimeType"] = item["file"].get("mimeType") itemInfo["mimeType"] = item["file"].get("mimeType")
item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
if "folder" in item: if "folder" in item:
item_info["childCount"] = item["folder"].get("childCount", 0) itemInfo["childCount"] = item["folder"].get("childCount", 0)
processed_items.append(item_info) processedItems.append(itemInfo)
return processed_items return processedItems
except Exception as e: except Exception as e:
logger.error(f"Error listing folder contents: {str(e)}") logger.error(f"Error listing folder contents: {str(e)}")
return [] return []
async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]: async def searchFiles(self, siteId: str, query: str) -> List[Dict[str, Any]]:
"""Search for files in a site.""" """Search for files in a site."""
try: try:
search_query = query.replace("'", "''") # Escape single quotes for OData searchQuery = query.replace("'", "''") # Escape single quotes for OData
endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')" endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"
result = await self._make_graph_api_call(endpoint) result = await self._makeGraphApiCall(endpoint)
if "error" in result: if "error" in result:
logger.warning(f"Search failed: {result['error']}") logger.warning(f"Search failed: {result['error']}")
return [] return []
items = result.get("value", []) items = result.get("value", [])
processed_items = [] processedItems = []
for item in items: for item in items:
is_folder = 'folder' in item isFolder = 'folder' in item
item_info = { itemInfo = {
"id": item.get("id"), "id": item.get("id"),
"name": item.get("name"), "name": item.get("name"),
"type": "folder" if is_folder else "file", "type": "folder" if isFolder else "file",
"size": item.get("size", 0), "size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"), "createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"), "lastModifiedDateTime": item.get("lastModifiedDateTime"),
@ -412,64 +412,64 @@ class SharepointService:
} }
if "file" in item: if "file" in item:
item_info["mimeType"] = item["file"].get("mimeType") itemInfo["mimeType"] = item["file"].get("mimeType")
item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
processed_items.append(item_info) processedItems.append(itemInfo)
return processed_items return processedItems
except Exception as e: except Exception as e:
logger.error(f"Error searching files: {str(e)}") logger.error(f"Error searching files: {str(e)}")
return [] return []
async def copyFileAsync(self, siteId: str, sourceFolder: str, sourceFile: str, destFolder: str, destFile: str) -> None:
    """Copy a file from a source folder to a destination folder (download, then upload).

    Args:
        siteId: Site identifier passed to the download and upload calls.
        sourceFolder: Folder containing the file to copy.
        sourceFile: Name of the file to copy.
        destFolder: Folder that receives the copy.
        destFile: Name of the copy inside ``destFolder``.

    Raises:
        Exception: If the source cannot be downloaded, if the download or
            upload call raises, or if the upload reports an error.
    """
    # Built before the try block so the error handler can always reference it.
    sourcePath = f"{sourceFolder}/{sourceFile}"

    try:
        # First, download the source file
        fileContent = await self.downloadFileByPath(siteId=siteId, filePath=sourcePath)
        if not fileContent:
            raise Exception(f"Failed to download source file: {sourcePath}")

        # Upload to destination
        uploadResult = await self.uploadFile(
            siteId=siteId,
            folderPath=destFolder,
            fileName=destFile,
            content=fileContent
        )
    except Exception as e:
        # Provide more specific error information
        errorMsg = str(e)
        if "itemNotFound" in errorMsg or "404" in errorMsg:
            raise Exception(f"Source file not found (404): {sourcePath} - {errorMsg}") from e
        raise Exception(f"Error copying file: {errorMsg}") from e

    # uploadFile reports failures through an "error" key instead of raising;
    # without this check a failed upload would be logged as a successful copy.
    # Checked outside the try block so an upload-side "404" is not reported
    # as a missing source file.
    if isinstance(uploadResult, dict) and "error" in uploadResult:
        raise Exception(
            f"Error copying file: upload to {destFolder}/{destFile} failed: {uploadResult['error']}"
        )

    logger.info(f"File copied: {sourceFile} -> {destFile}")
async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]: async def downloadFileByPath(self, siteId: str, filePath: str) -> Optional[bytes]:
"""Download a file by its path within a site.""" """Download a file by its path within a site."""
try: try:
if self.access_token is None: if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.") logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return None return None
# Clean the path # Clean the path
clean_path = file_path.strip('/') cleanPath = filePath.strip('/')
endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content" endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content"
# Use direct HTTP call for file downloads (binary content) # Use direct HTTP call for file downloads (binary content)
headers = { headers = {
"Authorization": f"Bearer {self.access_token}", "Authorization": f"Bearer {self.accessToken}",
} }
# Remove leading slash from endpoint to avoid double slash # Remove leading slash from endpoint to avoid double slash
clean_endpoint = endpoint.lstrip('/') cleanEndpoint = endpoint.lstrip('/')
url = f"{self.base_url}/{clean_endpoint}" url = f"{self.baseUrl}/{cleanEndpoint}"
logger.debug(f"Downloading file: GET {url}") logger.debug(f"Downloading file: GET {url}")
timeout = aiohttp.ClientTimeout(total=30) timeout = aiohttp.ClientTimeout(total=30)

View file

@ -7,7 +7,7 @@ import logging
from typing import Any, Optional, Dict, Callable, List from typing import Any, Optional, Dict, Callable, List
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.eventManagement import eventManager from modules.shared.eventManagement import eventManager
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared import jsonUtils from modules.shared import jsonUtils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -122,7 +122,7 @@ class UtilsService:
float: Current UTC timestamp in seconds float: Current UTC timestamp in seconds
""" """
try: try:
return get_utc_timestamp() return getUtcTimestamp()
except Exception as e: except Exception as e:
logger.error(f"Error getting UTC timestamp: {str(e)}") logger.error(f"Error getting UTC timestamp: {str(e)}")
return 0.0 return 0.0
@ -185,6 +185,75 @@ class UtilsService:
# Silent fail to never break main flow # Silent fail to never break main flow
pass pass
# ===== Prompt sanitization =====
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
    """
    Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.

    Control characters other than tab, newline and carriage return are removed,
    then escaping is applied according to ``contentType``:

    - "userinput": backslashes, curly braces and both quote kinds are escaped
      and the result is wrapped in single quotes.
    - "json": backslashes, double quotes, newlines, carriage returns and tabs
      are escaped.
    - "document", "text" or any other value: same as "json", plus single quotes.

    Args:
        content: The content to sanitize
        contentType: Type of content ("text", "userinput", "json", "document")

    Returns:
        Sanitized content ready for prompt insertion; "" for empty content, or
        a fixed error marker string if sanitization raises.
    """
    if not content:
        return ""

    try:
        import re

        # Convert to string if not already
        contentStr = str(content)

        # Remove null bytes and control characters (tab, LF and CR are kept)
        sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', contentStr)

        if contentType == "userinput":
            # Backslashes must be escaped first: otherwise an input such as \'
            # turns into \\' and the quote is effectively unescaped again.
            sanitized = sanitized.replace('\\', '\\\\')
            # Escape curly braces to prevent placeholder injection
            sanitized = sanitized.replace('{', '{{').replace('}', '}}')
            # Escape quotes and wrap in single quotes
            sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
            return f"'{sanitized}'"

        # Shared escaping for "json", "document" and "text"; backslashes go
        # first so the escapes added afterwards are not doubled.
        sanitized = sanitized.replace('\\', '\\\\').replace('"', '\\"')
        if contentType != "json":
            # JSON content keeps single quotes untouched
            sanitized = sanitized.replace("'", "\\'")
        sanitized = sanitized.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')

        return sanitized

    except Exception as e:
        logger.error(f"Error sanitizing prompt content: {str(e)}")
        # Return a safe fallback
        return "[ERROR: Content could not be safely sanitized]"
# ===== JSON utility wrappers ===== # ===== JSON utility wrappers =====
def jsonStripCodeFences(self, text: str) -> str: def jsonStripCodeFences(self, text: str) -> str:

View file

@ -34,54 +34,54 @@ class AttributeDefinition(BaseModel):
MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {} MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {}
def registerModelLabels(modelName: str, modelLabel: Dict[str, str], labels: Dict[str, Dict[str, str]]):
    """
    Store the translated labels of a model and of its attributes in MODEL_LABELS.

    An existing entry for the same model name is replaced.

    Args:
        modelName: Name of the model class
        modelLabel: Dictionary mapping language codes to model labels
                    e.g. {"en": "Prompt", "fr": "Invite"}
        labels: Dictionary mapping attribute names to their translations
                e.g. {"name": {"en": "Name", "fr": "Nom"}}
    """
    registryEntry = {"model": modelLabel, "attributes": labels}
    MODEL_LABELS[modelName] = registryEntry
def getModelLabels(modelName: str, language: str = "en") -> Dict[str, str]:
    """
    Look up the attribute labels registered for a model in one language.

    For each attribute the requested language is used when available, then
    the "en" translation, then the attribute name itself.

    Args:
        modelName: Name of the model class
        language: Language code (default: "en")

    Returns:
        Dictionary mapping attribute names to their labels in the specified language
    """
    attributeLabels = MODEL_LABELS.get(modelName, {}).get("attributes", {})

    resolved = {}
    for attr, translations in attributeLabels.items():
        fallback = translations.get("en", attr)
        resolved[attr] = translations.get(language, fallback)
    return resolved
def getModelLabel(modelName: str, language: str = "en") -> str:
    """
    Look up the label registered for a model in one language.

    Args:
        modelName: Name of the model class
        language: Language code (default: "en")

    Returns:
        Model label in the specified language, else the "en" label, else the model name
    """
    modelLabel = MODEL_LABELS.get(modelName, {}).get("model", {})

    if language in modelLabel:
        return modelLabel[language]
    # Requested language missing: fall back to English, then to the raw name
    return modelLabel.get("en", modelName)
def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]: def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]:
@ -100,8 +100,8 @@ def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguag
attributes = [] attributes = []
model_name = modelClass.__name__ model_name = modelClass.__name__
labels = get_model_labels(model_name, userLanguage) labels = getModelLabels(model_name, userLanguage)
model_label = get_model_label(model_name, userLanguage) model_label = getModelLabel(model_name, userLanguage)
# Pydantic v2 only # Pydantic v2 only
fields = modelClass.model_fields fields = modelClass.model_fields

View file

@ -19,42 +19,42 @@ class DailyRotatingFileHandler(RotatingFileHandler):
The log file name includes the current date and switches at midnight. The log file name includes the current date and switches at midnight.
""" """
def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs): def __init__(self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs):
self.log_dir = log_dir self.logDir = logDir
self.filename_prefix = filename_prefix self.filenamePrefix = filenamePrefix
self.current_date = None self.currentDate = None
self.current_file = None self.currentFile = None
# Initialize with today's file # Initialize with today's file
self._update_file_if_needed() self._updateFileIfNeeded()
# Call parent constructor with current file # Call parent constructor with current file
super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs) super().__init__(self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs)
def _update_file_if_needed(self): def _updateFileIfNeeded(self):
"""Update the log file if the date has changed""" """Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d") today = datetime.now().strftime("%Y%m%d")
if self.current_date != today: if self.currentDate != today:
self.current_date = today self.currentDate = today
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log") newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
if self.current_file != new_file: if self.currentFile != newFile:
self.current_file = new_file self.currentFile = newFile
return True return True
return False return False
def emit(self, record): def emit(self, record):
"""Emit a log record, switching files if date has changed""" """Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file # Check if we need to switch to a new file
if self._update_file_if_needed(): if self._updateFileIfNeeded():
# Close current file and open new one # Close current file and open new one
if self.stream: if self.stream:
self.stream.close() self.stream.close()
self.stream = None self.stream = None
# Update the baseFilename for the parent class # Update the baseFilename for the parent class
self.baseFilename = self.current_file self.baseFilename = self.currentFile
# Reopen the stream # Reopen the stream
if not self.delay: if not self.delay:
self.stream = self._open() self.stream = self._open()
@ -68,9 +68,9 @@ class AuditLogger:
def __init__(self):
    """Start with no logger attached, then run the audit logger setup."""
    # Placeholder until _setupAuditLogger() has run
    self.logger = None
    self._setupAuditLogger()
def _setup_audit_logger(self): def _setupAuditLogger(self):
"""Setup the audit logger with daily file rotation""" """Setup the audit logger with daily file rotation"""
try: try:
# Get log directory from config # Get log directory from config
@ -96,10 +96,10 @@ class AuditLogger:
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5)) backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler( fileHandler = DailyRotatingFileHandler(
log_dir=logDir, logDir=logDir,
filename_prefix="log_audit", filenamePrefix="log_audit",
max_bytes=rotationSize, maxBytes=rotationSize,
backup_count=backupCount backupCount=backupCount
) )
# Create formatter for audit log # Create formatter for audit log
@ -120,9 +120,9 @@ class AuditLogger:
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)
self.logger.error(f"Failed to setup audit logger: {str(e)}") self.logger.error(f"Failed to setup audit logger: {str(e)}")
def log_event(self, def logEvent(self,
user_id: str, userId: str,
mandate_id: str, mandateId: str,
category: str, category: str,
action: str, action: str,
details: str = "", details: str = "",
@ -131,8 +131,8 @@ class AuditLogger:
Log an audit event Log an audit event
Args: Args:
user_id: User identifier userId: User identifier
mandate_id: Mandate identifier (can be empty if not applicable) mandateId: Mandate identifier (can be empty if not applicable)
category: Event category (e.g., 'key', 'access', 'data') category: Event category (e.g., 'key', 'access', 'data')
action: Specific action (e.g., 'decode', 'login', 'logout') action: Specific action (e.g., 'decode', 'login', 'logout')
details: Additional details about the event details: Additional details about the event
@ -148,50 +148,50 @@ class AuditLogger:
# Format the audit log entry # Format the audit log entry
# Format: timestamp | userid | mandateid | category | action | details # Format: timestamp | userid | mandateid | category | action | details
audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}" auditEntry = f"{userId} | {mandateId} | {category} | {action} | {details}"
# Log the event # Log the event
self.logger.info(audit_entry) self.logger.info(auditEntry)
except Exception as e: except Exception as e:
# Use standard logger as fallback # Use standard logger as fallback
logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}") logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None: def logKeyAccess(self, userId: str, mandateId: str, keyName: str, action: str) -> None:
"""Log key access events (decode/encode)""" """Log key access events (decode/encode)"""
self.log_event( self.logEvent(
user_id=user_id, userId=userId,
mandate_id=mandate_id, mandateId=mandateId,
category="key", category="key",
action=action, action=action,
details=key_name details=keyName
) )
def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None: def logUserAccess(self, userId: str, mandateId: str, action: str, successInfo: str = "") -> None:
"""Log user access events (login/logout)""" """Log user access events (login/logout)"""
self.log_event( self.logEvent(
user_id=user_id, userId=userId,
mandate_id=mandate_id, mandateId=mandateId,
category="access", category="access",
action=action, action=action,
details=success_info details=successInfo
) )
def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None: def logDataAccess(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
"""Log data access events""" """Log data access events"""
self.log_event( self.logEvent(
user_id=user_id, userId=userId,
mandate_id=mandate_id, mandateId=mandateId,
category="data", category="data",
action=action, action=action,
details=details details=details
) )
def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None: def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
"""Log security-related events""" """Log security-related events"""
self.log_event( self.logEvent(
user_id=user_id, userId=userId,
mandate_id=mandate_id, mandateId=mandateId,
category="security", category="security",
action=action, action=action,
details=details details=details

View file

@ -199,10 +199,10 @@ class Configuration:
# Log audit event for secret key access # Log audit event for secret key access
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access( audit_logger.logKeyAccess(
user_id=user_id, userId=user_id,
mandate_id="system", mandateId="system",
key_name=key, keyName=key,
action="decode" action="decode"
) )
except Exception: except Exception:
@ -211,9 +211,9 @@ class Configuration:
if value.startswith("{") and value.endswith("}"): if value.startswith("{") and value.endswith("}"):
# Handle JSON secrets (keys ending with _API_KEY that contain JSON) # Handle JSON secrets (keys ending with _API_KEY that contain JSON)
return handleSecretJson(value, user_id, key) return handleSecretJson(value, userId=user_id, keyName=key)
else: else:
return handleSecretText(value, user_id, key) return handleSecretText(value, userId=user_id, keyName=key)
return value return value
return default return default
@ -235,31 +235,31 @@ class Configuration:
"""Set a configuration value (for testing/overrides)""" """Set a configuration value (for testing/overrides)"""
self._data[key] = value self._data[key] = value
def handleSecretText(value: str, userId: str = "system", keyName: str = "unknown") -> str:
    """
    Return a secret in usable form, decrypting it first when it is encrypted.

    Args:
        value: The secret value to handle (may be encrypted)
        userId: The user ID making the request (default: "system")
        keyName: The name of the key being decrypted (default: "unknown")

    Returns:
        str: The result of decryptValue() when _isEncryptedValue() reports the
        value as encrypted, otherwise ``value`` unchanged
    """
    return decryptValue(value, userId, keyName) if _isEncryptedValue(value) else value
def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str: def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown") -> str:
""" """
Handle JSON secret values (like Google service account keys) with encryption/decryption support. Handle JSON secret values (like Google service account keys) with encryption/decryption support.
Validates that the value is valid JSON after decryption. Validates that the value is valid JSON after decryption.
Args: Args:
value: The JSON secret value to handle (may be encrypted) value: The JSON secret value to handle (may be encrypted)
user_id: The user ID making the request (default: "system") userId: The user ID making the request (default: "system")
key_name: The name of the key being decrypted (default: "unknown") keyName: The name of the key being decrypted (default: "unknown")
Returns: Returns:
str: Processed JSON secret value (decrypted if encrypted) str: Processed JSON secret value (decrypted if encrypted)
@ -268,15 +268,15 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
ValueError: If the value is not valid JSON after decryption ValueError: If the value is not valid JSON after decryption
""" """
# Decrypt if encrypted # Decrypt if encrypted
if _is_encrypted_value(value): if _isEncryptedValue(value):
decrypted_value = decrypt_value(value, user_id, key_name) decryptedValue = decryptValue(value, userId, keyName)
else: else:
decrypted_value = value decryptedValue = value
try: try:
# Validate that it's valid JSON # Validate that it's valid JSON
json.loads(decrypted_value) json.loads(decryptedValue)
return decrypted_value return decryptedValue
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON in secret value: {e}") raise ValueError(f"Invalid JSON in secret value: {e}")
@ -284,12 +284,12 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
# Structure: {userId: {keyName: [timestamps]}}
# Read and written by _checkDecryptionRateLimit(); one timestamp per allowed attempt.
_decryption_attempts = {}
def _get_master_key(env_type: str = None) -> bytes: def _getMasterKey(envType: str = None) -> bytes:
""" """
Get the master key for the specified environment. Get the master key for the specified environment.
Args: Args:
env_type: The environment type (dev, int, prod, etc.). If None, uses current config. envType: The environment type (dev, int, prod, etc.). If None, uses current config.
Returns: Returns:
bytes: The master key for encryption/decryption bytes: The master key for encryption/decryption
@ -298,24 +298,24 @@ def _get_master_key(env_type: str = None) -> bytes:
ValueError: If no master key is found ValueError: If no master key is found
""" """
# Get the key location from config # Get the key location from config
key_location = APP_CONFIG.get('APP_KEY_SYSVAR') keyLocation = APP_CONFIG.get('APP_KEY_SYSVAR')
if env_type is None: if envType is None:
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev') envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
if not key_location: if not keyLocation:
raise ValueError("APP_KEY_SYSVAR not configured") raise ValueError("APP_KEY_SYSVAR not configured")
# First try to get from environment variable # First try to get from environment variable
master_key = os.environ.get(key_location) masterKey = os.environ.get(keyLocation)
if master_key: if masterKey:
# If found in environment, use it directly # If found in environment, use it directly
return master_key.encode('utf-8') return masterKey.encode('utf-8')
# If not in environment, try to read from file # If not in environment, try to read from file
if os.path.exists(key_location): if os.path.exists(keyLocation):
try: try:
with open(key_location, 'r') as f: with open(keyLocation, 'r') as f:
content = f.read().strip() content = f.read().strip()
# Parse the key file format: env = key # Parse the key file format: env = key
@ -326,26 +326,26 @@ def _get_master_key(env_type: str = None) -> bytes:
continue continue
if '=' in line: if '=' in line:
key_env, key_value = line.split('=', 1) keyEnv, keyValue = line.split('=', 1)
key_env = key_env.strip() keyEnv = keyEnv.strip()
key_value = key_value.strip() keyValue = keyValue.strip()
if key_env == env_type: if keyEnv == envType:
return key_value.encode('utf-8') return keyValue.encode('utf-8')
raise ValueError(f"No key found for environment '{env_type}' in {key_location}") raise ValueError(f"No key found for environment '{envType}' in {keyLocation}")
except Exception as e: except Exception as e:
raise ValueError(f"Error reading key file {key_location}: {e}") raise ValueError(f"Error reading key file {keyLocation}: {e}")
raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path") raise ValueError(f"Master key not found. Checked environment variable '{keyLocation}' and file path")
def _derive_encryption_key(master_key: bytes) -> bytes: def _deriveEncryptionKey(masterKey: bytes) -> bytes:
""" """
Derive a 32-byte encryption key from the master key using PBKDF2. Derive a 32-byte encryption key from the master key using PBKDF2.
Args: Args:
master_key: The master key bytes masterKey: The master key bytes
Returns: Returns:
bytes: 32-byte derived key suitable for Fernet bytes: 32-byte derived key suitable for Fernet
@ -360,9 +360,9 @@ def _derive_encryption_key(master_key: bytes) -> bytes:
iterations=100000, iterations=100000,
) )
return base64.urlsafe_b64encode(kdf.derive(master_key)) return base64.urlsafe_b64encode(kdf.derive(masterKey))
def _is_encrypted_value(value: str) -> bool: def _isEncryptedValue(value: str) -> bool:
""" """
Check if a value is encrypted (starts with environment-specific prefix). Check if a value is encrypted (starts with environment-specific prefix).
@ -382,64 +382,64 @@ def _is_encrypted_value(value: str) -> bool:
value.startswith('TEST_ENC:') or value.startswith('TEST_ENC:') or
value.startswith('STAGING_ENC:')) value.startswith('STAGING_ENC:'))
def _getEncryptionPrefix(envType: str) -> str:
    """
    Build the marker that is placed in front of values encrypted for an environment.

    Args:
        envType: The environment type (dev, int, prod, etc.)

    Returns:
        str: ``envType`` upper-cased and followed by "_ENC:" (e.g. "dev" -> "DEV_ENC:")
    """
    return envType.upper() + "_ENC:"
def _checkDecryptionRateLimit(userId: str, keyName: str, maxPerSecond: int = 10) -> bool:
    """
    Decide whether another decryption is allowed for this user/key pair.

    A sliding one-second window is kept per user and key in the module-level
    ``_decryption_attempts`` mapping. Attempts older than one second are dropped
    on every call; an allowed attempt is recorded, a rejected one is not.

    Args:
        userId: The user ID making the request
        keyName: The name of the key being decrypted
        maxPerSecond: Maximum decryptions per second (default: 10)

    Returns:
        bool: True if allowed, False if rate limited
    """
    # Monotonic clock: a wall-clock adjustment (NTP step, manual change) must not
    # make old attempts look recent and hold the limiter shut, nor expire them early.
    now = time.monotonic()

    attemptsByKey = _decryption_attempts.setdefault(userId, {})

    # Keep only the attempts made within the last second
    recent = [stamp for stamp in attemptsByKey.get(keyName, ()) if now - stamp < 1.0]
    attemptsByKey[keyName] = recent

    if len(recent) >= maxPerSecond:
        logger.warning(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' ({maxPerSecond}/sec)")
        return False

    # Record this attempt
    recent.append(now)
    return True
def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str: def encryptValue(value: str, envType: str = None, userId: str = "system", keyName: str = "unknown") -> str:
""" """
Encrypt a value using the master key for the specified environment. Encrypt a value using the master key for the specified environment.
Args: Args:
value: The plain text value to encrypt value: The plain text value to encrypt
env_type: The environment type (dev, int, prod). If None, uses current environment. envType: The environment type (dev, int, prod). If None, uses current environment.
user_id: The user ID making the request (default: "system") userId: The user ID making the request (default: "system")
key_name: The name of the key being encrypted (default: "unknown") keyName: The name of the key being encrypted (default: "unknown")
Returns: Returns:
str: The encrypted value with prefix str: The encrypted value with prefix
@ -447,48 +447,48 @@ def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key
Raises: Raises:
ValueError: If encryption fails ValueError: If encryption fails
""" """
if env_type is None: if envType is None:
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev') envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
try: try:
master_key = _get_master_key(env_type) masterKey = _getMasterKey(envType)
derived_key = _derive_encryption_key(master_key) derivedKey = _deriveEncryptionKey(masterKey)
fernet = Fernet(derived_key) fernet = Fernet(derivedKey)
# Encrypt the value # Encrypt the value
encrypted_bytes = fernet.encrypt(value.encode('utf-8')) encryptedBytes = fernet.encrypt(value.encode('utf-8'))
encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8') encryptedB64 = base64.urlsafe_b64encode(encryptedBytes).decode('utf-8')
# Add environment prefix # Add environment prefix
prefix = _get_encryption_prefix(env_type) prefix = _getEncryptionPrefix(envType)
encrypted_value = f"{prefix}{encrypted_b64}" encryptedValue = f"{prefix}{encryptedB64}"
# Log audit event for encryption # Log audit event for encryption
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access( audit_logger.logKeyAccess(
user_id=user_id, userId=userId,
mandate_id="system", mandateId="system",
key_name=key_name, keyName=keyName,
action="encrypt" action="encrypt"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails
pass pass
return encrypted_value return encryptedValue
except Exception as e: except Exception as e:
raise ValueError(f"Encryption failed: {e}") raise ValueError(f"Encryption failed: {e}")
def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str: def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str:
""" """
Decrypt a value using the master key for the current environment. Decrypt a value using the master key for the current environment.
Args: Args:
encrypted_value: The encrypted value with prefix encryptedValue: The encrypted value with prefix
user_id: The user ID making the request (default: "system") userId: The user ID making the request (default: "system")
key_name: The name of the key being decrypted (default: "unknown") keyName: The name of the key being decrypted (default: "unknown")
Returns: Returns:
str: The decrypted plain text value str: The decrypted plain text value
@ -496,59 +496,59 @@ def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str =
Raises: Raises:
ValueError: If decryption fails ValueError: If decryption fails
""" """
if not _is_encrypted_value(encrypted_value): if not _isEncryptedValue(encryptedValue):
return encrypted_value # Return as-is if not encrypted return encryptedValue # Return as-is if not encrypted
# Check rate limiting (10 per second per user per key) # Check rate limiting (10 per second per user per key)
if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10): if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)") raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
try: try:
# Extract environment type from prefix # Extract environment type from prefix
if encrypted_value.startswith('DEV_ENC:'): if encryptedValue.startswith('DEV_ENC:'):
env_type = 'dev' envType = 'dev'
prefix = 'DEV_ENC:' prefix = 'DEV_ENC:'
elif encrypted_value.startswith('INT_ENC:'): elif encryptedValue.startswith('INT_ENC:'):
env_type = 'int' envType = 'int'
prefix = 'INT_ENC:' prefix = 'INT_ENC:'
elif encrypted_value.startswith('PROD_ENC:'): elif encryptedValue.startswith('PROD_ENC:'):
env_type = 'prod' envType = 'prod'
prefix = 'PROD_ENC:' prefix = 'PROD_ENC:'
elif encrypted_value.startswith('TEST_ENC:'): elif encryptedValue.startswith('TEST_ENC:'):
env_type = 'test' envType = 'test'
prefix = 'TEST_ENC:' prefix = 'TEST_ENC:'
elif encrypted_value.startswith('STAGING_ENC:'): elif encryptedValue.startswith('STAGING_ENC:'):
env_type = 'staging' envType = 'staging'
prefix = 'STAGING_ENC:' prefix = 'STAGING_ENC:'
else: else:
raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:") raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:")
encrypted_part = encrypted_value[len(prefix):] encryptedPart = encryptedValue[len(prefix):]
# Get master key for the specific environment and derive encryption key # Get master key for the specific environment and derive encryption key
master_key = _get_master_key(env_type) masterKey = _getMasterKey(envType)
derived_key = _derive_encryption_key(master_key) derivedKey = _deriveEncryptionKey(masterKey)
fernet = Fernet(derived_key) fernet = Fernet(derivedKey)
# Decode and decrypt # Decode and decrypt
encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8')) encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8'))
decrypted_bytes = fernet.decrypt(encrypted_bytes) decryptedBytes = fernet.decrypt(encryptedBytes)
decrypted_value = decrypted_bytes.decode('utf-8') decryptedValue = decryptedBytes.decode('utf-8')
# Log audit event for decryption # Log audit event for decryption
try: try:
from modules.shared.auditLogger import audit_logger from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access( audit_logger.logKeyAccess(
user_id=user_id, userId=userId,
mandate_id="system", mandateId="system",
key_name=key_name, keyName=keyName,
action="decrypt" action="decrypt"
) )
except Exception: except Exception:
# Don't fail if audit logging fails # Don't fail if audit logging fails
pass pass
return decrypted_value return decryptedValue
except Exception as e: except Exception as e:
raise ValueError(f"Decryption failed: {e}") raise ValueError(f"Decryption failed: {e}")

View file

@ -120,8 +120,8 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
debug_file = os.path.join(debug_dir, "debug_workflow.log") debug_file = os.path.join(debug_dir, "debug_workflow.log")
# Format the debug entry # Format the debug entry
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import getUtcTimestamp
timestamp = get_utc_timestamp() timestamp = getUtcTimestamp()
debug_entry = f"[{timestamp}] [{context}] {message}\n" debug_entry = f"[{timestamp}] [{context}] {message}\n"
# Write to debug file # Write to debug file

View file

@ -102,7 +102,7 @@ def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
return obj return obj
def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]: def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
""" """
Generic merger for root-level lists: take first dict as base; for each subsequent part: Generic merger for root-level lists: take first dict as base; for each subsequent part:
- if value is list and same key exists as list, extend it - if value is list and same key exists as list, extend it
@ -112,7 +112,7 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
""" """
base: Optional[Dict[str, Any]] = None base: Optional[Dict[str, Any]] = None
parsed: List[Dict[str, Any]] = [] parsed: List[Dict[str, Any]] = []
for part in json_parts: for part in jsonParts:
if isinstance(part, (dict, list)): if isinstance(part, (dict, list)):
obj = part obj = part
else: else:
@ -146,61 +146,61 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
# Strategy 1: Try to extract sections from the entire text first # Strategy 1: Try to extract sections from the entire text first
# This handles cases where the JSON structure is broken but content is intact # This handles cases where the JSON structure is broken but content is intact
extracted_sections = _extractSectionsRegex(text) extractedSections = _extractSectionsRegex(text)
if extracted_sections: if extractedSections:
logger.info(f"Extracted {len(extracted_sections)} sections using regex") logger.info(f"Extracted {len(extractedSections)} sections using regex")
return { return {
"metadata": { "metadata": {
"split_strategy": "single_document", "split_strategy": "single_document",
"source_documents": [], "source_documents": [],
"extraction_method": "ai_generation" "extraction_method": "ai_generation"
}, },
"documents": [{"sections": extracted_sections}] "documents": [{"sections": extractedSections}]
} }
# Strategy 2: Progressive parsing - try to find longest valid prefix # Strategy 2: Progressive parsing - try to find longest valid prefix
best_result = None bestResult = None
best_valid_length = 0 bestValidLength = 0
# Try different step sizes to find the best valid JSON # Try different step sizes to find the best valid JSON
for step_size in [100, 50, 10, 1]: for stepSize in [100, 50, 10, 1]:
for i in range(len(text), 0, -step_size): for i in range(len(text), 0, -stepSize):
test_str = text[:i] testStr = text[:i]
closed_str = _closeJsonStructures(test_str) closedStr = _closeJsonStructures(testStr)
obj, err, _ = tryParseJson(closed_str) obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict): if err is None and isinstance(obj, dict):
best_result = obj bestResult = obj
best_valid_length = i bestValidLength = i
logger.debug(f"Progressive parsing success at length {i} (step: {step_size})") logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
break break
if best_result: if bestResult:
break break
if best_result: if bestResult:
logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})") logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
# Check if we have sections in the result # Check if we have sections in the result
sections = extractSectionsFromDocument(best_result) sections = extractSectionsFromDocument(bestResult)
if sections: if sections:
logger.info(f"Progressive parsing found {len(sections)} sections") logger.info(f"Progressive parsing found {len(sections)} sections")
return best_result return bestResult
else: else:
# No sections found in progressive parsing, try to extract from broken part # No sections found in progressive parsing, try to extract from broken part
logger.info("Progressive parsing found no sections, trying to extract from broken part") logger.info("Progressive parsing found no sections, trying to extract from broken part")
extracted_sections = _extractSectionsRegex(text[best_valid_length:]) extractedSections = _extractSectionsRegex(text[bestValidLength:])
if extracted_sections: if extractedSections:
logger.info(f"Extracted {len(extracted_sections)} sections from broken part") logger.info(f"Extracted {len(extractedSections)} sections from broken part")
# Merge with the valid part # Merge with the valid part
if "documents" not in best_result: if "documents" not in bestResult:
best_result["documents"] = [] bestResult["documents"] = []
if not best_result["documents"]: if not bestResult["documents"]:
best_result["documents"] = [{"sections": []}] bestResult["documents"] = [{"sections": []}]
best_result["documents"][0]["sections"].extend(extracted_sections) bestResult["documents"][0]["sections"].extend(extractedSections)
return best_result return bestResult
# Strategy 3: Structure closing - close incomplete structures # Strategy 3: Structure closing - close incomplete structures
closed_str = _closeJsonStructures(text) closedStr = _closeJsonStructures(text)
obj, err, _ = tryParseJson(closed_str) obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict): if err is None and isinstance(obj, dict):
logger.info("Repaired JSON using structure closing") logger.info("Repaired JSON using structure closing")
return obj return obj
@ -217,16 +217,16 @@ def _closeJsonStructures(text: str) -> str:
return text return text
# Count open/close brackets and braces # Count open/close brackets and braces
open_braces = text.count('{') openBraces = text.count('{')
close_braces = text.count('}') closeBraces = text.count('}')
open_brackets = text.count('[') openBrackets = text.count('[')
close_brackets = text.count(']') closeBrackets = text.count(']')
# Close incomplete structures # Close incomplete structures
result = text result = text
for _ in range(open_braces - close_braces): for _ in range(openBraces - closeBraces):
result += '}' result += '}'
for _ in range(open_brackets - close_brackets): for _ in range(openBrackets - closeBrackets):
result += ']' result += ']'
return result return result
@ -242,32 +242,32 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
sections = [] sections = []
# Pattern to find section objects # Pattern to find section objects
section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)' sectionPattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
for match in re.finditer(section_pattern, text, re.IGNORECASE): for match in re.finditer(sectionPattern, text, re.IGNORECASE):
section_id = match.group(1) sectionId = match.group(1)
content_type = match.group(2) contentType = match.group(2)
order = int(match.group(3)) order = int(match.group(3))
# Try to extract elements array - look for the elements array after this section # Try to extract elements array - look for the elements array after this section
elements_match = re.search( elementsMatch = re.search(
r'"elements"\s*:\s*\[(.*?)\]', r'"elements"\s*:\s*\[(.*?)\]',
text[match.end():match.end()+5000] # Look ahead for elements (large range) text[match.end():match.end()+5000] # Look ahead for elements (large range)
) )
elements = [] elements = []
if elements_match: if elementsMatch:
try: try:
elements_str = '[' + elements_match.group(1) + ']' elementsStr = '[' + elementsMatch.group(1) + ']'
elements = json.loads(elements_str) elements = json.loads(elementsStr)
except: except:
# If JSON parsing fails, try to extract individual items manually # If JSON parsing fails, try to extract individual items manually
elements_text = elements_match.group(1) elementsText = elementsMatch.group(1)
elements = _extractElementsFromText(elements_text, content_type) elements = _extractElementsFromText(elementsText, contentType)
sections.append({ sections.append({
"id": section_id, "id": sectionId,
"content_type": content_type, "content_type": contentType,
"elements": elements, "elements": elements,
"order": order "order": order
}) })
@ -279,7 +279,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
return sections return sections
def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]: def _extractElementsFromText(elementsText: str, contentType: str) -> List[Dict[str, Any]]:
""" """
Extract elements from text when JSON parsing fails. Extract elements from text when JSON parsing fails.
Generic approach that works for any content type. Generic approach that works for any content type.
@ -290,11 +290,11 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
elements = [] elements = []
if content_type == "list": if contentType == "list":
# Look for {"text": "..."} patterns, including incomplete ones # Look for {"text": "..."} patterns, including incomplete ones
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text) text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
# Also look for incomplete patterns like {"text": "36 # Also look for incomplete patterns like {"text": "36
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
# Combine both complete and incomplete items # Combine both complete and incomplete items
all_items = text_items + incomplete_items all_items = text_items + incomplete_items
@ -303,41 +303,41 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
# Remove the last item if it appears to be incomplete/corrupted # Remove the last item if it appears to be incomplete/corrupted
if unique_items: if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text) unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items] elements = [{"text": item} for item in unique_items]
elif content_type == "paragraph": elif contentType == "paragraph":
# Look for {"text": "..."} patterns, including incomplete ones # Look for {"text": "..."} patterns, including incomplete ones
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text) text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = text_items + incomplete_items all_items = text_items + incomplete_items
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()])) unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
# Remove the last item if it appears to be incomplete/corrupted # Remove the last item if it appears to be incomplete/corrupted
if unique_items: if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text) unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items] elements = [{"text": item} for item in unique_items]
elif content_type == "heading": elif contentType == "heading":
# Look for {"level": X, "text": "..."} patterns, including incomplete ones # Look for {"level": X, "text": "..."} patterns, including incomplete ones
heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text) heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elementsText)
incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = heading_items + incomplete_heading_items all_items = heading_items + incomplete_heading_items
unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()])) unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))
# Remove the last item if it appears to be incomplete/corrupted # Remove the last item if it appears to be incomplete/corrupted
if unique_items: if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text) unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"level": level, "text": text} for level, text in unique_items] elements = [{"level": level, "text": text} for level, text in unique_items]
elif content_type == "table": elif contentType == "table":
# Look for table patterns # Look for table patterns
table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text) table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elementsText)
for headers_str, rows_str, caption in table_items: for headers_str, rows_str, caption in table_items:
# Extract headers # Extract headers
headers = re.findall(r'"([^"]+)"', headers_str) headers = re.findall(r'"([^"]+)"', headers_str)
@ -354,31 +354,31 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
"caption": caption "caption": caption
}) })
elif content_type == "code": elif contentType == "code":
# Look for {"code": "...", "language": "..."} patterns, including incomplete ones # Look for {"code": "...", "language": "..."} patterns, including incomplete ones
code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text) code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elementsText)
incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = code_items + [(code, "unknown") for code in incomplete_code_items] all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()])) unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))
# Remove the last item if it appears to be incomplete/corrupted # Remove the last item if it appears to be incomplete/corrupted
if unique_items: if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text) unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"code": code, "language": lang} for code, lang in unique_items] elements = [{"code": code, "language": lang} for code, lang in unique_items]
else: else:
# Generic fallback - look for any text content, including incomplete # Generic fallback - look for any text content, including incomplete
text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text) text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elementsText)
incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text) incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = text_items + incomplete_text_items all_items = text_items + incomplete_text_items
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()])) unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
# Remove the last item if it appears to be incomplete/corrupted # Remove the last item if it appears to be incomplete/corrupted
if unique_items: if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text) unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items] elements = [{"text": item} for item in unique_items]

View file

@ -6,7 +6,7 @@ Ensures all timestamps are properly handled as UTC.
from datetime import datetime, timezone from datetime import datetime, timezone
import time import time
def get_utc_now() -> datetime: def getUtcNow() -> datetime:
""" """
Get current time in UTC with timezone info. Get current time in UTC with timezone info.
@ -15,7 +15,7 @@ def get_utc_now() -> datetime:
""" """
return datetime.now(timezone.utc) return datetime.now(timezone.utc)
def get_utc_timestamp() -> float: def getUtcTimestamp() -> float:
""" """
Get current UTC timestamp (seconds since epoch with millisecond precision). Get current UTC timestamp (seconds since epoch with millisecond precision).
@ -24,14 +24,14 @@ def get_utc_timestamp() -> float:
""" """
return time.time() return time.time()
def create_expiration_timestamp(expires_in_seconds: int) -> float: def createExpirationTimestamp(expiresInSeconds: int) -> float:
""" """
Create a new expiration timestamp from seconds until expiration. Create a new expiration timestamp from seconds until expiration.
Args: Args:
expires_in_seconds (int): Seconds until expiration expiresInSeconds (int): Seconds until expiration
Returns: Returns:
float: UTC timestamp in seconds float: UTC timestamp in seconds
""" """
return get_utc_timestamp() + expires_in_seconds return getUtcTimestamp() + expiresInSeconds

View file

@ -22,13 +22,11 @@ class AdaptiveLearningEngine:
workflowId: str, attemptNumber: int): workflowId: str, attemptNumber: int):
"""Record validation result and learn from it""" """Record validation result and learn from it"""
try: try:
actionType = actionContext.get('actionType', 'unknown')
actionName = actionContext.get('actionName', 'unknown') actionName = actionContext.get('actionName', 'unknown')
# Store validation history # Store validation history
validationEntry = { validationEntry = {
'workflowId': workflowId, 'workflowId': workflowId,
'actionType': actionType,
'actionName': actionName, 'actionName': actionName,
'attemptNumber': attemptNumber, 'attemptNumber': attemptNumber,
'validationResult': validationResult, 'validationResult': validationResult,
@ -42,17 +40,17 @@ class AdaptiveLearningEngine:
# Track patterns # Track patterns
if validationResult.get('overallSuccess', False): if validationResult.get('overallSuccess', False):
self.successPatterns[actionType].append(validationEntry) self.successPatterns[actionName].append(validationEntry)
else: else:
self.failurePatterns[actionType].append(validationEntry) self.failurePatterns[actionName].append(validationEntry)
# Update attempt count # Update attempt count
self.actionAttempts[f"{workflowId}:{actionType}"] += 1 self.actionAttempts[f"{workflowId}:{actionName}"] += 1
# Generate learning insights # Generate learning insights
self._generateLearningInsights(workflowId, actionType) self._generateLearningInsights(workflowId, actionName)
logger.info(f"Recorded validation for {actionType} (attempt {attemptNumber}): " logger.info(f"Recorded validation for {actionName} (attempt {attemptNumber}): "
f"Success={validationResult.get('overallSuccess', False)}, " f"Success={validationResult.get('overallSuccess', False)}, "
f"Quality={validationResult.get('qualityScore', 0.0)}") f"Quality={validationResult.get('qualityScore', 0.0)}")
@ -86,21 +84,21 @@ class AdaptiveLearningEngine:
logger.error(f"Error generating adaptive context: {str(e)}") logger.error(f"Error generating adaptive context: {str(e)}")
return {} return {}
def getAdaptiveContextForParameters(self, workflowId: str, actionType: str, def getAdaptiveContextForParameters(self, workflowId: str, actionName: str,
parametersContext: str) -> Dict[str, Any]: parametersContext: str) -> Dict[str, Any]:
"""Generate adaptive context for parameter selection prompt""" """Generate adaptive context for parameter selection prompt"""
try: try:
# Get validation history for this specific action type # Get validation history for this specific action name
actionValidations = [ actionValidations = [
v for v in self.validationHistory v for v in self.validationHistory
if v['workflowId'] == workflowId and v['actionType'] == actionType if v['workflowId'] == workflowId and v['actionName'] == actionName
][-3:] # Last 3 attempts for this action ][-3:] # Last 3 attempts for this action
# Analyze what went wrong in previous attempts # Analyze what went wrong in previous attempts
failureAnalysis = self._analyzeParameterFailures(actionValidations) failureAnalysis = self._analyzeParameterFailures(actionValidations)
# Generate specific parameter guidance # Generate specific parameter guidance
parameterGuidance = self._generateParameterGuidance(actionType, parametersContext, failureAnalysis) parameterGuidance = self._generateParameterGuidance(actionName, parametersContext, failureAnalysis)
return { return {
'actionValidations': actionValidations, 'actionValidations': actionValidations,
@ -206,36 +204,28 @@ class AdaptiveLearningEngine:
return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available." return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available."
def _generateParameterGuidance(self, actionType: str, parametersContext: str, def _generateParameterGuidance(self, actionName: str, parametersContext: str,
failureAnalysis: Dict[str, Any]) -> str: failureAnalysis: Dict[str, Any]) -> str:
"""Generate specific parameter guidance based on previous failures""" """Generate generic parameter guidance based on previous failures (no app-specific logic)."""
if not failureAnalysis.get('hasFailures', False): if not failureAnalysis.get('hasFailures', False):
return "No previous parameter failures. Use standard parameter values." return "No previous parameter failures. Use standard parameter values."
guidance_parts = [] guidanceParts = []
# Add attempt awareness # Attempt awareness
attemptNumber = failureAnalysis.get('attemptNumber', 1) attemptNumber = failureAnalysis.get('attemptNumber', 1)
if attemptNumber >= 3: if attemptNumber and attemptNumber >= 3:
guidance_parts.append(f"ATTEMPT #{attemptNumber}: Previous attempts failed. Adjust parameters based on validation feedback.") guidanceParts.append(f"Attempt #{attemptNumber}: Adjust parameters based on validation feedback.")
# Add specific parameter guidance based on action type # Generic issues summary
if actionType == "outlook.composeAndSendEmailWithContext": commonIssues = failureAnalysis.get('commonIssues', {}) or {}
guidance_parts.append("EMAIL PARAMETER GUIDANCE:") if commonIssues:
guidance_parts.append("- context: Be very specific about account (valueon), appointment time (Friday), and requirements") guidanceParts.append("Address the following parameter issues:")
guidance_parts.append("- emailStyle: Use 'formal' for business emails") for issueKey, issueDesc in commonIssues.items():
guidance_parts.append("- maxLength: Set to 2000+ for detailed emails with summaries") guidanceParts.append(f"- {issueKey}: {issueDesc}")
# Add specific guidance based on common failures # Keep guidance format stable
commonIssues = failureAnalysis.get('commonIssues', {}) return "\n".join(guidanceParts) if guidanceParts else "Use standard parameter values."
if any("account" in str(issue).lower() for issue in commonIssues.keys()):
guidance_parts.append("- context: MUST specify 'from valueon account' explicitly")
if any("attachment" in str(issue).lower() for issue in commonIssues.keys()):
guidance_parts.append("- documentList: Ensure PDF is properly referenced")
if any("summary" in str(issue).lower() for issue in commonIssues.keys()):
guidance_parts.append("- context: MUST request '10-12 sentence German summary' explicitly")
return "\n".join(guidance_parts) if guidance_parts else "Use standard parameter values."
def _getEscalationLevel(self, workflowId: str) -> str: def _getEscalationLevel(self, workflowId: str) -> str:
"""Determine escalation level based on failure patterns""" """Determine escalation level based on failure patterns"""
@ -251,7 +241,7 @@ class AdaptiveLearningEngine:
else: else:
return "low" return "low"
def _generateLearningInsights(self, workflowId: str, actionType: str): def _generateLearningInsights(self, workflowId: str, actionName: str):
"""Generate learning insights for a workflow""" """Generate learning insights for a workflow"""
if workflowId not in self.learningInsights: if workflowId not in self.learningInsights:
self.learningInsights[workflowId] = {} self.learningInsights[workflowId] = {}
@ -263,7 +253,7 @@ class AdaptiveLearningEngine:
'totalAttempts': len(workflowValidations), 'totalAttempts': len(workflowValidations),
'successfulAttempts': len([v for v in workflowValidations if v['success']]), 'successfulAttempts': len([v for v in workflowValidations if v['success']]),
'failedAttempts': len([v for v in workflowValidations if not v['success']]), 'failedAttempts': len([v for v in workflowValidations if not v['success']]),
'lastActionType': actionType, 'lastActionName': actionName,
'escalationLevel': self._getEscalationLevel(workflowId) 'escalationLevel': self._getEscalationLevel(workflowId)
} }

View file

@ -26,14 +26,14 @@ class ContentValidator:
if isinstance(data, dict) and 'content' in data: if isinstance(data, dict) and 'content' in data:
content = data['content'] content = data['content']
# For large content, check size before converting to string # For large content, check size before converting to string
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
# For very large content, return a size indicator instead # For very large content, return a size indicator instead
return f"[Large document content - {len(str(content))} characters - truncated for validation]" return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content) return str(content)
else: else:
content = data content = data
# For large content, check size before converting to string # For large content, check size before converting to string
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
return f"[Large document content - {len(str(content))} characters - truncated for validation]" return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content) return str(content)
return "" return ""

View file

@ -30,7 +30,7 @@ class IntentAnalyzer:
analysisPrompt = f""" analysisPrompt = f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered. You are an intent analyzer. Analyze the user's request to understand what they want delivered.
USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')} USER REQUEST: {self.services.utils.sanitizePromptContent(userPrompt, 'userinput')}
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''} CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}

View file

@ -571,7 +571,7 @@ class ActionplanMode(BaseMode):
actionData["execParameters"] = {} actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model # Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database # Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@ -715,7 +715,7 @@ class ActionplanMode(BaseMode):
actionData["execParameters"] = {} actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model # Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database # Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

View file

@ -98,7 +98,12 @@ class ReactMode(BaseMode):
# NEW: Add content validation (against original cleaned user prompt / workflow intent) # NEW: Add content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents: if getattr(self, 'workflowIntent', None) and result.documents:
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent) # Validate ONLY the produced JSON (structured content), not rendered files
from types import SimpleNamespace
validationDocs = []
if hasattr(result, 'content') and result.content:
validationDocs.append(SimpleNamespace(documentName='generated.json', documentData={'content': result.content}))
validationResult = await self.contentValidator.validateContent(validationDocs, self.workflowIntent)
observation['contentValidation'] = validationResult observation['contentValidation'] = validationResult
quality_score = validationResult.get('qualityScore', 0.0) quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None: if quality_score is None:
@ -106,9 +111,9 @@ class ReactMode(BaseMode):
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})") logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
# NEW: Record validation result for adaptive learning # NEW: Record validation result for adaptive learning
actionValue = selection.get('action', 'unknown')
actionContext = { actionContext = {
'actionType': selection.get('action', {}).get('action', 'unknown'), 'actionName': actionValue,
'actionName': selection.get('action', {}).get('action', 'unknown'),
'workflowId': context.workflow_id 'workflowId': context.workflow_id
} }
@ -747,7 +752,7 @@ Return only the user-friendly message, no technical details."""
actionData["execParameters"] = {} actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model # Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database # Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@ -838,7 +843,7 @@ Return only the user-friendly message, no technical details."""
actionData["execParameters"] = {} actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model # Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData) simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database # Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields) createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

View file

@ -215,7 +215,7 @@ class WorkflowManager:
" }\n" " }\n"
" ]\n" " ]\n"
"}\n\n" "}\n\n"
f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}" f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
) )
# Call AI analyzer (planning call - will use static parameters) # Call AI analyzer (planning call - will use static parameters)

View file

@ -0,0 +1,107 @@
Module,Function Names,Parameter Names,Variable Names,Total
modules/workflows/methods/methodSharepoint.py,0,2,211,213
modules/workflows/methods/methodOutlook.py,0,3,131,134
modules/services/serviceAi/subDocumentProcessing.py,0,0,104,104
modules/features/syncDelta/mainSyncDelta.py,1,10,88,99
modules/shared/jsonUtils.py,0,3,88,91
modules/services/serviceGeneration/renderers/rendererDocx.py,3,8,79,90
modules/services/serviceWorkflow/mainServiceWorkflow.py,0,3,85,88
modules/services/serviceGeneration/renderers/rendererPptx.py,2,7,73,82
modules/services/serviceGeneration/renderers/rendererPdf.py,3,8,50,61
modules/connectors/connectorVoiceGoogle.py,1,2,52,55
modules/services/serviceGeneration/renderers/rendererHtml.py,3,6,46,55
modules/services/serviceGeneration/renderers/rendererBaseTemplate.py,3,21,27,51
modules/shared/configuration.py,2,17,30,49
modules/services/serviceExtraction/subMerger.py,2,5,31,38
modules/connectors/connectorDbPostgre.py,0,14,20,34
modules/interfaces/interfaceDbAppObjects.py,0,8,26,34
modules/routes/routeSecurityGoogle.py,0,0,32,32
modules/shared/attributeUtils.py,3,4,25,32
modules/interfaces/interfaceDbChatObjects.py,0,4,27,31
modules/routes/routeSecurityAdmin.py,0,2,28,30
modules/services/serviceNeutralization/subProcessList.py,7,0,22,29
modules/services/serviceGeneration/renderers/rendererText.py,3,7,19,29
modules/routes/routeSecurityMsft.py,0,0,27,27
modules/services/serviceGeneration/renderers/rendererMarkdown.py,3,7,17,27
modules/services/serviceGeneration/renderers/rendererXlsx.py,3,0,24,27
modules/services/serviceGeneration/renderers/rendererImage.py,3,2,21,26
modules/security/tokenManager.py,4,7,14,25
modules/workflows/workflowManager.py,0,0,25,25
modules/services/serviceGeneration/renderers/rendererCsv.py,3,5,17,25
modules/shared/auditLogger.py,5,16,3,24
modules/shared/debugLogger.py,0,0,24,24
modules/workflows/processing/shared/placeholderFactory.py,0,0,24,24
modules/interfaces/interfaceDbAppAccess.py,0,2,21,23
modules/connectors/connectorTicketsJira.py,0,0,22,22
modules/services/serviceGeneration/renderers/registry.py,7,3,12,22
modules/routes/routeDataConnections.py,1,1,19,21
modules/security/tokenRefreshService.py,0,2,19,21
modules/services/serviceExtraction/extractors/extractorPptx.py,0,1,16,17
modules/routes/routeSecurityLocal.py,0,0,16,16
modules/workflows/methods/methodBase.py,0,4,12,16
modules/services/serviceGeneration/mainServiceGeneration.py,0,4,11,15
modules/services/serviceUtils/mainServiceUtils.py,0,14,1,15
modules/features/neutralizePlayground/mainNeutralizePlayground.py,8,5,2,15
modules/interfaces/interfaceTicketObjects.py,0,5,9,14
modules/services/serviceNeutralization/subParseString.py,7,0,6,13
modules/workflows/processing/modes/modeReact.py,0,1,11,12
modules/interfaces/interfaceDbComponentAccess.py,0,2,9,11
modules/services/serviceAi/subCoreAi.py,0,0,11,11
modules/services/serviceExtraction/subRegistry.py,0,0,11,11
modules/services/serviceNeutralization/mainServiceNeutralization.py,0,2,9,11
modules/interfaces/interfaceAiObjects.py,0,0,10,10
modules/services/serviceAi/subSharedAiUtils.py,0,3,7,10
modules/connectors/connectorDbJson.py,0,3,6,9
modules/workflows/methods/methodAi.py,0,0,9,9
modules/services/serviceExtraction/subPromptBuilderExtraction.py,0,0,9,9
modules/services/serviceGeneration/subDocumentUtility.py,0,3,6,9
modules/services/serviceNeutralization/subProcessCommon.py,7,2,0,9
modules/services/serviceNeutralization/subProcessText.py,5,0,4,9
modules/interfaces/interfaceDbChatAccess.py,0,2,6,8
modules/security/auth.py,0,1,7,8
modules/aicore/aicorePluginAnthropic.py,0,0,7,7
modules/security/tokenRefreshMiddleware.py,0,2,4,6
modules/services/serviceGeneration/renderers/rendererJson.py,3,0,3,6
analyze_naming_violations.py,5,0,0,5
modules/aicore/aicorePluginOpenai.py,0,0,5,5
modules/routes/routeVoiceGoogle.py,0,0,5,5
modules/shared/eventManagement.py,2,3,0,5
modules/workflows/processing/adaptive/intentAnalyzer.py,0,0,5,5
modules/workflows/processing/shared/executionState.py,0,5,0,5
modules/services/serviceGeneration/subJsonSchema.py,0,0,5,5
modules/services/serviceNeutralization/subPatterns.py,5,0,0,5
modules/services/serviceNeutralization/subProcessBinary.py,4,0,1,5
modules/services/serviceExtraction/extractors/extractorXlsx.py,0,0,5,5
modules/interfaces/interfaceDbComponentObjects.py,0,3,1,4
modules/routes/routeDataNeutralization.py,0,0,4,4
modules/routes/routeWorkflows.py,0,0,4,4
modules/shared/timezoneUtils.py,3,1,0,4
modules/workflows/processing/adaptive/contentValidator.py,0,0,4,4
modules/workflows/processing/core/messageCreator.py,0,0,4,4
modules/services/serviceSharepoint/mainServiceSharepoint.py,0,0,4,4
modules/routes/routeDataUsers.py,0,0,3,3
modules/services/serviceExtraction/subPipeline.py,0,0,3,3
app.py,0,0,2,2
modules/datamodels/datamodelChat.py,0,1,1,2
modules/routes/routeAttributes.py,0,0,2,2
modules/routes/routeDataPrompts.py,0,0,2,2
modules/security/csrf.py,0,1,1,2
modules/security/jwtService.py,0,0,2,2
modules/workflows/processing/adaptive/learningEngine.py,0,0,2,2
modules/workflows/processing/modes/modeActionplan.py,0,0,2,2
modules/workflows/processing/shared/methodDiscovery.py,0,0,2,2
modules/services/serviceNormalization/mainServiceNormalization.py,0,0,2,2
modules/services/serviceExtraction/extractors/extractorImage.py,0,0,2,2
modules/aicore/aicoreBase.py,0,0,1,1
modules/aicore/aicoreModelSelector.py,0,0,1,1
modules/connectors/connectorTicketsClickup.py,0,0,1,1
modules/datamodels/datamodelDocument.py,0,1,0,1
modules/datamodels/datamodelSecurity.py,0,0,1,1
modules/routes/routeAdmin.py,0,0,1,1
modules/routes/routeDataFiles.py,0,0,1,1
modules/workflows/processing/workflowProcessor.py,0,0,1,1
modules/workflows/processing/adaptive/adaptiveLearningEngine.py,0,0,1,1
modules/workflows/processing/core/actionExecutor.py,0,0,1,1
modules/workflows/processing/core/taskPlanner.py,0,0,1,1
modules/workflows/processing/modes/modeBase.py,0,0,1,1
modules/services/serviceAi/subDocumentGeneration.py,0,0,1,1
1 Module Function Names Parameter Names Variable Names Total
2 modules/workflows/methods/methodSharepoint.py 0 2 211 213
3 modules/workflows/methods/methodOutlook.py 0 3 131 134
4 modules/services/serviceAi/subDocumentProcessing.py 0 0 104 104
5 modules/features/syncDelta/mainSyncDelta.py 1 10 88 99
6 modules/shared/jsonUtils.py 0 3 88 91
7 modules/services/serviceGeneration/renderers/rendererDocx.py 3 8 79 90
8 modules/services/serviceWorkflow/mainServiceWorkflow.py 0 3 85 88
9 modules/services/serviceGeneration/renderers/rendererPptx.py 2 7 73 82
10 modules/services/serviceGeneration/renderers/rendererPdf.py 3 8 50 61
11 modules/connectors/connectorVoiceGoogle.py 1 2 52 55
12 modules/services/serviceGeneration/renderers/rendererHtml.py 3 6 46 55
13 modules/services/serviceGeneration/renderers/rendererBaseTemplate.py 3 21 27 51
14 modules/shared/configuration.py 2 17 30 49
15 modules/services/serviceExtraction/subMerger.py 2 5 31 38
16 modules/connectors/connectorDbPostgre.py 0 14 20 34
17 modules/interfaces/interfaceDbAppObjects.py 0 8 26 34
18 modules/routes/routeSecurityGoogle.py 0 0 32 32
19 modules/shared/attributeUtils.py 3 4 25 32
20 modules/interfaces/interfaceDbChatObjects.py 0 4 27 31
21 modules/routes/routeSecurityAdmin.py 0 2 28 30
22 modules/services/serviceNeutralization/subProcessList.py 7 0 22 29
23 modules/services/serviceGeneration/renderers/rendererText.py 3 7 19 29
24 modules/routes/routeSecurityMsft.py 0 0 27 27
25 modules/services/serviceGeneration/renderers/rendererMarkdown.py 3 7 17 27
26 modules/services/serviceGeneration/renderers/rendererXlsx.py 3 0 24 27
27 modules/services/serviceGeneration/renderers/rendererImage.py 3 2 21 26
28 modules/security/tokenManager.py 4 7 14 25
29 modules/workflows/workflowManager.py 0 0 25 25
30 modules/services/serviceGeneration/renderers/rendererCsv.py 3 5 17 25
31 modules/shared/auditLogger.py 5 16 3 24
32 modules/shared/debugLogger.py 0 0 24 24
33 modules/workflows/processing/shared/placeholderFactory.py 0 0 24 24
34 modules/interfaces/interfaceDbAppAccess.py 0 2 21 23
35 modules/connectors/connectorTicketsJira.py 0 0 22 22
36 modules/services/serviceGeneration/renderers/registry.py 7 3 12 22
37 modules/routes/routeDataConnections.py 1 1 19 21
38 modules/security/tokenRefreshService.py 0 2 19 21
39 modules/services/serviceExtraction/extractors/extractorPptx.py 0 1 16 17
40 modules/routes/routeSecurityLocal.py 0 0 16 16
41 modules/workflows/methods/methodBase.py 0 4 12 16
42 modules/services/serviceGeneration/mainServiceGeneration.py 0 4 11 15
43 modules/services/serviceUtils/mainServiceUtils.py 0 14 1 15
44 modules/features/neutralizePlayground/mainNeutralizePlayground.py 8 5 2 15
45 modules/interfaces/interfaceTicketObjects.py 0 5 9 14
46 modules/services/serviceNeutralization/subParseString.py 7 0 6 13
47 modules/workflows/processing/modes/modeReact.py 0 1 11 12
48 modules/interfaces/interfaceDbComponentAccess.py 0 2 9 11
49 modules/services/serviceAi/subCoreAi.py 0 0 11 11
50 modules/services/serviceExtraction/subRegistry.py 0 0 11 11
51 modules/services/serviceNeutralization/mainServiceNeutralization.py 0 2 9 11
52 modules/interfaces/interfaceAiObjects.py 0 0 10 10
53 modules/services/serviceAi/subSharedAiUtils.py 0 3 7 10
54 modules/connectors/connectorDbJson.py 0 3 6 9
55 modules/workflows/methods/methodAi.py 0 0 9 9
56 modules/services/serviceExtraction/subPromptBuilderExtraction.py 0 0 9 9
57 modules/services/serviceGeneration/subDocumentUtility.py 0 3 6 9
58 modules/services/serviceNeutralization/subProcessCommon.py 7 2 0 9
59 modules/services/serviceNeutralization/subProcessText.py 5 0 4 9
60 modules/interfaces/interfaceDbChatAccess.py 0 2 6 8
61 modules/security/auth.py 0 1 7 8
62 modules/aicore/aicorePluginAnthropic.py 0 0 7 7
63 modules/security/tokenRefreshMiddleware.py 0 2 4 6
64 modules/services/serviceGeneration/renderers/rendererJson.py 3 0 3 6
65 analyze_naming_violations.py 5 0 0 5
66 modules/aicore/aicorePluginOpenai.py 0 0 5 5
67 modules/routes/routeVoiceGoogle.py 0 0 5 5
68 modules/shared/eventManagement.py 2 3 0 5
69 modules/workflows/processing/adaptive/intentAnalyzer.py 0 0 5 5
70 modules/workflows/processing/shared/executionState.py 0 5 0 5
71 modules/services/serviceGeneration/subJsonSchema.py 0 0 5 5
72 modules/services/serviceNeutralization/subPatterns.py 5 0 0 5
73 modules/services/serviceNeutralization/subProcessBinary.py 4 0 1 5
74 modules/services/serviceExtraction/extractors/extractorXlsx.py 0 0 5 5
75 modules/interfaces/interfaceDbComponentObjects.py 0 3 1 4
76 modules/routes/routeDataNeutralization.py 0 0 4 4
77 modules/routes/routeWorkflows.py 0 0 4 4
78 modules/shared/timezoneUtils.py 3 1 0 4
79 modules/workflows/processing/adaptive/contentValidator.py 0 0 4 4
80 modules/workflows/processing/core/messageCreator.py 0 0 4 4
81 modules/services/serviceSharepoint/mainServiceSharepoint.py 0 0 4 4
82 modules/routes/routeDataUsers.py 0 0 3 3
83 modules/services/serviceExtraction/subPipeline.py 0 0 3 3
84 app.py 0 0 2 2
85 modules/datamodels/datamodelChat.py 0 1 1 2
86 modules/routes/routeAttributes.py 0 0 2 2
87 modules/routes/routeDataPrompts.py 0 0 2 2
88 modules/security/csrf.py 0 1 1 2
89 modules/security/jwtService.py 0 0 2 2
90 modules/workflows/processing/adaptive/learningEngine.py 0 0 2 2
91 modules/workflows/processing/modes/modeActionplan.py 0 0 2 2
92 modules/workflows/processing/shared/methodDiscovery.py 0 0 2 2
93 modules/services/serviceNormalization/mainServiceNormalization.py 0 0 2 2
94 modules/services/serviceExtraction/extractors/extractorImage.py 0 0 2 2
95 modules/aicore/aicoreBase.py 0 0 1 1
96 modules/aicore/aicoreModelSelector.py 0 0 1 1
97 modules/connectors/connectorTicketsClickup.py 0 0 1 1
98 modules/datamodels/datamodelDocument.py 0 1 0 1
99 modules/datamodels/datamodelSecurity.py 0 0 1 1
100 modules/routes/routeAdmin.py 0 0 1 1
101 modules/routes/routeDataFiles.py 0 0 1 1
102 modules/workflows/processing/workflowProcessor.py 0 0 1 1
103 modules/workflows/processing/adaptive/adaptiveLearningEngine.py 0 0 1 1
104 modules/workflows/processing/core/actionExecutor.py 0 0 1 1
105 modules/workflows/processing/core/taskPlanner.py 0 0 1 1
106 modules/workflows/processing/modes/modeBase.py 0 0 1 1
107 modules/services/serviceAi/subDocumentGeneration.py 0 0 1 1

View file

@ -0,0 +1,184 @@
# Analysis: `processDocumentsWithContinuation` and Subfunctions Usage
## Executive Summary
**FINDING**: The function `processDocumentsWithContinuation` in `subDocumentProcessing.py` is **NOT USED** anywhere in the active codebase. The continuation chain was only referenced by the deleted `subDocumentGeneration.py` module.
---
## Main Function: `processDocumentsWithContinuation`
**Location**: `gateway/modules/services/serviceAi/subDocumentProcessing.py:303`
**Status**: ❌ **NOT USED**
### Usage Search Results
- ❌ No actual code calls to `.processDocumentsWithContinuation(`
- ⚠️ Only mentioned in documentation files:
- `wiki/poweron/appdoc/doc_system_function_relationship_ai.md` (documentation)
- `gateway/callAiWithDocumentGeneration_usage_analysis.md` (previous analysis - noted it was called by deleted code)
### Why It's Not Used
The only caller was `subDocumentGeneration._processDocumentsUnified()` which we just deleted. The current active codebase uses `subCoreAi.callAiDocuments()` which has its own continuation logic via `_callAiWithLooping()`.
---
## Function Call Chain Analysis
```
processDocumentsWithContinuation (line 303) - ❌ NOT USED
├─> _buildContinuationPrompt (line 319, 324) - ❌ ONLY USED HERE
└─> _processWithContinuationLoop (line 322, 373) - ❌ ONLY USED HERE
├─> _buildContinuationIterationPrompt (line 393, 459) - ❌ ONLY USED HERE
└─> processDocumentsPerChunkJsonWithPrompt (line 402) - ⚠️ ONLY CALLED FROM THIS CHAIN (general-purpose, no other active caller)
```
---
## Subfunction Analysis
### 1. `_buildContinuationPrompt`
**Location**: Line 324-371
**Status**: ✅ **USED** (but only internally)
**Called by**: `processDocumentsWithContinuation` (line 319)
**Effectively**: ❌ **UNUSED** (because parent function is unused)
**Internal Usage**:
- Called from `processDocumentsWithContinuation` at line 319
**Functionality**:
- Builds a prompt with continuation instructions
- Adds JSON structure requirements with `"continue": true/false` flag
- Adds `continuation_context` field specification
**Note**: This uses a different continuation pattern than `SubCoreAi._callAiWithLooping()`:
- This uses `"continue": true/false + "continuation_context"` for document sections
- SubCoreAi uses `buildContinuationContext()` with `last_raw_json`
---
### 2. `_processWithContinuationLoop`
**Location**: Line 373-457
**Status**: ✅ **USED** (but only internally)
**Called by**: `processDocumentsWithContinuation` (line 322)
**Effectively**: ❌ **UNUSED** (because parent function is unused)
**Internal Usage**:
- Called from `processDocumentsWithContinuation` at line 322
**External Dependencies**:
- Calls `self._buildContinuationIterationPrompt()` (line 393)
- Calls `self.processDocumentsPerChunkJsonWithPrompt()` (line 402)
**Functionality**:
- Implements continuation loop (max 10 iterations)
- Accumulates sections across iterations
- Checks `continue` flag and `continuation_context` to determine if more iterations needed
- Builds final result with accumulated sections
---
### 3. `_buildContinuationIterationPrompt`
**Location**: Line 459-498
**Status**: ✅ **USED** (but only internally)
**Called by**: `_processWithContinuationLoop` (line 393)
**Effectively**: ❌ **UNUSED** (because parent chain is unused)
**Internal Usage**:
- Called from `_processWithContinuationLoop` at line 393 (in loop, conditionally)
**Functionality**:
- Builds a prompt for continuation iteration with context
- Includes summary of previously generated content (last 3 sections)
- Includes continuation instructions with last section ID, element index, remaining requirements
---
### 4. `processDocumentsPerChunkJsonWithPrompt`
**Location**: Line 219-301
**Status**: ⚠️ **ONLY USED BY UNUSED CODE** (general-purpose, but no other active caller)
**Called by**:
- `_processWithContinuationLoop` (line 402)
- Also referenced in backup files (not active code)
**Internal Usage**:
- Called from `_processWithContinuationLoop` at line 402
**External Usage Search**:
- ✅ Used internally by continuation loop
- ⚠️ Referenced in `local/backup/backup_mainServiceAi.py.txt` (backup file, not active)
- ❌ Not used by any other active code
**Functionality**:
- Processes documents with per-chunk AI calls
- Uses a custom prompt instead of default extraction prompt
- Returns merged JSON document
**Note**: This function itself is only used by the continuation loop. However, it's a more general function that could be useful, so it's not "dead code" - it's just currently only used by unused code.
---
## Summary Table
| Function | Line | Status | Called By | Effectively Used? |
|----------|------|--------|-----------|-------------------|
| `processDocumentsWithContinuation` | 303 | ❌ Not used | (external) | ❌ No |
| `_buildContinuationPrompt` | 324 | ✅ Used internally | `processDocumentsWithContinuation:319` | ❌ No |
| `_processWithContinuationLoop` | 373 | ✅ Used internally | `processDocumentsWithContinuation:322` | ❌ No |
| `_buildContinuationIterationPrompt` | 459 | ✅ Used internally | `_processWithContinuationLoop:393` | ❌ No |
| `processDocumentsPerChunkJsonWithPrompt` | 219 | ✅ Used internally | `_processWithContinuationLoop:402` | ⚠️ **ONLY USED BY UNUSED CODE** |
---
## Current Active Implementation
The active continuation logic is in `subCoreAi.callAiDocuments()` → `_callAiWithLooping()`:
- Uses `buildGenerationPrompt()` with `continuationContext` parameter
- Uses `buildContinuationContext()` to build context from sections
- Different continuation pattern (uses `last_raw_json` instead of `continuation_context`)
---
## Dead Code Identification
**Completely Unused Chain** (can be safely removed):
1. ✅ `processDocumentsWithContinuation` - entry point, not called
2. ✅ `_buildContinuationPrompt` - only used by #1
3. ✅ `_processWithContinuationLoop` - only used by #1
4. ✅ `_buildContinuationIterationPrompt` - only used by #3
**Potentially Unused** (only used by dead code):
- ⚠️ `processDocumentsPerChunkJsonWithPrompt` - only caller is dead code, but function is general-purpose
---
## Recommendations
1. **Remove Dead Code Chain**: All four functions (`processDocumentsWithContinuation`, `_buildContinuationPrompt`, `_processWithContinuationLoop`, `_buildContinuationIterationPrompt`) can be safely removed.
2. **For `processDocumentsPerChunkJsonWithPrompt`**:
- **Option A**: Remove if not needed (it's only used by the dead continuation chain)
- **Option B**: Keep if it might be useful for future custom prompt processing
- **Recommendation**: Since it's a general-purpose function that could be useful, keep it but note that it's currently unused.
3. **If Keeping**: Document why this continuation logic exists but is unused, or mark as deprecated/legacy alternative to `_callAiWithLooping()`.
---
## Verification Commands
To verify these findings:
```bash
# Search for actual function calls (should return no results for the main function)
grep -r "\.processDocumentsWithContinuation(" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup
# Search for _buildContinuationPrompt usage (should only find the definition and its single call inside processDocumentsWithContinuation)
grep -r "_buildContinuationPrompt" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
# Search for _processWithContinuationLoop usage (should only find the definition and its single call inside processDocumentsWithContinuation)
grep -r "_processWithContinuationLoop" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
```

View file

@ -39,7 +39,7 @@ else:
# Import encryption functions # Import encryption functions
try: try:
from modules.shared.configuration import encrypt_value from modules.shared.configuration import encryptValue
except ImportError as e: except ImportError as e:
print(f"Error: Could not import encryption functions from shared.configuration: {e}") print(f"Error: Could not import encryption functions from shared.configuration: {e}")
print(f"Make sure you're running this script from the gateway directory") print(f"Make sure you're running this script from the gateway directory")
@ -254,7 +254,7 @@ def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_b
print(f" 🔐 Encrypting {key}...") print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the environment type from the file # Encrypt the value using the environment type from the file
encrypted_value = encrypt_value(value, file_env_type) encrypted_value = encryptValue(value, file_env_type)
# Replace the line in the file content # Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n" new_line = f"{key} = {encrypted_value}\n"

View file

@ -30,7 +30,7 @@ from datetime import datetime
# Add the modules directory to the Python path # Add the modules directory to the Python path
sys.path.insert(0, str(Path(__file__).parent / 'modules')) sys.path.insert(0, str(Path(__file__).parent / 'modules'))
from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value from modules.shared.configuration import encryptValue, decryptValue, isEncryptedValue
def get_env_type_from_file(file_path: Path) -> str: def get_env_type_from_file(file_path: Path) -> str:
""" """
@ -247,7 +247,7 @@ def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool =
print(f" 🔐 Encrypting {key}...") print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the environment type from the file # Encrypt the value using the environment type from the file
encrypted_value = encrypt_value(value, file_env_type) encrypted_value = encryptValue(value, file_env_type)
# Replace the line in the file content # Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n" new_line = f"{key} = {encrypted_value}\n"
@ -360,8 +360,8 @@ def main():
# Handle decryption # Handle decryption
if args.decrypt: if args.decrypt:
if _is_encrypted_value(args.decrypt): if isEncryptedValue(args.decrypt):
decrypted = decrypt_value(args.decrypt) decrypted = decryptValue(args.decrypt)
print(f"Decrypted value: {decrypted}") print(f"Decrypted value: {decrypted}")
else: else:
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)") print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
@ -411,7 +411,7 @@ def main():
return return
# Encrypt the value # Encrypt the value
encrypted_value = encrypt_value(value_to_encrypt, args.env) encrypted_value = encryptValue(value_to_encrypt, args.env)
print(f"\n✓ Encryption successful!") print(f"\n✓ Encryption successful!")
print(f"Environment: {args.env or 'current'}") print(f"Environment: {args.env or 'current'}")