refactored whole codebase for camelCase part 1 of 2

ValueOn AG 2025-10-31 00:05:39 +01:00
parent 26b2109844
commit c44fc92568
86 changed files with 3969 additions and 5005 deletions


@ -0,0 +1,242 @@
"""
Script to analyze the codebase for snake_case naming violations that should be camelCase.
Excludes routes (decorated endpoint functions) and JSON field names.
"""
import ast
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Dict, List
import csv
# Patterns to exclude (external library interfaces, etc.)
EXCLUDE_PATTERNS = [
r'@.*\.(get|post|put|delete|patch|options|head)', # FastAPI route decorators
r'self\.(db|db_|model|orm)', # Database ORM attributes
r'\.(objects|query|filter|get|all)', # ORM methods
r'(request|response|response_model|status_code)', # FastAPI params
r'(snake_case|kebab-case)', # String literals
]
# External library attribute patterns (should not be changed)
EXTERNAL_LIB_ATTRIBUTES = {
'pydantic', 'fastapi', 'sqlalchemy', 'psycopg', 'requests',
'aiohttp', 'azure', 'google', 'openai', 'anthropic', 'reportlab',
'docx', 'pptx', 'openpyxl', 'json', 'logging', 'datetime', 'typing'
}
def isRouteFile(filePath: str) -> bool:
"""Check if file is a route file"""
return 'routes' in filePath or 'route' in os.path.basename(filePath).lower()
def shouldExcludeName(name: str, context: str = "") -> bool:
"""Check if a name should be excluded from analysis"""
# Skip if it's a builtin or external library attribute
if name.startswith('__') and name.endswith('__'):
return True
# Skip if context suggests external library usage
for pattern in EXCLUDE_PATTERNS:
if re.search(pattern, context, re.IGNORECASE):
return True
return False
def isSnakeCase(name: str) -> bool:
"""Check if a name is snake_case"""
if not name or name.startswith('_'):
return False
# Check if contains underscore and is not all caps
return '_' in name and not name.isupper()
def analyzeFile(filePath: str) -> Dict[str, List[str]]:
"""Analyze a Python file for naming violations"""
violations = {
'functions': [],
'parameters': [],
'variables': []
}
try:
with open(filePath, 'r', encoding='utf-8') as f:
content = f.read()
tree = ast.parse(content, filename=filePath)
except (SyntaxError, UnicodeDecodeError):
return violations
class NamingAnalyzer(ast.NodeVisitor):
def __init__(self):
self.violations = violations
self.currentClass = None
self.inRouteDecorator = False
self.functionDefs = []
def visit_FunctionDef(self, node):
# Check if this is a route endpoint (has FastAPI decorator)
isRouteEndpoint = False
for decorator in node.decorator_list:
if isinstance(decorator, ast.Attribute):
if decorator.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
isRouteEndpoint = True
break
elif isinstance(decorator, ast.Call):
if isinstance(decorator.func, ast.Attribute):
if decorator.func.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
isRouteEndpoint = True
break
# Skip route endpoint function names
# But we still need to check their parameters and variables
funcName = node.name
if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName):
self.violations['functions'].append(f"{funcName} (line {node.lineno})")
# Analyze parameters
for arg in node.args.args:
if arg.arg != 'self' and arg.arg != 'cls':
paramName = arg.arg
if isSnakeCase(paramName) and not shouldExcludeName(paramName):
self.violations['parameters'].append(f"{paramName} in {funcName} (line {node.lineno})")
            # Analyze function body for local variables
            for stmt in node.body:
                self.visit(stmt)

        # Async functions are common in this codebase; analyze them the same way
        visit_AsyncFunctionDef = visit_FunctionDef
def visit_ClassDef(self, node):
oldClass = self.currentClass
self.currentClass = node.name
self.generic_visit(node)
self.currentClass = oldClass
        def visit_Assign(self, node):
            # Collect simple names plus names bound through tuple/list unpacking
            boundNames = []
            for target in node.targets:
                if isinstance(target, ast.Name):
                    boundNames.append(target)
                elif isinstance(target, (ast.Tuple, ast.List)):
                    boundNames.extend(e for e in target.elts if isinstance(e, ast.Name))
            for nameNode in boundNames:
                varName = nameNode.id
                # Skip constants (ALL_CAPS), builtins, and private (_xxx)
                if varName.isupper() or varName.startswith('_'):
                    continue
                # Local variables should be camelCase
                if isSnakeCase(varName) and not shouldExcludeName(varName):
                    self.violations['variables'].append(f"{varName} (line {node.lineno})")
            self.generic_visit(node)
        def visit_For(self, node):
            # Cover both "for x in ..." and "for k, v in ..." loop targets
            loopNames = [node.target] if isinstance(node.target, ast.Name) else [
                e for e in getattr(node.target, 'elts', []) if isinstance(e, ast.Name)
            ]
            for nameNode in loopNames:
                varName = nameNode.id
                if isSnakeCase(varName) and not shouldExcludeName(varName):
                    self.violations['variables'].append(f"{varName} (line {node.lineno})")
            self.generic_visit(node)
def visit_With(self, node):
if node.items:
for item in node.items:
if item.optional_vars:
if isinstance(item.optional_vars, ast.Name):
varName = item.optional_vars.id
if isSnakeCase(varName) and not shouldExcludeName(varName):
self.violations['variables'].append(f"{varName} (line {node.lineno})")
self.generic_visit(node)
analyzer = NamingAnalyzer()
analyzer.visit(tree)
return violations
def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]:
"""Analyze entire codebase"""
results = defaultdict(lambda: {
'functions': 0,
'parameters': 0,
'variables': 0,
'details': {
'functions': [],
'parameters': [],
'variables': []
}
})
# Handle both absolute and relative paths
rootPath = Path(rootDir)
if not rootPath.exists():
# Try relative to current directory
rootPath = Path('.').resolve() / rootDir
if not rootPath.exists():
# Try just current directory if we're already in gateway
rootPath = Path('.')
# Find all Python files
for pyFile in rootPath.rglob('*.py'):
        # Path relative to the analysis root; route endpoints are filtered inside the visitor
filePath = str(pyFile.relative_to(rootPath))
# Skip test files and special scripts
if 'test' in filePath.lower() or 'tool_' in filePath or '__pycache__' in filePath:
continue
violations = analyzeFile(str(pyFile))
# Check if there are any violations
totalViolations = len(violations['functions']) + len(violations['parameters']) + len(violations['variables'])
if totalViolations > 0:
moduleName = filePath.replace('\\', '/')
results[moduleName]['functions'] = len(violations['functions'])
results[moduleName]['parameters'] = len(violations['parameters'])
results[moduleName]['variables'] = len(violations['variables'])
results[moduleName]['details'] = violations
return results
def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'):
"""Generate CSV report"""
with open(outputFile, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total'])
# Sort by total violations
sortedResults = sorted(
results.items(),
key=lambda x: x[1]['functions'] + x[1]['parameters'] + x[1]['variables'],
reverse=True
)
rowsWritten = 0
for module, stats in sortedResults:
total = stats['functions'] + stats['parameters'] + stats['variables']
if total > 0:
writer.writerow([
module,
stats['functions'],
stats['parameters'],
stats['variables'],
total
])
rowsWritten += 1
if rowsWritten == 0:
print("WARNING: No rows written to CSV despite finding violations!")
print(f"CSV report generated: {outputFile}")
print(f"Total modules analyzed: {len(results)}")
# Print summary
totalFuncs = sum(r['functions'] for r in results.values())
totalParams = sum(r['parameters'] for r in results.values())
totalVars = sum(r['variables'] for r in results.values())
print(f"\nSummary:")
print(f" Function names: {totalFuncs}")
print(f" Parameter names: {totalParams}")
print(f" Variable names: {totalVars}")
print(f" Total violations: {totalFuncs + totalParams + totalVars}")
if __name__ == '__main__':
print("Analyzing codebase for naming violations...")
results = analyzeCodebase('gateway')
# Write CSV to gateway directory
outputPath = Path('gateway') / 'naming_violations_report.csv'
generateCSV(results, str(outputPath))
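A few hypothetical spot-checks make the helper's rules concrete (assuming the script above is importable as a module):

assert isSnakeCase("user_name") is True        # flagged: snake_case identifier
assert isSnakeCase("userName") is False        # already camelCase
assert isSnakeCase("USER_NAME") is False       # ALL_CAPS constant, exempt
assert isSnakeCase("_internal_name") is False  # leading underscore, skipped by design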

app.py

@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler):
"""
def __init__(
self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs
self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs
):
self.log_dir = log_dir
self.filename_prefix = filename_prefix
self.current_date = None
self.current_file = None
self.logDir = logDir
self.filenamePrefix = filenamePrefix
self.currentDate = None
self.currentFile = None
# Initialize with today's file
self._update_file_if_needed()
self._updateFileIfNeeded()
# Call parent constructor with current file
super().__init__(
self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs
self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs
)
def _update_file_if_needed(self):
def _updateFileIfNeeded(self):
"""Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d")
if self.current_date != today:
self.current_date = today
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
if self.currentDate != today:
self.currentDate = today
newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
if self.current_file != new_file:
self.current_file = new_file
if self.currentFile != newFile:
self.currentFile = newFile
return True
return False
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
if self._update_file_if_needed():
if self._updateFileIfNeeded():
# Close current file and open new one
if self.stream:
self.stream.close()
self.stream = None
# Update the baseFilename for the parent class
self.baseFilename = self.current_file
self.baseFilename = self.currentFile
# Reopen the stream
if not self.delay:
self.stream = self._open()
@ -200,10 +200,10 @@ def initLogging():
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler(
log_dir=logDir,
filename_prefix="log_app",
max_bytes=rotationSize,
backup_count=backupCount,
logDir=logDir,
filenamePrefix="log_app",
maxBytes=rotationSize,
backupCount=backupCount,
encoding="utf-8",
)
fileHandler.setFormatter(fileFormatter)
@ -252,7 +252,7 @@ def initLogging():
)
def make_sqlalchemy_db_url() -> str:
def makeSqlalchemyDbUrl() -> str:
host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost")
port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432")
db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway")
@ -299,17 +299,17 @@ app = FastAPI(
# Configure OpenAPI security scheme for Swagger UI
# This adds the "Authorize" button to the /docs page
security_scheme = HTTPBearer()
securityScheme = HTTPBearer()
app.openapi_schema = None # Reset schema to regenerate with security
def custom_openapi():
def customOpenapi():
if app.openapi_schema:
return app.openapi_schema
from fastapi.openapi.utils import get_openapi
openapi_schema = get_openapi(
openapiSchema = get_openapi(
title=app.title,
version="1.0.0",
description=app.description,
@ -317,7 +317,7 @@ def custom_openapi():
)
# Add security scheme definition
openapi_schema["components"]["securitySchemes"] = {
openapiSchema["components"]["securitySchemes"] = {
"BearerAuth": {
"type": "http",
"scheme": "bearer",
@ -328,20 +328,20 @@ def custom_openapi():
# Apply security globally to all endpoints
# Individual endpoints can override this if needed
openapi_schema["security"] = [{"BearerAuth": []}]
openapiSchema["security"] = [{"BearerAuth": []}]
app.openapi_schema = openapi_schema
app.openapi_schema = openapiSchema
return app.openapi_schema
app.openapi = custom_openapi
app.openapi = customOpenapi
# Parse CORS origins from environment variable
def get_allowed_origins():
origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
def getAllowedOrigins():
originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
# Split by comma and strip whitespace
origins = [origin.strip() for origin in origins_str.split(",")]
origins = [origin.strip() for origin in originsStr.split(",")]
logger.info(f"CORS allowed origins: {origins}")
return origins
@ -349,7 +349,7 @@ def get_allowed_origins():
# CORS configuration using environment variables
app.add_middleware(
CORSMiddleware,
allow_origins=get_allowed_origins(),
allow_origins=getAllowedOrigins(),
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
allow_headers=["*"],
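For reference, the origin parsing above reduces to a split-and-strip; the values below are illustrative, not taken from the repository's config:

originsStr = "https://app.example.com, http://localhost:8080"  # hypothetical APP_ALLOWED_ORIGINS value
origins = [origin.strip() for origin in originsStr.split(",")]
assert origins == ["https://app.example.com", "http://localhost:8080"]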


@ -7,7 +7,7 @@ from pydantic import BaseModel
import threading
import time
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@ -232,7 +232,7 @@ class DatabaseConnector:
raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})")
# Add metadata
currentTime = get_utc_timestamp()
currentTime = getUtcTimestamp()
if "_createdAt" not in record:
record["_createdAt"] = currentTime
record["_createdBy"] = self.userId


@ -6,7 +6,7 @@ import uuid
from pydantic import BaseModel, Field
import threading
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@ -287,7 +287,7 @@ class DatabaseConnector:
INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
VALUES (%s, %s, %s)
""",
(table_name, initial_id, get_utc_timestamp()),
(table_name, initial_id, getUtcTimestamp()),
)
self.connection.commit()
@ -611,7 +611,7 @@ class DatabaseConnector:
raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")
# Add metadata
currentTime = get_utc_timestamp()
currentTime = getUtcTimestamp()
if "_createdAt" not in record:
record["_createdAt"] = currentTime
record["_createdBy"] = self.userId


@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase):
"Content-Type": "application/json",
}
async def read_attributes(self) -> list[TicketFieldAttribute]:
async def readAttributes(self) -> list[TicketFieldAttribute]:
"""Fetch field attributes. Uses list custom fields if listId provided; else basic fields."""
attributes: list[TicketFieldAttribute] = []
try:
@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase):
logger.error(f"ClickUp read_attributes error: {e}")
return attributes
async def read_tasks(self, *, limit: int = 0) -> list[dict]:
async def readTasks(self, *, limit: int = 0) -> list[dict]:
"""Read tasks from ClickUp, always returning full task records.
If list_id is set, read from that list; otherwise read from team.
"""
@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase):
logger.error(f"ClickUp read_tasks error: {e}")
return tasks
async def write_tasks(self, tasklist: list[dict]) -> None:
async def writeTasks(self, tasklist: list[dict]) -> None:
"""Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}"""
try:
async with aiohttp.ClientSession() as session:


@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase):
self.ticketType = ticketType
async def read_attributes(self) -> list[TicketFieldAttribute]:
async def readAttributes(self) -> list[TicketFieldAttribute]:
"""
Read field attributes from Jira by querying for a single issue
and extracting the field mappings.
@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase):
logger.error(f"Error while calling fields API: {str(e)}")
return []
async def read_tasks(self, *, limit: int = 0) -> list[dict]:
async def readTasks(self, *, limit: int = 0) -> list[dict]:
"""
Read tasks from Jira with pagination support.
@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase):
logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}")
raise
async def write_tasks(self, tasklist: list[dict]) -> None:
async def writeTasks(self, tasklist: list[dict]) -> None:
"""
Write/update tasks to Jira.


@ -26,18 +26,18 @@ class ConnectorGoogleSpeech:
"""
try:
# Get JSON key from config.ini
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
# Parse the JSON key and set up authentication
try:
credentials_info = json.loads(api_key)
credentialsInfo = json.loads(apiKey)
# Create credentials object directly (no file needed!)
from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_info(credentials_info)
credentials = service_account.Credentials.from_service_account_info(credentialsInfo)
logger.info("✅ Using Google Speech credentials from config.ini")
@ -55,8 +55,8 @@ class ConnectorGoogleSpeech:
logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
raise
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
sample_rate: int = None, channels: int = None) -> Dict:
async def speech_to_text(self, audioContent: bytes, language: str = "de-DE",
sampleRate: int = None, channels: int = None) -> Dict:
"""
Convert speech to text using Google Cloud Speech-to-Text API.
@ -71,8 +71,8 @@ class ConnectorGoogleSpeech:
"""
try:
# Auto-detect audio format if not provided
if sample_rate is None or channels is None:
validation = self.validate_audio_format(audio_content)
if sampleRate is None or channels is None:
validation = self.validate_audio_format(audioContent)
if not validation["valid"]:
return {
"success": False,
@ -80,59 +80,59 @@ class ConnectorGoogleSpeech:
"confidence": 0.0,
"error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
}
sample_rate = validation["sample_rate"]
sampleRate = validation["sample_rate"]
channels = validation["channels"]
audio_format = validation["format"]
logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch")
audioFormat = validation["format"]
logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")
logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
# Configure audio settings
audio = speech.RecognitionAudio(content=audio_content)
audio = speech.RecognitionAudio(content=audioContent)
# Determine encoding based on detected format
# Google Cloud Speech API has specific requirements for different formats
if audio_format == "webm_opus":
if audioFormat == "webm_opus":
# For WEBM OPUS, we need to ensure proper format
encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
# WEBM_OPUS requires specific sample rate handling - must match header
if sample_rate != 48000:
logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000")
sample_rate = 48000
if sampleRate != 48000:
logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000")
sampleRate = 48000
# For WEBM_OPUS, don't specify sample_rate_hertz in config
# Google Cloud will read it from the WEBM header
use_sample_rate = False
elif audio_format == "linear16":
useSampleRate = False
elif audioFormat == "linear16":
# For LINEAR16 format (PCM)
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
# Ensure sample rate is reasonable
if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000")
sample_rate = 16000
use_sample_rate = True
elif audio_format == "mp3":
if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000")
sampleRate = 16000
useSampleRate = True
elif audioFormat == "mp3":
# For MP3 format
encoding = speech.RecognitionConfig.AudioEncoding.MP3
use_sample_rate = True
elif audio_format == "flac":
useSampleRate = True
elif audioFormat == "flac":
# For FLAC format
encoding = speech.RecognitionConfig.AudioEncoding.FLAC
use_sample_rate = True
elif audio_format == "wav":
useSampleRate = True
elif audioFormat == "wav":
# For WAV format
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
use_sample_rate = True
useSampleRate = True
else:
# For unknown formats, try LINEAR16 as fallback
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
sample_rate = 16000 # Use standard sample rate
sampleRate = 16000 # Use standard sample rate
channels = 1 # Use mono
use_sample_rate = True
logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz")
useSampleRate = True
logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz")
# Build config based on format requirements
config_params = {
configParams = {
"encoding": encoding,
"audio_channel_count": channels,
"language_code": language,
@ -145,13 +145,13 @@ class ConnectorGoogleSpeech:
}
# Only add sample_rate_hertz if needed (not for WEBM_OPUS)
if use_sample_rate:
config_params["sample_rate_hertz"] = sample_rate
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}")
if useSampleRate:
configParams["sample_rate_hertz"] = sampleRate
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}")
else:
logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")
config = speech.RecognitionConfig(**config_params)
config = speech.RecognitionConfig(**configParams)
# Perform speech recognition
logger.info("Sending audio to Google Cloud Speech-to-Text...")
@ -162,12 +162,12 @@ class ConnectorGoogleSpeech:
response = self.speech_client.recognize(config=config, audio=audio)
logger.debug(f"Google Cloud response: {response}")
except Exception as api_error:
logger.error(f"Google Cloud API error: {api_error}")
except Exception as apiError:
logger.error(f"Google Cloud API error: {apiError}")
# Try with different encoding as fallback
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
logger.info("Trying fallback with LINEAR16 encoding...")
fallback_config = speech.RecognitionConfig(
fallbackConfig = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000, # Use standard sample rate
audio_channel_count=1,
@ -177,13 +177,13 @@ class ConnectorGoogleSpeech:
)
try:
response = self.speech_client.recognize(config=fallback_config, audio=audio)
response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
logger.debug(f"Google Cloud fallback response: {response}")
except Exception as fallback_error:
logger.error(f"Google Cloud fallback error: {fallback_error}")
raise api_error
except Exception as fallbackError:
logger.error(f"Google Cloud fallback error: {fallbackError}")
raise apiError
else:
raise api_error
raise apiError
# Process results
if response.results:
@ -234,18 +234,18 @@ class ConnectorGoogleSpeech:
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
# For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
if audio_format != "webm_opus":
if audioFormat != "webm_opus":
# Try LINEAR16 with detected sample rate for non-WEBM formats
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": sample_rate,
"sample_rate": sampleRate,
"channels": channels,
"use_sample_rate": True,
"description": f"LINEAR16 with {sample_rate}Hz"
"description": f"LINEAR16 with {sampleRate}Hz"
})
# For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
if audio_format == "webm_opus":
if audioFormat == "webm_opus":
# Try WEBM_OPUS without sample rate specification (let Google read from header)
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
@ -273,7 +273,7 @@ class ConnectorGoogleSpeech:
else:
# For other formats, try standard sample rates
for std_rate in [16000, 8000, 22050, 44100]:
if std_rate != sample_rate:
if std_rate != sampleRate:
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": std_rate,


@ -3,8 +3,8 @@
from typing import List, Dict, Any, Optional
from enum import Enum
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import getUtcTimestamp
import uuid
@ -26,7 +26,7 @@ class ChatStat(BaseModel):
priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation")
register_model_labels(
registerModelLabels(
"ChatStat",
{"en": "Chat Statistics", "fr": "Statistiques de chat"},
{
@ -51,7 +51,7 @@ class ChatLog(BaseModel):
message: str = Field(description="Log message")
type: str = Field(description="Log type (info, warning, error, etc.)")
timestamp: float = Field(
default_factory=get_utc_timestamp,
default_factory=getUtcTimestamp,
description="When the log entry was created (UTC timestamp in seconds)",
)
status: Optional[str] = Field(None, description="Status of the log entry")
@ -63,7 +63,7 @@ class ChatLog(BaseModel):
)
register_model_labels(
registerModelLabels(
"ChatLog",
{"en": "Chat Log", "fr": "Journal de chat"},
{
@ -96,7 +96,7 @@ class ChatDocument(BaseModel):
)
register_model_labels(
registerModelLabels(
"ChatDocument",
{"en": "Chat Document", "fr": "Document de chat"},
{
@ -133,7 +133,7 @@ class ContentMetadata(BaseModel):
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
register_model_labels(
registerModelLabels(
"ContentMetadata",
{"en": "Content Metadata", "fr": "Métadonnées du contenu"},
{
@ -157,7 +157,7 @@ class ContentItem(BaseModel):
metadata: ContentMetadata = Field(description="Content metadata")
register_model_labels(
registerModelLabels(
"ContentItem",
{"en": "Content Item", "fr": "Élément de contenu"},
{
@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel):
)
register_model_labels(
registerModelLabels(
"ChatContentExtracted",
{"en": "Extracted Content", "fr": "Contenu extrait"},
{
@ -209,7 +209,7 @@ class ChatMessage(BaseModel):
description="Sequence number of the message (set automatically)"
)
publishedAt: float = Field(
default_factory=get_utc_timestamp,
default_factory=getUtcTimestamp,
description="When the message was published (UTC timestamp in seconds)",
)
success: Optional[bool] = Field(
@ -235,7 +235,7 @@ class ChatMessage(BaseModel):
)
register_model_labels(
registerModelLabels(
"ChatMessage",
{"en": "Chat Message", "fr": "Message de chat"},
{
@ -331,14 +331,14 @@ class ChatWorkflow(BaseModel):
frontend_required=False,
)
lastActivity: float = Field(
default_factory=get_utc_timestamp,
default_factory=getUtcTimestamp,
description="Timestamp of last activity (UTC timestamp in seconds)",
frontend_type="timestamp",
frontend_readonly=True,
frontend_required=False,
)
startedAt: float = Field(
default_factory=get_utc_timestamp,
default_factory=getUtcTimestamp,
description="When the workflow started (UTC timestamp in seconds)",
frontend_type="timestamp",
frontend_readonly=True,
@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel):
)
register_model_labels(
registerModelLabels(
"ChatWorkflow",
{"en": "Chat Workflow", "fr": "Flux de travail de chat"},
{
@ -426,7 +426,7 @@ class UserInputRequest(BaseModel):
userLanguage: str = Field(default="en", description="User's preferred language")
register_model_labels(
registerModelLabels(
"UserInputRequest",
{"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
{
@ -445,7 +445,7 @@ class ActionDocument(BaseModel):
mimeType: str = Field(description="MIME type of the document")
register_model_labels(
registerModelLabels(
"ActionDocument",
{"en": "Action Document", "fr": "Document d'action"},
{
@ -485,7 +485,7 @@ class ActionResult(BaseModel):
return cls(success=False, documents=documents or [], error=error)
register_model_labels(
registerModelLabels(
"ActionResult",
{"en": "Action Result", "fr": "Résultat de l'action"},
{
@ -504,7 +504,7 @@ class ActionSelection(BaseModel):
)
register_model_labels(
registerModelLabels(
"ActionSelection",
{"en": "Action Selection", "fr": "Sélection d'action"},
{
@ -520,7 +520,7 @@ class ActionParameters(BaseModel):
)
register_model_labels(
registerModelLabels(
"ActionParameters",
{"en": "Action Parameters", "fr": "Paramètres d'action"},
{
@ -535,7 +535,7 @@ class ObservationPreview(BaseModel):
snippet: str = Field(description="Short snippet or summary")
register_model_labels(
registerModelLabels(
"ObservationPreview",
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
{
@ -558,7 +558,7 @@ class Observation(BaseModel):
)
register_model_labels(
registerModelLabels(
"Observation",
{"en": "Observation", "fr": "Observation"},
{
@ -579,7 +579,7 @@ class TaskStatus(str, Enum):
CANCELLED = "cancelled"
register_model_labels(
registerModelLabels(
"TaskStatus",
{"en": "Task Status", "fr": "Statut de la tâche"},
{
@ -599,7 +599,7 @@ class DocumentExchange(BaseModel):
)
register_model_labels(
registerModelLabels(
"DocumentExchange",
{"en": "Document Exchange", "fr": "Échange de documents"},
{
@ -650,7 +650,7 @@ class ActionItem(BaseModel):
self.error = error_message
register_model_labels(
registerModelLabels(
"ActionItem",
{"en": "Task Action", "fr": "Action de tâche"},
{
@ -683,7 +683,7 @@ class TaskResult(BaseModel):
error: Optional[str] = Field(None, description="Error message if task failed")
register_model_labels(
registerModelLabels(
"TaskResult",
{"en": "Task Result", "fr": "Résultat de tâche"},
{
@ -728,7 +728,7 @@ class TaskItem(BaseModel):
)
register_model_labels(
registerModelLabels(
"TaskItem",
{"en": "Task", "fr": "Tâche"},
{
@ -758,7 +758,7 @@ class TaskStep(BaseModel):
)
register_model_labels(
registerModelLabels(
"TaskStep",
{"en": "Task Step", "fr": "Étape de tâche"},
{
@ -805,7 +805,7 @@ class TaskHandover(BaseModel):
)
register_model_labels(
registerModelLabels(
"TaskHandover",
{"en": "Task Handover", "fr": "Transfert de tâche"},
{
@ -879,7 +879,7 @@ class ReviewResult(BaseModel):
)
register_model_labels(
registerModelLabels(
"ReviewResult",
{"en": "Review Result", "fr": "Résultat de l'évaluation"},
{
@ -904,7 +904,7 @@ class TaskPlan(BaseModel):
)
register_model_labels(
registerModelLabels(
"TaskPlan",
{"en": "Task Plan", "fr": "Plan de tâches"},
{
@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel):
)
register_model_labels(
registerModelLabels(
"PromptPlaceholder",
{"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
{
@ -943,7 +943,7 @@ class PromptBundle(BaseModel):
placeholders: List[PromptPlaceholder] = Field(default_factory=list)
register_model_labels(
registerModelLabels(
"PromptBundle",
{"en": "Prompt Bundle", "fr": "Lot d'invite"},
{
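Every call site of registerModelLabels (renamed from register_model_labels) passes a model name, model-level labels keyed by language, and per-field label dicts. A plausible signature inferred from those call sites; the real helper lives in modules.shared.attributeUtils and may differ:

from typing import Dict

_modelLabelRegistry: Dict[str, dict] = {}  # hypothetical backing store

def registerModelLabels(modelName: str, modelLabels: Dict[str, str],
                        fieldLabels: Dict[str, Dict[str, str]]) -> None:
    _modelLabelRegistry[modelName] = {"model": modelLabels, "fields": fieldLabels}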


@ -81,11 +81,11 @@ class StructuredDocument(BaseModel):
summary: Optional[str] = Field(default=None, description="Document summary")
tags: List[str] = Field(default_factory=list, description="Document tags")
def get_sections_by_type(self, content_type: str) -> List[DocumentSection]:
def getSectionsByType(self, content_type: str) -> List[DocumentSection]:
"""Get all sections of a specific content type."""
return [section for section in self.sections if section.content_type == content_type]
def get_all_tables(self) -> List[TableData]:
def getAllTables(self) -> List[TableData]:
"""Get all table data from the document."""
tables = []
for section in self.sections:
@ -94,7 +94,7 @@ class StructuredDocument(BaseModel):
tables.append(element)
return tables
def get_all_lists(self) -> List[BulletList]:
def getAllLists(self) -> List[BulletList]:
"""Get all lists from the document."""
lists = []
for section in self.sections:


@ -2,8 +2,8 @@
from typing import Dict, Any, Optional, Union
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import getUtcTimestamp
import uuid
import base64
@ -15,9 +15,9 @@ class FileItem(BaseModel):
mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False)
creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels(
registerModelLabels(
"FileItem",
{"en": "File Item", "fr": "Élément de fichier"},
{
@ -45,7 +45,7 @@ class FilePreview(BaseModel):
if isinstance(data.get("content"), bytes):
data["content"] = base64.b64encode(data["content"]).decode("utf-8")
return data
register_model_labels(
registerModelLabels(
"FilePreview",
{"en": "File Preview", "fr": "Aperçu du fichier"},
{
@ -62,7 +62,7 @@ class FileData(BaseModel):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
data: str = Field(description="File data content")
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
register_model_labels(
registerModelLabels(
"FileData",
{"en": "File Data", "fr": "Données de fichier"},
{


@ -0,0 +1,90 @@
"""
Unified JSON document schema and helpers used by both generation prompts and renderers.
This defines a single canonical template and the supported section types.
"""
from typing import List
# Canonical list of supported section types across the system
supportedSectionTypes: List[str] = [
"table",
"bullet_list",
"heading",
"paragraph",
"code_block",
"image",
]
# Canonical JSON template used for AI generation (documents array + sections)
# Rendering pipelines can select the first document and read its sections.
jsonTemplateDocument: str = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [
{
"id": "doc_1",
"title": "{{DOCUMENT_TITLE}}",
"filename": "document.json",
"sections": [
{
"id": "section_heading_example",
"content_type": "heading",
"elements": [
{"level": 1, "text": "Heading Text"}
],
"order": 0
},
{
"id": "section_paragraph_example",
"content_type": "paragraph",
"elements": [
{"text": "Paragraph text content"}
],
"order": 0
},
{
"id": "section_bullet_list_example",
"content_type": "bullet_list",
"elements": [
{
"items": ["Item 1", "Item 2"]
}
],
"order": 0
},
{
"id": "section_table_example",
"content_type": "table",
"elements": [
{
"headers": ["Column 1", "Column 2"],
"rows": [
["Row 1 Col 1", "Row 1 Col 2"],
["Row 2 Col 1", "Row 2 Col 2"]
],
"caption": "Table caption"
}
],
"order": 0
},
{
"id": "section_code_example",
"content_type": "code_block",
"elements": [
{
"code": "function example() { return true; }",
"language": "javascript"
}
],
"order": 0
}
]
}
]
}"""


@ -3,7 +3,7 @@
import uuid
from typing import Optional
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels
from modules.shared.attributeUtils import registerModelLabels
class DataNeutraliserConfig(BaseModel):
@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel):
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
register_model_labels(
registerModelLabels(
"DataNeutraliserConfig",
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
{
@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel):
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
register_model_labels(
registerModelLabels(
"DataNeutralizerAttributes",
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
{


@ -2,8 +2,8 @@
from typing import Optional
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import getUtcTimestamp
from .datamodelUam import AuthAuthority
from enum import Enum
import uuid
@ -51,7 +51,7 @@ class Token(BaseModel):
use_enum_values = True
register_model_labels(
registerModelLabels(
"Token",
{"en": "Token", "fr": "Jeton"},
{
@ -95,7 +95,7 @@ class AuthEvent(BaseModel):
frontend_required=True,
)
timestamp: float = Field(
default_factory=get_utc_timestamp,
default_factory=getUtcTimestamp,
description="Unix timestamp when the event occurred",
frontend_type="datetime",
frontend_readonly=True,
@ -131,7 +131,7 @@ class AuthEvent(BaseModel):
)
register_model_labels(
registerModelLabels(
"AuthEvent",
{"en": "Authentication Event", "fr": "Événement d'authentification"},
{


@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel):
class TicketBase(ABC):
@abstractmethod
async def read_attributes(self) -> list[TicketFieldAttribute]: ...
async def readAttributes(self) -> list[TicketFieldAttribute]: ...
@abstractmethod
async def read_tasks(self, *, limit: int = 0) -> list[dict]: ...
async def readTasks(self, *, limit: int = 0) -> list[dict]: ...
@abstractmethod
async def write_tasks(self, tasklist: list[dict]) -> None: ...
async def writeTasks(self, tasklist: list[dict]) -> None: ...
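Concrete connectors (ClickUp and Jira above) now override these three coroutines; a minimal in-memory sketch against the renamed interface (a hypothetical class for illustration only):

class ConnectorTicketInMemory(TicketBase):
    """Toy connector illustrating the renamed TicketBase contract."""

    def __init__(self, tasks: list[dict] | None = None):
        self.tasks = tasks or []

    async def readAttributes(self) -> list[TicketFieldAttribute]:
        return []  # no field metadata in this toy example

    async def readTasks(self, *, limit: int = 0) -> list[dict]:
        return self.tasks[:limit] if limit else list(self.tasks)

    async def writeTasks(self, tasklist: list[dict]) -> None:
        self.tasks.extend(tasklist)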


@ -4,8 +4,8 @@ import uuid
from typing import Optional
from enum import Enum
from pydantic import BaseModel, Field, EmailStr
from modules.shared.attributeUtils import register_model_labels
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import getUtcTimestamp
class AuthAuthority(str, Enum):
@ -34,7 +34,7 @@ class Mandate(BaseModel):
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
])
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
register_model_labels(
registerModelLabels(
"Mandate",
{"en": "Mandate", "fr": "Mandat"},
{
@ -62,8 +62,8 @@ class UserConnection(BaseModel):
{"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}},
{"value": "pending", "label": {"en": "Pending", "fr": "En attente"}},
])
connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[
{"value": "active", "label": {"en": "Active", "fr": "Actif"}},
@ -71,7 +71,7 @@ class UserConnection(BaseModel):
{"value": "none", "label": {"en": "None", "fr": "Aucun"}},
])
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels(
registerModelLabels(
"UserConnection",
{"en": "User Connection", "fr": "Connexion utilisateur"},
{
@ -113,7 +113,7 @@ class User(BaseModel):
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
])
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
register_model_labels(
registerModelLabels(
"User",
{"en": "User", "fr": "Utilisateur"},
{
@ -131,7 +131,7 @@ register_model_labels(
class UserInDB(User):
hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
register_model_labels(
registerModelLabels(
"UserInDB",
{"en": "User Access", "fr": "Accès de l'utilisateur"},
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},


@ -1,7 +1,7 @@
"""Utility datamodels: Prompt."""
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels
from modules.shared.attributeUtils import registerModelLabels
import uuid
@ -10,7 +10,7 @@ class Prompt(BaseModel):
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
register_model_labels(
registerModelLabels(
"Prompt",
{"en": "Prompt", "fr": "Invite"},
{


@ -1,8 +1,8 @@
"""Voice settings datamodel."""
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.attributeUtils import registerModelLabels
from modules.shared.timezoneUtils import getUtcTimestamp
import uuid
@ -15,11 +15,11 @@ class VoiceSettings(BaseModel):
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True)
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False)
creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
lastModified: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels(
registerModelLabels(
"VoiceSettings",
{"en": "Voice Settings", "fr": "Paramètres vocaux"},
{


@ -43,10 +43,6 @@ class NeutralizationPlayground:
'errors': errors,
}
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
processor = SharepointProcessor(self.currentUser, self.services)
return await processor.processSharepointFiles(sourcePath, targetPath)
# Cleanup attributes
def cleanAttributes(self, fileId: str) -> bool:
@ -77,49 +73,51 @@ class NeutralizationPlayground:
}
# Additional methods needed by the route
def get_config(self) -> Optional[DataNeutraliserConfig]:
def getConfig(self) -> Optional[DataNeutraliserConfig]:
"""Get neutralization configuration"""
return self.services.neutralization.getConfig()
def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig:
"""Save neutralization configuration"""
return self.services.neutralization.saveConfig(config_data)
return self.services.neutralization.saveConfig(configData)
def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]:
def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]:
"""Neutralize text content"""
return self.services.neutralization.processText(text)
def resolve_text(self, text: str) -> str:
def resolveText(self, text: str) -> str:
"""Resolve UIDs in neutralized text back to original text"""
return self.services.neutralization.resolveText(text)
def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]:
def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
try:
all_attributes = self.services.neutralization.getAttributes()
if file_id:
return [attr for attr in all_attributes if attr.fileId == file_id]
return all_attributes
allAttributes = self.services.neutralization.getAttributes()
if fileId:
return [attr for attr in allAttributes if attr.fileId == fileId]
return allAttributes
except Exception as e:
logger.error(f"Error getting attributes: {str(e)}")
return []
async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
"""Process files from SharePoint source path and store neutralized files in target path"""
return await self.processSharepointFiles(source_path, target_path)
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
processor = SharepointProcessor(self.currentUser, self.services)
return await processor.processSharepointFiles(sourcePath, targetPath)
def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Process multiple files for neutralization"""
file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')]
return self.processFiles(file_ids)
fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')]
return self.processFiles(fileIds)
def get_processing_stats(self) -> Dict[str, Any]:
def getProcessingStats(self) -> Dict[str, Any]:
"""Get neutralization processing statistics"""
return self.getStats()
def cleanup_file_attributes(self, file_id: str) -> bool:
def cleanupFileAttributes(self, fileId: str) -> bool:
"""Clean up neutralization attributes for a specific file"""
return self.cleanAttributes(file_id)
return self.cleanAttributes(fileId)
# Internal SharePoint helper module separated to keep feature logic tidy
@ -208,7 +206,7 @@ class SharepointProcessor:
siteUrl, _ = self._parseSharepointPath(sharepointPath)
if not siteUrl:
return False
siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl)
siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl)
return siteInfo is not None
except Exception:
return False
@ -219,17 +217,17 @@ class SharepointProcessor:
targetSite, targetFolder = self._parseSharepointPath(targetPath)
if not sourceSite or not targetSite:
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite)
sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite)
if not sourceSiteInfo:
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite)
targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite)
if not targetSiteInfo:
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder)
if not files:
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '')
files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '')
if files:
folders = [f for f in files if f.get('type') == 'folder']
folderNames = [f.get('name') for f in folders]
@ -251,7 +249,7 @@ class SharepointProcessor:
async def _processSingle(fileInfo: Dict[str, Any]):
try:
fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id'])
fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id'])
if not fileContent:
return {'error': f"Failed to download file: {fileInfo['name']}"}
try:
@ -260,7 +258,7 @@ class SharepointProcessor:
textContent = fileContent.decode('latin-1')
result = self.services.neutralization.processText(textContent)
neutralizedFilename = f"neutralized_{fileInfo['name']}"
uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
if 'error' in uploadResult:
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
return {


@ -204,9 +204,9 @@ class ManagerSyncDelta:
logger.info(
f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
)
resolved = await self.services.sharepoint.find_site_by_url(
resolved = await self.services.sharepoint.findSiteByUrl(
hostname=self.SHAREPOINT_HOSTNAME,
site_path=self.SHAREPOINT_SITE_PATH
sitePath=self.SHAREPOINT_SITE_PATH
)
if not resolved:
@ -223,9 +223,9 @@ class ManagerSyncDelta:
# Test site access by listing root of the drive
logger.info("Testing site access using resolved site ID...")
test_result = await self.services.sharepoint.list_folder_contents(
site_id=self.targetSite["id"],
folder_path=""
test_result = await self.services.sharepoint.listFolderContents(
siteId=self.targetSite["id"],
folderPath=""
)
if test_result is not None:
@ -293,8 +293,8 @@ class ManagerSyncDelta:
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
try:
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
excel_content = await self.services.sharepoint.download_file_by_path(
site_id=self.targetSite['id'], file_path=file_path
excel_content = await self.services.sharepoint.downloadFileByPath(
siteId=self.targetSite['id'], filePath=file_path
)
existing_data, existing_headers = self.parseExcelContent(excel_content)
except Exception:
@ -307,16 +307,16 @@ class ManagerSyncDelta:
await self.backupSharepointFile(filename=sync_file_name)
excel_bytes = self.createExcelContent(merged_data, existing_headers)
await self.services.sharepoint.upload_file(
site_id=self.targetSite['id'],
folder_path=self.SHAREPOINT_MAIN_FOLDER,
file_name=sync_file_name,
await self.services.sharepoint.uploadFile(
siteId=self.targetSite['id'],
folderPath=self.SHAREPOINT_MAIN_FOLDER,
fileName=sync_file_name,
content=excel_bytes,
)
# Import back to tickets
try:
excel_content = await self.services.sharepoint.download_file_by_path(
site_id=self.targetSite['id'], file_path=file_path
excel_content = await self.services.sharepoint.downloadFileByPath(
siteId=self.targetSite['id'], filePath=file_path
)
excel_rows, _ = self.parseExcelContent(excel_content)
self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets")
@ -333,8 +333,8 @@ class ManagerSyncDelta:
existing_data: list[dict] = []
try:
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
csv_content = await self.services.sharepoint.download_file_by_path(
site_id=self.targetSite['id'], file_path=file_path
csv_content = await self.services.sharepoint.downloadFileByPath(
siteId=self.targetSite['id'], filePath=file_path
)
csv_lines = csv_content.decode('utf-8').split('\n')
if len(csv_lines) >= 2:
@ -348,16 +348,16 @@ class ManagerSyncDelta:
await self.backupSharepointFile(filename=sync_file_name)
merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data)
csv_bytes = self.createCsvContent(merged_data, existing_headers)
await self.services.sharepoint.upload_file(
site_id=self.targetSite['id'],
folder_path=self.SHAREPOINT_MAIN_FOLDER,
file_name=sync_file_name,
await self.services.sharepoint.uploadFile(
siteId=self.targetSite['id'],
folderPath=self.SHAREPOINT_MAIN_FOLDER,
fileName=sync_file_name,
content=csv_bytes,
)
# Import from CSV
try:
csv_content = await self.services.sharepoint.download_file_by_path(
site_id=self.targetSite['id'], file_path=file_path
csv_content = await self.services.sharepoint.downloadFileByPath(
siteId=self.targetSite['id'], filePath=file_path
)
df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python')
csv_rows = df.to_dict('records')
@ -388,12 +388,12 @@ class ManagerSyncDelta:
try:
timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S")
backup_filename = f"backup_{timestamp}_{filename}"
await self.services.sharepoint.copy_file_async(
site_id=self.targetSite['id'],
source_folder=self.SHAREPOINT_MAIN_FOLDER,
source_file=filename,
dest_folder=self.SHAREPOINT_BACKUP_FOLDER,
dest_file=backup_filename,
await self.services.sharepoint.copyFileAsync(
siteId=self.targetSite['id'],
sourceFolder=self.SHAREPOINT_MAIN_FOLDER,
sourceFile=filename,
destFolder=self.SHAREPOINT_BACKUP_FOLDER,
destFile=backup_filename,
)
self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}")
return True
@ -679,7 +679,7 @@ class ManagerSyncDelta:
connectorType=connectorType,
connectorParams=connectorParams,
)
attributes = await ticket_interface.connector_ticket.read_attributes()
attributes = await ticket_interface.connector_ticket.readAttributes()
if not attributes:
logger.warning("No ticket attributes returned; nothing to write.")
return False
@ -713,7 +713,7 @@ class ManagerSyncDelta:
connectorType=connectorType,
connectorParams=connectorParams,
)
tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit)
tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit)
if not tickets:
logger.warning("No tickets returned; nothing to write.")
return False


@ -54,8 +54,6 @@ class AiObjects:
# No need to manually create connectors - they're auto-discovered
return cls()
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
"""Select the best model using dynamic model selection system."""
# Get available models from the dynamic registry


@ -10,7 +10,7 @@ import uuid
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
from modules.interfaces.interfaceDbAppAccess import AppAccess
from modules.datamodels.datamodelUam import (
User,
@ -604,8 +604,8 @@ class AppObjects:
externalUsername=externalUsername,
externalEmail=externalEmail,
status=status,
connectedAt=get_utc_timestamp(),
lastChecked=get_utc_timestamp(),
connectedAt=getUtcTimestamp(),
lastChecked=getUtcTimestamp(),
expiresAt=None, # Optional field, set to None by default
)
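
Every call site in this file adds getUtcTimestamp() to expires_in offsets or compares it against token expirations, so the renamed helper evidently returns epoch seconds. A minimal sketch under that assumption:

from datetime import datetime, timezone

def getUtcTimestamp() -> float:
    # Epoch seconds in UTC, matching how call sites add expires_in offsets to it.
    return datetime.now(timezone.utc).timestamp()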
@ -755,7 +755,7 @@ class AppObjects:
if not token.id:
token.id = str(uuid.uuid4())
if not token.createdAt:
token.createdAt = get_utc_timestamp()
token.createdAt = getUtcTimestamp()
# If replace_existing is True, delete old access tokens for this user and authority first
if replace_existing:
@ -822,7 +822,7 @@ class AppObjects:
if not token.id:
token.id = str(uuid.uuid4())
if not token.createdAt:
token.createdAt = get_utc_timestamp()
token.createdAt = getUtcTimestamp()
# Convert to dict and ensure all fields are properly set
token_dict = token.model_dump()
@ -932,7 +932,7 @@ class AppObjects:
return True
tokenUpdate = {
"status": TokenStatus.REVOKED,
"revokedAt": get_utc_timestamp(),
"revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy,
"reason": reason or "revoked",
}
@ -970,7 +970,7 @@ class AppObjects:
t["id"],
{
"status": TokenStatus.REVOKED,
"revokedAt": get_utc_timestamp(),
"revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy,
"reason": reason or "session logout",
},
@ -1008,7 +1008,7 @@ class AppObjects:
t["id"],
{
"status": TokenStatus.REVOKED,
"revokedAt": get_utc_timestamp(),
"revokedAt": getUtcTimestamp(),
"revokedBy": revokedBy,
"reason": reason or "admin revoke",
},
@ -1022,7 +1022,7 @@ class AppObjects:
def cleanupExpiredTokens(self) -> int:
"""Clean up expired tokens for all connections, returns count of cleaned tokens"""
try:
current_time = get_utc_timestamp()
current_time = getUtcTimestamp()
cleaned_count = 0
# Get all tokens
@ -1100,7 +1100,7 @@ class AppObjects:
# Update existing config
update_data = existing_config.model_dump()
update_data.update(config_data)
update_data["updatedAt"] = get_utc_timestamp()
update_data["updatedAt"] = getUtcTimestamp()
updated_config = DataNeutraliserConfig(**update_data)
self.db.recordModify(

View file

@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User
# DYNAMIC PART: Connectors to the Interface
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
# Basic Configurations
from modules.shared.configuration import APP_CONFIG
@ -66,56 +66,56 @@ class ChatObjects:
# ===== Generic Utility Methods =====
def _is_object_field(self, field_type) -> bool:
def _isObjectField(self, fieldType) -> bool:
"""Check if a field type represents a complex object (not a simple type)."""
# Simple scalar types
if field_type in (str, int, float, bool, type(None)):
if fieldType in (str, int, float, bool, type(None)):
return False
# Everything else is an object
return True
def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
"""Separate simple fields from object fields based on Pydantic model structure."""
simple_fields = {}
object_fields = {}
simpleFields = {}
objectFields = {}
# Get field information from the Pydantic model
model_fields = model_class.model_fields
modelFields = model_class.model_fields
for field_name, value in data.items():
for fieldName, value in data.items():
# Check if this field should be stored as JSONB in the database
if field_name in model_fields:
field_info = model_fields[field_name]
if fieldName in modelFields:
fieldInfo = modelFields[fieldName]
# Pydantic v2 only
field_type = field_info.annotation
fieldType = fieldInfo.annotation
# Always route relational/object fields to objectFields for separate handling
if field_name in ['documents', 'stats']:
object_fields[field_name] = value
if fieldName in ['documents', 'stats']:
objectFields[fieldName] = value
continue
# Check if this is a JSONB field (Dict, List, or complex types)
if (field_type == dict or
field_type == list or
(hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or
field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
if (fieldType == dict or
fieldType == list or
(hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or
fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
# Store as JSONB - include in simpleFields for database storage
simple_fields[field_name] = value
simpleFields[fieldName] = value
elif isinstance(value, (str, int, float, bool, type(None))):
# Simple scalar types
simple_fields[field_name] = value
simpleFields[fieldName] = value
else:
# Complex objects that should be filtered out
object_fields[field_name] = value
objectFields[fieldName] = value
else:
# Field not in model - treat as scalar if simple, otherwise filter out
if isinstance(value, (str, int, float, bool, type(None))):
simple_fields[field_name] = value
simpleFields[fieldName] = value
else:
object_fields[field_name] = value
objectFields[fieldName] = value
return simple_fields, object_fields
return simpleFields, objectFields
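
A worked example of the separation above, using an illustrative stand-in model (ChatWorkflow's full field list is not shown in this diff):

from typing import Any, Dict, List
from pydantic import BaseModel

class DemoModel(BaseModel):  # illustrative stand-in, not a model from this repo
    name: str
    execParameters: Dict[str, Any] = {}
    documents: List[Any] = []

# With data = {"name": "w1", "execParameters": {"retries": 2}, "documents": [{"id": "d1"}]}
# the method returns simpleFields = {"name": "w1", "execParameters": {"retries": 2}}
# (execParameters is kept for JSONB storage) and objectFields = {"documents": [{"id": "d1"}]}.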
def _initializeServices(self):
pass
@ -240,8 +240,8 @@ class ChatObjects:
currentAction=workflow.get("currentAction", 0),
totalTasks=workflow.get("totalTasks", 0),
totalActions=workflow.get("totalActions", 0),
lastActivity=workflow.get("lastActivity", get_utc_timestamp()),
startedAt=workflow.get("startedAt", get_utc_timestamp()),
lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
startedAt=workflow.get("startedAt", getUtcTimestamp()),
logs=logs,
messages=messages,
stats=stats,
@ -257,7 +257,7 @@ class ChatObjects:
raise PermissionError("No permission to create workflows")
# Set timestamp if not present
currentTime = get_utc_timestamp()
currentTime = getUtcTimestamp()
if "startedAt" not in workflowData:
workflowData["startedAt"] = currentTime
@ -265,10 +265,10 @@ class ChatObjects:
workflowData["lastActivity"] = currentTime
# Use generic field separation based on ChatWorkflow model
simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
# Create workflow in database
created = self.db.recordCreate(ChatWorkflow, simple_fields)
created = self.db.recordCreate(ChatWorkflow, simpleFields)
# Convert to ChatWorkflow model (empty related data for new workflow)
@ -302,13 +302,13 @@ class ChatObjects:
raise PermissionError(f"No permission to update workflow {workflowId}")
# Use generic field separation based on ChatWorkflow model
simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
# Set update time for main workflow
simple_fields["lastActivity"] = get_utc_timestamp()
simpleFields["lastActivity"] = getUtcTimestamp()
# Update main workflow in database
updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields)
updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields)
# Removed cascade writes for logs/messages/stats during workflow update.
# CUD for child entities must be executed via dedicated service methods.
@ -423,7 +423,7 @@ class ChatObjects:
role=msg.get("role", "assistant"),
status=msg.get("status", "step"),
sequenceNr=msg.get("sequenceNr", 0),
publishedAt=msg.get("publishedAt", get_utc_timestamp()),
publishedAt=msg.get("publishedAt", getUtcTimestamp()),
success=msg.get("success"),
actionId=msg.get("actionId"),
actionMethod=msg.get("actionMethod"),
@ -490,20 +490,30 @@ class ChatObjects:
messageData["actionNumber"] = workflow.currentAction
# Use generic field separation based on ChatMessage model
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
# Handle documents separately - they will be stored in normalized documents table
documents_to_create = object_fields.get("documents", [])
documents_to_create = objectFields.get("documents", [])
# Create message in normalized table using only simple fields
createdMessage = self.db.recordCreate(ChatMessage, simple_fields)
createdMessage = self.db.recordCreate(ChatMessage, simpleFields)
# Create documents in normalized documents table
created_documents = []
for doc_data in documents_to_create:
# Use the document data directly
doc_dict = doc_data
# Normalize to plain dict before assignment
if isinstance(doc_data, ChatDocument):
doc_dict = doc_data.model_dump()
elif isinstance(doc_data, dict):
doc_dict = dict(doc_data)
else:
# Attempt to coerce to ChatDocument then dump
try:
doc_dict = ChatDocument(**doc_data).model_dump()
except Exception:
logger.error("Invalid document data type for message creation")
continue
doc_dict["messageId"] = createdMessage["id"]
created_doc = self.createDocument(doc_dict)
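
The coercion above accepts ChatDocument instances, plain dicts, or anything ChatDocument can validate. The same normalization, condensed into a helper (a sketch that assumes only the ChatDocument model already used here):

def normalizeDocumentData(docData):
    # Returns a plain dict, or None when the payload cannot be coerced.
    if isinstance(docData, ChatDocument):
        return docData.model_dump()
    if isinstance(docData, dict):
        return dict(docData)
    try:
        return ChatDocument(**docData).model_dump()
    except Exception:
        return None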
@ -522,8 +532,8 @@ class ChatObjects:
role=createdMessage.get("role", "assistant"),
status=createdMessage.get("status", "step"),
sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number
publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()),
stats=object_fields.get("stats"), # Use stats from object_fields
publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()),
stats=objectFields.get("stats"), # Use stats from objectFields
roundNumber=createdMessage.get("roundNumber"),
taskNumber=createdMessage.get("taskNumber"),
actionNumber=createdMessage.get("actionNumber"),
@ -588,31 +598,41 @@ class ChatObjects:
raise PermissionError(f"No permission to modify workflow {workflowId}")
# Use generic field separation based on ChatMessage model
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
# Ensure required fields present
for key in ["role", "agentName"]:
if key not in simple_fields and key not in existingMessage:
simple_fields[key] = "assistant" if key == "role" else ""
if key not in simpleFields and key not in existingMessage:
simpleFields[key] = "assistant" if key == "role" else ""
# Ensure ID is in the dataset
if 'id' not in simple_fields:
simple_fields['id'] = messageId
if 'id' not in simpleFields:
simpleFields['id'] = messageId
# Convert createdAt to startedAt if needed
if "createdAt" in simple_fields and "startedAt" not in simple_fields:
simple_fields["startedAt"] = simple_fields["createdAt"]
del simple_fields["createdAt"]
if "createdAt" in simpleFields and "startedAt" not in simpleFields:
simpleFields["startedAt"] = simpleFields["createdAt"]
del simpleFields["createdAt"]
# Update the message with simple fields only
updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields)
updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields)
# Handle object field updates (documents, stats) inline
if 'documents' in object_fields:
documents_data = object_fields['documents']
if 'documents' in objectFields:
documents_data = objectFields['documents']
try:
for doc_data in documents_data:
doc_dict = doc_data
# Normalize to dict before mutation
if isinstance(doc_data, ChatDocument):
doc_dict = doc_data.model_dump()
elif isinstance(doc_data, dict):
doc_dict = dict(doc_data)
else:
try:
doc_dict = ChatDocument(**doc_data).model_dump()
except Exception:
logger.error("Invalid document data type for message update")
continue
doc_dict["messageId"] = messageId
self.createDocument(doc_dict)
except Exception as e:
@ -732,11 +752,9 @@ class ChatObjects:
def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
"""Creates a document for a message in normalized table."""
try:
# Validate document data
# Validate and normalize document data to dict
document = ChatDocument(**documentData)
# Create document in normalized table
created = self.db.recordCreate(ChatDocument, document)
created = self.db.recordCreate(ChatDocument, document.model_dump())
return ChatDocument(**created)
@ -785,7 +803,7 @@ class ChatObjects:
# Make sure required fields are present
if "timestamp" not in logData:
logData["timestamp"] = get_utc_timestamp()
logData["timestamp"] = getUtcTimestamp()
# Add status information if not present
if "status" not in logData and "type" in logData:
@ -882,7 +900,7 @@ class ChatObjects:
messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
for msg in messages:
# Apply timestamp filtering in Python
msg_timestamp = msg.get("publishedAt", get_utc_timestamp())
msg_timestamp = msg.get("publishedAt", getUtcTimestamp())
if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
continue
@ -900,7 +918,7 @@ class ChatObjects:
role=msg.get("role", "assistant"),
status=msg.get("status", "step"),
sequenceNr=msg.get("sequenceNr", 0),
publishedAt=msg.get("publishedAt", get_utc_timestamp()),
publishedAt=msg.get("publishedAt", getUtcTimestamp()),
success=msg.get("success"),
actionId=msg.get("actionId"),
actionMethod=msg.get("actionMethod"),
@ -923,7 +941,7 @@ class ChatObjects:
logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
for log in logs:
# Apply timestamp filtering in Python
log_timestamp = log.get("timestamp", get_utc_timestamp())
log_timestamp = log.get("timestamp", getUtcTimestamp())
if afterTimestamp is not None and log_timestamp <= afterTimestamp:
continue
@ -938,7 +956,7 @@ class ChatObjects:
stats = self.getStats(workflowId)
for stat in stats:
# Apply timestamp filtering in Python
stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp()
stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp()
if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
continue

View file

@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt
from modules.datamodels.datamodelVoice import VoiceSettings
from modules.datamodels.datamodelUam import User, Mandate
from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@ -466,7 +466,7 @@ class ComponentObjects:
# Ensure proper values, use defaults for invalid data
creationDate = file.get("creationDate")
if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0:
creationDate = get_utc_timestamp()
creationDate = getUtcTimestamp()
fileName = file.get("fileName")
if not fileName or fileName == "None":
@ -503,7 +503,7 @@ class ComponentObjects:
# Get creation date from record or use current time
creationDate = file.get("creationDate")
if not creationDate:
creationDate = get_utc_timestamp()
creationDate = getUtcTimestamp()
return FileItem(
id=file.get("id"),
@ -881,9 +881,9 @@ class ComponentObjects:
# Ensure timestamps are set for validation
settings_data = filteredSettings[0]
if not settings_data.get("creationDate"):
settings_data["creationDate"] = get_utc_timestamp()
settings_data["creationDate"] = getUtcTimestamp()
if not settings_data.get("lastModified"):
settings_data["lastModified"] = get_utc_timestamp()
settings_data["lastModified"] = getUtcTimestamp()
return VoiceSettings(**settings_data)
@ -931,7 +931,7 @@ class ComponentObjects:
raise ValueError(f"Voice settings not found for user {userId}")
# Update lastModified timestamp
updateData["lastModified"] = get_utc_timestamp()
updateData["lastModified"] = getUtcTimestamp()
# Update voice settings record
success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData)

View file

@ -31,7 +31,7 @@ class TicketInterface:
self.task_sync_definition = task_sync_definition
async def exportTicketsAsList(self) -> list[dict]:
tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0)
tickets: list[dict] = await self.connector_ticket.readTasks(limit=0)
transformed_tasks = self._transformTicketRecords(tickets, includePut=True)
# Return plain dictionaries filtered by presence of ID
rows: list[dict] = []
@ -57,7 +57,7 @@ class TicketInterface:
if fields:
updates.append({"ID": task_id, "fields": fields})
if updates:
await self.connector_ticket.write_tasks(updates)
await self.connector_ticket.writeTasks(updates)
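
Together, the renamed connector methods form a read-transform-write round trip. A hedged sketch of how a caller might drive it (the "Status" field in the payload is hypothetical):

async def roundTrip(ticket_interface) -> None:
    tickets = await ticket_interface.connector_ticket.readTasks(limit=0)
    transformed = ticket_interface._transformTicketRecords(tickets, includePut=True)
    updates = [
        {"ID": row["ID"], "fields": {"Status": "Synced"}}  # hypothetical field update
        for row in transformed if row.get("ID")
    ]
    if updates:
        await ticket_interface.connector_ticket.writeTasks(updates)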
def _transformTicketRecords(
self, tasks: list[dict], includePut: bool = False

View file

@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List
from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech
from modules.datamodels.datamodelVoice import VoiceSettings
from modules.datamodels.datamodelUam import User
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@ -269,7 +269,7 @@ class VoiceObjects:
logger.info(f"Creating voice settings: {settingsData}")
# Add timestamps
currentTime = get_utc_timestamp()
currentTime = getUtcTimestamp()
settingsData["creationDate"] = currentTime
settingsData["lastModified"] = currentTime
@ -298,7 +298,7 @@ class VoiceObjects:
logger.info(f"Updating voice settings for user {userId}: {settingsData}")
# Add last modified timestamp
settingsData["lastModified"] = get_utc_timestamp()
settingsData["lastModified"] = getUtcTimestamp()
# Create updated VoiceSettings object
voiceSettings = VoiceSettings(**settingsData)

View file

@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority,
from modules.datamodels.datamodelSecurity import Token
from modules.security.auth import getCurrentUser, limiter
from modules.interfaces.interfaceDbAppObjects import getInterface
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
# Configure logger
logger = logging.getLogger(__name__)
@ -64,7 +64,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str,
if not expires_at:
return "none", None
current_time = get_utc_timestamp()
current_time = getUtcTimestamp()
# Add 5 minute buffer for proactive refresh
buffer_time = 5 * 60 # 5 minutes in seconds
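
With that buffer a token is reported as expiring five minutes before it actually lapses; the comparison presumably reduces to the following (inferred from the fragment above, not the full helper):

def isTokenExpiring(expires_at: float, buffer_time: int = 5 * 60) -> bool:
    # True when the token lapses within the proactive-refresh window.
    return expires_at - getUtcTimestamp() <= buffer_time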
@ -247,7 +247,7 @@ async def update_connection(
setattr(connection, field, value)
# Update lastChecked timestamp using UTC timestamp
connection.lastChecked = get_utc_timestamp()
connection.lastChecked = getUtcTimestamp()
# Update connection - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
@ -382,7 +382,7 @@ async def disconnect_service(
# Update connection status
connection.status = ConnectionStatus.INACTIVE
connection.lastChecked = get_utc_timestamp()
connection.lastChecked = getUtcTimestamp()
# Update connection record - models now handle timestamp serialization automatically
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())

View file

@ -35,7 +35,7 @@ async def get_neutralization_config(
"""Get data neutralization configuration"""
try:
service = NeutralizationPlayground(currentUser)
config = service.get_config()
config = service.getConfig()
if not config:
# Return default config instead of 404
@ -69,7 +69,7 @@ async def save_neutralization_config(
"""Save or update data neutralization configuration"""
try:
service = NeutralizationPlayground(currentUser)
config = service.save_config(config_data)
config = service.saveConfig(config_data)
return config
@ -99,7 +99,7 @@ async def neutralize_text(
)
service = NeutralizationPlayground(currentUser)
result = service.neutralize_text(text, file_id)
result = service.neutralizeText(text, file_id)
return result
@ -130,7 +130,7 @@ async def resolve_text(
)
service = NeutralizationPlayground(currentUser)
resolved_text = service.resolve_text(text)
resolved_text = service.resolveText(text)
return {"resolved_text": resolved_text}
@ -153,7 +153,7 @@ async def get_neutralization_attributes(
"""Get neutralization attributes, optionally filtered by file ID"""
try:
service = NeutralizationPlayground(currentUser)
attributes = service.get_attributes(fileId)
attributes = service.getAttributes(fileId)
return attributes
@ -183,7 +183,7 @@ async def process_sharepoint_files(
)
service = NeutralizationPlayground(currentUser)
result = await service.process_sharepoint_files(source_path, target_path)
result = await service.processSharepointFiles(source_path, target_path)
return result
@ -212,7 +212,7 @@ async def batch_process_files(
)
service = NeutralizationPlayground(currentUser)
result = service.batch_neutralize_files(files_data)
result = service.batchNeutralizeFiles(files_data)
return result
@ -234,7 +234,7 @@ async def get_neutralization_stats(
"""Get neutralization processing statistics"""
try:
service = NeutralizationPlayground(currentUser)
stats = service.get_processing_stats()
stats = service.getProcessingStats()
return stats
@ -255,7 +255,7 @@ async def cleanup_file_attributes(
"""Clean up neutralization attributes for a specific file"""
try:
service = NeutralizationPlayground(currentUser)
success = service.cleanup_file_attributes(fileId)
success = service.cleanupFileAttributes(fileId)
if success:
return {"message": f"Successfully cleaned up attributes for file {fileId}"}

View file

@ -181,9 +181,9 @@ async def reset_user_password(
# Log password reset
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_security_event(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
audit_logger.logSecurityEvent(
userId=str(currentUser.id),
mandateId=str(currentUser.mandateId),
action="password_reset",
details=f"Reset password for user {userId}"
)
@ -257,9 +257,9 @@ async def change_password(
# Log password change
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_security_event(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
audit_logger.logSecurityEvent(
userId=str(currentUser.id),
mandateId=str(currentUser.mandateId),
action="password_change",
details="User changed their own password"
)
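
These call sites assume an auditLogger module whose methods now take camelCase keywords. A minimal facade consistent with them (a sketch only; the real module is not part of this diff):

import logging

class AuditLogger:
    def __init__(self) -> None:
        self._log = logging.getLogger("audit")

    def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
        self._log.info("security userId=%s mandateId=%s action=%s details=%s", userId, mandateId, action, details)

audit_logger = AuditLogger()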

View file

@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG
from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
from modules.security.auth import getCurrentUser, limiter
from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
# Configure logger
logger = logging.getLogger(__name__)
@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenRefresh=token_response.get("refresh_token", ""),
tokenType="bearer",
expiresAt=jwt_expires_at.timestamp(),
createdAt=get_utc_timestamp()
createdAt=getUtcTimestamp()
)
# Save access token (no connectionId)
@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
logger.info(f"Updating connection {connection_id} for user {user.username}")
# Update connection with external service details
connection.status = ConnectionStatus.ACTIVE
connection.lastChecked = get_utc_timestamp()
connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
connection.lastChecked = getUtcTimestamp()
connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
connection.externalId = user_info.get("id")
connection.externalUsername = user_info.get("email")
connection.externalEmail = user_info.get("email")
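
connection.expiresAt is plain epoch arithmetic: with getUtcTimestamp() = 1700000000 and expires_in = 3600, the connection expires at 1700003600. createExpirationTimestamp, used for the token below, presumably wraps the same addition:

def createExpirationTimestamp(expires_in: int) -> float:
    # Sketch under that assumption: now (epoch seconds) plus the provider's TTL.
    return getUtcTimestamp() + expires_in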
@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
createdAt=get_utc_timestamp()
expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
createdAt=getUtcTimestamp()
)
interface.saveConnectionToken(token)
@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
id: '{connection.id}',
status: 'connected',
type: 'google',
lastChecked: {get_utc_timestamp()},
expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
lastChecked: {getUtcTimestamp()},
expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
}}
}}, '*');
// Wait for message to be sent before closing
@ -592,11 +592,11 @@ async def logout(
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
audit_logger.logUserAccess(
userId=str(currentUser.id),
mandateId=str(currentUser.mandateId),
action="logout",
success_info="google_auth_logout"
successInfo="google_auth_logout"
)
except Exception:
# Don't fail if audit logging fails
@ -726,12 +726,12 @@ async def refresh_token(
# Update the connection status and timing
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
google_connection.lastChecked = get_utc_timestamp()
google_connection.lastChecked = getUtcTimestamp()
google_connection.status = ConnectionStatus.ACTIVE
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump())
# Calculate time until expiration
current_time = get_utc_timestamp()
current_time = getUtcTimestamp()
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
return {

View file

@ -131,11 +131,11 @@ async def login(
# Log successful login
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(user.id),
mandate_id=str(user.mandateId),
audit_logger.logUserAccess(
userId=str(user.id),
mandateId=str(user.mandateId),
action="login",
success_info="local_auth_success"
successInfo="local_auth_success"
)
except Exception:
# Don't fail if audit logging fails
@ -159,11 +159,11 @@ async def login(
# Log failed login attempt
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id="unknown",
mandate_id="unknown",
audit_logger.logUserAccess(
userId="unknown",
mandateId="unknown",
action="login",
success_info=f"failed: {error_msg}"
successInfo=f"failed: {error_msg}"
)
except Exception:
# Don't fail if audit logging fails
@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
audit_logger.logUserAccess(
userId=str(currentUser.id),
mandateId=str(currentUser.mandateId),
action="logout",
success_info=f"revoked_tokens: {revoked}"
successInfo=f"revoked_tokens: {revoked}"
)
except Exception:
# Don't fail if audit logging fails

View file

@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu
from modules.datamodels.datamodelSecurity import Token
from modules.security.auth import getCurrentUser, limiter
from modules.security.jwtService import createAccessToken
from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
# Configure logger
logger = logging.getLogger(__name__)
@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
createdAt=get_utc_timestamp()
expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
createdAt=getUtcTimestamp()
)
# Save access token (no connectionId)
@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=jwt_token,
tokenType="bearer",
expiresAt=jwt_expires_at.timestamp(),
createdAt=get_utc_timestamp()
createdAt=getUtcTimestamp()
)
# Save JWT access token
@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
logger.info(f"Updating connection {connection_id} for user {user.username}")
# Update connection with external service details
connection.status = ConnectionStatus.ACTIVE
connection.lastChecked = get_utc_timestamp()
connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
connection.lastChecked = getUtcTimestamp()
connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
connection.externalId = user_info.get("id")
connection.externalUsername = user_info.get("userPrincipalName")
connection.externalEmail = user_info.get("mail")
@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
tokenAccess=token_response["access_token"],
tokenRefresh=token_response.get("refresh_token", ""),
tokenType=token_response.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
createdAt=get_utc_timestamp()
expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
createdAt=getUtcTimestamp()
)
@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
id: '{connection.id}',
status: 'connected',
type: 'msft',
lastChecked: {get_utc_timestamp()},
expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
lastChecked: {getUtcTimestamp()},
expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
}}
}}, '*');
// Wait for message to be sent before closing
@ -467,11 +467,11 @@ async def logout(
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
audit_logger.logUserAccess(
userId=str(currentUser.id),
mandateId=str(currentUser.mandateId),
action="logout",
success_info="microsoft_auth_logout"
successInfo="microsoft_auth_logout"
)
except Exception:
# Don't fail if audit logging fails
@ -575,27 +575,27 @@ async def refresh_token(
from modules.security.tokenManager import TokenManager
token_manager = TokenManager()
refreshed_token = token_manager.refresh_token(current_token)
if refreshed_token:
refreshedToken = token_manager.refreshToken(current_token)
if refreshedToken:
# Save the new connection token (which will automatically replace old ones)
appInterface.saveConnectionToken(refreshed_token)
appInterface.saveConnectionToken(refreshedToken)
# Update the connection's expiration time
msft_connection.expiresAt = float(refreshed_token.expiresAt)
msft_connection.lastChecked = get_utc_timestamp()
msft_connection.expiresAt = float(refreshedToken.expiresAt)
msft_connection.lastChecked = getUtcTimestamp()
msft_connection.status = ConnectionStatus.ACTIVE
# Save updated connection
appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump())
# Calculate time until expiration
current_time = get_utc_timestamp()
expires_in = int(refreshed_token.expiresAt - current_time)
current_time = getUtcTimestamp()
expiresIn = int(refreshedToken.expiresAt - current_time)
return {
"message": "Token refreshed successfully",
"expires_at": refreshed_token.expiresAt,
"expires_in_seconds": expires_in
"expires_at": refreshedToken.expiresAt,
"expires_in_seconds": expiresIn
}
else:
raise HTTPException(

View file

@ -18,26 +18,26 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
# Store active WebSocket connections
active_connections: Dict[str, WebSocket] = {}
activeConnections: Dict[str, WebSocket] = {}
class ConnectionManager:
def __init__(self):
self.active_connections: List[WebSocket] = []
self.activeConnections: List[WebSocket] = []
async def connect(self, websocket: WebSocket, connection_id: str):
async def connect(self, websocket: WebSocket, connectionId: str):
await websocket.accept()
self.active_connections.append(websocket)
active_connections[connection_id] = websocket
logger.info(f"WebSocket connected: {connection_id}")
self.activeConnections.append(websocket)
activeConnections[connectionId] = websocket
logger.info(f"WebSocket connected: {connectionId}")
def disconnect(self, websocket: WebSocket, connection_id: str):
if websocket in self.active_connections:
self.active_connections.remove(websocket)
if connection_id in active_connections:
del active_connections[connection_id]
logger.info(f"WebSocket disconnected: {connection_id}")
def disconnect(self, websocket: WebSocket, connectionId: str):
if websocket in self.activeConnections:
self.activeConnections.remove(websocket)
if connectionId in activeConnections:
del activeConnections[connectionId]
logger.info(f"WebSocket disconnected: {connectionId}")
async def send_personal_message(self, message: dict, websocket: WebSocket):
async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
try:
await websocket.send_text(json.dumps(message))
except Exception as e:
@ -45,10 +45,10 @@ class ConnectionManager:
manager = ConnectionManager()
def get_voice_interface(current_user: User) -> VoiceObjects:
def _getVoiceInterface(currentUser: User) -> VoiceObjects:
"""Get voice interface instance with user context."""
try:
return getVoiceInterface(current_user)
return getVoiceInterface(currentUser)
except Exception as e:
logger.error(f"Failed to initialize voice interface: {e}")
raise HTTPException(
@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects:
@router.post("/speech-to-text")
async def speech_to_text(
audio_file: UploadFile = File(...),
audioFile: UploadFile = File(...),
language: str = Form("de-DE"),
current_user: User = Depends(getCurrentUser)
currentUser: User = Depends(getCurrentUser)
):
"""Convert speech to text using Google Cloud Speech-to-Text API."""
try:
logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}")
logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")
# Read audio file
audio_content = await audio_file.read()
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
audioContent = await audioFile.read()
logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
# Get voice interface
voice_interface = get_voice_interface(current_user)
voiceInterface = _getVoiceInterface(currentUser)
# Validate audio format
validation = voice_interface.validateAudioFormat(audio_content)
validation = voiceInterface.validateAudioFormat(audioContent)
if not validation["valid"]:
raise HTTPException(
@ -83,8 +83,8 @@ async def speech_to_text(
)
# Perform speech recognition
result = await voice_interface.speechToText(
audioContent=audio_content,
result = await voiceInterface.speechToText(
audioContent=audioContent,
language=language
)
@ -95,7 +95,7 @@ async def speech_to_text(
"confidence": result["confidence"],
"language": result["language"],
"audio_info": {
"size": len(audio_content),
"size": len(audioContent),
"format": validation["format"],
"estimated_duration": validation.get("estimated_duration", 0)
}
@ -118,13 +118,13 @@ async def speech_to_text(
@router.post("/translate")
async def translate_text(
text: str = Form(...),
source_language: str = Form("de"),
target_language: str = Form("en"),
current_user: User = Depends(getCurrentUser)
sourceLanguage: str = Form("de"),
targetLanguage: str = Form("en"),
currentUser: User = Depends(getCurrentUser)
):
"""Translate text using Google Cloud Translation API."""
try:
logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})")
logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})")
if not text.strip():
raise HTTPException(
@ -133,13 +133,13 @@ async def translate_text(
)
# Get voice interface
voice_interface = get_voice_interface(current_user)
voiceInterface = _getVoiceInterface(currentUser)
# Perform translation
result = await voice_interface.translateText(
result = await voiceInterface.translateText(
text=text,
sourceLanguage=source_language,
targetLanguage=target_language
sourceLanguage=sourceLanguage,
targetLanguage=targetLanguage
)
if result["success"]:
@ -167,21 +167,21 @@ async def translate_text(
@router.post("/realtime-interpreter")
async def realtime_interpreter(
audio_file: UploadFile = File(...),
from_language: str = Form("de-DE"),
to_language: str = Form("en-US"),
connection_id: str = Form(None),
current_user: User = Depends(getCurrentUser)
audioFile: UploadFile = File(...),
fromLanguage: str = Form("de-DE"),
toLanguage: str = Form("en-US"),
connectionId: str = Form(None),
currentUser: User = Depends(getCurrentUser)
):
"""Real-time interpreter: speech to translated text using Google Cloud APIs."""
try:
logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
logger.info(f" From: {from_language} -> To: {to_language}")
logger.info(f" MIME type: {audio_file.content_type}")
logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
logger.info(f" From: {fromLanguage} -> To: {toLanguage}")
logger.info(f" MIME type: {audioFile.content_type}")
# Read audio file
audio_content = await audio_file.read()
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
audioContent = await audioFile.read()
logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
# Save audio file for debugging with correct extension
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
@ -192,10 +192,10 @@ async def realtime_interpreter(
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
# Get voice interface
voice_interface = get_voice_interface(current_user)
voiceInterface = _getVoiceInterface(currentUser)
# Validate audio format
validation = voice_interface.validateAudioFormat(audio_content)
validation = voiceInterface.validateAudioFormat(audioContent)
if not validation["valid"]:
raise HTTPException(
@ -204,10 +204,10 @@ async def realtime_interpreter(
)
# Perform complete pipeline: Speech-to-Text + Translation
result = await voice_interface.speechToTranslatedText(
audioContent=audio_content,
fromLanguage=from_language,
toLanguage=to_language
result = await voiceInterface.speechToTranslatedText(
audioContent=audioContent,
fromLanguage=fromLanguage,
toLanguage=toLanguage
)
if result["success"]:
@ -223,7 +223,7 @@ async def realtime_interpreter(
"source_language": result["source_language"],
"target_language": result["target_language"],
"audio_info": {
"size": len(audio_content),
"size": len(audioContent),
"format": validation["format"],
"estimated_duration": validation.get("estimated_duration", 0)
}
@ -249,7 +249,7 @@ async def text_to_speech(
text: str = Form(...),
language: str = Form("de-DE"),
voice: str = Form(None),
current_user: User = Depends(getCurrentUser)
currentUser: User = Depends(getCurrentUser)
):
"""Convert text to speech using Google Cloud Text-to-Speech."""
try:
@ -261,8 +261,8 @@ async def text_to_speech(
detail="Empty text provided for text-to-speech"
)
voice_interface = get_voice_interface(current_user)
result = await voice_interface.textToSpeech(
voiceInterface = _getVoiceInterface(currentUser)
result = await voiceInterface.textToSpeech(
text=text,
languageCode=language,
voiceName=voice
@ -294,13 +294,13 @@ async def text_to_speech(
)
@router.get("/languages")
async def get_available_languages(current_user: User = Depends(getCurrentUser)):
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
"""Get available languages from Google Cloud Text-to-Speech."""
try:
logger.info("🌐 Getting available languages from Google Cloud TTS")
voice_interface = get_voice_interface(current_user)
result = await voice_interface.getAvailableLanguages()
voiceInterface = _getVoiceInterface(currentUser)
result = await voiceInterface.getAvailableLanguages()
if result["success"]:
return {
@ -324,21 +324,21 @@ async def get_available_languages(current_user: User = Depends(getCurrentUser)):
@router.get("/voices")
async def get_available_voices(
language_code: Optional[str] = None,
current_user: User = Depends(getCurrentUser)
languageCode: Optional[str] = None,
currentUser: User = Depends(getCurrentUser)
):
"""Get available voices from Google Cloud Text-to-Speech."""
try:
logger.info(f"🎤 Getting available voices, language filter: {language_code}")
logger.info(f"🎤 Getting available voices, language filter: {languageCode}")
voice_interface = get_voice_interface(current_user)
result = await voice_interface.getAvailableVoices(languageCode=language_code)
voiceInterface = _getVoiceInterface(currentUser)
result = await voiceInterface.getAvailableVoices(languageCode=languageCode)
if result["success"]:
return {
"success": True,
"voices": result["voices"],
"language_filter": language_code
"language_filter": languageCode
}
else:
raise HTTPException(
@ -356,11 +356,11 @@ async def get_available_voices(
)
@router.get("/health")
async def health_check(current_user: User = Depends(getCurrentUser)):
async def health_check(currentUser: User = Depends(getCurrentUser)):
"""Health check for Google Cloud voice services."""
try:
voice_interface = get_voice_interface(current_user)
test_result = await voice_interface.healthCheck()
voiceInterface = _getVoiceInterface(currentUser)
test_result = await voiceInterface.healthCheck()
return test_result
@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)):
}
@router.get("/settings")
async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
"""Get voice settings for the current user."""
try:
logger.info(f"Getting voice settings for user: {current_user.id}")
logger.info(f"Getting voice settings for user: {currentUser.id}")
# Get voice interface
voice_interface = get_voice_interface(current_user)
voiceInterface = _getVoiceInterface(currentUser)
# Get or create voice settings for the user
voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id)
voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id)
if voice_settings:
# Return user settings
@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
@router.post("/settings")
async def save_voice_settings(
settings: Dict[str, Any] = Body(...),
current_user: User = Depends(getCurrentUser)
currentUser: User = Depends(getCurrentUser)
):
"""Save voice settings for the current user."""
try:
logger.info(f"Saving voice settings for user: {current_user.id}")
logger.info(f"Saving voice settings for user: {currentUser.id}")
logger.info(f"Settings: {settings}")
# Validate required settings
required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
for field in required_fields:
requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
for field in requiredFields:
if field not in settings:
raise HTTPException(
status_code=400,
@ -448,23 +448,23 @@ async def save_voice_settings(
settings["targetLanguage"] = "en-US"
# Get voice interface
voice_interface = get_voice_interface(current_user)
voiceInterface = _getVoiceInterface(currentUser)
# Check if settings already exist for this user
existing_settings = voice_interface.getVoiceSettings(current_user.id)
existing_settings = voiceInterface.getVoiceSettings(currentUser.id)
if existing_settings:
# Update existing settings
logger.info(f"Updating existing voice settings for user {current_user.id}")
updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings)
logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}")
logger.info(f"Updating existing voice settings for user {currentUser.id}")
updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings)
logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}")
else:
# Create new settings
logger.info(f"Creating new voice settings for user {current_user.id}")
logger.info(f"Creating new voice settings for user {currentUser.id}")
# Add userId to settings
settings["userId"] = current_user.id
created_settings = voice_interface.createVoiceSettings(settings)
logger.info(f"Voice settings created for user {current_user.id}: {created_settings}")
settings["userId"] = currentUser.id
created_settings = voiceInterface.createVoiceSettings(settings)
logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}")
return {
"success": True,
@ -486,25 +486,25 @@ async def save_voice_settings(
@router.websocket("/ws/realtime-interpreter")
async def websocket_realtime_interpreter(
websocket: WebSocket,
user_id: str = "default",
from_language: str = "de-DE",
to_language: str = "en-US"
userId: str = "default",
fromLanguage: str = "de-DE",
toLanguage: str = "en-US"
):
"""WebSocket endpoint for real-time voice interpretation"""
connection_id = f"realtime_{user_id}_{from_language}_{to_language}"
connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}"
try:
await manager.connect(websocket, connection_id)
await manager.connect(websocket, connectionId)
# Send connection confirmation
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "connected",
"connection_id": connection_id,
"connection_id": connectionId,
"message": "Connected to real-time interpreter"
}, websocket)
# Initialize voice interface
voice_interface = get_voice_interface(User(id=user_id))
voiceInterface = _getVoiceInterface(User(id=userId))
while True:
# Receive message from client
@ -515,7 +515,7 @@ async def websocket_realtime_interpreter(
# Process audio chunk
try:
# Decode base64 audio data
audio_data = base64.b64decode(message["data"])
audioData = base64.b64decode(message["data"])
# For now, just acknowledge receipt
# In a full implementation, this would:
@ -524,9 +524,9 @@ async def websocket_realtime_interpreter(
# 3. Send partial results back
# 4. Handle translation
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "audio_received",
"chunk_size": len(audio_data),
"chunk_size": len(audioData),
"timestamp": message.get("timestamp")
}, websocket)
@ -539,7 +539,7 @@ async def websocket_realtime_interpreter(
elif message["type"] == "ping":
# Respond to ping
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "pong",
"timestamp": message.get("timestamp")
}, websocket)
@ -548,32 +548,32 @@ async def websocket_realtime_interpreter(
logger.warning(f"Unknown message type: {message['type']}")
except WebSocketDisconnect:
manager.disconnect(websocket, connection_id)
logger.info(f"Client disconnected: {connection_id}")
manager.disconnect(websocket, connectionId)
logger.info(f"Client disconnected: {connectionId}")
except Exception as e:
logger.error(f"WebSocket error: {e}")
manager.disconnect(websocket, connection_id)
manager.disconnect(websocket, connectionId)
@router.websocket("/ws/speech-to-text")
async def websocket_speech_to_text(
websocket: WebSocket,
user_id: str = "default",
userId: str = "default",
language: str = "de-DE"
):
"""WebSocket endpoint for real-time speech-to-text"""
connection_id = f"stt_{user_id}_{language}"
connectionId = f"stt_{userId}_{language}"
try:
await manager.connect(websocket, connection_id)
await manager.connect(websocket, connectionId)
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "connected",
"connection_id": connection_id,
"connection_id": connectionId,
"message": "Connected to speech-to-text"
}, websocket)
# Initialize voice interface
voice_interface = get_voice_interface(User(id=user_id))
voiceInterface = _getVoiceInterface(User(id=userId))
while True:
data = await websocket.receive_text()
@ -581,12 +581,12 @@ async def websocket_speech_to_text(
if message["type"] == "audio_chunk":
try:
audio_data = base64.b64decode(message["data"])
audioData = base64.b64decode(message["data"])
# Process audio chunk
# This would integrate with Google Cloud Speech-to-Text streaming API
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "transcription_result",
"text": "Audio chunk received", # Placeholder
"confidence": 0.95,
@ -595,39 +595,39 @@ async def websocket_speech_to_text(
except Exception as e:
logger.error(f"Error processing audio: {e}")
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "error",
"error": f"Failed to process audio: {str(e)}"
}, websocket)
elif message["type"] == "ping":
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "pong",
"timestamp": message.get("timestamp")
}, websocket)
except WebSocketDisconnect:
manager.disconnect(websocket, connection_id)
manager.disconnect(websocket, connectionId)
except Exception as e:
logger.error(f"WebSocket error: {e}")
manager.disconnect(websocket, connection_id)
manager.disconnect(websocket, connectionId)
@router.websocket("/ws/text-to-speech")
async def websocket_text_to_speech(
websocket: WebSocket,
user_id: str = "default",
userId: str = "default",
language: str = "de-DE",
voice: str = "de-DE-Wavenet-A"
):
"""WebSocket endpoint for real-time text-to-speech"""
connection_id = f"tts_{user_id}_{language}_{voice}"
connectionId = f"tts_{userId}_{language}_{voice}"
try:
await manager.connect(websocket, connection_id)
await manager.connect(websocket, connectionId)
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "connected",
"connection_id": connection_id,
"connection_id": connectionId,
"message": "Connected to text-to-speech"
}, websocket)
@ -643,7 +643,7 @@ async def websocket_text_to_speech(
# This would integrate with Google Cloud Text-to-Speech API
# For now, send a placeholder response
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "audio_data",
"audio": "base64_encoded_audio_here", # Placeholder
"format": "mp3"
@ -651,19 +651,19 @@ async def websocket_text_to_speech(
except Exception as e:
logger.error(f"Error processing text-to-speech: {e}")
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "error",
"error": f"Failed to process text: {str(e)}"
}, websocket)
elif message["type"] == "ping":
await manager.send_personal_message({
await manager.sendPersonalMessage({
"type": "pong",
"timestamp": message.get("timestamp")
}, websocket)
except WebSocketDisconnect:
manager.disconnect(websocket, connection_id)
manager.disconnect(websocket, connectionId)
except Exception as e:
logger.error(f"WebSocket error: {e}")
manager.disconnect(websocket, connection_id)
manager.disconnect(websocket, connectionId)
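
All three WebSocket endpoints speak the same small JSON protocol (connected, audio_chunk, ping/pong). A hypothetical client for the speech-to-text socket, assuming the third-party websockets package and a locally running server:

import asyncio
import base64
import json
import websockets  # third-party dependency, assumed for this sketch

async def demoClient() -> None:
    uri = "ws://localhost:8000/voice-google/ws/speech-to-text?userId=demo&language=de-DE"
    async with websockets.connect(uri) as ws:
        print(json.loads(await ws.recv()))  # "connected" handshake
        chunk = {"type": "audio_chunk", "data": base64.b64encode(b"\x00\x01").decode()}
        await ws.send(json.dumps(chunk))
        print(json.loads(await ws.recv()))  # placeholder "transcription_result"

asyncio.run(demoClient())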

View file

@ -9,7 +9,7 @@ from fastapi import Response
from jose import jwt
from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_now
from modules.shared.timezoneUtils import getUtcNow
# Config
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T
import uuid
toEncode["jti"] = str(uuid.uuid4())
expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> Tuple[str, "datetime"]:
toEncode["jti"] = str(uuid.uuid4())
toEncode["type"] = "refresh"
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
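
Both helpers return a (token, expiry) pair; a short usage sketch with a hypothetical subject claim:

accessToken, accessExpiry = createAccessToken({"sub": "user-123"})
refreshToken, refreshExpiry = createRefreshToken({"sub": "user-123"})
# Both expiry values are timezone-aware datetimes derived from getUtcNow().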

View file

@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable
from modules.datamodels.datamodelSecurity import Token
from modules.datamodels.datamodelUam import AuthAuthority
from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp
logger = logging.getLogger(__name__)
@ -27,54 +27,54 @@ class TokenManager:
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID")
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Microsoft OAuth token using refresh token"""
try:
logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}")
logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}")
logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
if not self.msft_client_id or not self.msft_client_secret:
logger.error("Microsoft OAuth configuration not found")
return None
# Microsoft token refresh endpoint
token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}")
tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}")
# Prepare refresh request
data = {
"client_id": self.msft_client_id,
"client_secret": self.msft_client_secret,
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"refresh_token": refreshToken,
"scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read"
}
logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
# Make refresh request
with httpx.Client(timeout=30.0) as client:
logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint")
response = client.post(token_url, data=data)
logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}")
logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint")
response = client.post(tokenUrl, data=data)
logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}")
if response.status_code == 200:
token_data = response.json()
logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token")
tokenData = response.json()
logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token")
# Create new token
new_token = Token(
userId=user_id,
newToken = Token(
userId=userId,
authority=AuthAuthority.MSFT,
connectionId=old_token.connectionId, # Preserve connection ID
tokenAccess=token_data["access_token"],
tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided
tokenType=token_data.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
createdAt=get_utc_timestamp()
connectionId=oldToken.connectionId, # Preserve connection ID
tokenAccess=tokenData["access_token"],
tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided
tokenType=tokenData.get("token_type", "bearer"),
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
createdAt=getUtcTimestamp()
)
logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}")
return new_token
logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}")
return newToken
else:
logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}")
return None
@ -83,70 +83,70 @@ class TokenManager:
logger.error(f"Error refreshing Microsoft token: {str(e)}")
return None
def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
"""Refresh Google OAuth token using refresh token"""
try:
logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}")
logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}")
logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
if not self.google_client_id or not self.google_client_secret:
logger.error("Google OAuth configuration not found")
return None
# Google token refresh endpoint
token_url = "https://oauth2.googleapis.com/token"
logger.debug(f"refresh_google_token: Using token URL: {token_url}")
tokenUrl = "https://oauth2.googleapis.com/token"
logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}")
# Prepare refresh request
data = {
"client_id": self.google_client_id,
"client_secret": self.google_client_secret,
"grant_type": "refresh_token",
"refresh_token": refresh_token
"refresh_token": refreshToken
}
logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
# Make refresh request
with httpx.Client(timeout=30.0) as client:
logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint")
response = client.post(token_url, data=data)
logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}")
logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint")
response = client.post(tokenUrl, data=data)
logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}")
if response.status_code == 200:
token_data = response.json()
logger.debug(f"refresh_google_token: Token refresh successful, creating new token")
tokenData = response.json()
logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token")
# Validate the response contains required fields
if "access_token" not in token_data:
if "access_token" not in tokenData:
logger.error("Google token refresh response missing access_token")
return None
# Create new token
new_token = Token(
userId=user_id,
newToken = Token(
userId=userId,
authority=AuthAuthority.GOOGLE,
connectionId=old_token.connectionId, # Preserve connection ID
tokenAccess=token_data["access_token"],
tokenRefresh=token_data.get("refresh_token", refresh_token), # Use new refresh token if provided
tokenType=token_data.get("token_type", "bearer"),
expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
createdAt=get_utc_timestamp()
connectionId=oldToken.connectionId, # Preserve connection ID
tokenAccess=tokenData["access_token"],
tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided
tokenType=tokenData.get("token_type", "bearer"),
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
createdAt=getUtcTimestamp()
)
logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}")
return new_token
logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}")
return newToken
else:
error_details = response.text
logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}")
errorDetails = response.text
logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}")
# Handle specific error cases
if response.status_code == 400:
try:
error_data = response.json()
error_code = error_data.get("error")
if error_code == "invalid_grant":
errorData = response.json()
errorCode = errorData.get("error")
if errorCode == "invalid_grant":
logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate")
elif error_code == "invalid_client":
elif errorCode == "invalid_client":
logger.error("Google OAuth client configuration is invalid")
except Exception:
pass  # Best-effort parse of the error payload; the failure is already logged above
@ -157,55 +157,55 @@ class TokenManager:
logger.error(f"Error refreshing Google token: {str(e)}")
return None
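# For reference: on success, both providers return a standard OAuth 2.0 token
# payload shaped roughly like the sketch below (values illustrative only):
#   {
#     "access_token": "eyJ0eXAi...",   # consumed as tokenData["access_token"]
#     "refresh_token": "0.AXoA...",    # optional; the old refresh token is kept if absent
#     "token_type": "bearer",
#     "expires_in": 3600               # seconds, fed to createExpirationTimestamp()
#   }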
def refresh_token(self, old_token: Token) -> Optional[Token]:
def refreshToken(self, oldToken: Token) -> Optional[Token]:
"""Refresh an expired token using the appropriate OAuth service"""
try:
logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}")
logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}")
logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}")
logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}")
# Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
# Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
try:
now_ts = get_utc_timestamp()
created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0
seconds_since_last_refresh = now_ts - created_ts
if seconds_since_last_refresh < 10 * 60:
nowTs = getUtcTimestamp()
createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0
secondsSinceLastRefresh = nowTs - createdTs
if secondsSinceLastRefresh < 10 * 60:
logger.info(
f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. "
f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)."
f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. "
f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)."
)
# Return the existing token to avoid caller errors while preventing provider rate limits
return old_token
return oldToken
except Exception:
# If any issue reading timestamps, proceed with normal refresh to be safe
pass
if not old_token.tokenRefresh:
logger.warning(f"No refresh token available for {old_token.authority}")
if not oldToken.tokenRefresh:
logger.warning(f"No refresh token available for {oldToken.authority}")
return None
# Route to appropriate refresh method
if old_token.authority == AuthAuthority.MSFT:
logger.debug(f"refresh_token: Refreshing Microsoft token")
return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token)
elif old_token.authority == AuthAuthority.GOOGLE:
logger.debug(f"refresh_token: Refreshing Google token")
return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token)
if oldToken.authority == AuthAuthority.MSFT:
logger.debug(f"refreshToken: Refreshing Microsoft token")
return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
elif oldToken.authority == AuthAuthority.GOOGLE:
logger.debug(f"refreshToken: Refreshing Google token")
return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
else:
logger.warning(f"Unknown authority for token refresh: {old_token.authority}")
logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
return None
except Exception as e:
logger.error(f"Error refreshing token: {str(e)}")
return None
def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
"""Ensure a token is fresh; refresh if expiring within threshold.
Args:
token: Existing token to validate/refresh.
seconds_before_expiry: Threshold window to proactively refresh.
save_callback: Optional function to persist a refreshed token.
secondsBeforeExpiry: Threshold window to proactively refresh.
saveCallback: Optional function to persist a refreshed token.
Returns:
A fresh token (refreshed or original) or None if refresh failed.
@ -214,31 +214,31 @@ class TokenManager:
if token is None:
return None
now_ts = get_utc_timestamp()
expires_at = token.expiresAt or 0
nowTs = getUtcTimestamp()
expiresAt = token.expiresAt or 0
# If token expires within the threshold, try to refresh
if expires_at and expires_at < (now_ts + seconds_before_expiry):
if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry):
logger.info(
f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
f"ensureFreshToken: Token for connection {token.connectionId} expiring soon "
f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh."
)
refreshed = self.refresh_token(token)
refreshed = self.refreshToken(token)
if refreshed:
if save_callback is not None:
if saveCallback is not None:
try:
save_callback(refreshed)
saveCallback(refreshed)
except Exception as e:
logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}")
return refreshed
else:
logger.warning("ensure_fresh_token: Token refresh failed")
logger.warning("ensureFreshToken: Token refresh failed")
return None
# Token is sufficiently fresh
return token
except Exception as e:
logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
return None
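# Usage sketch for ensureFreshToken; loadToken and saveToken are hypothetical
# persistence helpers, not names from this codebase.
def keepConnectionFresh(tokenManager: "TokenManager", connectionId: str, loadToken, saveToken):
    token = loadToken(connectionId)
    fresh = tokenManager.ensureFreshToken(
        token,
        secondsBeforeExpiry=15 * 60,  # refresh anything expiring within 15 minutes
        saveCallback=saveToken,
    )
    if fresh is None:
        raise RuntimeError("Token refresh failed; user re-authentication required")
    return fresh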
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
@ -256,10 +256,10 @@ class TokenManager:
token = interfaceDbApp.getConnectionToken(connectionId)
if not token:
return None
return self.ensure_fresh_token(
return self.ensureFreshToken(
token,
seconds_before_expiry=secondsBeforeExpiry,
save_callback=lambda t: interfaceDbApp.saveConnectionToken(t)
secondsBeforeExpiry=secondsBeforeExpiry,
saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
)
except Exception as e:
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")

View file

@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
from typing import Callable
import asyncio
from modules.security.tokenRefreshService import token_refresh_service
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@ -135,7 +135,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
try:
# Perform proactive refresh in background
asyncio.create_task(self._proactive_refresh_tokens(user_id))
self.last_check[user_id] = get_utc_timestamp()
self.last_check[user_id] = getUtcTimestamp()
except Exception as e:
logger.warning(f"Error scheduling proactive refresh: {str(e)}")
@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
Check if we should perform proactive refresh for this user
"""
try:
current_time = get_utc_timestamp()
current_time = getUtcTimestamp()
last_check = self.last_check.get(user_id, 0)
# Check every 5 minutes

View file

@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues.
import logging
from typing import Dict, Any
from modules.datamodels.datamodelUam import UserConnection, AuthAuthority
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared.auditLogger import audit_logger
logger = logging.getLogger(__name__)
@ -24,7 +24,7 @@ class TokenRefreshService:
def _is_rate_limited(self, connection_id: str) -> bool:
"""Check if connection is rate limited for refresh attempts"""
now = get_utc_timestamp()
now = getUtcTimestamp()
if connection_id not in self.rate_limit_map:
return False
@ -39,7 +39,7 @@ class TokenRefreshService:
def _record_refresh_attempt(self, connection_id: str) -> None:
"""Record a refresh attempt for rate limiting"""
now = get_utc_timestamp()
now = getUtcTimestamp()
if connection_id not in self.rate_limit_map:
self.rate_limit_map[connection_id] = []
self.rate_limit_map[connection_id].append(now)
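# The two helpers above implement a sliding-window limiter over epoch timestamps.
# A self-contained sketch of the same idea; WINDOW_SECONDS and MAX_ATTEMPTS are
# illustrative, not the service's actual constants.
WINDOW_SECONDS = 300
MAX_ATTEMPTS = 3

def isRateLimited(attempts: list, now: float) -> bool:
    recent = [t for t in attempts if now - t < WINDOW_SECONDS]
    attempts[:] = recent  # prune entries that fell outside the window
    return len(recent) >= MAX_ATTEMPTS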
@ -60,14 +60,14 @@ class TokenRefreshService:
token_manager = TokenManager()
# Attempt to refresh the token
refreshed_token = token_manager.refresh_token(current_token)
if refreshed_token:
refreshedToken = token_manager.refreshToken(current_token)
if refreshedToken:
# Save the refreshed token
interface.saveConnectionToken(refreshed_token)
interface.saveConnectionToken(refreshedToken)
# Update connection status
interface.db.recordModify(UserConnection, connection.id, {
"lastChecked": get_utc_timestamp(),
"lastChecked": getUtcTimestamp(),
"expiresAt": refreshed_token.expiresAt
})
@ -75,9 +75,9 @@ class TokenRefreshService:
# Log audit event
try:
audit_logger.log_security_event(
user_id=str(connection.userId),
mandate_id="system",
audit_logger.logSecurityEvent(
userId=str(connection.userId),
mandateId="system",
action="token_refresh",
details=f"Google token refreshed for connection {connection.id}"
)
@ -109,14 +109,14 @@ class TokenRefreshService:
token_manager = TokenManager()
# Attempt to refresh the token
refreshed_token = token_manager.refresh_token(current_token)
if refreshed_token:
refreshedToken = token_manager.refreshToken(current_token)
if refreshedToken:
# Save the refreshed token
interface.saveConnectionToken(refreshed_token)
interface.saveConnectionToken(refreshedToken)
# Update connection status
interface.db.recordModify(UserConnection, connection.id, {
"lastChecked": get_utc_timestamp(),
"lastChecked": getUtcTimestamp(),
"expiresAt": refreshed_token.expiresAt
})
@ -124,9 +124,9 @@ class TokenRefreshService:
# Log audit event
try:
audit_logger.log_security_event(
user_id=str(connection.userId),
mandate_id="system",
audit_logger.logSecurityEvent(
userId=str(connection.userId),
mandateId="system",
action="token_refresh",
details=f"Microsoft token refreshed for connection {connection.id}"
)
@ -234,7 +234,7 @@ class TokenRefreshService:
refreshed_count = 0
failed_count = 0
rate_limited_count = 0
current_time = get_utc_timestamp()
current_time = getUtcTimestamp()
five_minutes = 5 * 60 # 5 minutes in seconds
# Process each connection

View file

@ -11,18 +11,18 @@ class PublicService:
- Optional nameFilter predicate for allow-list patterns
"""
def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None):
self._target = target
self._functions_only = functions_only
self._name_filter = name_filter
self._functionsOnly = functionsOnly
self._nameFilter = nameFilter
def __getattr__(self, name: str):
if name.startswith('_'):
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
if self._name_filter and not self._name_filter(name):
if self._nameFilter and not self._nameFilter(name):
raise AttributeError(f"'{name}' not exposed by policy")
attr = getattr(self._target, name)
if self._functions_only and not callable(attr):
if self._functionsOnly and not callable(attr):
raise AttributeError(f"'{name}' is not a function")
return attr
@ -30,8 +30,8 @@ class PublicService:
names = [
n for n in dir(self._target)
if not n.startswith('_')
and (not self._functions_only or callable(getattr(self._target, n, None)))
and (self._name_filter(n) if self._name_filter else True)
and (not self._functionsOnly or callable(getattr(self._target, n, None)))
and (self._nameFilter(n) if self._nameFilter else True)
]
return sorted(names)
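# Usage sketch: wrapping a minimal service so only whitelisted public callables
# are reachable. DemoService and its methods are illustrative.
class DemoService:
    def render(self) -> str:
        return "ok"
    def _internal(self) -> str:
        return "hidden"

svc = PublicService(DemoService(), functionsOnly=True, nameFilter=lambda n: n == "render")
svc.render()  # allowed
# svc._internal  -> AttributeError: attribute is private
# svc.missing   -> AttributeError: not exposed by policy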
@ -70,7 +70,7 @@ class Services:
self.sharepoint = PublicService(SharepointService(self))
from .serviceAi.mainServiceAi import AiService
self.ai = PublicService(AiService(self))
self.ai = PublicService(AiService(self), functionsOnly=False)
from .serviceTicket.mainServiceTicket import TicketService
self.ticket = PublicService(TicketService(self))

View file

@ -1,30 +1,26 @@
import json
import logging
from typing import Dict, Any, List, Optional, Union
import time
from typing import Dict, Any, List, Optional, Tuple, Union, Callable
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.services.serviceAi.subCoreAi import SubCoreAi
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent
from modules.shared.jsonUtils import (
extractJsonString,
repairBrokenJson,
extractSectionsFromDocument,
buildContinuationContext
)
logger = logging.getLogger(__name__)
# Rebuild the model to resolve forward references
AiCallRequest.model_rebuild()
class AiService:
"""Lightweight AI service orchestrator that delegates to specialized sub-modules.
Manager delegates to specialized sub-modules:
- SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
- SubDocumentProcessing: Document chunking, processing, and merging logic
- SubDocumentGeneration: Single-file and multi-file document generation
The main service acts as a coordinator:
1. Manages lazy initialization of sub-modules
2. Delegates operations to appropriate sub-modules
3. Maintains the same public API for backward compatibility
"""
"""AI service with core operations integrated."""
def __init__(self, serviceCenter=None) -> None:
"""Initialize AI service with service center access.
@ -34,64 +30,638 @@ class AiService:
"""
self.services = serviceCenter
# Only depend on interfaces
self.aiObjects = None # Will be initialized in create()
self._extractionService = None # Lazy initialization
self._coreAi = None # Lazy initialization
self._documentProcessor = None # Lazy initialization
self._documentGenerator = None # Lazy initialization
self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized()
# Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
self.extractionService = None
self.documentProcessor = None
@property
def extractionService(self):
"""Lazy initialization of extraction service."""
if self._extractionService is None:
logger.info("Lazy initializing ExtractionService...")
self._extractionService = ExtractionService(self.services)
return self._extractionService
@property
def coreAi(self):
"""Lazy initialization of core AI service."""
if self._coreAi is None:
if self.aiObjects is None:
raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
logger.info("Lazy initializing SubCoreAi...")
self._coreAi = SubCoreAi(self.services, self.aiObjects)
return self._coreAi
@property
def documentProcessor(self):
"""Lazy initialization of document processing service."""
if self._documentProcessor is None:
logger.info("Lazy initializing SubDocumentProcessing...")
self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
return self._documentProcessor
@property
def documentGenerator(self):
"""Lazy initialization of document generation service."""
if self._documentGenerator is None:
logger.info("Lazy initializing SubDocumentGeneration...")
self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
return self._documentGenerator
def _initializeSubmodules(self):
"""Initialize all submodules after aiObjects is ready."""
if self.aiObjects is None:
raise RuntimeError("aiObjects must be initialized before initializing submodules")
if self.extractionService is None:
logger.info("Initializing ExtractionService...")
self.extractionService = ExtractionService(self.services)
if self.documentProcessor is None:
logger.info("Initializing SubDocumentProcessing...")
self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
async def _ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized."""
"""Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None:
logger.info("Lazy initializing AiObjects...")
self.aiObjects = await AiObjects.create()
logger.info("AiObjects initialization completed")
# Initialize submodules after aiObjects is ready
self._initializeSubmodules()
@classmethod
async def create(cls, serviceCenter=None) -> "AiService":
"""Create AiService instance with all connectors initialized."""
"""Create AiService instance with all connectors and submodules initialized."""
logger.info("AiService.create() called")
instance = cls(serviceCenter)
logger.info("AiService created, about to call AiObjects.create()...")
instance.aiObjects = await AiObjects.create()
logger.info("AiObjects.create() completed")
# Initialize all submodules after aiObjects is ready
instance._initializeSubmodules()
logger.info("AiService submodules initialized")
return instance
# Helper methods
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
"""
Build full prompt by replacing placeholders with their content.
Uses the new {{KEY:placeholder}} format.
Args:
prompt: The base prompt template
placeholders: Dictionary of placeholder key-value pairs
Returns:
Prompt with placeholders replaced
"""
if not placeholders:
return prompt
full_prompt = prompt
for placeholder, content in placeholders.items():
# Skip if content is None or empty
if content is None:
continue
# Replace {{KEY:placeholder}}
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
return full_prompt
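# Equivalent standalone sketch of the {{KEY:placeholder}} substitution above,
# with illustrative values:
def fillPlaceholders(prompt: str, placeholders: dict) -> str:
    for key, content in placeholders.items():
        if content is None:
            continue  # skip empty placeholders, as above
        prompt = prompt.replace(f"{{{{KEY:{key}}}}}", str(content))
    return prompt

# fillPlaceholders("Summarize {{KEY:doc}} for {{KEY:audience}}.",
#                  {"doc": "the Q3 report", "audience": "the board"})
# -> "Summarize the Q3 report for the board."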
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
"""Analyze prompt to determine appropriate AiCallOptions parameters."""
try:
# Get dynamic enum values from Pydantic models
operationTypes = [e.value for e in OperationTypeEnum]
priorities = [e.value for e in PriorityEnum]
processingModes = [e.value for e in ProcessingModeEnum]
# Create analysis prompt for AI to determine operation type and parameters
analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
PROMPT TO ANALYZE:
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}
Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processingModes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)
Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""
# Use AI to analyze the prompt
request = AiCallRequest(
prompt=analysisPrompt,
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.SPEED,
processingMode=ProcessingModeEnum.BASIC,
compressPrompt=True,
compressContext=False
)
)
response = await self.aiObjects.call(request)
# Parse AI response
try:
jsonStart = response.content.find('{')
jsonEnd = response.content.rfind('}') + 1
if jsonStart != -1 and jsonEnd > jsonStart:
analysis = json.loads(response.content[jsonStart:jsonEnd])
# Map string values to enums
operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
priority = PriorityEnum(analysis.get('priority', 'balanced'))
processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
return AiCallOptions(
operationType=operationType,
priority=priority,
processingMode=processingMode,
compressPrompt=analysis.get('compressPrompt', True),
compressContext=analysis.get('compressContext', True)
)
except Exception as e:
logger.warning(f"Failed to parse AI analysis response: {e}")
except Exception as e:
logger.warning(f"Prompt analysis failed: {e}")
# Fallback to default options
return AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC
)
async def _callAiWithLooping(
self,
prompt: str,
options: AiCallOptions,
debugPrefix: str = "ai_call",
promptBuilder: Optional[Callable] = None,
promptArgs: Optional[Dict[str, Any]] = None,
operationId: Optional[str] = None
) -> str:
"""
Shared core function for AI calls with repair-based looping system.
Automatically repairs broken JSON and continues generation seamlessly.
Args:
prompt: The prompt to send to AI
options: AI call configuration options
debugPrefix: Prefix for debug file names
promptBuilder: Optional function to rebuild prompts for continuation
promptArgs: Optional arguments for prompt builder
operationId: Optional operation ID for progress tracking
Returns:
Complete AI response after all iterations
"""
maxIterations = 50 # Prevent infinite loops
iteration = 0
allSections = [] # Accumulate all sections across iterations
lastRawResponse = None # Store last raw JSON response for continuation
while iteration < maxIterations:
iteration += 1
# Update progress for iteration start
if operationId:
if iteration == 1:
self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
else:
# For continuation iterations, show progress incrementally
baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4) # Progress from 0.5 to 0.9 over maxIterations iterations
self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})")
# Build iteration prompt
if len(allSections) > 0 and promptBuilder and promptArgs:
# This is a continuation - build continuation context with raw JSON and rebuild prompt
continuationContext = buildContinuationContext(allSections, lastRawResponse)
if not lastRawResponse:
logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
# Rebuild prompt with continuation context using the provided prompt builder
iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
else:
# First iteration - use original prompt
iterationPrompt = prompt
# Make AI call
try:
if operationId and iteration == 1:
self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
request = AiCallRequest(
prompt=iterationPrompt,
context="",
options=options
)
# Write the ACTUAL prompt sent to AI
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
else:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
response = await self.aiObjects.call(request)
result = response.content
# Update progress after AI call
if operationId:
if iteration == 1:
self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
else:
progress = 0.6 + (min(iteration - 1, 10) * 0.03)
self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")
# Write raw AI response to debug file
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
else:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
# Emit stats for this iteration
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.call.{debugPrefix}.iteration_{iteration}"
)
if not result or not result.strip():
logger.warning(f"Iteration {iteration}: Empty response, stopping")
break
# Store raw response for continuation (even if broken)
lastRawResponse = result
# Check for complete_response flag in raw response (before parsing)
import re
if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
pass # Flag detected, will stop in _shouldContinueGeneration
# Extract sections from response (handles both valid and broken JSON)
extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)
# Update progress after parsing
if operationId:
if extractedSections:
self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")
if not extractedSections:
# If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
if iteration > 1 and not wasJsonComplete:
logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
continue
# Otherwise, stop if no sections
logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
break
# Add new sections to accumulator
allSections.extend(extractedSections)
# Check if we should continue (completion detection)
if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
continue
else:
# Done - build final result
if operationId:
self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
break
except Exception as e:
logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
break
if iteration >= maxIterations:
logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")
# Build final result from accumulated sections
final_result = self._buildFinalResultFromSections(allSections)
# Write final result to debug file
self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
return final_result
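# A promptBuilder passed to _callAiWithLooping must be an async callable that
# accepts the keys of promptArgs as keyword arguments plus continuationContext;
# it is only invoked from the second iteration onward. A minimal conforming
# sketch with illustrative names:
async def buildMyPrompt(userPrompt: str, continuationContext: str = None) -> str:
    if continuationContext:
        return f"{userPrompt}\n\nContinue from where the previous output stopped:\n{continuationContext}"
    return userPrompt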
def _extractSectionsFromResponse(
self,
result: str,
iteration: int,
debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool]:
"""
Extract sections from AI response, handling both valid and broken JSON.
Uses repair mechanism for broken JSON.
Checks for "complete_response": true flag to determine completion.
Returns (sections, wasJsonComplete)
"""
# First, try to parse as valid JSON
try:
extracted = extractJsonString(result)
parsed_result = json.loads(extracted)
# Check if AI marked response as complete
isComplete = parsed_result.get("complete_response", False) == True
# Extract sections from parsed JSON
sections = extractSectionsFromDocument(parsed_result)
# If AI marked as complete, always return as complete
if isComplete:
return sections, True
# If in continuation mode (iteration > 1), continuation responses are expected to be fragments
# A fragment with 0 extractable sections means JSON is incomplete - need another iteration
if len(sections) == 0 and iteration > 1:
return sections, False # Mark as incomplete so loop continues
# First iteration with 0 sections means empty response - stop
if len(sections) == 0:
return sections, True # Complete but empty
return sections, True # JSON was complete with sections
except json.JSONDecodeError as e:
# Broken JSON - try repair mechanism (normal in iterative generation)
self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
# Try to repair
repaired_json = repairBrokenJson(result)
if repaired_json:
# Extract sections from repaired JSON
sections = extractSectionsFromDocument(repaired_json)
return sections, False # JSON was broken but repaired
else:
# Repair failed - log error
logger.error(f"Iteration {iteration}: All repair strategies failed")
return [], False
except Exception as e:
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
return [], False
def _shouldContinueGeneration(
self,
allSections: List[Dict[str, Any]],
iteration: int,
wasJsonComplete: bool,
rawResponse: Optional[str] = None
) -> bool:
"""
Determine if generation should continue based on JSON completeness and complete_response flag.
Returns True if we should continue, False if done.
"""
if len(allSections) == 0:
return True # No sections yet, continue
# Check for complete_response flag in raw response
if rawResponse:
import re
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
return False
# If JSON was complete (and no complete_response flag), we're done
# If JSON was broken and repaired, continue to get more content
if wasJsonComplete:
return False
else:
return True
def _buildFinalResultFromSections(
self,
allSections: List[Dict[str, Any]]
) -> str:
"""
Build final JSON result from accumulated sections.
"""
if not allSections:
return ""
# Build documents structure
# Assuming single document for now
documents = [{
"id": "doc_1",
"title": "Generated Document", # This should come from prompt
"filename": "document.json",
"sections": allSections
}]
result = {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": documents
}
return json.dumps(result, indent=2)
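# For two accumulated sections, the method above produces JSON of this shape
# (section bodies elided):
# {
#   "metadata": {"split_strategy": "single_document", "source_documents": [],
#                "extraction_method": "ai_generation"},
#   "documents": [{"id": "doc_1", "title": "Generated Document",
#                  "filename": "document.json", "sections": [...]}]
# }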
# Public API Methods
# Planning AI Call
async def callAiPlanning(
self,
prompt: str,
placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
"""
Planning AI call for task planning, action planning, action selection, etc.
Always uses static parameters optimized for planning tasks.
Args:
prompt: The planning prompt
placeholders: Optional list of placeholder replacements
Returns:
Planning JSON response
"""
await self._ensureAiObjectsInitialized()
# Planning calls always use static parameters
options = AiCallOptions(
operationType=OperationTypeEnum.PLAN,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
compressPrompt=False,
compressContext=False
)
# Build full prompt with placeholders
if placeholders:
placeholdersDict = {p.label: p.content for p in placeholders}
fullPrompt = self._buildPromptWithPlaceholders(prompt, placeholdersDict)
else:
fullPrompt = prompt
# Root-cause fix: planning must return raw single-shot JSON, not section-based output
request = AiCallRequest(
prompt=fullPrompt,
context="",
options=options
)
# Debug: persist prompt/response for analysis
self.services.utils.writeDebugFile(fullPrompt, "plan_prompt")
response = await self.aiObjects.call(request)
result = response.content or ""
self.services.utils.writeDebugFile(result, "plan_response")
return result
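# Usage sketch for a planning call; the prompt and placeholder values are
# illustrative. Planning calls return raw JSON, so callers typically parse it.
async def planNextAction(aiService: "AiService") -> dict:
    raw = await aiService.callAiPlanning(
        "Plan the next action for: {{KEY:task}}",
        placeholders=[PromptPlaceholder(label="task", content="draft the report")],
    )
    return json.loads(raw)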
# Document Generation AI Call
async def callAiDocuments(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
options: Optional[AiCallOptions] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
"""
Document generation AI call for all non-planning calls.
Uses the current unified path with extraction and generation.
Args:
prompt: The main prompt for the AI call
documents: Optional list of documents to process
options: AI call configuration options
outputFormat: Optional output format for document generation
title: Optional title for generated documents
Returns:
AI response as string, or dict with documents if outputFormat is specified
"""
await self._ensureAiObjectsInitialized()
# Create separate operationId for detailed progress tracking
workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"
# Start progress tracking for this operation
self.services.workflow.progressLogStart(
aiOperationId,
"AI call with documents",
"Document Generation",
f"Format: {outputFormat or 'text'}"
)
try:
# AI connectors are already ensured above via self._ensureAiObjectsInitialized();
# a second pass through self.services.ai would be blocked by PublicService's
# underscore policy (hasattr on a '_'-prefixed name returns False), so none is made here.
if options is None or (hasattr(options, 'operationType') and options.operationType is None):
# Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
options = await self._analyzePromptAndCreateOptions(prompt)
# Route image-generation requests directly to image pipeline to avoid JSON loop
imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"}
opType = getattr(options, "operationType", None)
fmt = (outputFormat or "").lower() if outputFormat else None
isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats)
if isImageRequest:
self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
imageResponse = await self.generateImage(prompt, options=options)
self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated")
self.services.workflow.progressLogFinish(aiOperationId, True)
return imageResponse
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
# Compressing would truncate the template structure and confuse the AI
if outputFormat: # Document generation with structured output
if not options:
options = AiCallOptions()
options.compressPrompt = False # JSON templates must NOT be truncated
options.compressContext = False # Context also should not be compressed
# Handle document generation with specific output format using unified approach
if outputFormat:
# Use unified generation method for all document generation
if documents and len(documents) > 0:
self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
extracted_content = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
else:
self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
extracted_content = None
self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
# First call without continuation context
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
# Prepare prompt builder arguments for continuation
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": extracted_content
}
self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId
)
self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
# Parse the generated JSON (extract fenced/embedded JSON first)
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
logger.error(f"JSON content length: {len(generated_json)}")
logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
# Write the problematic JSON to debug file
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
# Render to final format using the existing renderer
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, title or "Generated Document", prompt, self
)
# Build result in the expected format
result = {
"success": True,
"content": generated_data,
"documents": [{
"documentName": f"generated.{outputFormat}",
"documentData": rendered_content,
"mimeType": mime_type,
"title": title or "Generated Document"
}],
"is_multi_file": False,
"format": outputFormat,
"title": title,
"split_strategy": "single",
"total_documents": 1,
"processed_documents": 1
}
# Log AI response for debugging
self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
self.services.workflow.progressLogFinish(aiOperationId, True)
return result
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": f"Rendering failed: {str(e)}"}
# Handle text calls (no output format specified)
self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
if documents:
# Use document processing for text calls with documents
result = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
else:
# Use shared core function for direct text calls
result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)
self.services.workflow.progressLogFinish(aiOperationId, True)
return result
except Exception as e:
logger.error(f"Error in callAiDocuments: {str(e)}")
self.services.workflow.progressLogFinish(aiOperationId, False)
raise
# AI Image Analysis
async def readImage(
self,
@ -102,7 +672,64 @@ class AiService:
) -> str:
"""Call AI for image analysis using interface.call() with contentParts."""
await self._ensureAiObjectsInitialized()
return await self.coreAi.readImage(prompt, imageData, mimeType, options)
try:
# Check if imageData is valid
if not imageData:
error_msg = "No image data provided"
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
# Always use IMAGE_ANALYSE operation type for image processing
if options is None:
options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
else:
# Override the operation type to ensure image analysis
options.operationType = OperationTypeEnum.IMAGE_ANALYSE
# Create content parts with image data
from modules.datamodels.datamodelExtraction import ContentPart
import base64
# ContentPart.data must be a string - convert bytes to base64 if needed
if isinstance(imageData, bytes):
imageDataStr = base64.b64encode(imageData).decode('utf-8')
else:
# Already a base64 string
imageDataStr = imageData
imagePart = ContentPart(
id="image_0",
parentId=None,
label="Image",
typeGroup="image",
mimeType=mimeType or "image/jpeg",
data=imageDataStr, # Must be a string (base64 encoded)
metadata={"imageAnalysis": True}
)
# Create request with content parts
request = AiCallRequest(
prompt=prompt,
context="",
options=options,
contentParts=[imagePart]
)
response = await self.aiObjects.call(request)
result = response.content
# Check if result is valid
if not result or (isinstance(result, str) and not result.strip()):
error_msg = f"No response from AI image analysis (result: {repr(result)})"
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
return result
except Exception as e:
logger.error(f"Error in AI image analysis: {str(e)}")
return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
@ -115,34 +742,19 @@ class AiService:
) -> Dict[str, Any]:
"""Generate an image using AI using interface.generateImage()."""
await self._ensureAiObjectsInitialized()
return await self.coreAi.generateImage(prompt, size, quality, style, options)
# Core AI Methods - Delegating to SubCoreAi
async def callAiPlanning(
self,
prompt: str,
placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
"""Planning AI call for task planning, action planning, action selection, etc."""
await self._ensureAiObjectsInitialized()
# Always use "json" for planning calls since they return JSON
return await self.coreAi.callAiPlanning(prompt, placeholders)
async def callAiDocuments(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
options: Optional[AiCallOptions] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
"""Document generation AI call for all non-planning calls."""
await self._ensureAiObjectsInitialized()
return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
"""Sanitize prompt content to prevent injection attacks and ensure safe presentation."""
return sanitizePromptContent(content, contentType)
try:
response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
# Emit stats for image generation
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.generate.image"
)
return response
except Exception as e:
logger.error(f"Error in AI image generation: {str(e)}")
return {"success": False, "error": str(e)}

View file

@ -1,687 +0,0 @@
import json
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.services.serviceAi.subSharedAiUtils import (
buildPromptWithPlaceholders,
extractTextFromContentParts,
reduceText,
determineCallType
)
from modules.shared.jsonUtils import (
extractJsonString,
repairBrokenJson,
extractSectionsFromDocument,
buildContinuationContext
)
logger = logging.getLogger(__name__)
# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT
# Sections are accumulated and repair mechanism handles broken JSON automatically
# Rebuild the model to resolve forward references
AiCallRequest.model_rebuild()
class SubCoreAi:
"""Core AI operations including image analysis, text generation, and planning calls."""
def __init__(self, services, aiObjects):
"""Initialize core AI operations.
Args:
services: Service center instance for accessing other services
aiObjects: Initialized AiObjects instance
"""
self.services = services
self.aiObjects = aiObjects
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
"""Analyze prompt to determine appropriate AiCallOptions parameters."""
try:
# Get dynamic enum values from Pydantic models
operation_types = [e.value for e in OperationTypeEnum]
priorities = [e.value for e in PriorityEnum]
processing_modes = [e.value for e in ProcessingModeEnum]
# Create analysis prompt for AI to determine operation type and parameters
analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
PROMPT TO ANALYZE:
{self.services.ai.sanitizePromptContent(prompt, 'userinput')}
Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operation_types)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processing_modes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)
Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""
# Use AI to analyze the prompt
request = AiCallRequest(
prompt=analysisPrompt,
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.SPEED,
processingMode=ProcessingModeEnum.BASIC,
compressPrompt=True,
compressContext=False
)
)
response = await self.aiObjects.call(request)
# Parse AI response
try:
import json
json_start = response.content.find('{')
json_end = response.content.rfind('}') + 1
if json_start != -1 and json_end > json_start:
analysis = json.loads(response.content[json_start:json_end])
# Map string values to enums
operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
priority = PriorityEnum(analysis.get('priority', 'balanced'))
processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
return AiCallOptions(
operationType=operation_type,
priority=priority,
processingMode=processing_mode,
compressPrompt=analysis.get('compressPrompt', True),
compressContext=analysis.get('compressContext', True)
)
except Exception as e:
logger.warning(f"Failed to parse AI analysis response: {e}")
except Exception as e:
logger.warning(f"Prompt analysis failed: {e}")
# Fallback to default options
return AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC
)
# Shared Core Function for AI Calls with Looping and Repair
async def _callAiWithLooping(
self,
prompt: str,
options: AiCallOptions,
debugPrefix: str = "ai_call",
promptBuilder: Optional[callable] = None,
promptArgs: Optional[Dict[str, Any]] = None,
operationId: Optional[str] = None
) -> str:
"""
Shared core function for AI calls with repair-based looping system.
Automatically repairs broken JSON and continues generation seamlessly.
Args:
prompt: The prompt to send to AI
options: AI call configuration options
debugPrefix: Prefix for debug file names
promptBuilder: Optional function to rebuild prompts for continuation
promptArgs: Optional arguments for prompt builder
operationId: Optional operation ID for progress tracking
Returns:
Complete AI response after all iterations
"""
max_iterations = 50 # Prevent infinite loops
iteration = 0
allSections = [] # Accumulate all sections across iterations
lastRawResponse = None # Store last raw JSON response for continuation
while iteration < max_iterations:
iteration += 1
# Update progress for iteration start
if operationId:
if iteration == 1:
self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
else:
# For continuation iterations, show progress incrementally
base_progress = 0.5 + (min(iteration - 1, max_iterations) / max_iterations * 0.4) # Progress from 0.5 to 0.9 over max_iterations iterations
self.services.workflow.progressLogUpdate(operationId, base_progress, f"Continuing generation (iteration {iteration})")
# Build iteration prompt
if len(allSections) > 0 and promptBuilder and promptArgs:
# This is a continuation - build continuation context with raw JSON and rebuild prompt
continuationContext = buildContinuationContext(allSections, lastRawResponse)
if not lastRawResponse:
logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
# Rebuild prompt with continuation context using the provided prompt builder
iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
else:
# First iteration - use original prompt
iterationPrompt = prompt
# Make AI call
try:
if operationId and iteration == 1:
self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
from modules.datamodels.datamodelAi import AiCallRequest
request = AiCallRequest(
prompt=iterationPrompt,
context="",
options=options
)
# Write the ACTUAL prompt sent to AI
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
else:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
response = await self.aiObjects.call(request)
result = response.content
# Update progress after AI call
if operationId:
if iteration == 1:
self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
else:
progress = 0.6 + (min(iteration - 1, 10) * 0.03)
self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")
# Write raw AI response to debug file
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
else:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
# Emit stats for this iteration
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.call.{debugPrefix}.iteration_{iteration}"
)
if not result or not result.strip():
logger.warning(f"Iteration {iteration}: Empty response, stopping")
break
# Store raw response for continuation (even if broken)
lastRawResponse = result
# Check for complete_response flag in raw response (before parsing)
import re
if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
pass # Flag detected, will stop in _shouldContinueGeneration
# Extract sections from response (handles both valid and broken JSON)
extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)
# Update progress after parsing
if operationId:
if extractedSections:
self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")
if not extractedSections:
# If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
if iteration > 1 and not wasJsonComplete:
logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
continue
# Otherwise, stop if no sections
logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
break
# Add new sections to accumulator
allSections.extend(extractedSections)
# Check if we should continue (completion detection)
if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
continue
else:
# Done - build final result
if operationId:
self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
break
except Exception as e:
logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
break
if iteration >= max_iterations:
logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")
# Build final result from accumulated sections
final_result = self._buildFinalResultFromSections(allSections)
# Write final result to debug file
self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
return final_result
def _extractSectionsFromResponse(
self,
result: str,
iteration: int,
debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool]:
"""
Extract sections from AI response, handling both valid and broken JSON.
Uses repair mechanism for broken JSON.
Checks for "complete_response": true flag to determine completion.
Returns (sections, wasJsonComplete)
"""
# First, try to parse as valid JSON
try:
extracted = extractJsonString(result)
parsed_result = json.loads(extracted)
# Check if AI marked response as complete
isComplete = parsed_result.get("complete_response", False) == True
# Extract sections from parsed JSON
sections = extractSectionsFromDocument(parsed_result)
# If AI marked as complete, always return as complete
if isComplete:
return sections, True
# If in continuation mode (iteration > 1), continuation responses are expected to be fragments
# A fragment with 0 extractable sections means JSON is incomplete - need another iteration
if len(sections) == 0 and iteration > 1:
return sections, False # Mark as incomplete so loop continues
# First iteration with 0 sections means empty response - stop
if len(sections) == 0:
return sections, True # Complete but empty
return sections, True # JSON was complete with sections
except json.JSONDecodeError as e:
# Broken JSON - try repair mechanism (normal in iterative generation)
self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
# Try to repair
repaired_json = repairBrokenJson(result)
if repaired_json:
# Extract sections from repaired JSON
sections = extractSectionsFromDocument(repaired_json)
return sections, False # JSON was broken but repaired
else:
# Repair failed - log error
logger.error(f"Iteration {iteration}: All repair strategies failed")
return [], False
except Exception as e:
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
return [], False
def _shouldContinueGeneration(
self,
allSections: List[Dict[str, Any]],
iteration: int,
wasJsonComplete: bool,
rawResponse: str = None
) -> bool:
"""
Determine if generation should continue based on JSON completeness and complete_response flag.
Returns True if we should continue, False if done.
"""
if len(allSections) == 0:
return True # No sections yet, continue
# Check for complete_response flag in raw response
if rawResponse:
import re
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
return False
# If JSON was complete (and no complete_response flag), we're done
# If JSON was broken and repaired, continue to get more content
if wasJsonComplete:
return False
else:
return True
def _buildFinalResultFromSections(
self,
allSections: List[Dict[str, Any]]
) -> str:
"""
Build final JSON result from accumulated sections.
"""
if not allSections:
return ""
# Build documents structure
# Assuming single document for now
documents = [{
"id": "doc_1",
"title": "Generated Document", # This should come from prompt
"filename": "document.json",
"sections": allSections
}]
result = {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": documents
}
return json.dumps(result, indent=2)
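# Shape of the final result (illustrative, mirrors the structure built above):
#
#     {
#       "metadata": {
#         "split_strategy": "single_document",
#         "source_documents": [],
#         "extraction_method": "ai_generation"
#       },
#       "documents": [
#         {"id": "doc_1", "title": "Generated Document", "filename": "document.json",
#          "sections": [...accumulated sections...]}
#       ]
#     }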
# Old _buildContinuationPrompt and _mergeJsonContent methods removed
# Now handled by repair mechanism in jsonUtils.py and section accumulation
# Planning AI Call
async def callAiPlanning(
self,
prompt: str,
placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
"""
Planning AI call for task planning, action planning, action selection, etc.
Always uses static parameters optimized for planning tasks.
Args:
prompt: The planning prompt
placeholders: Optional list of placeholder replacements
Returns:
Planning JSON response
"""
# Planning calls always use static parameters
options = AiCallOptions(
operationType=OperationTypeEnum.PLAN,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
compressPrompt=False,
compressContext=False
)
# Build full prompt with placeholders
if placeholders:
placeholders_dict = {p.label: p.content for p in placeholders}
full_prompt = buildPromptWithPlaceholders(prompt, placeholders_dict)
else:
full_prompt = prompt
# Use shared core function with planning-specific debug prefix
return await self._callAiWithLooping(full_prompt, options, "plan")
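# Usage sketch (illustrative; assumes an initialized AI service instance `ai`):
#
#     placeholders = [PromptPlaceholder(label="GOAL", content="Summarize Q3 results")]
#     planJson = await ai.callAiPlanning("Create a task plan for: {{KEY:GOAL}}", placeholders)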
# Document Generation AI Call
async def callAiDocuments(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
options: Optional[AiCallOptions] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
"""
Document generation AI call for all non-planning calls.
Uses the current unified path with extraction and generation.
Args:
prompt: The main prompt for the AI call
documents: Optional list of documents to process
options: AI call configuration options
outputFormat: Optional output format for document generation
title: Optional title for generated documents
Returns:
AI response as string, or dict with documents if outputFormat is specified
"""
# Create separate operationId for detailed progress tracking
import time
workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"
# Start progress tracking for this operation
self.services.workflow.progressLogStart(
aiOperationId,
"AI call with documents",
"Document Generation",
f"Format: {outputFormat or 'text'}"
)
try:
if options is None or (hasattr(options, 'operationType') and options.operationType is None):
# Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
options = await self._analyzePromptAndCreateOptions(prompt)
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
# Compressing would truncate the template structure and confuse the AI
if outputFormat: # Document generation with structured output
if not options:
options = AiCallOptions()
options.compressPrompt = False # JSON templates must NOT be truncated
options.compressContext = False # Context also should not be compressed
# Handle document generation with specific output format using unified approach
if outputFormat:
# Use unified generation method for all document generation
if documents and len(documents) > 0:
self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
else:
self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
extracted_content = None
self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
# First call without continuation context
generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)
# Prepare prompt builder arguments for continuation
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": extracted_content
}
self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId
)
self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
# Parse the generated JSON (extract fenced/embedded JSON first)
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
logger.error(f"JSON content length: {len(generated_json)}")
logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
# Write the problematic JSON to debug file
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
# Render to final format using the existing renderer
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, title or "Generated Document", prompt, self
)
# Build result in the expected format
result = {
"success": True,
"content": generated_data,
"documents": [{
"documentName": f"generated.{outputFormat}",
"documentData": rendered_content,
"mimeType": mime_type,
"title": title or "Generated Document"
}],
"is_multi_file": False,
"format": outputFormat,
"title": title,
"split_strategy": "single",
"total_documents": 1,
"processed_documents": 1
}
# Log AI response for debugging
self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
self.services.workflow.progressLogFinish(aiOperationId, True)
return result
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": f"Rendering failed: {str(e)}"}
# Handle text calls (no output format specified)
self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
if documents:
# Use document processing for text calls with documents
result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
else:
# Use shared core function for direct text calls
result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)
self.services.workflow.progressLogFinish(aiOperationId, True)
return result
except Exception as e:
logger.error(f"Error in callAiDocuments: {str(e)}")
self.services.workflow.progressLogFinish(aiOperationId, False)
raise
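# Usage sketch (illustrative; `ai` and `docs` are assumed to exist):
#
#     # Plain text answer over uploaded documents:
#     text = await ai.callAiDocuments("List the key risks", documents=docs)
#
#     # Structured generation rendered to a target format; returns a dict with
#     # "success", "content", "documents", "format", etc. as built above:
#     report = await ai.callAiDocuments("Write a risk report", documents=docs,
#                                       outputFormat="pdf", title="Risk Report")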
# AI Image Analysis
async def readImage(
self,
prompt: str,
imageData: Union[str, bytes],
mimeType: Optional[str] = None,
options: Optional[AiCallOptions] = None,
) -> str:
"""Call AI for image analysis using interface.call() with contentParts."""
try:
# Check if imageData is valid
if not imageData:
error_msg = "No image data provided"
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
# Always use IMAGE_ANALYSE operation type for image processing
if options is None:
options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
else:
# Override the operation type to ensure image analysis
options.operationType = OperationTypeEnum.IMAGE_ANALYSE
# Create content parts with image data
from modules.datamodels.datamodelExtraction import ContentPart
import base64
# ContentPart.data must be a string - convert bytes to base64 if needed
if isinstance(imageData, bytes):
imageDataStr = base64.b64encode(imageData).decode('utf-8')
else:
# Already a base64 string
imageDataStr = imageData
imagePart = ContentPart(
id="image_0",
parentId=None,
label="Image",
typeGroup="image",
mimeType=mimeType or "image/jpeg",
data=imageDataStr, # Must be a string (base64 encoded)
metadata={"imageAnalysis": True}
)
# Create request with content parts
from modules.datamodels.datamodelAi import AiCallRequest
request = AiCallRequest(
prompt=prompt,
context="",
options=options,
contentParts=[imagePart]
)
response = await self.aiObjects.call(request)
result = response.content
# Check if result is valid
if not result or (isinstance(result, str) and not result.strip()):
error_msg = f"No response from AI image analysis (result: {repr(result)})"
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
return result
except Exception as e:
logger.error(f"Error in AI image analysis: {str(e)}")
return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
self,
prompt: str,
size: str = "1024x1024",
quality: str = "standard",
style: str = "vivid",
options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
"""Generate an image using AI using interface.generateImage()."""
try:
response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
# Emit stats for image generation
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.generate.image"
)
# Convert response to dict format for backward compatibility
if hasattr(response, 'content'):
return {
"success": True,
"content": response.content,
"modelName": response.modelName,
"priceUsd": response.priceUsd,
"processingTime": response.processingTime
}
else:
return response
except Exception as e:
logger.error(f"Error in AI image generation: {str(e)}")
return {"success": False, "error": str(e)}


@ -1,500 +0,0 @@
import re
import json
import logging
import time
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions
logger = logging.getLogger(__name__)
class SubDocumentGeneration:
"""Document generation operations including single-file and multi-file generation."""
def __init__(self, services, aiObjects, documentProcessor):
"""Initialize document generation service.
Args:
services: Service center instance for accessing other services
aiObjects: Initialized AiObjects instance
documentProcessor: Document processing service instance
"""
self.services = services
self.aiObjects = aiObjects
self.documentProcessor = documentProcessor
async def callAiWithDocumentGeneration(
self,
prompt: str,
documents: Optional[List[ChatDocument]],
options: AiCallOptions,
outputFormat: str,
title: Optional[str]
) -> Dict[str, Any]:
"""
Unified document generation method that handles both single and multi-file cases.
Always uses the multi-file approach internally.
Args:
prompt: The main prompt for the AI call
documents: Optional list of documents to process
options: AI call configuration options
outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
title: Optional title for generated documents
Returns:
Dict with generated documents and metadata in unified structure
"""
try:
# 1. Get unified extraction prompt
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
extractionPrompt = await generationService.getAdaptiveExtractionPrompt(
outputFormat=outputFormat,
userPrompt=prompt,
title=title,
aiService=self
)
# 2. Process with unified pipeline (always multi-file approach)
aiResponse = await self._processDocumentsUnified(
documents, extractionPrompt, options
)
# 3. Return unified result structure
return await self._buildUnifiedResult(aiResponse, outputFormat, title)
except Exception as e:
logger.error(f"Error in unified document generation: {str(e)}")
return self._buildErrorResult(str(e), outputFormat, title)
async def _processDocumentsUnified(
self,
documents: Optional[List[ChatDocument]],
extractionPrompt: str,
options: AiCallOptions
) -> Dict[str, Any]:
"""
Unified document processing that handles both single and multi-file cases.
Always processes as multi-file structure internally.
"""
# Init progress logger
workflow = self.services.currentWorkflow
operationId = f"docGenUnified_{workflow.id}_{int(time.time())}"
try:
# Start progress tracking
self.services.workflow.progressLogStart(
operationId,
"Generate",
"Unified Document Generation",
f"Processing {len(documents) if documents else 0} documents"
)
# Update progress - generating extraction prompt
self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt")
# Write prompt to debug file
self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents)
# Process with unified JSON pipeline using continuation logic
aiResponse = await self.documentProcessor.processDocumentsWithContinuation(
documents, extractionPrompt, options
)
# Update progress - AI processing completed
self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done")
# Write AI response to debug file
response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False) if isinstance(aiResponse, dict) else str(aiResponse)
self.services.utils.writeDebugFile(response_json, "ai_response", documents)
# Validate response structure
if not self._validateUnifiedResponseStructure(aiResponse):
raise Exception("AI response is not valid unified document structure")
# Emit raw extracted data as a chat message attachment
try:
await self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified")
except Exception:
logger.warning("Failed to emit raw extraction chat message (unified)")
# Complete progress tracking
self.services.workflow.progressLogFinish(operationId, True)
return aiResponse
except Exception as e:
logger.error(f"Error in unified document processing: {str(e)}")
self.services.workflow.progressLogFinish(operationId, False)
raise
def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool:
"""
Unified validation that checks for document structure.
Handles both multi-file (documents array) and single-file (sections array) structures.
"""
try:
if not isinstance(response, dict):
logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
return False
# Check for documents array (multi-file structure)
hasDocuments = "documents" in response
isDocumentsList = isinstance(response.get("documents"), list)
# Check for sections array (single-file structure)
hasSections = "sections" in response
isSectionsList = isinstance(response.get("sections"), list)
if hasDocuments and isDocumentsList:
# Multi-file structure
documents = response.get("documents", [])
if not documents:
logger.warning("Unified validation failed: documents array is empty")
return False
# Validate each document individually
validDocuments = 0
for i, doc in enumerate(documents):
if self._validateDocumentStructure(doc, i):
validDocuments += 1
else:
logger.warning(f"Document {i} failed validation, but continuing with others")
# Process succeeds if at least one document is valid
if validDocuments == 0:
logger.error("Unified validation failed: no valid documents found")
return False
logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid")
return True
elif hasSections and isSectionsList:
# Single-file structure - convert to multi-file format
logger.info("Converting single-file structure to multi-file format")
sections = response.get("sections", [])
if not sections:
logger.warning("Unified validation failed: sections array is empty")
return False
# Convert to documents array format
response["documents"] = [{
"id": "document_1",
"title": response.get("metadata", {}).get("title", "Generated Document"),
"filename": "document_1",
"sections": sections
}]
logger.info("Successfully converted single-file structure to multi-file format")
return True
else:
# No valid structure found - fail with clear error details
logger.error("Unified validation failed: No valid structure found")
logger.error(f"Response type: {type(response)}")
logger.error(f"Available keys: {list(response.keys()) if isinstance(response, dict) else 'Not a dict'}")
logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}")
logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}")
logger.error(f"Full response: {response}")
return False
except Exception as e:
logger.warning(f"Unified response validation failed with exception: {str(e)}")
return False
def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool:
"""
Validate individual document structure.
Returns True if document is valid, False otherwise.
Does not fail the entire process if one document is invalid.
"""
try:
if not isinstance(document, dict):
logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}")
logger.error(f"Document {documentIndex} content: {document}")
return False
# Check for required fields
hasTitle = "title" in document
hasSections = "sections" in document
isSectionsList = isinstance(document.get("sections"), list)
logger.debug(f"Document {documentIndex} structure check:")
logger.debug(f" - hasTitle: {hasTitle}")
logger.debug(f" - hasSections: {hasSections}")
logger.debug(f" - isSectionsList: {isSectionsList}")
logger.debug(f" - available keys: {list(document.keys())}")
if not (hasTitle and hasSections and isSectionsList):
logger.error(f"Document {documentIndex} validation failed:")
logger.error(f" - title present: {hasTitle}")
logger.error(f" - sections present: {hasSections}")
logger.error(f" - sections is list: {isSectionsList}")
logger.error(f" - document content: {document}")
return False
sections = document.get("sections", [])
if not sections:
logger.error(f"Document {documentIndex} validation failed: sections array is empty")
logger.error(f" - document content: {document}")
return False
logger.info(f"Document {documentIndex} validation passed")
return True
except Exception as e:
logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}")
logger.error(f" - document content: {document}")
return False
async def _buildUnifiedResult(
self,
aiResponse: Dict[str, Any],
outputFormat: str,
title: str
) -> Dict[str, Any]:
"""
Build unified result structure that always returns array-based format.
Content is always a multi-document structure.
"""
try:
# Process all documents uniformly
generatedDocuments = []
documents = aiResponse.get("documents", [])
for i, docData in enumerate(documents):
try:
processedDocument = await self._processDocument(
docData, outputFormat, title, i
)
generatedDocuments.append(processedDocument)
except Exception as e:
logger.warning(f"Failed to process document {i}: {str(e)}, skipping")
continue
if not generatedDocuments:
raise Exception("No documents could be processed successfully")
# Build unified result
result = {
"success": True,
"content": aiResponse, # Always multi-document structure
"documents": generatedDocuments, # Always array
"is_multi_file": len(generatedDocuments) > 1,
"format": outputFormat,
"title": title,
"total_documents": len(generatedDocuments),
"processed_documents": len(generatedDocuments)
}
return result
except Exception as e:
logger.error(f"Error building unified result: {str(e)}")
return self._buildErrorResult(str(e), outputFormat, title)
async def _processDocument(
self,
docData: Dict[str, Any],
outputFormat: str,
title: str,
documentIndex: int
) -> Dict[str, Any]:
"""
Process individual document with content enhancement and rendering.
"""
try:
# Get generation service
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
# Use AI generation to enhance the extracted JSON before rendering
enhancedContent = docData # Default to original
if docData.get("sections"):
try:
# Get generation prompt directly
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
generationPrompt = await buildGenerationPrompt(
outputFormat=outputFormat,
userPrompt=title,
title=docData.get("title", title)
)
# Prepare the AI call
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
requestOptions = AiCallOptions()
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
# Create context with the extracted JSON content
context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}"
request = AiCallRequest(
prompt=generationPrompt,
context=context,
options=requestOptions
)
# Write document generation prompt to debug file
self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt")
# Call AI to enhance the content
response = await self.aiObjects.call(request)
# Write document generation response to debug file
self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response")
if response and response.content:
# Parse the AI response as JSON
try:
result = response.content.strip()
# Extract JSON from markdown if present
jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if jsonMatch:
result = jsonMatch.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to parse JSON
enhancedContent = json.loads(result)
logger.info(f"AI enhanced JSON content successfully for document {documentIndex}")
except json.JSONDecodeError as e:
logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content")
enhancedContent = docData
else:
logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content")
enhancedContent = docData
except Exception as e:
logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content")
enhancedContent = docData
# Render the enhanced JSON content
renderedContent, mimeType = await generationService.renderReport(
extractedContent=enhancedContent,
outputFormat=outputFormat,
title=docData.get("title", title),
userPrompt=title,
aiService=self
)
# Generate proper filename
baseFilename = docData.get("filename", f"document_{documentIndex + 1}")
if '.' in baseFilename:
baseFilename = baseFilename.rsplit('.', 1)[0]
# Add proper extension based on output format
if outputFormat.lower() == "docx":
filename = f"{baseFilename}.docx"
elif outputFormat.lower() == "pdf":
filename = f"{baseFilename}.pdf"
elif outputFormat.lower() == "html":
filename = f"{baseFilename}.html"
else:
filename = f"{baseFilename}.{outputFormat}"
return {
"documentName": filename,
"documentData": renderedContent,
"mimeType": mimeType,
"title": docData.get("title", title),
"documentIndex": documentIndex
}
except Exception as e:
logger.error(f"Error processing document {documentIndex}: {str(e)}")
raise
def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]:
"""
Build error result with unified structure.
"""
return {
"success": False,
"error": errorMessage,
"content": {},
"documents": [],
"is_multi_file": False,
"format": outputFormat,
"title": title,
"split_strategy": "error",
"total_documents": 0,
"processed_documents": 0
}
async def _callAiJson(
self,
prompt: str,
documents: Optional[List[ChatDocument]],
options: AiCallOptions
) -> Dict[str, Any]:
"""
Handle AI calls with document processing for JSON output.
Returns structured JSON document instead of text.
"""
# Process documents with JSON merging
return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
"""
Create a ChatMessage with the extracted raw JSON attached as a file so the user
has access to the data even if downstream processing fails.
"""
try:
services = self.services
workflow = services.currentWorkflow
# Serialize payload
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
content_text = json.dumps(payload, ensure_ascii=False, indent=2)
content_bytes = content_text.encode('utf-8')
# Store as file via component storage
file_name = f"{label}_{ts}.json"
file_item = services.interfaceDbComponent.createFile(
name=file_name,
mimeType="application/json",
content=content_bytes
)
services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
# Lookup file info for ChatDocument
file_info = services.workflow.getFileInfo(file_item.id)
doc = ChatDocument(
messageId="", # set after message creation
fileId=file_item.id,
fileName=file_info.get("fileName", file_name) if file_info else file_name,
fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json"
)
# Create message referencing the file - include document in initial call
messageData = {
"workflowId": workflow.id,
"role": "assistant",
"message": "Raw extraction data saved",
"status": "data",
"sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
"publishedAt": services.utils.timestampGetUtc(),
"documentsLabel": label,
"documents": []
}
# Store message with document included from the start
services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc])
except Exception:
# Non-fatal; ignore if storage or chat creation fails
return

File diff suppressed because it is too large


@ -1,165 +0,0 @@
"""
Shared utilities for AI services to eliminate code duplication.
This module contains common functions used across multiple AI service modules
to maintain DRY principles and ensure consistency.
"""
import re
import logging
from typing import Dict, Any, List, Optional, Union
logger = logging.getLogger(__name__)
def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
"""
Build full prompt by replacing placeholders with their content.
Uses the new {{KEY:placeholder}} format.
Args:
prompt: The base prompt template
placeholders: Dictionary of placeholder key-value pairs
Returns:
Prompt with placeholders replaced
"""
if not placeholders:
return prompt
full_prompt = prompt
for placeholder, content in placeholders.items():
# Skip if content is None or empty
if content is None:
continue
# Replace {{KEY:placeholder}}
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
return full_prompt
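# Worked example (illustrative):
#
#     template = "Summarize the following text:\n{{KEY:CONTEXT}}"
#     buildPromptWithPlaceholders(template, {"CONTEXT": "Q3 revenue grew 12%."})
#     # -> "Summarize the following text:\nQ3 revenue grew 12%."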
def sanitizePromptContent(content: str, contentType: str = "text") -> str:
"""
Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
This is the single source of truth for all prompt sanitization across the system.
Replaces all scattered sanitization functions with a unified approach.
Args:
content: The content to sanitize
contentType: Type of content ("text", "userinput", "json", "document")
Returns:
Safely sanitized content ready for AI prompt insertion
"""
if not content:
return ""
try:
# Convert to string if not already
content_str = str(content)
# Remove null bytes and control characters (except newlines and tabs)
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
# Handle different content types with appropriate sanitization
if contentType == "userinput":
# Extra security for user-controlled content
# Escape curly braces to prevent placeholder injection
sanitized = sanitized.replace('{', '{{').replace('}', '}}')
# Escape quotes and wrap in single quotes
sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
return f"'{sanitized}'"
elif contentType == "json":
# For JSON content, escape quotes and backslashes
sanitized = sanitized.replace('\\', '\\\\')
sanitized = sanitized.replace('"', '\\"')
sanitized = sanitized.replace('\n', '\\n')
sanitized = sanitized.replace('\r', '\\r')
sanitized = sanitized.replace('\t', '\\t')
elif contentType == "document":
# For document content, escape special characters
sanitized = sanitized.replace('\\', '\\\\')
sanitized = sanitized.replace('"', '\\"')
sanitized = sanitized.replace("'", "\\'")
sanitized = sanitized.replace('\n', '\\n')
sanitized = sanitized.replace('\r', '\\r')
sanitized = sanitized.replace('\t', '\\t')
else: # contentType == "text" or default
# Basic text sanitization
sanitized = sanitized.replace('\\', '\\\\')
sanitized = sanitized.replace('"', '\\"')
sanitized = sanitized.replace("'", "\\'")
sanitized = sanitized.replace('\n', '\\n')
sanitized = sanitized.replace('\r', '\\r')
sanitized = sanitized.replace('\t', '\\t')
return sanitized
except Exception as e:
logger.error(f"Error sanitizing prompt content: {str(e)}")
# Return a safe fallback
return "[ERROR: Content could not be safely sanitized]"
def extractTextFromContentParts(extracted_content) -> str:
"""
Extract text content from ExtractionService ContentPart objects.
Args:
extracted_content: ContentExtracted object with parts
Returns:
Concatenated text content from all text/table/structure parts
"""
if not extracted_content or not hasattr(extracted_content, 'parts'):
return ""
text_parts = []
for part in extracted_content.parts:
if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
if hasattr(part, 'data') and part.data:
text_parts.append(part.data)
return "\n\n".join(text_parts)
def reduceText(text: str, reduction_factor: float) -> str:
"""
Reduce text size by the specified factor.
Args:
text: Text to reduce
reduction_factor: Fraction of the original length to keep (0.0 to 1.0)
Returns:
Reduced text with truncation indicator
"""
if reduction_factor >= 1.0:
return text
target_length = int(len(text) * reduction_factor)
return text[:target_length] + "... [reduced]"
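# Worked example (illustrative):
#
#     reduceText("abcdefghij", 0.5)
#     # -> "abcde... [reduced]"  (keeps the first 50% of the characters)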
def determineCallType(documents: Optional[List], operation_type: str) -> str:
"""
Determine call type based on documents and operation type.
Args:
documents: List of ChatDocument objects
operation_type: Type of operation being performed
Returns:
Call type: "plan" or "text"
"""
has_documents = documents is not None and len(documents) > 0
is_planning_operation = operation_type == "plan"
if not has_documents and is_planning_operation:
return "plan"
else:
return "text"


@ -19,6 +19,16 @@ class ExtractionService:
self.services = services
self._extractorRegistry = ExtractorRegistry()
self._chunkerRegistry = ChunkerRegistry()
# Ensure AI connectors are discovered so pricing models are available
try:
# If internal model is missing, trigger discovery and registration
if modelRegistry.getModel("internal-extractor") is None:
discovered = modelRegistry.discoverConnectors()
for connector in discovered:
modelRegistry.registerConnector(connector)
except Exception:
# Swallow discovery failures here: init must stay fast and side-effect free; real errors will surface when the model is actually used
pass
def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
"""
@ -82,12 +92,12 @@ class ExtractionService:
p.metadata["documentMimeType"] = documentData["mimeType"]
# Log chunking information
chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
if chunked_parts:
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
if chunkedParts:
logger.debug(f"=== CHUNKING RESULTS ===")
logger.debug(f"Total parts: {len(ec.parts)}")
logger.debug(f"Chunked parts: {len(chunked_parts)}")
for chunk in chunked_parts:
logger.debug(f"Chunked parts: {len(chunkedParts)}")
for chunk in chunkedParts:
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
else:
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
@ -101,8 +111,11 @@ class ExtractionService:
# Emit stats for extraction operation
# Use internal extraction model for pricing
modelName = "internal_extraction"
modelName = "internal-extractor"
model = modelRegistry.getModel(modelName)
# Hard fail if model is missing; caller must ensure connectors are registered
if model is None or model.calculatePriceUsd is None:
raise RuntimeError(f"Pricing model not available: {modelName}")
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
# Create AiCallResponse with real calculation


@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger:
4. Minimize total number of AI calls
"""
def __init__(self, model_capabilities: Dict[str, Any]):
self.max_tokens = model_capabilities.get("maxTokens", 4000)
self.safety_margin = model_capabilities.get("safetyMargin", 0.1)
self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin))
self.chars_per_token = model_capabilities.get("charsPerToken", 4) # Rough estimation
def __init__(self, modelCapabilities: Dict[str, Any]):
self.maxTokens = modelCapabilities.get("maxTokens", 4000)
self.safetyMargin = modelCapabilities.get("safetyMargin", 0.1)
self.effectiveMaxTokens = int(self.maxTokens * (1 - self.safetyMargin))
self.charsPerToken = modelCapabilities.get("charsPerToken", 4) # Rough estimation
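# Illustrative budget math (a sketch): with
#     {"maxTokens": 8000, "safetyMargin": 0.1, "charsPerToken": 4}
# the merger gets effectiveMaxTokens = int(8000 * 0.9) = 7200, i.e. roughly
# 28,800 characters of content per merged AI call before prompt tokens are subtracted.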
def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
"""
Merge chunks intelligently based on token limits.
@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger:
if not chunks:
return chunks
logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}")
logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}")
# Calculate tokens for prompt
prompt_tokens = self._estimate_tokens(prompt)
available_tokens = self.effective_max_tokens - prompt_tokens
promptTokens = self._estimateTokens(prompt)
availableTokens = self.effectiveMaxTokens - promptTokens
logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}")
logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}")
# Group chunks by document and type for semantic coherence
grouped_chunks = self._group_chunks_by_document_and_type(chunks)
groupedChunks = self._groupChunksByDocumentAndType(chunks)
merged_parts = []
mergedParts = []
for group_key, group_chunks in grouped_chunks.items():
logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)")
for groupKey, groupChunks in groupedChunks.items():
logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)")
# Merge chunks within this group optimally
group_merged = self._merge_group_optimally(group_chunks, available_tokens)
merged_parts.extend(group_merged)
groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens)
mergedParts.extend(groupMerged)
logger.info(f"✅ Intelligent merging complete: {len(chunks)}{len(merged_parts)} parts")
return merged_parts
logger.info(f"✅ Intelligent merging complete: {len(chunks)}{len(mergedParts)} parts")
return mergedParts
def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
"""Group chunks by document and type for semantic coherence."""
groups = {}
for chunk in chunks:
# Create group key: document_id + type_group
doc_id = chunk.metadata.get("documentId", "unknown")
type_group = chunk.typeGroup
group_key = f"{doc_id}_{type_group}"
if group_key not in groups:
groups[group_key] = []
groups[group_key].append(chunk)
docId = chunk.metadata.get("documentId", "unknown")
typeGroup = chunk.typeGroup
groupKey = f"{docId}_{typeGroup}"
if groupKey not in groups:
groups[groupKey] = []
groups[groupKey].append(chunk)
return groups
def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]:
def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]:
"""Merge chunks within a group optimally to minimize AI calls."""
if not chunks:
return []
# Sort chunks by size (smallest first for better packing)
sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data))
sortedChunks = sorted(chunks, key=lambda c: self._estimateTokens(c.data))
merged_parts = []
current_group = []
current_tokens = 0
mergedParts = []
currentGroup = []
currentTokens = 0
for chunk in sorted_chunks:
chunk_tokens = self._estimate_tokens(chunk.data)
for chunk in sortedChunks:
chunkTokens = self._estimateTokens(chunk.data)
# Special case: If single chunk is already at max size, process it alone
if chunk_tokens >= available_tokens * 0.9: # 90% of available tokens
if chunkTokens >= availableTokens * 0.9: # 90% of available tokens
# Finalize current group if it exists
if current_group:
merged_part = self._create_merged_part(current_group, current_tokens)
merged_parts.append(merged_part)
current_group = []
current_tokens = 0
if currentGroup:
mergedPart = self._createMergedPart(currentGroup, currentTokens)
mergedParts.append(mergedPart)
currentGroup = []
currentTokens = 0
# Process large chunk individually
merged_parts.append(chunk)
logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens")
mergedParts.append(chunk)
logger.debug(f"🔍 Large chunk processed individually: {chunkTokens} tokens")
continue
# If adding this chunk would exceed limit, finalize current group
if current_tokens + chunk_tokens > available_tokens and current_group:
merged_part = self._create_merged_part(current_group, current_tokens)
merged_parts.append(merged_part)
current_group = [chunk]
current_tokens = chunk_tokens
if currentTokens + chunkTokens > availableTokens and currentGroup:
mergedPart = self._createMergedPart(currentGroup, currentTokens)
mergedParts.append(mergedPart)
currentGroup = [chunk]
currentTokens = chunkTokens
else:
current_group.append(chunk)
current_tokens += chunk_tokens
currentGroup.append(chunk)
currentTokens += chunkTokens
# Finalize remaining group
if current_group:
merged_part = self._create_merged_part(current_group, current_tokens)
merged_parts.append(merged_part)
if currentGroup:
mergedPart = self._createMergedPart(currentGroup, currentTokens)
mergedParts.append(mergedPart)
logger.info(f"📦 Group merged: {len(chunks)}{len(merged_parts)} parts")
return merged_parts
logger.info(f"📦 Group merged: {len(chunks)}{len(mergedParts)} parts")
return mergedParts
def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart:
def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart:
"""Create a merged ContentPart from multiple chunks."""
if len(chunks) == 1:
return chunks[0] # No need to merge single chunk
# Combine data with semantic separators
combined_data = self._combine_chunk_data(chunks)
combinedData = self._combineChunkData(chunks)
# Use metadata from first chunk as base
base_chunk = chunks[0]
merged_metadata = base_chunk.metadata.copy()
merged_metadata.update({
baseChunk = chunks[0]
mergedMetadata = baseChunk.metadata.copy()
mergedMetadata.update({
"merged": True,
"originalChunkCount": len(chunks),
"totalTokens": total_tokens,
"totalTokens": totalTokens,
"originalChunkIds": [c.id for c in chunks],
"size": len(combined_data.encode('utf-8'))
"size": len(combinedData.encode('utf-8'))
})
merged_part = ContentPart(
mergedPart = ContentPart(
id=makeId(),
parentId=base_chunk.parentId,
parentId=baseChunk.parentId,
label=f"merged_{len(chunks)}_chunks",
typeGroup=base_chunk.typeGroup,
mimeType=base_chunk.mimeType,
data=combined_data,
metadata=merged_metadata
typeGroup=baseChunk.typeGroup,
mimeType=baseChunk.mimeType,
data=combinedData,
metadata=mergedMetadata
)
logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens")
return merged_part
logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens")
return mergedPart
def _combine_chunk_data(self, chunks: List[ContentPart]) -> str:
def _combineChunkData(self, chunks: List[ContentPart]) -> str:
"""Combine chunk data with appropriate separators."""
if not chunks:
return ""
@ -173,37 +173,37 @@ class IntelligentTokenAwareMerger:
return separator.join([chunk.data for chunk in chunks])
def _estimate_tokens(self, text: str) -> int:
def _estimateTokens(self, text: str) -> int:
"""Estimate token count for text."""
if not text:
return 0
return len(text) // self.chars_per_token
return len(text) // self.charsPerToken
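# e.g. a 400-character chunk with charsPerToken=4 estimates to 100 tokens (illustrative).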
def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]:
def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]:
"""Calculate optimization statistics with detailed analysis."""
original_calls = len(original_chunks)
optimized_calls = len(merged_parts)
reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0
originalCalls = len(originalChunks)
optimizedCalls = len(mergedParts)
reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0
# Analyze chunk sizes
large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9]
small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9]
largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9]
smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9]
# Calculate theoretical maximum optimization (if all small chunks could be merged)
theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3) # Assume 3 small chunks per call
theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0
theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3) # Assume 3 small chunks per call
theoreticalReduction = ((originalCalls - theoreticalMinCalls) / originalCalls * 100) if originalCalls > 0 else 0
return {
"original_ai_calls": original_calls,
"optimized_ai_calls": optimized_calls,
"reduction_percent": round(reduction_percent, 1),
"cost_savings": f"{reduction_percent:.1f}%",
"efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "",
"original_ai_calls": originalCalls,
"optimized_ai_calls": optimizedCalls,
"reduction_percent": round(reductionPercent, 1),
"cost_savings": f"{reductionPercent:.1f}%",
"efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "",
"analysis": {
"large_chunks": len(large_chunks),
"small_chunks": len(small_chunks),
"theoretical_min_calls": theoretical_min_calls,
"theoretical_reduction": round(theoretical_reduction, 1),
"optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low"
"large_chunks": len(largeChunks),
"small_chunks": len(smallChunks),
"theoretical_min_calls": theoreticalMinCalls,
"theoretical_reduction": round(theoreticalReduction, 1),
"optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low"
}
}
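# Illustrative stats (a sketch): merging 10 chunks into 3 parts yields
#
#     {"original_ai_calls": 10, "optimized_ai_calls": 3, "reduction_percent": 70.0,
#      "cost_savings": "70.0%", "efficiency_gain": "3.3x", "analysis": {...}}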


@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[Con
subMerger = IntelligentTokenAwareMerger(model_capabilities)
# Use intelligent merging for all parts
merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "")
merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")
# Calculate and log optimization stats
stats = subMerger.calculate_optimization_stats(parts, merged)
stats = subMerger.calculateOptimizationStats(parts, merged)
logger.info(f"🧠 Intelligent merging stats: {stats}")
logger.debug(f"Intelligent merging: {stats['original_ai_calls']}{stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")


@ -101,7 +101,7 @@ async def buildExtractionPrompt(
# Build base prompt
adaptive_prompt = f"""
{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.


@ -37,13 +37,13 @@ class GenerationService:
return []
# Process each document from the AI action result
processed_documents = []
processedDocuments = []
for doc in documents:
processed_doc = self.processSingleDocument(doc, action)
if processed_doc:
processed_documents.append(processed_doc)
processedDoc = self.processSingleDocument(doc, action)
if processedDoc:
processedDocuments.append(processedDoc)
return processed_documents
return processedDocuments
except Exception as e:
logger.error(f"Error processing action result documents: {str(e)}")
return []
@ -77,20 +77,20 @@ class GenerationService:
try:
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
created_documents = []
createdDocuments = []
for i, doc_data in enumerate(processed_docs):
try:
document_name = doc_data['fileName']
document_data = doc_data['content']
mime_type = doc_data['mimeType']
documentName = doc_data['fileName']
documentData = doc_data['content']
mimeType = doc_data['mimeType']
# Convert document data to string content
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
content = convertDocumentDataToString(documentData, getFileExtension(documentName))
# Skip empty or minimal content
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
logger.warning(f"Empty or minimal content for document {documentName}, skipping")
continue
# Normalize file extension based on mime type if missing or incorrect
@ -105,35 +105,35 @@ class GenerationService:
"text/plain": ".txt",
"application/json": ".json",
}
expected_ext = mime_to_ext.get(mime_type)
if expected_ext:
if not document_name.lower().endswith(expected_ext):
expectedExt = mime_to_ext.get(mimeType)
if expectedExt:
if not documentName.lower().endswith(expectedExt):
# Append/replace extension to match mime type
if "." in document_name:
document_name = document_name.rsplit(".", 1)[0] + expected_ext
if "." in documentName:
documentName = documentName.rsplit(".", 1)[0] + expectedExt
else:
document_name = document_name + expected_ext
documentName = documentName + expectedExt
except Exception:
pass
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
base64encoded = False
try:
binary_mime_types = {
binaryMimeTypes = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf",
}
if isinstance(document_data, str) and mime_type in binary_mime_types:
if isinstance(documentData, str) and mimeType in binaryMimeTypes:
base64encoded = True
except Exception:
base64encoded = False
# Create document with file in one step using interfaces directly
document = self._createDocument(
fileName=document_name,
mimeType=mime_type,
fileName=documentName,
mimeType=mimeType,
content=content,
base64encoded=base64encoded,
messageId=message_id
@ -141,14 +141,14 @@ class GenerationService:
if document:
# Set workflow context on the document if possible
self._setDocumentWorkflowContext(document, action, workflow)
created_documents.append(document)
createdDocuments.append(document)
else:
logger.error(f"Failed to create ChatDocument object for {document_name}")
logger.error(f"Failed to create ChatDocument object for {documentName}")
except Exception as e:
logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
continue
return created_documents
return createdDocuments
except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}")
return []
@ -157,28 +157,28 @@ class GenerationService:
"""Set workflow context on a document for proper routing and labeling"""
try:
# Get current workflow context directly from workflow object
workflow_context = self._getWorkflowContext(workflow)
workflow_stats = self._getWorkflowStats(workflow)
workflowContext = self._getWorkflowContext(workflow)
workflowStats = self._getWorkflowStats(workflow)
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
current_action = workflow_context.get('currentAction', 0)
currentRound = workflowContext.get('currentRound', 0)
currentTask = workflowContext.get('currentTask', 0)
currentAction = workflowContext.get('currentAction', 0)
# Try to set workflow context attributes if they exist
if hasattr(document, 'roundNumber'):
document.roundNumber = current_round
document.roundNumber = currentRound
if hasattr(document, 'taskNumber'):
document.taskNumber = current_task
document.taskNumber = currentTask
if hasattr(document, 'actionNumber'):
document.actionNumber = current_action
document.actionNumber = currentAction
if hasattr(document, 'actionId'):
document.actionId = action.id if hasattr(action, 'id') else None
# Set additional workflow metadata if available
if hasattr(document, 'workflowId'):
document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
if hasattr(document, 'workflowStatus'):
document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
except Exception as e:
@ -355,17 +355,17 @@ class GenerationService:
def _getFormatRenderer(self, output_format: str):
"""Get the appropriate renderer for the specified format using auto-discovery."""
try:
from .renderers.registry import get_renderer
renderer = get_renderer(output_format, services=self.services)
from .renderers.registry import getRenderer
renderer = getRenderer(output_format, services=self.services)
if renderer:
return renderer
# Fallback to text renderer if no specific renderer found
logger.warning(f"No renderer found for format {output_format}, falling back to text")
fallback_renderer = get_renderer('text', services=self.services)
if fallback_renderer:
return fallback_renderer
fallbackRenderer = getRenderer('text', services=self.services)
if fallbackRenderer:
return fallbackRenderer
logger.error("Even text renderer fallback failed")
return None


@ -17,7 +17,7 @@ class RendererRegistry:
self._format_mappings: Dict[str, str] = {}
self._discovered = False
def discover_renderers(self) -> None:
def discoverRenderers(self) -> None:
"""Automatically discover and register all renderers by scanning files."""
if self._discovered:
return
@ -28,38 +28,38 @@ class RendererRegistry:
from pathlib import Path
# Get the directory containing this registry file
current_dir = Path(__file__).parent
renderers_dir = current_dir
currentDir = Path(__file__).parent
renderersDir = currentDir
# Get the package name dynamically
package_name = __name__.rsplit('.', 1)[0]
packageName = __name__.rsplit('.', 1)[0]
# Scan all Python files in the renderers directory
for file_path in renderers_dir.glob("*.py"):
if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
for filePath in renderersDir.glob("*.py"):
if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
continue
# Extract module name from filename
module_name = file_path.stem
moduleName = filePath.stem
try:
# Import the module dynamically
full_module_name = f"{package_name}.{module_name}"
module = importlib.import_module(full_module_name)
fullModuleName = f"{packageName}.{moduleName}"
module = importlib.import_module(fullModuleName)
# Look for renderer classes in the module
for attr_name in dir(module):
attr = getattr(module, attr_name)
for attrName in dir(module):
attr = getattr(module, attrName)
if (isinstance(attr, type) and
issubclass(attr, BaseRenderer) and
attr != BaseRenderer and
hasattr(attr, 'get_supported_formats')):
hasattr(attr, 'getSupportedFormats')):
# Register the renderer
self._register_renderer_class(attr)
self._registerRendererClass(attr)
except Exception as e:
logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
logger.warning(f"Could not load renderer from {moduleName}: {str(e)}")
continue
self._discovered = True
@ -68,72 +68,72 @@ class RendererRegistry:
logger.error(f"Error during renderer discovery: {str(e)}")
self._discovered = True # Mark as discovered to avoid repeated attempts
def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None:
"""Register a renderer class with its supported formats."""
try:
# Get supported formats from the renderer class
supported_formats = renderer_class.get_supported_formats()
supportedFormats = rendererClass.getSupportedFormats()
for format_name in supported_formats:
for formatName in supportedFormats:
# Register primary format
self._renderers[format_name.lower()] = renderer_class
self._renderers[formatName.lower()] = rendererClass
# Register aliases if any
if hasattr(renderer_class, 'get_format_aliases'):
aliases = renderer_class.get_format_aliases()
if hasattr(rendererClass, 'getFormatAliases'):
aliases = rendererClass.getFormatAliases()
for alias in aliases:
self._format_mappings[alias.lower()] = format_name.lower()
self._format_mappings[alias.lower()] = formatName.lower()
logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
except Exception as e:
logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")
def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]:
def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format."""
if not self._discovered:
self.discover_renderers()
self.discoverRenderers()
# Normalize format name
format_name = output_format.lower().strip()
formatName = outputFormat.lower().strip()
# Check for aliases first
if format_name in self._format_mappings:
format_name = self._format_mappings[format_name]
if formatName in self._format_mappings:
formatName = self._format_mappings[formatName]
# Get renderer class
renderer_class = self._renderers.get(format_name)
rendererClass = self._renderers.get(formatName)
if renderer_class:
if rendererClass:
try:
return renderer_class(services=services)
return rendererClass(services=services)
except Exception as e:
logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
logger.error(f"Error creating renderer instance for {formatName}: {str(e)}")
return None
logger.warning(f"No renderer found for format: {output_format}")
logger.warning(f"No renderer found for format: {outputFormat}")
return None
def get_supported_formats(self) -> List[str]:
def getSupportedFormats(self) -> List[str]:
"""Get list of all supported formats."""
if not self._discovered:
self.discover_renderers()
self.discoverRenderers()
formats = list(self._renderers.keys())
formats.extend(self._format_mappings.keys())
return sorted(set(formats))
def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
def getRendererInfo(self) -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers."""
if not self._discovered:
self.discover_renderers()
self.discoverRenderers()
info = {}
for format_name, renderer_class in self._renderers.items():
info[format_name] = {
'class_name': renderer_class.__name__,
'module': renderer_class.__module__,
'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
for formatName, rendererClass in self._renderers.items():
info[formatName] = {
'class_name': rendererClass.__name__,
'module': rendererClass.__module__,
'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description'
}
return info
@ -141,14 +141,14 @@ class RendererRegistry:
# Global registry instance
_registry = RendererRegistry()
def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]:
def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format."""
return _registry.get_renderer(output_format, services)
return _registry.getRenderer(outputFormat, services)
def get_supported_formats() -> List[str]:
def getSupportedFormats() -> List[str]:
"""Get list of all supported formats."""
return _registry.get_supported_formats()
return _registry.getSupportedFormats()
def get_renderer_info() -> Dict[str, Dict[str, str]]:
def getRendererInfo() -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers."""
return _registry.get_renderer_info()
return _registry.getRendererInfo()
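A minimal usage sketch for these module-level helpers, assuming discovery has registered an HTML renderer and that the content dict follows the sections schema used throughout (both assumptions illustrative):
import asyncio

async def main():
    # "webpage" is an alias; _format_mappings resolves it to "html".
    renderer = getRenderer("webpage")
    if renderer is None:
        print(f"No renderer available; supported: {getSupportedFormats()}")
        return
    extractedContent = {
        "metadata": {"title": "Demo"},
        "sections": [
            {"content_type": "paragraph", "elements": [{"text": "Hello, world."}]}
        ],
    }
    content, mimeType = await renderer.render(extractedContent, "Demo")
    print(mimeType)  # "text/html"

asyncio.run(main())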
View file
@ -4,6 +4,7 @@ Base renderer class for all format renderers.
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
from modules.datamodels.datamodelJson import supportedSectionTypes
import json
import logging
import re
@ -23,7 +24,7 @@ class BaseRenderer(ABC):
self.services = services # Add services attribute
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""
Return list of supported format names for this renderer.
Override this method in subclasses to specify supported formats.
@ -31,7 +32,7 @@ class BaseRenderer(ABC):
return []
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""
Return list of format aliases for this renderer.
Override this method in subclasses to specify format aliases.
@ -39,7 +40,7 @@ class BaseRenderer(ABC):
return []
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""
Return priority for this renderer (higher number = higher priority).
Used when multiple renderers support the same format.
@ -47,43 +48,43 @@ class BaseRenderer(ABC):
return 0
@abstractmethod
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""
Render extracted JSON content to the target format.
Args:
extracted_content: Structured JSON content with sections and metadata
extractedContent: Structured JSON content with sections and metadata
title: Report title
user_prompt: Original user prompt for context
ai_service: AI service instance for additional processing
userPrompt: Original user prompt for context
aiService: AI service instance for additional processing
Returns:
tuple: (rendered_content, mime_type)
tuple: (renderedContent, mimeType)
"""
pass
def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract sections from report data."""
return report_data.get('sections', [])
return reportData.get('sections', [])
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data."""
return report_data.get('metadata', {})
return reportData.get('metadata', {})
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
"""Get title from report data or use fallback."""
metadata = report_data.get('metadata', {})
return metadata.get('title', fallback_title)
metadata = reportData.get('metadata', {})
return metadata.get('title', fallbackTitle)
def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
"""Validate that JSON content has the expected structure."""
if not isinstance(json_content, dict):
if not isinstance(jsonContent, dict):
return False
if "sections" not in json_content:
if "sections" not in jsonContent:
return False
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
if not isinstance(sections, list):
return False
@ -96,14 +97,14 @@ class BaseRenderer(ABC):
return True
def _get_section_type(self, section: Dict[str, Any]) -> str:
def _getSectionType(self, section: Dict[str, Any]) -> str:
"""Get the type of a section; default to 'paragraph' for non-dict inputs."""
if isinstance(section, dict):
return section.get("content_type", "paragraph")
# If section is a list or any other type, treat as paragraph elements
return "paragraph"
def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Get the elements of a section; if a list is provided directly, return it."""
if isinstance(section, dict):
return section.get("elements", [])
@ -111,21 +112,30 @@ class BaseRenderer(ABC):
return section
return []
def _get_section_id(self, section: Dict[str, Any]) -> str:
def _getSectionId(self, section: Dict[str, Any]) -> str:
"""Get the ID of a section (if available)."""
if isinstance(section, dict):
return section.get("id", "unknown")
return "unknown"
def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
"""Extract table headers and rows from section data."""
headers = section_data.get("headers", [])
rows = section_data.get("rows", [])
# Normalize when elements array was passed in
if isinstance(sectionData, list) and sectionData:
candidate = sectionData[0]
sectionData = candidate if isinstance(candidate, dict) else {}
headers = sectionData.get("headers", [])
rows = sectionData.get("rows", [])
return headers, rows
def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]:
"""Extract bullet list items from section data."""
items = section_data.get("items", [])
# Normalize when elements array or raw list was passed in
if isinstance(sectionData, list):
# Already a list of items (strings or dicts)
items = sectionData
else:
items = sectionData.get("items", [])
result = []
for item in items:
if isinstance(item, str):
@ -134,29 +144,47 @@ class BaseRenderer(ABC):
result.append(item["text"])
return result
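For instance, assuming renderer is any concrete BaseRenderer, mixed string and dict items normalize to plain strings:
renderer._extractBulletListItems(["alpha", {"text": "beta"}])
# -> ["alpha", "beta"]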
def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
"""Extract heading level and text from section data."""
level = section_data.get("level", 1)
text = section_data.get("text", "")
# Normalize when elements array was passed in
if isinstance(sectionData, list) and sectionData:
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
level = sectionData.get("level", 1)
text = sectionData.get("text", "")
return level, text
def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str:
"""Extract paragraph text from section data."""
return section_data.get("text", "")
if isinstance(sectionData, list):
# Join multiple paragraph elements if provided as a list
texts = []
for el in sectionData:
if isinstance(el, dict) and "text" in el:
texts.append(el["text"])
elif isinstance(el, str):
texts.append(el)
return "\n".join(texts)
return sectionData.get("text", "")
def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract code and language from section data."""
code = section_data.get("code", "")
language = section_data.get("language", "")
# Normalize when elements array was passed in
if isinstance(sectionData, list) and sectionData:
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
code = sectionData.get("code", "")
language = sectionData.get("language", "")
return code, language
def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
"""Extract base64 data and alt text from section data."""
base64_data = section_data.get("base64Data", "")
alt_text = section_data.get("altText", "Image")
return base64_data, alt_text
# Normalize when elements array was passed in
if isinstance(sectionData, list) and sectionData:
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
base64Data = sectionData.get("base64Data", "")
altText = sectionData.get("altText", "Image")
return base64Data, altText
def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
"""
Render an image section. This is a base implementation that should be overridden
by format-specific renderers.
@ -168,47 +196,47 @@ class BaseRenderer(ABC):
Returns:
Format-specific image representation
"""
section_data = self._get_section_data(section)
base64_data, alt_text = self._extract_image_data(section_data)
sectionData = self._getSectionData(section)
base64Data, altText = self._extractImageData(sectionData)
# Base implementation returns a simple dict
# Format-specific renderers should override this method
return {
"content_type": "image",
"base64Data": base64_data,
"altText": alt_text,
"width": section_data.get("width", None),
"height": section_data.get("height", None),
"caption": section_data.get("caption", "")
"base64Data": base64Data,
"altText": altText,
"width": sectionData.get("width", None),
"height": sectionData.get("height", None),
"caption": sectionData.get("caption", "")
}
def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
def _validateImageData(self, base64Data: str, altText: str) -> bool:
"""Validate image data."""
if not base64_data:
if not base64Data:
self.logger.warning("Image section has no base64 data")
return False
if not alt_text:
if not altText:
self.logger.warning("Image section has no alt text")
return False
# Basic base64 validation
try:
base64.b64decode(base64_data, validate=True)
base64.b64decode(base64Data, validate=True)
return True
except Exception as e:
self.logger.warning(f"Invalid base64 image data: {str(e)}")
return False
def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]:
"""
Get image dimensions from base64 data.
This is a helper method that format-specific renderers can use.
"""
try:
# Decode base64 data
image_data = base64.b64decode(base64_data)
image = Image.open(io.BytesIO(image_data))
imageData = base64.b64decode(base64Data)
image = Image.open(io.BytesIO(imageData))
return image.size # Returns (width, height)
@ -216,89 +244,89 @@ class BaseRenderer(ABC):
self.logger.warning(f"Could not determine image dimensions: {str(e)}")
return (0, 0)
def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str:
"""
Resize image if it exceeds maximum dimensions.
Returns the resized image as base64 string.
"""
try:
# Decode base64 data
image_data = base64.b64decode(base64_data)
image = Image.open(io.BytesIO(image_data))
imageData = base64.b64decode(base64Data)
image = Image.open(io.BytesIO(imageData))
# Check if resizing is needed
width, height = image.size
if width <= max_width and height <= max_height:
return base64_data # No resizing needed
if width <= maxWidth and height <= maxHeight:
return base64Data # No resizing needed
# Calculate new dimensions maintaining aspect ratio
ratio = min(max_width / width, max_height / height)
new_width = int(width * ratio)
new_height = int(height * ratio)
ratio = min(maxWidth / width, maxHeight / height)
newWidth = int(width * ratio)
newHeight = int(height * ratio)
# Resize image
resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)
# Convert back to base64
buffer = io.BytesIO()
resized_image.save(buffer, format=image.format or 'PNG')
resized_data = buffer.getvalue()
resizedImage.save(buffer, format=image.format or 'PNG')
resizedData = buffer.getvalue()
return base64.b64encode(resized_data).decode('utf-8')
return base64.b64encode(resizedData).decode('utf-8')
except Exception as e:
self.logger.warning(f"Could not resize image: {str(e)}")
return base64_data # Return original if resize fails
return base64Data # Return original if resize fails
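A quick sanity check for the resize path, assuming Pillow is installed and renderer is a concrete BaseRenderer (the 1600x1200 input is illustrative):
import base64, io
from PIL import Image

# Build a 1600x1200 red PNG and base64-encode it.
buf = io.BytesIO()
Image.new("RGB", (1600, 1200), "red").save(buf, format="PNG")
encoded = base64.b64encode(buf.getvalue()).decode("utf-8")

resized = renderer._resizeImageIfNeeded(encoded, maxWidth=800, maxHeight=600)
# ratio = min(800/1600, 600/1200) = 0.5, so the output is exactly 800x600
assert renderer._getImageDimensions(resized) == (800, 600)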
def _get_supported_section_types(self) -> List[str]:
"""Return list of supported section types."""
return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
def _getSupportedSectionTypes(self) -> List[str]:
"""Return list of supported section types (from unified schema)."""
return supportedSectionTypes
def _is_valid_section_type(self, section_type: str) -> bool:
def _isValidSectionType(self, sectionType: str) -> bool:
"""Check if a section type is valid."""
return section_type in self._get_supported_section_types()
return sectionType in self._getSupportedSectionTypes()
def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]:
"""Process a section and return structured data based on its type."""
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
if section_type == "table":
headers, rows = self._extract_table_data(section_data)
if sectionType == "table":
headers, rows = self._extractTableData(sectionData)
return {"content_type": "table", "headers": headers, "rows": rows}
elif section_type == "bullet_list":
items = self._extract_bullet_list_items(section_data)
elif sectionType == "bullet_list":
items = self._extractBulletListItems(sectionData)
return {"content_type": "bullet_list", "items": items}
elif section_type == "heading":
level, text = self._extract_heading_data(section_data)
elif sectionType == "heading":
level, text = self._extractHeadingData(sectionData)
return {"content_type": "heading", "level": level, "text": text}
elif section_type == "paragraph":
text = self._extract_paragraph_text(section_data)
elif sectionType == "paragraph":
text = self._extractParagraphText(sectionData)
return {"content_type": "paragraph", "text": text}
elif section_type == "code_block":
code, language = self._extract_code_block_data(section_data)
elif sectionType == "code_block":
code, language = self._extractCodeBlockData(sectionData)
return {"content_type": "code_block", "code": code, "language": language}
elif section_type == "image":
base64_data, alt_text = self._extract_image_data(section_data)
elif sectionType == "image":
base64Data, altText = self._extractImageData(sectionData)
# Validate image data
if self._validate_image_data(base64_data, alt_text):
if self._validateImageData(base64Data, altText):
return {
"content_type": "image",
"base64Data": base64_data,
"altText": alt_text,
"width": section_data.get("width"),
"height": section_data.get("height"),
"caption": section_data.get("caption", "")
"base64Data": base64Data,
"altText": altText,
"width": sectionData.get("width") if isinstance(sectionData, dict) else None,
"height": sectionData.get("height") if isinstance(sectionData, dict) else None,
"caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else ""
}
else:
# Return placeholder if image data is invalid
return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
return {"content_type": "paragraph", "text": f"[Image: {altText}]"}
else:
# Fallback to paragraph
text = self._extract_paragraph_text(section_data)
text = self._extractParagraphText(sectionData)
return {"content_type": "paragraph", "text": text}
def _format_timestamp(self, timestamp: str = None) -> str:
def _formatTimestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display."""
if timestamp:
return timestamp
@ -306,38 +334,38 @@ class BaseRenderer(ABC):
# ===== GENERIC AI STYLING HELPERS =====
async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""
Generic AI styling method that can be used by all renderers.
Args:
ai_service: AI service instance
style_template: Format-specific style template
default_styles: Default styles to fall back to
aiService: AI service instance
styleTemplate: Format-specific style template
defaultStyles: Default styles to fall back to
Returns:
Dict with styling definitions
"""
# DEBUG: Show which renderer is calling this method
if not ai_service:
return default_styles
if not aiService:
return defaultStyles
try:
request_options = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE
requestOptions = AiCallOptions()
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options)
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
# DEBUG: Show the actual prompt being sent to AI
self.logger.debug(f"AI Style Template Prompt:")
self.logger.debug(f"{style_template}")
self.logger.debug(f"{styleTemplate}")
response = await ai_service.aiObjects.call(request)
response = await aiService.aiObjects.call(request)
# Save styling prompt and response to debug
self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt")
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
# Clean and parse JSON
@ -346,12 +374,12 @@ class BaseRenderer(ABC):
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return default_styles
return defaultStyles
# Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if jsonMatch:
result = jsonMatch.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
@ -362,8 +390,8 @@ class BaseRenderer(ABC):
# Try to parse JSON
try:
styles = json.loads(result)
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
except json.JSONDecodeError as jsonError:
self.logger.warning(f"AI styling returned invalid JSON: {jsonError}")
# Use print instead of logger to avoid truncation
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
@ -372,88 +400,88 @@ class BaseRenderer(ABC):
self.logger.warning(f"Raw content that failed to parse: {result}")
# Try to fix incomplete JSON by adding missing closing braces
open_braces = result.count('{')
close_braces = result.count('}')
openBraces = result.count('{')
closeBraces = result.count('}')
if open_braces > close_braces:
if openBraces > closeBraces:
# JSON is incomplete, add missing closing braces
missing_braces = open_braces - close_braces
result = result + '}' * missing_braces
self.logger.info(f"Added {missing_braces} missing closing brace(s)")
missingBraces = openBraces - closeBraces
result = result + '}' * missingBraces
self.logger.info(f"Added {missingBraces} missing closing brace(s)")
self.logger.debug(f"Fixed JSON: {result}")
# Try parsing the fixed JSON
try:
styles = json.loads(result)
self.logger.info("Successfully fixed incomplete JSON")
except json.JSONDecodeError as fix_error:
self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
except json.JSONDecodeError as fixError:
self.logger.warning(f"Fixed JSON still invalid: {fixError}")
self.logger.warning(f"Fixed JSON content: {result}")
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
jsonStart = result.find('{')
jsonEnd = result.rfind('}')
if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
jsonPart = result[jsonStart:jsonEnd+1]
try:
styles = json.loads(json_part)
styles = json.loads(jsonPart)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
return defaultStyles
else:
return default_styles
return defaultStyles
else:
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
jsonStart = result.find('{')
jsonEnd = result.rfind('}')
if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
jsonPart = result[jsonStart:jsonEnd+1]
try:
styles = json.loads(json_part)
styles = json.loads(jsonPart)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
return defaultStyles
else:
return default_styles
return defaultStyles
# Convert colors to appropriate format
styles = self._convert_colors_format(styles)
styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
return defaultStyles
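The truncated-JSON repair above, restated as a standalone sketch (the input string is illustrative):
import json

def repairTruncatedJson(raw: str) -> dict:
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        missing = raw.count('{') - raw.count('}')
        if missing > 0:
            # Truncated model output usually only loses trailing braces.
            return json.loads(raw + '}' * missing)
        raise

repairTruncatedJson('{"title": {"bold": true')
# -> {'title': {'bold': True}}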
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""
Convert colors to appropriate format based on renderer type.
Override this method in subclasses for format-specific color handling.
"""
return styles
def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str:
"""
Create a standardized AI style template for any format.
Args:
format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
user_prompt: User's original prompt
style_schema: Format-specific style schema
formatName: Name of the format (e.g., "docx", "xlsx", "pptx")
userPrompt: User's original prompt
styleSchema: Format-specific style schema
Returns:
Formatted prompt string
"""
schema_json = json.dumps(style_schema, indent=4)
schemaJson = json.dumps(styleSchema, indent=4)
# DEBUG: Show the schema being sent
return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.
return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents.
Use this schema as a template and customize the values for professional document styling:
{schema_json}
{schemaJson}
Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations)
View file
@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported CSV formats."""
return ['csv']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['spreadsheet', 'table']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for CSV renderer."""
return 70
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to CSV format."""
try:
# Generate CSV directly from JSON (no styling needed for CSV)
csv_content = await self._generate_csv_from_json(extracted_content, title)
csvContent = await self._generateCsvFromJson(extractedContent, title)
return csv_content, "text/csv"
return csvContent, "text/csv"
except Exception as e:
self.logger.error(f"Error rendering CSV: {str(e)}")
# Return minimal CSV fallback
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate CSV content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(json_content, dict):
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Generate CSV content
csv_rows = []
csvRows = []
# Add title row
if document_title:
csv_rows.append([document_title])
csv_rows.append([]) # Empty row
if documentTitle:
csvRows.append([documentTitle])
csvRows.append([]) # Empty row
# Process each section in order
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
for section in sections:
section_csv = self._render_json_section_to_csv(section)
if section_csv:
csv_rows.extend(section_csv)
csv_rows.append([]) # Empty row between sections
sectionCsv = self._renderJsonSectionToCsv(section)
if sectionCsv:
csvRows.extend(sectionCsv)
csvRows.append([]) # Empty row between sections
# Convert to CSV string
csv_content = self._convert_rows_to_csv(csv_rows)
csvContent = self._convertRowsToCsv(csvRows)
return csv_content
return csvContent
except Exception as e:
self.logger.error(f"Error generating CSV from JSON: {str(e)}")
raise Exception(f"CSV generation failed: {str(e)}")
def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]:
"""Render a single JSON section to CSV rows."""
try:
section_type = section.get("content_type", "paragraph")
sectionType = section.get("content_type", "paragraph")
elements = section.get("elements", [])
csv_rows = []
csvRows = []
# Add section title if available
section_title = section.get("title")
if section_title:
csv_rows.append([f"# {section_title}"])
sectionTitle = section.get("title")
if sectionTitle:
csvRows.append([f"# {sectionTitle}"])
# Process each element in the section
for element in elements:
if section_type == "table":
csv_rows.extend(self._render_json_table_to_csv(element))
elif section_type == "list":
csv_rows.extend(self._render_json_list_to_csv(element))
elif section_type == "heading":
csv_rows.extend(self._render_json_heading_to_csv(element))
elif section_type == "paragraph":
csv_rows.extend(self._render_json_paragraph_to_csv(element))
elif section_type == "code":
csv_rows.extend(self._render_json_code_to_csv(element))
if sectionType == "table":
csvRows.extend(self._renderJsonTableToCsv(element))
elif sectionType == "list":
csvRows.extend(self._renderJsonListToCsv(element))
elif sectionType == "heading":
csvRows.extend(self._renderJsonHeadingToCsv(element))
elif sectionType == "paragraph":
csvRows.extend(self._renderJsonParagraphToCsv(element))
elif sectionType == "code":
csvRows.extend(self._renderJsonCodeToCsv(element))
else:
# Fallback to paragraph for unknown types
csv_rows.extend(self._render_json_paragraph_to_csv(element))
csvRows.extend(self._renderJsonParagraphToCsv(element))
return csv_rows
return csvRows
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
return [["[Error rendering section]"]]
def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON table to CSV rows."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
headers = tableData.get("headers", [])
rows = tableData.get("rows", [])
csv_rows = []
csvRows = []
if headers:
csv_rows.append(headers)
csvRows.append(headers)
if rows:
csv_rows.extend(rows)
csvRows.extend(rows)
return csv_rows
return csvRows
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return [["[Error rendering table]"]]
def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON list to CSV rows."""
try:
items = list_data.get("items", [])
csv_rows = []
items = listData.get("items", [])
csvRows = []
for item in items:
if isinstance(item, dict):
text = item.get("text", "")
subitems = item.get("subitems", [])
csv_rows.append([text])
csvRows.append([text])
# Add subitems as indented rows
for subitem in subitems:
if isinstance(subitem, dict):
csv_rows.append([f" - {subitem.get('text', '')}"])
csvRows.append([f" - {subitem.get('text', '')}"])
else:
csv_rows.append([f" - {subitem}"])
csvRows.append([f" - {subitem}"])
else:
csv_rows.append([str(item)])
csvRows.append([str(item)])
return csv_rows
return csvRows
except Exception as e:
self.logger.warning(f"Error rendering list: {str(e)}")
return [["[Error rendering list]"]]
def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON heading to CSV rows."""
try:
text = heading_data.get("text", "")
level = heading_data.get("level", 1)
text = headingData.get("text", "")
level = headingData.get("level", 1)
if text:
# Use # symbols for heading levels
heading_text = f"{'#' * level} {text}"
return [[heading_text]]
headingText = f"{'#' * level} {text}"
return [[headingText]]
return []
@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return [["[Error rendering heading]"]]
def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON paragraph to CSV rows."""
try:
text = paragraph_data.get("text", "")
text = paragraphData.get("text", "")
if text:
# Split long paragraphs into multiple rows if needed
if len(text) > 100:
words = text.split()
rows = []
current_row = []
current_length = 0
currentRow = []
currentLength = 0
for word in words:
if current_length + len(word) > 100 and current_row:
rows.append([" ".join(current_row)])
current_row = [word]
current_length = len(word)
if currentLength + len(word) > 100 and currentRow:
rows.append([" ".join(currentRow)])
currentRow = [word]
currentLength = len(word)
else:
current_row.append(word)
current_length += len(word) + 1
currentRow.append(word)
currentLength += len(word) + 1
if current_row:
rows.append([" ".join(current_row)])
if currentRow:
rows.append([" ".join(currentRow)])
return rows
else:
@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return [["[Error rendering paragraph]"]]
def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]:
"""Render a JSON code block to CSV rows."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
code = codeData.get("code", "")
language = codeData.get("language", "")
csv_rows = []
csvRows = []
if language:
csv_rows.append([f"Code ({language}):"])
csvRows.append([f"Code ({language}):"])
if code:
# Split code into lines
code_lines = code.split('\n')
for line in code_lines:
csv_rows.append([f" {line}"])
codeLines = code.split('\n')
for line in codeLines:
csvRows.append([f" {line}"])
return csv_rows
return csvRows
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return [["[Error rendering code block]"]]
def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
def _convertRowsToCsv(self, rows: List[List[str]]) -> str:
"""Convert rows to CSV string."""
import csv
import io
@ -245,7 +245,7 @@ class RendererCsv(BaseRenderer):
return output.getvalue()
def _clean_csv_content(self, content: str, title: str) -> str:
def _cleanCsvContent(self, content: str, title: str) -> str:
"""Clean and validate CSV content from AI."""
content = content.strip()
View file
@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer):
"""Renders content to DOCX format using python-docx."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported DOCX formats."""
return ['docx', 'doc']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['word', 'document']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for DOCX renderer."""
return 115
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER")
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try:
if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title)
return html_content, "text/html"
htmlRenderer = RendererHtml()
htmlContent, _ = await htmlRenderer.render(extractedContent, title)
return htmlContent, "text/html"
# Generate DOCX using AI-analyzed styling
docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service)
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer):
# Return minimal fallback
return f"DOCX Generation Error: {str(e)}", "text/plain"
async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate DOCX content from structured JSON document using AI-generated styling."""
try:
# Create new document
doc = Document()
# Get AI-generated styling definitions
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
styles = await self._get_docx_styles(user_prompt, ai_service)
self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...")
styles = await self._getDocxStyles(userPrompt, aiService)
# Apply basic document setup
self._setup_basic_document_styles(doc)
self._setupBasicDocumentStyles(doc)
# Validate JSON structure
if not isinstance(json_content, dict):
@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer):
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
raise Exception(f"DOCX generation failed: {str(e)}")
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get DOCX styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema)
styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles())
# Validate and fix contrast issues
return self._validate_styles_contrast(styles)
return self._validateStylesContrast(styles)
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_styles()
return self._getDefaultStyles()
def _get_default_styles(self) -> Dict[str, Any]:
def _getDefaultStyles(self) -> Dict[str, Any]:
"""Default DOCX styles."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
def _setup_basic_document_styles(self, doc: Document) -> None:
def _setupBasicDocumentStyles(self, doc: Document) -> None:
"""Set up basic document styles."""
try:
# Set default font
@ -189,7 +189,7 @@ class RendererDocx(BaseRenderer):
def _clear_template_content(self, doc: Document) -> None:
def _clearTemplateContent(self, doc: Document) -> None:
"""Clear template content while preserving styles."""
try:
# Remove all paragraphs except keep the styles
@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not clear template content: {str(e)}")
def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a single JSON section to DOCX using AI-generated styles."""
try:
section_type = section.get("content_type", "paragraph")
@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer):
# Process each element in the section
for element in elements:
if section_type == "table":
self._render_json_table(doc, element, styles)
self._renderJsonTable(doc, element, styles)
elif section_type == "bullet_list":
self._render_json_bullet_list(doc, element, styles)
self._renderJsonBulletList(doc, element, styles)
elif section_type == "heading":
self._render_json_heading(doc, element, styles)
self._renderJsonHeading(doc, element, styles)
elif section_type == "paragraph":
self._render_json_paragraph(doc, element, styles)
self._renderJsonParagraph(doc, element, styles)
elif section_type == "code_block":
self._render_json_code_block(doc, element, styles)
self._renderJsonCodeBlock(doc, element, styles)
elif section_type == "image":
self._render_json_image(doc, element, styles)
self._renderJsonImage(doc, element, styles)
else:
# Fallback to paragraph for unknown types
self._render_json_paragraph(doc, element, styles)
self._renderJsonParagraph(doc, element, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
# Add error paragraph as fallback
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON table to DOCX using AI-generated styles."""
try:
headers = table_data.get("headers", [])
@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer):
# Apply table borders based on AI style
border_style = styles["table_border"]["style"]
if border_style == "horizontal_only":
self._apply_horizontal_borders_only(table)
self._applyHorizontalBordersOnly(table)
elif border_style == "grid":
table.style = 'Table Grid'
# else: no borders
@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer):
# Apply background color
bg_color = header_style["background"].lstrip('#')
self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
# Apply text styling
for paragraph in cell.paragraphs:
@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
def _apply_horizontal_borders_only(self, table) -> None:
def _applyHorizontalBordersOnly(self, table) -> None:
"""Apply only horizontal borders to the table (no vertical borders)."""
try:
from docx.oxml.shared import OxmlElement, qn
@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
def _set_cell_background(self, cell, color: RGBColor) -> None:
def _setCellBackground(self, cell, color: RGBColor) -> None:
"""Set the background color of a table cell."""
try:
from docx.oxml.shared import OxmlElement, qn
@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Could not set cell background: {str(e)}")
def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles."""
try:
items = list_data.get("items", [])
@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON heading to DOCX using AI-generated styles."""
try:
level = heading_data.get("level", 1)
@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON paragraph to DOCX using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles."""
try:
code = code_data.get("code", "")
@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON image to DOCX."""
try:
base64_data = image_data.get("base64Data", "")
@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer):
self.logger.warning(f"Error rendering image: {str(e)}")
doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]:
def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
"""Extract document structure from user prompt."""
structure = {
'title': title,
@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer):
'format': 'standard'
}
if not user_prompt:
if not userPrompt:
return structure
# Extract title from prompt if not provided
if not title or title == "Generated Document":
# Look for "create a ... document" or "generate a ... report"
import re
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower())
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
if title_match:
structure['title'] = title_match.group(1).strip().title()
# Extract sections from numbered lists in prompt
import re
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
sections = re.findall(section_pattern, user_prompt)
sections = re.findall(section_pattern, userPrompt)
for num, section_text in sections:
structure['sections'].append({
@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer):
# If no numbered sections found, try to extract from "including:" patterns
if not structure['sections']:
including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL)
including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
if including_match:
including_text = including_match.group(1)
# Split by common separators
@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer):
if not structure['sections']:
# Look for bullet points or dashes
bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
bullets = re.findall(bullet_pattern, user_prompt)
bullets = re.findall(bullet_pattern, userPrompt)
for i, bullet in enumerate(bullets, 1):
bullet = bullet.strip()
if bullet and len(bullet) > 3:
@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer):
# If still no sections, extract from sentence structure
if not structure['sections']:
# Split prompt into sentences and use as sections
sentences = re.split(r'[.!?]\s+', user_prompt)
sentences = re.split(r'[.!?]\s+', userPrompt)
for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
sentence = sentence.strip()
if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer):
action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
found_actions = []
for action in action_words:
if action in user_prompt.lower():
if action in userPrompt.lower():
found_actions.append(action.title())
if found_actions:
@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer):
return structure
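A sketch of the numbered-section path, assuming a RendererDocx instance (prompt text illustrative):
structure = renderer._extractStructureFromPrompt(
    "Create a quarterly sales report with sections: "
    "1) Overview, 2) Regional results, 3) Outlook",
    title="Generated Document",
)
# The lazy title pattern stops before " report", giving "Quarterly Sales";
# the numbered-list pattern yields one section entry per "n) ..." item.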
def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]):
def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
"""Generate DOCX content based on extracted structure."""
# Add sections based on prompt structure
for section in structure['sections']:
@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer):
# Add AI-generated content for this section
# Try to extract relevant content for this section from the AI response
section_content = self._extract_section_content(content, section['title'])
section_content = self._extractSectionContent(content, section['title'])
if section_content:
doc.add_paragraph(section_content)
@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer):
doc.add_heading("Complete Analysis", level=1)
doc.add_paragraph(content)
def _extract_section_content(self, content: str, section_title: str) -> str:
def _extractSectionContent(self, content: str, section_title: str) -> str:
"""Extract relevant content for a specific section from AI response."""
if not content or not section_title:
return ""
@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer):
return ""
def _setup_document_styles(self, doc):
def _setupDocumentStyles(self, doc):
"""Set up document styles."""
try:
# Set default font
@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not set up document styles: {str(e)}")
def _process_section(self, doc, lines: list):
def _processSection(self, doc, lines: list):
"""Process a section of content into DOCX elements."""
for line in lines:
if not line.strip():
@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer):
# Check for tables (lines with |)
if '|' in line and not line.startswith('|'):
# This might be part of a table, process as table
table_data = self._extract_table_data(lines)
table_data = self._extractTableData(lines)
if table_data:
self._add_table(doc, table_data)
self._addTable(doc, table_data)
return
# Check for lists
@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer):
# Regular paragraph
doc.add_paragraph(line)
def _extract_table_data(self, lines: list) -> list:
def _extractTableData(self, lines: list) -> list:
"""Extract table data from lines."""
table_data = []
in_table = False
@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer):
return table_data if len(table_data) > 1 else []
def _add_table(self, doc, table_data: list):
def _addTable(self, doc, table_data: list):
"""Add a table to the document."""
try:
if not table_data:
@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer):
table.rows[row_idx].cells[col_idx].text = cell_data
# Style the table
self._style_table(table)
self._styleTable(table)
except Exception as e:
self.logger.warning(f"Could not add table: {str(e)}")
def _style_table(self, table):
def _styleTable(self, table):
"""Apply styling to the table."""
try:
# Style header row
@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not style table: {str(e)}")
def _process_table_row(self, doc, line: str):
def _processTableRow(self, doc, line: str):
"""Process a table row and add it to the document."""
if not line.strip():
return
@ -745,7 +745,7 @@ class RendererDocx(BaseRenderer):
# Not a table row, treat as regular text
doc.add_paragraph(line)
def _clean_ai_content(self, content: str) -> str:
def _cleanAiContent(self, content: str) -> str:
"""Clean AI-generated content by removing debug information and duplicates."""
if not content:
return ""
@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer):
return '\n\n'.join(unique_sections)
def _process_tables(self, doc, content: str) -> str:
def _processTables(self, doc, content: str) -> str:
"""
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
Returns the content with tables replaced by placeholders.
@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer):
return '\n'.join(processed_lines)
def _parse_and_format_content(self, doc, content: str, title: str):
def _parseAndFormatContent(self, doc, content: str, title: str):
"""Parse AI-generated content in standardized format and apply proper DOCX formatting."""
if not content:
return
# Process tables and replace them with placeholders
content = self._process_tables(doc, content)
content = self._processTables(doc, content)
# Parse content line by line in exact sequence
lines = content.split('\n')
@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer):
# Regular paragraph
else:
self._add_paragraph_to_doc(doc, line)
self._addParagraphToDoc(doc, line)
def _add_paragraph_to_doc(self, doc, text: str):
def _addParagraphToDoc(self, doc, text: str):
"""Add a paragraph to the document with proper formatting."""
if not text.strip():
return
View file
@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported HTML formats."""
return ['html', 'htm']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['web', 'webpage']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for HTML renderer."""
return 100
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
try:
# Generate HTML using AI-analyzed styling
html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
return html_content, "text/html"
return htmlContent, "text/html"
except Exception as e:
self.logger.error(f"Error rendering HTML: {str(e)}")
# Return minimal HTML fallback
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
styles = await self._get_html_styles(user_prompt, ai_service)
styles = await self._getHtmlStyles(userPrompt, aiService)
# Validate JSON structure
if not isinstance(json_content, dict):
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build HTML document
html_parts = []
htmlParts = []
# HTML document structure
html_parts.append('<!DOCTYPE html>')
html_parts.append('<html lang="en">')
html_parts.append('<head>')
html_parts.append('<meta charset="UTF-8">')
html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
html_parts.append(f'<title>{document_title}</title>')
html_parts.append('<style>')
html_parts.append(self._generate_css_styles(styles))
html_parts.append('</style>')
html_parts.append('</head>')
html_parts.append('<body>')
htmlParts.append('<!DOCTYPE html>')
htmlParts.append('<html lang="en">')
htmlParts.append('<head>')
htmlParts.append('<meta charset="UTF-8">')
htmlParts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
htmlParts.append(f'<title>{documentTitle}</title>')
htmlParts.append('<style>')
htmlParts.append(self._generateCssStyles(styles))
htmlParts.append('</style>')
htmlParts.append('</head>')
htmlParts.append('<body>')
# Document header
html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>')
htmlParts.append(f'<header><h1 class="document-title">{documentTitle}</h1></header>')
# Main content
html_parts.append('<main>')
htmlParts.append('<main>')
# Process each section
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
for section in sections:
section_html = self._render_json_section(section, styles)
if section_html:
html_parts.append(section_html)
sectionHtml = self._renderJsonSection(section, styles)
if sectionHtml:
htmlParts.append(sectionHtml)
html_parts.append('</main>')
htmlParts.append('</main>')
# Footer
html_parts.append('<footer>')
html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
html_parts.append('</footer>')
htmlParts.append('<footer>')
htmlParts.append(f'<p class="generated-info">Generated: {self._formatTimestamp()}</p>')
htmlParts.append('</footer>')
html_parts.append('</body>')
html_parts.append('</html>')
htmlParts.append('</body>')
htmlParts.append('</html>')
return '\n'.join(html_parts)
return '\n'.join(htmlParts)
except Exception as e:
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
raise Exception(f"HTML generation failed: {str(e)}")
async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
async def _getHtmlStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get HTML styling definitions using base template AI styling."""
style_schema = {
styleSchema = {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
@@ -113,40 +113,40 @@ class RendererHtml(BaseRenderer):
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
style_template = self._create_ai_style_template("html", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles())
styleTemplate = self._createAiStyleTemplate("html", userPrompt, styleSchema)
styles = await self._getAiStyles(aiService, styleTemplate, self._getDefaultHtmlStyles())
# Validate and fix contrast issues
return self._validate_html_styles_contrast(styles)
return self._validateHtmlStylesContrast(styles)
def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _validateHtmlStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("color", "#000000")
bgColor = header.get("background", "#FFFFFF")
textColor = header.get("color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("color", "#000000")
bgColor = cell.get("background", "#FFFFFF")
textColor = cell.get("color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
@@ -154,10 +154,10 @@ class RendererHtml(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_html_styles()
return self._getDefaultHtmlStyles()
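The checks above only catch the two literal white-on-white and black-on-black cases. A more general approach, sketched here under the assumption that colors arrive as #RRGGBB strings (hypothetical helper, not part of this commit), would compare WCAG relative luminance:
def contrastRatio(hexA: str, hexB: str) -> float:
    """Hypothetical WCAG contrast check; assumes #RRGGBB inputs."""
    def luminance(hexColor: str) -> float:
        def channel(c: float) -> float:
            # Linearize an sRGB channel value in [0, 1].
            return c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4
        r, g, b = (channel(int(hexColor.lstrip('#')[i:i + 2], 16) / 255.0) for i in (0, 2, 4))
        return 0.2126 * r + 0.7152 * g + 0.0722 * b
    la, lb = luminance(hexA), luminance(hexB)
    lighter, darker = max(la, lb), min(la, lb)
    return (lighter + 0.05) / (darker + 0.05)

# contrastRatio("#FFFFFF", "#FFFFFF") == 1.0 (unreadable); WCAG AA expects >= 4.5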
def _get_default_html_styles(self) -> Dict[str, Any]:
def _getDefaultHtmlStyles(self) -> Dict[str, Any]:
"""Default HTML styles."""
return {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
@@ -173,7 +173,7 @@ class RendererHtml(BaseRenderer):
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions."""
css_parts = []
@@ -271,109 +271,109 @@ class RendererHtml(BaseRenderer):
return '\n'.join(css_parts)
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles."""
try:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
if section_type == "table":
if sectionType == "table":
# Process the section data to extract table structure
processed_data = self._process_section_by_type(section)
return self._render_json_table(processed_data, styles)
elif section_type == "bullet_list":
processedData = self._processSectionByType(section)
return self._renderJsonTable(processedData, styles)
elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section)
return self._render_json_bullet_list(processed_data, styles)
elif section_type == "heading":
return self._render_json_heading(section_data, styles)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data, styles)
elif section_type == "code_block":
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData, styles)
elif sectionType == "heading":
return self._renderJsonHeading(sectionData, styles)
elif sectionType == "paragraph":
return self._renderJsonParagraph(sectionData, styles)
elif sectionType == "code_block":
# Process the section data to extract code block structure
processed_data = self._process_section_by_type(section)
return self._render_json_code_block(processed_data, styles)
elif section_type == "image":
processedData = self._processSectionByType(section)
return self._renderJsonCodeBlock(processedData, styles)
elif sectionType == "image":
# Process the section data to extract image structure
processed_data = self._process_section_by_type(section)
return self._render_json_image(processed_data, styles)
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData, styles)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data, styles)
return self._renderJsonParagraph(sectionData, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f'<div class="error">[Error rendering section: {str(e)}]</div>'
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON table to HTML using AI-generated styles."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
headers = tableData.get("headers", [])
rows = tableData.get("rows", [])
if not headers or not rows:
return ""
html_parts = ['<table>']
htmlParts = ['<table>']
# Table header
html_parts.append('<thead><tr>')
htmlParts.append('<thead><tr>')
for header in headers:
html_parts.append(f'<th>{header}</th>')
html_parts.append('</tr></thead>')
htmlParts.append(f'<th>{header}</th>')
htmlParts.append('</tr></thead>')
# Table body
html_parts.append('<tbody>')
htmlParts.append('<tbody>')
for row in rows:
html_parts.append('<tr>')
for cell_data in row:
html_parts.append(f'<td>{cell_data}</td>')
html_parts.append('</tr>')
html_parts.append('</tbody>')
htmlParts.append('<tr>')
for cellData in row:
htmlParts.append(f'<td>{cellData}</td>')
htmlParts.append('</tr>')
htmlParts.append('</tbody>')
html_parts.append('</table>')
return '\n'.join(html_parts)
htmlParts.append('</table>')
return '\n'.join(htmlParts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON bullet list to HTML using AI-generated styles."""
try:
items = list_data.get("items", [])
items = listData.get("items", [])
if not items:
return ""
html_parts = ['<ul>']
htmlParts = ['<ul>']
for item in items:
if isinstance(item, str):
html_parts.append(f'<li>{item}</li>')
htmlParts.append(f'<li>{item}</li>')
elif isinstance(item, dict) and "text" in item:
html_parts.append(f'<li>{item["text"]}</li>')
html_parts.append('</ul>')
htmlParts.append(f'<li>{item["text"]}</li>')
htmlParts.append('</ul>')
return '\n'.join(html_parts)
return '\n'.join(htmlParts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
if isinstance(heading_data, str):
heading_data = {"text": heading_data, "level": 2}
elif isinstance(heading_data, list):
if isinstance(headingData, str):
headingData = {"text": headingData, "level": 2}
elif isinstance(headingData, list):
# Render a list as bullet list under a default heading label
return self._render_json_bullet_list({"items": heading_data}, styles)
elif not isinstance(heading_data, dict):
return self._renderJsonBulletList({"items": headingData}, styles)
elif not isinstance(headingData, dict):
return ""
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
level = headingData.get("level", 1)
text = headingData.get("text", "")
if text:
level = max(1, min(6, level))
@@ -385,19 +385,19 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
if isinstance(paragraph_data, str):
paragraph_data = {"text": paragraph_data}
elif isinstance(paragraph_data, list):
if isinstance(paragraphData, str):
paragraphData = {"text": paragraphData}
elif isinstance(paragraphData, list):
# Treat list as bullet list paragraph
return self._render_json_bullet_list({"items": paragraph_data}, styles)
elif not isinstance(paragraph_data, dict):
return self._renderJsonBulletList({"items": paragraphData}, styles)
elif not isinstance(paragraphData, dict):
return ""
text = paragraph_data.get("text", "")
text = paragraphData.get("text", "")
if text:
return f'<p>{text}</p>'
@@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON code block to HTML using AI-generated styles."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
code = codeData.get("code", "")
language = codeData.get("language", "")
if code:
if language:
@@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON image to HTML."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
base64Data = imageData.get("base64Data", "")
altText = imageData.get("altText", "Image")
if base64_data:
return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">'
if base64Data:
return f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
return ""
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>'
return f'<div class="error">[Image: {imageData.get("altText", "Image")}]</div>'
View file
@@ -12,154 +12,156 @@ class RendererImage(BaseRenderer):
"""Renders content to image format using AI image generation."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported image formats."""
return ['png', 'jpg', 'jpeg', 'image']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['img', 'picture', 'photo', 'graphic']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for image renderer."""
return 90
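Taken together, getSupportedFormats, getFormatAliases and getPriority suggest a registry that resolves a requested format to the highest-priority renderer. A hypothetical dispatch sketch (the registry itself is assumed, not shown in this commit):
def pickRenderer(requestedFormat: str, rendererClasses: list):
    # Prefer the highest-priority renderer claiming the format or one of its aliases.
    candidates = [
        cls for cls in rendererClasses
        if requestedFormat in cls.getSupportedFormats()
        or requestedFormat in cls.getFormatAliases()
    ]
    return max(candidates, key=lambda cls: cls.getPriority(), default=None)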
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to image format using AI image generation."""
try:
# Generate AI image from content
image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
return image_content, "image/png"
return imageContent, "image/png"
except Exception as e:
self.logger.error(f"Error rendering image: {str(e)}")
# Re-raise the exception instead of using fallback
raise Exception(f"Image rendering failed: {str(e)}")
async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate AI image from extracted content."""
try:
if not ai_service:
if not aiService:
raise ValueError("AI service is required for image generation")
# Validate JSON structure
if not isinstance(extracted_content, dict):
if not isinstance(extractedContent, dict):
raise ValueError("Extracted content must be a dictionary")
if "sections" not in extracted_content:
if "sections" not in extractedContent:
raise ValueError("Extracted content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = extracted_content.get("metadata", {}).get("title", title)
documentTitle = extractedContent.get("metadata", {}).get("title", title)
# Create AI prompt for image generation
image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service)
imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)
# Save image generation prompt to debug
ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt")
aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")
# Generate image using AI
image_result = await ai_service.aiObjects.generateImage(
prompt=image_prompt,
imageResult = await aiService.aiObjects.generateImage(
prompt=imagePrompt,
size="1024x1024",
quality="standard",
style="vivid"
)
# Save image generation response to debug
ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response")
aiService.services.utils.writeDebugFile(str(imageResult), "image_generation_response")
# Extract base64 image data from result
if image_result and image_result.get("success", False):
image_data = image_result.get("image_data", "")
if image_data:
return image_data
if imageResult and imageResult.get("success", False):
imageData = imageResult.get("image_data", "")
if imageData:
return imageData
else:
raise ValueError("No image data returned from AI")
else:
error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
raise ValueError(f"AI image generation failed: {error_msg}")
errorMsg = imageResult.get("error", "Unknown error") if imageResult else "No result"
raise ValueError(f"AI image generation failed: {errorMsg}")
except Exception as e:
self.logger.error(f"Error generating AI image: {str(e)}")
raise Exception(f"AI image generation failed: {str(e)}")
async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Create a detailed prompt for AI image generation based on the content."""
try:
# Start with base prompt
prompt_parts = []
promptParts = []
# Add user's original intent if available
if user_prompt:
prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}")
if userPrompt:
sanitizedPrompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
promptParts.append(f"User Request: {sanitizedPrompt}")
# Add document title
prompt_parts.append(f"Document Title: {title}")
promptParts.append(f"Document Title: {title}")
# Analyze content and create visual description
sections = extracted_content.get("sections", [])
content_description = self._analyze_content_for_visual_description(sections)
sections = extractedContent.get("sections", [])
contentDescription = self._analyzeContentForVisualDescription(sections)
if content_description:
prompt_parts.append(f"Content to Visualize: {content_description}")
if contentDescription:
promptParts.append(f"Content to Visualize: {contentDescription}")
# Add style guidance
style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
if style_guidance:
prompt_parts.append(f"Visual Style: {style_guidance}")
styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
if styleGuidance:
promptParts.append(f"Visual Style: {styleGuidance}")
# Combine all parts
full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)
fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)
# Add technical requirements
full_prompt += "\n\nTechnical Requirements:"
full_prompt += "\n- High quality, professional appearance"
full_prompt += "\n- Clear, readable text if any text is included"
full_prompt += "\n- Appropriate colors and layout"
full_prompt += "\n- Suitable for business/professional use"
fullPrompt += "\n\nTechnical Requirements:"
fullPrompt += "\n- High quality, professional appearance"
fullPrompt += "\n- Clear, readable text if any text is included"
fullPrompt += "\n- Appropriate colors and layout"
fullPrompt += "\n- Suitable for business/professional use"
# Truncate prompt if it exceeds DALL-E's 4000 character limit
if len(full_prompt) > 4000:
if len(fullPrompt) > 4000:
# Use AI to compress the prompt intelligently
compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
if compressed_prompt and len(compressed_prompt) <= 4000:
return compressed_prompt
compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
if compressedPrompt and len(compressedPrompt) <= 4000:
return compressedPrompt
# Fallback to minimal prompt if AI compression fails or is still too long
minimal_prompt = f"Create a professional image representing: {title}"
if user_prompt:
minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}"
minimalPrompt = f"Create a professional image representing: {title}"
if userPrompt:
sanitizedPrompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
minimalPrompt += f" - {sanitizedPrompt}"
# If even the minimal prompt is too long, truncate it
if len(minimal_prompt) > 4000:
minimal_prompt = minimal_prompt[:3997] + "..."
if len(minimalPrompt) > 4000:
minimalPrompt = minimalPrompt[:3997] + "..."
return minimal_prompt
return minimalPrompt
return full_prompt
return fullPrompt
except Exception as e:
self.logger.warning(f"Error creating image prompt: {str(e)}")
# Fallback to simple prompt
return f"Create a professional image representing: {title}"
async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str:
"""Use AI to intelligently compress a long prompt while preserving key information."""
try:
if not ai_service:
if not aiService:
return None
compression_prompt = f"""
compressionPrompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.
The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
Original prompt ({len(long_prompt)} characters):
{long_prompt}
Original prompt ({len(longPrompt)} characters):
{longPrompt}
Please create a compressed version that:
1. Keeps the most important visual elements and requirements
@@ -176,7 +178,7 @@ Return only the compressed prompt, no explanations.
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request = AiCallRequest(
prompt=compression_prompt,
prompt=compressionPrompt,
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
maxTokens=None, # Let the model use its full context length
@@ -184,12 +186,12 @@ Return only the compressed prompt, no explanations.
)
)
response = await ai_service.aiObjects.call(request)
response = await aiService.aiObjects.call(request)
compressed = response.content.strip()
# Validate the compressed prompt
if compressed and len(compressed) <= 4000 and len(compressed) > 50:
self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
return compressed
else:
self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
@@ -199,42 +201,42 @@ Return only the compressed prompt, no explanations.
self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
return None
def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
"""Analyze content sections and create a visual description for AI."""
try:
descriptions = []
for section in sections:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
if section_type == "table":
headers = section_data.get("headers", [])
rows = section_data.get("rows", [])
if sectionType == "table":
headers = sectionData.get("headers", [])
rows = sectionData.get("rows", [])
if headers and rows:
descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
elif section_type == "bullet_list":
items = section_data.get("items", [])
elif sectionType == "bullet_list":
items = sectionData.get("items", [])
if items:
descriptions.append(f"List with {len(items)} items")
elif section_type == "heading":
text = section_data.get("text", "")
level = section_data.get("level", 1)
elif sectionType == "heading":
text = sectionData.get("text", "")
level = sectionData.get("level", 1)
if text:
descriptions.append(f"Heading {level}: {text}")
elif section_type == "paragraph":
text = section_data.get("text", "")
elif sectionType == "paragraph":
text = sectionData.get("text", "")
if text and len(text) > 10: # Only include substantial paragraphs
# Truncate long text
truncated = text[:100] + "..." if len(text) > 100 else text
descriptions.append(f"Text content: {truncated}")
elif section_type == "code_block":
code = section_data.get("code", "")
language = section_data.get("language", "")
elif sectionType == "code_block":
code = sectionData.get("code", "")
language = sectionData.get("language", "")
if code:
descriptions.append(f"Code block ({language}): {code[:50]}...")
@@ -244,42 +246,42 @@ Return only the compressed prompt, no explanations.
self.logger.warning(f"Error analyzing content: {str(e)}")
return "Document content"
def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str:
"""Determine visual style guidance based on content and user prompt."""
try:
style_elements = []
styleElements = []
# Analyze user prompt for style hints
if user_prompt:
prompt_lower = user_prompt.lower()
if userPrompt:
promptLower = userPrompt.lower()
if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
style_elements.append("modern, clean design")
elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
style_elements.append("classic, formal design")
elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
style_elements.append("creative, artistic design")
elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
style_elements.append("corporate, professional design")
if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
styleElements.append("modern, clean design")
elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
styleElements.append("classic, formal design")
elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
styleElements.append("creative, artistic design")
elif any(word in promptLower for word in ["corporate", "business", "professional"]):
styleElements.append("corporate, professional design")
# Analyze content type for additional style hints
sections = extracted_content.get("sections", [])
has_tables = any(self._get_section_type(s) == "table" for s in sections)
has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
has_code = any(self._get_section_type(s) == "code_block" for s in sections)
sections = extractedContent.get("sections", [])
hasTables = any(self._getSectionType(s) == "table" for s in sections)
hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
hasCode = any(self._getSectionType(s) == "code_block" for s in sections)
if has_tables:
style_elements.append("data-focused layout")
if has_lists:
style_elements.append("organized, structured presentation")
if has_code:
style_elements.append("technical, developer-friendly")
if hasTables:
styleElements.append("data-focused layout")
if hasLists:
styleElements.append("organized, structured presentation")
if hasCode:
styleElements.append("technical, developer-friendly")
# Default style if no specific guidance
if not style_elements:
style_elements.append("professional, clean design")
if not styleElements:
styleElements.append("professional, clean design")
return ", ".join(style_elements)
return ", ".join(styleElements)
except Exception as e:
self.logger.warning(f"Error determining style guidance: {str(e)}")
View file
@@ -10,40 +10,40 @@ class RendererJson(BaseRenderer):
"""Renders content to JSON format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported JSON formats."""
return ['json']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['data']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for JSON renderer."""
return 80
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to JSON format."""
try:
# The extracted content should already be JSON from the AI
# Just validate and format it
json_content = self._clean_json_content(extracted_content, title)
jsonContent = self._cleanJsonContent(extractedContent, title)
return json_content, "application/json"
return jsonContent, "application/json"
except Exception as e:
self.logger.error(f"Error rendering JSON: {str(e)}")
# Return minimal JSON fallback
fallback_data = {
fallbackData = {
"title": title,
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
"metadata": {"error": str(e)}
}
return json.dumps(fallback_data, indent=2), "application/json"
return json.dumps(fallbackData, indent=2), "application/json"
def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
"""Clean and validate JSON content from AI."""
try:
# Validate JSON structure
@@ -72,8 +72,8 @@ class RendererJson(BaseRenderer):
except Exception as e:
self.logger.warning(f"Error cleaning JSON content: {str(e)}")
# Return minimal valid JSON
fallback_data = {
fallbackData = {
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
"metadata": {"title": title, "error": str(e)}
}
return json.dumps(fallback_data, indent=2, ensure_ascii=False)
return json.dumps(fallbackData, indent=2, ensure_ascii=False)
View file
@@ -9,161 +9,161 @@ class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported Markdown formats."""
return ['md', 'markdown']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['mdown', 'mkd']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for markdown renderer."""
return 95
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to Markdown format."""
try:
# Generate markdown from JSON structure
markdown_content = self._generate_markdown_from_json(extracted_content, title)
markdownContent = self._generateMarkdownFromJson(extractedContent, title)
return markdown_content, "text/markdown"
return markdownContent, "text/markdown"
except Exception as e:
self.logger.error(f"Error rendering markdown: {str(e)}")
# Return minimal markdown fallback
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(json_content, dict):
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build markdown content
markdown_parts = []
markdownParts = []
# Document title
markdown_parts.append(f"# {document_title}")
markdown_parts.append("")
markdownParts.append(f"# {documentTitle}")
markdownParts.append("")
# Process each section
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
for section in sections:
section_markdown = self._render_json_section(section)
if section_markdown:
markdown_parts.append(section_markdown)
markdown_parts.append("") # Add spacing between sections
sectionMarkdown = self._renderJsonSection(section)
if sectionMarkdown:
markdownParts.append(sectionMarkdown)
markdownParts.append("") # Add spacing between sections
# Add generation info
markdown_parts.append("---")
markdown_parts.append(f"*Generated: {self._format_timestamp()}*")
markdownParts.append("---")
markdownParts.append(f"*Generated: {self._formatTimestamp()}*")
return '\n'.join(markdown_parts)
return '\n'.join(markdownParts)
except Exception as e:
self.logger.error(f"Error generating markdown from JSON: {str(e)}")
raise Exception(f"Markdown generation failed: {str(e)}")
def _render_json_section(self, section: Dict[str, Any]) -> str:
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to markdown."""
try:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
if section_type == "table":
if sectionType == "table":
# Process the section data to extract table structure
processed_data = self._process_section_by_type(section)
return self._render_json_table(processed_data)
elif section_type == "bullet_list":
processedData = self._processSectionByType(section)
return self._renderJsonTable(processedData)
elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section)
return self._render_json_bullet_list(processed_data)
elif section_type == "heading":
return self._render_json_heading(section_data)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data)
elif section_type == "code_block":
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData)
elif sectionType == "heading":
return self._renderJsonHeading(sectionData)
elif sectionType == "paragraph":
return self._renderJsonParagraph(sectionData)
elif sectionType == "code_block":
# Process the section data to extract code block structure
processed_data = self._process_section_by_type(section)
return self._render_json_code_block(processed_data)
elif section_type == "image":
processedData = self._processSectionByType(section)
return self._renderJsonCodeBlock(processedData)
elif sectionType == "image":
# Process the section data to extract image structure
processed_data = self._process_section_by_type(section)
return self._render_json_image(processed_data)
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data)
return self._renderJsonParagraph(sectionData)
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f"*[Error rendering section: {str(e)}]*"
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to markdown."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
headers = tableData.get("headers", [])
rows = tableData.get("rows", [])
if not headers or not rows:
return ""
markdown_parts = []
markdownParts = []
# Create table header
header_line = " | ".join(str(header) for header in headers)
markdown_parts.append(header_line)
headerLine = " | ".join(str(header) for header in headers)
markdownParts.append(headerLine)
# Add separator line
separator_line = " | ".join("---" for _ in headers)
markdown_parts.append(separator_line)
separatorLine = " | ".join("---" for _ in headers)
markdownParts.append(separatorLine)
# Add data rows
for row in rows:
row_line = " | ".join(str(cell_data) for cell_data in row)
markdown_parts.append(row_line)
rowLine = " | ".join(str(cellData) for cellData in row)
markdownParts.append(rowLine)
return '\n'.join(markdown_parts)
return '\n'.join(markdownParts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to markdown."""
try:
items = list_data.get("items", [])
items = listData.get("items", [])
if not items:
return ""
markdown_parts = []
markdownParts = []
for item in items:
if isinstance(item, str):
markdown_parts.append(f"- {item}")
markdownParts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item:
markdown_parts.append(f"- {item['text']}")
markdownParts.append(f"- {item['text']}")
return '\n'.join(markdown_parts)
return '\n'.join(markdownParts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to markdown."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
level = headingData.get("level", 1)
text = headingData.get("text", "")
if text:
level = max(1, min(6, level))
@@ -175,21 +175,21 @@ class RendererMarkdown(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
"""Render a JSON paragraph to markdown."""
try:
text = paragraph_data.get("text", "")
text = paragraphData.get("text", "")
return text if text else ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to markdown."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
code = codeData.get("code", "")
language = codeData.get("language", "")
if code:
if language:
@@ -203,19 +203,19 @@ class RendererMarkdown(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to markdown."""
try:
alt_text = image_data.get("altText", "Image")
base64_data = image_data.get("base64Data", "")
altText = imageData.get("altText", "Image")
base64Data = imageData.get("base64Data", "")
if base64_data:
if base64Data:
# For base64 images, we can't embed them directly in markdown
# So we'll use a placeholder with the alt text
return f"![{alt_text}](data:image/png;base64,{base64_data[:50]}...)"
return f"![{altText}](data:image/png;base64,{base64Data[:50]}...)"
else:
return f"![{alt_text}](image-placeholder)"
return f"![{altText}](image-placeholder)"
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f"![{image_data.get('altText', 'Image')}](image-error)"
return f"![{imageData.get('altText', 'Image')}](image-error)"
View file
@@ -22,32 +22,32 @@ class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported PDF formats."""
return ['pdf']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['document', 'print']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for PDF renderer."""
return 120
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
return html_content, "text/html"
# Generate PDF using AI-analyzed styling
pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
return pdf_content, "application/pdf"
@@ -56,11 +56,11 @@ class RendererPdf(BaseRenderer):
# Return minimal fallback
return f"PDF Generation Error: {str(e)}", "text/plain"
async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
styles = await self._get_pdf_styles(user_prompt, ai_service)
styles = await self._getPdfStyles(userPrompt, aiService)
# Validate JSON structure
if not isinstance(json_content, dict):
@@ -93,10 +93,10 @@ class RendererPdf(BaseRenderer):
story = []
# Title page
title_style = self._create_title_style(styles)
title_style = self._createTitleStyle(styles)
story.append(Paragraph(document_title, title_style))
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._createNormalStyle(styles)))
story.append(Spacer(1, 30)) # Add spacing before page break
story.append(PageBreak())
@@ -105,7 +105,7 @@ class RendererPdf(BaseRenderer):
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
for i, section in enumerate(sections):
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
section_elements = self._render_json_section(section, styles)
section_elements = self._renderJsonSection(section, styles)
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
story.extend(section_elements)
@@ -123,7 +123,7 @@ class RendererPdf(BaseRenderer):
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
raise Exception(f"PDF generation failed: {str(e)}")
async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
async def _getPdfStyles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get PDF styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
@@ -136,21 +136,21 @@ class RendererPdf(BaseRenderer):
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
style_template = self._createAiStyleTemplate("pdf", user_prompt, style_schema)
# Use base template method like DOCX does (this works!)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())
styles = await self._getAiStyles(ai_service, style_template, self._getDefaultPdfStyles())
if styles is None:
return self._get_default_pdf_styles()
return self._getDefaultPdfStyles()
# Convert colors to PDF format after getting styles
styles = self._convert_colors_format(styles)
styles = self._convertColorsFormat(styles)
# Validate and fix contrast issues
return self._validate_pdf_styles_contrast(styles)
return self._validatePdfStylesContrast(styles)
async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PDF color conversion."""
if not ai_service:
return default_styles
@@ -279,7 +279,7 @@ class RendererPdf(BaseRenderer):
return default_styles
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
styles = self._convert_colors_format(styles)
styles = self._convertColorsFormat(styles)
return styles
@@ -287,7 +287,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert colors to proper format for PDF compatibility."""
try:
for style_name, style_config in styles.items():
@@ -304,7 +304,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
"""Get a safe hex color value for PDF."""
if isinstance(color_value, str) and color_value.startswith('#'):
if len(color_value) == 7:
@@ -313,7 +313,7 @@ class RendererPdf(BaseRenderer):
return color_value
return default
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _validatePdfStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
@@ -348,9 +348,9 @@ class RendererPdf(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pdf_styles()
return self._getDefaultPdfStyles()
def _get_default_pdf_styles(self) -> Dict[str, Any]:
def _getDefaultPdfStyles(self) -> Dict[str, Any]:
"""Default PDF styles."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
@@ -363,27 +363,27 @@ class RendererPdf(BaseRenderer):
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
}
def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create title style from style definitions."""
title_style_def = styles.get("title", {})
# DEBUG: Show what color and spacing is being used for title
title_color = title_style_def.get("color", "#1F4E79")
title_space_after = title_style_def.get("space_after", 30)
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
return ParagraphStyle(
'CustomTitle',
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
spaceAfter=title_style_def.get("space_after", 30),
alignment=self._get_alignment(title_style_def.get("align", "center")),
textColor=self._hex_to_color(title_color),
alignment=self._getAlignment(title_style_def.get("align", "center")),
textColor=self._hexToColor(title_color),
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
spaceBefore=0 # Ensure no space before title
)
def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
"""Create heading style from style definitions."""
heading_key = f"heading{level}"
heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
@@ -393,11 +393,11 @@ class RendererPdf(BaseRenderer):
fontSize=heading_style_def.get("font_size", 18 - level * 2),
spaceAfter=heading_style_def.get("space_after", 12),
spaceBefore=heading_style_def.get("space_before", 12),
alignment=self._get_alignment(heading_style_def.get("align", "left")),
textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
alignment=self._getAlignment(heading_style_def.get("align", "left")),
textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F"))
)
def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
"""Create normal paragraph style from style definitions."""
paragraph_style_def = styles.get("paragraph", {})
@@ -405,12 +405,12 @@ class RendererPdf(BaseRenderer):
'CustomNormal',
fontSize=paragraph_style_def.get("font_size", 11),
spaceAfter=paragraph_style_def.get("space_after", 6),
alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
)
def _get_alignment(self, align: str) -> int:
def _getAlignment(self, align: str) -> int:
"""Convert alignment string to reportlab alignment constant."""
if not align or not isinstance(align, str):
return TA_LEFT
@@ -426,7 +426,7 @@ class RendererPdf(BaseRenderer):
}
return align_map.get(align.lower().strip(), TA_LEFT)
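The mapping body is elided by the hunk; a sketch consistent with the visible TA_LEFT fallback (assumed, not the committed table):
from reportlab.lib.enums import TA_CENTER, TA_JUSTIFY, TA_LEFT, TA_RIGHT

align_map = {
    "left": TA_LEFT,
    "center": TA_CENTER,
    "right": TA_RIGHT,
    "justify": TA_JUSTIFY,
}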
def _get_table_alignment(self, align: str) -> str:
def _getTableAlignment(self, align: str) -> str:
"""Convert alignment string to ReportLab table alignment string."""
if not align or not isinstance(align, str):
return 'LEFT'
@@ -442,7 +442,7 @@ class RendererPdf(BaseRenderer):
}
return align_map.get(align.lower().strip(), 'LEFT')
def _hex_to_color(self, hex_color: str) -> colors.Color:
def _hexToColor(self, hex_color: str) -> colors.Color:
"""Convert hex color to reportlab color."""
try:
hex_color = hex_color.lstrip('#')
@@ -464,38 +464,38 @@ class RendererPdf(BaseRenderer):
except Exception:
return colors.black
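Most of the conversion body is elided by the hunk; a minimal sketch consistent with the visible lines (assumed, not the committed implementation):
from reportlab.lib import colors

def hexToColor(hexColor: str) -> colors.Color:
    try:
        # Parse "#RRGGBB" into 0..1 channel floats for reportlab.
        hexColor = hexColor.lstrip('#')
        r, g, b = (int(hexColor[i:i + 2], 16) / 255.0 for i in (0, 2, 4))
        return colors.Color(r, g, b)
    except Exception:
        return colors.black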
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a single JSON section to PDF elements using AI-generated styles."""
try:
section_type = self._get_section_type(section)
elements = self._get_section_data(section)
section_type = self._getSectionType(section)
elements = self._getSectionData(section)
# Process each element in the section
all_elements = []
for element in elements:
if section_type == "table":
all_elements.extend(self._render_json_table(element, styles))
all_elements.extend(self._renderJsonTable(element, styles))
elif section_type == "bullet_list":
all_elements.extend(self._render_json_bullet_list(element, styles))
all_elements.extend(self._renderJsonBulletList(element, styles))
elif section_type == "heading":
all_elements.extend(self._render_json_heading(element, styles))
all_elements.extend(self._renderJsonHeading(element, styles))
elif section_type == "paragraph":
all_elements.extend(self._render_json_paragraph(element, styles))
all_elements.extend(self._renderJsonParagraph(element, styles))
elif section_type == "code_block":
all_elements.extend(self._render_json_code_block(element, styles))
all_elements.extend(self._renderJsonCodeBlock(element, styles))
elif section_type == "image":
all_elements.extend(self._render_json_image(element, styles))
all_elements.extend(self._renderJsonImage(element, styles))
else:
# Fallback to paragraph for unknown types
all_elements.extend(self._render_json_paragraph(element, styles))
all_elements.extend(self._renderJsonParagraph(element, styles))
return all_elements
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON table to PDF elements using AI-generated styles."""
try:
headers = table_data.get("headers", [])
@@ -517,7 +517,7 @@ class RendererPdf(BaseRenderer):
table_style = [
('BACKGROUND', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("background", "#4F4F4F"))),
('TEXTCOLOR', (0, 0), (-1, 0), self._hexToColor(table_header_style.get("text_color", "#FFFFFF"))),
('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))),
('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
@@ -534,7 +534,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering table: {str(e)}")
return []
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
try:
items = list_data.get("items", [])
@@ -556,7 +556,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return []
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON heading to PDF elements using AI-generated styles."""
try:
level = heading_data.get("level", 1)
@@ -564,7 +564,7 @@ class RendererPdf(BaseRenderer):
if text:
level = max(1, min(6, level))
heading_style = self._create_heading_style(styles, level)
heading_style = self._createHeadingStyle(styles, level)
return [Paragraph(text, heading_style)]
return []
@@ -573,13 +573,13 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return []
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
return [Paragraph(text, self._create_normal_style(styles))]
return [Paragraph(text, self._createNormalStyle(styles))]
return []
@@ -587,7 +587,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return []
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON code block to PDF elements using AI-generated styles."""
try:
code = code_data.get("code", "")
@@ -601,7 +601,7 @@ class RendererPdf(BaseRenderer):
lang_style = ParagraphStyle(
'CodeLanguage',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName='Helvetica-Bold'
)
elements.append(Paragraph(f"Code ({language}):", lang_style))
@@ -609,9 +609,9 @@ class RendererPdf(BaseRenderer):
code_style = ParagraphStyle(
'CodeBlock',
fontSize=code_style_def.get("font_size", 9),
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
fontName=code_style_def.get("font", "Courier"),
backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
spaceAfter=code_style_def.get("space_after", 6)
)
elements.append(Paragraph(code, code_style))
@@ -624,7 +624,7 @@ class RendererPdf(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return []
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
"""Render a JSON image to PDF elements."""
try:
base64_data = image_data.get("base64Data", "")
@@ -632,10 +632,10 @@ class RendererPdf(BaseRenderer):
if base64_data:
# For now, just add a placeholder since reportlab image handling is complex
return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
return []
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))]


@ -12,23 +12,23 @@ class RendererPptx(BaseRenderer):
def __init__(self):
super().__init__()
self.supported_formats = ["pptx", "ppt"]
self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
self.supportedFormats = ["pptx", "ppt"]
self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
@classmethod
def get_supported_formats(cls) -> list:
def getSupportedFormats(cls) -> list:
"""Get list of supported output formats."""
return ["pptx", "ppt"]
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""
Render content as PowerPoint presentation from JSON data.
Args:
extracted_content: JSON content to render as presentation
extractedContent: JSON content to render as presentation
title: Title for the presentation
user_prompt: User prompt for AI styling
ai_service: AI service for styling
userPrompt: User prompt for AI styling
aiService: AI service for styling
**kwargs: Additional rendering options
Returns:
@ -43,7 +43,7 @@ class RendererPptx(BaseRenderer):
import re
# Get AI-generated styling definitions first
styles = await self._get_pptx_styles(user_prompt, ai_service)
styles = await self._getPptxStyles(userPrompt, aiService)
# Create new presentation
prs = Presentation()
@ -58,13 +58,13 @@ class RendererPptx(BaseRenderer):
prs.slide_height = Inches(7.5)
# Generate slides from JSON content
slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
logger.info(f"Parsed {len(slides_data)} slides from JSON content")
slidesData = await self._parseJsonToSlides(extractedContent, title, styles)
logger.info(f"Parsed {len(slidesData)} slides from JSON content")
# Debug: Show first 200 chars of content
logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")
logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")
for i, slide_data in enumerate(slides_data):
for i, slide_data in enumerate(slidesData):
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
# Debug: Show slide content preview
slide_content = slide_data.get('content', '')
@ -74,8 +74,8 @@ class RendererPptx(BaseRenderer):
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
# Create slide with appropriate layout based on content
slide_layout_index = self._get_slide_layout_index(slide_data, styles)
slide_layout = prs.slide_layouts[slide_layout_index]
slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
slide_layout = prs.slide_layouts[slideLayoutIndex]
slide = prs.slides.add_slide(slide_layout)
# Set title with AI-generated styling
@ -153,7 +153,7 @@ class RendererPptx(BaseRenderer):
p.alignment = PP_ALIGN.LEFT
# If no slides were created, create a default slide
if not slides_data:
if not slidesData:
slide_layout = prs.slide_layouts[0] # Title slide layout
slide = prs.slides.add_slide(slide_layout)
@ -198,7 +198,7 @@ class RendererPptx(BaseRenderer):
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
def _parse_content_to_slides(self, content: str, title: str) -> list:
def _parseContentToSlides(self, content: str, title: str) -> list:
"""
Parse content into slide data structure.
@ -212,7 +212,7 @@ class RendererPptx(BaseRenderer):
slides = []
# Split content by slide markers or headers
slide_sections = self._split_content_into_slides(content)
slide_sections = self._splitContentIntoSlides(content)
for i, section in enumerate(slide_sections):
if section.strip():
@ -239,7 +239,7 @@ class RendererPptx(BaseRenderer):
return slides
def _split_content_into_slides(self, content: str) -> list:
def _splitContentIntoSlides(self, content: str) -> list:
"""
Split content into individual slides based on headers and structure.
@ -299,11 +299,11 @@ class RendererPptx(BaseRenderer):
return [content.strip()]
def get_output_mime_type(self) -> str:
def getOutputMimeType(self) -> str:
"""Get MIME type for rendered output."""
return self.output_mime_type
return self.outputMimeType
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
async def _getPptxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get PowerPoint styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
@ -323,21 +323,21 @@ class RendererPptx(BaseRenderer):
"executive_ready": True
}
style_template = self._create_professional_pptx_template(user_prompt, style_schema)
# Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion
styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles())
style_template = self._createProfessionalPptxTemplate(userPrompt, style_schema)
# Use our own _getAiStylesWithPptxColors method to ensure proper color conversion
styles = await self._getAiStylesWithPptxColors(aiService, style_template, self._getDefaultPptxStyles())
# Validate PowerPoint-specific requirements
return self._validate_pptx_styles_readability(styles)
return self._validatePptxStylesReadability(styles)
def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
import json
schema_json = json.dumps(style_schema, indent=4)
return f"""Customize the JSON below for professional PowerPoint slides.
User Request: {user_prompt or "Create professional corporate slides"}
User Request: {userPrompt or "Create professional corporate slides"}
Rules:
- Use professional colors (blues, grays, deep greens)
@ -351,9 +351,9 @@ Return ONLY this JSON with your changes:
JSON ONLY. NO OTHER TEXT."""
async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper PowerPoint color conversion."""
if not ai_service:
if not aiService:
return default_styles
try:
@ -365,11 +365,11 @@ JSON ONLY. NO OTHER TEXT."""
request = AiCallRequest(prompt=style_template, context="", options=request_options)
# Check if AI service is properly configured
if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
self.logger.warning("AI service not properly configured, using defaults")
return default_styles
response = await ai_service.aiObjects.call(request)
response = await aiService.aiObjects.call(request)
# Check if response is valid
if not response:
@ -445,7 +445,7 @@ JSON ONLY. NO OTHER TEXT."""
return default_styles
# Convert colors to PowerPoint RGB format
styles = self._convert_colors_format(styles)
styles = self._convertColorsFormat(styles)
return styles
@ -453,7 +453,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to RGB format for PowerPoint compatibility."""
try:
for style_name, style_config in styles.items():
@ -477,7 +477,7 @@ JSON ONLY. NO OTHER TEXT."""
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
"""Get a safe RGB color tuple for PowerPoint."""
if isinstance(color_value, tuple) and len(color_value) == 3:
return color_value
@ -495,7 +495,7 @@ JSON ONLY. NO OTHER TEXT."""
return (r, g, b)
return default
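The elided parsing above reduces to slicing the hex pairs: for example the default title color `#1B365D` becomes `(27, 54, 93)`, matching the tuple in `_getDefaultPptxStyles` below, and python-pptx then consumes such tuples via `RGBColor(r, g, b)`:

    r, g, b = (int(hexValue[i:i + 2], 16) for i in (1, 3, 5))  # '#1B365D' -> (27, 54, 93)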
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _validatePptxStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles."""
try:
# Ensure minimum font sizes for PowerPoint readability
@ -519,9 +519,9 @@ JSON ONLY. NO OTHER TEXT."""
except Exception as e:
logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pptx_styles()
return self._getDefaultPptxStyles()
def _get_default_pptx_styles(self) -> Dict[str, Any]:
def _getDefaultPptxStyles(self) -> Dict[str, Any]:
"""Default PowerPoint styles with corporate professional color scheme."""
return {
"title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
@ -541,7 +541,7 @@ JSON ONLY. NO OTHER TEXT."""
"executive_ready": True
}
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Parse JSON content into slide data structure.
@ -569,12 +569,12 @@ JSON ONLY. NO OTHER TEXT."""
# Create title slide
slides.append({
"title": document_title,
"content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
"content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
})
# Process sections into slides based on content and user intent
sections = json_content.get("sections", [])
slides.extend(self._create_slides_from_sections(sections, styles))
slides.extend(self._createSlidesFromSections(sections, styles))
# If no content slides were created, create a default content slide
if len(slides) == 1: # Only title slide
@ -595,7 +595,7 @@ JSON ONLY. NO OTHER TEXT."""
}
]
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create a slide from a JSON section."""
try:
# Get section title from data or use default
@ -616,15 +616,15 @@ JSON ONLY. NO OTHER TEXT."""
content_parts = []
if content_type == "table":
content_parts.append(self._format_table_for_slide(elements))
content_parts.append(self._formatTableForSlide(elements))
elif content_type == "list":
content_parts.append(self._format_list_for_slide(elements))
content_parts.append(self._formatListForSlide(elements))
elif content_type == "heading":
content_parts.append(self._format_heading_for_slide(elements))
content_parts.append(self._formatHeadingForSlide(elements))
elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide(elements))
content_parts.append(self._formatParagraphForSlide(elements))
elif content_type == "code":
content_parts.append(self._format_code_for_slide(elements))
content_parts.append(self._formatCodeForSlide(elements))
else:
content_parts.append(self._format_paragraph_for_slide(elements))
@ -640,7 +640,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slide from section: {str(e)}")
return None
def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str:
def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str:
"""Format table data for slide presentation."""
try:
# Extract table data from elements array
@ -681,7 +681,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting table for slide: {str(e)}")
return ""
def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
"""Format list data for slide presentation."""
try:
items = list_data.get("items", [])
@ -713,7 +713,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting list for slide: {str(e)}")
return ""
def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
"""Format heading data for slide presentation."""
try:
text = heading_data.get("text", "")
@ -728,7 +728,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting heading for slide: {str(e)}")
return ""
def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
"""Format paragraph data for slide presentation."""
try:
text = paragraph_data.get("text", "")
@ -747,7 +747,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
return ""
def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
"""Format code data for slide presentation."""
try:
code = code_data.get("code", "")
@ -770,7 +770,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting code for slide: {str(e)}")
return ""
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
"""Determine the best professional slide layout based on content."""
try:
content = slide_data.get("content", "")
@ -804,7 +804,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error determining slide layout: {str(e)}")
return 1 # Default to title and content layout
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent."""
try:
slides = []
@ -834,7 +834,7 @@ JSON ONLY. NO OTHER TEXT."""
break
else:
# Add content to current slide
formatted_content = self._format_section_content(section)
formatted_content = self._formatSectionContent(section)
if formatted_content:
current_slide_content.append(formatted_content)
@ -851,7 +851,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error creating slides from sections: {str(e)}")
return []
def _format_section_content(self, section: Dict[str, Any]) -> str:
def _formatSectionContent(self, section: Dict[str, Any]) -> str:
"""Format section content for slide presentation."""
try:
content_type = section.get("content_type", "paragraph")
@ -861,15 +861,15 @@ JSON ONLY. NO OTHER TEXT."""
content_parts = []
for element in elements:
if content_type == "table":
content_parts.append(self._format_table_for_slide([element]))
content_parts.append(self._formatTableForSlide([element]))
elif content_type == "list":
content_parts.append(self._format_list_for_slide([element]))
content_parts.append(self._formatListForSlide([element]))
elif content_type == "heading":
content_parts.append(self._format_heading_for_slide([element]))
content_parts.append(self._formatHeadingForSlide([element]))
elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide([element]))
content_parts.append(self._formatParagraphForSlide([element]))
elif content_type == "code":
content_parts.append(self._format_code_for_slide([element]))
content_parts.append(self._formatCodeForSlide([element]))
else:
content_parts.append(self._format_paragraph_for_slide([element]))
@ -879,7 +879,7 @@ JSON ONLY. NO OTHER TEXT."""
logger.warning(f"Error formatting section content: {str(e)}")
return ""
def _format_timestamp(self) -> str:
def _formatTimestamp(self) -> str:
"""Format current timestamp for presentation generation."""
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")


@ -9,7 +9,7 @@ class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported text formats (excluding formats with dedicated renderers)."""
return [
'txt', 'text', 'plain',
@ -32,7 +32,7 @@ class RendererText(BaseRenderer):
]
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return [
'ascii', 'utf8', 'utf-8', 'code', 'source',
@ -41,166 +41,166 @@ class RendererText(BaseRenderer):
]
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for text renderer."""
return 90
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to plain text format."""
try:
# Generate text from JSON structure
text_content = self._generate_text_from_json(extracted_content, title)
textContent = self._generateTextFromJson(extractedContent, title)
return text_content, "text/plain"
return textContent, "text/plain"
except Exception as e:
self.logger.error(f"Error rendering text: {str(e)}")
# Return minimal text fallback
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate text content from structured JSON document."""
try:
# Validate JSON structure
if not isinstance(json_content, dict):
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
documentTitle = jsonContent.get("metadata", {}).get("title", title)
# Build text content
text_parts = []
textParts = []
# Document title
text_parts.append(document_title)
text_parts.append("=" * len(document_title))
text_parts.append("")
textParts.append(documentTitle)
textParts.append("=" * len(documentTitle))
textParts.append("")
# Process each section
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
for section in sections:
section_text = self._render_json_section(section)
if section_text:
text_parts.append(section_text)
text_parts.append("") # Add spacing between sections
sectionText = self._renderJsonSection(section)
if sectionText:
textParts.append(sectionText)
textParts.append("") # Add spacing between sections
# Add generation info
text_parts.append("")
text_parts.append(f"Generated: {self._format_timestamp()}")
textParts.append("")
textParts.append(f"Generated: {self._formatTimestamp()}")
return '\n'.join(text_parts)
return '\n'.join(textParts)
except Exception as e:
self.logger.error(f"Error generating text from JSON: {str(e)}")
raise Exception(f"Text generation failed: {str(e)}")
def _render_json_section(self, section: Dict[str, Any]) -> str:
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
"""Render a single JSON section to text."""
try:
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
if section_type == "table":
if sectionType == "table":
# Process the section data to extract table structure
processed_data = self._process_section_by_type(section)
return self._render_json_table(processed_data)
elif section_type == "bullet_list":
processedData = self._processSectionByType(section)
return self._renderJsonTable(processedData)
elif sectionType == "bullet_list":
# Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section)
return self._render_json_bullet_list(processed_data)
elif section_type == "heading":
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData)
elif sectionType == "heading":
# Render each heading element in the elements array
# section_data is already the elements array from _get_section_data
rendered_elements = []
for element in section_data:
rendered_elements.append(self._render_json_heading(element))
return "\n".join(rendered_elements)
elif section_type == "paragraph":
# sectionData is already the elements array from _getSectionData
renderedElements = []
for element in sectionData:
renderedElements.append(self._renderJsonHeading(element))
return "\n".join(renderedElements)
elif sectionType == "paragraph":
# Render each paragraph element in the elements array
# section_data is already the elements array from _get_section_data
rendered_elements = []
for element in section_data:
rendered_elements.append(self._render_json_paragraph(element))
return "\n".join(rendered_elements)
elif section_type == "code_block":
# sectionData is already the elements array from _getSectionData
renderedElements = []
for element in sectionData:
renderedElements.append(self._renderJsonParagraph(element))
return "\n".join(renderedElements)
elif sectionType == "code_block":
# Process the section data to extract code block structure
processed_data = self._process_section_by_type(section)
return self._render_json_code_block(processed_data)
elif section_type == "image":
processedData = self._processSectionByType(section)
return self._renderJsonCodeBlock(processedData)
elif sectionType == "image":
# Process the section data to extract image structure
processed_data = self._process_section_by_type(section)
return self._render_json_image(processed_data)
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData)
else:
# Fallback to paragraph for unknown types - render each element
# section_data is already the elements array from _get_section_data
rendered_elements = []
for element in section_data:
rendered_elements.append(self._render_json_paragraph(element))
return "\n".join(rendered_elements)
# sectionData is already the elements array from _getSectionData
renderedElements = []
for element in sectionData:
renderedElements.append(self._renderJsonParagraph(element))
return "\n".join(renderedElements)
except Exception as e:
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
return f"[Error rendering section: {str(e)}]"
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
"""Render a JSON table to text."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
headers = tableData.get("headers", [])
rows = tableData.get("rows", [])
if not headers or not rows:
return ""
text_parts = []
textParts = []
# Create table header
header_line = " | ".join(str(header) for header in headers)
text_parts.append(header_line)
headerLine = " | ".join(str(header) for header in headers)
textParts.append(headerLine)
# Add separator line
separator_line = " | ".join("-" * len(str(header)) for header in headers)
text_parts.append(separator_line)
separatorLine = " | ".join("-" * len(str(header)) for header in headers)
textParts.append(separatorLine)
# Add data rows
for row in rows:
row_line = " | ".join(str(cell_data) for cell_data in row)
text_parts.append(row_line)
rowLine = " | ".join(str(cellData) for cellData in row)
textParts.append(rowLine)
return '\n'.join(text_parts)
return '\n'.join(textParts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
"""Render a JSON bullet list to text."""
try:
items = list_data.get("items", [])
items = listData.get("items", [])
if not items:
return ""
text_parts = []
textParts = []
for item in items:
if isinstance(item, str):
text_parts.append(f"- {item}")
textParts.append(f"- {item}")
elif isinstance(item, dict) and "text" in item:
text_parts.append(f"- {item['text']}")
textParts.append(f"- {item['text']}")
return '\n'.join(text_parts)
return '\n'.join(textParts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
"""Render a JSON heading to text."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
level = headingData.get("level", 1)
text = headingData.get("text", "")
if text:
level = max(1, min(6, level))
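The formatting body falls outside this hunk; one plausible mapping from the clamped level to plain text (an assumption, not the committed code):

    marker = "#" * level       # e.g. level 2 -> "## <text>"
    return f"{marker} {text}"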
@ -217,21 +217,21 @@ class RendererText(BaseRenderer):
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
"""Render a JSON paragraph to text."""
try:
text = paragraph_data.get("text", "")
text = paragraphData.get("text", "")
return text if text else ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
"""Render a JSON code block to text."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
code = codeData.get("code", "")
language = codeData.get("language", "")
if code:
if language:
@ -245,12 +245,12 @@ class RendererText(BaseRenderer):
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
"""Render a JSON image to text."""
try:
alt_text = image_data.get("altText", "Image")
return f"[Image: {alt_text}]"
altText = imageData.get("altText", "Image")
return f"[Image: {altText}]"
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f"[Image: {image_data.get('altText', 'Image')}]"
return f"[Image: {imageData.get('altText', 'Image')}]"


@ -21,41 +21,41 @@ class RendererXlsx(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@classmethod
def get_supported_formats(cls) -> List[str]:
def getSupportedFormats(cls) -> List[str]:
"""Return supported Excel formats."""
return ['xlsx', 'xls', 'excel']
@classmethod
def get_format_aliases(cls) -> List[str]:
def getFormatAliases(cls) -> List[str]:
"""Return format aliases."""
return ['spreadsheet', 'workbook']
@classmethod
def get_priority(cls) -> int:
def getPriority(cls) -> int:
"""Return priority for Excel renderer."""
return 110
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv
csv_renderer = RendererCsv()
csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
return csv_content, "text/csv"
csvRenderer = RendererCsv()
csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
return csvContent, "text/csv"
# Generate Excel using AI-analyzed styling
excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
except Exception as e:
self.logger.error(f"Error rendering Excel: {str(e)}")
# Return CSV fallback
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
def _generate_excel(self, content: str, title: str) -> str:
def _generateExcel(self, content: str, title: str) -> str:
"""Generate Excel content using openpyxl."""
try:
# Create workbook
@ -65,14 +65,14 @@ class RendererXlsx(BaseRenderer):
wb.remove(wb.active)
# Create sheets
summary_sheet = wb.create_sheet("Summary", 0)
data_sheet = wb.create_sheet("Data", 1)
analysis_sheet = wb.create_sheet("Analysis", 2)
summarySheet = wb.create_sheet("Summary", 0)
dataSheet = wb.create_sheet("Data", 1)
analysisSheet = wb.create_sheet("Analysis", 2)
# Add content to sheets
self._populate_summary_sheet(summary_sheet, title)
self._populate_data_sheet(data_sheet, content)
self._populate_analysis_sheet(analysis_sheet, content)
self._populateSummarySheet(summarySheet, title)
self._populateDataSheet(dataSheet, content)
self._populateAnalysisSheet(analysisSheet, content)
# Save to buffer
buffer = io.BytesIO()
@ -80,16 +80,16 @@ class RendererXlsx(BaseRenderer):
buffer.seek(0)
# Convert to base64
excel_bytes = buffer.getvalue()
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
excelBytes = buffer.getvalue()
excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
return excel_base64
return excelBase64
except Exception as e:
self.logger.error(f"Error generating Excel: {str(e)}")
raise
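Since the renderer returns the workbook base64-encoded, a caller must decode before writing to disk (hypothetical call site, illustrative names):

    import base64

    excelBase64, mimeType = await renderer.render(content, "Report")
    with open("report.xlsx", "wb") as f:
        f.write(base64.b64decode(excelBase64))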
def _populate_summary_sheet(self, sheet, title: str):
def _populateSummarySheet(self, sheet, title: str):
"""Populate the summary sheet."""
try:
# Title
@ -99,7 +99,7 @@ class RendererXlsx(BaseRenderer):
# Generation info
sheet['A3'] = "Generated:"
sheet['B3'] = self._format_timestamp()
sheet['B3'] = self._formatTimestamp()
sheet['A4'] = "Status:"
sheet['B4'] = "Generated Successfully"
@ -116,7 +116,7 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
def _populate_data_sheet(self, sheet, content: str):
def _populateDataSheet(self, sheet, content: str):
"""Populate the data sheet."""
try:
# Headers
@ -138,8 +138,8 @@ class RendererXlsx(BaseRenderer):
# Check for table data (lines with |)
if '|' in line:
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
sheet.cell(row=row, column=col, value=cell_data)
for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns
sheet.cell(row=row, column=col, value=cellData)
row += 1
else:
# Regular content
@ -153,7 +153,7 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate data sheet: {str(e)}")
def _populate_analysis_sheet(self, sheet, content: str):
def _populateAnalysisSheet(self, sheet, content: str):
"""Populate the analysis sheet."""
try:
# Title
@ -169,17 +169,17 @@ class RendererXlsx(BaseRenderer):
row += 1
# Count different types of content
table_lines = sum(1 for line in lines if '|' in line)
list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
text_lines = len(lines) - table_lines - list_lines
tableLines = sum(1 for line in lines if '|' in line)
listLines = sum(1 for line in lines if line.startswith(('- ', '* ')))
textLines = len(lines) - tableLines - listLines
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
row += 1
sheet[f'A{row}'] = f"Table Rows: {table_lines}"
sheet[f'A{row}'] = f"Table Rows: {tableLines}"
row += 1
sheet[f'A{row}'] = f"List Items: {list_lines}"
sheet[f'A{row}'] = f"List Items: {listLines}"
row += 1
sheet[f'A{row}'] = f"Text Lines: {text_lines}"
sheet[f'A{row}'] = f"Text Lines: {textLines}"
row += 2
# Recommendations
@ -198,35 +198,35 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate Excel content from structured JSON document using AI-generated styling."""
try:
# Debug output
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER")
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER")
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
# Get AI-generated styling definitions
styles = await self._get_excel_styles(user_prompt, ai_service)
styles = await self._getExcelStyles(userPrompt, aiService)
# Validate JSON structure
if not isinstance(json_content, dict):
if not isinstance(jsonContent, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
if "sections" not in jsonContent:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
document_title = jsonContent.get("metadata", {}).get("title", title)
# Create workbook
wb = Workbook()
# Create sheets based on content
sheets = self._create_excel_sheets(wb, json_content, styles)
sheets = self._createExcelSheets(wb, jsonContent, styles)
self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
# Populate sheets with content
self._populate_excel_sheets(sheets, json_content, styles)
self._populateExcelSheets(sheets, jsonContent, styles)
# Save to buffer
buffer = io.BytesIO()
@ -234,24 +234,24 @@ class RendererXlsx(BaseRenderer):
buffer.seek(0)
# Convert to base64
excel_bytes = buffer.getvalue()
self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER")
excelBytes = buffer.getvalue()
self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")
try:
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER")
excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
except Exception as b64_error:
self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
raise
return excel_base64
return excelBase64
except Exception as e:
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
raise Exception(f"Excel generation failed: {str(e)}")
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
async def _getExcelStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
"""Get Excel styling definitions using base template AI styling."""
style_schema = {
styleSchema = {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
@ -261,26 +261,26 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
# Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion
styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles())
styleTemplate = self._createAiStyleTemplate("xlsx", userPrompt, styleSchema)
# Use our own _getAiStylesWithExcelColors method to ensure proper color conversion
styles = await self._getAiStylesWithExcelColors(aiService, styleTemplate, self._getDefaultExcelStyles())
# Validate and fix contrast issues
return self._validate_excel_styles_contrast(styles)
return self._validateExcelStylesContrast(styles)
async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
"""Get AI styles with proper Excel color conversion."""
if not ai_service:
return default_styles
if not aiService:
return defaultStyles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
request_options = AiCallOptions()
request_options.operationType = OperationTypeEnum.DATA_GENERATE
requestOptions = AiCallOptions()
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
request = AiCallRequest(prompt=style_template, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
response = await aiService.aiObjects.call(request)
import json
import re
@ -291,7 +291,7 @@ class RendererXlsx(BaseRenderer):
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return default_styles
return defaultStyles
# Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
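Standalone sketch of the extraction the regex above performs, preferring a fenced ```json block and falling back to parsing the raw response:

    import json, re

    def extractJson(result: str) -> dict:
        match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
        payload = match.group(1) if match else result
        return json.loads(payload)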
@ -312,46 +312,46 @@ class RendererXlsx(BaseRenderer):
styles = json.loads(result)
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
return default_styles
return defaultStyles
# Convert colors to Excel aRGB format
styles = self._convert_colors_format(styles)
styles = self._convertColorsFormat(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
return defaultStyles
def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str:
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(color_value, str):
if not isinstance(colorValue, str):
return default
# Remove # prefix if present
if color_value.startswith('#'):
color_value = color_value[1:]
if colorValue.startswith('#'):
colorValue = colorValue[1:]
if len(color_value) == 6:
if len(colorValue) == 6:
# Convert RRGGBB to AARRGGBB
return f"FF{color_value}"
elif len(color_value) == 8:
return f"FF{colorValue}"
elif len(colorValue) == 8:
# Already aRGB format
return color_value
return colorValue
else:
# Unexpected format, return default
return default
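Quick examples of the normalization above:

    self._getSafeColor("#1F4E79")    # -> "FF1F4E79"  (alpha channel prepended)
    self._getSafeColor("#FF1F4E79")  # -> "FF1F4E79"  (already aRGB, passed through)
    self._getSafeColor(42)           # -> "FF000000"  (non-string falls back to default)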
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility."""
try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
for styleName, styleConfig in styles.items():
if isinstance(styleConfig, dict):
for prop, value in styleConfig.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
styles[style_name][prop] = f"FF{value[1:]}"
styles[styleName][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
pass # Already aRGB format
elif isinstance(value, str) and value.startswith('#'):
@ -360,34 +360,34 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
return styles
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
def _validateExcelStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
bgColor = header.get("background", "#FFFFFF")
textColor = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
bgColor = cell.get("background", "#FFFFFF")
textColor = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
@ -395,9 +395,9 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_excel_styles()
return self._getDefaultExcelStyles()
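The validation above only catches identical-color pairs (white-on-white, black-on-black). A more general check would compare WCAG relative luminance; a hedged sketch, not part of this commit:

    def _contrastRatio(fg: tuple, bg: tuple) -> float:
        """WCAG contrast ratio between two (r, g, b) colors in 0-255."""
        def luminance(rgb):
            channels = []
            for v in rgb:
                c = v / 255.0
                channels.append(c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4)
            r, g, b = channels
            return 0.2126 * r + 0.7152 * g + 0.0722 * b
        l1, l2 = sorted((luminance(fg), luminance(bg)), reverse=True)
        return (l1 + 0.05) / (l2 + 0.05)  # 4.5+ passes WCAG AA for body text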
def _get_default_excel_styles(self) -> Dict[str, Any]:
def _getDefaultExcelStyles(self) -> Dict[str, Any]:
"""Default Excel styles with aRGB color format."""
return {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
@ -409,104 +409,104 @@ class RendererXlsx(BaseRenderer):
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
"""Create Excel sheets based on content structure and user intent."""
sheets = {}
# Get sheet names from AI styles or generate based on content
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER")
sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent))
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")
# Create sheets
for i, sheet_name in enumerate(sheet_names):
for i, sheetName in enumerate(sheetNames):
if i == 0:
# Use the default sheet for the first sheet
sheet = wb.active
sheet.title = sheet_name
sheet.title = sheetName
else:
# Create additional sheets
sheet = wb.create_sheet(sheet_name, i)
sheets[sheet_name.lower()] = sheet
sheet = wb.create_sheet(sheetName, i)
sheets[sheetName.lower()] = sheet
return sheets
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure."""
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Generate sheet names based on content structure
sheet_names = []
sheetNames = []
# Check if we have multiple table sections
table_sections = [s for s in sections if s.get("content_type") == "table"]
tableSections = [s for s in sections if s.get("content_type") == "table"]
if len(table_sections) > 1:
if len(tableSections) > 1:
# Create separate sheets for each table
for i, section in enumerate(table_sections, 1):
section_title = section.get("title", f"Table {i}")
sheet_names.append(section_title[:31]) # Excel sheet name limit
for i, section in enumerate(tableSections, 1):
sectionTitle = section.get("title", f"Table {i}")
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
else:
# Single table or mixed content - create main sheet
document_title = json_content.get("metadata", {}).get("title", "Document")
sheet_names.append(document_title[:31]) # Excel sheet name limit
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(documentTitle[:31]) # Excel sheet name limit
# Add additional sheets for other content types
content_types = set()
contentTypes = set()
for section in sections:
content_type = section.get("content_type", "paragraph")
content_types.add(content_type)
contentType = section.get("content_type", "paragraph")
contentTypes.add(contentType)
if "table" in content_types and len(table_sections) == 1:
sheet_names.append("Table Data")
if "list" in content_types:
sheet_names.append("Lists")
if "paragraph" in content_types or "heading" in content_types:
sheet_names.append("Text")
if "table" in contentTypes and len(tableSections) == 1:
sheetNames.append("Table Data")
if "list" in contentTypes:
sheetNames.append("Lists")
if "paragraph" in contentTypes or "heading" in contentTypes:
sheetNames.append("Text")
# Limit to 4 sheets maximum
return sheet_names[:4]
return sheetNames[:4]
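Example of the naming logic with two table sections (illustrative input):

    jsonContent = {
        "metadata": {"title": "Q3 Report"},
        "sections": [
            {"content_type": "table", "title": "Revenue"},
            {"content_type": "table", "title": "Costs"},
        ],
    }
    self._generateSheetNamesFromContent(jsonContent)  # -> ["Revenue", "Costs"]
    # Names are truncated to Excel's 31-character sheet limit; at most 4 sheets.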
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names."""
try:
# Get the actual sheet names that were created
sheet_names = list(sheets.keys())
sheetNames = list(sheets.keys())
if not sheet_names:
if not sheetNames:
return
sections = json_content.get("sections", [])
table_sections = [s for s in sections if s.get("content_type") == "table"]
sections = jsonContent.get("sections", [])
tableSections = [s for s in sections if s.get("content_type") == "table"]
if len(table_sections) > 1:
if len(tableSections) > 1:
# Multiple tables - populate each sheet with its corresponding table
for i, section in enumerate(table_sections):
if i < len(sheet_names):
sheet_name = sheet_names[i]
sheet = sheets[sheet_name]
self._populate_table_sheet(sheet, section, styles, f"Table {i+1}")
for i, section in enumerate(tableSections):
if i < len(sheetNames):
sheetName = sheetNames[i]
sheet = sheets[sheetName]
self._populateTableSheet(sheet, section, styles, f"Table {i+1}")
else:
# Single table or mixed content - use original logic
first_sheet_name = sheet_names[0]
self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
firstSheetName = sheetNames[0]
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
# If we have multiple sheets, distribute content by type
if len(sheet_names) > 1:
self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
if len(sheetNames) > 1:
self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str):
def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
"""Populate a sheet with a single table section."""
try:
# Sheet title
sheet['A1'] = sheet_title
sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
sheet['A1'] = sheetTitle
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
sheet['A1'].alignment = Alignment(horizontal="center")
# Get table data from elements (canonical JSON format)
@ -528,9 +528,9 @@ class RendererXlsx(BaseRenderer):
for col, header in enumerate(headers, 1):
cell = sheet.cell(row=3, column=col, value=header)
if header_style.get("bold"):
cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
if header_style.get("background"):
cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
# Add rows
cell_style = styles.get("table_cell", {})
@ -538,7 +538,7 @@ class RendererXlsx(BaseRenderer):
for col_idx, cell_value in enumerate(row_data, 1):
cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
if cell_style.get("text_color"):
cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
# Auto-adjust column widths
for col in range(1, len(headers) + 1):
@ -547,17 +547,17 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate table sheet: {str(e)}")
def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
"""Populate the main sheet with document overview and all content."""
try:
# Document title
document_title = json_content.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = document_title
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = documentTitle
# Safety check for title style
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
try:
safe_color = self._get_safe_color(title_style["color"])
safe_color = self._getSafeColor(title_style["color"])
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
except Exception as font_error:
@ -567,12 +567,12 @@ class RendererXlsx(BaseRenderer):
# Generation info
sheet['A3'] = "Generated:"
sheet['B3'] = self._format_timestamp()
sheet['B3'] = self._formatTimestamp()
sheet['A4'] = "Status:"
sheet['B4'] = "Generated Successfully"
# Document metadata
metadata = json_content.get("metadata", {})
metadata = jsonContent.get("metadata", {})
if metadata:
sheet['A6'] = "Document Information:"
sheet['A6'].font = Font(bold=True)
@ -585,7 +585,7 @@ class RendererXlsx(BaseRenderer):
row += 1
# Content overview
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
sheet[f'A{row + 1}'] = "Content Overview:"
sheet[f'A{row + 1}'].font = Font(bold=True)
@ -605,7 +605,7 @@ class RendererXlsx(BaseRenderer):
# Add all content to this sheet
row += 2
for section in sections:
row = self._add_section_to_sheet(sheet, section, styles, row)
row = self._addSectionToSheet(sheet, section, styles, row)
row += 1 # Empty row between sections
# Auto-adjust column widths
@ -615,34 +615,34 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate main sheet: {str(e)}")
def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
"""Populate additional sheets based on content types."""
try:
sections = json_content.get("sections", [])
sections = jsonContent.get("sections", [])
for sheet_name in sheet_names:
if sheet_name not in sheets:
for sheetName in sheetNames:
if sheetName not in sheets:
continue
sheet = sheets[sheet_name]
sheet_title = sheet_name.title()
sheet['A1'] = sheet_title
sheet = sheets[sheetName]
sheetTitle = sheetName.title()
sheet['A1'] = sheetTitle
sheet['A1'].font = Font(size=16, bold=True)
row = 3
# Filter sections by content type
if sheet_name == "tables":
if sheetName == "tables":
filtered_sections = [s for s in sections if s.get("content_type") == "table"]
elif sheet_name == "lists":
elif sheetName == "lists":
filtered_sections = [s for s in sections if s.get("content_type") == "list"]
elif sheet_name == "text":
elif sheetName == "text":
filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
else:
filtered_sections = sections
for section in filtered_sections:
row = self._add_section_to_sheet(sheet, section, styles, row)
row = self._addSectionToSheet(sheet, section, styles, row)
row += 1 # Empty row between sections
# Auto-adjust column widths
@ -652,15 +652,15 @@ class RendererXlsx(BaseRenderer):
except Exception as e:
self.logger.warning(f"Could not populate content type sheets: {str(e)}")
def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a section to a sheet and return the next row."""
try:
# Add section title
section_title = section.get("title")
if section_title:
sheet[f'A{start_row}'] = f"# {section_title}"
sheet[f'A{start_row}'].font = Font(bold=True)
start_row += 1
sheet[f'A{startRow}'] = f"# {section_title}"
sheet[f'A{startRow}'].font = Font(bold=True)
startRow += 1
# Process section based on type
section_type = section.get("content_type", "paragraph")
@ -669,23 +669,23 @@ class RendererXlsx(BaseRenderer):
elements = section.get("elements", [])
for element in elements:
if section_type == "table":
start_row = self._add_table_to_excel(sheet, element, styles, start_row)
startRow = self._addTableToExcel(sheet, element, styles, startRow)
elif section_type == "list":
start_row = self._add_list_to_excel(sheet, element, styles, start_row)
startRow = self._addListToExcel(sheet, element, styles, startRow)
elif section_type == "paragraph":
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
elif section_type == "heading":
start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
else:
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
return start_row
return startRow
except Exception as e:
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return start_row + 1
return startRow + 1
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a table element to Excel sheet."""
try:
# In canonical JSON format, table elements have headers and rows directly
@ -693,99 +693,99 @@ class RendererXlsx(BaseRenderer):
rows = element.get("rows", [])
if not headers and not rows:
return start_row
return startRow
# Add headers
header_style = styles.get("table_header", {})
for col, header in enumerate(headers, 1):
cell = sheet.cell(row=start_row, column=col, value=header)
cell = sheet.cell(row=startRow, column=col, value=header)
if header_style.get("bold"):
cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
if header_style.get("background"):
cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
start_row += 1
startRow += 1
# Add rows
cell_style = styles.get("table_cell", {})
for row_data in rows:
for col, cell_value in enumerate(row_data, 1):
cell = sheet.cell(row=start_row, column=col, value=cell_value)
cell = sheet.cell(row=startRow, column=col, value=cell_value)
if cell_style.get("text_color"):
cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
start_row += 1
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
startRow += 1
return start_row
return startRow
except Exception as e:
self.logger.warning(f"Could not add table to Excel: {str(e)}")
return start_row + 1
return startRow + 1
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a list element to Excel sheet."""
try:
list_items = element.get("items", [])
list_style = styles.get("bullet_list", {})
for item in list_items:
sheet.cell(row=start_row, column=1, value=f"{item}")
sheet.cell(row=startRow, column=1, value=f"{item}")
if list_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"]))
start_row += 1
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
startRow += 1
return start_row
return startRow
except Exception as e:
self.logger.warning(f"Could not add list to Excel: {str(e)}")
return start_row + 1
return startRow + 1
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a paragraph element to Excel sheet."""
try:
text = element.get("text", "")
if text:
sheet.cell(row=start_row, column=1, value=text)
sheet.cell(row=startRow, column=1, value=text)
paragraph_style = styles.get("paragraph", {})
if paragraph_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"]))
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
start_row += 1
startRow += 1
return start_row
return startRow
except Exception as e:
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
return start_row + 1
return startRow + 1
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
"""Add a heading element to Excel sheet."""
try:
text = element.get("text", "")
level = element.get("level", 1)
if text:
sheet.cell(row=start_row, column=1, value=text)
sheet.cell(row=startRow, column=1, value=text)
heading_style = styles.get("heading", {})
font_size = heading_style.get("font_size", 14)
if level > 1:
font_size = max(10, font_size - (level - 1) * 2)
sheet.cell(row=start_row, column=1).font = Font(
sheet.cell(row=startRow, column=1).font = Font(
size=font_size,
bold=True,
color=self._get_safe_color(heading_style.get("color", "FF000000"))
color=self._getSafeColor(heading_style.get("color", "FF000000"))
)
start_row += 1
startRow += 1
return start_row
return startRow
except Exception as e:
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
return start_row + 1
return startRow + 1
def _format_timestamp(self) -> str:
def _formatTimestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
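To see the row-cursor convention the renamed _add*ToExcel helpers share, here is a minimal standalone sketch: each writer takes a sheet plus a startRow, writes one element, and returns the next free row so calls can be chained. The helper names and simplified element dicts below are hypothetical, not the project's actual renderer; only openpyxl is assumed.

from openpyxl import Workbook
from openpyxl.styles import Font

def addHeadingToSheet(sheet, element, startRow):
    # Write the heading text, then hand the caller the next free row
    cell = sheet.cell(row=startRow, column=1, value=element.get("text", ""))
    cell.font = Font(bold=True)
    return startRow + 1

def addParagraphToSheet(sheet, element, startRow):
    sheet.cell(row=startRow, column=1, value=element.get("text", ""))
    return startRow + 1

wb = Workbook()
sheet = wb.active
row = 1
row = addHeadingToSheet(sheet, {"text": "Report"}, row)
row = addParagraphToSheet(sheet, {"text": "Body text"}, row)  # row is now 3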

View file

@@ -1,25 +1,32 @@
"""
JSON Schema definitions for AI-generated document structures.
This module provides schemas that guide AI to generate structured JSON output.
JSON Schema definitions for AI-generated document structures (unified).
This module provides schemas that guide AI to generate structured JSON output
that matches the master template in modules.datamodels.datamodelJson.
"""
from typing import Dict, Any
def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"""Get the JSON schema for multi-document generation."""
def getMultiDocumentSchema() -> Dict[str, Any]:
"""Get the JSON schema for multi-document generation (unified)."""
return {
"type": "object",
"required": ["metadata", "documents"],
"properties": {
"metadata": {
"type": "object",
"required": ["title", "split_strategy"],
"required": ["split_strategy"],
"properties": {
"title": {"type": "string", "description": "Document title"},
"split_strategy": {
"type": "string",
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
"enum": [
"single_document",
"per_entity",
"by_section",
"by_criteria",
"by_data_type",
"custom"
],
"description": "Strategy for splitting content into multiple files"
},
"splitCriteria": {
@@ -30,7 +37,6 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"type": "string",
"description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
},
"author": {"type": "string", "description": "Document author (optional)"},
"source_documents": {
"type": "array",
"items": {"type": "string"},
@@ -38,7 +44,7 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
},
"extraction_method": {
"type": "string",
"default": "ai_extraction",
"default": "ai_generation",
"description": "Method used for extraction"
}
}
@@ -64,7 +70,15 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"title": {"type": "string", "description": "Section title (optional)"},
"content_type": {
"type": "string",
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
"enum": [
"table",
"bullet_list",
"paragraph",
"heading",
"code_block",
"image",
"mixed"
],
"description": "Primary content type of this section"
},
"elements": {
@@ -76,7 +90,8 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
{"$ref": "#/definitions/bullet_list"},
{"$ref": "#/definitions/paragraph"},
{"$ref": "#/definitions/heading"},
{"$ref": "#/definitions/code_block"}
{"$ref": "#/definitions/code_block"},
{"$ref": "#/definitions/image"}
]
}
},
@@ -191,11 +206,20 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
"code": {"type": "string", "description": "Code content"},
"language": {"type": "string", "description": "Programming language (optional)"}
}
},
"image": {
"type": "object",
"required": ["url"],
"properties": {
"url": {"type": "string", "description": "Image URL or data URI"},
"caption": {"type": "string", "description": "Image caption (optional)"},
"alt": {"type": "string", "description": "Alt text (optional)"}
}
}
}
}
def get_document_subJsonSchema() -> Dict[str, Any]:
def getDocumentSchema() -> Dict[str, Any]:
"""Get the JSON schema for structured document generation (single document)."""
return {
"type": "object",
@@ -206,7 +230,6 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"required": ["title"],
"properties": {
"title": {"type": "string", "description": "Document title"},
"author": {"type": "string", "description": "Document author (optional)"},
"source_documents": {
"type": "array",
"items": {"type": "string"},
@@ -214,7 +237,7 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
},
"extraction_method": {
"type": "string",
"default": "ai_extraction",
"default": "ai_generation",
"description": "Method used for extraction"
}
}
@@ -230,7 +253,15 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"title": {"type": "string", "description": "Section title (optional)"},
"content_type": {
"type": "string",
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
"enum": [
"table",
"bullet_list",
"paragraph",
"heading",
"code_block",
"image",
"mixed"
],
"description": "Primary content type of this section"
},
"elements": {
@@ -242,7 +273,8 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
{"$ref": "#/definitions/bullet_list"},
{"$ref": "#/definitions/paragraph"},
{"$ref": "#/definitions/heading"},
{"$ref": "#/definitions/code_block"}
{"$ref": "#/definitions/code_block"},
{"$ref": "#/definitions/image"}
]
}
},
@@ -359,12 +391,21 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
"code": {"type": "string", "description": "Code content"},
"language": {"type": "string", "description": "Programming language (optional)"}
}
},
"image": {
"type": "object",
"required": ["url"],
"properties": {
"url": {"type": "string", "description": "Image URL or data URI"},
"caption": {"type": "string", "description": "Image caption (optional)"},
"alt": {"type": "string", "description": "Alt text (optional)"}
}
}
}
}
def get_extraction_prompt_template() -> str:
def getExtractionPromptTemplate() -> str:
"""Get the template for AI extraction prompts that request JSON output."""
return """
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.
@@ -390,7 +431,7 @@ Return only the JSON structure following the schema. Do not include any text bef
"""
def get_generation_prompt_template() -> str:
def getGenerationPromptTemplate() -> str:
"""Get the template for AI generation prompts that work with JSON input."""
return """
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.
@@ -416,31 +457,31 @@ Return only the enhanced JSON structure following the schema. Do not include any
"""
def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
def getAdaptiveJsonSchema(promptAnalysis: Dict[str, Any] = None) -> Dict[str, Any]:
"""Automatically select appropriate schema based on prompt analysis."""
if prompt_analysis and prompt_analysis.get("is_multi_file", False):
return get_multi_document_subJsonSchema()
if promptAnalysis and promptAnalysis.get("is_multi_file", False):
return getMultiDocumentSchema()
else:
return get_document_subJsonSchema()
return getDocumentSchema()
def validate_json_document(json_data: Dict[str, Any]) -> bool:
"""Validate that the JSON data follows the document schema."""
def validateJsonDocument(jsonData: Dict[str, Any]) -> bool:
"""Validate that the JSON data follows the unified document schema."""
try:
# Basic validation - check required fields
if not isinstance(json_data, dict):
if not isinstance(jsonData, dict):
return False
# Check if it's multi-document or single-document structure
if "documents" in json_data:
if "documents" in jsonData:
# Multi-document structure
if "metadata" not in json_data:
if "metadata" not in jsonData:
return False
metadata = json_data["metadata"]
if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata:
metadata = jsonData["metadata"]
if not isinstance(metadata, dict) or "split_strategy" not in metadata:
return False
documents = json_data["documents"]
documents = jsonData["documents"]
if not isinstance(documents, list):
return False
@@ -469,7 +510,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
return False
# Validate content_type
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
if section["content_type"] not in valid_types:
return False
@@ -477,16 +518,16 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
if not isinstance(section["elements"], list):
return False
elif "sections" in json_data:
elif "sections" in jsonData:
# Single-document structure (existing validation)
if "metadata" not in json_data:
if "metadata" not in jsonData:
return False
metadata = json_data["metadata"]
metadata = jsonData["metadata"]
if not isinstance(metadata, dict) or "title" not in metadata:
return False
sections = json_data["sections"]
sections = jsonData["sections"]
if not isinstance(sections, list):
return False
@@ -501,7 +542,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
return False
# Validate content_type
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
if section["content_type"] not in valid_types:
return False
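For orientation, schema selection and validation are meant to compose roughly as below; a minimal sketch assuming getAdaptiveJsonSchema and validateJsonDocument are imported from this schemas module.

promptAnalysis = {"is_multi_file": True}
schema = getAdaptiveJsonSchema(promptAnalysis)  # selects the multi-document schema
candidate = {
    "metadata": {"split_strategy": "per_entity"},  # title is no longer required
    "documents": [],
}
print(validateJsonDocument(candidate))  # expected True: split_strategy present, documents is a list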

View file

@@ -5,83 +5,10 @@ This module builds prompts for generating documents from extracted content.
import logging
from typing import Dict, Any
from modules.datamodels.datamodelJson import jsonTemplateDocument
logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation
# Includes examples for all content types so AI knows the structure patterns
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [
{
"id": "doc_1",
"title": "{{DOCUMENT_TITLE}}",
"filename": "document.json",
"sections": [
{
"id": "section_heading_example",
"content_type": "heading",
"elements": [
{"level": 1, "text": "Heading Text"}
],
"order": 0
},
{
"id": "section_paragraph_example",
"content_type": "paragraph",
"elements": [
{"text": "Paragraph text content"}
],
"order": 0
},
{
"id": "section_list_example",
"content_type": "list",
"elements": [
{
"items": [
{"text": "Item 1"},
{"text": "Item 2"}
],
"list_type": "numbered"
}
],
"order": 0
},
{
"id": "section_table_example",
"content_type": "table",
"elements": [
{
"headers": ["Column 1", "Column 2"],
"rows": [
["Row 1 Col 1", "Row 1 Col 2"],
["Row 2 Col 1", "Row 2 Col 2"]
],
"caption": "Table caption"
}
],
"order": 0
},
{
"id": "section_code_example",
"content_type": "code",
"elements": [
{
"code": "function example() { return true; }",
"language": "javascript"
}
],
"order": 0
}
]
}
]
}"""
async def buildGenerationPrompt(
@@ -106,99 +33,101 @@ async def buildGenerationPrompt(
Complete generation prompt string
"""
# Create a template - let AI generate title if not provided
title_value = title if title else "Generated Document"
json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)
titleValue = title if title else "Generated Document"
jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)
# Build prompt based on whether this is a continuation or first call
# Check if we have valid continuation context with actual JSON fragment
has_continuation = (
hasContinuation = (
continuationContext
and continuationContext.get("section_count", 0) > 0
and continuationContext.get("last_raw_json", "")
and continuationContext.get("last_raw_json", "").strip() != "{}"
)
if has_continuation:
if hasContinuation:
# CONTINUATION PROMPT - user already received first part, continue from where it stopped
last_raw_json = continuationContext.get("last_raw_json", "")
last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
last_items_from_fragment = continuationContext.get("last_items_from_fragment", "")
total_items_count = continuationContext.get("total_items_count", 0)
lastRawJson = continuationContext.get("last_raw_json", "")
lastItemObject = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
lastItemsFromFragment = continuationContext.get("last_items_from_fragment", "")
totalItemsCount = continuationContext.get("total_items_count", 0)
# Show the last few items to indicate where to continue (limit fragment size)
# Extract just the ending portion of the JSON to show where it cut off
fragment_snippet = ""
if last_raw_json:
fragmentSnippet = ""
if lastRawJson:
# Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json
fragmentSnippet = lastRawJson[-1500:] if len(lastRawJson) > 1500 else lastRawJson
# Add ellipsis if truncated
if len(last_raw_json) > 1500:
fragment_snippet = "..." + fragment_snippet
if len(lastRawJson) > 1500:
fragmentSnippet = "..." + fragmentSnippet
# Build clear continuation guidance
continuation_guidance = []
continuationGuidance = []
if total_items_count > 0:
continuation_guidance.append(f"You have already generated {total_items_count} items.")
if totalItemsCount > 0:
continuationGuidance.append(f"You have already generated {totalItemsCount} items.")
# Show the last complete item object (full object format)
if last_item_object:
continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")
if lastItemObject:
continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")
continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."
continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped."
generation_prompt = f"""User request: "{userPrompt}"
generationPrompt = f"""User request: "{userPrompt}"
The user already received part of the response. Continue generating the remaining content.
{continuation_text}
{continuationText}
Previous response ended here (JSON was cut off at this point):
```json
{fragment_snippet if fragment_snippet else "(No fragment available)"}
{fragmentSnippet if fragmentSnippet else "(No fragment available)"}
```
JSON structure template:
{json_template}
{jsonTemplate}
Instructions:
- Return full JSON structure (metadata + documents + sections)
- Continue from where it stopped - add NEW items only, do not repeat old items
- Use the element structures shown in the template
- Generate all remaining content needed to complete the user request
- Fill with actual content (no comments, no "Add more..." text, no placeholders)
- When fully complete, add "complete_response": true at root level
- Return only valid JSON (no comments, no markdown blocks)
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Use ONLY the element structures shown in the template.
- Continue from where it stopped: add NEW items only; do not repeat existing items.
- Generate all remaining content needed to complete the user request.
- Fill with actual content (no placeholders or instructional text such as "Add more...").
- When fully complete, add "complete_response": true at root level.
- Output JSON only; no markdown fences or extra text before/after.
Continue generating:
"""
else:
# FIRST CALL - initial generation
generation_prompt = f"""User request: "{userPrompt}"
generationPrompt = f"""User request: "{userPrompt}"
Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
JSON structure template (reference only - shows the pattern):
{json_template}
JSON structure template:
{jsonTemplate}
Instructions:
- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
- Do NOT continue from the template examples above - create your own sections
- Generate complete content based on the user request
- Use the element structures shown in the template (heading, paragraph, list, table, code)
- Create your own section IDs (do not use the example IDs like "section_heading_example")
- When fully complete, add "complete_response": true at root level
- Return only valid JSON (no comments, no markdown blocks, no text before/after)
- Start your response with {{"metadata": ...}} and return COMPLETE, STRICT JSON.
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
- Do NOT reuse the example section IDs from the template; create your own.
- Use ONLY the element structures shown in the template.
- Generate complete content based on the user request.
- When fully complete, add "complete_response": true at root level.
- Output JSON only; no markdown fences or any additional text.
Generate your complete response starting from {{"metadata": ...}}:
"""
# If we have extracted content, prepend it to the prompt
if extracted_content:
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generation_prompt}"""
{generationPrompt}"""
return generation_prompt.strip()
return generationPrompt.strip()
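The continuation branch only fires when the context carries a real JSON fragment, not merely a section count. A standalone sketch of that gate and the 1500-character snippet rule (the context dict is illustrative):

continuationContext = {
    "section_count": 3,
    "last_raw_json": '{"metadata": {"split_strategy": "single_document"}',
}
hasContinuation = bool(
    continuationContext
    and continuationContext.get("section_count", 0) > 0
    and continuationContext.get("last_raw_json", "").strip() not in ("", "{}")
)
fragment = continuationContext["last_raw_json"]
# Only the tail is shown to the model, just enough to mark the cut point
fragmentSnippet = ("..." + fragment[-1500:]) if len(fragment) > 1500 else fragment
print(hasContinuation)  # True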

View file

@@ -152,11 +152,11 @@ class NeutralizationService:
try:
# Auto-detect content type if not provided
if textType is None:
textType = self.commonUtils.detect_content_type(text)
textType = self.commonUtils.detectContentType(text)
# Check if content is binary data
if self.binaryProcessor.is_binary_content(text):
data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
if self.binaryProcessor.isBinaryContent(text):
data, mapping, replaced_fields, processed_info = self.binaryProcessor.processBinaryContent(text)
neutralized_text = text if isinstance(data, str) else str(data)
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
return NeutralizationResult(
@@ -169,13 +169,13 @@ class NeutralizationService:
# Inline former _processData routing
if textType in ['csv', 'json', 'xml']:
if textType == 'csv':
data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
data, mapping, replaced_fields, processed_info = self.listProcessor.processCsvContent(text)
elif textType == 'json':
data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
data, mapping, replaced_fields, processed_info = self.listProcessor.processJsonContent(text)
else: # xml
data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
data, mapping, replaced_fields, processed_info = self.listProcessor.processXmlContent(text)
else:
data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
# Stringify data consistently
if textType == 'csv':
try:

View file

@@ -6,7 +6,7 @@ Handles pattern matching and replacement for emails, phones, addresses, IDs and
import re
import uuid
from typing import Dict, List, Tuple, Any
from modules.services.serviceNeutralization.subPatterns import DataPatterns, find_patterns_in_text
from modules.services.serviceNeutralization.subPatterns import DataPatterns, findPatternsInText
class StringParser:
"""Handles string parsing and replacement operations"""
@@ -22,7 +22,7 @@ class StringParser:
self.NamesToParse = NamesToParse or []
self.mapping = {}
def is_placeholder(self, text: str) -> bool:
def _isPlaceholder(self, text: str) -> bool:
"""
Check if text is already a placeholder in format [tag.uuid]
@@ -34,7 +34,7 @@
"""
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
def replace_pattern_matches(self, text: str) -> str:
def _replacePatternMatches(self, text: str) -> str:
"""
Replace pattern-based matches (emails, phones, etc.) in text
@@ -44,37 +44,37 @@ class StringParser:
Returns:
str: Text with pattern matches replaced
"""
pattern_matches = find_patterns_in_text(text, self.data_patterns)
patternMatches = findPatternsInText(text, self.data_patterns)
# Process pattern matches from right to left to avoid position shifts
for pattern_name, matched_text, start, end in reversed(pattern_matches):
for patternName, matchedText, start, end in reversed(patternMatches):
# Skip if already a placeholder
if self.is_placeholder(matched_text):
if self._isPlaceholder(matchedText):
continue
# Skip if contains placeholder characters
if '[' in matched_text or ']' in matched_text:
if '[' in matchedText or ']' in matchedText:
continue
if matched_text not in self.mapping:
if matchedText not in self.mapping:
# Generate a UUID for the placeholder
placeholder_id = str(uuid.uuid4())
placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
typeMapping = {
'email': 'email',
'phone': 'phone',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
placeholderType = typeMapping.get(patternName, 'data')
self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]"
replacement = self.mapping[matched_text]
replacement = self.mapping[matchedText]
text = text[:start] + replacement + text[end:]
return text
def replace_custom_names(self, text: str) -> str:
def _replaceCustomNames(self, text: str) -> str:
"""
Replace custom names from the user list in text
@@ -96,19 +96,19 @@ class StringParser:
# Replace each match with a placeholder
for match in reversed(matches): # Process from right to left to avoid position shifts
matched_text = match.group()
if matched_text not in self.mapping:
matchedText = match.group()
if matchedText not in self.mapping:
# Generate a UUID for the placeholder
placeholder_id = str(uuid.uuid4())
self.mapping[matched_text] = f"[name.{placeholder_id}]"
placeholderId = str(uuid.uuid4())
self.mapping[matchedText] = f"[name.{placeholderId}]"
replacement = self.mapping[matched_text]
replacement = self.mapping[matchedText]
start, end = match.span()
text = text[:start] + replacement + text[end:]
return text
def process_string(self, text: str) -> str:
def processString(self, text: str) -> str:
"""
Process a string by replacing patterns first, then custom names
@@ -118,18 +118,18 @@ class StringParser:
Returns:
str: Processed text with replacements
"""
if self.is_placeholder(text):
if self._isPlaceholder(text):
return text
# Step 1: Replace pattern-based matches FIRST
text = self.replace_pattern_matches(text)
text = self._replacePatternMatches(text)
# Step 2: Replace custom names SECOND
text = self.replace_custom_names(text)
text = self._replaceCustomNames(text)
return text
def process_json_value(self, value: Any) -> Any:
def processJsonValue(self, value: Any) -> Any:
"""
Process a JSON value for anonymization
@@ -140,15 +140,15 @@ class StringParser:
Any: Processed value
"""
if isinstance(value, str):
return self.process_string(value)
return self.processString(value)
elif isinstance(value, dict):
return {k: self.process_json_value(v) for k, v in value.items()}
return {k: self.processJsonValue(v) for k, v in value.items()}
elif isinstance(value, list):
return [self.process_json_value(item) for item in value]
return [self.processJsonValue(item) for item in value]
else:
return value
def get_mapping(self) -> Dict[str, str]:
def getMapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
@@ -157,6 +157,6 @@ class StringParser:
"""
return self.mapping.copy()
def clear_mapping(self):
def clearMapping(self):
"""Clear the current mapping"""
self.mapping.clear()
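To make the two-pass order concrete: processString replaces pattern matches (emails, phones, addresses, IDs) first and custom names second, mapping each original value to a stable [type.uuid] placeholder. A hedged usage sketch, assuming the single NamesToParse constructor argument used elsewhere in this commit and an email pattern in DataPatterns:

from modules.services.serviceNeutralization.subParseString import StringParser

parser = StringParser(["Alice Example"])
out = parser.processString("Contact Alice Example at alice@example.com")
print(out)                  # e.g. "Contact [name.<uuid>] at [email.<uuid>]"
print(parser.getMapping())  # original value -> placeholder, stable across repeats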

View file

@@ -316,7 +316,7 @@ class TextTablePatterns:
"""Patterns for identifying table-like structures in text"""
@staticmethod
def get_patterns() -> List[Tuple[str, str]]:
def getPatterns() -> List[Tuple[str, str]]:
return [
# key: value pattern (with optional whitespace)
(r'^([^:]+):\s*(.+)$', ':'),
@@ -329,15 +329,15 @@
]
@staticmethod
def is_table_line(line: str) -> bool:
def _isTableLine(line: str) -> bool:
"""Check if a line matches any table pattern"""
patterns = TextTablePatterns.get_patterns()
patterns = TextTablePatterns.getPatterns()
return any(re.match(pattern[0], line.strip()) for pattern in patterns)
@staticmethod
def extract_key_value(line: str) -> Optional[Tuple[str, str]]:
def extractKeyValue(line: str) -> Optional[Tuple[str, str]]:
"""Extract key and value from a table line"""
patterns = TextTablePatterns.get_patterns()
patterns = TextTablePatterns.getPatterns()
for pattern, separator in patterns:
match = re.match(pattern, line.strip())
if match:
@@ -346,7 +346,7 @@ class TextTablePatterns:
return key, value
return None
def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
def getPatternForHeader(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
"""
Find matching pattern for a header
@@ -368,7 +368,7 @@ def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pat
return pattern
return None
def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]:
def findPatternsInText(text: str, patterns: List[Pattern]) -> List[tuple]:
"""
Find all pattern matches in text

View file

@@ -27,7 +27,7 @@ class BinaryProcessor:
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
}
def detect_binary_type(self, content: str) -> str:
def _detectBinaryType(self, content: str) -> str:
"""
Detect if content is binary data and determine type
@@ -54,7 +54,7 @@
return 'text'
def is_binary_content(self, content: str) -> bool:
def isBinaryContent(self, content: str) -> bool:
"""
Check if content is binary data
@@ -64,9 +64,9 @@
Returns:
bool: True if content is binary
"""
return self.detect_binary_type(content) == 'binary'
return self._detectBinaryType(content) == 'binary'
def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
def processBinaryContent(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
"""
Process binary content for anonymization
@@ -83,15 +83,15 @@
# 3. Handling metadata and embedded content
# 4. Preserving binary integrity while removing sensitive data
processed_info = {
processedInfo = {
'type': 'binary',
'status': 'not_implemented',
'message': 'Binary data neutralization not yet implemented'
}
return content, {}, [], processed_info
return content, {}, [], processedInfo
def get_supported_types(self) -> Dict[str, list]:
def getSupportedTypes(self) -> Dict[str, list]:
"""
Get list of supported binary file types

View file

@@ -33,7 +33,7 @@ class CommonUtils:
"""Common utility functions for data processing"""
@staticmethod
def normalize_whitespace(text: str) -> str:
def normalizeWhitespace(text: str) -> str:
"""
Normalize whitespace in text
@@ -48,7 +48,7 @@
return text.strip()
@staticmethod
def is_table_line(line: str) -> bool:
def _isTableLine(line: str) -> bool:
"""
Check if a line represents a table row
@@ -62,7 +62,7 @@
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
@staticmethod
def detect_content_type(content: str) -> str:
def detectContentType(content: str) -> str:
"""
Detect the type of content based on its structure
@@ -98,7 +98,7 @@
return 'text'
@staticmethod
def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
def mergeMappings(*mappings: Dict[str, str]) -> Dict[str, str]:
"""
Merge multiple mapping dictionaries
@@ -114,21 +114,21 @@
return merged
@staticmethod
def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
def createPlaceholder(placeholderType: str, placeholderId: str) -> str:
"""
Create a placeholder string in the format [type.uuid]
Args:
placeholder_type: Type of placeholder (email, phone, name, etc.)
placeholder_id: Unique identifier for the placeholder
placeholderType: Type of placeholder (email, phone, name, etc.)
placeholderId: Unique identifier for the placeholder
Returns:
str: Formatted placeholder string
"""
return f"[{placeholder_type}.{placeholder_id}]"
return f"[{placeholderType}.{placeholderId}]"
@staticmethod
def validate_placeholder(placeholder: str) -> bool:
def validatePlaceholder(placeholder: str) -> bool:
"""
Validate if a string is a valid placeholder
@@ -141,7 +141,7 @@ class CommonUtils:
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
@staticmethod
def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
def extractPlaceholderInfo(placeholder: str) -> Optional[tuple]:
"""
Extract type and ID from a placeholder

View file

@@ -10,7 +10,7 @@ from typing import Dict, List, Any, Union
from dataclasses import dataclass
from io import StringIO
from modules.services.serviceNeutralization.subParseString import StringParser
from modules.services.serviceNeutralization.subPatterns import get_pattern_for_header, HeaderPatterns
from modules.services.serviceNeutralization.subPatterns import getPatternForHeader, HeaderPatterns
@dataclass
class TableData:
@@ -32,7 +32,7 @@ class ListProcessor:
self.string_parser = StringParser(NamesToParse)
self.header_patterns = HeaderPatterns.patterns
def anonymize_table(self, table: TableData) -> TableData:
def _anonymizeTable(self, table: TableData) -> TableData:
"""
Anonymize table data based on headers
@@ -42,28 +42,28 @@
Returns:
TableData: Anonymized table
"""
anonymized_table = TableData(
anonymizedTable = TableData(
headers=table.headers.copy(),
rows=[row.copy() for row in table.rows],
source_type=table.source_type
)
for i, header in enumerate(anonymized_table.headers):
pattern = get_pattern_for_header(header, self.header_patterns)
for i, header in enumerate(anonymizedTable.headers):
pattern = getPatternForHeader(header, self.header_patterns)
if pattern:
for row in anonymized_table.rows:
for row in anonymizedTable.rows:
if row[i] is not None:
original = str(row[i])
if original not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
placeholderId = str(uuid.uuid4())
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
row[i] = self.string_parser.mapping[original]
return anonymized_table
return anonymizedTable
def process_csv_content(self, content: str) -> tuple:
def processCsvContent(self, content: str) -> tuple:
"""
Process CSV content
@@ -81,29 +81,29 @@
)
if not table.rows:
return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
return None, self.string_parser.getMapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
anonymized_table = self.anonymize_table(table)
anonymizedTable = self._anonymizeTable(table)
# Track replaced fields
replaced_fields = []
for i, header in enumerate(anonymized_table.headers):
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
if anon_row[i] != orig_row[i]:
replaced_fields.append(header)
replacedFields = []
for i, header in enumerate(anonymizedTable.headers):
for origRow, anonRow in zip(table.rows, anonymizedTable.rows):
if anonRow[i] != origRow[i]:
replacedFields.append(header)
# Convert back to DataFrame
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
result = pd.DataFrame(anonymizedTable.rows, columns=anonymizedTable.headers)
processed_info = {
processedInfo = {
'type': 'table',
'headers': table.headers,
'row_count': len(table.rows)
}
return result, self.string_parser.get_mapping(), replaced_fields, processed_info
return result, self.string_parser.getMapping(), replacedFields, processedInfo
def process_json_content(self, content: str) -> tuple:
def processJsonContent(self, content: str) -> tuple:
"""
Process JSON content
@@ -116,13 +116,13 @@
data = json.loads(content)
# Process JSON recursively using string parser
result = self.string_parser.process_json_value(data)
result = self.string_parser.processJsonValue(data)
processed_info = {'type': 'json'}
processedInfo = {'type': 'json'}
return result, self.string_parser.get_mapping(), [], processed_info
return result, self.string_parser.getMapping(), [], processedInfo
def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
def _anonymizeXmlElement(self, element: ET.Element, indent: str = '') -> str:
"""
Recursively process XML element and return formatted string
@@ -134,69 +134,69 @@
Formatted XML string
"""
# Process attributes
processed_attrs = {}
for attr_name, attr_value in element.attrib.items():
processedAttrs = {}
for attrName, attrValue in element.attrib.items():
# Check if attribute name matches any header patterns
pattern = get_pattern_for_header(attr_name, self.header_patterns)
pattern = getPatternForHeader(attrName, self.header_patterns)
if pattern:
if attr_value not in self.string_parser.mapping:
if attrValue not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
typeMapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern.name, 'data')
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
placeholderType = typeMapping.get(pattern.name, 'data')
self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
processedAttrs[attrName] = self.string_parser.mapping[attrValue]
else:
# Check if attribute value matches any data patterns
from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
matches = find_patterns_in_text(attr_value, DataPatterns.patterns)
from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
matches = findPatternsInText(attrValue, DataPatterns.patterns)
if matches:
pattern_name = matches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
patternName = matches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
if pattern:
if attr_value not in self.string_parser.mapping:
if attrValue not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
placeholderId = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
typeMapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
placeholderType = typeMapping.get(patternName, 'data')
self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
processedAttrs[attrName] = self.string_parser.mapping[attrValue]
else:
processed_attrs[attr_name] = attr_value
processedAttrs[attrName] = attrValue
else:
processed_attrs[attr_name] = attr_value
processedAttrs[attrName] = attrValue
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
attrs = ' '.join(f'{k}="{v}"' for k, v in processedAttrs.items())
attrs = f' {attrs}' if attrs else ''
# Process text content
text = element.text.strip() if element.text and element.text.strip() else ''
if text:
# Skip if already a placeholder
if not self.string_parser.is_placeholder(text):
if not self.string_parser._isPlaceholder(text):
# Check if text matches any patterns
from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
pattern_matches = find_patterns_in_text(text, DataPatterns.patterns)
from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
patternMatches = findPatternsInText(text, DataPatterns.patterns)
if pattern_matches:
pattern_name = pattern_matches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
if patternMatches:
patternName = patternMatches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
if pattern:
if text not in self.string_parser.mapping:
# Generate a UUID for the placeholder
@@ -210,8 +210,8 @@ class ListProcessor:
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
placeholderType = typeMapping.get(patternName, 'data')
self.string_parser.mapping[text] = f"[{placeholderType}.{placeholderId}]"
text = self.string_parser.mapping[text]
else:
# Check if text matches any custom names from the user list
@@ -230,8 +230,8 @@ class ListProcessor:
# Process child elements
children = []
for child in element:
child_str = self.anonymize_xml_element(child, indent + ' ')
children.append(child_str)
childStr = self._anonymizeXmlElement(child, indent + ' ')
children.append(childStr)
# Build element string
if not children and not text:
@@ -246,7 +246,7 @@ class ListProcessor:
result.append(f"{indent}</{element.tag}>")
return '\n'.join(result)
def process_xml_content(self, content: str) -> tuple:
def processXmlContent(self, content: str) -> tuple:
"""
Process XML content
@@ -259,21 +259,21 @@
root = ET.fromstring(content)
# Process XML recursively with proper formatting
result = self.anonymize_xml_element(root)
result = self._anonymizeXmlElement(root)
processed_info = {'type': 'xml'}
processedInfo = {'type': 'xml'}
return result, self.string_parser.get_mapping(), [], processed_info
return result, self.string_parser.getMapping(), [], processedInfo
def get_mapping(self) -> Dict[str, str]:
def getMapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
return self.string_parser.get_mapping()
return self.string_parser.getMapping()
def clear_mapping(self):
def clearMapping(self):
"""Clear the current mapping"""
self.string_parser.clear_mapping()
self.string_parser.clearMapping()
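All the process*Content methods share one return contract: (data, mapping, replacedFields, processedInfo). A hedged usage sketch for the CSV path; the module path below is an assumption and pandas must be installed:

from modules.services.serviceNeutralization.subParseList import ListProcessor  # path assumed

processor = ListProcessor([])  # no custom names to parse
df, mapping, replacedFields, info = processor.processCsvContent("name,email\nAlice,alice@example.com\n")
print(info)            # {'type': 'table', 'headers': ['name', 'email'], 'row_count': 1}
print(replacedFields)  # headers whose cells were anonymized, e.g. ['email']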

View file

@@ -25,7 +25,7 @@ class TextProcessor:
"""
self.string_parser = StringParser(NamesToParse)
def extract_tables_from_text(self, content: str) -> tuple:
def _extractTablesFromText(self, content: str) -> tuple:
"""
Extract tables and plain text from content
@@ -38,11 +38,11 @@
# For now, process the entire content as plain text
# This can be extended later to detect table-like structures
tables = []
plain_texts = [PlainText(content=content, source_type='text_plain')]
plainTexts = [PlainText(content=content, source_type='text_plain')]
return tables, plain_texts
return tables, plainTexts
def anonymize_plain_text(self, text: PlainText) -> PlainText:
def _anonymizePlainText(self, text: PlainText) -> PlainText:
"""
Anonymize plain text content
@@ -53,11 +53,11 @@
PlainText: Anonymized text
"""
# Use the string parser to process the content
anonymized_content = self.string_parser.process_string(text.content)
anonymizedContent = self.string_parser.processString(text.content)
return PlainText(content=anonymized_content, source_type=text.source_type)
return PlainText(content=anonymizedContent, source_type=text.source_type)
def process_text_content(self, content: str) -> tuple:
def processTextContent(self, content: str) -> tuple:
"""
Process text content and return anonymized data
@@ -68,35 +68,35 @@
Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
"""
# Extract tables and plain text sections
tables, plain_texts = self.extract_tables_from_text(content)
tables, plainTexts = self._extractTablesFromText(content)
# Process plain text sections
anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]
anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts]
# Combine all processed content
result = content
for text, anonymized_text in zip(plain_texts, anonymized_texts):
if text.content != anonymized_text.content:
result = result.replace(text.content, anonymized_text.content)
for text, anonymizedText in zip(plainTexts, anonymizedTexts):
if text.content != anonymizedText.content:
result = result.replace(text.content, anonymizedText.content)
# Get processing information
processed_info = {
processedInfo = {
'type': 'text',
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
if tables else [])
}
return result, self.string_parser.get_mapping(), [], processed_info
return result, self.string_parser.getMapping(), [], processedInfo
def get_mapping(self) -> Dict[str, str]:
def getMapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
return self.string_parser.get_mapping()
return self.string_parser.getMapping()
def clear_mapping(self):
def clearMapping(self):
"""Clear the current mapping"""
self.string_parser.clear_mapping()
self.string_parser.clearMapping()

View file

@@ -20,8 +20,8 @@ class SharepointService:
Use the setAccessTokenFromConnection() method to configure the access token before making API calls.
"""
self.services = serviceCenter
self.access_token = None
self.base_url = "https://graph.microsoft.com/v1.0"
self.accessToken = None
self.baseUrl = "https://graph.microsoft.com/v1.0"
def setAccessTokenFromConnection(self, userConnection) -> bool:
"""Set access token from UserConnection.
@@ -52,21 +52,21 @@
logger.error(f"Error setting access token: {str(e)}")
return False
async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
"""Make a Microsoft Graph API call with proper error handling."""
try:
if self.access_token is None:
if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."}
headers = {
"Authorization": f"Bearer {self.access_token}",
"Authorization": f"Bearer {self.accessToken}",
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
}
# Remove leading slash from endpoint to avoid double slash
clean_endpoint = endpoint.lstrip('/')
url = f"{self.base_url}/{clean_endpoint}"
cleanEndpoint = endpoint.lstrip('/')
url = f"{self.baseUrl}/{cleanEndpoint}"
logger.debug(f"Making Graph API call: {method} {url}")
timeout = aiohttp.ClientTimeout(total=30)
@@ -106,10 +106,10 @@ class SharepointService:
logger.error(f"Error making Graph API call: {str(e)}")
return {"error": f"Error making Graph API call: {str(e)}"}
async def discover_sites(self) -> List[Dict[str, Any]]:
async def discoverSites(self) -> List[Dict[str, Any]]:
"""Discover all SharePoint sites accessible to the user."""
try:
result = await self._make_graph_api_call("sites?search=*")
result = await self._makeGraphApiCall("sites?search=*")
if "error" in result:
logger.error(f"Error discovering SharePoint sites: {result['error']}")
@@ -118,9 +118,9 @@ class SharepointService:
sites = result.get("value", [])
logger.info(f"Discovered {len(sites)} SharePoint sites")
processed_sites = []
processedSites = []
for site in sites:
site_info = {
siteInfo = {
"id": site.get("id"),
"displayName": site.get("displayName"),
"name": site.get("name"),
@@ -129,24 +129,24 @@ class SharepointService:
"createdDateTime": site.get("createdDateTime"),
"lastModifiedDateTime": site.get("lastModifiedDateTime")
}
processed_sites.append(site_info)
logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
processedSites.append(siteInfo)
logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
return processed_sites
return processedSites
except Exception as e:
logger.error(f"Error discovering SharePoint sites: {str(e)}")
return []
async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
async def findSiteByName(self, siteName: str) -> Optional[Dict[str, Any]]:
"""Find a specific SharePoint site by name using direct Graph API call."""
try:
# Try to get the site directly by name using Graph API
endpoint = f"sites/{site_name}"
result = await self._make_graph_api_call(endpoint)
endpoint = f"sites/{siteName}"
result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result:
site_info = {
siteInfo = {
"id": result.get("id"),
"displayName": result.get("displayName"),
"name": result.get("name"),
@@ -155,15 +155,15 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime")
}
logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
return site_info
logger.info(f"Found site directly: {siteInfo['displayName']} - {siteInfo['webUrl']}")
return siteInfo
except Exception as e:
logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")
logger.debug(f"Direct site lookup failed for '{siteName}': {str(e)}")
# Fallback to discovery if direct lookup fails
logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
sites = await self.discover_sites()
logger.info(f"Direct lookup failed, trying discovery for site: {siteName}")
sites = await self.discoverSites()
if not sites:
logger.warning("No sites discovered")
return None
@@ -174,46 +174,46 @@ class SharepointService:
# Try exact match first
for site in sites:
if site.get("displayName", "").strip().lower() == site_name.strip().lower():
if site.get("displayName", "").strip().lower() == siteName.strip().lower():
logger.info(f"Found exact match: {site.get('displayName')}")
return site
# Try partial match
for site in sites:
if site_name.lower() in site.get("displayName", "").lower():
if siteName.lower() in site.get("displayName", "").lower():
logger.info(f"Found partial match: {site.get('displayName')}")
return site
logger.warning(f"No site found matching: {site_name}")
logger.warning(f"No site found matching: {siteName}")
return None
async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
async def findSiteByWebUrl(self, webUrl: str) -> Optional[Dict[str, Any]]:
"""Find a SharePoint site using its web URL (useful for guest sites)."""
try:
# Use the web URL format: sites/{hostname}:/sites/{site-path}
# Extract hostname and site path from the web URL
if not web_url.startswith("https://"):
web_url = f"https://{web_url}"
if not webUrl.startswith("https://"):
webUrl = f"https://{webUrl}"
# Parse the URL to extract hostname and site path
from urllib.parse import urlparse
parsed = urlparse(web_url)
parsed = urlparse(webUrl)
hostname = parsed.hostname
path_parts = parsed.path.strip('/').split('/')
pathParts = parsed.path.strip('/').split('/')
if len(path_parts) >= 2 and path_parts[0] == 'sites':
site_path = '/'.join(path_parts[1:]) # Everything after 'sites/'
if len(pathParts) >= 2 and pathParts[0] == 'sites':
sitePath = '/'.join(pathParts[1:]) # Everything after 'sites/'
else:
logger.error(f"Invalid SharePoint URL format: {web_url}")
logger.error(f"Invalid SharePoint URL format: {webUrl}")
return None
endpoint = f"sites/{hostname}:/sites/{site_path}"
endpoint = f"sites/{hostname}:/sites/{sitePath}"
logger.debug(f"Trying web URL format: {endpoint}")
result = await self._make_graph_api_call(endpoint)
result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result:
site_info = {
siteInfo = {
"id": result.get("id"),
"displayName": result.get("displayName"),
"name": result.get("name"),
@@ -222,33 +222,33 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime")
}
logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
return site_info
logger.info(f"Found site by web URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
return siteInfo
else:
logger.warning(f"Site not found using web URL: {web_url}")
logger.warning(f"Site not found using web URL: {webUrl}")
return None
except Exception as e:
logger.error(f"Error finding site by web URL: {str(e)}")
return None
async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
async def findSiteByUrl(self, hostname: str, sitePath: str) -> Optional[Dict[str, Any]]:
"""Find a SharePoint site using the site URL format."""
try:
# For guest sites, try different URL formats
url_formats = [
f"sites/{hostname}:/sites/{site_path}", # Standard format
f"sites/{hostname}:/sites/{site_path}/", # With trailing slash
f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase
f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash
urlFormats = [
f"sites/{hostname}:/sites/{sitePath}", # Standard format
f"sites/{hostname}:/sites/{sitePath}/", # With trailing slash
f"sites/{hostname}:/sites/{sitePath.lower()}", # Lowercase
f"sites/{hostname}:/sites/{sitePath.lower()}/", # Lowercase with slash
]
for endpoint in url_formats:
for endpoint in urlFormats:
logger.debug(f"Trying URL format: {endpoint}")
result = await self._make_graph_api_call(endpoint)
result = await self._makeGraphApiCall(endpoint)
if result and "error" not in result:
site_info = {
siteInfo = {
"id": result.get("id"),
"displayName": result.get("displayName"),
"name": result.get("name"),
@@ -257,29 +257,29 @@ class SharepointService:
"createdDateTime": result.get("createdDateTime"),
"lastModifiedDateTime": result.get("lastModifiedDateTime")
}
logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
return site_info
logger.info(f"Found site by URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
return siteInfo
else:
logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")
logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{sitePath}")
return None
except Exception as e:
logger.error(f"Error finding site by URL: {str(e)}")
return None
async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
async def getFolderByPath(self, siteId: str, folderPath: str) -> Optional[Dict[str, Any]]:
"""Get folder information by path within a site."""
try:
# Clean the path
clean_path = folder_path.lstrip('/')
endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
cleanPath = folderPath.lstrip('/')
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
result = await self._make_graph_api_call(endpoint)
result = await self._makeGraphApiCall(endpoint)
if "error" in result:
logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
logger.warning(f"Folder not found at path {folderPath}: {result['error']}")
return None
return result
@@ -288,43 +288,43 @@ class SharepointService:
logger.error(f"Error getting folder by path: {str(e)}")
return None
async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
"""Upload a file to SharePoint."""
try:
# Clean the path
clean_path = folder_path.lstrip('/')
upload_path = f"{clean_path.rstrip('/')}/{file_name}"
endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
cleanPath = folderPath.lstrip('/')
uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
logger.info(f"Uploading file to: {endpoint}")
result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
if "error" in result:
logger.error(f"Upload failed: {result['error']}")
return result
logger.info(f"File uploaded successfully: {file_name}")
logger.info(f"File uploaded successfully: {fileName}")
return result
except Exception as e:
logger.error(f"Error uploading file: {str(e)}")
return {"error": f"Error uploading file: {str(e)}"}
async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
"""Download a file from SharePoint."""
try:
if self.access_token is None:
if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return None
endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
endpoint = f"sites/{siteId}/drive/items/{fileId}/content"
headers = {"Authorization": f"Bearer {self.access_token}"}
headers = {"Authorization": f"Bearer {self.accessToken}"}
timeout = aiohttp.ClientTimeout(total=30)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response:
async with session.get(f"{self.baseUrl}/{endpoint}", headers=headers) as response:
if response.status == 200:
return await response.read()
else:
@@ -335,32 +335,32 @@ class SharepointService:
logger.error(f"Error downloading file: {str(e)}")
return None
async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
async def listFolderContents(self, siteId: str, folderPath: str = "") -> List[Dict[str, Any]]:
"""List contents of a folder."""
try:
if not folder_path or folder_path == "/":
endpoint = f"sites/{site_id}/drive/root/children"
if not folderPath or folderPath == "/":
endpoint = f"sites/{siteId}/drive/root/children"
else:
clean_path = folder_path.lstrip('/')
endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
cleanPath = folderPath.lstrip('/')
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"
result = await self._make_graph_api_call(endpoint)
result = await self._makeGraphApiCall(endpoint)
if "error" in result:
logger.warning(f"Failed to list folder contents: {result['error']}")
return None
items = result.get("value", [])
processed_items = []
processedItems = []
for item in items:
# Determine if it's a folder or file
is_folder = 'folder' in item
isFolder = 'folder' in item
item_info = {
itemInfo = {
"id": item.get("id"),
"name": item.get("name"),
"type": "folder" if is_folder else "file",
"type": "folder" if isFolder else "file",
"size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
@@ -368,42 +368,42 @@ class SharepointService:
}
if "file" in item:
item_info["mimeType"] = item["file"].get("mimeType")
item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
itemInfo["mimeType"] = item["file"].get("mimeType")
itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
if "folder" in item:
item_info["childCount"] = item["folder"].get("childCount", 0)
itemInfo["childCount"] = item["folder"].get("childCount", 0)
processed_items.append(item_info)
processedItems.append(itemInfo)
return processed_items
return processedItems
except Exception as e:
logger.error(f"Error listing folder contents: {str(e)}")
return []
async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
async def searchFiles(self, siteId: str, query: str) -> List[Dict[str, Any]]:
"""Search for files in a site."""
try:
search_query = query.replace("'", "''") # Escape single quotes for OData
endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
searchQuery = query.replace("'", "''") # Escape single quotes for OData
endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"
result = await self._make_graph_api_call(endpoint)
result = await self._makeGraphApiCall(endpoint)
if "error" in result:
logger.warning(f"Search failed: {result['error']}")
return []
items = result.get("value", [])
processed_items = []
processedItems = []
for item in items:
is_folder = 'folder' in item
isFolder = 'folder' in item
item_info = {
itemInfo = {
"id": item.get("id"),
"name": item.get("name"),
"type": "folder" if is_folder else "file",
"type": "folder" if isFolder else "file",
"size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
@@ -412,64 +412,64 @@ class SharepointService:
}
if "file" in item:
item_info["mimeType"] = item["file"].get("mimeType")
item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
itemInfo["mimeType"] = item["file"].get("mimeType")
itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
processed_items.append(item_info)
processedItems.append(itemInfo)
return processed_items
return processedItems
except Exception as e:
logger.error(f"Error searching files: {str(e)}")
return []
async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
async def copyFileAsync(self, siteId: str, sourceFolder: str, sourceFile: str, destFolder: str, destFile: str) -> None:
"""Copy a file from source to destination folder (like original synchronizer)."""
try:
# First, download the source file
source_path = f"{source_folder}/{source_file}"
file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
sourcePath = f"{sourceFolder}/{sourceFile}"
fileContent = await self.downloadFileByPath(siteId=siteId, filePath=sourcePath)
if not file_content:
raise Exception(f"Failed to download source file: {source_path}")
if not fileContent:
raise Exception(f"Failed to download source file: {sourcePath}")
# Upload to destination
await self.upload_file(
site_id=site_id,
folder_path=dest_folder,
file_name=dest_file,
content=file_content
await self.uploadFile(
siteId=siteId,
folderPath=destFolder,
fileName=destFile,
content=fileContent
)
logger.info(f"File copied: {source_file} -> {dest_file}")
logger.info(f"File copied: {sourceFile} -> {destFile}")
except Exception as e:
# Provide more specific error information
error_msg = str(e)
if "itemNotFound" in error_msg or "404" in error_msg:
raise Exception(f"Source file not found (404): {source_path} - {error_msg}")
errorMsg = str(e)
if "itemNotFound" in errorMsg or "404" in errorMsg:
raise Exception(f"Source file not found (404): {sourcePath} - {errorMsg}")
else:
raise Exception(f"Error copying file: {error_msg}")
raise Exception(f"Error copying file: {errorMsg}")
async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
async def downloadFileByPath(self, siteId: str, filePath: str) -> Optional[bytes]:
"""Download a file by its path within a site."""
try:
if self.access_token is None:
if self.accessToken is None:
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
return None
# Clean the path
clean_path = file_path.strip('/')
endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content"
cleanPath = filePath.strip('/')
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content"
# Use direct HTTP call for file downloads (binary content)
headers = {
"Authorization": f"Bearer {self.access_token}",
"Authorization": f"Bearer {self.accessToken}",
}
# Remove leading slash from endpoint to avoid double slash
clean_endpoint = endpoint.lstrip('/')
url = f"{self.base_url}/{clean_endpoint}"
cleanEndpoint = endpoint.lstrip('/')
url = f"{self.baseUrl}/{cleanEndpoint}"
logger.debug(f"Downloading file: GET {url}")
timeout = aiohttp.ClientTimeout(total=30)

View file

@@ -7,7 +7,7 @@ import logging
from typing import Any, Optional, Dict, Callable, List
from modules.shared.configuration import APP_CONFIG
from modules.shared.eventManagement import eventManager
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.timezoneUtils import getUtcTimestamp
from modules.shared import jsonUtils
logger = logging.getLogger(__name__)
@@ -122,7 +122,7 @@ class UtilsService:
float: Current UTC timestamp in seconds
"""
try:
return get_utc_timestamp()
return getUtcTimestamp()
except Exception as e:
logger.error(f"Error getting UTC timestamp: {str(e)}")
return 0.0
@@ -185,6 +185,75 @@ class UtilsService:
# Fail silently so the main flow is never broken
pass
# ===== Prompt sanitization =====
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
"""
Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
This is the single source of truth for all prompt sanitization across the system.
Replaces all scattered sanitization functions with a unified approach.
Args:
content: The content to sanitize
contentType: Type of content ("text", "userinput", "json", "document")
Returns:
Safely sanitized content ready for AI prompt insertion
"""
if not content:
return ""
try:
import re
# Convert to string if not already
content_str = str(content)
# Remove null bytes and control characters (except newlines and tabs)
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
# Handle different content types with appropriate sanitization
if contentType == "userinput":
# Extra security for user-controlled content
# Escape curly braces to prevent placeholder injection
sanitized = sanitized.replace('{', '{{').replace('}', '}}')
# Escape quotes and wrap in single quotes
sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
return f"'{sanitized}'"
elif contentType == "json":
# For JSON content, escape quotes and backslashes
sanitized = sanitized.replace('\\', '\\\\')
sanitized = sanitized.replace('"', '\\"')
sanitized = sanitized.replace('\n', '\\n')
sanitized = sanitized.replace('\r', '\\r')
sanitized = sanitized.replace('\t', '\\t')
elif contentType == "document":
# For document content, escape special characters
sanitized = sanitized.replace('\\', '\\\\')
sanitized = sanitized.replace('"', '\\"')
sanitized = sanitized.replace("'", "\\'")
sanitized = sanitized.replace('\n', '\\n')
sanitized = sanitized.replace('\r', '\\r')
sanitized = sanitized.replace('\t', '\\t')
else: # contentType == "text" or default
# Basic text sanitization
sanitized = sanitized.replace('\\', '\\\\')
sanitized = sanitized.replace('"', '\\"')
sanitized = sanitized.replace("'", "\\'")
sanitized = sanitized.replace('\n', '\\n')
sanitized = sanitized.replace('\r', '\\r')
sanitized = sanitized.replace('\t', '\\t')
return sanitized
except Exception as e:
logger.error(f"Error sanitizing prompt content: {str(e)}")
# Return a safe fallback
return "[ERROR: Content could not be safely sanitized]"
# ===== JSON utility wrappers =====
def jsonStripCodeFences(self, text: str) -> str:

View file

@@ -34,54 +34,54 @@ class AttributeDefinition(BaseModel):
MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {}
def register_model_labels(model_name: str, model_label: Dict[str, str], labels: Dict[str, Dict[str, str]]):
def registerModelLabels(modelName: str, modelLabel: Dict[str, str], labels: Dict[str, Dict[str, str]]):
"""
Register labels for a model's attributes and the model itself.
Args:
model_name: Name of the model class
model_label: Dictionary mapping language codes to model labels
modelName: Name of the model class
modelLabel: Dictionary mapping language codes to model labels
e.g. {"en": "Prompt", "fr": "Invite"}
labels: Dictionary mapping attribute names to their translations
e.g. {"name": {"en": "Name", "fr": "Nom"}}
"""
MODEL_LABELS[model_name] = {"model": model_label, "attributes": labels}
MODEL_LABELS[modelName] = {"model": modelLabel, "attributes": labels}
def get_model_labels(model_name: str, language: str = "en") -> Dict[str, str]:
def getModelLabels(modelName: str, language: str = "en") -> Dict[str, str]:
"""
Get labels for a model's attributes in the specified language.
Args:
model_name: Name of the model class
modelName: Name of the model class
language: Language code (default: "en")
Returns:
Dictionary mapping attribute names to their labels in the specified language
"""
model_data = MODEL_LABELS.get(model_name, {})
attribute_labels = model_data.get("attributes", {})
modelData = MODEL_LABELS.get(modelName, {})
attributeLabels = modelData.get("attributes", {})
return {
attr: translations.get(language, translations.get("en", attr))
for attr, translations in attribute_labels.items()
for attr, translations in attributeLabels.items()
}
def get_model_label(model_name: str, language: str = "en") -> str:
def getModelLabel(modelName: str, language: str = "en") -> str:
"""
Get the label for a model in the specified language.
Args:
model_name: Name of the model class
modelName: Name of the model class
language: Language code (default: "en")
Returns:
Model label in the specified language, or model name if no label exists
"""
model_data = MODEL_LABELS.get(model_name, {})
model_label = model_data.get("model", {})
return model_label.get(language, model_label.get("en", model_name))
modelData = MODEL_LABELS.get(modelName, {})
modelLabel = modelData.get("model", {})
return modelLabel.get(language, modelLabel.get("en", modelName))
def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]:
@@ -100,8 +100,8 @@ def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguag
attributes = []
model_name = modelClass.__name__
labels = get_model_labels(model_name, userLanguage)
model_label = get_model_label(model_name, userLanguage)
labels = getModelLabels(model_name, userLanguage)
model_label = getModelLabel(model_name, userLanguage)
# Pydantic v2 only
fields = modelClass.model_fields

View file

@@ -19,42 +19,42 @@ class DailyRotatingFileHandler(RotatingFileHandler):
The log file name includes the current date and switches at midnight.
"""
def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
self.log_dir = log_dir
self.filename_prefix = filename_prefix
self.current_date = None
self.current_file = None
def __init__(self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs):
self.logDir = logDir
self.filenamePrefix = filenamePrefix
self.currentDate = None
self.currentFile = None
# Initialize with today's file
self._update_file_if_needed()
self._updateFileIfNeeded()
# Call parent constructor with current file
super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
super().__init__(self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs)
def _update_file_if_needed(self):
def _updateFileIfNeeded(self):
"""Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d")
if self.current_date != today:
self.current_date = today
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
if self.currentDate != today:
self.currentDate = today
newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
if self.current_file != new_file:
self.current_file = new_file
if self.currentFile != newFile:
self.currentFile = newFile
return True
return False
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
if self._update_file_if_needed():
if self._updateFileIfNeeded():
# Close current file and open new one
if self.stream:
self.stream.close()
self.stream = None
# Update the baseFilename for the parent class
self.baseFilename = self.current_file
self.baseFilename = self.currentFile
# Reopen the stream
if not self.delay:
self.stream = self._open()
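# Editor's note: a minimal usage sketch (not part of the commit); the log
# directory is hypothetical.
import logging

handler = DailyRotatingFileHandler(
    logDir="/var/log/app",
    filenamePrefix="log_audit",
    maxBytes=10485760,
    backupCount=5,
)
logging.getLogger("audit").addHandler(handler)
# Entries land in /var/log/app/log_audit_<YYYYMMDD>.log; the handler
# switches to a fresh file when the date changes at midnight.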
@@ -68,9 +68,9 @@ class AuditLogger:
def __init__(self):
self.logger = None
self._setup_audit_logger()
self._setupAuditLogger()
def _setup_audit_logger(self):
def _setupAuditLogger(self):
"""Setup the audit logger with daily file rotation"""
try:
# Get log directory from config
@@ -96,10 +96,10 @@ class AuditLogger:
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler(
log_dir=logDir,
filename_prefix="log_audit",
max_bytes=rotationSize,
backup_count=backupCount
logDir=logDir,
filenamePrefix="log_audit",
maxBytes=rotationSize,
backupCount=backupCount
)
# Create formatter for audit log
@@ -120,9 +120,9 @@ class AuditLogger:
self.logger = logging.getLogger(__name__)
self.logger.error(f"Failed to setup audit logger: {str(e)}")
def log_event(self,
user_id: str,
mandate_id: str,
def logEvent(self,
userId: str,
mandateId: str,
category: str,
action: str,
details: str = "",
@@ -131,8 +131,8 @@
Log an audit event
Args:
user_id: User identifier
mandate_id: Mandate identifier (can be empty if not applicable)
userId: User identifier
mandateId: Mandate identifier (can be empty if not applicable)
category: Event category (e.g., 'key', 'access', 'data')
action: Specific action (e.g., 'decode', 'login', 'logout')
details: Additional details about the event
@@ -148,50 +148,50 @@
# Format the audit log entry
# Format: timestamp | userid | mandateid | category | action | details
audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"
auditEntry = f"{userId} | {mandateId} | {category} | {action} | {details}"
# Log the event
self.logger.info(audit_entry)
self.logger.info(auditEntry)
except Exception as e:
# Use standard logger as fallback
logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
def logKeyAccess(self, userId: str, mandateId: str, keyName: str, action: str) -> None:
"""Log key access events (decode/encode)"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
self.logEvent(
userId=userId,
mandateId=mandateId,
category="key",
action=action,
details=key_name
details=keyName
)
def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
def logUserAccess(self, userId: str, mandateId: str, action: str, successInfo: str = "") -> None:
"""Log user access events (login/logout)"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
self.logEvent(
userId=userId,
mandateId=mandateId,
category="access",
action=action,
details=success_info
details=successInfo
)
def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
def logDataAccess(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
"""Log data access events"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
self.logEvent(
userId=userId,
mandateId=mandateId,
category="data",
action=action,
details=details
)
def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
"""Log security-related events"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
self.logEvent(
userId=userId,
mandateId=mandateId,
category="security",
action=action,
details=details
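# Editor's note: a sketch (not part of the commit) of the resulting audit
# line; the ids are hypothetical.
audit_logger.logKeyAccess(
    userId="u-123",
    mandateId="m-456",
    keyName="OPENAI_API_KEY",
    action="decode",
)
# Logged (after the formatter's timestamp prefix):
# u-123 | m-456 | key | decode | OPENAI_API_KEY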

View file

@@ -199,10 +199,10 @@ class Configuration:
# Log audit event for secret key access
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access(
user_id=user_id,
mandate_id="system",
key_name=key,
audit_logger.logKeyAccess(
userId=user_id,
mandateId="system",
keyName=key,
action="decode"
)
except Exception:
@@ -211,9 +211,9 @@ class Configuration:
if value.startswith("{") and value.endswith("}"):
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
return handleSecretJson(value, user_id, key)
return handleSecretJson(value, userId=user_id, keyName=key)
else:
return handleSecretText(value, user_id, key)
return handleSecretText(value, userId=user_id, keyName=key)
return value
return default
@@ -235,31 +235,31 @@ class Configuration:
"""Set a configuration value (for testing/overrides)"""
self._data[key] = value
def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
def handleSecretText(value: str, userId: str = "system", keyName: str = "unknown") -> str:
"""
Handle secret values with encryption/decryption support.
Args:
value: The secret value to handle (may be encrypted)
user_id: The user ID making the request (default: "system")
key_name: The name of the key being decrypted (default: "unknown")
userId: The user ID making the request (default: "system")
keyName: The name of the key being decrypted (default: "unknown")
Returns:
str: Processed secret value (decrypted if encrypted)
"""
if _is_encrypted_value(value):
return decrypt_value(value, user_id, key_name)
if _isEncryptedValue(value):
return decryptValue(value, userId, keyName)
return value
def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown") -> str:
"""
Handle JSON secret values (like Google service account keys) with encryption/decryption support.
Validates that the value is valid JSON after decryption.
Args:
value: The JSON secret value to handle (may be encrypted)
user_id: The user ID making the request (default: "system")
key_name: The name of the key being decrypted (default: "unknown")
userId: The user ID making the request (default: "system")
keyName: The name of the key being decrypted (default: "unknown")
Returns:
str: Processed JSON secret value (decrypted if encrypted)
@@ -268,15 +268,15 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
ValueError: If the value is not valid JSON after decryption
"""
# Decrypt if encrypted
if _is_encrypted_value(value):
decrypted_value = decrypt_value(value, user_id, key_name)
if _isEncryptedValue(value):
decryptedValue = decryptValue(value, userId, keyName)
else:
decrypted_value = value
decryptedValue = value
try:
# Validate that it's valid JSON
json.loads(decrypted_value)
return decrypted_value
json.loads(decryptedValue)
return decryptedValue
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON in secret value: {e}")
@@ -284,12 +284,12 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
# Structure: {user_id: {key_name: [timestamps]}}
_decryption_attempts = {}
def _get_master_key(env_type: str = None) -> bytes:
def _getMasterKey(envType: str = None) -> bytes:
"""
Get the master key for the specified environment.
Args:
env_type: The environment type (dev, int, prod, etc.). If None, uses current config.
envType: The environment type (dev, int, prod, etc.). If None, uses current config.
Returns:
bytes: The master key for encryption/decryption
@@ -298,24 +298,24 @@ def _get_master_key(env_type: str = None) -> bytes:
ValueError: If no master key is found
"""
# Get the key location from config
key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
if env_type is None:
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
keyLocation = APP_CONFIG.get('APP_KEY_SYSVAR')
if envType is None:
envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
if not key_location:
if not keyLocation:
raise ValueError("APP_KEY_SYSVAR not configured")
# First try to get from environment variable
master_key = os.environ.get(key_location)
masterKey = os.environ.get(keyLocation)
if master_key:
if masterKey:
# If found in environment, use it directly
return master_key.encode('utf-8')
return masterKey.encode('utf-8')
# If not in environment, try to read from file
if os.path.exists(key_location):
if os.path.exists(keyLocation):
try:
with open(key_location, 'r') as f:
with open(keyLocation, 'r') as f:
content = f.read().strip()
# Parse the key file format: env = key
@@ -326,26 +326,26 @@ def _get_master_key(env_type: str = None) -> bytes:
continue
if '=' in line:
key_env, key_value = line.split('=', 1)
key_env = key_env.strip()
key_value = key_value.strip()
keyEnv, keyValue = line.split('=', 1)
keyEnv = keyEnv.strip()
keyValue = keyValue.strip()
if key_env == env_type:
return key_value.encode('utf-8')
if keyEnv == envType:
return keyValue.encode('utf-8')
raise ValueError(f"No key found for environment '{env_type}' in {key_location}")
raise ValueError(f"No key found for environment '{envType}' in {keyLocation}")
except Exception as e:
raise ValueError(f"Error reading key file {key_location}: {e}")
raise ValueError(f"Error reading key file {keyLocation}: {e}")
raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")
raise ValueError(f"Master key not found. Checked environment variable '{keyLocation}' and file path")
def _derive_encryption_key(master_key: bytes) -> bytes:
def _deriveEncryptionKey(masterKey: bytes) -> bytes:
"""
Derive a 32-byte encryption key from the master key using PBKDF2.
Args:
master_key: The master key bytes
masterKey: The master key bytes
Returns:
bytes: 32-byte derived key suitable for Fernet
@@ -360,9 +360,9 @@ def _derive_encryption_key(master_key: bytes) -> bytes:
iterations=100000,
)
return base64.urlsafe_b64encode(kdf.derive(master_key))
return base64.urlsafe_b64encode(kdf.derive(masterKey))
def _is_encrypted_value(value: str) -> bool:
def _isEncryptedValue(value: str) -> bool:
"""
Check if a value is encrypted (starts with environment-specific prefix).
@@ -382,64 +382,64 @@ def _is_encrypted_value(value: str) -> bool:
value.startswith('TEST_ENC:') or
value.startswith('STAGING_ENC:'))
def _get_encryption_prefix(env_type: str) -> str:
def _getEncryptionPrefix(envType: str) -> str:
"""
Get the encryption prefix for the given environment type.
Args:
env_type: The environment type (dev, int, prod, etc.)
envType: The environment type (dev, int, prod, etc.)
Returns:
str: The encryption prefix
"""
return f"{env_type.upper()}_ENC:"
return f"{envType.upper()}_ENC:"
def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
def _checkDecryptionRateLimit(userId: str, keyName: str, maxPerSecond: int = 10) -> bool:
"""
Check if decryption is allowed based on rate limiting (max 10 per second per user per key).
Args:
user_id: The user ID making the request
key_name: The name of the key being decrypted
max_per_second: Maximum decryptions per second (default: 10)
userId: The user ID making the request
keyName: The name of the key being decrypted
maxPerSecond: Maximum decryptions per second (default: 10)
Returns:
bool: True if allowed, False if rate limited
"""
current_time = time.time()
currentTime = time.time()
# Initialize tracking for this user if not exists
if user_id not in _decryption_attempts:
_decryption_attempts[user_id] = {}
if userId not in _decryption_attempts:
_decryption_attempts[userId] = {}
# Initialize tracking for this key if not exists
if key_name not in _decryption_attempts[user_id]:
_decryption_attempts[user_id][key_name] = []
if keyName not in _decryption_attempts[userId]:
_decryption_attempts[userId][keyName] = []
# Clean old attempts (older than 1 second)
_decryption_attempts[user_id][key_name] = [
timestamp for timestamp in _decryption_attempts[user_id][key_name]
if current_time - timestamp < 1.0
_decryption_attempts[userId][keyName] = [
timestamp for timestamp in _decryption_attempts[userId][keyName]
if currentTime - timestamp < 1.0
]
# Check if we're within rate limit
if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
if len(_decryption_attempts[userId][keyName]) >= maxPerSecond:
logger.warning(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' ({maxPerSecond}/sec)")
return False
# Record this attempt
_decryption_attempts[user_id][key_name].append(current_time)
_decryption_attempts[userId][keyName].append(currentTime)
return True
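# Editor's note: an illustrative sketch (not part of the commit) of the
# sliding-window limit above for one (userId, keyName) pair.
for i in range(11):
    allowed = _checkDecryptionRateLimit("u-123", "OPENAI_API_KEY")
    # allowed is True for the first 10 calls within the same second,
    # False for the 11th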
def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
def encryptValue(value: str, envType: str = None, userId: str = "system", keyName: str = "unknown") -> str:
"""
Encrypt a value using the master key for the specified environment.
Args:
value: The plain text value to encrypt
env_type: The environment type (dev, int, prod). If None, uses current environment.
user_id: The user ID making the request (default: "system")
key_name: The name of the key being encrypted (default: "unknown")
envType: The environment type (dev, int, prod). If None, uses current environment.
userId: The user ID making the request (default: "system")
keyName: The name of the key being encrypted (default: "unknown")
Returns:
str: The encrypted value with prefix
@@ -447,48 +447,48 @@ def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key
Raises:
ValueError: If encryption fails
"""
if env_type is None:
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
if envType is None:
envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
try:
master_key = _get_master_key(env_type)
derived_key = _derive_encryption_key(master_key)
fernet = Fernet(derived_key)
masterKey = _getMasterKey(envType)
derivedKey = _deriveEncryptionKey(masterKey)
fernet = Fernet(derivedKey)
# Encrypt the value
encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')
encryptedBytes = fernet.encrypt(value.encode('utf-8'))
encryptedB64 = base64.urlsafe_b64encode(encryptedBytes).decode('utf-8')
# Add environment prefix
prefix = _get_encryption_prefix(env_type)
encrypted_value = f"{prefix}{encrypted_b64}"
prefix = _getEncryptionPrefix(envType)
encryptedValue = f"{prefix}{encryptedB64}"
# Log audit event for encryption
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access(
user_id=user_id,
mandate_id="system",
key_name=key_name,
audit_logger.logKeyAccess(
userId=userId,
mandateId="system",
keyName=keyName,
action="encrypt"
)
except Exception:
# Don't fail if audit logging fails
pass
return encrypted_value
return encryptedValue
except Exception as e:
raise ValueError(f"Encryption failed: {e}")
def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str:
"""
Decrypt a value using the master key for the current environment.
Args:
encrypted_value: The encrypted value with prefix
user_id: The user ID making the request (default: "system")
key_name: The name of the key being decrypted (default: "unknown")
encryptedValue: The encrypted value with prefix
userId: The user ID making the request (default: "system")
keyName: The name of the key being decrypted (default: "unknown")
Returns:
str: The decrypted plain text value
@@ -496,59 +496,59 @@ def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str =
Raises:
ValueError: If decryption fails
"""
if not _is_encrypted_value(encrypted_value):
return encrypted_value # Return as-is if not encrypted
if not _isEncryptedValue(encryptedValue):
return encryptedValue # Return as-is if not encrypted
# Check rate limiting (10 per second per user per key)
if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")
if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
try:
# Extract environment type from prefix
if encrypted_value.startswith('DEV_ENC:'):
env_type = 'dev'
if encryptedValue.startswith('DEV_ENC:'):
envType = 'dev'
prefix = 'DEV_ENC:'
elif encrypted_value.startswith('INT_ENC:'):
env_type = 'int'
elif encryptedValue.startswith('INT_ENC:'):
envType = 'int'
prefix = 'INT_ENC:'
elif encrypted_value.startswith('PROD_ENC:'):
env_type = 'prod'
elif encryptedValue.startswith('PROD_ENC:'):
envType = 'prod'
prefix = 'PROD_ENC:'
elif encrypted_value.startswith('TEST_ENC:'):
env_type = 'test'
elif encryptedValue.startswith('TEST_ENC:'):
envType = 'test'
prefix = 'TEST_ENC:'
elif encrypted_value.startswith('STAGING_ENC:'):
env_type = 'staging'
elif encryptedValue.startswith('STAGING_ENC:'):
envType = 'staging'
prefix = 'STAGING_ENC:'
else:
raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:")
encrypted_part = encrypted_value[len(prefix):]
encryptedPart = encryptedValue[len(prefix):]
# Get master key for the specific environment and derive encryption key
master_key = _get_master_key(env_type)
derived_key = _derive_encryption_key(master_key)
fernet = Fernet(derived_key)
masterKey = _getMasterKey(envType)
derivedKey = _deriveEncryptionKey(masterKey)
fernet = Fernet(derivedKey)
# Decode and decrypt
encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
decrypted_bytes = fernet.decrypt(encrypted_bytes)
decrypted_value = decrypted_bytes.decode('utf-8')
encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8'))
decryptedBytes = fernet.decrypt(encryptedBytes)
decryptedValue = decryptedBytes.decode('utf-8')
# Log audit event for decryption
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access(
user_id=user_id,
mandate_id="system",
key_name=key_name,
audit_logger.logKeyAccess(
userId=userId,
mandateId="system",
keyName=keyName,
action="decrypt"
)
except Exception:
# Don't fail if audit logging fails
pass
return decrypted_value
return decryptedValue
except Exception as e:
raise ValueError(f"Decryption failed: {e}")

View file

@@ -120,8 +120,8 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
debug_file = os.path.join(debug_dir, "debug_workflow.log")
# Format the debug entry
from modules.shared.timezoneUtils import get_utc_timestamp
timestamp = get_utc_timestamp()
from modules.shared.timezoneUtils import getUtcTimestamp
timestamp = getUtcTimestamp()
debug_entry = f"[{timestamp}] [{context}] {message}\n"
# Write to debug file

View file

@@ -102,7 +102,7 @@ def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
return obj
def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
"""
Generic merger for root-level lists: take first dict as base; for each subsequent part:
- if value is list and same key exists as list, extend it
@@ -112,7 +112,7 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
"""
base: Optional[Dict[str, Any]] = None
parsed: List[Dict[str, Any]] = []
for part in json_parts:
for part in jsonParts:
if isinstance(part, (dict, list)):
obj = part
else:
@@ -146,61 +146,61 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
# Strategy 1: Try to extract sections from the entire text first
# This handles cases where the JSON structure is broken but content is intact
extracted_sections = _extractSectionsRegex(text)
if extracted_sections:
logger.info(f"Extracted {len(extracted_sections)} sections using regex")
extractedSections = _extractSectionsRegex(text)
if extractedSections:
logger.info(f"Extracted {len(extractedSections)} sections using regex")
return {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": [{"sections": extracted_sections}]
"documents": [{"sections": extractedSections}]
}
# Strategy 2: Progressive parsing - try to find longest valid prefix
best_result = None
best_valid_length = 0
bestResult = None
bestValidLength = 0
# Try different step sizes to find the best valid JSON
for step_size in [100, 50, 10, 1]:
for i in range(len(text), 0, -step_size):
test_str = text[:i]
closed_str = _closeJsonStructures(test_str)
obj, err, _ = tryParseJson(closed_str)
for stepSize in [100, 50, 10, 1]:
for i in range(len(text), 0, -stepSize):
testStr = text[:i]
closedStr = _closeJsonStructures(testStr)
obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
best_result = obj
best_valid_length = i
logger.debug(f"Progressive parsing success at length {i} (step: {step_size})")
bestResult = obj
bestValidLength = i
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
break
if best_result:
if bestResult:
break
if best_result:
logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})")
if bestResult:
logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
# Check if we have sections in the result
sections = extractSectionsFromDocument(best_result)
sections = extractSectionsFromDocument(bestResult)
if sections:
logger.info(f"Progressive parsing found {len(sections)} sections")
return best_result
return bestResult
else:
# No sections found in progressive parsing, try to extract from broken part
logger.info("Progressive parsing found no sections, trying to extract from broken part")
extracted_sections = _extractSectionsRegex(text[best_valid_length:])
if extracted_sections:
logger.info(f"Extracted {len(extracted_sections)} sections from broken part")
extractedSections = _extractSectionsRegex(text[bestValidLength:])
if extractedSections:
logger.info(f"Extracted {len(extractedSections)} sections from broken part")
# Merge with the valid part
if "documents" not in best_result:
best_result["documents"] = []
if not best_result["documents"]:
best_result["documents"] = [{"sections": []}]
best_result["documents"][0]["sections"].extend(extracted_sections)
return best_result
if "documents" not in bestResult:
bestResult["documents"] = []
if not bestResult["documents"]:
bestResult["documents"] = [{"sections": []}]
bestResult["documents"][0]["sections"].extend(extractedSections)
return bestResult
# Strategy 3: Structure closing - close incomplete structures
closed_str = _closeJsonStructures(text)
obj, err, _ = tryParseJson(closed_str)
closedStr = _closeJsonStructures(text)
obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
logger.info("Repaired JSON using structure closing")
return obj
@@ -217,16 +217,16 @@ def _closeJsonStructures(text: str) -> str:
return text
# Count open/close brackets and braces
open_braces = text.count('{')
close_braces = text.count('}')
open_brackets = text.count('[')
close_brackets = text.count(']')
openBraces = text.count('{')
closeBraces = text.count('}')
openBrackets = text.count('[')
closeBrackets = text.count(']')
# Close incomplete structures
result = text
for _ in range(open_braces - close_braces):
for _ in range(openBraces - closeBraces):
result += '}'
for _ in range(open_brackets - close_brackets):
for _ in range(openBrackets - closeBrackets):
result += ']'
return result
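# Editor's note: worked examples (not part of the commit) of the naive
# closer above.
_closeJsonStructures('{"a": 1')       # -> '{"a": 1}'        (parses)
_closeJsonStructures('{"a": [1, 2')   # -> '{"a": [1, 2}]'   (braces are
# appended before brackets, so nesting order is not preserved; the caller
# re-checks with tryParseJson and falls back to the other strategies)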
@@ -242,32 +242,32 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
sections = []
# Pattern to find section objects
section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
sectionPattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
for match in re.finditer(section_pattern, text, re.IGNORECASE):
section_id = match.group(1)
content_type = match.group(2)
for match in re.finditer(sectionPattern, text, re.IGNORECASE):
sectionId = match.group(1)
contentType = match.group(2)
order = int(match.group(3))
# Try to extract elements array - look for the elements array after this section
elements_match = re.search(
elementsMatch = re.search(
r'"elements"\s*:\s*\[(.*?)\]',
text[match.end():match.end()+5000] # Look ahead for elements (large range)
)
elements = []
if elements_match:
if elementsMatch:
try:
elements_str = '[' + elements_match.group(1) + ']'
elements = json.loads(elements_str)
elementsStr = '[' + elementsMatch.group(1) + ']'
elements = json.loads(elementsStr)
except:
# If JSON parsing fails, try to extract individual items manually
elements_text = elements_match.group(1)
elements = _extractElementsFromText(elements_text, content_type)
elementsText = elementsMatch.group(1)
elements = _extractElementsFromText(elementsText, contentType)
sections.append({
"id": section_id,
"content_type": content_type,
"id": sectionId,
"content_type": contentType,
"elements": elements,
"order": order
})
@@ -279,7 +279,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
return sections
def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]:
def _extractElementsFromText(elementsText: str, contentType: str) -> List[Dict[str, Any]]:
"""
Extract elements from text when JSON parsing fails.
Generic approach that works for any content type.
@@ -290,11 +290,11 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
elements = []
if content_type == "list":
if contentType == "list":
# Look for {"text": "..."} patterns, including incomplete ones
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
# Also look for incomplete patterns like {"text": "36
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
# Combine both complete and incomplete items
all_items = text_items + incomplete_items
@@ -303,41 +303,41 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items]
elif content_type == "paragraph":
elif contentType == "paragraph":
# Look for {"text": "..."} patterns, including incomplete ones
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = text_items + incomplete_items
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items]
elif content_type == "heading":
elif contentType == "heading":
# Look for {"level": X, "text": "..."} patterns, including incomplete ones
heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text)
incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elementsText)
incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = heading_items + incomplete_heading_items
unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"level": level, "text": text} for level, text in unique_items]
elif content_type == "table":
elif contentType == "table":
# Look for table patterns
table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text)
table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elementsText)
for headers_str, rows_str, caption in table_items:
# Extract headers
headers = re.findall(r'"([^"]+)"', headers_str)
@@ -354,31 +354,31 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
"caption": caption
})
elif content_type == "code":
elif contentType == "code":
# Look for {"code": "...", "language": "..."} patterns, including incomplete ones
code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text)
incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elementsText)
incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"code": code, "language": lang} for code, lang in unique_items]
else:
# Generic fallback - look for any text content, including incomplete
text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text)
incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elementsText)
incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
all_items = text_items + incomplete_text_items
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
# Remove the last item if it appears to be incomplete/corrupted
if unique_items:
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
elements = [{"text": item} for item in unique_items]

View file

@@ -6,7 +6,7 @@ Ensures all timestamps are properly handled as UTC.
from datetime import datetime, timezone
import time
def get_utc_now() -> datetime:
def getUtcNow() -> datetime:
"""
Get current time in UTC with timezone info.
@@ -15,7 +15,7 @@ def get_utc_now() -> datetime:
"""
return datetime.now(timezone.utc)
def get_utc_timestamp() -> float:
def getUtcTimestamp() -> float:
"""
Get current UTC timestamp (seconds since epoch with millisecond precision).
@@ -24,14 +24,14 @@ def get_utc_timestamp() -> float:
"""
return time.time()
def create_expiration_timestamp(expires_in_seconds: int) -> float:
def createExpirationTimestamp(expiresInSeconds: int) -> float:
"""
Create a new expiration timestamp from seconds until expiration.
Args:
expires_in_seconds (int): Seconds until expiration
expiresInSeconds (int): Seconds until expiration
Returns:
float: UTC timestamp in seconds
"""
return get_utc_timestamp() + expires_in_seconds
return getUtcTimestamp() + expiresInSeconds
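# Editor's note: a usage sketch (not part of the commit).
expiresAt = createExpirationTimestamp(3600)   # one hour from now
isExpired = getUtcTimestamp() >= expiresAt    # False until then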

View file

@@ -22,13 +22,11 @@ class AdaptiveLearningEngine:
workflowId: str, attemptNumber: int):
"""Record validation result and learn from it"""
try:
actionType = actionContext.get('actionType', 'unknown')
actionName = actionContext.get('actionName', 'unknown')
# Store validation history
validationEntry = {
'workflowId': workflowId,
'actionType': actionType,
'actionName': actionName,
'attemptNumber': attemptNumber,
'validationResult': validationResult,
@@ -42,17 +40,17 @@
# Track patterns
if validationResult.get('overallSuccess', False):
self.successPatterns[actionType].append(validationEntry)
self.successPatterns[actionName].append(validationEntry)
else:
self.failurePatterns[actionType].append(validationEntry)
self.failurePatterns[actionName].append(validationEntry)
# Update attempt count
self.actionAttempts[f"{workflowId}:{actionType}"] += 1
self.actionAttempts[f"{workflowId}:{actionName}"] += 1
# Generate learning insights
self._generateLearningInsights(workflowId, actionType)
self._generateLearningInsights(workflowId, actionName)
logger.info(f"Recorded validation for {actionType} (attempt {attemptNumber}): "
logger.info(f"Recorded validation for {actionName} (attempt {attemptNumber}): "
f"Success={validationResult.get('overallSuccess', False)}, "
f"Quality={validationResult.get('qualityScore', 0.0)}")
@@ -86,21 +84,21 @@
logger.error(f"Error generating adaptive context: {str(e)}")
return {}
def getAdaptiveContextForParameters(self, workflowId: str, actionType: str,
def getAdaptiveContextForParameters(self, workflowId: str, actionName: str,
parametersContext: str) -> Dict[str, Any]:
"""Generate adaptive context for parameter selection prompt"""
try:
# Get validation history for this specific action type
# Get validation history for this specific action name
actionValidations = [
v for v in self.validationHistory
if v['workflowId'] == workflowId and v['actionType'] == actionType
if v['workflowId'] == workflowId and v['actionName'] == actionName
][-3:] # Last 3 attempts for this action
# Analyze what went wrong in previous attempts
failureAnalysis = self._analyzeParameterFailures(actionValidations)
# Generate specific parameter guidance
parameterGuidance = self._generateParameterGuidance(actionType, parametersContext, failureAnalysis)
parameterGuidance = self._generateParameterGuidance(actionName, parametersContext, failureAnalysis)
return {
'actionValidations': actionValidations,
@@ -206,36 +204,28 @@
return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available."
def _generateParameterGuidance(self, actionType: str, parametersContext: str,
def _generateParameterGuidance(self, actionName: str, parametersContext: str,
failureAnalysis: Dict[str, Any]) -> str:
"""Generate specific parameter guidance based on previous failures"""
"""Generate generic parameter guidance based on previous failures (no app-specific logic)."""
if not failureAnalysis.get('hasFailures', False):
return "No previous parameter failures. Use standard parameter values."
guidance_parts = []
guidanceParts = []
# Add attempt awareness
# Attempt awareness
attemptNumber = failureAnalysis.get('attemptNumber', 1)
if attemptNumber >= 3:
guidance_parts.append(f"ATTEMPT #{attemptNumber}: Previous attempts failed. Adjust parameters based on validation feedback.")
if attemptNumber and attemptNumber >= 3:
guidanceParts.append(f"Attempt #{attemptNumber}: Adjust parameters based on validation feedback.")
# Add specific parameter guidance based on action type
if actionType == "outlook.composeAndSendEmailWithContext":
guidance_parts.append("EMAIL PARAMETER GUIDANCE:")
guidance_parts.append("- context: Be very specific about account (valueon), appointment time (Friday), and requirements")
guidance_parts.append("- emailStyle: Use 'formal' for business emails")
guidance_parts.append("- maxLength: Set to 2000+ for detailed emails with summaries")
# Add specific guidance based on common failures
commonIssues = failureAnalysis.get('commonIssues', {})
if any("account" in str(issue).lower() for issue in commonIssues.keys()):
guidance_parts.append("- context: MUST specify 'from valueon account' explicitly")
if any("attachment" in str(issue).lower() for issue in commonIssues.keys()):
guidance_parts.append("- documentList: Ensure PDF is properly referenced")
if any("summary" in str(issue).lower() for issue in commonIssues.keys()):
guidance_parts.append("- context: MUST request '10-12 sentence German summary' explicitly")
# Generic issues summary
commonIssues = failureAnalysis.get('commonIssues', {}) or {}
if commonIssues:
guidanceParts.append("Address the following parameter issues:")
for issueKey, issueDesc in commonIssues.items():
guidanceParts.append(f"- {issueKey}: {issueDesc}")
return "\n".join(guidance_parts) if guidance_parts else "Use standard parameter values."
# Keep guidance format stable
return "\n".join(guidanceParts) if guidanceParts else "Use standard parameter values."
def _getEscalationLevel(self, workflowId: str) -> str:
"""Determine escalation level based on failure patterns"""
@@ -251,7 +241,7 @@
else:
return "low"
def _generateLearningInsights(self, workflowId: str, actionType: str):
def _generateLearningInsights(self, workflowId: str, actionName: str):
"""Generate learning insights for a workflow"""
if workflowId not in self.learningInsights:
self.learningInsights[workflowId] = {}
@@ -263,7 +253,7 @@
'totalAttempts': len(workflowValidations),
'successfulAttempts': len([v for v in workflowValidations if v['success']]),
'failedAttempts': len([v for v in workflowValidations if not v['success']]),
'lastActionType': actionType,
'lastActionName': actionName,
'escalationLevel': self._getEscalationLevel(workflowId)
}

View file

@@ -26,14 +26,14 @@ class ContentValidator:
if isinstance(data, dict) and 'content' in data:
content = data['content']
# For large content, check size before converting to string
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
# For very large content, return a size indicator instead
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content)
else:
content = data
# For large content, check size before converting to string
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
return str(content)
return ""

View file

@@ -30,7 +30,7 @@ class IntentAnalyzer:
analysisPrompt = f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')}
USER REQUEST: {self.services.utils.sanitizePromptContent(userPrompt, 'userinput')}
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}

View file

@@ -571,7 +571,7 @@ class ActionplanMode(BaseMode):
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@@ -715,7 +715,7 @@ class ActionplanMode(BaseMode):
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

View file

@@ -98,7 +98,12 @@ class ReactMode(BaseMode):
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
# Validate ONLY the produced JSON (structured content), not rendered files
from types import SimpleNamespace
validationDocs = []
if hasattr(result, 'content') and result.content:
validationDocs.append(SimpleNamespace(documentName='generated.json', documentData={'content': result.content}))
validationResult = await self.contentValidator.validateContent(validationDocs, self.workflowIntent)
observation['contentValidation'] = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
@@ -106,9 +111,9 @@
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
# NEW: Record validation result for adaptive learning
actionValue = selection.get('action', 'unknown')
actionContext = {
'actionType': selection.get('action', {}).get('action', 'unknown'),
'actionName': selection.get('action', {}).get('action', 'unknown'),
'actionName': actionValue,
'workflowId': context.workflow_id
}
@@ -747,7 +752,7 @@ Return only the user-friendly message, no technical details."""
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@@ -838,7 +843,7 @@ Return only the user-friendly message, no technical details."""
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

View file

@@ -215,7 +215,7 @@ class WorkflowManager:
" }\n"
" ]\n"
"}\n\n"
f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}"
f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
)
# Call AI analyzer (planning call - will use static parameters)

View file

@@ -0,0 +1,107 @@
Module,Function Names,Parameter Names,Variable Names,Total
modules/workflows/methods/methodSharepoint.py,0,2,211,213
modules/workflows/methods/methodOutlook.py,0,3,131,134
modules/services/serviceAi/subDocumentProcessing.py,0,0,104,104
modules/features/syncDelta/mainSyncDelta.py,1,10,88,99
modules/shared/jsonUtils.py,0,3,88,91
modules/services/serviceGeneration/renderers/rendererDocx.py,3,8,79,90
modules/services/serviceWorkflow/mainServiceWorkflow.py,0,3,85,88
modules/services/serviceGeneration/renderers/rendererPptx.py,2,7,73,82
modules/services/serviceGeneration/renderers/rendererPdf.py,3,8,50,61
modules/connectors/connectorVoiceGoogle.py,1,2,52,55
modules/services/serviceGeneration/renderers/rendererHtml.py,3,6,46,55
modules/services/serviceGeneration/renderers/rendererBaseTemplate.py,3,21,27,51
modules/shared/configuration.py,2,17,30,49
modules/services/serviceExtraction/subMerger.py,2,5,31,38
modules/connectors/connectorDbPostgre.py,0,14,20,34
modules/interfaces/interfaceDbAppObjects.py,0,8,26,34
modules/routes/routeSecurityGoogle.py,0,0,32,32
modules/shared/attributeUtils.py,3,4,25,32
modules/interfaces/interfaceDbChatObjects.py,0,4,27,31
modules/routes/routeSecurityAdmin.py,0,2,28,30
modules/services/serviceNeutralization/subProcessList.py,7,0,22,29
modules/services/serviceGeneration/renderers/rendererText.py,3,7,19,29
modules/routes/routeSecurityMsft.py,0,0,27,27
modules/services/serviceGeneration/renderers/rendererMarkdown.py,3,7,17,27
modules/services/serviceGeneration/renderers/rendererXlsx.py,3,0,24,27
modules/services/serviceGeneration/renderers/rendererImage.py,3,2,21,26
modules/security/tokenManager.py,4,7,14,25
modules/workflows/workflowManager.py,0,0,25,25
modules/services/serviceGeneration/renderers/rendererCsv.py,3,5,17,25
modules/shared/auditLogger.py,5,16,3,24
modules/shared/debugLogger.py,0,0,24,24
modules/workflows/processing/shared/placeholderFactory.py,0,0,24,24
modules/interfaces/interfaceDbAppAccess.py,0,2,21,23
modules/connectors/connectorTicketsJira.py,0,0,22,22
modules/services/serviceGeneration/renderers/registry.py,7,3,12,22
modules/routes/routeDataConnections.py,1,1,19,21
modules/security/tokenRefreshService.py,0,2,19,21
modules/services/serviceExtraction/extractors/extractorPptx.py,0,1,16,17
modules/routes/routeSecurityLocal.py,0,0,16,16
modules/workflows/methods/methodBase.py,0,4,12,16
modules/services/serviceGeneration/mainServiceGeneration.py,0,4,11,15
modules/services/serviceUtils/mainServiceUtils.py,0,14,1,15
modules/features/neutralizePlayground/mainNeutralizePlayground.py,8,5,2,15
modules/interfaces/interfaceTicketObjects.py,0,5,9,14
modules/services/serviceNeutralization/subParseString.py,7,0,6,13
modules/workflows/processing/modes/modeReact.py,0,1,11,12
modules/interfaces/interfaceDbComponentAccess.py,0,2,9,11
modules/services/serviceAi/subCoreAi.py,0,0,11,11
modules/services/serviceExtraction/subRegistry.py,0,0,11,11
modules/services/serviceNeutralization/mainServiceNeutralization.py,0,2,9,11
modules/interfaces/interfaceAiObjects.py,0,0,10,10
modules/services/serviceAi/subSharedAiUtils.py,0,3,7,10
modules/connectors/connectorDbJson.py,0,3,6,9
modules/workflows/methods/methodAi.py,0,0,9,9
modules/services/serviceExtraction/subPromptBuilderExtraction.py,0,0,9,9
modules/services/serviceGeneration/subDocumentUtility.py,0,3,6,9
modules/services/serviceNeutralization/subProcessCommon.py,7,2,0,9
modules/services/serviceNeutralization/subProcessText.py,5,0,4,9
modules/interfaces/interfaceDbChatAccess.py,0,2,6,8
modules/security/auth.py,0,1,7,8
modules/aicore/aicorePluginAnthropic.py,0,0,7,7
modules/security/tokenRefreshMiddleware.py,0,2,4,6
modules/services/serviceGeneration/renderers/rendererJson.py,3,0,3,6
analyze_naming_violations.py,5,0,0,5
modules/aicore/aicorePluginOpenai.py,0,0,5,5
modules/routes/routeVoiceGoogle.py,0,0,5,5
modules/shared/eventManagement.py,2,3,0,5
modules/workflows/processing/adaptive/intentAnalyzer.py,0,0,5,5
modules/workflows/processing/shared/executionState.py,0,5,0,5
modules/services/serviceGeneration/subJsonSchema.py,0,0,5,5
modules/services/serviceNeutralization/subPatterns.py,5,0,0,5
modules/services/serviceNeutralization/subProcessBinary.py,4,0,1,5
modules/services/serviceExtraction/extractors/extractorXlsx.py,0,0,5,5
modules/interfaces/interfaceDbComponentObjects.py,0,3,1,4
modules/routes/routeDataNeutralization.py,0,0,4,4
modules/routes/routeWorkflows.py,0,0,4,4
modules/shared/timezoneUtils.py,3,1,0,4
modules/workflows/processing/adaptive/contentValidator.py,0,0,4,4
modules/workflows/processing/core/messageCreator.py,0,0,4,4
modules/services/serviceSharepoint/mainServiceSharepoint.py,0,0,4,4
modules/routes/routeDataUsers.py,0,0,3,3
modules/services/serviceExtraction/subPipeline.py,0,0,3,3
app.py,0,0,2,2
modules/datamodels/datamodelChat.py,0,1,1,2
modules/routes/routeAttributes.py,0,0,2,2
modules/routes/routeDataPrompts.py,0,0,2,2
modules/security/csrf.py,0,1,1,2
modules/security/jwtService.py,0,0,2,2
modules/workflows/processing/adaptive/learningEngine.py,0,0,2,2
modules/workflows/processing/modes/modeActionplan.py,0,0,2,2
modules/workflows/processing/shared/methodDiscovery.py,0,0,2,2
modules/services/serviceNormalization/mainServiceNormalization.py,0,0,2,2
modules/services/serviceExtraction/extractors/extractorImage.py,0,0,2,2
modules/aicore/aicoreBase.py,0,0,1,1
modules/aicore/aicoreModelSelector.py,0,0,1,1
modules/connectors/connectorTicketsClickup.py,0,0,1,1
modules/datamodels/datamodelDocument.py,0,1,0,1
modules/datamodels/datamodelSecurity.py,0,0,1,1
modules/routes/routeAdmin.py,0,0,1,1
modules/routes/routeDataFiles.py,0,0,1,1
modules/workflows/processing/workflowProcessor.py,0,0,1,1
modules/workflows/processing/adaptive/adaptiveLearningEngine.py,0,0,1,1
modules/workflows/processing/core/actionExecutor.py,0,0,1,1
modules/workflows/processing/core/taskPlanner.py,0,0,1,1
modules/workflows/processing/modes/modeBase.py,0,0,1,1
modules/services/serviceAi/subDocumentGeneration.py,0,0,1,1

View file

@ -0,0 +1,184 @@
# Analysis: `processDocumentsWithContinuation` and Subfunctions Usage
## Executive Summary
**FINDING**: The function `processDocumentsWithContinuation` in `subDocumentProcessing.py` is **NOT USED** anywhere in the active codebase. The continuation chain was only referenced by the deleted `subDocumentGeneration.py` module.
---
## Main Function: `processDocumentsWithContinuation`
**Location**: `gateway/modules/services/serviceAi/subDocumentProcessing.py:303`
**Status**: ❌ **NOT USED**
### Usage Search Results
- ❌ No actual code calls to `.processDocumentsWithContinuation(`
- ⚠️ Only mentioned in documentation files:
- `wiki/poweron/appdoc/doc_system_function_relationship_ai.md` (documentation)
- `gateway/callAiWithDocumentGeneration_usage_analysis.md` (previous analysis - noted it was called by deleted code)
### Why It's Not Used
The only caller was `subDocumentGeneration._processDocumentsUnified()`, which we just deleted. The current active codebase uses `subCoreAi.callAiDocuments()`, which has its own continuation logic via `_callAiWithLooping()`.
---
## Function Call Chain Analysis
```
processDocumentsWithContinuation (line 303) - ❌ NOT USED
├─> _buildContinuationPrompt (line 319, 324) - ❌ ONLY USED HERE
└─> _processWithContinuationLoop (line 322, 373) - ❌ ONLY USED HERE
├─> _buildContinuationIterationPrompt (line 393, 459) - ❌ ONLY USED HERE
└─> processDocumentsPerChunkJsonWithPrompt (line 402) - ⚠️ ONLY USED BY THIS UNUSED CHAIN
```
---
## Subfunction Analysis
### 1. `_buildContinuationPrompt`
**Location**: Line 324-371
**Status**: ✅ **USED** (but only internally)
**Called by**: `processDocumentsWithContinuation` (line 319)
**Effectively**: ❌ **UNUSED** (because parent function is unused)
**Internal Usage**:
- Called from `processDocumentsWithContinuation` at line 319
**Functionality**:
- Builds a prompt with continuation instructions
- Adds JSON structure requirements with `"continue": true/false` flag
- Adds `continuation_context` field specification
**Note**: This uses a different continuation pattern than `SubCoreAi._callAiWithLooping()`:
- This uses a `"continue": true/false` flag plus a `"continuation_context"` field for document sections
- SubCoreAi uses `buildContinuationContext()` with `last_raw_json`
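To make that first pattern concrete, here is a minimal sketch of a continuation-prompt builder, assuming the `"continue"`/`"continuation_context"` fields noted above; the helper name and exact wording are hypothetical, not the real `_buildContinuationPrompt`:
```python
# Hypothetical sketch of the continuation-prompt pattern described above;
# the real _buildContinuationPrompt (line 324-371) may differ in wording.
def buildContinuationPromptSketch(basePrompt: str) -> str:
    continuationInstructions = (
        "\n\nIf the full output does not fit in one response, return valid JSON with:\n"
        '  "continue": true or false (true when more sections remain)\n'
        '  "continuation_context": {...} (state needed to resume, e.g. last section id)\n'
        "Always close the JSON structure so each response parses on its own."
    )
    return basePrompt + continuationInstructions
```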
---
### 2. `_processWithContinuationLoop`
**Location**: Line 373-457
**Status**: ✅ **USED** (but only internally)
**Called by**: `processDocumentsWithContinuation` (line 322)
**Effectively**: ❌ **UNUSED** (because parent function is unused)
**Internal Usage**:
- Called from `processDocumentsWithContinuation` at line 322
**External Dependencies**:
- Calls `self._buildContinuationIterationPrompt()` (line 393)
- Calls `self.processDocumentsPerChunkJsonWithPrompt()` (line 402)
**Functionality**:
- Implements continuation loop (max 10 iterations)
- Accumulates sections across iterations
- Checks `continue` flag and `continuation_context` to determine if more iterations needed
- Builds final result with accumulated sections
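A minimal sketch of that loop shape, assuming the flag/field names and the 10-iteration cap noted above; the helper names and return shape are hypothetical:
```python
# Hypothetical sketch of the loop described above; the real
# _processWithContinuationLoop (line 373-457) differs in detail.
MAX_ITERATIONS = 10  # cap noted in the functionality list above

async def continuationLoopSketch(callAi, buildIterationPrompt, basePrompt):
    sections = []
    prompt = basePrompt
    for _ in range(MAX_ITERATIONS):
        result = await callAi(prompt)  # assumed to return a parsed JSON dict
        sections.extend(result.get("sections", []))
        context = result.get("continuation_context")
        if not result.get("continue") or not context:
            break  # model signalled completion (or gave no resume state)
        prompt = buildIterationPrompt(context, sections)
    return {"sections": sections}  # final result with accumulated sections
```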
---
### 3. `_buildContinuationIterationPrompt`
**Location**: Line 459-498
**Status**: ✅ **USED** (but only internally)
**Called by**: `_processWithContinuationLoop` (line 393)
**Effectively**: ❌ **UNUSED** (because parent chain is unused)
**Internal Usage**:
- Called from `_processWithContinuationLoop` at line 393 (in loop, conditionally)
**Functionality**:
- Builds a prompt for continuation iteration with context
- Includes summary of previously generated content (last 3 sections)
- Includes continuation instructions with last section ID, element index, remaining requirements
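A rough sketch of such an iteration prompt, assuming the context fields named above; all identifiers here are hypothetical:
```python
# Hypothetical sketch of the iteration prompt described above; the real
# _buildContinuationIterationPrompt (line 459-498) may use different fields.
def buildIterationPromptSketch(context: dict, sections: list) -> str:
    recent = sections[-3:]  # summary covers only the last 3 sections
    summary = "\n".join(str(s.get("title", "untitled")) for s in recent)
    return (
        f"Previously generated sections:\n{summary}\n\n"
        f"Resume after section {context.get('lastSectionId')} "
        f"(element index {context.get('elementIndex')}).\n"
        f"Remaining requirements: {context.get('remainingRequirements')}"
    )
```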
---
### 4. `processDocumentsPerChunkJsonWithPrompt`
**Location**: Line 219-301
**Status**: ⚠️ **ONLY USED BY THE UNUSED CONTINUATION CHAIN**
**Called by**:
- `_processWithContinuationLoop` (line 402)
- Also referenced in backup files (not active code)
**Internal Usage**:
- Called from `_processWithContinuationLoop` at line 402
**External Usage Search**:
- ✅ Used internally by continuation loop
- ⚠️ Referenced in `local/backup/backup_mainServiceAi.py.txt` (backup file, not active)
- ❌ Not used by any other active code
**Functionality**:
- Processes documents with per-chunk AI calls
- Uses a custom prompt instead of default extraction prompt
- Returns merged JSON document
**Note**: This function itself is only called from the continuation loop. However, it is a more general function that could be useful, so it is not dead code in the strict sense; it is just currently reachable only from unused code.
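For illustration, a minimal sketch of the per-chunk pattern described above; chunking, prompt wiring, and merging in the real function (line 219-301) differ, and all names here are hypothetical:
```python
# Hypothetical sketch of per-chunk processing with a custom prompt.
async def perChunkJsonSketch(callAi, chunks: list, customPrompt: str) -> dict:
    merged = {"sections": []}
    for chunk in chunks:
        # One AI call per chunk, using the caller-supplied prompt instead
        # of the default extraction prompt.
        partial = await callAi(f"{customPrompt}\n\n{chunk}")
        merged["sections"].extend(partial.get("sections", []))
    return merged  # merged JSON document across all chunks
```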
---
## Summary Table
| Function | Line | Status | Called By | Effectively Used? |
|----------|------|--------|-----------|-------------------|
| `processDocumentsWithContinuation` | 303 | ❌ Not used | (external) | ❌ No |
| `_buildContinuationPrompt` | 324 | ✅ Used internally | `processDocumentsWithContinuation:319` | ❌ No |
| `_processWithContinuationLoop` | 373 | ✅ Used internally | `processDocumentsWithContinuation:322` | ❌ No |
| `_buildContinuationIterationPrompt` | 459 | ✅ Used internally | `_processWithContinuationLoop:393` | ❌ No |
| `processDocumentsPerChunkJsonWithPrompt` | 219 | ✅ Used internally | `_processWithContinuationLoop:402` | ⚠️ **ONLY USED BY UNUSED CODE** |
---
## Current Active Implementation
The active continuation logic is in `subCoreAi.callAiDocuments()` → `_callAiWithLooping()`:
- Uses `buildGenerationPrompt()` with `continuationContext` parameter
- Uses `buildContinuationContext()` to build context from sections
- Different continuation pattern (uses `last_raw_json` instead of `continuation_context`)
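For contrast, a rough sketch of what such a context might carry, assuming the `last_raw_json` key noted above; this is not the real `buildContinuationContext()` signature:
```python
# Hypothetical sketch only: the active pattern resumes from the last raw
# JSON rather than a "continue"/"continuation_context" pair.
def buildContinuationContextSketch(sections: list, lastRawJson: str) -> dict:
    return {
        "completedSections": [s.get("id") for s in sections],
        "last_raw_json": lastRawJson,  # key noted in the analysis above
    }
```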
---
## Dead Code Identification
**Completely Unused Chain** (can be safely removed):
1. ✅ `processDocumentsWithContinuation` - entry point, not called
2. ✅ `_buildContinuationPrompt` - only used by #1
3. ✅ `_processWithContinuationLoop` - only used by #1
4. ✅ `_buildContinuationIterationPrompt` - only used by #3
**Potentially Unused** (only used by dead code):
- ⚠️ `processDocumentsPerChunkJsonWithPrompt` - only caller is dead code, but function is general-purpose
---
## Recommendations
1. **Remove Dead Code Chain**: All four functions (`processDocumentsWithContinuation`, `_buildContinuationPrompt`, `_processWithContinuationLoop`, `_buildContinuationIterationPrompt`) can be safely removed.
2. **For `processDocumentsPerChunkJsonWithPrompt`**:
- **Option A**: Remove if not needed (it's only used by the dead continuation chain)
- **Option B**: Keep if it might be useful for future custom prompt processing
- **Recommendation**: Since it's a general-purpose function that could be useful, keep it but note that it's currently unused.
3. **If Keeping**: Document why this continuation logic exists but is unused, or mark as deprecated/legacy alternative to `_callAiWithLooping()`.
---
## Verification Commands
To verify these findings:
```bash
# Search for actual function calls (should return no results for the main function)
grep -r "\.processDocumentsWithContinuation(" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup
# Search for _buildContinuationPrompt usage (should only find the definition)
grep -r "_buildContinuationPrompt" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
# Search for _processWithContinuationLoop usage (should only find the definition)
grep -r "_processWithContinuationLoop" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
```

View file

@ -39,7 +39,7 @@ else:
# Import encryption functions
try:
from modules.shared.configuration import encrypt_value
from modules.shared.configuration import encryptValue
except ImportError as e:
print(f"Error: Could not import encryption functions from shared.configuration: {e}")
print(f"Make sure you're running this script from the gateway directory")
@ -254,7 +254,7 @@ def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_b
print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the environment type from the file
encrypted_value = encrypt_value(value, file_env_type)
encrypted_value = encryptValue(value, file_env_type)
# Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n"

View file

@ -30,7 +30,7 @@ from datetime import datetime
# Add the modules directory to the Python path
sys.path.insert(0, str(Path(__file__).parent / 'modules'))
from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
from modules.shared.configuration import encryptValue, decryptValue, isEncryptedValue
def get_env_type_from_file(file_path: Path) -> str:
"""
@ -247,7 +247,7 @@ def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool =
print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the environment type from the file
encrypted_value = encrypt_value(value, file_env_type)
encrypted_value = encryptValue(value, file_env_type)
# Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n"
@ -360,8 +360,8 @@ def main():
# Handle decryption
if args.decrypt:
if _is_encrypted_value(args.decrypt):
decrypted = decrypt_value(args.decrypt)
if isEncryptedValue(args.decrypt):
decrypted = decryptValue(args.decrypt)
print(f"Decrypted value: {decrypted}")
else:
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
@ -411,7 +411,7 @@ def main():
return
# Encrypt the value
encrypted_value = encrypt_value(value_to_encrypt, args.env)
encrypted_value = encryptValue(value_to_encrypt, args.env)
print(f"\n✓ Encryption successful!")
print(f"Environment: {args.env or 'current'}")