refactored whole codebase for camelCase part 1 of 2

parent 26b2109844 · commit c44fc92568

86 changed files with 3969 additions and 5005 deletions
analyze_naming_violations.py (new file, 242 lines)

@@ -0,0 +1,242 @@
"""
Script to analyze codebase for snake_case naming violations that should be camelStyle.
Excludes routes (decorated endpoint functions) and JSON field names.
"""
import ast
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple
import csv

# Patterns to exclude (external library interfaces, etc.)
EXCLUDE_PATTERNS = [
    r'@.*\.(get|post|put|delete|patch|options|head)',  # FastAPI route decorators
    r'self\.(db|db_|model|orm)',  # Database ORM attributes
    r'\.(objects|query|filter|get|all)',  # ORM methods
    r'(request|response|response_model|status_code)',  # FastAPI params
    r'(snake_case|kebab-case)',  # String literals
]

# External library attribute patterns (should not be changed)
EXTERNAL_LIB_ATTRIBUTES = {
    'pydantic', 'fastapi', 'sqlalchemy', 'psycopg', 'requests',
    'aiohttp', 'azure', 'google', 'openai', 'anthropic', 'reportlab',
    'docx', 'pptx', 'openpyxl', 'json', 'logging', 'datetime', 'typing'
}

def isRouteFile(filePath: str) -> bool:
    """Check if file is a route file"""
    return 'routes' in filePath or 'route' in os.path.basename(filePath).lower()

def shouldExcludeName(name: str, context: str = "") -> bool:
    """Check if a name should be excluded from analysis"""
    # Skip if it's a builtin or external library attribute
    if name.startswith('__') and name.endswith('__'):
        return True

    # Skip if context suggests external library usage
    for pattern in EXCLUDE_PATTERNS:
        if re.search(pattern, context, re.IGNORECASE):
            return True

    return False

def isSnakeCase(name: str) -> bool:
    """Check if a name is snake_case"""
    if not name or name.startswith('_'):
        return False
    # Check if contains underscore and is not all caps
    return '_' in name and not name.isupper()

def analyzeFile(filePath: str) -> Dict[str, List[str]]:
    """Analyze a Python file for naming violations"""
    violations = {
        'functions': [],
        'parameters': [],
        'variables': []
    }

    try:
        with open(filePath, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content, filename=filePath)
    except (SyntaxError, UnicodeDecodeError):
        return violations

    # Track current context
    currentClass = None
    inRouteDecorator = False

    class NamingAnalyzer(ast.NodeVisitor):
        def __init__(self):
            self.violations = violations
            self.currentClass = None
            self.inRouteDecorator = False
            self.functionDefs = []

        def visit_FunctionDef(self, node):
            # Check if this is a route endpoint (has FastAPI decorator)
            isRouteEndpoint = False
            for decorator in node.decorator_list:
                if isinstance(decorator, ast.Attribute):
                    if decorator.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
                        isRouteEndpoint = True
                        break
                elif isinstance(decorator, ast.Call):
                    if isinstance(decorator.func, ast.Attribute):
                        if decorator.func.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']:
                            isRouteEndpoint = True
                            break

            # Skip route endpoint function names
            # But we still need to check their parameters and variables
            funcName = node.name
            if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName):
                self.violations['functions'].append(f"{funcName} (line {node.lineno})")

            # Analyze parameters
            for arg in node.args.args:
                if arg.arg != 'self' and arg.arg != 'cls':
                    paramName = arg.arg
                    if isSnakeCase(paramName) and not shouldExcludeName(paramName):
                        self.violations['parameters'].append(f"{paramName} in {funcName} (line {node.lineno})")

            # Analyze function body for local variables
            for stmt in node.body:
                self.visit(stmt)

        def visit_ClassDef(self, node):
            oldClass = self.currentClass
            self.currentClass = node.name
            self.generic_visit(node)
            self.currentClass = oldClass

        def visit_Assign(self, node):
            for target in node.targets:
                if isinstance(target, ast.Name):
                    varName = target.id
                    # Skip constants (ALL_CAPS), builtins, and private (_xxx)
                    if varName.isupper() or varName.startswith('_'):
                        continue
                    # Local variables should be camelStyle
                    if isSnakeCase(varName) and not shouldExcludeName(varName):
                        self.violations['variables'].append(f"{varName} (line {node.lineno})")

        def visit_For(self, node):
            if isinstance(node.target, ast.Name):
                varName = node.target.id
                if isSnakeCase(varName) and not shouldExcludeName(varName):
                    self.violations['variables'].append(f"{varName} (line {node.lineno})")
            self.generic_visit(node)

        def visit_With(self, node):
            if node.items:
                for item in node.items:
                    if item.optional_vars:
                        if isinstance(item.optional_vars, ast.Name):
                            varName = item.optional_vars.id
                            if isSnakeCase(varName) and not shouldExcludeName(varName):
                                self.violations['variables'].append(f"{varName} (line {node.lineno})")
            self.generic_visit(node)

    analyzer = NamingAnalyzer()
    analyzer.visit(tree)

    return violations

def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]:
    """Analyze entire codebase"""
    results = defaultdict(lambda: {
        'functions': 0,
        'parameters': 0,
        'variables': 0,
        'details': {
            'functions': [],
            'parameters': [],
            'variables': []
        }
    })

    # Handle both absolute and relative paths
    rootPath = Path(rootDir)
    if not rootPath.exists():
        # Try relative to current directory
        rootPath = Path('.').resolve() / rootDir
        if not rootPath.exists():
            # Try just current directory if we're already in gateway
            rootPath = Path('.')

    # Find all Python files
    for pyFile in rootPath.rglob('*.py'):
        # Skip route files for function name analysis (but analyze their internals)
        filePath = str(pyFile.relative_to(rootPath))

        # Skip test files and special scripts
        if 'test' in filePath.lower() or 'tool_' in filePath or '__pycache__' in filePath:
            continue

        violations = analyzeFile(str(pyFile))

        # Check if there are any violations
        totalViolations = len(violations['functions']) + len(violations['parameters']) + len(violations['variables'])
        if totalViolations > 0:
            moduleName = filePath.replace('\\', '/')
            results[moduleName]['functions'] = len(violations['functions'])
            results[moduleName]['parameters'] = len(violations['parameters'])
            results[moduleName]['variables'] = len(violations['variables'])
            results[moduleName]['details'] = violations

    return results

def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'):
    """Generate CSV report"""
    with open(outputFile, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total'])

        # Sort by total violations
        sortedResults = sorted(
            results.items(),
            key=lambda x: x[1]['functions'] + x[1]['parameters'] + x[1]['variables'],
            reverse=True
        )

        rowsWritten = 0
        for module, stats in sortedResults:
            total = stats['functions'] + stats['parameters'] + stats['variables']
            if total > 0:
                writer.writerow([
                    module,
                    stats['functions'],
                    stats['parameters'],
                    stats['variables'],
                    total
                ])
                rowsWritten += 1

        if rowsWritten == 0:
            print("WARNING: No rows written to CSV despite finding violations!")

    print(f"CSV report generated: {outputFile}")
    print(f"Total modules analyzed: {len(results)}")

    # Print summary
    totalFuncs = sum(r['functions'] for r in results.values())
    totalParams = sum(r['parameters'] for r in results.values())
    totalVars = sum(r['variables'] for r in results.values())
    print(f"\nSummary:")
    print(f"  Function names: {totalFuncs}")
    print(f"  Parameter names: {totalParams}")
    print(f"  Variable names: {totalVars}")
    print(f"  Total violations: {totalFuncs + totalParams + totalVars}")

if __name__ == '__main__':
    print("Analyzing codebase for naming violations...")
    results = analyzeCodebase('gateway')

    # Write CSV to gateway directory
    outputPath = Path('gateway') / 'naming_violations_report.csv'
    generateCSV(results, str(outputPath))
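Usage sketch (this editor's illustration; the commit itself only adds the script): run it from the repository root with "python analyze_naming_violations.py", or drive it programmatically:

    from analyze_naming_violations import analyzeCodebase, generateCSV

    results = analyzeCodebase('gateway')
    generateCSV(results, 'gateway/naming_violations_report.csv')
    # CSV columns: Module, Function Names, Parameter Names, Variable Names, Total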
app.py (62 changed lines)

@@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler):
     """

     def __init__(
-        self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs
+        self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs
     ):
-        self.log_dir = log_dir
-        self.filename_prefix = filename_prefix
-        self.current_date = None
-        self.current_file = None
+        self.logDir = logDir
+        self.filenamePrefix = filenamePrefix
+        self.currentDate = None
+        self.currentFile = None

         # Initialize with today's file
-        self._update_file_if_needed()
+        self._updateFileIfNeeded()

         # Call parent constructor with current file
         super().__init__(
-            self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs
+            self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs
         )

-    def _update_file_if_needed(self):
+    def _updateFileIfNeeded(self):
         """Update the log file if the date has changed"""
         today = datetime.now().strftime("%Y%m%d")

-        if self.current_date != today:
-            self.current_date = today
-            new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
+        if self.currentDate != today:
+            self.currentDate = today
+            newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")

-            if self.current_file != new_file:
-                self.current_file = new_file
+            if self.currentFile != newFile:
+                self.currentFile = newFile
                 return True
         return False

     def emit(self, record):
         """Emit a log record, switching files if date has changed"""
         # Check if we need to switch to a new file
-        if self._update_file_if_needed():
+        if self._updateFileIfNeeded():
             # Close current file and open new one
             if self.stream:
                 self.stream.close()
                 self.stream = None

             # Update the baseFilename for the parent class
-            self.baseFilename = self.current_file
+            self.baseFilename = self.currentFile
             # Reopen the stream
             if not self.delay:
                 self.stream = self._open()

@@ -200,10 +200,10 @@ def initLogging():
     backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))

     fileHandler = DailyRotatingFileHandler(
-        log_dir=logDir,
-        filename_prefix="log_app",
-        max_bytes=rotationSize,
-        backup_count=backupCount,
+        logDir=logDir,
+        filenamePrefix="log_app",
+        maxBytes=rotationSize,
+        backupCount=backupCount,
         encoding="utf-8",
     )
     fileHandler.setFormatter(fileFormatter)

@@ -252,7 +252,7 @@ def initLogging():
     )


-def make_sqlalchemy_db_url() -> str:
+def makeSqlalchemyDbUrl() -> str:
     host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost")
     port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432")
     db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway")

@@ -299,17 +299,17 @@ app = FastAPI(

 # Configure OpenAPI security scheme for Swagger UI
 # This adds the "Authorize" button to the /docs page
-security_scheme = HTTPBearer()
+securityScheme = HTTPBearer()
 app.openapi_schema = None  # Reset schema to regenerate with security


-def custom_openapi():
+def customOpenapi():
     if app.openapi_schema:
         return app.openapi_schema

     from fastapi.openapi.utils import get_openapi

-    openapi_schema = get_openapi(
+    openapiSchema = get_openapi(
         title=app.title,
         version="1.0.0",
         description=app.description,

@@ -317,7 +317,7 @@ def custom_openapi():
     )

     # Add security scheme definition
-    openapi_schema["components"]["securitySchemes"] = {
+    openapiSchema["components"]["securitySchemes"] = {
         "BearerAuth": {
             "type": "http",
             "scheme": "bearer",

@@ -328,20 +328,20 @@ def custom_openapi():

     # Apply security globally to all endpoints
     # Individual endpoints can override this if needed
-    openapi_schema["security"] = [{"BearerAuth": []}]
+    openapiSchema["security"] = [{"BearerAuth": []}]

-    app.openapi_schema = openapi_schema
+    app.openapi_schema = openapiSchema
     return app.openapi_schema


-app.openapi = custom_openapi
+app.openapi = customOpenapi


 # Parse CORS origins from environment variable
-def get_allowed_origins():
-    origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
+def getAllowedOrigins():
+    originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
     # Split by comma and strip whitespace
-    origins = [origin.strip() for origin in origins_str.split(",")]
+    origins = [origin.strip() for origin in originsStr.split(",")]
     logger.info(f"CORS allowed origins: {origins}")
     return origins

@@ -349,7 +349,7 @@ def get_allowed_origins():
 # CORS configuration using environment variables
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=get_allowed_origins(),
+    allow_origins=getAllowedOrigins(),
     allow_credentials=True,
     allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
     allow_headers=["*"],
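As an illustration of the renamed helper (the origins value below is invented for this note, not taken from the repository config):

    # With APP_ALLOWED_ORIGINS = "http://localhost:8080,https://app.example.com"
    # getAllowedOrigins() returns:
    #     ["http://localhost:8080", "https://app.example.com"]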
@@ -7,7 +7,7 @@ from pydantic import BaseModel
 import threading
 import time

-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp

 logger = logging.getLogger(__name__)

@@ -232,7 +232,7 @@ class DatabaseConnector:
             raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})")

         # Add metadata
-        currentTime = get_utc_timestamp()
+        currentTime = getUtcTimestamp()
         if "_createdAt" not in record:
             record["_createdAt"] = currentTime
         record["_createdBy"] = self.userId
@@ -6,7 +6,7 @@ import uuid
 from pydantic import BaseModel, Field
 import threading

-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
 from modules.shared.configuration import APP_CONFIG

 logger = logging.getLogger(__name__)

@@ -287,7 +287,7 @@ class DatabaseConnector:
                 INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
                 VALUES (%s, %s, %s)
                 """,
-                (table_name, initial_id, get_utc_timestamp()),
+                (table_name, initial_id, getUtcTimestamp()),
             )

             self.connection.commit()

@@ -611,7 +611,7 @@ class DatabaseConnector:
             raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")

         # Add metadata
-        currentTime = get_utc_timestamp()
+        currentTime = getUtcTimestamp()
         if "_createdAt" not in record:
             record["_createdAt"] = currentTime
         record["_createdBy"] = self.userId
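modules/shared/timezoneUtils itself is not part of this page, so the renamed getUtcTimestamp is only visible at its call sites. From the descriptions used throughout the commit ("UTC timestamp in seconds", stored as float), it presumably reduces to something like this sketch:

    from datetime import datetime, timezone

    def getUtcTimestamp() -> float:
        # Assumed shape only; the real helper may differ.
        return datetime.now(timezone.utc).timestamp()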
@@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase):
             "Content-Type": "application/json",
         }

-    async def read_attributes(self) -> list[TicketFieldAttribute]:
+    async def readAttributes(self) -> list[TicketFieldAttribute]:
         """Fetch field attributes. Uses list custom fields if listId provided; else basic fields."""
         attributes: list[TicketFieldAttribute] = []
         try:

@@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase):
             logger.error(f"ClickUp read_attributes error: {e}")
         return attributes

-    async def read_tasks(self, *, limit: int = 0) -> list[dict]:
+    async def readTasks(self, *, limit: int = 0) -> list[dict]:
         """Read tasks from ClickUp, always returning full task records.
         If list_id is set, read from that list; otherwise read from team.
         """

@@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase):
             logger.error(f"ClickUp read_tasks error: {e}")
         return tasks

-    async def write_tasks(self, tasklist: list[dict]) -> None:
+    async def writeTasks(self, tasklist: list[dict]) -> None:
         """Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}"""
         try:
             async with aiohttp.ClientSession() as session:
@@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase):
         self.ticketType = ticketType


-    async def read_attributes(self) -> list[TicketFieldAttribute]:
+    async def readAttributes(self) -> list[TicketFieldAttribute]:
         """
         Read field attributes from Jira by querying for a single issue
         and extracting the field mappings.

@@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase):
             logger.error(f"Error while calling fields API: {str(e)}")
             return []

-    async def read_tasks(self, *, limit: int = 0) -> list[dict]:
+    async def readTasks(self, *, limit: int = 0) -> list[dict]:
         """
         Read tasks from Jira with pagination support.

@@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase):
             logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}")
             raise

-    async def write_tasks(self, tasklist: list[dict]) -> None:
+    async def writeTasks(self, tasklist: list[dict]) -> None:
         """
         Write/update tasks to Jira.
@@ -26,18 +26,18 @@ class ConnectorGoogleSpeech:
         """
         try:
             # Get JSON key from config.ini
-            api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
+            apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")

-            if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
+            if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
                 raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")

             # Parse the JSON key and set up authentication
             try:
-                credentials_info = json.loads(api_key)
+                credentialsInfo = json.loads(apiKey)

                 # Create credentials object directly (no file needed!)
                 from google.oauth2 import service_account
-                credentials = service_account.Credentials.from_service_account_info(credentials_info)
+                credentials = service_account.Credentials.from_service_account_info(credentialsInfo)

                 logger.info("✅ Using Google Speech credentials from config.ini")

@@ -55,8 +55,8 @@ class ConnectorGoogleSpeech:
             logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
             raise

-    async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
-                             sample_rate: int = None, channels: int = None) -> Dict:
+    async def speech_to_text(self, audioContent: bytes, language: str = "de-DE",
+                             sampleRate: int = None, channels: int = None) -> Dict:
         """
         Convert speech to text using Google Cloud Speech-to-Text API.

@@ -71,8 +71,8 @@ class ConnectorGoogleSpeech:
         """
         try:
             # Auto-detect audio format if not provided
-            if sample_rate is None or channels is None:
-                validation = self.validate_audio_format(audio_content)
+            if sampleRate is None or channels is None:
+                validation = self.validate_audio_format(audioContent)
                 if not validation["valid"]:
                     return {
                         "success": False,

@@ -80,59 +80,59 @@ class ConnectorGoogleSpeech:
                         "confidence": 0.0,
                         "error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
                     }
-                sample_rate = validation["sample_rate"]
+                sampleRate = validation["sample_rate"]
                 channels = validation["channels"]
-                audio_format = validation["format"]
-                logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch")
+                audioFormat = validation["format"]
+                logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")

             logger.info(f"Processing audio with Google Cloud Speech-to-Text")
-            logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")
+            logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")

             # Configure audio settings
-            audio = speech.RecognitionAudio(content=audio_content)
+            audio = speech.RecognitionAudio(content=audioContent)

             # Determine encoding based on detected format
             # Google Cloud Speech API has specific requirements for different formats
-            if audio_format == "webm_opus":
+            if audioFormat == "webm_opus":
                 # For WEBM OPUS, we need to ensure proper format
                 encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
                 # WEBM_OPUS requires specific sample rate handling - must match header
-                if sample_rate != 48000:
-                    logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000")
-                    sample_rate = 48000
+                if sampleRate != 48000:
+                    logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000")
+                    sampleRate = 48000
                 # For WEBM_OPUS, don't specify sample_rate_hertz in config
                 # Google Cloud will read it from the WEBM header
-                use_sample_rate = False
-            elif audio_format == "linear16":
+                useSampleRate = False
+            elif audioFormat == "linear16":
                 # For LINEAR16 format (PCM)
                 encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
                 # Ensure sample rate is reasonable
-                if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
-                    logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000")
-                    sample_rate = 16000
-                use_sample_rate = True
-            elif audio_format == "mp3":
+                if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
+                    logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000")
+                    sampleRate = 16000
+                useSampleRate = True
+            elif audioFormat == "mp3":
                 # For MP3 format
                 encoding = speech.RecognitionConfig.AudioEncoding.MP3
-                use_sample_rate = True
-            elif audio_format == "flac":
+                useSampleRate = True
+            elif audioFormat == "flac":
                 # For FLAC format
                 encoding = speech.RecognitionConfig.AudioEncoding.FLAC
-                use_sample_rate = True
-            elif audio_format == "wav":
+                useSampleRate = True
+            elif audioFormat == "wav":
                 # For WAV format
                 encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
-                use_sample_rate = True
+                useSampleRate = True
             else:
                 # For unknown formats, try LINEAR16 as fallback
                 encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
-                sample_rate = 16000  # Use standard sample rate
+                sampleRate = 16000  # Use standard sample rate
                 channels = 1  # Use mono
-                use_sample_rate = True
-                logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz")
+                useSampleRate = True
+                logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz")

             # Build config based on format requirements
-            config_params = {
+            configParams = {
                 "encoding": encoding,
                 "audio_channel_count": channels,
                 "language_code": language,

@@ -145,13 +145,13 @@ class ConnectorGoogleSpeech:
             }

             # Only add sample_rate_hertz if needed (not for WEBM_OPUS)
-            if use_sample_rate:
-                config_params["sample_rate_hertz"] = sample_rate
-                logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}")
+            if useSampleRate:
+                configParams["sample_rate_hertz"] = sampleRate
+                logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}")
             else:
                 logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")

-            config = speech.RecognitionConfig(**config_params)
+            config = speech.RecognitionConfig(**configParams)

             # Perform speech recognition
             logger.info("Sending audio to Google Cloud Speech-to-Text...")

@@ -162,12 +162,12 @@ class ConnectorGoogleSpeech:
                 response = self.speech_client.recognize(config=config, audio=audio)
                 logger.debug(f"Google Cloud response: {response}")

-            except Exception as api_error:
-                logger.error(f"Google Cloud API error: {api_error}")
+            except Exception as apiError:
+                logger.error(f"Google Cloud API error: {apiError}")
                 # Try with different encoding as fallback
                 if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
                     logger.info("Trying fallback with LINEAR16 encoding...")
-                    fallback_config = speech.RecognitionConfig(
+                    fallbackConfig = speech.RecognitionConfig(
                         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                         sample_rate_hertz=16000,  # Use standard sample rate
                         audio_channel_count=1,

@@ -177,13 +177,13 @@ class ConnectorGoogleSpeech:
                     )

                     try:
-                        response = self.speech_client.recognize(config=fallback_config, audio=audio)
+                        response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
                         logger.debug(f"Google Cloud fallback response: {response}")
-                    except Exception as fallback_error:
-                        logger.error(f"Google Cloud fallback error: {fallback_error}")
-                        raise api_error
+                    except Exception as fallbackError:
+                        logger.error(f"Google Cloud fallback error: {fallbackError}")
+                        raise apiError
                 else:
-                    raise api_error
+                    raise apiError

             # Process results
             if response.results:

@@ -234,18 +234,18 @@ class ConnectorGoogleSpeech:

             if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
                 # For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
-                if audio_format != "webm_opus":
+                if audioFormat != "webm_opus":
                     # Try LINEAR16 with detected sample rate for non-WEBM formats
                     fallback_configs.append({
                         "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
-                        "sample_rate": sample_rate,
+                        "sample_rate": sampleRate,
                         "channels": channels,
                         "use_sample_rate": True,
-                        "description": f"LINEAR16 with {sample_rate}Hz"
+                        "description": f"LINEAR16 with {sampleRate}Hz"
                     })

             # For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
-            if audio_format == "webm_opus":
+            if audioFormat == "webm_opus":
                 # Try WEBM_OPUS without sample rate specification (let Google read from header)
                 fallback_configs.append({
                     "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,

@@ -273,7 +273,7 @@ class ConnectorGoogleSpeech:
             else:
                 # For other formats, try standard sample rates
                 for std_rate in [16000, 8000, 22050, 44100]:
-                    if std_rate != sample_rate:
+                    if std_rate != sampleRate:
                         fallback_configs.append({
                             "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
                             "sample_rate": std_rate,
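A hypothetical call site for the renamed speech_to_text parameters, based only on the signature and the result keys visible in these hunks (connector construction is not shown on this page):

    # audioBytes: bytes of a recorded clip; connector: a ConnectorGoogleSpeech instance.
    result = await connector.speech_to_text(audioBytes, language="de-DE")
    if not result["success"]:
        logger.error(result.get("error"))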
@@ -3,8 +3,8 @@
 from typing import List, Dict, Any, Optional
 from enum import Enum
 from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
 import uuid

@@ -26,7 +26,7 @@ class ChatStat(BaseModel):
     priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation")


-register_model_labels(
+registerModelLabels(
     "ChatStat",
     {"en": "Chat Statistics", "fr": "Statistiques de chat"},
     {

@@ -51,7 +51,7 @@ class ChatLog(BaseModel):
     message: str = Field(description="Log message")
     type: str = Field(description="Log type (info, warning, error, etc.)")
     timestamp: float = Field(
-        default_factory=get_utc_timestamp,
+        default_factory=getUtcTimestamp,
         description="When the log entry was created (UTC timestamp in seconds)",
     )
     status: Optional[str] = Field(None, description="Status of the log entry")

@@ -63,7 +63,7 @@ class ChatLog(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ChatLog",
     {"en": "Chat Log", "fr": "Journal de chat"},
     {

@@ -96,7 +96,7 @@ class ChatDocument(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ChatDocument",
     {"en": "Chat Document", "fr": "Document de chat"},
     {

@@ -133,7 +133,7 @@ class ContentMetadata(BaseModel):
     base64Encoded: bool = Field(description="Whether the data is base64 encoded")


-register_model_labels(
+registerModelLabels(
     "ContentMetadata",
     {"en": "Content Metadata", "fr": "Métadonnées du contenu"},
     {

@@ -157,7 +157,7 @@ class ContentItem(BaseModel):
     metadata: ContentMetadata = Field(description="Content metadata")


-register_model_labels(
+registerModelLabels(
     "ContentItem",
     {"en": "Content Item", "fr": "Élément de contenu"},
     {

@@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ChatContentExtracted",
     {"en": "Extracted Content", "fr": "Contenu extrait"},
     {

@@ -209,7 +209,7 @@ class ChatMessage(BaseModel):
         description="Sequence number of the message (set automatically)"
     )
     publishedAt: float = Field(
-        default_factory=get_utc_timestamp,
+        default_factory=getUtcTimestamp,
         description="When the message was published (UTC timestamp in seconds)",
     )
     success: Optional[bool] = Field(

@@ -235,7 +235,7 @@ class ChatMessage(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ChatMessage",
     {"en": "Chat Message", "fr": "Message de chat"},
     {

@@ -331,14 +331,14 @@ class ChatWorkflow(BaseModel):
         frontend_required=False,
     )
     lastActivity: float = Field(
-        default_factory=get_utc_timestamp,
+        default_factory=getUtcTimestamp,
         description="Timestamp of last activity (UTC timestamp in seconds)",
         frontend_type="timestamp",
         frontend_readonly=True,
         frontend_required=False,
     )
     startedAt: float = Field(
-        default_factory=get_utc_timestamp,
+        default_factory=getUtcTimestamp,
         description="When the workflow started (UTC timestamp in seconds)",
         frontend_type="timestamp",
         frontend_readonly=True,

@@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ChatWorkflow",
     {"en": "Chat Workflow", "fr": "Flux de travail de chat"},
     {

@@ -426,7 +426,7 @@ class UserInputRequest(BaseModel):
     userLanguage: str = Field(default="en", description="User's preferred language")


-register_model_labels(
+registerModelLabels(
     "UserInputRequest",
     {"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
     {

@@ -445,7 +445,7 @@ class ActionDocument(BaseModel):
     mimeType: str = Field(description="MIME type of the document")


-register_model_labels(
+registerModelLabels(
     "ActionDocument",
     {"en": "Action Document", "fr": "Document d'action"},
     {

@@ -485,7 +485,7 @@ class ActionResult(BaseModel):
         return cls(success=False, documents=documents or [], error=error)


-register_model_labels(
+registerModelLabels(
     "ActionResult",
     {"en": "Action Result", "fr": "Résultat de l'action"},
     {

@@ -504,7 +504,7 @@ class ActionSelection(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ActionSelection",
     {"en": "Action Selection", "fr": "Sélection d'action"},
     {

@@ -520,7 +520,7 @@ class ActionParameters(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ActionParameters",
     {"en": "Action Parameters", "fr": "Paramètres d'action"},
     {

@@ -535,7 +535,7 @@ class ObservationPreview(BaseModel):
     snippet: str = Field(description="Short snippet or summary")


-register_model_labels(
+registerModelLabels(
     "ObservationPreview",
     {"en": "Observation Preview", "fr": "Aperçu d'observation"},
     {

@@ -558,7 +558,7 @@ class Observation(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "Observation",
     {"en": "Observation", "fr": "Observation"},
     {

@@ -579,7 +579,7 @@ class TaskStatus(str, Enum):
     CANCELLED = "cancelled"


-register_model_labels(
+registerModelLabels(
     "TaskStatus",
     {"en": "Task Status", "fr": "Statut de la tâche"},
     {

@@ -599,7 +599,7 @@ class DocumentExchange(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "DocumentExchange",
     {"en": "Document Exchange", "fr": "Échange de documents"},
     {

@@ -650,7 +650,7 @@ class ActionItem(BaseModel):
         self.error = error_message


-register_model_labels(
+registerModelLabels(
     "ActionItem",
     {"en": "Task Action", "fr": "Action de tâche"},
     {

@@ -683,7 +683,7 @@ class TaskResult(BaseModel):
     error: Optional[str] = Field(None, description="Error message if task failed")


-register_model_labels(
+registerModelLabels(
     "TaskResult",
     {"en": "Task Result", "fr": "Résultat de tâche"},
     {

@@ -728,7 +728,7 @@ class TaskItem(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "TaskItem",
     {"en": "Task", "fr": "Tâche"},
     {

@@ -758,7 +758,7 @@ class TaskStep(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "TaskStep",
     {"en": "Task Step", "fr": "Étape de tâche"},
     {

@@ -805,7 +805,7 @@ class TaskHandover(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "TaskHandover",
     {"en": "Task Handover", "fr": "Transfert de tâche"},
     {

@@ -879,7 +879,7 @@ class ReviewResult(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "ReviewResult",
     {"en": "Review Result", "fr": "Résultat de l'évaluation"},
     {

@@ -904,7 +904,7 @@ class TaskPlan(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "TaskPlan",
     {"en": "Task Plan", "fr": "Plan de tâches"},
     {

@@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "PromptPlaceholder",
     {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
     {

@@ -943,7 +943,7 @@ class PromptBundle(BaseModel):
     placeholders: List[PromptPlaceholder] = Field(default_factory=list)


-register_model_labels(
+registerModelLabels(
     "PromptBundle",
     {"en": "Prompt Bundle", "fr": "Lot d'invite"},
     {
@@ -81,11 +81,11 @@ class StructuredDocument(BaseModel):
     summary: Optional[str] = Field(default=None, description="Document summary")
     tags: List[str] = Field(default_factory=list, description="Document tags")

-    def get_sections_by_type(self, content_type: str) -> List[DocumentSection]:
+    def getSectionsByType(self, content_type: str) -> List[DocumentSection]:
         """Get all sections of a specific content type."""
         return [section for section in self.sections if section.content_type == content_type]

-    def get_all_tables(self) -> List[TableData]:
+    def getAllTables(self) -> List[TableData]:
         """Get all table data from the document."""
         tables = []
         for section in self.sections:

@@ -94,7 +94,7 @@ class StructuredDocument(BaseModel):
                 tables.append(element)
         return tables

-    def get_all_lists(self) -> List[BulletList]:
+    def getAllLists(self) -> List[BulletList]:
         """Get all lists from the document."""
         lists = []
         for section in self.sections:
@@ -2,8 +2,8 @@

 from typing import Dict, Any, Optional, Union
 from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
 import uuid
 import base64

@@ -15,9 +15,9 @@ class FileItem(BaseModel):
     mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
     fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
     fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False)
-    creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+    creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)

-register_model_labels(
+registerModelLabels(
     "FileItem",
     {"en": "File Item", "fr": "Élément de fichier"},
     {

@@ -45,7 +45,7 @@ class FilePreview(BaseModel):
         if isinstance(data.get("content"), bytes):
             data["content"] = base64.b64encode(data["content"]).decode("utf-8")
         return data
-register_model_labels(
+registerModelLabels(
     "FilePreview",
     {"en": "File Preview", "fr": "Aperçu du fichier"},
     {

@@ -62,7 +62,7 @@ class FileData(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
     data: str = Field(description="File data content")
     base64Encoded: bool = Field(description="Whether the data is base64 encoded")
-register_model_labels(
+registerModelLabels(
     "FileData",
     {"en": "File Data", "fr": "Données de fichier"},
     {
modules/datamodels/datamodelJson.py (new file, 90 lines)

@@ -0,0 +1,90 @@
"""
Unified JSON document schema and helpers used by both generation prompts and renderers.

This defines a single canonical template and the supported section types.
"""

from typing import List

# Canonical list of supported section types across the system
supportedSectionTypes: List[str] = [
    "table",
    "bullet_list",
    "heading",
    "paragraph",
    "code_block",
    "image",
]

# Canonical JSON template used for AI generation (documents array + sections)
# Rendering pipelines can select the first document and read its sections.
jsonTemplateDocument: str = """{
  "metadata": {
    "split_strategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  },
  "documents": [
    {
      "id": "doc_1",
      "title": "{{DOCUMENT_TITLE}}",
      "filename": "document.json",
      "sections": [
        {
          "id": "section_heading_example",
          "content_type": "heading",
          "elements": [
            {"level": 1, "text": "Heading Text"}
          ],
          "order": 0
        },
        {
          "id": "section_paragraph_example",
          "content_type": "paragraph",
          "elements": [
            {"text": "Paragraph text content"}
          ],
          "order": 0
        },
        {
          "id": "section_bullet_list_example",
          "content_type": "bullet_list",
          "elements": [
            {
              "items": ["Item 1", "Item 2"]
            }
          ],
          "order": 0
        },
        {
          "id": "section_table_example",
          "content_type": "table",
          "elements": [
            {
              "headers": ["Column 1", "Column 2"],
              "rows": [
                ["Row 1 Col 1", "Row 1 Col 2"],
                ["Row 2 Col 1", "Row 2 Col 2"]
              ],
              "caption": "Table caption"
            }
          ],
          "order": 0
        },
        {
          "id": "section_code_example",
          "content_type": "code_block",
          "elements": [
            {
              "code": "function example() { return true; }",
              "language": "javascript"
            }
          ],
          "order": 0
        }
      ]
    }
  ]
}"""
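A minimal consumption sketch for the template above (the iteration is this editor's illustration, not code from the commit; the import path matches the new file):

    import json
    from modules.datamodels.datamodelJson import jsonTemplateDocument, supportedSectionTypes

    doc = json.loads(jsonTemplateDocument)["documents"][0]
    for section in sorted(doc["sections"], key=lambda s: s["order"]):
        assert section["content_type"] in supportedSectionTypes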
@@ -3,7 +3,7 @@
 import uuid
 from typing import Optional
 from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
+from modules.shared.attributeUtils import registerModelLabels


 class DataNeutraliserConfig(BaseModel):

@@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel):
     namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
     sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
     sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
-register_model_labels(
+registerModelLabels(
     "DataNeutraliserConfig",
     {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
     {

@@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel):
     originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
     fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
     patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
-register_model_labels(
+registerModelLabels(
     "DataNeutralizerAttributes",
     {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
     {
@@ -2,8 +2,8 @@

 from typing import Optional
 from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
 from .datamodelUam import AuthAuthority
 from enum import Enum
 import uuid

@@ -51,7 +51,7 @@ class Token(BaseModel):
         use_enum_values = True


-register_model_labels(
+registerModelLabels(
     "Token",
     {"en": "Token", "fr": "Jeton"},
     {

@@ -95,7 +95,7 @@ class AuthEvent(BaseModel):
         frontend_required=True,
     )
     timestamp: float = Field(
-        default_factory=get_utc_timestamp,
+        default_factory=getUtcTimestamp,
         description="Unix timestamp when the event occurred",
         frontend_type="datetime",
         frontend_readonly=True,

@@ -131,7 +131,7 @@ class AuthEvent(BaseModel):
     )


-register_model_labels(
+registerModelLabels(
     "AuthEvent",
     {"en": "Authentication Event", "fr": "Événement d'authentification"},
     {
@@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel):

 class TicketBase(ABC):
     @abstractmethod
-    async def read_attributes(self) -> list[TicketFieldAttribute]: ...
+    async def readAttributes(self) -> list[TicketFieldAttribute]: ...

     @abstractmethod
-    async def read_tasks(self, *, limit: int = 0) -> list[dict]: ...
+    async def readTasks(self, *, limit: int = 0) -> list[dict]: ...

     @abstractmethod
-    async def write_tasks(self, tasklist: list[dict]) -> None: ...
+    async def writeTasks(self, tasklist: list[dict]) -> None: ...
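For orientation, a minimal in-memory implementation of the renamed interface (illustrative only; TicketBase and TicketFieldAttribute come from this file, while the connector class itself is hypothetical):

    class InMemoryTicketConnector(TicketBase):
        """Sketch: satisfies the three renamed abstract methods."""

        def __init__(self):
            self.tasks: list[dict] = []

        async def readAttributes(self) -> list[TicketFieldAttribute]:
            return []

        async def readTasks(self, *, limit: int = 0) -> list[dict]:
            return self.tasks[:limit] if limit else list(self.tasks)

        async def writeTasks(self, tasklist: list[dict]) -> None:
            self.tasks.extend(tasklist)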
@@ -4,8 +4,8 @@ import uuid
 from typing import Optional
 from enum import Enum
 from pydantic import BaseModel, Field, EmailStr
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp


 class AuthAuthority(str, Enum):

@@ -34,7 +34,7 @@ class Mandate(BaseModel):
         {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
     ])
     enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
-register_model_labels(
+registerModelLabels(
     "Mandate",
     {"en": "Mandate", "fr": "Mandat"},
     {

@@ -62,8 +62,8 @@ class UserConnection(BaseModel):
         {"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}},
         {"value": "pending", "label": {"en": "Pending", "fr": "En attente"}},
     ])
-    connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
-    lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+    connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+    lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
     expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
     tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[
         {"value": "active", "label": {"en": "Active", "fr": "Actif"}},

@@ -71,7 +71,7 @@ class UserConnection(BaseModel):
         {"value": "none", "label": {"en": "None", "fr": "Aucun"}},
     ])
     tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
-register_model_labels(
+registerModelLabels(
     "UserConnection",
     {"en": "User Connection", "fr": "Connexion utilisateur"},
     {

@@ -113,7 +113,7 @@ class User(BaseModel):
         {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
     ])
     mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
-register_model_labels(
+registerModelLabels(
     "User",
     {"en": "User", "fr": "Utilisateur"},
     {

@@ -131,7 +131,7 @@ register_model_labels(

 class UserInDB(User):
     hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
-register_model_labels(
+registerModelLabels(
     "UserInDB",
     {"en": "User Access", "fr": "Accès de l'utilisateur"},
     {"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
@@ -1,7 +1,7 @@
 """Utility datamodels: Prompt."""

 from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
+from modules.shared.attributeUtils import registerModelLabels
 import uuid


@@ -10,7 +10,7 @@ class Prompt(BaseModel):
     mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
     content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
     name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
-register_model_labels(
+registerModelLabels(
     "Prompt",
     {"en": "Prompt", "fr": "Invite"},
     {
@@ -1,8 +1,8 @@
 """Voice settings datamodel."""

 from pydantic import BaseModel, Field
-from modules.shared.attributeUtils import register_model_labels
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.attributeUtils import registerModelLabels
+from modules.shared.timezoneUtils import getUtcTimestamp
 import uuid


@@ -15,11 +15,11 @@ class VoiceSettings(BaseModel):
     ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True)
     translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
     targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False)
-    creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
-    lastModified: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+    creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
+    lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)


-register_model_labels(
+registerModelLabels(
     "VoiceSettings",
     {"en": "Voice Settings", "fr": "Paramètres vocaux"},
     {
|
||||
|
|
|
|||
|
|
@ -43,10 +43,6 @@ class NeutralizationPlayground:
|
|||
'errors': errors,
|
||||
}
|
||||
|
||||
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
|
||||
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
|
||||
processor = SharepointProcessor(self.currentUser, self.services)
|
||||
return await processor.processSharepointFiles(sourcePath, targetPath)
|
||||
|
||||
# Cleanup attributes
|
||||
def cleanAttributes(self, fileId: str) -> bool:
|
||||
|
|
@@ -77,49 +73,51 @@ class NeutralizationPlayground:
         }

     # Additional methods needed by the route
-    def get_config(self) -> Optional[DataNeutraliserConfig]:
+    def getConfig(self) -> Optional[DataNeutraliserConfig]:
         """Get neutralization configuration"""
         return self.services.neutralization.getConfig()

-    def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
+    def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig:
         """Save neutralization configuration"""
-        return self.services.neutralization.saveConfig(config_data)
+        return self.services.neutralization.saveConfig(configData)

-    def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]:
+    def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]:
         """Neutralize text content"""
         return self.services.neutralization.processText(text)

-    def resolve_text(self, text: str) -> str:
+    def resolveText(self, text: str) -> str:
         """Resolve UIDs in neutralized text back to original text"""
         return self.services.neutralization.resolveText(text)

-    def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]:
+    def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
         """Get neutralization attributes, optionally filtered by file ID"""
         try:
-            all_attributes = self.services.neutralization.getAttributes()
-            if file_id:
-                return [attr for attr in all_attributes if attr.fileId == file_id]
-            return all_attributes
+            allAttributes = self.services.neutralization.getAttributes()
+            if fileId:
+                return [attr for attr in allAttributes if attr.fileId == fileId]
+            return allAttributes
         except Exception as e:
             logger.error(f"Error getting attributes: {str(e)}")
             return []

-    async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
+    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
         """Process files from SharePoint source path and store neutralized files in target path"""
-        return await self.processSharepointFiles(source_path, target_path)
+        from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
+        processor = SharepointProcessor(self.currentUser, self.services)
+        return await processor.processSharepointFiles(sourcePath, targetPath)

-    def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
+    def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]:
         """Process multiple files for neutralization"""
-        file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')]
-        return self.processFiles(file_ids)
+        fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')]
+        return self.processFiles(fileIds)

-    def get_processing_stats(self) -> Dict[str, Any]:
+    def getProcessingStats(self) -> Dict[str, Any]:
         """Get neutralization processing statistics"""
         return self.getStats()

-    def cleanup_file_attributes(self, file_id: str) -> bool:
+    def cleanupFileAttributes(self, fileId: str) -> bool:
         """Clean up neutralization attributes for a specific file"""
-        return self.cleanAttributes(file_id)
+        return self.cleanAttributes(fileId)


 # Internal SharePoint helper module separated to keep feature logic tidy
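All of the method renames above follow one mechanical rule. A minimal, illustrative sketch of that conversion (not the actual script used for this commit):

def toCamelCase(name: str) -> str:
    """Convert a snake_case identifier to camelCase, e.g. get_config -> getConfig."""
    head, *rest = name.split('_')
    return head + ''.join(part.capitalize() for part in rest)

assert toCamelCase("get_config") == "getConfig"
assert toCamelCase("cleanup_file_attributes") == "cleanupFileAttributes"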
@@ -208,7 +206,7 @@ class SharepointProcessor:
             siteUrl, _ = self._parseSharepointPath(sharepointPath)
             if not siteUrl:
                 return False
-            siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl)
+            siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl)
             return siteInfo is not None
         except Exception:
             return False
@@ -219,17 +217,17 @@ class SharepointProcessor:
         targetSite, targetFolder = self._parseSharepointPath(targetPath)
         if not sourceSite or not targetSite:
             return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
-        sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite)
+        sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite)
         if not sourceSiteInfo:
             return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
-        targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite)
+        targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite)
         if not targetSiteInfo:
             return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
         logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
-        files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
+        files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder)
         if not files:
             logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
-            files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '')
+            files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '')
             if files:
                 folders = [f for f in files if f.get('type') == 'folder']
                 folderNames = [f.get('name') for f in folders]
@@ -251,7 +249,7 @@ class SharepointProcessor:

         async def _processSingle(fileInfo: Dict[str, Any]):
             try:
-                fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id'])
+                fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id'])
                 if not fileContent:
                     return {'error': f"Failed to download file: {fileInfo['name']}"}
                 try:
@@ -260,7 +258,7 @@ class SharepointProcessor:
                     textContent = fileContent.decode('latin-1')
                 result = self.services.neutralization.processText(textContent)
                 neutralizedFilename = f"neutralized_{fileInfo['name']}"
-                uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
+                uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
                 if 'error' in uploadResult:
                     return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
                 return {
@@ -204,9 +204,9 @@ class ManagerSyncDelta:
             logger.info(
                 f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
             )
-            resolved = await self.services.sharepoint.find_site_by_url(
+            resolved = await self.services.sharepoint.findSiteByUrl(
                 hostname=self.SHAREPOINT_HOSTNAME,
-                site_path=self.SHAREPOINT_SITE_PATH
+                sitePath=self.SHAREPOINT_SITE_PATH
             )

             if not resolved:
@@ -223,9 +223,9 @@ class ManagerSyncDelta:

             # Test site access by listing root of the drive
             logger.info("Testing site access using resolved site ID...")
-            test_result = await self.services.sharepoint.list_folder_contents(
-                site_id=self.targetSite["id"],
-                folder_path=""
+            test_result = await self.services.sharepoint.listFolderContents(
+                siteId=self.targetSite["id"],
+                folderPath=""
             )

             if test_result is not None:
@@ -293,8 +293,8 @@ class ManagerSyncDelta:
         existing_headers = {"header1": "Header 1", "header2": "Header 2"}
         try:
             file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
-            excel_content = await self.services.sharepoint.download_file_by_path(
-                site_id=self.targetSite['id'], file_path=file_path
+            excel_content = await self.services.sharepoint.downloadFileByPath(
+                siteId=self.targetSite['id'], filePath=file_path
             )
             existing_data, existing_headers = self.parseExcelContent(excel_content)
         except Exception:
@@ -307,16 +307,16 @@ class ManagerSyncDelta:

         await self.backupSharepointFile(filename=sync_file_name)
         excel_bytes = self.createExcelContent(merged_data, existing_headers)
-        await self.services.sharepoint.upload_file(
-            site_id=self.targetSite['id'],
-            folder_path=self.SHAREPOINT_MAIN_FOLDER,
-            file_name=sync_file_name,
+        await self.services.sharepoint.uploadFile(
+            siteId=self.targetSite['id'],
+            folderPath=self.SHAREPOINT_MAIN_FOLDER,
+            fileName=sync_file_name,
             content=excel_bytes,
         )
         # Import back to tickets
         try:
-            excel_content = await self.services.sharepoint.download_file_by_path(
-                site_id=self.targetSite['id'], file_path=file_path
+            excel_content = await self.services.sharepoint.downloadFileByPath(
+                siteId=self.targetSite['id'], filePath=file_path
             )
             excel_rows, _ = self.parseExcelContent(excel_content)
             self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets")
@@ -333,8 +333,8 @@ class ManagerSyncDelta:
         existing_data: list[dict] = []
         try:
             file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
-            csv_content = await self.services.sharepoint.download_file_by_path(
-                site_id=self.targetSite['id'], file_path=file_path
+            csv_content = await self.services.sharepoint.downloadFileByPath(
+                siteId=self.targetSite['id'], filePath=file_path
             )
             csv_lines = csv_content.decode('utf-8').split('\n')
             if len(csv_lines) >= 2:
@@ -348,16 +348,16 @@ class ManagerSyncDelta:
         await self.backupSharepointFile(filename=sync_file_name)
         merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data)
         csv_bytes = self.createCsvContent(merged_data, existing_headers)
-        await self.services.sharepoint.upload_file(
-            site_id=self.targetSite['id'],
-            folder_path=self.SHAREPOINT_MAIN_FOLDER,
-            file_name=sync_file_name,
+        await self.services.sharepoint.uploadFile(
+            siteId=self.targetSite['id'],
+            folderPath=self.SHAREPOINT_MAIN_FOLDER,
+            fileName=sync_file_name,
             content=csv_bytes,
         )
         # Import from CSV
         try:
-            csv_content = await self.services.sharepoint.download_file_by_path(
-                site_id=self.targetSite['id'], file_path=file_path
+            csv_content = await self.services.sharepoint.downloadFileByPath(
+                siteId=self.targetSite['id'], filePath=file_path
             )
             df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python')
             csv_rows = df.to_dict('records')
@@ -388,12 +388,12 @@ class ManagerSyncDelta:
         try:
             timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S")
             backup_filename = f"backup_{timestamp}_{filename}"
-            await self.services.sharepoint.copy_file_async(
-                site_id=self.targetSite['id'],
-                source_folder=self.SHAREPOINT_MAIN_FOLDER,
-                source_file=filename,
-                dest_folder=self.SHAREPOINT_BACKUP_FOLDER,
-                dest_file=backup_filename,
+            await self.services.sharepoint.copyFileAsync(
+                siteId=self.targetSite['id'],
+                sourceFolder=self.SHAREPOINT_MAIN_FOLDER,
+                sourceFile=filename,
+                destFolder=self.SHAREPOINT_BACKUP_FOLDER,
+                destFile=backup_filename,
             )
             self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}")
             return True
@@ -679,7 +679,7 @@ class ManagerSyncDelta:
             connectorType=connectorType,
             connectorParams=connectorParams,
         )
-        attributes = await ticket_interface.connector_ticket.read_attributes()
+        attributes = await ticket_interface.connector_ticket.readAttributes()
         if not attributes:
             logger.warning("No ticket attributes returned; nothing to write.")
             return False
@@ -713,7 +713,7 @@ class ManagerSyncDelta:
             connectorType=connectorType,
             connectorParams=connectorParams,
         )
-        tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit)
+        tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit)
         if not tickets:
             logger.warning("No tickets returned; nothing to write.")
             return False
@@ -54,8 +54,6 @@ class AiObjects:
         # No need to manually create connectors - they're auto-discovered
         return cls()

-
-
     def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
         """Select the best model using dynamic model selection system."""
         # Get available models from the dynamic registry
@@ -10,7 +10,7 @@ import uuid

 from modules.connectors.connectorDbPostgre import DatabaseConnector
 from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
 from modules.interfaces.interfaceDbAppAccess import AppAccess
 from modules.datamodels.datamodelUam import (
     User,
@@ -604,8 +604,8 @@ class AppObjects:
             externalUsername=externalUsername,
             externalEmail=externalEmail,
             status=status,
-            connectedAt=get_utc_timestamp(),
-            lastChecked=get_utc_timestamp(),
+            connectedAt=getUtcTimestamp(),
+            lastChecked=getUtcTimestamp(),
             expiresAt=None,  # Optional field, set to None by default
         )
@@ -755,7 +755,7 @@ class AppObjects:
         if not token.id:
             token.id = str(uuid.uuid4())
         if not token.createdAt:
-            token.createdAt = get_utc_timestamp()
+            token.createdAt = getUtcTimestamp()

         # If replace_existing is True, delete old access tokens for this user and authority first
         if replace_existing:
@@ -822,7 +822,7 @@ class AppObjects:
         if not token.id:
             token.id = str(uuid.uuid4())
         if not token.createdAt:
-            token.createdAt = get_utc_timestamp()
+            token.createdAt = getUtcTimestamp()

         # Convert to dict and ensure all fields are properly set
         token_dict = token.model_dump()
@@ -932,7 +932,7 @@ class AppObjects:
             return True
         tokenUpdate = {
             "status": TokenStatus.REVOKED,
-            "revokedAt": get_utc_timestamp(),
+            "revokedAt": getUtcTimestamp(),
             "revokedBy": revokedBy,
             "reason": reason or "revoked",
         }
@@ -970,7 +970,7 @@ class AppObjects:
                 t["id"],
                 {
                     "status": TokenStatus.REVOKED,
-                    "revokedAt": get_utc_timestamp(),
+                    "revokedAt": getUtcTimestamp(),
                     "revokedBy": revokedBy,
                     "reason": reason or "session logout",
                 },
@@ -1008,7 +1008,7 @@ class AppObjects:
                 t["id"],
                 {
                     "status": TokenStatus.REVOKED,
-                    "revokedAt": get_utc_timestamp(),
+                    "revokedAt": getUtcTimestamp(),
                     "revokedBy": revokedBy,
                     "reason": reason or "admin revoke",
                 },
@@ -1022,7 +1022,7 @@ class AppObjects:
     def cleanupExpiredTokens(self) -> int:
         """Clean up expired tokens for all connections, returns count of cleaned tokens"""
        try:
-            current_time = get_utc_timestamp()
+            current_time = getUtcTimestamp()
            cleaned_count = 0

            # Get all tokens
@@ -1100,7 +1100,7 @@ class AppObjects:
             # Update existing config
             update_data = existing_config.model_dump()
             update_data.update(config_data)
-            update_data["updatedAt"] = get_utc_timestamp()
+            update_data["updatedAt"] = getUtcTimestamp()

             updated_config = DataNeutraliserConfig(**update_data)
             self.db.recordModify(
@@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User

 # DYNAMIC PART: Connectors to the Interface
 from modules.connectors.connectorDbPostgre import DatabaseConnector
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp

 # Basic Configurations
 from modules.shared.configuration import APP_CONFIG
@@ -66,56 +66,56 @@ class ChatObjects:

     # ===== Generic Utility Methods =====

-    def _is_object_field(self, field_type) -> bool:
+    def _isObjectField(self, fieldType) -> bool:
         """Check if a field type represents a complex object (not a simple type)."""
         # Simple scalar types
-        if field_type in (str, int, float, bool, type(None)):
+        if fieldType in (str, int, float, bool, type(None)):
             return False

         # Everything else is an object
         return True

-    def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
+    def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
         """Separate simple fields from object fields based on Pydantic model structure."""
-        simple_fields = {}
-        object_fields = {}
+        simpleFields = {}
+        objectFields = {}

         # Get field information from the Pydantic model
-        model_fields = model_class.model_fields
+        modelFields = model_class.model_fields

-        for field_name, value in data.items():
+        for fieldName, value in data.items():
             # Check if this field should be stored as JSONB in the database
-            if field_name in model_fields:
-                field_info = model_fields[field_name]
+            if fieldName in modelFields:
+                fieldInfo = modelFields[fieldName]
                 # Pydantic v2 only
-                field_type = field_info.annotation
+                fieldType = fieldInfo.annotation

                 # Always route relational/object fields to object_fields for separate handling
-                if field_name in ['documents', 'stats']:
-                    object_fields[field_name] = value
+                if fieldName in ['documents', 'stats']:
+                    objectFields[fieldName] = value
                     continue

                 # Check if this is a JSONB field (Dict, List, or complex types)
-                if (field_type == dict or
-                    field_type == list or
-                    (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or
-                    field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
+                if (fieldType == dict or
+                    fieldType == list or
+                    (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or
+                    fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
                     # Store as JSONB - include in simple_fields for database storage
-                    simple_fields[field_name] = value
+                    simpleFields[fieldName] = value
                 elif isinstance(value, (str, int, float, bool, type(None))):
                     # Simple scalar types
-                    simple_fields[field_name] = value
+                    simpleFields[fieldName] = value
                 else:
                     # Complex objects that should be filtered out
-                    object_fields[field_name] = value
+                    objectFields[fieldName] = value
             else:
                 # Field not in model - treat as scalar if simple, otherwise filter out
                 if isinstance(value, (str, int, float, bool, type(None))):
-                    simple_fields[field_name] = value
+                    simpleFields[fieldName] = value
                 else:
-                    object_fields[field_name] = value
+                    objectFields[fieldName] = value

-        return simple_fields, object_fields
+        return simpleFields, objectFields

     def _initializeServices(self):
         pass
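A minimal illustration of the split _separateObjectFields is expected to produce, assuming a ChatMessage-like model whose 'documents' field is relational and whose 'content' field is scalar (field names taken from the diff; the exact model shape is an assumption):

# Illustrative only, shown as comments because the model instance is not available here.
# data = {
#     "content": "hello",            # scalar            -> simpleFields
#     "execParameters": {"k": "v"},  # JSONB-style dict  -> simpleFields
#     "documents": [{"name": "a"}],  # relational field  -> objectFields
# }
# simpleFields, objectFields = chatObjects._separateObjectFields(ChatMessage, data)
# simpleFields == {"content": "hello", "execParameters": {"k": "v"}}
# objectFields == {"documents": [{"name": "a"}]}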
@@ -240,8 +240,8 @@ class ChatObjects:
             currentAction=workflow.get("currentAction", 0),
             totalTasks=workflow.get("totalTasks", 0),
             totalActions=workflow.get("totalActions", 0),
-            lastActivity=workflow.get("lastActivity", get_utc_timestamp()),
-            startedAt=workflow.get("startedAt", get_utc_timestamp()),
+            lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
+            startedAt=workflow.get("startedAt", getUtcTimestamp()),
             logs=logs,
             messages=messages,
             stats=stats,
@@ -257,7 +257,7 @@ class ChatObjects:
             raise PermissionError("No permission to create workflows")

         # Set timestamp if not present
-        currentTime = get_utc_timestamp()
+        currentTime = getUtcTimestamp()
         if "startedAt" not in workflowData:
             workflowData["startedAt"] = currentTime
@@ -265,10 +265,10 @@ class ChatObjects:
         workflowData["lastActivity"] = currentTime

         # Use generic field separation based on ChatWorkflow model
-        simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
+        simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)

         # Create workflow in database
-        created = self.db.recordCreate(ChatWorkflow, simple_fields)
+        created = self.db.recordCreate(ChatWorkflow, simpleFields)

         # Convert to ChatWorkflow model (empty related data for new workflow)
@@ -302,13 +302,13 @@ class ChatObjects:
             raise PermissionError(f"No permission to update workflow {workflowId}")

         # Use generic field separation based on ChatWorkflow model
-        simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
+        simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)

         # Set update time for main workflow
-        simple_fields["lastActivity"] = get_utc_timestamp()
+        simpleFields["lastActivity"] = getUtcTimestamp()

         # Update main workflow in database
-        updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields)
+        updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields)

         # Removed cascade writes for logs/messages/stats during workflow update.
         # CUD for child entities must be executed via dedicated service methods.
@@ -423,7 +423,7 @@ class ChatObjects:
                 role=msg.get("role", "assistant"),
                 status=msg.get("status", "step"),
                 sequenceNr=msg.get("sequenceNr", 0),
-                publishedAt=msg.get("publishedAt", get_utc_timestamp()),
+                publishedAt=msg.get("publishedAt", getUtcTimestamp()),
                 success=msg.get("success"),
                 actionId=msg.get("actionId"),
                 actionMethod=msg.get("actionMethod"),
@@ -490,20 +490,30 @@ class ChatObjects:
             messageData["actionNumber"] = workflow.currentAction

         # Use generic field separation based on ChatMessage model
-        simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
+        simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)

         # Handle documents separately - they will be stored in normalized documents table
-        documents_to_create = object_fields.get("documents", [])
+        documents_to_create = objectFields.get("documents", [])

         # Create message in normalized table using only simple fields
-        createdMessage = self.db.recordCreate(ChatMessage, simple_fields)
+        createdMessage = self.db.recordCreate(ChatMessage, simpleFields)

         # Create documents in normalized documents table
         created_documents = []
         for doc_data in documents_to_create:
-            # Use the document data directly
-            doc_dict = doc_data
+            # Normalize to plain dict before assignment
+            if isinstance(doc_data, ChatDocument):
+                doc_dict = doc_data.model_dump()
+            elif isinstance(doc_data, dict):
+                doc_dict = dict(doc_data)
+            else:
+                # Attempt to coerce to ChatDocument then dump
+                try:
+                    doc_dict = ChatDocument(**doc_data).model_dump()
+                except Exception:
+                    logger.error("Invalid document data type for message creation")
+                    continue

             doc_dict["messageId"] = createdMessage["id"]
             created_doc = self.createDocument(doc_dict)
|
|||
role=createdMessage.get("role", "assistant"),
|
||||
status=createdMessage.get("status", "step"),
|
||||
sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number
|
||||
publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()),
|
||||
stats=object_fields.get("stats"), # Use stats from object_fields
|
||||
publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()),
|
||||
stats=objectFields.get("stats"), # Use stats from objectFields
|
||||
roundNumber=createdMessage.get("roundNumber"),
|
||||
taskNumber=createdMessage.get("taskNumber"),
|
||||
actionNumber=createdMessage.get("actionNumber"),
|
||||
|
|
@ -588,31 +598,41 @@ class ChatObjects:
|
|||
raise PermissionError(f"No permission to modify workflow {workflowId}")
|
||||
|
||||
# Use generic field separation based on ChatMessage model
|
||||
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
|
||||
simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
|
||||
|
||||
# Ensure required fields present
|
||||
for key in ["role", "agentName"]:
|
||||
if key not in simple_fields and key not in existingMessage:
|
||||
simple_fields[key] = "assistant" if key == "role" else ""
|
||||
if key not in simpleFields and key not in existingMessage:
|
||||
simpleFields[key] = "assistant" if key == "role" else ""
|
||||
|
||||
# Ensure ID is in the dataset
|
||||
if 'id' not in simple_fields:
|
||||
simple_fields['id'] = messageId
|
||||
if 'id' not in simpleFields:
|
||||
simpleFields['id'] = messageId
|
||||
|
||||
# Convert createdAt to startedAt if needed
|
||||
if "createdAt" in simple_fields and "startedAt" not in simple_fields:
|
||||
simple_fields["startedAt"] = simple_fields["createdAt"]
|
||||
del simple_fields["createdAt"]
|
||||
if "createdAt" in simpleFields and "startedAt" not in simpleFields:
|
||||
simpleFields["startedAt"] = simpleFields["createdAt"]
|
||||
del simpleFields["createdAt"]
|
||||
|
||||
# Update the message with simple fields only
|
||||
updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields)
|
||||
updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields)
|
||||
|
||||
# Handle object field updates (documents, stats) inline
|
||||
if 'documents' in object_fields:
|
||||
documents_data = object_fields['documents']
|
||||
if 'documents' in objectFields:
|
||||
documents_data = objectFields['documents']
|
||||
try:
|
||||
for doc_data in documents_data:
|
||||
doc_dict = doc_data
|
||||
# Normalize to dict before mutation
|
||||
if isinstance(doc_data, ChatDocument):
|
||||
doc_dict = doc_data.model_dump()
|
||||
elif isinstance(doc_data, dict):
|
||||
doc_dict = dict(doc_data)
|
||||
else:
|
||||
try:
|
||||
doc_dict = ChatDocument(**doc_data).model_dump()
|
||||
except Exception:
|
||||
logger.error("Invalid document data type for message update")
|
||||
continue
|
||||
doc_dict["messageId"] = messageId
|
||||
self.createDocument(doc_dict)
|
||||
except Exception as e:
|
||||
|
|
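The normalize-to-dict branches added above appear in both the create and the update path. A hedged sketch of the shared helper they could be folded into (the helper itself is not part of this commit; ChatDocument and logger are the module's own names):

def _coerceDocumentDict(doc_data) -> dict | None:
    """Normalize ChatDocument | dict | raw payload to a plain dict, or None if invalid."""
    if isinstance(doc_data, ChatDocument):
        return doc_data.model_dump()
    if isinstance(doc_data, dict):
        return dict(doc_data)
    try:
        return ChatDocument(**doc_data).model_dump()
    except Exception:
        logger.error("Invalid document data type")
        return None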
@@ -732,11 +752,9 @@ class ChatObjects:
     def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
         """Creates a document for a message in normalized table."""
         try:
-            # Validate document data
+            # Validate and normalize document data to dict
             document = ChatDocument(**documentData)

             # Create document in normalized table
-            created = self.db.recordCreate(ChatDocument, document)
+            created = self.db.recordCreate(ChatDocument, document.model_dump())

             return ChatDocument(**created)
@@ -785,7 +803,7 @@ class ChatObjects:

         # Make sure required fields are present
         if "timestamp" not in logData:
-            logData["timestamp"] = get_utc_timestamp()
+            logData["timestamp"] = getUtcTimestamp()

         # Add status information if not present
         if "status" not in logData and "type" in logData:
@@ -882,7 +900,7 @@ class ChatObjects:
         messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
         for msg in messages:
             # Apply timestamp filtering in Python
-            msg_timestamp = msg.get("publishedAt", get_utc_timestamp())
+            msg_timestamp = msg.get("publishedAt", getUtcTimestamp())
             if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
                 continue
@@ -900,7 +918,7 @@ class ChatObjects:
                 role=msg.get("role", "assistant"),
                 status=msg.get("status", "step"),
                 sequenceNr=msg.get("sequenceNr", 0),
-                publishedAt=msg.get("publishedAt", get_utc_timestamp()),
+                publishedAt=msg.get("publishedAt", getUtcTimestamp()),
                 success=msg.get("success"),
                 actionId=msg.get("actionId"),
                 actionMethod=msg.get("actionMethod"),
@@ -923,7 +941,7 @@ class ChatObjects:
         logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
         for log in logs:
             # Apply timestamp filtering in Python
-            log_timestamp = log.get("timestamp", get_utc_timestamp())
+            log_timestamp = log.get("timestamp", getUtcTimestamp())
             if afterTimestamp is not None and log_timestamp <= afterTimestamp:
                 continue
@@ -938,7 +956,7 @@ class ChatObjects:
         stats = self.getStats(workflowId)
         for stat in stats:
             # Apply timestamp filtering in Python
-            stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp()
+            stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp()
             if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
                 continue
@@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt
 from modules.datamodels.datamodelVoice import VoiceSettings
 from modules.datamodels.datamodelUam import User, Mandate
 from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp

 logger = logging.getLogger(__name__)
@@ -466,7 +466,7 @@ class ComponentObjects:
         # Ensure proper values, use defaults for invalid data
         creationDate = file.get("creationDate")
         if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0:
-            creationDate = get_utc_timestamp()
+            creationDate = getUtcTimestamp()

         fileName = file.get("fileName")
         if not fileName or fileName == "None":
@@ -503,7 +503,7 @@ class ComponentObjects:
         # Get creation date from record or use current time
         creationDate = file.get("creationDate")
         if not creationDate:
-            creationDate = get_utc_timestamp()
+            creationDate = getUtcTimestamp()

         return FileItem(
             id=file.get("id"),
@@ -881,9 +881,9 @@ class ComponentObjects:
         # Ensure timestamps are set for validation
         settings_data = filteredSettings[0]
         if not settings_data.get("creationDate"):
-            settings_data["creationDate"] = get_utc_timestamp()
+            settings_data["creationDate"] = getUtcTimestamp()
         if not settings_data.get("lastModified"):
-            settings_data["lastModified"] = get_utc_timestamp()
+            settings_data["lastModified"] = getUtcTimestamp()

         return VoiceSettings(**settings_data)
@@ -931,7 +931,7 @@ class ComponentObjects:
             raise ValueError(f"Voice settings not found for user {userId}")

         # Update lastModified timestamp
-        updateData["lastModified"] = get_utc_timestamp()
+        updateData["lastModified"] = getUtcTimestamp()

         # Update voice settings record
         success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData)
@@ -31,7 +31,7 @@ class TicketInterface:
         self.task_sync_definition = task_sync_definition

     async def exportTicketsAsList(self) -> list[dict]:
-        tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0)
+        tickets: list[dict] = await self.connector_ticket.readTasks(limit=0)
         transformed_tasks = self._transformTicketRecords(tickets, includePut=True)
         # Return plain dictionaries filtered by presence of ID
         rows: list[dict] = []
@@ -57,7 +57,7 @@ class TicketInterface:
             if fields:
                 updates.append({"ID": task_id, "fields": fields})
         if updates:
-            await self.connector_ticket.write_tasks(updates)
+            await self.connector_ticket.writeTasks(updates)

     def _transformTicketRecords(
         self, tasks: list[dict], includePut: bool = False
@@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List
 from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech
 from modules.datamodels.datamodelVoice import VoiceSettings
 from modules.datamodels.datamodelUam import User
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp

 logger = logging.getLogger(__name__)
@@ -269,7 +269,7 @@ class VoiceObjects:
         logger.info(f"Creating voice settings: {settingsData}")

         # Add timestamps
-        currentTime = get_utc_timestamp()
+        currentTime = getUtcTimestamp()
         settingsData["creationDate"] = currentTime
         settingsData["lastModified"] = currentTime
@@ -298,7 +298,7 @@ class VoiceObjects:
         logger.info(f"Updating voice settings for user {userId}: {settingsData}")

         # Add last modified timestamp
-        settingsData["lastModified"] = get_utc_timestamp()
+        settingsData["lastModified"] = getUtcTimestamp()

         # Create updated VoiceSettings object
         voiceSettings = VoiceSettings(**settingsData)
@@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority,
 from modules.datamodels.datamodelSecurity import Token
 from modules.security.auth import getCurrentUser, limiter
 from modules.interfaces.interfaceDbAppObjects import getInterface
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp

 # Configure logger
 logger = logging.getLogger(__name__)
@@ -64,7 +64,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str,
     if not expires_at:
         return "none", None

-    current_time = get_utc_timestamp()
+    current_time = getUtcTimestamp()

     # Add 5 minute buffer for proactive refresh
     buffer_time = 5 * 60  # 5 minutes in seconds
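The status decision that follows buffer_time in this function is not shown in the hunk. A minimal sketch, assuming the buffer is applied by comparing expiry against current_time plus buffer_time (the function name and return values here are hypothetical):

def classifyToken(expires_at: float, current_time: float, buffer_time: int = 300) -> str:
    """Classify a token by expiry, flagging it for refresh 5 minutes early (assumed logic)."""
    if expires_at <= current_time:
        return "expired"
    if expires_at <= current_time + buffer_time:
        return "expiring"  # refresh proactively
    return "valid"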
@@ -247,7 +247,7 @@ async def update_connection(
             setattr(connection, field, value)

     # Update lastChecked timestamp using UTC timestamp
-    connection.lastChecked = get_utc_timestamp()
+    connection.lastChecked = getUtcTimestamp()

     # Update connection - models now handle timestamp serialization automatically
     interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
@@ -382,7 +382,7 @@ async def disconnect_service(

     # Update connection status
     connection.status = ConnectionStatus.INACTIVE
-    connection.lastChecked = get_utc_timestamp()
+    connection.lastChecked = getUtcTimestamp()

     # Update connection record - models now handle timestamp serialization automatically
     interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
@@ -35,7 +35,7 @@ async def get_neutralization_config(
     """Get data neutralization configuration"""
     try:
         service = NeutralizationPlayground(currentUser)
-        config = service.get_config()
+        config = service.getConfig()

         if not config:
             # Return default config instead of 404
@@ -69,7 +69,7 @@ async def save_neutralization_config(
     """Save or update data neutralization configuration"""
     try:
         service = NeutralizationPlayground(currentUser)
-        config = service.save_config(config_data)
+        config = service.saveConfig(config_data)

         return config
@@ -99,7 +99,7 @@ async def neutralize_text(
         )

     service = NeutralizationPlayground(currentUser)
-    result = service.neutralize_text(text, file_id)
+    result = service.neutralizeText(text, file_id)

     return result
@@ -130,7 +130,7 @@ async def resolve_text(
         )

     service = NeutralizationPlayground(currentUser)
-    resolved_text = service.resolve_text(text)
+    resolved_text = service.resolveText(text)

     return {"resolved_text": resolved_text}
@@ -153,7 +153,7 @@ async def get_neutralization_attributes(
     """Get neutralization attributes, optionally filtered by file ID"""
     try:
         service = NeutralizationPlayground(currentUser)
-        attributes = service.get_attributes(fileId)
+        attributes = service.getAttributes(fileId)

         return attributes
@@ -183,7 +183,7 @@ async def process_sharepoint_files(
         )

     service = NeutralizationPlayground(currentUser)
-    result = await service.process_sharepoint_files(source_path, target_path)
+    result = await service.processSharepointFiles(source_path, target_path)

     return result
@@ -212,7 +212,7 @@ async def batch_process_files(
         )

     service = NeutralizationPlayground(currentUser)
-    result = service.batch_neutralize_files(files_data)
+    result = service.batchNeutralizeFiles(files_data)

     return result
@@ -234,7 +234,7 @@ async def get_neutralization_stats(
     """Get neutralization processing statistics"""
     try:
         service = NeutralizationPlayground(currentUser)
-        stats = service.get_processing_stats()
+        stats = service.getProcessingStats()

         return stats
@@ -255,7 +255,7 @@ async def cleanup_file_attributes(
     """Clean up neutralization attributes for a specific file"""
     try:
         service = NeutralizationPlayground(currentUser)
-        success = service.cleanup_file_attributes(fileId)
+        success = service.cleanupFileAttributes(fileId)

         if success:
             return {"message": f"Successfully cleaned up attributes for file {fileId}"}
@@ -181,9 +181,9 @@ async def reset_user_password(
     # Log password reset
     try:
         from modules.shared.auditLogger import audit_logger
-        audit_logger.log_security_event(
-            user_id=str(currentUser.id),
-            mandate_id=str(currentUser.mandateId),
+        audit_logger.logSecurityEvent(
+            userId=str(currentUser.id),
+            mandateId=str(currentUser.mandateId),
             action="password_reset",
             details=f"Reset password for user {userId}"
         )
@@ -257,9 +257,9 @@ async def change_password(
     # Log password change
     try:
         from modules.shared.auditLogger import audit_logger
-        audit_logger.log_security_event(
-            user_id=str(currentUser.id),
-            mandate_id=str(currentUser.mandateId),
+        audit_logger.logSecurityEvent(
+            userId=str(currentUser.id),
+            mandateId=str(currentUser.mandateId),
             action="password_change",
             details="User changed their own password"
         )
@@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG
 from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
 from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
 from modules.security.auth import getCurrentUser, limiter
-from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
+from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp

 # Configure logger
 logger = logging.getLogger(__name__)
@@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
         tokenRefresh=token_response.get("refresh_token", ""),
         tokenType="bearer",
         expiresAt=jwt_expires_at.timestamp(),
-        createdAt=get_utc_timestamp()
+        createdAt=getUtcTimestamp()
     )

     # Save access token (no connectionId)
@@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
         logger.info(f"Updating connection {connection_id} for user {user.username}")
         # Update connection with external service details
         connection.status = ConnectionStatus.ACTIVE
-        connection.lastChecked = get_utc_timestamp()
-        connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
+        connection.lastChecked = getUtcTimestamp()
+        connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
         connection.externalId = user_info.get("id")
         connection.externalUsername = user_info.get("email")
         connection.externalEmail = user_info.get("email")
@@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
             tokenAccess=token_response["access_token"],
             tokenRefresh=token_response.get("refresh_token", ""),
             tokenType=token_response.get("token_type", "bearer"),
-            expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
-            createdAt=get_utc_timestamp()
+            expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
+            createdAt=getUtcTimestamp()
         )
         interface.saveConnectionToken(token)
@@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
                     id: '{connection.id}',
                     status: 'connected',
                     type: 'google',
-                    lastChecked: {get_utc_timestamp()},
-                    expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
+                    lastChecked: {getUtcTimestamp()},
+                    expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
                 }}
             }}, '*');
             // Wait for message to be sent before closing
@@ -592,11 +592,11 @@ async def logout(
     # Log successful logout
     try:
         from modules.shared.auditLogger import audit_logger
-        audit_logger.log_user_access(
-            user_id=str(currentUser.id),
-            mandate_id=str(currentUser.mandateId),
+        audit_logger.logUserAccess(
+            userId=str(currentUser.id),
+            mandateId=str(currentUser.mandateId),
             action="logout",
-            success_info="google_auth_logout"
+            successInfo="google_auth_logout"
         )
     except Exception:
         # Don't fail if audit logging fails
@@ -726,12 +726,12 @@ async def refresh_token(

         # Update the connection status and timing
         google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
-        google_connection.lastChecked = get_utc_timestamp()
+        google_connection.lastChecked = getUtcTimestamp()
         google_connection.status = ConnectionStatus.ACTIVE
         appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump())

         # Calculate time until expiration
-        current_time = get_utc_timestamp()
+        current_time = getUtcTimestamp()
         expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0

         return {
@@ -131,11 +131,11 @@ async def login(
         # Log successful login
         try:
             from modules.shared.auditLogger import audit_logger
-            audit_logger.log_user_access(
-                user_id=str(user.id),
-                mandate_id=str(user.mandateId),
+            audit_logger.logUserAccess(
+                userId=str(user.id),
+                mandateId=str(user.mandateId),
                 action="login",
-                success_info="local_auth_success"
+                successInfo="local_auth_success"
             )
         except Exception:
             # Don't fail if audit logging fails
@@ -159,11 +159,11 @@ async def login(
         # Log failed login attempt
         try:
             from modules.shared.auditLogger import audit_logger
-            audit_logger.log_user_access(
-                user_id="unknown",
-                mandate_id="unknown",
+            audit_logger.logUserAccess(
+                userId="unknown",
+                mandateId="unknown",
                 action="login",
-                success_info=f"failed: {error_msg}"
+                successInfo=f"failed: {error_msg}"
             )
         except Exception:
             # Don't fail if audit logging fails
@@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
     # Log successful logout
     try:
         from modules.shared.auditLogger import audit_logger
-        audit_logger.log_user_access(
-            user_id=str(currentUser.id),
-            mandate_id=str(currentUser.mandateId),
+        audit_logger.logUserAccess(
+            userId=str(currentUser.id),
+            mandateId=str(currentUser.mandateId),
             action="logout",
-            success_info=f"revoked_tokens: {revoked}"
+            successInfo=f"revoked_tokens: {revoked}"
         )
     except Exception:
         # Don't fail if audit logging fails
@@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu
 from modules.datamodels.datamodelSecurity import Token
 from modules.security.auth import getCurrentUser, limiter
 from modules.security.jwtService import createAccessToken
-from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
+from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp

 # Configure logger
 logger = logging.getLogger(__name__)
@@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
         tokenAccess=token_response["access_token"],
         tokenRefresh=token_response.get("refresh_token", ""),
         tokenType=token_response.get("token_type", "bearer"),
-        expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
-        createdAt=get_utc_timestamp()
+        expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
+        createdAt=getUtcTimestamp()
     )

     # Save access token (no connectionId)
@@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
         tokenAccess=jwt_token,
         tokenType="bearer",
         expiresAt=jwt_expires_at.timestamp(),
-        createdAt=get_utc_timestamp()
+        createdAt=getUtcTimestamp()
     )

     # Save JWT access token
@@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
         logger.info(f"Updating connection {connection_id} for user {user.username}")
         # Update connection with external service details
         connection.status = ConnectionStatus.ACTIVE
-        connection.lastChecked = get_utc_timestamp()
-        connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
+        connection.lastChecked = getUtcTimestamp()
+        connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
         connection.externalId = user_info.get("id")
         connection.externalUsername = user_info.get("userPrincipalName")
         connection.externalEmail = user_info.get("mail")
@@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
             tokenAccess=token_response["access_token"],
             tokenRefresh=token_response.get("refresh_token", ""),
             tokenType=token_response.get("token_type", "bearer"),
-            expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
-            createdAt=get_utc_timestamp()
+            expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
+            createdAt=getUtcTimestamp()
         )
@@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
                     id: '{connection.id}',
                     status: 'connected',
                     type: 'msft',
-                    lastChecked: {get_utc_timestamp()},
-                    expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
+                    lastChecked: {getUtcTimestamp()},
+                    expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
                 }}
             }}, '*');
             // Wait for message to be sent before closing
@@ -467,11 +467,11 @@ async def logout(
     # Log successful logout
     try:
         from modules.shared.auditLogger import audit_logger
-        audit_logger.log_user_access(
-            user_id=str(currentUser.id),
-            mandate_id=str(currentUser.mandateId),
+        audit_logger.logUserAccess(
+            userId=str(currentUser.id),
+            mandateId=str(currentUser.mandateId),
             action="logout",
-            success_info="microsoft_auth_logout"
+            successInfo="microsoft_auth_logout"
         )
     except Exception:
         # Don't fail if audit logging fails
@@ -575,27 +575,27 @@ async def refresh_token(
         from modules.security.tokenManager import TokenManager
         token_manager = TokenManager()

-        refreshed_token = token_manager.refresh_token(current_token)
-        if refreshed_token:
+        refreshedToken = token_manager.refreshToken(current_token)
+        if refreshedToken:
             # Save the new connection token (which will automatically replace old ones)
-            appInterface.saveConnectionToken(refreshed_token)
+            appInterface.saveConnectionToken(refreshedToken)

             # Update the connection's expiration time
-            msft_connection.expiresAt = float(refreshed_token.expiresAt)
-            msft_connection.lastChecked = get_utc_timestamp()
+            msft_connection.expiresAt = float(refreshedToken.expiresAt)
+            msft_connection.lastChecked = getUtcTimestamp()
             msft_connection.status = ConnectionStatus.ACTIVE

             # Save updated connection
             appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump())

             # Calculate time until expiration
-            current_time = get_utc_timestamp()
-            expires_in = int(refreshed_token.expiresAt - current_time)
+            current_time = getUtcTimestamp()
+            expiresIn = int(refreshedToken.expiresAt - current_time)

             return {
                 "message": "Token refreshed successfully",
-                "expires_at": refreshed_token.expiresAt,
-                "expires_in_seconds": expires_in
+                "expires_at": refreshedToken.expiresAt,
+                "expires_in_seconds": expiresIn
             }
         else:
             raise HTTPException(
@@ -18,26 +18,26 @@ logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/voice-google", tags=["Voice Google"])

 # Store active WebSocket connections
-active_connections: Dict[str, WebSocket] = {}
+activeConnections: Dict[str, WebSocket] = {}

 class ConnectionManager:
     def __init__(self):
-        self.active_connections: List[WebSocket] = []
+        self.activeConnections: List[WebSocket] = []

-    async def connect(self, websocket: WebSocket, connection_id: str):
+    async def connect(self, websocket: WebSocket, connectionId: str):
         await websocket.accept()
-        self.active_connections.append(websocket)
-        active_connections[connection_id] = websocket
-        logger.info(f"WebSocket connected: {connection_id}")
+        self.activeConnections.append(websocket)
+        activeConnections[connectionId] = websocket
+        logger.info(f"WebSocket connected: {connectionId}")

-    def disconnect(self, websocket: WebSocket, connection_id: str):
-        if websocket in self.active_connections:
-            self.active_connections.remove(websocket)
-        if connection_id in active_connections:
-            del active_connections[connection_id]
-        logger.info(f"WebSocket disconnected: {connection_id}")
+    def disconnect(self, websocket: WebSocket, connectionId: str):
+        if websocket in self.activeConnections:
+            self.activeConnections.remove(websocket)
+        if connectionId in activeConnections:
+            del activeConnections[connectionId]
+        logger.info(f"WebSocket disconnected: {connectionId}")

-    async def send_personal_message(self, message: dict, websocket: WebSocket):
+    async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
         try:
             await websocket.send_text(json.dumps(message))
         except Exception as e:
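A minimal sketch of how the renamed ConnectionManager methods are typically wired into a FastAPI WebSocket endpoint; the /ws route below is an assumption for illustration, not part of this diff (assumes from fastapi import WebSocket, WebSocketDisconnect):

# Hypothetical endpoint exercising the camelCase manager API.
@router.websocket("/ws/{connectionId}")
async def websocketEndpoint(websocket: WebSocket, connectionId: str):
    await manager.connect(websocket, connectionId)
    try:
        while True:
            data = await websocket.receive_text()
            await manager.sendPersonalMessage({"echo": data}, websocket)
    except WebSocketDisconnect:
        manager.disconnect(websocket, connectionId)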
@@ -45,10 +45,10 @@ class ConnectionManager:

 manager = ConnectionManager()

-def get_voice_interface(current_user: User) -> VoiceObjects:
+def _getVoiceInterface(currentUser: User) -> VoiceObjects:
     """Get voice interface instance with user context."""
     try:
-        return getVoiceInterface(current_user)
+        return getVoiceInterface(currentUser)
     except Exception as e:
         logger.error(f"Failed to initialize voice interface: {e}")
         raise HTTPException(
@@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects:

 @router.post("/speech-to-text")
 async def speech_to_text(
-    audio_file: UploadFile = File(...),
+    audioFile: UploadFile = File(...),
     language: str = Form("de-DE"),
-    current_user: User = Depends(getCurrentUser)
+    currentUser: User = Depends(getCurrentUser)
 ):
     """Convert speech to text using Google Cloud Speech-to-Text API."""
     try:
-        logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}")
+        logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")

         # Read audio file
-        audio_content = await audio_file.read()
-        logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
+        audioContent = await audioFile.read()
+        logger.info(f"📊 Audio file size: {len(audioContent)} bytes")

         # Get voice interface
-        voice_interface = get_voice_interface(current_user)
+        voiceInterface = _getVoiceInterface(currentUser)

         # Validate audio format
-        validation = voice_interface.validateAudioFormat(audio_content)
+        validation = voiceInterface.validateAudioFormat(audioContent)

         if not validation["valid"]:
             raise HTTPException(
@@ -83,8 +83,8 @@ async def speech_to_text(
             )

         # Perform speech recognition
-        result = await voice_interface.speechToText(
-            audioContent=audio_content,
+        result = await voiceInterface.speechToText(
+            audioContent=audioContent,
             language=language
         )
@@ -95,7 +95,7 @@ async def speech_to_text(
             "confidence": result["confidence"],
             "language": result["language"],
             "audio_info": {
-                "size": len(audio_content),
+                "size": len(audioContent),
                 "format": validation["format"],
                 "estimated_duration": validation.get("estimated_duration", 0)
             }
@@ -118,13 +118,13 @@ async def speech_to_text(
 @router.post("/translate")
 async def translate_text(
     text: str = Form(...),
-    source_language: str = Form("de"),
-    target_language: str = Form("en"),
-    current_user: User = Depends(getCurrentUser)
+    sourceLanguage: str = Form("de"),
+    targetLanguage: str = Form("en"),
+    currentUser: User = Depends(getCurrentUser)
 ):
     """Translate text using Google Cloud Translation API."""
     try:
-        logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})")
+        logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})")

         if not text.strip():
             raise HTTPException(
@@ -133,13 +133,13 @@ async def translate_text(
             )

         # Get voice interface
-        voice_interface = get_voice_interface(current_user)
+        voiceInterface = _getVoiceInterface(currentUser)

         # Perform translation
-        result = await voice_interface.translateText(
+        result = await voiceInterface.translateText(
             text=text,
-            sourceLanguage=source_language,
-            targetLanguage=target_language
+            sourceLanguage=sourceLanguage,
+            targetLanguage=targetLanguage
         )

         if result["success"]:
@@ -167,21 +167,21 @@ async def translate_text(

 @router.post("/realtime-interpreter")
 async def realtime_interpreter(
-    audio_file: UploadFile = File(...),
-    from_language: str = Form("de-DE"),
-    to_language: str = Form("en-US"),
-    connection_id: str = Form(None),
-    current_user: User = Depends(getCurrentUser)
+    audioFile: UploadFile = File(...),
+    fromLanguage: str = Form("de-DE"),
+    toLanguage: str = Form("en-US"),
+    connectionId: str = Form(None),
+    currentUser: User = Depends(getCurrentUser)
 ):
     """Real-time interpreter: speech to translated text using Google Cloud APIs."""
     try:
-        logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
-        logger.info(f"   From: {from_language} -> To: {to_language}")
-        logger.info(f"   MIME type: {audio_file.content_type}")
+        logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
+        logger.info(f"   From: {fromLanguage} -> To: {toLanguage}")
+        logger.info(f"   MIME type: {audioFile.content_type}")

         # Read audio file
-        audio_content = await audio_file.read()
-        logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
+        audioContent = await audioFile.read()
+        logger.info(f"📊 Audio file size: {len(audioContent)} bytes")

         # Save audio file for debugging with correct extension
         # file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
@ -192,10 +192,10 @@ async def realtime_interpreter(
|
|||
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
|
||||
|
||||
# Get voice interface
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
|
||||
# Validate audio format
|
||||
validation = voice_interface.validateAudioFormat(audio_content)
|
||||
validation = voiceInterface.validateAudioFormat(audioContent)
|
||||
|
||||
if not validation["valid"]:
|
||||
raise HTTPException(
|
||||
|
|
@ -204,10 +204,10 @@ async def realtime_interpreter(
|
|||
)
|
||||
|
||||
# Perform complete pipeline: Speech-to-Text + Translation
|
||||
result = await voice_interface.speechToTranslatedText(
|
||||
audioContent=audio_content,
|
||||
fromLanguage=from_language,
|
||||
toLanguage=to_language
|
||||
result = await voiceInterface.speechToTranslatedText(
|
||||
audioContent=audioContent,
|
||||
fromLanguage=fromLanguage,
|
||||
toLanguage=toLanguage
|
||||
)
|
||||
|
||||
if result["success"]:
|
||||
|
|
@ -223,7 +223,7 @@ async def realtime_interpreter(
|
|||
"source_language": result["source_language"],
|
||||
"target_language": result["target_language"],
|
||||
"audio_info": {
|
||||
"size": len(audio_content),
|
||||
"size": len(audioContent),
|
||||
"format": validation["format"],
|
||||
"estimated_duration": validation.get("estimated_duration", 0)
|
||||
}
|
||||
|
|
@ -249,7 +249,7 @@ async def text_to_speech(
|
|||
text: str = Form(...),
|
||||
language: str = Form("de-DE"),
|
||||
voice: str = Form(None),
|
||||
current_user: User = Depends(getCurrentUser)
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
):
|
||||
"""Convert text to speech using Google Cloud Text-to-Speech."""
|
||||
try:
|
||||
|
|
@ -261,8 +261,8 @@ async def text_to_speech(
|
|||
detail="Empty text provided for text-to-speech"
|
||||
)
|
||||
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
result = await voice_interface.textToSpeech(
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
result = await voiceInterface.textToSpeech(
|
||||
text=text,
|
||||
languageCode=language,
|
||||
voiceName=voice
|
||||
|
|
@ -294,13 +294,13 @@ async def text_to_speech(
|
|||
)
|
||||
|
||||
@router.get("/languages")
|
||||
async def get_available_languages(current_user: User = Depends(getCurrentUser)):
|
||||
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
|
||||
"""Get available languages from Google Cloud Text-to-Speech."""
|
||||
try:
|
||||
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
||||
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
result = await voice_interface.getAvailableLanguages()
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
result = await voiceInterface.getAvailableLanguages()
|
||||
|
||||
if result["success"]:
|
||||
return {
|
||||
|
|
@ -324,21 +324,21 @@ async def get_available_languages(current_user: User = Depends(getCurrentUser)):
|
|||
|
||||
@router.get("/voices")
|
||||
async def get_available_voices(
|
||||
language_code: Optional[str] = None,
|
||||
current_user: User = Depends(getCurrentUser)
|
||||
languageCode: Optional[str] = None,
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
):
|
||||
"""Get available voices from Google Cloud Text-to-Speech."""
|
||||
try:
|
||||
logger.info(f"🎤 Getting available voices, language filter: {language_code}")
|
||||
logger.info(f"🎤 Getting available voices, language filter: {languageCode}")
|
||||
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
result = await voice_interface.getAvailableVoices(languageCode=language_code)
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
result = await voiceInterface.getAvailableVoices(languageCode=languageCode)
|
||||
|
||||
if result["success"]:
|
||||
return {
|
||||
"success": True,
|
||||
"voices": result["voices"],
|
||||
"language_filter": language_code
|
||||
"language_filter": languageCode
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
|
|
@ -356,11 +356,11 @@ async def get_available_voices(
|
|||
)
|
||||
|
||||
@router.get("/health")
|
||||
async def health_check(current_user: User = Depends(getCurrentUser)):
|
||||
async def health_check(currentUser: User = Depends(getCurrentUser)):
|
||||
"""Health check for Google Cloud voice services."""
|
||||
try:
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
test_result = await voice_interface.healthCheck()
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
test_result = await voiceInterface.healthCheck()
|
||||
|
||||
return test_result
|
||||
|
||||
|
|
@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)):
|
|||
}
|
||||
|
||||
@router.get("/settings")
|
||||
async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
|
||||
async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
|
||||
"""Get voice settings for the current user."""
|
||||
try:
|
||||
logger.info(f"Getting voice settings for user: {current_user.id}")
|
||||
logger.info(f"Getting voice settings for user: {currentUser.id}")
|
||||
|
||||
# Get voice interface
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
|
||||
# Get or create voice settings for the user
|
||||
voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id)
|
||||
voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id)
|
||||
|
||||
if voice_settings:
|
||||
# Return user settings
|
||||
|
|
@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
|
|||
@router.post("/settings")
|
||||
async def save_voice_settings(
|
||||
settings: Dict[str, Any] = Body(...),
|
||||
current_user: User = Depends(getCurrentUser)
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
):
|
||||
"""Save voice settings for the current user."""
|
||||
try:
|
||||
logger.info(f"Saving voice settings for user: {current_user.id}")
|
||||
logger.info(f"Saving voice settings for user: {currentUser.id}")
|
||||
logger.info(f"Settings: {settings}")
|
||||
|
||||
# Validate required settings
|
||||
required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
|
||||
for field in required_fields:
|
||||
requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
|
||||
for field in requiredFields:
|
||||
if field not in settings:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
|
|
@ -448,23 +448,23 @@ async def save_voice_settings(
|
|||
settings["targetLanguage"] = "en-US"
|
||||
|
||||
# Get voice interface
|
||||
voice_interface = get_voice_interface(current_user)
|
||||
voiceInterface = _getVoiceInterface(currentUser)
|
||||
|
||||
# Check if settings already exist for this user
|
||||
existing_settings = voice_interface.getVoiceSettings(current_user.id)
|
||||
existing_settings = voiceInterface.getVoiceSettings(currentUser.id)
|
||||
|
||||
if existing_settings:
|
||||
# Update existing settings
|
||||
logger.info(f"Updating existing voice settings for user {current_user.id}")
|
||||
updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings)
|
||||
logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}")
|
||||
logger.info(f"Updating existing voice settings for user {currentUser.id}")
|
||||
updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings)
|
||||
logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}")
|
||||
else:
|
||||
# Create new settings
|
||||
logger.info(f"Creating new voice settings for user {current_user.id}")
|
||||
logger.info(f"Creating new voice settings for user {currentUser.id}")
|
||||
# Add userId to settings
|
||||
settings["userId"] = current_user.id
|
||||
created_settings = voice_interface.createVoiceSettings(settings)
|
||||
logger.info(f"Voice settings created for user {current_user.id}: {created_settings}")
|
||||
settings["userId"] = currentUser.id
|
||||
created_settings = voiceInterface.createVoiceSettings(settings)
|
||||
logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
|
|
@ -486,25 +486,25 @@ async def save_voice_settings(
|
|||
@router.websocket("/ws/realtime-interpreter")
|
||||
async def websocket_realtime_interpreter(
|
||||
websocket: WebSocket,
|
||||
user_id: str = "default",
|
||||
from_language: str = "de-DE",
|
||||
to_language: str = "en-US"
|
||||
userId: str = "default",
|
||||
fromLanguage: str = "de-DE",
|
||||
toLanguage: str = "en-US"
|
||||
):
|
||||
"""WebSocket endpoint for real-time voice interpretation"""
|
||||
connection_id = f"realtime_{user_id}_{from_language}_{to_language}"
|
||||
connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}"
|
||||
|
||||
try:
|
||||
await manager.connect(websocket, connection_id)
|
||||
await manager.connect(websocket, connectionId)
|
||||
|
||||
# Send connection confirmation
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "connected",
|
||||
"connection_id": connection_id,
|
||||
"connection_id": connectionId,
|
||||
"message": "Connected to real-time interpreter"
|
||||
}, websocket)
|
||||
|
||||
# Initialize voice interface
|
||||
voice_interface = get_voice_interface(User(id=user_id))
|
||||
voiceInterface = _getVoiceInterface(User(id=userId))
|
||||
|
||||
while True:
|
||||
# Receive message from client
|
||||
|
|
@ -515,7 +515,7 @@ async def websocket_realtime_interpreter(
|
|||
# Process audio chunk
|
||||
try:
|
||||
# Decode base64 audio data
|
||||
audio_data = base64.b64decode(message["data"])
|
||||
audioData = base64.b64decode(message["data"])
|
||||
|
||||
# For now, just acknowledge receipt
|
||||
# In a full implementation, this would:
|
||||
|
|
@ -524,9 +524,9 @@ async def websocket_realtime_interpreter(
|
|||
# 3. Send partial results back
|
||||
# 4. Handle translation
|
||||
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "audio_received",
|
||||
"chunk_size": len(audio_data),
|
||||
"chunk_size": len(audioData),
|
||||
"timestamp": message.get("timestamp")
|
||||
}, websocket)
|
||||
|
||||
|
|
@ -539,7 +539,7 @@ async def websocket_realtime_interpreter(
|
|||
|
||||
elif message["type"] == "ping":
|
||||
# Respond to ping
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "pong",
|
||||
"timestamp": message.get("timestamp")
|
||||
}, websocket)
|
||||
|
|
@ -548,32 +548,32 @@ async def websocket_realtime_interpreter(
|
|||
logger.warning(f"Unknown message type: {message['type']}")
|
||||
|
||||
except WebSocketDisconnect:
|
||||
manager.disconnect(websocket, connection_id)
|
||||
logger.info(f"Client disconnected: {connection_id}")
|
||||
manager.disconnect(websocket, connectionId)
|
||||
logger.info(f"Client disconnected: {connectionId}")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket error: {e}")
|
||||
manager.disconnect(websocket, connection_id)
|
||||
manager.disconnect(websocket, connectionId)
|
||||
|
||||
@router.websocket("/ws/speech-to-text")
|
||||
async def websocket_speech_to_text(
|
||||
websocket: WebSocket,
|
||||
user_id: str = "default",
|
||||
userId: str = "default",
|
||||
language: str = "de-DE"
|
||||
):
|
||||
"""WebSocket endpoint for real-time speech-to-text"""
|
||||
connection_id = f"stt_{user_id}_{language}"
|
||||
connectionId = f"stt_{userId}_{language}"
|
||||
|
||||
try:
|
||||
await manager.connect(websocket, connection_id)
|
||||
await manager.connect(websocket, connectionId)
|
||||
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "connected",
|
||||
"connection_id": connection_id,
|
||||
"connection_id": connectionId,
|
||||
"message": "Connected to speech-to-text"
|
||||
}, websocket)
|
||||
|
||||
# Initialize voice interface
|
||||
voice_interface = get_voice_interface(User(id=user_id))
|
||||
voiceInterface = _getVoiceInterface(User(id=userId))
|
||||
|
||||
while True:
|
||||
data = await websocket.receive_text()
|
||||
|
|
@ -581,12 +581,12 @@ async def websocket_speech_to_text(
|
|||
|
||||
if message["type"] == "audio_chunk":
|
||||
try:
|
||||
audio_data = base64.b64decode(message["data"])
|
||||
audioData = base64.b64decode(message["data"])
|
||||
|
||||
# Process audio chunk
|
||||
# This would integrate with Google Cloud Speech-to-Text streaming API
|
||||
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "transcription_result",
|
||||
"text": "Audio chunk received", # Placeholder
|
||||
"confidence": 0.95,
|
||||
|
|
@ -595,39 +595,39 @@ async def websocket_speech_to_text(
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing audio: {e}")
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "error",
|
||||
"error": f"Failed to process audio: {str(e)}"
|
||||
}, websocket)
|
||||
|
||||
elif message["type"] == "ping":
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "pong",
|
||||
"timestamp": message.get("timestamp")
|
||||
}, websocket)
|
||||
|
||||
except WebSocketDisconnect:
|
||||
manager.disconnect(websocket, connection_id)
|
||||
manager.disconnect(websocket, connectionId)
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket error: {e}")
|
||||
manager.disconnect(websocket, connection_id)
|
||||
manager.disconnect(websocket, connectionId)
|
||||
|
||||
@router.websocket("/ws/text-to-speech")
|
||||
async def websocket_text_to_speech(
|
||||
websocket: WebSocket,
|
||||
user_id: str = "default",
|
||||
userId: str = "default",
|
||||
language: str = "de-DE",
|
||||
voice: str = "de-DE-Wavenet-A"
|
||||
):
|
||||
"""WebSocket endpoint for real-time text-to-speech"""
|
||||
connection_id = f"tts_{user_id}_{language}_{voice}"
|
||||
connectionId = f"tts_{userId}_{language}_{voice}"
|
||||
|
||||
try:
|
||||
await manager.connect(websocket, connection_id)
|
||||
await manager.connect(websocket, connectionId)
|
||||
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "connected",
|
||||
"connection_id": connection_id,
|
||||
"connection_id": connectionId,
|
||||
"message": "Connected to text-to-speech"
|
||||
}, websocket)
|
||||
|
||||
|
|
@ -643,7 +643,7 @@ async def websocket_text_to_speech(
|
|||
# This would integrate with Google Cloud Text-to-Speech API
|
||||
|
||||
# For now, send a placeholder response
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "audio_data",
|
||||
"audio": "base64_encoded_audio_here", # Placeholder
|
||||
"format": "mp3"
|
||||
|
|
@ -651,19 +651,19 @@ async def websocket_text_to_speech(
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing text-to-speech: {e}")
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "error",
|
||||
"error": f"Failed to process text: {str(e)}"
|
||||
}, websocket)
|
||||
|
||||
elif message["type"] == "ping":
|
||||
await manager.send_personal_message({
|
||||
await manager.sendPersonalMessage({
|
||||
"type": "pong",
|
||||
"timestamp": message.get("timestamp")
|
||||
}, websocket)
|
||||
|
||||
except WebSocketDisconnect:
|
||||
manager.disconnect(websocket, connection_id)
|
||||
manager.disconnect(websocket, connectionId)
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket error: {e}")
|
||||
manager.disconnect(websocket, connection_id)
|
||||
manager.disconnect(websocket, connectionId)
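
The WebSocket endpoints above rely on a shared manager whose definition is not part of this diff. A minimal sketch of the interface those calls assume — only the method names come from the calls above, the implementation itself is hypothetical:

from fastapi import WebSocket

class ConnectionManager:
    """Hypothetical sketch of the manager the endpoints above assume."""

    def __init__(self):
        self.activeConnections: dict[str, WebSocket] = {}

    async def connect(self, websocket: WebSocket, connectionId: str):
        await websocket.accept()
        self.activeConnections[connectionId] = websocket

    def disconnect(self, websocket: WebSocket, connectionId: str):
        self.activeConnections.pop(connectionId, None)

    async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
        # The endpoints send JSON objects such as {"type": "pong", ...}
        await websocket.send_json(message)

manager = ConnectionManager()
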
@@ -9,7 +9,7 @@ from fastapi import Response
 from jose import jwt

 from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_now
+from modules.shared.timezoneUtils import getUtcNow

 # Config
 SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
@@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T
     import uuid
     toEncode["jti"] = str(uuid.uuid4())

-    expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
+    expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
     toEncode.update({"exp": expire})
     encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
     return encodedJwt, expire
@@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> Tuple[str, "datetime"]:
     toEncode["jti"] = str(uuid.uuid4())
     toEncode["type"] = "refresh"

-    expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
+    expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
     toEncode.update({"exp": expire})
     encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
     return encodedJwt, expire
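
For reference, a minimal usage sketch for the two token helpers above; the decode call is the standard jose API, and the claims payload is a hypothetical example:

from datetime import timedelta
from jose import jwt

accessToken, accessExpire = createAccessToken({"sub": "user-123"}, expiresDelta=timedelta(minutes=15))
refreshToken, refreshExpire = createRefreshToken({"sub": "user-123"})

# Decoding validates the signature and the "exp" claim set above.
claims = jwt.decode(accessToken, SECRET_KEY, algorithms=[ALGORITHM])
assert claims["sub"] == "user-123" and "jti" in claims
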
@@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable
 from modules.datamodels.datamodelSecurity import Token
 from modules.datamodels.datamodelUam import AuthAuthority
 from modules.shared.configuration import APP_CONFIG
-from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp

 logger = logging.getLogger(__name__)

@@ -27,54 +27,54 @@ class TokenManager:
         self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID")
         self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")

-    def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
+    def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
         """Refresh Microsoft OAuth token using refresh token"""
         try:
-            logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}")
-            logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
+            logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}")
+            logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")

             if not self.msft_client_id or not self.msft_client_secret:
                 logger.error("Microsoft OAuth configuration not found")
                 return None

             # Microsoft token refresh endpoint
-            token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
-            logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}")
+            tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
+            logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}")

             # Prepare refresh request
             data = {
                 "client_id": self.msft_client_id,
                 "client_secret": self.msft_client_secret,
                 "grant_type": "refresh_token",
-                "refresh_token": refresh_token,
+                "refresh_token": refreshToken,
                 "scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read"
             }
-            logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
+            logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")

             # Make refresh request
             with httpx.Client(timeout=30.0) as client:
-                logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint")
-                response = client.post(token_url, data=data)
-                logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}")
+                logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint")
+                response = client.post(tokenUrl, data=data)
+                logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}")

                 if response.status_code == 200:
-                    token_data = response.json()
-                    logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token")
+                    tokenData = response.json()
+                    logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token")

                     # Create new token
-                    new_token = Token(
-                        userId=user_id,
+                    newToken = Token(
+                        userId=userId,
                         authority=AuthAuthority.MSFT,
-                        connectionId=old_token.connectionId, # Preserve connection ID
-                        tokenAccess=token_data["access_token"],
-                        tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided
-                        tokenType=token_data.get("token_type", "bearer"),
-                        expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
-                        createdAt=get_utc_timestamp()
+                        connectionId=oldToken.connectionId, # Preserve connection ID
+                        tokenAccess=tokenData["access_token"],
+                        tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided
+                        tokenType=tokenData.get("token_type", "bearer"),
+                        expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
+                        createdAt=getUtcTimestamp()
                     )

-                    logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}")
-                    return new_token
+                    logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}")
+                    return newToken
                 else:
                     logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}")
                     return None
@@ -83,70 +83,70 @@ class TokenManager:
             logger.error(f"Error refreshing Microsoft token: {str(e)}")
             return None

-    def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
+    def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
         """Refresh Google OAuth token using refresh token"""
         try:
-            logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}")
-            logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
+            logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}")
+            logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")

             if not self.google_client_id or not self.google_client_secret:
                 logger.error("Google OAuth configuration not found")
                 return None

             # Google token refresh endpoint
-            token_url = "https://oauth2.googleapis.com/token"
-            logger.debug(f"refresh_google_token: Using token URL: {token_url}")
+            tokenUrl = "https://oauth2.googleapis.com/token"
+            logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}")

             # Prepare refresh request
             data = {
                 "client_id": self.google_client_id,
                 "client_secret": self.google_client_secret,
                 "grant_type": "refresh_token",
-                "refresh_token": refresh_token
+                "refresh_token": refreshToken
             }
-            logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
+            logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")

             # Make refresh request
             with httpx.Client(timeout=30.0) as client:
-                logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint")
-                response = client.post(token_url, data=data)
-                logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}")
+                logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint")
+                response = client.post(tokenUrl, data=data)
+                logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}")

                 if response.status_code == 200:
-                    token_data = response.json()
-                    logger.debug(f"refresh_google_token: Token refresh successful, creating new token")
+                    tokenData = response.json()
+                    logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token")

                     # Validate the response contains required fields
-                    if "access_token" not in token_data:
+                    if "access_token" not in tokenData:
                         logger.error("Google token refresh response missing access_token")
                         return None

                     # Create new token
-                    new_token = Token(
-                        userId=user_id,
+                    newToken = Token(
+                        userId=userId,
                         authority=AuthAuthority.GOOGLE,
-                        connectionId=old_token.connectionId, # Preserve connection ID
-                        tokenAccess=token_data["access_token"],
-                        tokenRefresh=token_data.get("refresh_token", refresh_token), # Use new refresh token if provided
-                        tokenType=token_data.get("token_type", "bearer"),
-                        expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
-                        createdAt=get_utc_timestamp()
+                        connectionId=oldToken.connectionId, # Preserve connection ID
+                        tokenAccess=tokenData["access_token"],
+                        tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided
+                        tokenType=tokenData.get("token_type", "bearer"),
+                        expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
+                        createdAt=getUtcTimestamp()
                     )

-                    logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}")
-                    return new_token
+                    logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}")
+                    return newToken
                 else:
-                    error_details = response.text
-                    logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}")
+                    errorDetails = response.text
+                    logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}")

                     # Handle specific error cases
                     if response.status_code == 400:
                         try:
-                            error_data = response.json()
-                            error_code = error_data.get("error")
-                            if error_code == "invalid_grant":
+                            errorData = response.json()
+                            errorCode = errorData.get("error")
+                            if errorCode == "invalid_grant":
                                 logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate")
-                            elif error_code == "invalid_client":
+                            elif errorCode == "invalid_client":
                                 logger.error("Google OAuth client configuration is invalid")
                         except:
                             pass
@@ -157,55 +157,55 @@ class TokenManager:
             logger.error(f"Error refreshing Google token: {str(e)}")
             return None

-    def refresh_token(self, old_token: Token) -> Optional[Token]:
+    def refreshToken(self, oldToken: Token) -> Optional[Token]:
         """Refresh an expired token using the appropriate OAuth service"""
         try:
-            logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}")
-            logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}")
+            logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}")
+            logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}")

             # Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
             # Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
             try:
-                now_ts = get_utc_timestamp()
-                created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0
-                seconds_since_last_refresh = now_ts - created_ts
-                if seconds_since_last_refresh < 10 * 60:
+                nowTs = getUtcTimestamp()
+                createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0
+                secondsSinceLastRefresh = nowTs - createdTs
+                if secondsSinceLastRefresh < 10 * 60:
                     logger.info(
-                        f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. "
-                        f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)."
+                        f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. "
+                        f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)."
                     )
                     # Return the existing token to avoid caller errors while preventing provider rate limits
-                    return old_token
+                    return oldToken
             except Exception:
                 # If any issue reading timestamps, proceed with normal refresh to be safe
                 pass

-            if not old_token.tokenRefresh:
-                logger.warning(f"No refresh token available for {old_token.authority}")
+            if not oldToken.tokenRefresh:
+                logger.warning(f"No refresh token available for {oldToken.authority}")
                 return None

             # Route to appropriate refresh method
-            if old_token.authority == AuthAuthority.MSFT:
-                logger.debug(f"refresh_token: Refreshing Microsoft token")
-                return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token)
-            elif old_token.authority == AuthAuthority.GOOGLE:
-                logger.debug(f"refresh_token: Refreshing Google token")
-                return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token)
+            if oldToken.authority == AuthAuthority.MSFT:
+                logger.debug(f"refreshToken: Refreshing Microsoft token")
+                return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
+            elif oldToken.authority == AuthAuthority.GOOGLE:
+                logger.debug(f"refreshToken: Refreshing Google token")
+                return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
             else:
-                logger.warning(f"Unknown authority for token refresh: {old_token.authority}")
+                logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
                 return None

         except Exception as e:
             logger.error(f"Error refreshing token: {str(e)}")
             return None

-    def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
+    def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
         """Ensure a token is fresh; refresh if expiring within threshold.

         Args:
             token: Existing token to validate/refresh.
-            seconds_before_expiry: Threshold window to proactively refresh.
-            save_callback: Optional function to persist a refreshed token.
+            secondsBeforeExpiry: Threshold window to proactively refresh.
+            saveCallback: Optional function to persist a refreshed token.

         Returns:
             A fresh token (refreshed or original) or None if refresh failed.
@@ -214,31 +214,31 @@ class TokenManager:
             if token is None:
                 return None

-            now_ts = get_utc_timestamp()
-            expires_at = token.expiresAt or 0
+            nowTs = getUtcTimestamp()
+            expiresAt = token.expiresAt or 0

             # If token expires within the threshold, try to refresh
-            if expires_at and expires_at < (now_ts + seconds_before_expiry):
+            if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry):
                 logger.info(
-                    f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
-                    f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
+                    f"ensureFreshToken: Token for connection {token.connectionId} expiring soon "
+                    f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh."
                 )
-                refreshed = self.refresh_token(token)
+                refreshed = self.refreshToken(token)
                 if refreshed:
-                    if save_callback is not None:
+                    if saveCallback is not None:
                         try:
-                            save_callback(refreshed)
+                            saveCallback(refreshed)
                         except Exception as e:
-                            logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
+                            logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}")
                     return refreshed
                 else:
-                    logger.warning("ensure_fresh_token: Token refresh failed")
+                    logger.warning("ensureFreshToken: Token refresh failed")
                     return None

             # Token is sufficiently fresh
             return token
         except Exception as e:
-            logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
+            logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
             return None

     # Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
@@ -256,10 +256,10 @@ class TokenManager:
             token = interfaceDbApp.getConnectionToken(connectionId)
             if not token:
                 return None
-            return self.ensure_fresh_token(
+            return self.ensureFreshToken(
                 token,
-                seconds_before_expiry=secondsBeforeExpiry,
-                save_callback=lambda t: interfaceDbApp.saveConnectionToken(t)
+                secondsBeforeExpiry=secondsBeforeExpiry,
+                saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
             )
         except Exception as e:
             logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
@@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
 from typing import Callable
 import asyncio
 from modules.security.tokenRefreshService import token_refresh_service
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp

 logger = logging.getLogger(__name__)

@@ -135,7 +135,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
        try:
            # Perform proactive refresh in background
            asyncio.create_task(self._proactive_refresh_tokens(user_id))
-           self.last_check[user_id] = get_utc_timestamp()
+           self.last_check[user_id] = getUtcTimestamp()

        except Exception as e:
            logger.warning(f"Error scheduling proactive refresh: {str(e)}")
@@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
        Check if we should perform proactive refresh for this user
        """
        try:
-           current_time = get_utc_timestamp()
+           current_time = getUtcTimestamp()
            last_check = self.last_check.get(user_id, 0)

            # Check every 5 minutes
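
The comparison that follows this comment is not shown in the hunk; a sketch of the 5-minute gate it implies, assuming epoch-second timestamps as used elsewhere in this commit:

CHECK_INTERVAL_SECONDS = 5 * 60

def shouldCheckAgain(currentTime: float, lastCheck: float) -> bool:
    # True once at least five minutes have passed since the last proactive check.
    return (currentTime - lastCheck) >= CHECK_INTERVAL_SECONDS
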
@@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues.
 import logging
 from typing import Dict, Any
 from modules.datamodels.datamodelUam import UserConnection, AuthAuthority
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
 from modules.shared.auditLogger import audit_logger

 logger = logging.getLogger(__name__)
@@ -24,7 +24,7 @@ class TokenRefreshService:

     def _is_rate_limited(self, connection_id: str) -> bool:
         """Check if connection is rate limited for refresh attempts"""
-        now = get_utc_timestamp()
+        now = getUtcTimestamp()
         if connection_id not in self.rate_limit_map:
             return False

@@ -39,7 +39,7 @@ class TokenRefreshService:

     def _record_refresh_attempt(self, connection_id: str) -> None:
         """Record a refresh attempt for rate limiting"""
-        now = get_utc_timestamp()
+        now = getUtcTimestamp()
         if connection_id not in self.rate_limit_map:
             self.rate_limit_map[connection_id] = []
         self.rate_limit_map[connection_id].append(now)
@@ -60,14 +60,14 @@ class TokenRefreshService:
             token_manager = TokenManager()

             # Attempt to refresh the token
-            refreshed_token = token_manager.refresh_token(current_token)
-            if refreshed_token:
+            refreshedToken = token_manager.refreshToken(current_token)
+            if refreshedToken:
                 # Save the refreshed token
-                interface.saveConnectionToken(refreshed_token)
+                interface.saveConnectionToken(refreshedToken)

                 # Update connection status
                 interface.db.recordModify(UserConnection, connection.id, {
-                    "lastChecked": get_utc_timestamp(),
+                    "lastChecked": getUtcTimestamp(),
                     "expiresAt": refreshed_token.expiresAt
                 })

@@ -75,9 +75,9 @@ class TokenRefreshService:

                 # Log audit event
                 try:
-                    audit_logger.log_security_event(
-                        user_id=str(connection.userId),
-                        mandate_id="system",
+                    audit_logger.logSecurityEvent(
+                        userId=str(connection.userId),
+                        mandateId="system",
                         action="token_refresh",
                         details=f"Google token refreshed for connection {connection.id}"
                     )
@@ -109,14 +109,14 @@ class TokenRefreshService:
             token_manager = TokenManager()

             # Attempt to refresh the token
-            refreshed_token = token_manager.refresh_token(current_token)
-            if refreshed_token:
+            refreshedToken = token_manager.refreshToken(current_token)
+            if refreshedToken:
                 # Save the refreshed token
-                interface.saveConnectionToken(refreshed_token)
+                interface.saveConnectionToken(refreshedToken)

                 # Update connection status
                 interface.db.recordModify(UserConnection, connection.id, {
-                    "lastChecked": get_utc_timestamp(),
+                    "lastChecked": getUtcTimestamp(),
                     "expiresAt": refreshed_token.expiresAt
                 })

@@ -124,9 +124,9 @@ class TokenRefreshService:

                 # Log audit event
                 try:
-                    audit_logger.log_security_event(
-                        user_id=str(connection.userId),
-                        mandate_id="system",
+                    audit_logger.logSecurityEvent(
+                        userId=str(connection.userId),
+                        mandateId="system",
                         action="token_refresh",
                         details=f"Microsoft token refreshed for connection {connection.id}"
                     )
@@ -234,7 +234,7 @@ class TokenRefreshService:
         refreshed_count = 0
         failed_count = 0
         rate_limited_count = 0
-        current_time = get_utc_timestamp()
+        current_time = getUtcTimestamp()
         five_minutes = 5 * 60 # 5 minutes in seconds

         # Process each connection
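
_is_rate_limited and _record_refresh_attempt above keep per-connection timestamp lists; a self-contained sketch of that sliding-window pattern (the window length and attempt cap are assumptions, not values taken from this diff):

import time

class RefreshRateLimiter:
    def __init__(self, windowSeconds: int = 600, maxAttempts: int = 3):
        self.windowSeconds = windowSeconds
        self.maxAttempts = maxAttempts
        self.rateLimitMap: dict[str, list[float]] = {}

    def isRateLimited(self, connectionId: str) -> bool:
        now = time.time()
        # Keep only attempts that are still inside the window.
        recent = [t for t in self.rateLimitMap.get(connectionId, []) if now - t < self.windowSeconds]
        self.rateLimitMap[connectionId] = recent
        return len(recent) >= self.maxAttempts

    def recordAttempt(self, connectionId: str) -> None:
        self.rateLimitMap.setdefault(connectionId, []).append(time.time())
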
@@ -11,18 +11,18 @@ class PublicService:
     - Optional name_filter predicate for allow-list patterns
     """

-    def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
+    def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None):
         self._target = target
-        self._functions_only = functions_only
-        self._name_filter = name_filter
+        self._functionsOnly = functionsOnly
+        self._nameFilter = nameFilter

     def __getattr__(self, name: str):
         if name.startswith('_'):
             raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
-        if self._name_filter and not self._name_filter(name):
+        if self._nameFilter and not self._nameFilter(name):
             raise AttributeError(f"'{name}' not exposed by policy")
         attr = getattr(self._target, name)
-        if self._functions_only and not callable(attr):
+        if self._functionsOnly and not callable(attr):
             raise AttributeError(f"'{name}' is not a function")
         return attr

@@ -30,8 +30,8 @@ class PublicService:
         names = [
             n for n in dir(self._target)
             if not n.startswith('_')
-            and (not self._functions_only or callable(getattr(self._target, n, None)))
-            and (self._name_filter(n) if self._name_filter else True)
+            and (not self._functionsOnly or callable(getattr(self._target, n, None)))
+            and (self._nameFilter(n) if self._nameFilter else True)
         ]
         return sorted(names)

@@ -70,7 +70,7 @@ class Services:
         self.sharepoint = PublicService(SharepointService(self))

         from .serviceAi.mainServiceAi import AiService
-        self.ai = PublicService(AiService(self))
+        self.ai = PublicService(AiService(self), functionsOnly=False)

         from .serviceTicket.mainServiceTicket import TicketService
         self.ticket = PublicService(TicketService(self))
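
A quick usage sketch for the PublicService proxy above (ExampleService is hypothetical):

class ExampleService:
    def publicCall(self):
        return "ok"

    def _privateCall(self):
        return "hidden"

svc = PublicService(ExampleService())
svc.publicCall()   # -> "ok"
svc._privateCall   # raises AttributeError: attribute '_privateCall' is private
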
@@ -1,30 +1,26 @@
 import json
 import logging
-from typing import Dict, Any, List, Optional, Union
+import time
+from typing import Dict, Any, List, Optional, Tuple, Union
 from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
 from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
-from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
+from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
 from modules.interfaces.interfaceAiObjects import AiObjects
-from modules.services.serviceAi.subCoreAi import SubCoreAi
 from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
-from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
+from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent

+from modules.shared.jsonUtils import (
+    extractJsonString,
+    repairBrokenJson,
+    extractSectionsFromDocument,
+    buildContinuationContext
+)

 logger = logging.getLogger(__name__)

 # Rebuild the model to resolve forward references
 AiCallRequest.model_rebuild()

 class AiService:
-    """Lightweight AI service orchestrator that delegates to specialized sub-modules.
-
-    Manager delegates to specialized sub-modules:
-    - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
-    - SubDocumentProcessing: Document chunking, processing, and merging logic
-    - SubDocumentGeneration: Single-file and multi-file document generation
-
-    The main service acts as a coordinator:
-    1. Manages lazy initialization of sub-modules
-    2. Delegates operations to appropriate sub-modules
-    3. Maintains the same public API for backward compatibility
-    """
+    """AI service with core operations integrated."""

     def __init__(self, serviceCenter=None) -> None:
         """Initialize AI service with service center access.
@@ -34,64 +30,638 @@ class AiService:
         """
         self.services = serviceCenter
         # Only depend on interfaces
-        self.aiObjects = None # Will be initialized in create()
-        self._extractionService = None # Lazy initialization
-        self._coreAi = None # Lazy initialization
-        self._documentProcessor = None # Lazy initialization
-        self._documentGenerator = None # Lazy initialization
+        self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized()
+        # Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
+        self.extractionService = None
+        self.documentProcessor = None

-    @property
-    def extractionService(self):
-        """Lazy initialization of extraction service."""
-        if self._extractionService is None:
-            logger.info("Lazy initializing ExtractionService...")
-            self._extractionService = ExtractionService(self.services)
-        return self._extractionService
-
-    @property
-    def coreAi(self):
-        """Lazy initialization of core AI service."""
-        if self._coreAi is None:
-            if self.aiObjects is None:
-                raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
-            logger.info("Lazy initializing SubCoreAi...")
-            self._coreAi = SubCoreAi(self.services, self.aiObjects)
-        return self._coreAi
-
-    @property
-    def documentProcessor(self):
-        """Lazy initialization of document processing service."""
-        if self._documentProcessor is None:
-            logger.info("Lazy initializing SubDocumentProcessing...")
-            self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
-        return self._documentProcessor
-
-
-    @property
-    def documentGenerator(self):
-        """Lazy initialization of document generation service."""
-        if self._documentGenerator is None:
-            logger.info("Lazy initializing SubDocumentGeneration...")
-            self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
-        return self._documentGenerator
+    def _initializeSubmodules(self):
+        """Initialize all submodules after aiObjects is ready."""
+        if self.aiObjects is None:
+            raise RuntimeError("aiObjects must be initialized before initializing submodules")
+
+        if self.extractionService is None:
+            logger.info("Initializing ExtractionService...")
+            self.extractionService = ExtractionService(self.services)
+
+        if self.documentProcessor is None:
+            logger.info("Initializing SubDocumentProcessing...")
+            self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)

     async def _ensureAiObjectsInitialized(self):
-        """Ensure aiObjects is initialized."""
+        """Ensure aiObjects is initialized and submodules are ready."""
         if self.aiObjects is None:
             logger.info("Lazy initializing AiObjects...")
             self.aiObjects = await AiObjects.create()
             logger.info("AiObjects initialization completed")
+            # Initialize submodules after aiObjects is ready
+            self._initializeSubmodules()

     @classmethod
     async def create(cls, serviceCenter=None) -> "AiService":
-        """Create AiService instance with all connectors initialized."""
+        """Create AiService instance with all connectors and submodules initialized."""
         logger.info("AiService.create() called")
         instance = cls(serviceCenter)
         logger.info("AiService created, about to call AiObjects.create()...")
         instance.aiObjects = await AiObjects.create()
         logger.info("AiObjects.create() completed")
+        # Initialize all submodules after aiObjects is ready
+        instance._initializeSubmodules()
+        logger.info("AiService submodules initialized")
         return instance
|
||||
|
||||
# Helper methods
|
||||
|
||||
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
|
||||
"""
|
||||
Build full prompt by replacing placeholders with their content.
|
||||
Uses the new {{KEY:placeholder}} format.
|
||||
|
||||
Args:
|
||||
prompt: The base prompt template
|
||||
placeholders: Dictionary of placeholder key-value pairs
|
||||
|
||||
Returns:
|
||||
Prompt with placeholders replaced
|
||||
"""
|
||||
if not placeholders:
|
||||
return prompt
|
||||
|
||||
full_prompt = prompt
|
||||
for placeholder, content in placeholders.items():
|
||||
# Skip if content is None or empty
|
||||
if content is None:
|
||||
continue
|
||||
# Replace {{KEY:placeholder}}
|
||||
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
|
||||
|
||||
return full_prompt
|
||||
|
||||
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
|
||||
"""Analyze prompt to determine appropriate AiCallOptions parameters."""
|
||||
try:
|
||||
# Get dynamic enum values from Pydantic models
|
||||
operationTypes = [e.value for e in OperationTypeEnum]
|
||||
priorities = [e.value for e in PriorityEnum]
|
||||
processingModes = [e.value for e in ProcessingModeEnum]
|
||||
|
||||
# Create analysis prompt for AI to determine operation type and parameters
|
||||
analysisPrompt = f"""
|
||||
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
|
||||
|
||||
PROMPT TO ANALYZE:
|
||||
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}
|
||||
|
||||
Based on the prompt content, determine:
|
||||
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
|
||||
2. priority: Choose from: {', '.join(priorities)}
|
||||
3. processingMode: Choose from: {', '.join(processingModes)}
|
||||
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
|
||||
5. compressContext: true/false (true to summarize context, false to process fully)
|
||||
|
||||
Respond with ONLY a JSON object in this exact format:
|
||||
{{
|
||||
"operationType": "dataAnalyse",
|
||||
"priority": "balanced",
|
||||
"processingMode": "basic",
|
||||
"compressPrompt": true,
|
||||
"compressContext": true
|
||||
}}
|
||||
"""
|
||||
|
||||
# Use AI to analyze the prompt
|
||||
request = AiCallRequest(
|
||||
prompt=analysisPrompt,
|
||||
options=AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||
priority=PriorityEnum.SPEED,
|
||||
processingMode=ProcessingModeEnum.BASIC,
|
||||
compressPrompt=True,
|
||||
compressContext=False
|
||||
)
|
||||
)
|
||||
|
||||
response = await self.aiObjects.call(request)
|
||||
|
||||
# Parse AI response
|
||||
try:
|
||||
jsonStart = response.content.find('{')
|
||||
jsonEnd = response.content.rfind('}') + 1
|
||||
if jsonStart != -1 and jsonEnd > jsonStart:
|
||||
analysis = json.loads(response.content[jsonStart:jsonEnd])
|
||||
|
||||
# Map string values to enums
|
||||
operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
|
||||
priority = PriorityEnum(analysis.get('priority', 'balanced'))
|
||||
processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
|
||||
|
||||
return AiCallOptions(
|
||||
operationType=operationType,
|
||||
priority=priority,
|
||||
processingMode=processingMode,
|
||||
compressPrompt=analysis.get('compressPrompt', True),
|
||||
compressContext=analysis.get('compressContext', True)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse AI analysis response: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Prompt analysis failed: {e}")
|
||||
|
||||
# Fallback to default options
|
||||
return AiCallOptions(
|
||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||
priority=PriorityEnum.BALANCED,
|
||||
processingMode=ProcessingModeEnum.BASIC
|
||||
)
|
||||
|
||||
async def _callAiWithLooping(
|
||||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions,
|
||||
debugPrefix: str = "ai_call",
|
||||
promptBuilder: Optional[callable] = None,
|
||||
promptArgs: Optional[Dict[str, Any]] = None,
|
||||
operationId: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
Shared core function for AI calls with repair-based looping system.
|
||||
Automatically repairs broken JSON and continues generation seamlessly.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send to AI
|
||||
options: AI call configuration options
|
||||
debugPrefix: Prefix for debug file names
|
||||
promptBuilder: Optional function to rebuild prompts for continuation
|
||||
promptArgs: Optional arguments for prompt builder
|
||||
operationId: Optional operation ID for progress tracking
|
||||
|
||||
Returns:
|
||||
Complete AI response after all iterations
|
||||
"""
|
||||
maxIterations = 50 # Prevent infinite loops
|
||||
iteration = 0
|
||||
allSections = [] # Accumulate all sections across iterations
|
||||
lastRawResponse = None # Store last raw JSON response for continuation
|
||||
|
||||
while iteration < maxIterations:
|
||||
iteration += 1
|
||||
|
||||
# Update progress for iteration start
|
||||
if operationId:
|
||||
if iteration == 1:
|
||||
self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
|
||||
else:
|
||||
# For continuation iterations, show progress incrementally
|
||||
baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4) # Progress from 0.5 to 0.9 over maxIterations iterations
|
||||
self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})")
|
||||
|
||||
# Build iteration prompt
|
||||
            if len(allSections) > 0 and promptBuilder and promptArgs:
                # This is a continuation - build continuation context with raw JSON and rebuild prompt
                continuationContext = buildContinuationContext(allSections, lastRawResponse)
                if not lastRawResponse:
                    logger.warning(f"Iteration {iteration}: No previous response available for continuation!")

                # Rebuild prompt with continuation context using the provided prompt builder
                iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
            else:
                # First iteration - use original prompt
                iterationPrompt = prompt

            # Make AI call
            try:
                if operationId and iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
                request = AiCallRequest(
                    prompt=iterationPrompt,
                    context="",
                    options=options
                )

                # Write the ACTUAL prompt sent to AI
                if iteration == 1:
                    self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
                else:
                    self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

                response = await self.aiObjects.call(request)
                result = response.content

                # Update progress after AI call
                if operationId:
                    if iteration == 1:
                        self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
                    else:
                        progress = 0.6 + (min(iteration - 1, 10) * 0.03)
                        self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")

                # Write raw AI response to debug file
                if iteration == 1:
                    self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
                else:
                    self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")

                # Emit stats for this iteration
                self.services.workflow.storeWorkflowStat(
                    self.services.currentWorkflow,
                    response,
                    f"ai.call.{debugPrefix}.iteration_{iteration}"
                )

                if not result or not result.strip():
                    logger.warning(f"Iteration {iteration}: Empty response, stopping")
                    break

                # Store raw response for continuation (even if broken)
                lastRawResponse = result

                # Check for complete_response flag in raw response (before parsing)
                import re
                if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
                    pass  # Flag detected, will stop in _shouldContinueGeneration

                # Extract sections from response (handles both valid and broken JSON)
                extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

                # Update progress after parsing
                if operationId:
                    if extractedSections:
                        self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")

                if not extractedSections:
                    # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
                    if iteration > 1 and not wasJsonComplete:
                        logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                        continue
                    # Otherwise, stop if no sections
                    logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                    break

                # Add new sections to accumulator
                allSections.extend(extractedSections)

                # Check if we should continue (completion detection)
                if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                    continue
                else:
                    # Done - build final result
                    if operationId:
                        self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
                    break

            except Exception as e:
                logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
                break

        if iteration >= maxIterations:
            logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")

        # Build final result from accumulated sections
        final_result = self._buildFinalResultFromSections(allSections)

        # Write final result to debug file
        self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")

        return final_result

    def _extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str
    ) -> Tuple[List[Dict[str, Any]], bool]:
        """
        Extract sections from AI response, handling both valid and broken JSON.
        Uses repair mechanism for broken JSON.
        Checks for "complete_response": true flag to determine completion.
        Returns (sections, wasJsonComplete)
        """
        # First, try to parse as valid JSON
        try:
            extracted = extractJsonString(result)
            parsed_result = json.loads(extracted)

            # Check if AI marked response as complete
            isComplete = parsed_result.get("complete_response", False) == True

            # Extract sections from parsed JSON
            sections = extractSectionsFromDocument(parsed_result)

            # If AI marked as complete, always return as complete
            if isComplete:
                return sections, True

            # If in continuation mode (iteration > 1), continuation responses are expected to be fragments
            # A fragment with 0 extractable sections means JSON is incomplete - need another iteration
            if len(sections) == 0 and iteration > 1:
                return sections, False  # Mark as incomplete so loop continues

            # First iteration with 0 sections means empty response - stop
            if len(sections) == 0:
                return sections, True  # Complete but empty

            return sections, True  # JSON was complete with sections

        except json.JSONDecodeError as e:
            # Broken JSON - try repair mechanism (normal in iterative generation)
            self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")

            # Try to repair
            repaired_json = repairBrokenJson(result)

            if repaired_json:
                # Extract sections from repaired JSON
                sections = extractSectionsFromDocument(repaired_json)
                return sections, False  # JSON was broken but repaired
            else:
                # Repair failed - log error
                logger.error(f"Iteration {iteration}: All repair strategies failed")
                return [], False

        except Exception as e:
            logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
            return [], False
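
    # Illustrative sketch (not part of the original flow; `exampleResponses` is a
    # hypothetical list of raw model outputs): how the (sections, wasJsonComplete)
    # contract of the method above drives the looping caller.
    def _exampleExtractionContract(self, exampleResponses: List[str]) -> None:
        for i, raw in enumerate(exampleResponses, start=1):
            sections, wasJsonComplete = self._extractSectionsFromResponse(raw, i, "example")
            # Valid JSON with sections       -> (sections, True): caller may stop
            # Repaired (broken) JSON         -> (sections, False): caller continues
            # Continuation fragment, no data -> ([], False): caller retries
            logger.debug(f"iteration={i} sections={len(sections)} complete={wasJsonComplete}")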

    def _shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: str = None
    ) -> bool:
        """
        Determine if generation should continue based on JSON completeness and complete_response flag.
        Returns True if we should continue, False if done.
        """
        if len(allSections) == 0:
            return True  # No sections yet, continue

        # Check for complete_response flag in raw response
        if rawResponse:
            import re
            if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
                return False

        # If JSON was complete (and no complete_response flag), we're done
        # If JSON was broken and repaired, continue to get more content
        if wasJsonComplete:
            return False
        else:
            return True
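
    # Decision-matrix sketch (illustrative only; mirrors _shouldContinueGeneration):
    #   no sections accumulated yet                    -> continue
    #   raw response contains "complete_response":true -> stop
    #   JSON parsed cleanly (wasJsonComplete=True)     -> stop
    #   JSON was broken and repaired                   -> continue
    def _exampleContinuationDecisions(self) -> None:
        assert self._shouldContinueGeneration([], 1, True) is True
        assert self._shouldContinueGeneration([{"id": "s1"}], 2, False, '{"complete_response": true}') is False
        assert self._shouldContinueGeneration([{"id": "s1"}], 2, True) is False
        assert self._shouldContinueGeneration([{"id": "s1"}], 2, False) is True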

    def _buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]]
    ) -> str:
        """
        Build final JSON result from accumulated sections.
        """
        if not allSections:
            return ""

        # Build documents structure
        # Assuming single document for now
        documents = [{
            "id": "doc_1",
            "title": "Generated Document",  # This should come from prompt
            "filename": "document.json",
            "sections": allSections
        }]

        result = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": documents
        }

        return json.dumps(result, indent=2)
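
    # Shape sketch (illustrative): the method above wraps accumulated sections
    # in the metadata/documents envelope, e.g.
    #
    #   {
    #     "metadata": {"split_strategy": "single_document",
    #                  "source_documents": [], "extraction_method": "ai_generation"},
    #     "documents": [{"id": "doc_1", "title": "Generated Document",
    #                    "filename": "document.json", "sections": [...]}]
    #   }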

    # Public API Methods

    # Planning AI Call
    async def callAiPlanning(
        self,
        prompt: str,
        placeholders: Optional[List[PromptPlaceholder]] = None
    ) -> str:
        """
        Planning AI call for task planning, action planning, action selection, etc.
        Always uses static parameters optimized for planning tasks.

        Args:
            prompt: The planning prompt
            placeholders: Optional list of placeholder replacements

        Returns:
            Planning JSON response
        """
        await self._ensureAiObjectsInitialized()

        # Planning calls always use static parameters
        options = AiCallOptions(
            operationType=OperationTypeEnum.PLAN,
            priority=PriorityEnum.QUALITY,
            processingMode=ProcessingModeEnum.DETAILED,
            compressPrompt=False,
            compressContext=False
        )

        # Build full prompt with placeholders
        if placeholders:
            placeholdersDict = {p.label: p.content for p in placeholders}
            fullPrompt = self._buildPromptWithPlaceholders(prompt, placeholdersDict)
        else:
            fullPrompt = prompt

        # Root-cause fix: planning must return raw single-shot JSON, not section-based output
        request = AiCallRequest(
            prompt=fullPrompt,
            context="",
            options=options
        )

        # Debug: persist prompt/response for analysis
        self.services.utils.writeDebugFile(fullPrompt, "plan_prompt")
        response = await self.aiObjects.call(request)
        result = response.content or ""
        self.services.utils.writeDebugFile(result, "plan_response")
        return result
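
    # Usage sketch (illustrative; the placeholder label and prompt are example
    # values, and the exact placeholder substitution syntax is assumed):
    #
    #   placeholders = [PromptPlaceholder(label="GOAL", content="draft a rollout plan")]
    #   planJson = await aiService.callAiPlanning("Create an action plan for: GOAL", placeholders)
    #   plan = json.loads(planJson)  # planning calls return a JSON string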

    # Document Generation AI Call
    async def callAiDocuments(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]] = None,
        options: Optional[AiCallOptions] = None,
        outputFormat: Optional[str] = None,
        title: Optional[str] = None
    ) -> Union[str, Dict[str, Any]]:
        """
        Document generation AI call for all non-planning calls.
        Uses the current unified path with extraction and generation.

        Args:
            prompt: The main prompt for the AI call
            documents: Optional list of documents to process
            options: AI call configuration options
            outputFormat: Optional output format for document generation
            title: Optional title for generated documents

        Returns:
            AI response as string, or dict with documents if outputFormat is specified
        """
        await self._ensureAiObjectsInitialized()

        # Create separate operationId for detailed progress tracking
        workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
        aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"

        # Start progress tracking for this operation
        self.services.workflow.progressLogStart(
            aiOperationId,
            "AI call with documents",
            "Document Generation",
            f"Format: {outputFormat or 'text'}"
        )

        try:
            # Ensure AI connectors are initialized before delegating to documentProcessor/generator
            if hasattr(self.services, 'ai') and hasattr(self.services.ai, '_ensureAiObjectsInitialized'):
                await self.services.ai._ensureAiObjectsInitialized()
            if options is None or (hasattr(options, 'operationType') and options.operationType is None):
                # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
                self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
                options = await self._analyzePromptAndCreateOptions(prompt)

            # Route image-generation requests directly to image pipeline to avoid JSON loop
            imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"}
            opType = getattr(options, "operationType", None)
            fmt = (outputFormat or "").lower() if outputFormat else None
            isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats)
            if isImageRequest:
                self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
                imageResponse = await self.generateImage(prompt, options=options)
                self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated")
                self.services.workflow.progressLogFinish(aiOperationId, True)
                return imageResponse

            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
            # Compressing would truncate the template structure and confuse the AI
            if outputFormat:  # Document generation with structured output
                if not options:
                    options = AiCallOptions()
                options.compressPrompt = False  # JSON templates must NOT be truncated
                options.compressContext = False  # Context also should not be compressed

            # Handle document generation with specific output format using unified approach
            if outputFormat:
                # Use unified generation method for all document generation
                if documents and len(documents) > 0:
                    self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
                    extracted_content = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
                else:
                    self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
                    extracted_content = None

                self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
                from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
                # First call without continuation context
                generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)

                # Prepare prompt builder arguments for continuation
                promptArgs = {
                    "outputFormat": outputFormat,
                    "userPrompt": prompt,
                    "title": title,
                    "extracted_content": extracted_content
                }

                self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
                generated_json = await self._callAiWithLooping(
                    generation_prompt,
                    options,
                    "document_generation",
                    buildGenerationPrompt,
                    promptArgs,
                    aiOperationId
                )

                self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
                # Parse the generated JSON (extract fenced/embedded JSON first)
                try:
                    extracted_json = self.services.utils.jsonExtractString(generated_json)
                    generated_data = json.loads(extracted_json)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse generated JSON: {str(e)}")
                    logger.error(f"JSON content length: {len(generated_json)}")
                    logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
                    logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")

                    # Write the problematic JSON to debug file
                    self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")

                    self.services.workflow.progressLogFinish(aiOperationId, False)
                    return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}

                self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
                # Render to final format using the existing renderer
                try:
                    from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                    generationService = GenerationService(self.services)
                    rendered_content, mime_type = await generationService.renderReport(
                        generated_data, outputFormat, title or "Generated Document", prompt, self
                    )

                    # Build result in the expected format
                    result = {
                        "success": True,
                        "content": generated_data,
                        "documents": [{
                            "documentName": f"generated.{outputFormat}",
                            "documentData": rendered_content,
                            "mimeType": mime_type,
                            "title": title or "Generated Document"
                        }],
                        "is_multi_file": False,
                        "format": outputFormat,
                        "title": title,
                        "split_strategy": "single",
                        "total_documents": 1,
                        "processed_documents": 1
                    }

                    # Log AI response for debugging
                    self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)

                    self.services.workflow.progressLogFinish(aiOperationId, True)
                    return result

                except Exception as e:
                    logger.error(f"Error rendering document: {str(e)}")
                    self.services.workflow.progressLogFinish(aiOperationId, False)
                    return {"success": False, "error": f"Rendering failed: {str(e)}"}

            # Handle text calls (no output format specified)
            self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
            if documents:
                # Use document processing for text calls with documents
                result = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
            else:
                # Use shared core function for direct text calls
                result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)

            self.services.workflow.progressLogFinish(aiOperationId, True)
            return result

        except Exception as e:
            logger.error(f"Error in callAiDocuments: {str(e)}")
            self.services.workflow.progressLogFinish(aiOperationId, False)
            raise
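
    # Usage sketch (illustrative; prompt, title, and document variable are
    # example values):
    #
    #   result = await aiService.callAiDocuments(
    #       prompt="Summarize the attached report",
    #       documents=[chatDocument],
    #       outputFormat="pdf",
    #       title="Quarterly Summary",
    #   )
    #   if isinstance(result, dict) and result.get("success"):
    #       pdfBytes = result["documents"][0]["documentData"]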

    # AI Image Analysis
    async def readImage(
        self,
@@ -102,7 +672,64 @@ class AiService:
    ) -> str:
        """Call AI for image analysis using interface.call() with contentParts."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.readImage(prompt, imageData, mimeType, options)

        try:
            # Check if imageData is valid
            if not imageData:
                error_msg = "No image data provided"
                logger.error(f"Error in AI image analysis: {error_msg}")
                return f"Error: {error_msg}"

            # Always use IMAGE_ANALYSE operation type for image processing
            if options is None:
                options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
            else:
                # Override the operation type to ensure image analysis
                options.operationType = OperationTypeEnum.IMAGE_ANALYSE

            # Create content parts with image data
            from modules.datamodels.datamodelExtraction import ContentPart
            import base64

            # ContentPart.data must be a string - convert bytes to base64 if needed
            if isinstance(imageData, bytes):
                imageDataStr = base64.b64encode(imageData).decode('utf-8')
            else:
                # Already a base64 string
                imageDataStr = imageData

            imagePart = ContentPart(
                id="image_0",
                parentId=None,
                label="Image",
                typeGroup="image",
                mimeType=mimeType or "image/jpeg",
                data=imageDataStr,  # Must be a string (base64 encoded)
                metadata={"imageAnalysis": True}
            )

            # Create request with content parts
            request = AiCallRequest(
                prompt=prompt,
                context="",
                options=options,
                contentParts=[imagePart]
            )

            response = await self.aiObjects.call(request)
            result = response.content

            # Check if result is valid
            if not result or (isinstance(result, str) and not result.strip()):
                error_msg = f"No response from AI image analysis (result: {repr(result)})"
                logger.error(f"Error in AI image analysis: {error_msg}")
                return f"Error: {error_msg}"

            return result
        except Exception as e:
            logger.error(f"Error in AI image analysis: {str(e)}")
            return f"Error: {str(e)}"
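
    # Usage sketch (illustrative; the file path is an example value):
    #
    #   with open("photo.jpg", "rb") as f:
    #       rawBytes = f.read()  # bytes are base64-encoded internally
    #   description = await aiService.readImage("Describe this image", rawBytes, mimeType="image/jpeg")
    #   # failures come back as plain strings prefixed with "Error: "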

    # AI Image Generation
    async def generateImage(
@@ -115,34 +742,19 @@
    ) -> Dict[str, Any]:
        """Generate an image using the AI interface's generateImage()."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.generateImage(prompt, size, quality, style, options)

    # Core AI Methods - Delegating to SubCoreAi
    async def callAiPlanning(
        self,
        prompt: str,
        placeholders: Optional[List[PromptPlaceholder]] = None
    ) -> str:
        """Planning AI call for task planning, action planning, action selection, etc."""
        await self._ensureAiObjectsInitialized()
        # Always use "json" for planning calls since they return JSON
        return await self.coreAi.callAiPlanning(prompt, placeholders)

    async def callAiDocuments(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]] = None,
        options: Optional[AiCallOptions] = None,
        outputFormat: Optional[str] = None,
        title: Optional[str] = None
    ) -> Union[str, Dict[str, Any]]:
        """Document generation AI call for all non-planning calls."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)

    def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
        """Sanitize prompt content to prevent injection attacks and ensure safe presentation."""
        return sanitizePromptContent(content, contentType)

        try:
            response = await self.aiObjects.generateImage(prompt, size, quality, style, options)

            # Emit stats for image generation
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.generate.image"
            )

            return response
        except Exception as e:
            logger.error(f"Error in AI image generation: {str(e)}")
            return {"success": False, "error": str(e)}
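
    # Usage sketch (illustrative; the prompt is an example value):
    #
    #   image = await aiService.generateImage(
    #       "A lighthouse at dusk", size="1024x1024", quality="standard", style="vivid"
    #   )
    #   if not image.get("success", True):
    #       logger.error(image["error"])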

@@ -1,687 +0,0 @@
import json
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.services.serviceAi.subSharedAiUtils import (
    buildPromptWithPlaceholders,
    extractTextFromContentParts,
    reduceText,
    determineCallType
)
from modules.shared.jsonUtils import (
    extractJsonString,
    repairBrokenJson,
    extractSectionsFromDocument,
    buildContinuationContext
)

logger = logging.getLogger(__name__)

# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT
# Sections are accumulated and repair mechanism handles broken JSON automatically

# Rebuild the model to resolve forward references
AiCallRequest.model_rebuild()


class SubCoreAi:
    """Core AI operations including image analysis, text generation, and planning calls."""

    def __init__(self, services, aiObjects):
        """Initialize core AI operations.

        Args:
            services: Service center instance for accessing other services
            aiObjects: Initialized AiObjects instance
        """
        self.services = services
        self.aiObjects = aiObjects

    async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
        """Analyze prompt to determine appropriate AiCallOptions parameters."""
        try:
            # Get dynamic enum values from Pydantic models
            operation_types = [e.value for e in OperationTypeEnum]
            priorities = [e.value for e in PriorityEnum]
            processing_modes = [e.value for e in ProcessingModeEnum]

            # Create analysis prompt for AI to determine operation type and parameters
            analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.

PROMPT TO ANALYZE:
{self.services.ai.sanitizePromptContent(prompt, 'userinput')}

Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operation_types)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processing_modes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)

Respond with ONLY a JSON object in this exact format:
{{
    "operationType": "dataAnalyse",
    "priority": "balanced",
    "processingMode": "basic",
    "compressPrompt": true,
    "compressContext": true
}}
"""

            # Use AI to analyze the prompt
            request = AiCallRequest(
                prompt=analysisPrompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_ANALYSE,
                    priority=PriorityEnum.SPEED,
                    processingMode=ProcessingModeEnum.BASIC,
                    compressPrompt=True,
                    compressContext=False
                )
            )

            response = await self.aiObjects.call(request)

            # Parse AI response
            try:
                import json
                json_start = response.content.find('{')
                json_end = response.content.rfind('}') + 1
                if json_start != -1 and json_end > json_start:
                    analysis = json.loads(response.content[json_start:json_end])

                    # Map string values to enums
                    operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
                    priority = PriorityEnum(analysis.get('priority', 'balanced'))
                    processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))

                    return AiCallOptions(
                        operationType=operation_type,
                        priority=priority,
                        processingMode=processing_mode,
                        compressPrompt=analysis.get('compressPrompt', True),
                        compressContext=analysis.get('compressContext', True)
                    )
            except Exception as e:
                logger.warning(f"Failed to parse AI analysis response: {e}")

        except Exception as e:
            logger.warning(f"Prompt analysis failed: {e}")

        # Fallback to default options
        return AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.BALANCED,
            processingMode=ProcessingModeEnum.BASIC
        )
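
    # Round-trip sketch (illustrative): the analyzer call above expects the model
    # to answer with a small JSON object that maps directly onto AiCallOptions, e.g.
    #
    #   {"operationType": "dataAnalyse", "priority": "balanced",
    #    "processingMode": "basic", "compressPrompt": true, "compressContext": true}
    #
    # Any parse failure falls through to the DATA_ANALYSE/BALANCED/BASIC defaults above.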

    # Shared Core Function for AI Calls with Looping and Repair
    async def _callAiWithLooping(
        self,
        prompt: str,
        options: AiCallOptions,
        debugPrefix: str = "ai_call",
        promptBuilder: Optional[callable] = None,
        promptArgs: Optional[Dict[str, Any]] = None,
        operationId: Optional[str] = None
    ) -> str:
        """
        Shared core function for AI calls with repair-based looping system.
        Automatically repairs broken JSON and continues generation seamlessly.

        Args:
            prompt: The prompt to send to AI
            options: AI call configuration options
            debugPrefix: Prefix for debug file names
            promptBuilder: Optional function to rebuild prompts for continuation
            promptArgs: Optional arguments for prompt builder
            operationId: Optional operation ID for progress tracking

        Returns:
            Complete AI response after all iterations
        """
        max_iterations = 50  # Prevent infinite loops
        iteration = 0
        allSections = []  # Accumulate all sections across iterations
        lastRawResponse = None  # Store last raw JSON response for continuation

        while iteration < max_iterations:
            iteration += 1

            # Update progress for iteration start
            if operationId:
                if iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
                else:
                    # For continuation iterations, show progress incrementally
                    base_progress = 0.5 + (min(iteration - 1, max_iterations) / max_iterations * 0.4)  # Progress from 0.5 to 0.9 over max_iterations iterations
                    self.services.workflow.progressLogUpdate(operationId, base_progress, f"Continuing generation (iteration {iteration})")

            # Build iteration prompt
            if len(allSections) > 0 and promptBuilder and promptArgs:
                # This is a continuation - build continuation context with raw JSON and rebuild prompt
                continuationContext = buildContinuationContext(allSections, lastRawResponse)
                if not lastRawResponse:
                    logger.warning(f"Iteration {iteration}: No previous response available for continuation!")

                # Rebuild prompt with continuation context using the provided prompt builder
                iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
            else:
                # First iteration - use original prompt
                iterationPrompt = prompt

            # Make AI call
            try:
                if operationId and iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
                from modules.datamodels.datamodelAi import AiCallRequest
                request = AiCallRequest(
                    prompt=iterationPrompt,
                    context="",
                    options=options
                )

                # Write the ACTUAL prompt sent to AI
                if iteration == 1:
                    self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
                else:
                    self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

                response = await self.aiObjects.call(request)
                result = response.content

                # Update progress after AI call
                if operationId:
                    if iteration == 1:
                        self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
                    else:
                        progress = 0.6 + (min(iteration - 1, 10) * 0.03)
                        self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")

                # Write raw AI response to debug file
                if iteration == 1:
                    self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
                else:
                    self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")

                # Emit stats for this iteration
                self.services.workflow.storeWorkflowStat(
                    self.services.currentWorkflow,
                    response,
                    f"ai.call.{debugPrefix}.iteration_{iteration}"
                )

                if not result or not result.strip():
                    logger.warning(f"Iteration {iteration}: Empty response, stopping")
                    break

                # Store raw response for continuation (even if broken)
                lastRawResponse = result

                # Check for complete_response flag in raw response (before parsing)
                import re
                if re.search(r'"complete_response"\s*:\s*true', result, re.IGNORECASE):
                    pass  # Flag detected, will stop in _shouldContinueGeneration

                # Extract sections from response (handles both valid and broken JSON)
                extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

                # Update progress after parsing
                if operationId:
                    if extractedSections:
                        self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")

                if not extractedSections:
                    # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
                    if iteration > 1 and not wasJsonComplete:
                        logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                        continue
                    # Otherwise, stop if no sections
                    logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                    break

                # Add new sections to accumulator
                allSections.extend(extractedSections)

                # Check if we should continue (completion detection)
                if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                    continue
                else:
                    # Done - build final result
                    if operationId:
                        self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
                    break

            except Exception as e:
                logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
                break

        if iteration >= max_iterations:
            logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")

        # Build final result from accumulated sections
        final_result = self._buildFinalResultFromSections(allSections)

        # Write final result to debug file
        self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")

        return final_result

    def _extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str
    ) -> Tuple[List[Dict[str, Any]], bool]:
        """
        Extract sections from AI response, handling both valid and broken JSON.
        Uses repair mechanism for broken JSON.
        Checks for "complete_response": true flag to determine completion.
        Returns (sections, wasJsonComplete)
        """
        # First, try to parse as valid JSON
        try:
            extracted = extractJsonString(result)
            parsed_result = json.loads(extracted)

            # Check if AI marked response as complete
            isComplete = parsed_result.get("complete_response", False) == True

            # Extract sections from parsed JSON
            sections = extractSectionsFromDocument(parsed_result)

            # If AI marked as complete, always return as complete
            if isComplete:
                return sections, True

            # If in continuation mode (iteration > 1), continuation responses are expected to be fragments
            # A fragment with 0 extractable sections means JSON is incomplete - need another iteration
            if len(sections) == 0 and iteration > 1:
                return sections, False  # Mark as incomplete so loop continues

            # First iteration with 0 sections means empty response - stop
            if len(sections) == 0:
                return sections, True  # Complete but empty

            return sections, True  # JSON was complete with sections

        except json.JSONDecodeError as e:
            # Broken JSON - try repair mechanism (normal in iterative generation)
            self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")

            # Try to repair
            repaired_json = repairBrokenJson(result)

            if repaired_json:
                # Extract sections from repaired JSON
                sections = extractSectionsFromDocument(repaired_json)
                return sections, False  # JSON was broken but repaired
            else:
                # Repair failed - log error
                logger.error(f"Iteration {iteration}: All repair strategies failed")
                return [], False

        except Exception as e:
            logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
            return [], False

    def _shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: str = None
    ) -> bool:
        """
        Determine if generation should continue based on JSON completeness and complete_response flag.
        Returns True if we should continue, False if done.
        """
        if len(allSections) == 0:
            return True  # No sections yet, continue

        # Check for complete_response flag in raw response
        if rawResponse:
            import re
            if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
                return False

        # If JSON was complete (and no complete_response flag), we're done
        # If JSON was broken and repaired, continue to get more content
        if wasJsonComplete:
            return False
        else:
            return True

    def _buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]]
    ) -> str:
        """
        Build final JSON result from accumulated sections.
        """
        if not allSections:
            return ""

        # Build documents structure
        # Assuming single document for now
        documents = [{
            "id": "doc_1",
            "title": "Generated Document",  # This should come from prompt
            "filename": "document.json",
            "sections": allSections
        }]

        result = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": documents
        }

        return json.dumps(result, indent=2)

    # Old _buildContinuationPrompt and _mergeJsonContent methods removed
    # Now handled by repair mechanism in jsonUtils.py and section accumulation

    # Planning AI Call
    async def callAiPlanning(
        self,
        prompt: str,
        placeholders: Optional[List[PromptPlaceholder]] = None
    ) -> str:
        """
        Planning AI call for task planning, action planning, action selection, etc.
        Always uses static parameters optimized for planning tasks.

        Args:
            prompt: The planning prompt
            placeholders: Optional list of placeholder replacements

        Returns:
            Planning JSON response
        """
        # Planning calls always use static parameters
        options = AiCallOptions(
            operationType=OperationTypeEnum.PLAN,
            priority=PriorityEnum.QUALITY,
            processingMode=ProcessingModeEnum.DETAILED,
            compressPrompt=False,
            compressContext=False
        )

        # Build full prompt with placeholders
        if placeholders:
            placeholders_dict = {p.label: p.content for p in placeholders}
            full_prompt = buildPromptWithPlaceholders(prompt, placeholders_dict)
        else:
            full_prompt = prompt

        # Use shared core function with planning-specific debug prefix
        return await self._callAiWithLooping(full_prompt, options, "plan")

    # Document Generation AI Call
    async def callAiDocuments(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]] = None,
        options: Optional[AiCallOptions] = None,
        outputFormat: Optional[str] = None,
        title: Optional[str] = None
    ) -> Union[str, Dict[str, Any]]:
        """
        Document generation AI call for all non-planning calls.
        Uses the current unified path with extraction and generation.

        Args:
            prompt: The main prompt for the AI call
            documents: Optional list of documents to process
            options: AI call configuration options
            outputFormat: Optional output format for document generation
            title: Optional title for generated documents

        Returns:
            AI response as string, or dict with documents if outputFormat is specified
        """
        # Create separate operationId for detailed progress tracking
        import time
        import uuid
        workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
        aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"

        # Start progress tracking for this operation
        self.services.workflow.progressLogStart(
            aiOperationId,
            "AI call with documents",
            "Document Generation",
            f"Format: {outputFormat or 'text'}"
        )

        try:
            if options is None or (hasattr(options, 'operationType') and options.operationType is None):
                # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
                self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
                options = await self._analyzePromptAndCreateOptions(prompt)

            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
            # Compressing would truncate the template structure and confuse the AI
            if outputFormat:  # Document generation with structured output
                if not options:
                    options = AiCallOptions()
                options.compressPrompt = False  # JSON templates must NOT be truncated
                options.compressContext = False  # Context also should not be compressed

            # Handle document generation with specific output format using unified approach
            if outputFormat:
                # Use unified generation method for all document generation
                if documents and len(documents) > 0:
                    self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
                    extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
                else:
                    self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
                    extracted_content = None

                self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
                from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
                # First call without continuation context
                generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)

                # Prepare prompt builder arguments for continuation
                promptArgs = {
                    "outputFormat": outputFormat,
                    "userPrompt": prompt,
                    "title": title,
                    "extracted_content": extracted_content
                }

                self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
                generated_json = await self._callAiWithLooping(
                    generation_prompt,
                    options,
                    "document_generation",
                    buildGenerationPrompt,
                    promptArgs,
                    aiOperationId
                )

                self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
                # Parse the generated JSON (extract fenced/embedded JSON first)
                try:
                    extracted_json = self.services.utils.jsonExtractString(generated_json)
                    generated_data = json.loads(extracted_json)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse generated JSON: {str(e)}")
                    logger.error(f"JSON content length: {len(generated_json)}")
                    logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
                    logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")

                    # Write the problematic JSON to debug file
                    self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")

                    self.services.workflow.progressLogFinish(aiOperationId, False)
                    return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}

                self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
                # Render to final format using the existing renderer
                try:
                    from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                    generationService = GenerationService(self.services)
                    rendered_content, mime_type = await generationService.renderReport(
                        generated_data, outputFormat, title or "Generated Document", prompt, self
                    )

                    # Build result in the expected format
                    result = {
                        "success": True,
                        "content": generated_data,
                        "documents": [{
                            "documentName": f"generated.{outputFormat}",
                            "documentData": rendered_content,
                            "mimeType": mime_type,
                            "title": title or "Generated Document"
                        }],
                        "is_multi_file": False,
                        "format": outputFormat,
                        "title": title,
                        "split_strategy": "single",
                        "total_documents": 1,
                        "processed_documents": 1
                    }

                    # Log AI response for debugging
                    self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)

                    self.services.workflow.progressLogFinish(aiOperationId, True)
                    return result

                except Exception as e:
                    logger.error(f"Error rendering document: {str(e)}")
                    self.services.workflow.progressLogFinish(aiOperationId, False)
                    return {"success": False, "error": f"Rendering failed: {str(e)}"}

            # Handle text calls (no output format specified)
            self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
            if documents:
                # Use document processing for text calls with documents
                result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
            else:
                # Use shared core function for direct text calls
                result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)

            self.services.workflow.progressLogFinish(aiOperationId, True)
            return result

        except Exception as e:
            logger.error(f"Error in callAiDocuments: {str(e)}")
            self.services.workflow.progressLogFinish(aiOperationId, False)
            raise

    # AI Image Analysis
    async def readImage(
        self,
        prompt: str,
        imageData: Union[str, bytes],
        mimeType: str = None,
        options: Optional[AiCallOptions] = None,
    ) -> str:
        """Call AI for image analysis using interface.call() with contentParts."""
        try:
            # Check if imageData is valid
            if not imageData:
                error_msg = "No image data provided"
                logger.error(f"Error in AI image analysis: {error_msg}")
                return f"Error: {error_msg}"

            # Always use IMAGE_ANALYSE operation type for image processing
            if options is None:
                options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
            else:
                # Override the operation type to ensure image analysis
                options.operationType = OperationTypeEnum.IMAGE_ANALYSE

            # Create content parts with image data
            from modules.datamodels.datamodelExtraction import ContentPart
            import base64

            # ContentPart.data must be a string - convert bytes to base64 if needed
            if isinstance(imageData, bytes):
                imageDataStr = base64.b64encode(imageData).decode('utf-8')
            else:
                # Already a base64 string
                imageDataStr = imageData

            imagePart = ContentPart(
                id="image_0",
                parentId=None,
                label="Image",
                typeGroup="image",
                mimeType=mimeType or "image/jpeg",
                data=imageDataStr,  # Must be a string (base64 encoded)
                metadata={"imageAnalysis": True}
            )

            # Create request with content parts
            from modules.datamodels.datamodelAi import AiCallRequest
            request = AiCallRequest(
                prompt=prompt,
                context="",
                options=options,
                contentParts=[imagePart]
            )

            response = await self.aiObjects.call(request)
            result = response.content

            # Check if result is valid
            if not result or (isinstance(result, str) and not result.strip()):
                error_msg = f"No response from AI image analysis (result: {repr(result)})"
                logger.error(f"Error in AI image analysis: {error_msg}")
                return f"Error: {error_msg}"

            return result
        except Exception as e:
            logger.error(f"Error in AI image analysis: {str(e)}")
            return f"Error: {str(e)}"

    # AI Image Generation
    async def generateImage(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        options: Optional[AiCallOptions] = None,
    ) -> Dict[str, Any]:
        """Generate an image using the AI interface's generateImage()."""
        try:
            response = await self.aiObjects.generateImage(prompt, size, quality, style, options)

            # Emit stats for image generation
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.generate.image"
            )

            # Convert response to dict format for backward compatibility
            if hasattr(response, 'content'):
                return {
                    "success": True,
                    "content": response.content,
                    "modelName": response.modelName,
                    "priceUsd": response.priceUsd,
                    "processingTime": response.processingTime
                }
            else:
                return response
        except Exception as e:
            logger.error(f"Error in AI image generation: {str(e)}")
            return {"success": False, "error": str(e)}

@@ -1,500 +0,0 @@
import re
import json
import logging
import time
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions

logger = logging.getLogger(__name__)


class SubDocumentGeneration:
    """Document generation operations including single-file and multi-file generation."""

    def __init__(self, services, aiObjects, documentProcessor):
        """Initialize document generation service.

        Args:
            services: Service center instance for accessing other services
            aiObjects: Initialized AiObjects instance
            documentProcessor: Document processing service instance
        """
        self.services = services
        self.aiObjects = aiObjects
        self.documentProcessor = documentProcessor

    async def callAiWithDocumentGeneration(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]],
        options: AiCallOptions,
        outputFormat: str,
        title: Optional[str]
    ) -> Dict[str, Any]:
        """
        Unified document generation method that handles both single and multi-file cases.
        Always uses multi-file approach internally.

        Args:
            prompt: The main prompt for the AI call
            documents: Optional list of documents to process
            options: AI call configuration options
            outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
            title: Optional title for generated documents

        Returns:
            Dict with generated documents and metadata in unified structure
        """
        try:
            # 1. Get unified extraction prompt
            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
            generationService = GenerationService(self.services)

            extractionPrompt = await generationService.getAdaptiveExtractionPrompt(
                outputFormat=outputFormat,
                userPrompt=prompt,
                title=title,
                aiService=self
            )

            # 2. Process with unified pipeline (always multi-file approach)
            aiResponse = await self._processDocumentsUnified(
                documents, extractionPrompt, options
            )

            # 3. Return unified result structure
            return await self._buildUnifiedResult(aiResponse, outputFormat, title)

        except Exception as e:
            logger.error(f"Error in unified document generation: {str(e)}")
            return self._buildErrorResult(str(e), outputFormat, title)
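
    # Usage sketch (illustrative; argument values are examples):
    #
    #   result = await subDocumentGeneration.callAiWithDocumentGeneration(
    #       prompt="Turn these notes into a handbook",
    #       documents=notes,            # list of ChatDocument, or None
    #       options=AiCallOptions(),
    #       outputFormat="docx",
    #       title="Team Handbook",
    #   )
    #   # the result always uses the unified structure: result["documents"] is a list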

    async def _processDocumentsUnified(
        self,
        documents: Optional[List[ChatDocument]],
        extractionPrompt: str,
        options: AiCallOptions
    ) -> Dict[str, Any]:
        """
        Unified document processing that handles both single and multi-file cases.
        Always processes as multi-file structure internally.
        """
        # Init progress logger
        workflow = self.services.currentWorkflow
        operationId = f"docGenUnified_{workflow.id}_{int(time.time())}"

        try:
            # Start progress tracking
            self.services.workflow.progressLogStart(
                operationId,
                "Generate",
                "Unified Document Generation",
                f"Processing {len(documents) if documents else 0} documents"
            )

            # Update progress - generating extraction prompt
            self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt")

            # Write prompt to debug file
            self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents)

            # Process with unified JSON pipeline using continuation logic
            aiResponse = await self.documentProcessor.processDocumentsWithContinuation(
                documents, extractionPrompt, options
            )

            # Update progress - AI processing completed
            self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done")

            # Write AI response to debug file
            response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False) if isinstance(aiResponse, dict) else str(aiResponse)
            self.services.utils.writeDebugFile(response_json, "ai_response", documents)

            # Validate response structure
            if not self._validateUnifiedResponseStructure(aiResponse):
                raise Exception("AI response is not valid unified document structure")

            # Emit raw extracted data as a chat message attachment
            try:
                await self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified")
            except Exception:
                logger.warning("Failed to emit raw extraction chat message (unified)")

            # Complete progress tracking
            self.services.workflow.progressLogFinish(operationId, True)

            return aiResponse

        except Exception as e:
            logger.error(f"Error in unified document processing: {str(e)}")
            self.services.workflow.progressLogFinish(operationId, False)
            raise

    def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool:
        """
        Unified validation that checks for document structure.
        Handles both multi-file (documents array) and single-file (sections array) structures.
        """
        try:
            if not isinstance(response, dict):
                logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
                return False

            # Check for documents array (multi-file structure)
            hasDocuments = "documents" in response
            isDocumentsList = isinstance(response.get("documents"), list)

            # Check for sections array (single-file structure)
            hasSections = "sections" in response
            isSectionsList = isinstance(response.get("sections"), list)

            if hasDocuments and isDocumentsList:
                # Multi-file structure
                documents = response.get("documents", [])
                if not documents:
                    logger.warning("Unified validation failed: documents array is empty")
                    return False

                # Validate each document individually
                validDocuments = 0
                for i, doc in enumerate(documents):
                    if self._validateDocumentStructure(doc, i):
                        validDocuments += 1
                    else:
                        logger.warning(f"Document {i} failed validation, but continuing with others")

                # Process succeeds if at least one document is valid
                if validDocuments == 0:
                    logger.error("Unified validation failed: no valid documents found")
                    return False

                logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid")
                return True

            elif hasSections and isSectionsList:
                # Single-file structure - convert to multi-file format
                logger.info("Converting single-file structure to multi-file format")
                sections = response.get("sections", [])
                if not sections:
                    logger.warning("Unified validation failed: sections array is empty")
                    return False

                # Convert to documents array format
                response["documents"] = [{
                    "id": "document_1",
                    "title": response.get("metadata", {}).get("title", "Generated Document"),
                    "filename": "document_1",
                    "sections": sections
                }]

                logger.info("Successfully converted single-file structure to multi-file format")
                return True

            else:
                # No valid structure found - fail with clear error details
                logger.error("Unified validation failed: No valid structure found")
                logger.error(f"Response type: {type(response)}")
                logger.error(f"Available keys: {list(response.keys()) if isinstance(response, dict) else 'Not a dict'}")
                logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}")
                logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}")
                logger.error(f"Full response: {response}")
                return False

        except Exception as e:
            logger.warning(f"Unified response validation failed with exception: {str(e)}")
            return False
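
    # Conversion sketch (illustrative): a single-file response is rewritten in
    # place into the multi-file shape, e.g.
    #
    #   {"metadata": {"title": "Report"}, "sections": [s1, s2]}
    #   becomes
    #   {..., "documents": [{"id": "document_1", "title": "Report",
    #                        "filename": "document_1", "sections": [s1, s2]}]}
    #
    # so downstream code can always iterate response["documents"].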

    def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool:
        """
        Validate individual document structure.
        Returns True if document is valid, False otherwise.
        Does not fail the entire process if one document is invalid.
        """
        try:
            if not isinstance(document, dict):
                logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}")
                logger.error(f"Document {documentIndex} content: {document}")
                return False

            # Check for required fields
            hasTitle = "title" in document
            hasSections = "sections" in document
            isSectionsList = isinstance(document.get("sections"), list)

            logger.debug(f"Document {documentIndex} structure check:")
            logger.debug(f" - hasTitle: {hasTitle}")
            logger.debug(f" - hasSections: {hasSections}")
            logger.debug(f" - isSectionsList: {isSectionsList}")
            logger.debug(f" - available keys: {list(document.keys())}")

            if not (hasTitle and hasSections and isSectionsList):
                logger.error(f"Document {documentIndex} validation failed:")
                logger.error(f" - title present: {hasTitle}")
                logger.error(f" - sections present: {hasSections}")
                logger.error(f" - sections is list: {isSectionsList}")
                logger.error(f" - document content: {document}")
                return False

            sections = document.get("sections", [])
            if not sections:
                logger.error(f"Document {documentIndex} validation failed: sections array is empty")
                logger.error(f" - document content: {document}")
                return False

            logger.info(f"Document {documentIndex} validation passed")
            return True

        except Exception as e:
            logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}")
            logger.error(f" - document content: {document}")
            return False
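
    # Minimal passing document (illustrative): a dict with a title and a
    # non-empty sections list satisfies the checks above, e.g.
    #
    #   {"title": "Overview", "sections": [{"heading": "Intro", "content": "..."}]}
    #
    # ("heading"/"content" are example section fields, not requirements of this check).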

    async def _buildUnifiedResult(
        self,
        aiResponse: Dict[str, Any],
        outputFormat: str,
        title: str
    ) -> Dict[str, Any]:
        """
        Build unified result structure that always returns array-based format.
        Content is always a multi-document structure.
        """
        try:
            # Process all documents uniformly
            generatedDocuments = []
            documents = aiResponse.get("documents", [])

            for i, docData in enumerate(documents):
                try:
                    processedDocument = await self._processDocument(
                        docData, outputFormat, title, i
                    )
                    generatedDocuments.append(processedDocument)
                except Exception as e:
                    logger.warning(f"Failed to process document {i}: {str(e)}, skipping")
                    continue

            if not generatedDocuments:
                raise Exception("No documents could be processed successfully")

            # Build unified result
            result = {
                "success": True,
                "content": aiResponse,  # Always multi-document structure
                "documents": generatedDocuments,  # Always array
                "is_multi_file": len(generatedDocuments) > 1,
                "format": outputFormat,
                "title": title,
                "total_documents": len(generatedDocuments),
                "processed_documents": len(generatedDocuments)
            }

            return result

        except Exception as e:
            logger.error(f"Error building unified result: {str(e)}")
            return self._buildErrorResult(str(e), outputFormat, title)
|
||||
|
||||
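    # Illustrative only (not part of the diff): the result contract that callers of
    # _buildUnifiedResult can rely on, sketched for a single processed document.
    # All keys are taken from the code above; the values are made up.
    #
    # exampleResult = {
    #     "success": True,
    #     "content": {...},             # original AI response, multi-document structure
    #     "documents": [processedDoc],  # always a list, even for one document
    #     "is_multi_file": False,
    #     "format": "pdf",
    #     "title": "Quarterly Report",
    #     "total_documents": 1,
    #     "processed_documents": 1,
    # }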
    async def _processDocument(
        self,
        docData: Dict[str, Any],
        outputFormat: str,
        title: str,
        documentIndex: int
    ) -> Dict[str, Any]:
        """
        Process individual document with content enhancement and rendering.
        """
        try:
            # Get generation service
            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
            generationService = GenerationService(self.services)

            # Use AI generation to enhance the extracted JSON before rendering
            enhancedContent = docData  # Default to original
            if docData.get("sections"):
                try:
                    # Get generation prompt directly
                    from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
                    generationPrompt = await buildGenerationPrompt(
                        outputFormat=outputFormat,
                        userPrompt=title,
                        title=docData.get("title", title)
                    )

                    # Prepare the AI call
                    from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
                    requestOptions = AiCallOptions()
                    requestOptions.operationType = OperationTypeEnum.DATA_GENERATE

                    # Create context with the extracted JSON content
                    context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}"

                    request = AiCallRequest(
                        prompt=generationPrompt,
                        context=context,
                        options=requestOptions
                    )

                    # Write document generation prompt to debug file
                    self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt")

                    # Call AI to enhance the content
                    response = await self.aiObjects.call(request)

                    # Write document generation response to debug file
                    self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response")

                    if response and response.content:
                        # Parse the AI response as JSON
                        try:
                            result = response.content.strip()

                            # Extract JSON from markdown if present
                            jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
                            if jsonMatch:
                                result = jsonMatch.group(1).strip()
                            elif result.startswith('```json'):
                                result = re.sub(r'^```json\s*', '', result)
                                result = re.sub(r'\s*```$', '', result)
                            elif result.startswith('```'):
                                result = re.sub(r'^```\s*', '', result)
                                result = re.sub(r'\s*```$', '', result)

                            # Try to parse JSON
                            enhancedContent = json.loads(result)
                            logger.info(f"AI enhanced JSON content successfully for document {documentIndex}")

                        except json.JSONDecodeError as e:
                            logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content")
                            enhancedContent = docData
                    else:
                        logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content")
                        enhancedContent = docData

                except Exception as e:
                    logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content")
                    enhancedContent = docData

            # Render the enhanced JSON content
            renderedContent, mimeType = await generationService.renderReport(
                extractedContent=enhancedContent,
                outputFormat=outputFormat,
                title=docData.get("title", title),
                userPrompt=title,
                aiService=self
            )

            # Generate proper filename
            baseFilename = docData.get("filename", f"document_{documentIndex + 1}")
            if '.' in baseFilename:
                baseFilename = baseFilename.rsplit('.', 1)[0]

            # Add proper extension based on output format
            if outputFormat.lower() == "docx":
                filename = f"{baseFilename}.docx"
            elif outputFormat.lower() == "pdf":
                filename = f"{baseFilename}.pdf"
            elif outputFormat.lower() == "html":
                filename = f"{baseFilename}.html"
            else:
                filename = f"{baseFilename}.{outputFormat}"

            return {
                "documentName": filename,
                "documentData": renderedContent,
                "mimeType": mimeType,
                "title": docData.get("title", title),
                "documentIndex": documentIndex
            }

        except Exception as e:
            logger.error(f"Error processing document {documentIndex}: {str(e)}")
            raise

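    # Illustrative only (not part of the diff): the fence-stripping logic above,
    # factored into a standalone helper. A minimal sketch; the name
    # stripMarkdownJsonFences is hypothetical.
    #
    # import json
    # import re
    #
    # def stripMarkdownJsonFences(raw: str) -> str:
    #     """Return the JSON payload of an AI reply with any ```json fences removed."""
    #     text = raw.strip()
    #     match = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
    #     if match:
    #         return match.group(1).strip()
    #     text = re.sub(r'^```(json)?\s*', '', text)  # no-op if text has no fences
    #     return re.sub(r'\s*```$', '', text)
    #
    # # Usage: json.loads(stripMarkdownJsonFences('```json\n{"a": 1}\n```'))  # -> {'a': 1}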
    def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]:
        """
        Build error result with unified structure.
        """
        return {
            "success": False,
            "error": errorMessage,
            "content": {},
            "documents": [],
            "is_multi_file": False,
            "format": outputFormat,
            "title": title,
            "split_strategy": "error",
            "total_documents": 0,
            "processed_documents": 0
        }

    async def _callAiJson(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]],
        options: AiCallOptions
    ) -> Dict[str, Any]:
        """
        Handle AI calls with document processing for JSON output.
        Returns structured JSON document instead of text.
        """
        # Process documents with JSON merging
        return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)

    async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
        """
        Create a ChatMessage with the extracted raw JSON attached as a file so the user
        has access to the data even if downstream processing fails.
        """
        try:
            services = self.services
            workflow = services.currentWorkflow

            # Serialize payload
            ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
            content_text = json.dumps(payload, ensure_ascii=False, indent=2)
            content_bytes = content_text.encode('utf-8')

            # Store as file via component storage
            file_name = f"{label}_{ts}.json"
            file_item = services.interfaceDbComponent.createFile(
                name=file_name,
                mimeType="application/json",
                content=content_bytes
            )
            services.interfaceDbComponent.createFileData(file_item.id, content_bytes)

            # Lookup file info for ChatDocument
            file_info = services.workflow.getFileInfo(file_item.id)
            doc = ChatDocument(
                messageId="",  # set after message creation
                fileId=file_item.id,
                fileName=file_info.get("fileName", file_name) if file_info else file_name,
                fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
                mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json"
            )

            # Create message referencing the file - include document in initial call
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": "Raw extraction data saved",
                "status": "data",
                "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
                "publishedAt": services.utils.timestampGetUtc(),
                "documentsLabel": label,
                "documents": []
            }

            # Store message with document included from the start
            services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc])
        except Exception:
            # Non-fatal; ignore if storage or chat creation fails
            return
(File diff suppressed because it is too large.)
@@ -1,165 +0,0 @@
"""
|
||||
Shared utilities for AI services to eliminate code duplication.
|
||||
|
||||
This module contains common functions used across multiple AI service modules
|
||||
to maintain DRY principles and ensure consistency.
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
|
||||
"""
|
||||
Build full prompt by replacing placeholders with their content.
|
||||
Uses the new {{KEY:placeholder}} format.
|
||||
|
||||
Args:
|
||||
prompt: The base prompt template
|
||||
placeholders: Dictionary of placeholder key-value pairs
|
||||
|
||||
Returns:
|
||||
Prompt with placeholders replaced
|
||||
"""
|
||||
if not placeholders:
|
||||
return prompt
|
||||
|
||||
full_prompt = prompt
|
||||
for placeholder, content in placeholders.items():
|
||||
# Skip if content is None or empty
|
||||
if content is None:
|
||||
continue
|
||||
# Replace {{KEY:placeholder}}
|
||||
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
|
||||
|
||||
return full_prompt
|
||||
|
||||
|
||||
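# Illustrative only (not part of the diff): how the removed helper resolved the
# {{KEY:placeholder}} format. A minimal usage sketch.
#
# template = "Summarize {{KEY:docName}} in {{KEY:lang}}."
# print(buildPromptWithPlaceholders(template, {"docName": "report.pdf", "lang": "English"}))
# # -> "Summarize report.pdf in English."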
def sanitizePromptContent(content: str, contentType: str = "text") -> str:
    """
    Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.

    This is the single source of truth for all prompt sanitization across the system.
    Replaces all scattered sanitization functions with a unified approach.

    Args:
        content: The content to sanitize
        contentType: Type of content ("text", "userinput", "json", "document")

    Returns:
        Safely sanitized content ready for AI prompt insertion
    """
    if not content:
        return ""

    try:
        # Convert to string if not already
        content_str = str(content)

        # Remove null bytes and control characters (except newlines and tabs)
        sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)

        # Handle different content types with appropriate sanitization
        if contentType == "userinput":
            # Extra security for user-controlled content
            # Escape curly braces to prevent placeholder injection
            sanitized = sanitized.replace('{', '{{').replace('}', '}}')
            # Escape quotes and wrap in single quotes
            sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
            return f"'{sanitized}'"

        elif contentType == "json":
            # For JSON content, escape quotes and backslashes
            sanitized = sanitized.replace('\\', '\\\\')
            sanitized = sanitized.replace('"', '\\"')
            sanitized = sanitized.replace('\n', '\\n')
            sanitized = sanitized.replace('\r', '\\r')
            sanitized = sanitized.replace('\t', '\\t')

        elif contentType == "document":
            # For document content, escape special characters
            sanitized = sanitized.replace('\\', '\\\\')
            sanitized = sanitized.replace('"', '\\"')
            sanitized = sanitized.replace("'", "\\'")
            sanitized = sanitized.replace('\n', '\\n')
            sanitized = sanitized.replace('\r', '\\r')
            sanitized = sanitized.replace('\t', '\\t')

        else:  # contentType == "text" or default
            # Basic text sanitization
            sanitized = sanitized.replace('\\', '\\\\')
            sanitized = sanitized.replace('"', '\\"')
            sanitized = sanitized.replace("'", "\\'")
            sanitized = sanitized.replace('\n', '\\n')
            sanitized = sanitized.replace('\r', '\\r')
            sanitized = sanitized.replace('\t', '\\t')

        return sanitized

    except Exception as e:
        logger.error(f"Error sanitizing prompt content: {str(e)}")
        # Return a safe fallback
        return "[ERROR: Content could not be safely sanitized]"

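# Illustrative only (not part of the diff): expected behavior of the removed
# sanitizer in "userinput" mode, per the branch above.
#
# print(sanitizePromptContent('ignore {previous} "rules"', 'userinput'))
# # -> 'ignore {{previous}} \"rules\"'   (braces doubled, quotes escaped, wrapped in quotes)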
def extractTextFromContentParts(extracted_content) -> str:
    """
    Extract text content from ExtractionService ContentPart objects.

    Args:
        extracted_content: ContentExtracted object with parts

    Returns:
        Concatenated text content from all text/table/structure parts
    """
    if not extracted_content or not hasattr(extracted_content, 'parts'):
        return ""

    text_parts = []
    for part in extracted_content.parts:
        if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
            if hasattr(part, 'data') and part.data:
                text_parts.append(part.data)

    return "\n\n".join(text_parts)

def reduceText(text: str, reduction_factor: float) -> str:
    """
    Reduce text size by the specified factor.

    Args:
        text: Text to reduce
        reduction_factor: Factor by which to reduce (0.0 to 1.0)

    Returns:
        Reduced text with truncation indicator
    """
    if reduction_factor >= 1.0:
        return text

    target_length = int(len(text) * reduction_factor)
    return text[:target_length] + "... [reduced]"

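# Illustrative only (not part of the diff): the removed truncation helper keeps
# the leading fraction of the text and appends a marker.
#
# reduceText("abcdefghij", 0.5)  # -> "abcde... [reduced]"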
def determineCallType(documents: Optional[List], operation_type: str) -> str:
    """
    Determine call type based on documents and operation type.

    Args:
        documents: List of ChatDocument objects
        operation_type: Type of operation being performed

    Returns:
        Call type: "plan" or "text"
    """
    has_documents = documents is not None and len(documents) > 0
    is_planning_operation = operation_type == "plan"

    if not has_documents and is_planning_operation:
        return "plan"
    else:
        return "text"

@@ -19,6 +19,16 @@ class ExtractionService:
        self.services = services
        self._extractorRegistry = ExtractorRegistry()
        self._chunkerRegistry = ChunkerRegistry()
+        # Ensure AI connectors are discovered so pricing models are available
+        try:
+            # If internal model is missing, trigger discovery and registration
+            if modelRegistry.getModel("internal-extractor") is None:
+                discovered = modelRegistry.discoverConnectors()
+                for connector in discovered:
+                    modelRegistry.registerConnector(connector)
+        except Exception:
+            # Propagate actual errors during use; init should be fast and side-effect free otherwise
+            pass

    def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
        """
@@ -82,12 +92,12 @@ class ExtractionService:
                p.metadata["documentMimeType"] = documentData["mimeType"]

        # Log chunking information
-        chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
-        if chunked_parts:
+        chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
+        if chunkedParts:
            logger.debug(f"=== CHUNKING RESULTS ===")
            logger.debug(f"Total parts: {len(ec.parts)}")
-            logger.debug(f"Chunked parts: {len(chunked_parts)}")
-            for chunk in chunked_parts:
+            logger.debug(f"Chunked parts: {len(chunkedParts)}")
+            for chunk in chunkedParts:
                logger.debug(f"  Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
        else:
            logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
@@ -101,8 +111,11 @@ class ExtractionService:
        # Emit stats for extraction operation

        # Use internal extraction model for pricing
-        modelName = "internal_extraction"
+        modelName = "internal-extractor"
        model = modelRegistry.getModel(modelName)
+        # Hard fail if model is missing; caller must ensure connectors are registered
+        if model is None or model.calculatePriceUsd is None:
+            raise RuntimeError(f"Pricing model not available: {modelName}")
        priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)

        # Create AiCallResponse with real calculation
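# Illustrative only (not part of the diff): how the lazy connector discovery in
# __init__ pairs with the hard-fail pricing lookup above. A minimal sketch using
# only registry calls that appear in this diff.
#
# model = modelRegistry.getModel("internal-extractor")
# if model is None:
#     # __init__ normally prevents this by discovering and registering connectors
#     for connector in modelRegistry.discoverConnectors():
#         modelRegistry.registerConnector(connector)
#     model = modelRegistry.getModel("internal-extractor")
# priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)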
@@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger:
    4. Minimize total number of AI calls
    """

-    def __init__(self, model_capabilities: Dict[str, Any]):
-        self.max_tokens = model_capabilities.get("maxTokens", 4000)
-        self.safety_margin = model_capabilities.get("safetyMargin", 0.1)
-        self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin))
-        self.chars_per_token = model_capabilities.get("charsPerToken", 4)  # Rough estimation
+    def __init__(self, modelCapabilities: Dict[str, Any]):
+        self.maxTokens = modelCapabilities.get("maxTokens", 4000)
+        self.safetyMargin = modelCapabilities.get("safetyMargin", 0.1)
+        self.effectiveMaxTokens = int(self.maxTokens * (1 - self.safetyMargin))
+        self.charsPerToken = modelCapabilities.get("charsPerToken", 4)  # Rough estimation

-    def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
+    def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
        """
        Merge chunks intelligently based on token limits.

@@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger:
        if not chunks:
            return chunks

-        logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}")
+        logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}")

        # Calculate tokens for prompt
-        prompt_tokens = self._estimate_tokens(prompt)
-        available_tokens = self.effective_max_tokens - prompt_tokens
+        promptTokens = self._estimateTokens(prompt)
+        availableTokens = self.effectiveMaxTokens - promptTokens

-        logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}")
+        logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}")

        # Group chunks by document and type for semantic coherence
-        grouped_chunks = self._group_chunks_by_document_and_type(chunks)
+        groupedChunks = self._groupChunksByDocumentAndType(chunks)

-        merged_parts = []
+        mergedParts = []

-        for group_key, group_chunks in grouped_chunks.items():
-            logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)")
+        for groupKey, groupChunks in groupedChunks.items():
+            logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)")

            # Merge chunks within this group optimally
-            group_merged = self._merge_group_optimally(group_chunks, available_tokens)
-            merged_parts.extend(group_merged)
+            groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens)
+            mergedParts.extend(groupMerged)

-        logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts")
-        return merged_parts
+        logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(mergedParts)} parts")
+        return mergedParts

-    def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
+    def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
        """Group chunks by document and type for semantic coherence."""
        groups = {}

        for chunk in chunks:
            # Create group key: document_id + type_group
-            doc_id = chunk.metadata.get("documentId", "unknown")
-            type_group = chunk.typeGroup
-            group_key = f"{doc_id}_{type_group}"
-
-            if group_key not in groups:
-                groups[group_key] = []
-            groups[group_key].append(chunk)
+            docId = chunk.metadata.get("documentId", "unknown")
+            typeGroup = chunk.typeGroup
+            groupKey = f"{docId}_{typeGroup}"
+
+            if groupKey not in groups:
+                groups[groupKey] = []
+            groups[groupKey].append(chunk)

        return groups

-    def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]:
+    def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]:
        """Merge chunks within a group optimally to minimize AI calls."""
        if not chunks:
            return []

        # Sort chunks by size (smallest first for better packing)
-        sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data))
+        sortedChunks = sorted(chunks, key=lambda c: self._estimateTokens(c.data))

-        merged_parts = []
-        current_group = []
-        current_tokens = 0
+        mergedParts = []
+        currentGroup = []
+        currentTokens = 0

-        for chunk in sorted_chunks:
-            chunk_tokens = self._estimate_tokens(chunk.data)
+        for chunk in sortedChunks:
+            chunkTokens = self._estimateTokens(chunk.data)

            # Special case: If single chunk is already at max size, process it alone
-            if chunk_tokens >= available_tokens * 0.9:  # 90% of available tokens
+            if chunkTokens >= availableTokens * 0.9:  # 90% of available tokens
                # Finalize current group if it exists
-                if current_group:
-                    merged_part = self._create_merged_part(current_group, current_tokens)
-                    merged_parts.append(merged_part)
-                    current_group = []
-                    current_tokens = 0
+                if currentGroup:
+                    mergedPart = self._createMergedPart(currentGroup, currentTokens)
+                    mergedParts.append(mergedPart)
+                    currentGroup = []
+                    currentTokens = 0

                # Process large chunk individually
-                merged_parts.append(chunk)
-                logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens")
+                mergedParts.append(chunk)
+                logger.debug(f"🔍 Large chunk processed individually: {chunkTokens} tokens")
                continue

            # If adding this chunk would exceed limit, finalize current group
-            if current_tokens + chunk_tokens > available_tokens and current_group:
-                merged_part = self._create_merged_part(current_group, current_tokens)
-                merged_parts.append(merged_part)
-                current_group = [chunk]
-                current_tokens = chunk_tokens
+            if currentTokens + chunkTokens > availableTokens and currentGroup:
+                mergedPart = self._createMergedPart(currentGroup, currentTokens)
+                mergedParts.append(mergedPart)
+                currentGroup = [chunk]
+                currentTokens = chunkTokens
            else:
-                current_group.append(chunk)
-                current_tokens += chunk_tokens
+                currentGroup.append(chunk)
+                currentTokens += chunkTokens

        # Finalize remaining group
-        if current_group:
-            merged_part = self._create_merged_part(current_group, current_tokens)
-            merged_parts.append(merged_part)
+        if currentGroup:
+            mergedPart = self._createMergedPart(currentGroup, currentTokens)
+            mergedParts.append(mergedPart)

-        logger.info(f"📦 Group merged: {len(chunks)} → {len(merged_parts)} parts")
-        return merged_parts
+        logger.info(f"📦 Group merged: {len(chunks)} → {len(mergedParts)} parts")
+        return mergedParts

-    def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart:
+    def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart:
        """Create a merged ContentPart from multiple chunks."""
        if len(chunks) == 1:
            return chunks[0]  # No need to merge single chunk

        # Combine data with semantic separators
-        combined_data = self._combine_chunk_data(chunks)
+        combinedData = self._combineChunkData(chunks)

        # Use metadata from first chunk as base
-        base_chunk = chunks[0]
-        merged_metadata = base_chunk.metadata.copy()
-        merged_metadata.update({
+        baseChunk = chunks[0]
+        mergedMetadata = baseChunk.metadata.copy()
+        mergedMetadata.update({
            "merged": True,
            "originalChunkCount": len(chunks),
-            "totalTokens": total_tokens,
+            "totalTokens": totalTokens,
            "originalChunkIds": [c.id for c in chunks],
-            "size": len(combined_data.encode('utf-8'))
+            "size": len(combinedData.encode('utf-8'))
        })

-        merged_part = ContentPart(
+        mergedPart = ContentPart(
            id=makeId(),
-            parentId=base_chunk.parentId,
+            parentId=baseChunk.parentId,
            label=f"merged_{len(chunks)}_chunks",
-            typeGroup=base_chunk.typeGroup,
-            mimeType=base_chunk.mimeType,
-            data=combined_data,
-            metadata=merged_metadata
+            typeGroup=baseChunk.typeGroup,
+            mimeType=baseChunk.mimeType,
+            data=combinedData,
+            metadata=mergedMetadata
        )

-        logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens")
-        return merged_part
+        logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens")
+        return mergedPart

-    def _combine_chunk_data(self, chunks: List[ContentPart]) -> str:
+    def _combineChunkData(self, chunks: List[ContentPart]) -> str:
        """Combine chunk data with appropriate separators."""
        if not chunks:
            return ""
@@ -173,37 +173,37 @@ class IntelligentTokenAwareMerger:

        return separator.join([chunk.data for chunk in chunks])

-    def _estimate_tokens(self, text: str) -> int:
+    def _estimateTokens(self, text: str) -> int:
        """Estimate token count for text."""
        if not text:
            return 0
-        return len(text) // self.chars_per_token
+        return len(text) // self.charsPerToken

-    def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]:
+    def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]:
        """Calculate optimization statistics with detailed analysis."""
-        original_calls = len(original_chunks)
-        optimized_calls = len(merged_parts)
-        reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0
+        originalCalls = len(originalChunks)
+        optimizedCalls = len(mergedParts)
+        reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0

        # Analyze chunk sizes
-        large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9]
-        small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9]
+        largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9]
+        smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9]

        # Calculate theoretical maximum optimization (if all small chunks could be merged)
-        theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3)  # Assume 3 small chunks per call
-        theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0
+        theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3)  # Assume 3 small chunks per call
+        theoreticalReduction = ((originalCalls - theoreticalMinCalls) / originalCalls * 100) if originalCalls > 0 else 0

        return {
-            "original_ai_calls": original_calls,
-            "optimized_ai_calls": optimized_calls,
-            "reduction_percent": round(reduction_percent, 1),
-            "cost_savings": f"{reduction_percent:.1f}%",
-            "efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "∞",
+            "original_ai_calls": originalCalls,
+            "optimized_ai_calls": optimizedCalls,
+            "reduction_percent": round(reductionPercent, 1),
+            "cost_savings": f"{reductionPercent:.1f}%",
+            "efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "∞",
            "analysis": {
-                "large_chunks": len(large_chunks),
-                "small_chunks": len(small_chunks),
-                "theoretical_min_calls": theoretical_min_calls,
-                "theoretical_reduction": round(theoretical_reduction, 1),
-                "optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low"
+                "large_chunks": len(largeChunks),
+                "small_chunks": len(smallChunks),
+                "theoretical_min_calls": theoreticalMinCalls,
+                "theoretical_reduction": round(theoreticalReduction, 1),
+                "optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low"
            }
        }
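# Illustrative only (not part of the diff): a minimal usage sketch of the merger,
# assuming chunks is a List[ContentPart] produced by the extraction pipeline.
#
# merger = IntelligentTokenAwareMerger({"maxTokens": 8000, "safetyMargin": 0.1, "charsPerToken": 4})
# merged = merger.mergeChunksIntelligently(chunks, prompt="Summarize each section")
# stats = merger.calculateOptimizationStats(chunks, merged)
# logger.info(stats["cost_savings"])  # e.g. "62.5%"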
@@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:
    subMerger = IntelligentTokenAwareMerger(model_capabilities)

    # Use intelligent merging for all parts
-    merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "")
+    merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")

    # Calculate and log optimization stats
-    stats = subMerger.calculate_optimization_stats(parts, merged)
+    stats = subMerger.calculateOptimizationStats(parts, merged)
    logger.info(f"🧠 Intelligent merging stats: {stats}")
    logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")

@@ -101,7 +101,7 @@ async def buildExtractionPrompt(

    # Build base prompt
    adaptive_prompt = f"""
-{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
+{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}

You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

@@ -37,13 +37,13 @@ class GenerationService:
                return []

            # Process each document from the AI action result
-            processed_documents = []
+            processedDocuments = []
            for doc in documents:
-                processed_doc = self.processSingleDocument(doc, action)
-                if processed_doc:
-                    processed_documents.append(processed_doc)
+                processedDoc = self.processSingleDocument(doc, action)
+                if processedDoc:
+                    processedDocuments.append(processedDoc)

-            return processed_documents
+            return processedDocuments
        except Exception as e:
            logger.error(f"Error processing action result documents: {str(e)}")
            return []

@@ -77,20 +77,20 @@ class GenerationService:
        try:
            processed_docs = self.processActionResultDocuments(action_result, action, workflow)

-            created_documents = []
+            createdDocuments = []
            for i, doc_data in enumerate(processed_docs):
                try:
-                    document_name = doc_data['fileName']
-                    document_data = doc_data['content']
-                    mime_type = doc_data['mimeType']
+                    documentName = doc_data['fileName']
+                    documentData = doc_data['content']
+                    mimeType = doc_data['mimeType']

                    # Convert document data to string content
-                    content = convertDocumentDataToString(document_data, getFileExtension(document_name))
+                    content = convertDocumentDataToString(documentData, getFileExtension(documentName))

                    # Skip empty or minimal content
-                    minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
-                    if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
-                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
+                    minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
+                    if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
+                        logger.warning(f"Empty or minimal content for document {documentName}, skipping")
                        continue

                    # Normalize file extension based on mime type if missing or incorrect
@@ -105,35 +105,35 @@ class GenerationService:
"text/plain": ".txt",
|
||||
"application/json": ".json",
|
||||
}
|
||||
expected_ext = mime_to_ext.get(mime_type)
|
||||
if expected_ext:
|
||||
if not document_name.lower().endswith(expected_ext):
|
||||
expectedExt = mime_to_ext.get(mimeType)
|
||||
if expectedExt:
|
||||
if not documentName.lower().endswith(expectedExt):
|
||||
# Append/replace extension to match mime type
|
||||
if "." in document_name:
|
||||
document_name = document_name.rsplit(".", 1)[0] + expected_ext
|
||||
if "." in documentName:
|
||||
documentName = documentName.rsplit(".", 1)[0] + expectedExt
|
||||
else:
|
||||
document_name = document_name + expected_ext
|
||||
documentName = documentName + expectedExt
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
|
||||
base64encoded = False
|
||||
try:
|
||||
binary_mime_types = {
|
||||
binaryMimeTypes = {
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"application/pdf",
|
||||
}
|
||||
if isinstance(document_data, str) and mime_type in binary_mime_types:
|
||||
if isinstance(documentData, str) and mimeType in binaryMimeTypes:
|
||||
base64encoded = True
|
||||
except Exception:
|
||||
base64encoded = False
|
||||
|
||||
# Create document with file in one step using interfaces directly
|
||||
document = self._createDocument(
|
||||
fileName=document_name,
|
||||
mimeType=mime_type,
|
||||
fileName=documentName,
|
||||
mimeType=mimeType,
|
||||
content=content,
|
||||
base64encoded=base64encoded,
|
||||
messageId=message_id
|
||||
|
|
@@ -141,14 +141,14 @@ class GenerationService:
                    if document:
                        # Set workflow context on the document if possible
                        self._setDocumentWorkflowContext(document, action, workflow)
-                        created_documents.append(document)
+                        createdDocuments.append(document)
                    else:
-                        logger.error(f"Failed to create ChatDocument object for {document_name}")
+                        logger.error(f"Failed to create ChatDocument object for {documentName}")
                except Exception as e:
                    logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
                    continue

-            return created_documents
+            return createdDocuments
        except Exception as e:
            logger.error(f"Error creating documents from action result: {str(e)}")
            return []

@@ -157,28 +157,28 @@ class GenerationService:
"""Set workflow context on a document for proper routing and labeling"""
|
||||
try:
|
||||
# Get current workflow context directly from workflow object
|
||||
workflow_context = self._getWorkflowContext(workflow)
|
||||
workflow_stats = self._getWorkflowStats(workflow)
|
||||
workflowContext = self._getWorkflowContext(workflow)
|
||||
workflowStats = self._getWorkflowStats(workflow)
|
||||
|
||||
current_round = workflow_context.get('currentRound', 0)
|
||||
current_task = workflow_context.get('currentTask', 0)
|
||||
current_action = workflow_context.get('currentAction', 0)
|
||||
currentRound = workflowContext.get('currentRound', 0)
|
||||
currentTask = workflowContext.get('currentTask', 0)
|
||||
currentAction = workflowContext.get('currentAction', 0)
|
||||
|
||||
# Try to set workflow context attributes if they exist
|
||||
if hasattr(document, 'roundNumber'):
|
||||
document.roundNumber = current_round
|
||||
document.roundNumber = currentRound
|
||||
if hasattr(document, 'taskNumber'):
|
||||
document.taskNumber = current_task
|
||||
document.taskNumber = currentTask
|
||||
if hasattr(document, 'actionNumber'):
|
||||
document.actionNumber = current_action
|
||||
document.actionNumber = currentAction
|
||||
if hasattr(document, 'actionId'):
|
||||
document.actionId = action.id if hasattr(action, 'id') else None
|
||||
|
||||
# Set additional workflow metadata if available
|
||||
if hasattr(document, 'workflowId'):
|
||||
document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
|
||||
document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
|
||||
if hasattr(document, 'workflowStatus'):
|
||||
document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
|
||||
document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
|
||||
|
||||
|
||||
except Exception as e:
|
||||
|
|
@@ -355,17 +355,17 @@ class GenerationService:
    def _getFormatRenderer(self, output_format: str):
        """Get the appropriate renderer for the specified format using auto-discovery."""
        try:
-            from .renderers.registry import get_renderer
-            renderer = get_renderer(output_format, services=self.services)
+            from .renderers.registry import getRenderer
+            renderer = getRenderer(output_format, services=self.services)

            if renderer:
                return renderer

            # Fallback to text renderer if no specific renderer found
            logger.warning(f"No renderer found for format {output_format}, falling back to text")
-            fallback_renderer = get_renderer('text', services=self.services)
-            if fallback_renderer:
-                return fallback_renderer
+            fallbackRenderer = getRenderer('text', services=self.services)
+            if fallbackRenderer:
+                return fallbackRenderer

            logger.error("Even text renderer fallback failed")
            return None
@@ -17,7 +17,7 @@ class RendererRegistry:
        self._format_mappings: Dict[str, str] = {}
        self._discovered = False

-    def discover_renderers(self) -> None:
+    def discoverRenderers(self) -> None:
        """Automatically discover and register all renderers by scanning files."""
        if self._discovered:
            return
@@ -28,38 +28,38 @@ class RendererRegistry:
            from pathlib import Path

            # Get the directory containing this registry file
-            current_dir = Path(__file__).parent
-            renderers_dir = current_dir
+            currentDir = Path(__file__).parent
+            renderersDir = currentDir

            # Get the package name dynamically
-            package_name = __name__.rsplit('.', 1)[0]
+            packageName = __name__.rsplit('.', 1)[0]

            # Scan all Python files in the renderers directory
-            for file_path in renderers_dir.glob("*.py"):
-                if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
+            for filePath in renderersDir.glob("*.py"):
+                if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
                    continue

                # Extract module name from filename
-                module_name = file_path.stem
+                moduleName = filePath.stem

                try:
                    # Import the module dynamically
-                    full_module_name = f"{package_name}.{module_name}"
-                    module = importlib.import_module(full_module_name)
+                    fullModuleName = f"{packageName}.{moduleName}"
+                    module = importlib.import_module(fullModuleName)

                    # Look for renderer classes in the module
-                    for attr_name in dir(module):
-                        attr = getattr(module, attr_name)
+                    for attrName in dir(module):
+                        attr = getattr(module, attrName)
                        if (isinstance(attr, type) and
                                issubclass(attr, BaseRenderer) and
                                attr != BaseRenderer and
-                                hasattr(attr, 'get_supported_formats')):
+                                hasattr(attr, 'getSupportedFormats')):

                            # Register the renderer
-                            self._register_renderer_class(attr)
+                            self._registerRendererClass(attr)

                except Exception as e:
-                    logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
+                    logger.warning(f"Could not load renderer from {moduleName}: {str(e)}")
                    continue

            self._discovered = True
@@ -68,72 +68,72 @@ class RendererRegistry:
logger.error(f"Error during renderer discovery: {str(e)}")
|
||||
self._discovered = True # Mark as discovered to avoid repeated attempts
|
||||
|
||||
def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
|
||||
def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None:
|
||||
"""Register a renderer class with its supported formats."""
|
||||
try:
|
||||
# Get supported formats from the renderer class
|
||||
supported_formats = renderer_class.get_supported_formats()
|
||||
supportedFormats = rendererClass.getSupportedFormats()
|
||||
|
||||
for format_name in supported_formats:
|
||||
for formatName in supportedFormats:
|
||||
# Register primary format
|
||||
self._renderers[format_name.lower()] = renderer_class
|
||||
self._renderers[formatName.lower()] = rendererClass
|
||||
|
||||
# Register aliases if any
|
||||
if hasattr(renderer_class, 'get_format_aliases'):
|
||||
aliases = renderer_class.get_format_aliases()
|
||||
if hasattr(rendererClass, 'getFormatAliases'):
|
||||
aliases = rendererClass.getFormatAliases()
|
||||
for alias in aliases:
|
||||
self._format_mappings[alias.lower()] = format_name.lower()
|
||||
self._format_mappings[alias.lower()] = formatName.lower()
|
||||
|
||||
logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")
|
||||
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
|
||||
logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")
|
||||
|
||||
def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]:
|
||||
def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]:
|
||||
"""Get a renderer instance for the specified format."""
|
||||
if not self._discovered:
|
||||
self.discover_renderers()
|
||||
self.discoverRenderers()
|
||||
|
||||
# Normalize format name
|
||||
format_name = output_format.lower().strip()
|
||||
formatName = outputFormat.lower().strip()
|
||||
|
||||
# Check for aliases first
|
||||
if format_name in self._format_mappings:
|
||||
format_name = self._format_mappings[format_name]
|
||||
if formatName in self._format_mappings:
|
||||
formatName = self._format_mappings[formatName]
|
||||
|
||||
# Get renderer class
|
||||
renderer_class = self._renderers.get(format_name)
|
||||
rendererClass = self._renderers.get(formatName)
|
||||
|
||||
if renderer_class:
|
||||
if rendererClass:
|
||||
try:
|
||||
return renderer_class(services=services)
|
||||
return rendererClass(services=services)
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
|
||||
logger.error(f"Error creating renderer instance for {formatName}: {str(e)}")
|
||||
return None
|
||||
|
||||
logger.warning(f"No renderer found for format: {output_format}")
|
||||
logger.warning(f"No renderer found for format: {outputFormat}")
|
||||
return None
|
||||
|
||||
def get_supported_formats(self) -> List[str]:
|
||||
def getSupportedFormats(self) -> List[str]:
|
||||
"""Get list of all supported formats."""
|
||||
if not self._discovered:
|
||||
self.discover_renderers()
|
||||
self.discoverRenderers()
|
||||
|
||||
formats = list(self._renderers.keys())
|
||||
formats.extend(self._format_mappings.keys())
|
||||
return sorted(set(formats))
|
||||
|
||||
def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
|
||||
def getRendererInfo(self) -> Dict[str, Dict[str, str]]:
|
||||
"""Get information about all registered renderers."""
|
||||
if not self._discovered:
|
||||
self.discover_renderers()
|
||||
self.discoverRenderers()
|
||||
|
||||
info = {}
|
||||
for format_name, renderer_class in self._renderers.items():
|
||||
info[format_name] = {
|
||||
'class_name': renderer_class.__name__,
|
||||
'module': renderer_class.__module__,
|
||||
'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
|
||||
for formatName, rendererClass in self._renderers.items():
|
||||
info[formatName] = {
|
||||
'class_name': rendererClass.__name__,
|
||||
'module': rendererClass.__module__,
|
||||
'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description'
|
||||
}
|
||||
|
||||
return info
|
||||
|
|
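# Illustrative only (not part of the diff): a hypothetical renderer that the
# registry above would auto-discover via the camelCase hooks. The class name and
# body are made up; only the hook names come from this diff.
#
# class MarkdownRenderer(BaseRenderer):
#     @classmethod
#     def getSupportedFormats(cls):
#         return ["markdown"]
#
#     @classmethod
#     def getFormatAliases(cls):
#         return ["md"]
#
#     async def render(self, extractedContent, title, userPrompt=None, aiService=None):
#         sections = extractedContent.get("sections", [])
#         body = "\n\n".join(str(s) for s in sections)  # simplistic placeholder rendering
#         return f"# {title}\n\n{body}", "text/markdown"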
@@ -141,14 +141,14 @@ class RendererRegistry:
# Global registry instance
_registry = RendererRegistry()

-def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]:
+def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]:
    """Get a renderer instance for the specified format."""
-    return _registry.get_renderer(output_format, services)
+    return _registry.getRenderer(outputFormat, services)

-def get_supported_formats() -> List[str]:
+def getSupportedFormats() -> List[str]:
    """Get list of all supported formats."""
-    return _registry.get_supported_formats()
+    return _registry.getSupportedFormats()

-def get_renderer_info() -> Dict[str, Dict[str, str]]:
+def getRendererInfo() -> Dict[str, Dict[str, str]]:
    """Get information about all registered renderers."""
-    return _registry.get_renderer_info()
+    return _registry.getRendererInfo()
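# Illustrative only (not part of the diff): module-level usage after the rename.
# A minimal sketch; the absolute import path is an assumption.
#
# from modules.services.serviceGeneration.renderers.registry import getRenderer, getSupportedFormats
# renderer = getRenderer("pdf", services=services)
# if renderer is None:
#     logger.warning(f"pdf not among supported formats: {getSupportedFormats()}")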
@@ -4,6 +4,7 @@ Base renderer class for all format renderers.
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
|
@@ -23,7 +24,7 @@ class BaseRenderer(ABC):
        self.services = services  # Add services attribute

    @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
        """
        Return list of supported format names for this renderer.
        Override this method in subclasses to specify supported formats.
@@ -31,7 +32,7 @@ class BaseRenderer(ABC):
        return []

    @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
        """
        Return list of format aliases for this renderer.
        Override this method in subclasses to specify format aliases.
@@ -39,7 +40,7 @@ class BaseRenderer(ABC):
        return []

    @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
        """
        Return priority for this renderer (higher number = higher priority).
        Used when multiple renderers support the same format.
@@ -47,43 +48,43 @@ class BaseRenderer(ABC):
        return 0

    @abstractmethod
-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        """
        Render extracted JSON content to the target format.

        Args:
-            extracted_content: Structured JSON content with sections and metadata
+            extractedContent: Structured JSON content with sections and metadata
            title: Report title
-            user_prompt: Original user prompt for context
-            ai_service: AI service instance for additional processing
+            userPrompt: Original user prompt for context
+            aiService: AI service instance for additional processing

        Returns:
-            tuple: (rendered_content, mime_type)
+            tuple: (renderedContent, mimeType)
        """
        pass

-    def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
+    def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract sections from report data."""
-        return report_data.get('sections', [])
+        return reportData.get('sections', [])

-    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
+    def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metadata from report data."""
-        return report_data.get('metadata', {})
+        return reportData.get('metadata', {})

-    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
+    def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
        """Get title from report data or use fallback."""
-        metadata = report_data.get('metadata', {})
-        return metadata.get('title', fallback_title)
+        metadata = reportData.get('metadata', {})
+        return metadata.get('title', fallbackTitle)

-    def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
+    def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
        """Validate that JSON content has the expected structure."""
-        if not isinstance(json_content, dict):
+        if not isinstance(jsonContent, dict):
            return False

-        if "sections" not in json_content:
+        if "sections" not in jsonContent:
            return False

-        sections = json_content.get("sections", [])
+        sections = jsonContent.get("sections", [])
        if not isinstance(sections, list):
            return False

@@ -96,14 +97,14 @@ class BaseRenderer(ABC):

        return True

-    def _get_section_type(self, section: Dict[str, Any]) -> str:
+    def _getSectionType(self, section: Dict[str, Any]) -> str:
        """Get the type of a section; default to 'paragraph' for non-dict inputs."""
        if isinstance(section, dict):
            return section.get("content_type", "paragraph")
        # If section is a list or any other type, treat as paragraph elements
        return "paragraph"

-    def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
+    def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Get the elements of a section; if a list is provided directly, return it."""
        if isinstance(section, dict):
            return section.get("elements", [])
@@ -111,21 +112,30 @@ class BaseRenderer(ABC):
            return section
        return []

-    def _get_section_id(self, section: Dict[str, Any]) -> str:
+    def _getSectionId(self, section: Dict[str, Any]) -> str:
        """Get the ID of a section (if available)."""
        if isinstance(section, dict):
            return section.get("id", "unknown")
        return "unknown"

-    def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
+    def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
        """Extract table headers and rows from section data."""
-        headers = section_data.get("headers", [])
-        rows = section_data.get("rows", [])
+        # Normalize when elements array was passed in
+        if isinstance(sectionData, list) and sectionData:
+            candidate = sectionData[0]
+            sectionData = candidate if isinstance(candidate, dict) else {}
+        headers = sectionData.get("headers", [])
+        rows = sectionData.get("rows", [])
        return headers, rows

-    def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
+    def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]:
        """Extract bullet list items from section data."""
-        items = section_data.get("items", [])
+        # Normalize when elements array or raw list was passed in
+        if isinstance(sectionData, list):
+            # Already a list of items (strings or dicts)
+            items = sectionData
+        else:
+            items = sectionData.get("items", [])
        result = []
        for item in items:
            if isinstance(item, str):
@@ -134,29 +144,47 @@ class BaseRenderer(ABC):
result.append(item["text"])
|
||||
return result
|
||||
|
||||
def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
|
||||
def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
|
||||
"""Extract heading level and text from section data."""
|
||||
level = section_data.get("level", 1)
|
||||
text = section_data.get("text", "")
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||
level = sectionData.get("level", 1)
|
||||
text = sectionData.get("text", "")
|
||||
return level, text
|
||||
|
||||
def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
|
||||
def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str:
|
||||
"""Extract paragraph text from section data."""
|
||||
return section_data.get("text", "")
|
||||
if isinstance(sectionData, list):
|
||||
# Join multiple paragraph elements if provided as a list
|
||||
texts = []
|
||||
for el in sectionData:
|
||||
if isinstance(el, dict) and "text" in el:
|
||||
texts.append(el["text"])
|
||||
elif isinstance(el, str):
|
||||
texts.append(el)
|
||||
return "\n".join(texts)
|
||||
return sectionData.get("text", "")
|
||||
|
||||
def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
|
||||
def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
|
||||
"""Extract code and language from section data."""
|
||||
code = section_data.get("code", "")
|
||||
language = section_data.get("language", "")
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||
code = sectionData.get("code", "")
|
||||
language = sectionData.get("language", "")
|
||||
return code, language
|
||||
|
||||
def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
|
||||
def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
|
||||
"""Extract base64 data and alt text from section data."""
|
||||
base64_data = section_data.get("base64Data", "")
|
||||
alt_text = section_data.get("altText", "Image")
|
||||
return base64_data, alt_text
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||
base64Data = sectionData.get("base64Data", "")
|
||||
altText = sectionData.get("altText", "Image")
|
||||
return base64Data, altText
|
||||
|
||||
def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
|
||||
def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
|
||||
"""
|
||||
Render an image section. This is a base implementation that should be overridden
|
||||
by format-specific renderers.
|
||||
|
|
@@ -168,47 +196,47 @@ class BaseRenderer(ABC):
        Returns:
            Format-specific image representation
        """
-        section_data = self._get_section_data(section)
-        base64_data, alt_text = self._extract_image_data(section_data)
+        sectionData = self._getSectionData(section)
+        base64Data, altText = self._extractImageData(sectionData)

        # Base implementation returns a simple dict
        # Format-specific renderers should override this method
        return {
            "content_type": "image",
-            "base64Data": base64_data,
-            "altText": alt_text,
-            "width": section_data.get("width", None),
-            "height": section_data.get("height", None),
-            "caption": section_data.get("caption", "")
+            "base64Data": base64Data,
+            "altText": altText,
+            "width": sectionData.get("width", None),
+            "height": sectionData.get("height", None),
+            "caption": sectionData.get("caption", "")
        }

-    def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
+    def _validateImageData(self, base64Data: str, altText: str) -> bool:
        """Validate image data."""
-        if not base64_data:
+        if not base64Data:
            self.logger.warning("Image section has no base64 data")
            return False

-        if not alt_text:
+        if not altText:
            self.logger.warning("Image section has no alt text")
            return False

        # Basic base64 validation
        try:
-            base64.b64decode(base64_data, validate=True)
+            base64.b64decode(base64Data, validate=True)
            return True
        except Exception as e:
            self.logger.warning(f"Invalid base64 image data: {str(e)}")
            return False

-    def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
+    def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]:
        """
        Get image dimensions from base64 data.
        This is a helper method that format-specific renderers can use.
        """
        try:
            # Decode base64 data
-            image_data = base64.b64decode(base64_data)
-            image = Image.open(io.BytesIO(image_data))
+            imageData = base64.b64decode(base64Data)
+            image = Image.open(io.BytesIO(imageData))

            return image.size  # Returns (width, height)
@@ -216,89 +244,89 @@ class BaseRenderer(ABC):
            self.logger.warning(f"Could not determine image dimensions: {str(e)}")
            return (0, 0)

-    def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
+    def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str:
        """
        Resize image if it exceeds maximum dimensions.
        Returns the resized image as base64 string.
        """
        try:
            # Decode base64 data
-            image_data = base64.b64decode(base64_data)
-            image = Image.open(io.BytesIO(image_data))
+            imageData = base64.b64decode(base64Data)
+            image = Image.open(io.BytesIO(imageData))

            # Check if resizing is needed
            width, height = image.size
-            if width <= max_width and height <= max_height:
-                return base64_data # No resizing needed
+            if width <= maxWidth and height <= maxHeight:
+                return base64Data # No resizing needed

            # Calculate new dimensions maintaining aspect ratio
-            ratio = min(max_width / width, max_height / height)
-            new_width = int(width * ratio)
-            new_height = int(height * ratio)
+            ratio = min(maxWidth / width, maxHeight / height)
+            newWidth = int(width * ratio)
+            newHeight = int(height * ratio)

            # Resize image
-            resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+            resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)

            # Convert back to base64
            buffer = io.BytesIO()
-            resized_image.save(buffer, format=image.format or 'PNG')
-            resized_data = buffer.getvalue()
+            resizedImage.save(buffer, format=image.format or 'PNG')
+            resizedData = buffer.getvalue()

-            return base64.b64encode(resized_data).decode('utf-8')
+            return base64.b64encode(resizedData).decode('utf-8')

        except Exception as e:
            self.logger.warning(f"Could not resize image: {str(e)}")
-            return base64_data # Return original if resize fails
+            return base64Data # Return original if resize fails
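Aside: a minimal sketch, not part of this commit, of how the renamed resize helper behaves. It assumes Pillow is installed and that some concrete BaseRenderer subclass exists; the demo image and the subclass name are illustrative only.

import base64
import io

from PIL import Image

def makeDemoPng(width: int, height: int) -> str:
    """Build an in-memory PNG and return it as base64 text."""
    buffer = io.BytesIO()
    Image.new("RGB", (width, height), "red").save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

# renderer = RendererHtml()  # hypothetical: any concrete subclass would do
# resized = renderer._resizeImageIfNeeded(makeDemoPng(2000, 500), maxWidth=800, maxHeight=600)
# A 2000x500 input is scaled by min(800/2000, 600/500) = 0.4, so the result
# decodes to an 800x200 image: the aspect ratio is preserved, never stretched.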
-    def _get_supported_section_types(self) -> List[str]:
-        """Return list of supported section types."""
-        return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
+    def _getSupportedSectionTypes(self) -> List[str]:
+        """Return list of supported section types (from unified schema)."""
+        return supportedSectionTypes

-    def _is_valid_section_type(self, section_type: str) -> bool:
+    def _isValidSectionType(self, sectionType: str) -> bool:
        """Check if a section type is valid."""
-        return section_type in self._get_supported_section_types()
+        return sectionType in self._getSupportedSectionTypes()

-    def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
+    def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Process a section and return structured data based on its type."""
-        section_type = self._get_section_type(section)
-        section_data = self._get_section_data(section)
+        sectionType = self._getSectionType(section)
+        sectionData = self._getSectionData(section)

-        if section_type == "table":
-            headers, rows = self._extract_table_data(section_data)
+        if sectionType == "table":
+            headers, rows = self._extractTableData(sectionData)
            return {"content_type": "table", "headers": headers, "rows": rows}
-        elif section_type == "bullet_list":
-            items = self._extract_bullet_list_items(section_data)
+        elif sectionType == "bullet_list":
+            items = self._extractBulletListItems(sectionData)
            return {"content_type": "bullet_list", "items": items}
-        elif section_type == "heading":
-            level, text = self._extract_heading_data(section_data)
+        elif sectionType == "heading":
+            level, text = self._extractHeadingData(sectionData)
            return {"content_type": "heading", "level": level, "text": text}
-        elif section_type == "paragraph":
-            text = self._extract_paragraph_text(section_data)
+        elif sectionType == "paragraph":
+            text = self._extractParagraphText(sectionData)
            return {"content_type": "paragraph", "text": text}
-        elif section_type == "code_block":
-            code, language = self._extract_code_block_data(section_data)
+        elif sectionType == "code_block":
+            code, language = self._extractCodeBlockData(sectionData)
            return {"content_type": "code_block", "code": code, "language": language}
-        elif section_type == "image":
-            base64_data, alt_text = self._extract_image_data(section_data)
+        elif sectionType == "image":
+            base64Data, altText = self._extractImageData(sectionData)
            # Validate image data
-            if self._validate_image_data(base64_data, alt_text):
+            if self._validateImageData(base64Data, altText):
                return {
                    "content_type": "image",
-                    "base64Data": base64_data,
-                    "altText": alt_text,
-                    "width": section_data.get("width"),
-                    "height": section_data.get("height"),
-                    "caption": section_data.get("caption", "")
+                    "base64Data": base64Data,
+                    "altText": altText,
+                    "width": sectionData.get("width") if isinstance(sectionData, dict) else None,
+                    "height": sectionData.get("height") if isinstance(sectionData, dict) else None,
+                    "caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else ""
                }
            else:
                # Return placeholder if image data is invalid
-                return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
+                return {"content_type": "paragraph", "text": f"[Image: {altText}]"}
        else:
            # Fallback to paragraph
-            text = self._extract_paragraph_text(section_data)
+            text = self._extractParagraphText(sectionData)
            return {"content_type": "paragraph", "text": text}

-    def _format_timestamp(self, timestamp: str = None) -> str:
+    def _formatTimestamp(self, timestamp: str = None) -> str:
        """Format timestamp for display."""
        if timestamp:
            return timestamp
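Aside: the if/elif ladder in _processSectionByType above maps one-to-one onto a lookup table. The sketch below is an illustration of that equivalent shape, not code from the commit; it reuses the camelCase method names introduced here.

from typing import Any, Callable, Dict

def processSectionViaTable(renderer, section: Dict[str, Any]) -> Dict[str, Any]:
    """Same dispatch as _processSectionByType, expressed as a handler table."""
    sectionType = renderer._getSectionType(section)
    sectionData = renderer._getSectionData(section)
    handlers: Dict[str, Callable[[Any], Dict[str, Any]]] = {
        "heading": lambda d: dict(
            zip(("level", "text"), renderer._extractHeadingData(d)), content_type="heading"),
        "code_block": lambda d: dict(
            zip(("code", "language"), renderer._extractCodeBlockData(d)), content_type="code_block"),
        "paragraph": lambda d: {"content_type": "paragraph",
                                "text": renderer._extractParagraphText(d)},
    }
    # Unknown types fall back to paragraph, mirroring the original else branch
    return handlers.get(sectionType, handlers["paragraph"])(sectionData)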
@@ -306,38 +334,38 @@ class BaseRenderer(ABC):

    # ===== GENERIC AI STYLING HELPERS =====

-    async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+    async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generic AI styling method that can be used by all renderers.

        Args:
-            ai_service: AI service instance
-            style_template: Format-specific style template
-            default_styles: Default styles to fall back to
+            aiService: AI service instance
+            styleTemplate: Format-specific style template
+            defaultStyles: Default styles to fall back to

        Returns:
            Dict with styling definitions
        """
        # DEBUG: Show which renderer is calling this method

-        if not ai_service:
-            return default_styles
+        if not aiService:
+            return defaultStyles

        try:

-            request_options = AiCallOptions()
-            request_options.operationType = OperationTypeEnum.DATA_GENERATE
+            requestOptions = AiCallOptions()
+            requestOptions.operationType = OperationTypeEnum.DATA_GENERATE

-            request = AiCallRequest(prompt=style_template, context="", options=request_options)
+            request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)

            # DEBUG: Show the actual prompt being sent to AI
            self.logger.debug(f"AI Style Template Prompt:")
-            self.logger.debug(f"{style_template}")
+            self.logger.debug(f"{styleTemplate}")

-            response = await ai_service.aiObjects.call(request)
+            response = await aiService.aiObjects.call(request)

            # Save styling prompt and response to debug
-            self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt")
+            self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
            self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")

            # Clean and parse JSON
@@ -346,12 +374,12 @@ class BaseRenderer(ABC):
            # Check if result is empty
            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
-                return default_styles
+                return defaultStyles

            # Extract JSON from markdown if present
-            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
-            if json_match:
-                result = json_match.group(1).strip()
+            jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
+            if jsonMatch:
+                result = jsonMatch.group(1).strip()
            elif result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
@@ -362,8 +390,8 @@ class BaseRenderer(ABC):
            # Try to parse JSON
            try:
                styles = json.loads(result)
-            except json.JSONDecodeError as json_error:
-                self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
+            except json.JSONDecodeError as jsonError:
+                self.logger.warning(f"AI styling returned invalid JSON: {jsonError}")

                # Use print instead of logger to avoid truncation
                self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
@@ -372,88 +400,88 @@ class BaseRenderer(ABC):
                self.logger.warning(f"Raw content that failed to parse: {result}")

                # Try to fix incomplete JSON by adding missing closing braces
-                open_braces = result.count('{')
-                close_braces = result.count('}')
+                openBraces = result.count('{')
+                closeBraces = result.count('}')

-                if open_braces > close_braces:
+                if openBraces > closeBraces:
                    # JSON is incomplete, add missing closing braces
-                    missing_braces = open_braces - close_braces
-                    result = result + '}' * missing_braces
-                    self.logger.info(f"Added {missing_braces} missing closing brace(s)")
+                    missingBraces = openBraces - closeBraces
+                    result = result + '}' * missingBraces
+                    self.logger.info(f"Added {missingBraces} missing closing brace(s)")
                    self.logger.debug(f"Fixed JSON: {result}")

                    # Try parsing the fixed JSON
                    try:
                        styles = json.loads(result)
                        self.logger.info("Successfully fixed incomplete JSON")
-                    except json.JSONDecodeError as fix_error:
-                        self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
+                    except json.JSONDecodeError as fixError:
+                        self.logger.warning(f"Fixed JSON still invalid: {fixError}")
                        self.logger.warning(f"Fixed JSON content: {result}")
                        # Try to extract just the JSON part if it's embedded in text
-                        json_start = result.find('{')
-                        json_end = result.rfind('}')
-                        if json_start != -1 and json_end != -1 and json_end > json_start:
-                            json_part = result[json_start:json_end+1]
+                        jsonStart = result.find('{')
+                        jsonEnd = result.rfind('}')
+                        if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
+                            jsonPart = result[jsonStart:jsonEnd+1]
                            try:
-                                styles = json.loads(json_part)
+                                styles = json.loads(jsonPart)
                                self.logger.info("Successfully extracted JSON from explanatory text")
                            except json.JSONDecodeError:
                                self.logger.warning("Could not extract valid JSON from response, using defaults")
-                                return default_styles
+                                return defaultStyles
                        else:
-                            return default_styles
+                            return defaultStyles
                else:
                    # Try to extract just the JSON part if it's embedded in text
-                    json_start = result.find('{')
-                    json_end = result.rfind('}')
-                    if json_start != -1 and json_end != -1 and json_end > json_start:
-                        json_part = result[json_start:json_end+1]
+                    jsonStart = result.find('{')
+                    jsonEnd = result.rfind('}')
+                    if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
+                        jsonPart = result[jsonStart:jsonEnd+1]
                        try:
-                            styles = json.loads(json_part)
+                            styles = json.loads(jsonPart)
                            self.logger.info("Successfully extracted JSON from explanatory text")
                        except json.JSONDecodeError:
                            self.logger.warning("Could not extract valid JSON from response, using defaults")
-                            return default_styles
+                            return defaultStyles
                    else:
-                        return default_styles
+                        return defaultStyles
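Aside: the repair strategy above (balance braces first, then slice out the outermost {...}) can be read as one standalone helper. This sketch is an illustration distilled from the logic in the diff, not code from the commit.

import json
from typing import Any, Optional

def repairAndParseJson(raw: str) -> Optional[Any]:
    """Best-effort parse: as-is, then brace-balanced, then sliced to {...}."""
    candidates = (
        raw,
        raw + "}" * max(0, raw.count("{") - raw.count("}")),  # balance braces
        raw[raw.find("{"):raw.rfind("}") + 1],                # slice outermost {...}
    )
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except (json.JSONDecodeError, ValueError):
            continue
    return None

assert repairAndParseJson('{"a": {"b": 1}') == {"a": {"b": 1}}  # missing brace
assert repairAndParseJson('noise {"a": 1} noise') == {"a": 1}   # embedded JSON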
            # Convert colors to appropriate format
-            styles = self._convert_colors_format(styles)
+            styles = self._convertColorsFormat(styles)

            return styles

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
-            return default_styles
+            return defaultStyles

-    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert colors to appropriate format based on renderer type.
        Override this method in subclasses for format-specific color handling.
        """
        return styles

-    def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
+    def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str:
        """
        Create a standardized AI style template for any format.

        Args:
-            format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
-            user_prompt: User's original prompt
-            style_schema: Format-specific style schema
+            formatName: Name of the format (e.g., "docx", "xlsx", "pptx")
+            userPrompt: User's original prompt
+            styleSchema: Format-specific style schema

        Returns:
            Formatted prompt string
        """
-        schema_json = json.dumps(style_schema, indent=4)
+        schemaJson = json.dumps(styleSchema, indent=4)

        # DEBUG: Show the schema being sent

-        return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.
+        return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents.

Use this schema as a template and customize the values for professional document styling:

-{schema_json}
+{schemaJson}

Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations)
@@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer):
    """Renders content to CSV format with format-specific extraction."""

    @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
        """Return supported CSV formats."""
        return ['csv']

    @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'table']

    @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
        """Return priority for CSV renderer."""
        return 70
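Aside: these classmethods suggest a registry that resolves a renderer by format or alias and breaks ties by priority. The registry itself is not shown in this diff, so the resolver below is a sketch under that assumption.

from typing import List, Optional, Type

def resolveRenderer(renderers: List[Type], fmt: str) -> Optional[Type]:
    """Pick the highest-priority renderer claiming fmt as a format or alias."""
    candidates = [
        r for r in renderers
        if fmt in r.getSupportedFormats() or fmt in r.getFormatAliases()
    ]
    # Higher priority wins, e.g. RendererDocx (115) over RendererHtml (100)
    return max(candidates, key=lambda r: r.getPriority(), default=None)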
-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        """Render extracted JSON content to CSV format."""
        try:
            # Generate CSV directly from JSON (no styling needed for CSV)
-            csv_content = await self._generate_csv_from_json(extracted_content, title)
+            csvContent = await self._generateCsvFromJson(extractedContent, title)

-            return csv_content, "text/csv"
+            return csvContent, "text/csv"

        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Return minimal CSV fallback
            return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"

-    async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
+    async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
        """Generate CSV content from structured JSON document."""
        try:
            # Validate JSON structure
-            if not isinstance(json_content, dict):
+            if not isinstance(jsonContent, dict):
                raise ValueError("JSON content must be a dictionary")

-            if "sections" not in json_content:
+            if "sections" not in jsonContent:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
-            document_title = json_content.get("metadata", {}).get("title", title)
+            documentTitle = jsonContent.get("metadata", {}).get("title", title)

            # Generate CSV content
-            csv_rows = []
+            csvRows = []

            # Add title row
-            if document_title:
-                csv_rows.append([document_title])
-                csv_rows.append([]) # Empty row
+            if documentTitle:
+                csvRows.append([documentTitle])
+                csvRows.append([]) # Empty row

            # Process each section in order
-            sections = json_content.get("sections", [])
+            sections = jsonContent.get("sections", [])
            for section in sections:
-                section_csv = self._render_json_section_to_csv(section)
-                if section_csv:
-                    csv_rows.extend(section_csv)
-                    csv_rows.append([]) # Empty row between sections
+                sectionCsv = self._renderJsonSectionToCsv(section)
+                if sectionCsv:
+                    csvRows.extend(sectionCsv)
+                    csvRows.append([]) # Empty row between sections

            # Convert to CSV string
-            csv_content = self._convert_rows_to_csv(csv_rows)
+            csvContent = self._convertRowsToCsv(csvRows)

-            return csv_content
+            return csvContent

        except Exception as e:
            self.logger.error(f"Error generating CSV from JSON: {str(e)}")
            raise Exception(f"CSV generation failed: {str(e)}")
def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
|
||||
def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]:
|
||||
"""Render a single JSON section to CSV rows."""
|
||||
try:
|
||||
section_type = section.get("content_type", "paragraph")
|
||||
sectionType = section.get("content_type", "paragraph")
|
||||
elements = section.get("elements", [])
|
||||
|
||||
csv_rows = []
|
||||
csvRows = []
|
||||
|
||||
# Add section title if available
|
||||
section_title = section.get("title")
|
||||
if section_title:
|
||||
csv_rows.append([f"# {section_title}"])
|
||||
sectionTitle = section.get("title")
|
||||
if sectionTitle:
|
||||
csvRows.append([f"# {sectionTitle}"])
|
||||
|
||||
# Process each element in the section
|
||||
for element in elements:
|
||||
if section_type == "table":
|
||||
csv_rows.extend(self._render_json_table_to_csv(element))
|
||||
elif section_type == "list":
|
||||
csv_rows.extend(self._render_json_list_to_csv(element))
|
||||
elif section_type == "heading":
|
||||
csv_rows.extend(self._render_json_heading_to_csv(element))
|
||||
elif section_type == "paragraph":
|
||||
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
||||
elif section_type == "code":
|
||||
csv_rows.extend(self._render_json_code_to_csv(element))
|
||||
if sectionType == "table":
|
||||
csvRows.extend(self._renderJsonTableToCsv(element))
|
||||
elif sectionType == "list":
|
||||
csvRows.extend(self._renderJsonListToCsv(element))
|
||||
elif sectionType == "heading":
|
||||
csvRows.extend(self._renderJsonHeadingToCsv(element))
|
||||
elif sectionType == "paragraph":
|
||||
csvRows.extend(self._renderJsonParagraphToCsv(element))
|
||||
elif sectionType == "code":
|
||||
csvRows.extend(self._renderJsonCodeToCsv(element))
|
||||
else:
|
||||
# Fallback to paragraph for unknown types
|
||||
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
||||
csvRows.extend(self._renderJsonParagraphToCsv(element))
|
||||
|
||||
return csv_rows
|
||||
return csvRows
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
||||
return [["[Error rendering section]"]]
|
||||
|
||||
def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
|
||||
def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]:
|
||||
"""Render a JSON table to CSV rows."""
|
||||
try:
|
||||
headers = table_data.get("headers", [])
|
||||
rows = table_data.get("rows", [])
|
||||
headers = tableData.get("headers", [])
|
||||
rows = tableData.get("rows", [])
|
||||
|
||||
csv_rows = []
|
||||
csvRows = []
|
||||
|
||||
if headers:
|
||||
csv_rows.append(headers)
|
||||
csvRows.append(headers)
|
||||
|
||||
if rows:
|
||||
csv_rows.extend(rows)
|
||||
csvRows.extend(rows)
|
||||
|
||||
return csv_rows
|
||||
return csvRows
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||
return [["[Error rendering table]"]]
|
||||
|
||||
def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
|
||||
def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]:
|
||||
"""Render a JSON list to CSV rows."""
|
||||
try:
|
||||
items = list_data.get("items", [])
|
||||
csv_rows = []
|
||||
items = listData.get("items", [])
|
||||
csvRows = []
|
||||
|
||||
for item in items:
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text", "")
|
||||
subitems = item.get("subitems", [])
|
||||
csv_rows.append([text])
|
||||
csvRows.append([text])
|
||||
|
||||
# Add subitems as indented rows
|
||||
for subitem in subitems:
|
||||
if isinstance(subitem, dict):
|
||||
csv_rows.append([f" - {subitem.get('text', '')}"])
|
||||
csvRows.append([f" - {subitem.get('text', '')}"])
|
||||
else:
|
||||
csv_rows.append([f" - {subitem}"])
|
||||
csvRows.append([f" - {subitem}"])
|
||||
else:
|
||||
csv_rows.append([str(item)])
|
||||
csvRows.append([str(item)])
|
||||
|
||||
return csv_rows
|
||||
return csvRows
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering list: {str(e)}")
|
||||
return [["[Error rendering list]"]]
|
||||
|
||||
def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
|
||||
def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]:
|
||||
"""Render a JSON heading to CSV rows."""
|
||||
try:
|
||||
text = heading_data.get("text", "")
|
||||
level = heading_data.get("level", 1)
|
||||
text = headingData.get("text", "")
|
||||
level = headingData.get("level", 1)
|
||||
|
||||
if text:
|
||||
# Use # symbols for heading levels
|
||||
heading_text = f"{'#' * level} {text}"
|
||||
return [[heading_text]]
|
||||
headingText = f"{'#' * level} {text}"
|
||||
return [[headingText]]
|
||||
|
||||
return []
|
||||
|
||||
|
|
@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer):
|
|||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||
return [["[Error rendering heading]"]]
|
||||
|
||||
def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
|
||||
def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]:
|
||||
"""Render a JSON paragraph to CSV rows."""
|
||||
try:
|
||||
text = paragraph_data.get("text", "")
|
||||
text = paragraphData.get("text", "")
|
||||
|
||||
if text:
|
||||
# Split long paragraphs into multiple rows if needed
|
||||
if len(text) > 100:
|
||||
words = text.split()
|
||||
rows = []
|
||||
current_row = []
|
||||
current_length = 0
|
||||
currentRow = []
|
||||
currentLength = 0
|
||||
|
||||
for word in words:
|
||||
if current_length + len(word) > 100 and current_row:
|
||||
rows.append([" ".join(current_row)])
|
||||
current_row = [word]
|
||||
current_length = len(word)
|
||||
if currentLength + len(word) > 100 and currentRow:
|
||||
rows.append([" ".join(currentRow)])
|
||||
currentRow = [word]
|
||||
currentLength = len(word)
|
||||
else:
|
||||
current_row.append(word)
|
||||
current_length += len(word) + 1
|
||||
currentRow.append(word)
|
||||
currentLength += len(word) + 1
|
||||
|
||||
if current_row:
|
||||
rows.append([" ".join(current_row)])
|
||||
if currentRow:
|
||||
rows.append([" ".join(currentRow)])
|
||||
|
||||
return rows
|
||||
else:
|
||||
|
|
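Aside: the word-accumulation loop above is essentially a hand-rolled line wrapper. For comparison, a sketch of the same 100-character split using the standard library; this is an illustration only, and its handling of words longer than the width differs slightly from the loop in the diff.

import textwrap

def wrapParagraphToRows(text: str, width: int = 100) -> list:
    """Split a long paragraph into single-column CSV rows of ~width characters."""
    if len(text) <= width:
        return [[text]]
    return [[line] for line in textwrap.wrap(text, width=width)]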
@@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer):
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return [["[Error rendering paragraph]"]]

-    def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
+    def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]:
        """Render a JSON code block to CSV rows."""
        try:
-            code = code_data.get("code", "")
-            language = code_data.get("language", "")
+            code = codeData.get("code", "")
+            language = codeData.get("language", "")

-            csv_rows = []
+            csvRows = []

            if language:
-                csv_rows.append([f"Code ({language}):"])
+                csvRows.append([f"Code ({language}):"])

            if code:
                # Split code into lines
-                code_lines = code.split('\n')
-                for line in code_lines:
-                    csv_rows.append([f" {line}"])
+                codeLines = code.split('\n')
+                for line in codeLines:
+                    csvRows.append([f" {line}"])

-            return csv_rows
+            return csvRows

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return [["[Error rendering code block]"]]

-    def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
+    def _convertRowsToCsv(self, rows: List[List[str]]) -> str:
        """Convert rows to CSV string."""
        import csv
        import io
|
|||
|
||||
return output.getvalue()
|
||||
|
||||
def _clean_csv_content(self, content: str, title: str) -> str:
|
||||
def _cleanCsvContent(self, content: str, title: str) -> str:
|
||||
"""Clean and validate CSV content from AI."""
|
||||
content = content.strip()
|
||||
|
||||
|
|
|
|||
|
|
@@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer):
    """Renders content to DOCX format using python-docx."""

    @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
        """Return supported DOCX formats."""
        return ['docx', 'doc']

    @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['word', 'document']

    @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
        """Return priority for DOCX renderer."""
        return 115

-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        """Render extracted JSON content to DOCX format using AI-analyzed styling."""
-        self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER")
+        self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .rendererHtml import RendererHtml
-                html_renderer = RendererHtml()
-                html_content, _ = await html_renderer.render(extracted_content, title)
-                return html_content, "text/html"
+                htmlRenderer = RendererHtml()
+                htmlContent, _ = await htmlRenderer.render(extractedContent, title)
+                return htmlContent, "text/html"

            # Generate DOCX using AI-analyzed styling
-            docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service)
+            docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)

            return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
@@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer):
            # Return minimal fallback
            return f"DOCX Generation Error: {str(e)}", "text/plain"

-    async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+    async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Generate DOCX content from structured JSON document using AI-generated styling."""
        try:
            # Create new document
            doc = Document()

            # Get AI-generated styling definitions
-            self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
-            styles = await self._get_docx_styles(user_prompt, ai_service)
+            self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...")
+            styles = await self._getDocxStyles(userPrompt, aiService)

            # Apply basic document setup
-            self._setup_basic_document_styles(doc)
+            self._setupBasicDocumentStyles(doc)

            # Validate JSON structure
            if not isinstance(json_content, dict):
@@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer):
            self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
            raise Exception(f"DOCX generation failed: {str(e)}")

-    async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+    async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
        """Get DOCX styling definitions using base template AI styling."""
        style_schema = {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer):
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
        }

-        style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
-        styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
+        style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema)
+        styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles())

        # Validate and fix contrast issues
-        return self._validate_styles_contrast(styles)
+        return self._validateStylesContrast(styles)

-    def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
@@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer):

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
-            return self._get_default_styles()
+            return self._getDefaultStyles()

-    def _get_default_styles(self) -> Dict[str, Any]:
+    def _getDefaultStyles(self) -> Dict[str, Any]:
        """Default DOCX styles."""
        return {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
@@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer):
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
        }

-    def _setup_basic_document_styles(self, doc: Document) -> None:
+    def _setupBasicDocumentStyles(self, doc: Document) -> None:
        """Set up basic document styles."""
        try:
            # Set default font

@@ -189,7 +189,7 @@ class RendererDocx(BaseRenderer):

-    def _clear_template_content(self, doc: Document) -> None:
+    def _clearTemplateContent(self, doc: Document) -> None:
        """Clear template content while preserving styles."""
        try:
            # Remove all paragraphs except keep the styles

@@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not clear template content: {str(e)}")

-    def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a single JSON section to DOCX using AI-generated styles."""
        try:
            section_type = section.get("content_type", "paragraph")
@@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer):
            # Process each element in the section
            for element in elements:
                if section_type == "table":
-                    self._render_json_table(doc, element, styles)
+                    self._renderJsonTable(doc, element, styles)
                elif section_type == "bullet_list":
-                    self._render_json_bullet_list(doc, element, styles)
+                    self._renderJsonBulletList(doc, element, styles)
                elif section_type == "heading":
-                    self._render_json_heading(doc, element, styles)
+                    self._renderJsonHeading(doc, element, styles)
                elif section_type == "paragraph":
-                    self._render_json_paragraph(doc, element, styles)
+                    self._renderJsonParagraph(doc, element, styles)
                elif section_type == "code_block":
-                    self._render_json_code_block(doc, element, styles)
+                    self._renderJsonCodeBlock(doc, element, styles)
                elif section_type == "image":
-                    self._render_json_image(doc, element, styles)
+                    self._renderJsonImage(doc, element, styles)
                else:
                    # Fallback to paragraph for unknown types
-                    self._render_json_paragraph(doc, element, styles)
+                    self._renderJsonParagraph(doc, element, styles)

        except Exception as e:
            self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
            # Add error paragraph as fallback
            error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")

-    def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON table to DOCX using AI-generated styles."""
        try:
            headers = table_data.get("headers", [])
@@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer):
            # Apply table borders based on AI style
            border_style = styles["table_border"]["style"]
            if border_style == "horizontal_only":
-                self._apply_horizontal_borders_only(table)
+                self._applyHorizontalBordersOnly(table)
            elif border_style == "grid":
                table.style = 'Table Grid'
            # else: no borders

@@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer):

            # Apply background color
            bg_color = header_style["background"].lstrip('#')
-            self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
+            self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))

            # Apply text styling
            for paragraph in cell.paragraphs:
@@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")

-    def _apply_horizontal_borders_only(self, table) -> None:
+    def _applyHorizontalBordersOnly(self, table) -> None:
        """Apply only horizontal borders to the table (no vertical borders)."""
        try:
            from docx.oxml.shared import OxmlElement, qn

@@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not apply horizontal borders: {str(e)}")

-    def _set_cell_background(self, cell, color: RGBColor) -> None:
+    def _setCellBackground(self, cell, color: RGBColor) -> None:
        """Set the background color of a table cell."""
        try:
            from docx.oxml.shared import OxmlElement, qn

@@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer):
            self.logger.warning(f"Could not set cell background: {str(e)}")

-    def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON bullet list to DOCX using AI-generated styles."""
        try:
            items = list_data.get("items", [])
@@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")

-    def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON heading to DOCX using AI-generated styles."""
        try:
            level = heading_data.get("level", 1)

@@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")

-    def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON paragraph to DOCX using AI-generated styles."""
        try:
            text = paragraph_data.get("text", "")

@@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")

-    def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON code block to DOCX using AI-generated styles."""
        try:
            code = code_data.get("code", "")

@@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")

-    def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON image to DOCX."""
        try:
            base64_data = image_data.get("base64Data", "")

@@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer):
            self.logger.warning(f"Error rendering image: {str(e)}")
            doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")

-    def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]:
+    def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
        """Extract document structure from user prompt."""
        structure = {
            'title': title,
@@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer):
            'format': 'standard'
        }

-        if not user_prompt:
+        if not userPrompt:
            return structure

        # Extract title from prompt if not provided
        if not title or title == "Generated Document":
            # Look for "create a ... document" or "generate a ... report"
            import re
-            title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower())
+            title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
            if title_match:
                structure['title'] = title_match.group(1).strip().title()

        # Extract sections from numbered lists in prompt
        import re
        section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
-        sections = re.findall(section_pattern, user_prompt)
+        sections = re.findall(section_pattern, userPrompt)

        for num, section_text in sections:
            structure['sections'].append({

@@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer):

        # If no numbered sections found, try to extract from "including:" patterns
        if not structure['sections']:
-            including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL)
+            including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
            if including_match:
                including_text = including_match.group(1)
                # Split by common separators

@@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer):
        if not structure['sections']:
            # Look for bullet points or dashes
            bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
-            bullets = re.findall(bullet_pattern, user_prompt)
+            bullets = re.findall(bullet_pattern, userPrompt)
            for i, bullet in enumerate(bullets, 1):
                bullet = bullet.strip()
                if bullet and len(bullet) > 3:

@@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer):
        # If still no sections, extract from sentence structure
        if not structure['sections']:
            # Split prompt into sentences and use as sections
-            sentences = re.split(r'[.!?]\s+', user_prompt)
+            sentences = re.split(r'[.!?]\s+', userPrompt)
            for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
                sentence = sentence.strip()
                if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):

@@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer):
        action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
        found_actions = []
        for action in action_words:
-            if action in user_prompt.lower():
+            if action in userPrompt.lower():
                found_actions.append(action.title())

        if found_actions:

@@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer):

        return structure
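Aside: a worked example of the numbered-section pattern used above, run against a sample prompt. The sample text is invented for illustration; only the regex comes from the diff.

import re

sectionPattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
prompt = "Create a status report with 1) overview, 2) key risks, 3) next steps"

# Each match pairs the section number with its lazily-captured title,
# stopping at the next comma, colon, or end of string.
print(re.findall(sectionPattern, prompt))
# [('1', 'overview'), ('2', 'key risks'), ('3', 'next steps')]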
-    def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]):
+    def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
        """Generate DOCX content based on extracted structure."""
        # Add sections based on prompt structure
        for section in structure['sections']:

@@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer):

        # Add AI-generated content for this section
        # Try to extract relevant content for this section from the AI response
-        section_content = self._extract_section_content(content, section['title'])
+        section_content = self._extractSectionContent(content, section['title'])

        if section_content:
            doc.add_paragraph(section_content)

@@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer):
        doc.add_heading("Complete Analysis", level=1)
        doc.add_paragraph(content)

-    def _extract_section_content(self, content: str, section_title: str) -> str:
+    def _extractSectionContent(self, content: str, section_title: str) -> str:
        """Extract relevant content for a specific section from AI response."""
        if not content or not section_title:
            return ""

@@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer):

        return ""

-    def _setup_document_styles(self, doc):
+    def _setupDocumentStyles(self, doc):
        """Set up document styles."""
        try:
            # Set default font
@@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")

-    def _process_section(self, doc, lines: list):
+    def _processSection(self, doc, lines: list):
        """Process a section of content into DOCX elements."""
        for line in lines:
            if not line.strip():

@@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer):
            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
-                table_data = self._extract_table_data(lines)
+                table_data = self._extractTableData(lines)
                if table_data:
-                    self._add_table(doc, table_data)
+                    self._addTable(doc, table_data)
                    return

            # Check for lists

@@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer):
            # Regular paragraph
            doc.add_paragraph(line)

-    def _extract_table_data(self, lines: list) -> list:
+    def _extractTableData(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

@@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer):

        return table_data if len(table_data) > 1 else []

-    def _add_table(self, doc, table_data: list):
+    def _addTable(self, doc, table_data: list):
        """Add a table to the document."""
        try:
            if not table_data:

@@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer):
                    table.rows[row_idx].cells[col_idx].text = cell_data

            # Style the table
-            self._style_table(table)
+            self._styleTable(table)

        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")

-    def _style_table(self, table):
+    def _styleTable(self, table):
        """Apply styling to the table."""
        try:
            # Style header row

@@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")

-    def _process_table_row(self, doc, line: str):
+    def _processTableRow(self, doc, line: str):
        """Process a table row and add it to the document."""
        if not line.strip():
            return
@@ -745,7 +745,7 @@ class RendererDocx(BaseRenderer):
            # Not a table row, treat as regular text
            doc.add_paragraph(line)

-    def _clean_ai_content(self, content: str) -> str:
+    def _cleanAiContent(self, content: str) -> str:
        """Clean AI-generated content by removing debug information and duplicates."""
        if not content:
            return ""

@@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer):

        return '\n\n'.join(unique_sections)

-    def _process_tables(self, doc, content: str) -> str:
+    def _processTables(self, doc, content: str) -> str:
        """
        Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
        Returns the content with tables replaced by placeholders.

@@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer):

        return '\n'.join(processed_lines)

-    def _parse_and_format_content(self, doc, content: str, title: str):
+    def _parseAndFormatContent(self, doc, content: str, title: str):
        """Parse AI-generated content in standardized format and apply proper DOCX formatting."""
        if not content:
            return

        # Process tables and replace them with placeholders
-        content = self._process_tables(doc, content)
+        content = self._processTables(doc, content)

        # Parse content line by line in exact sequence
        lines = content.split('\n')

@@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer):

            # Regular paragraph
            else:
-                self._add_paragraph_to_doc(doc, line)
+                self._addParagraphToDoc(doc, line)

-    def _add_paragraph_to_doc(self, doc, text: str):
+    def _addParagraphToDoc(self, doc, text: str):
        """Add a paragraph to the document with proper formatting."""
        if not text.strip():
            return
@@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer):
    """Renders content to HTML format with format-specific extraction."""

    @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100
-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
        """Render extracted JSON content to HTML format using AI-analyzed styling."""
        try:
            # Generate HTML using AI-analyzed styling
-            html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
+            htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)

-            return html_content, "text/html"
+            return htmlContent, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Return minimal HTML fallback
            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"

-    async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+    async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
        """Generate HTML content from structured JSON document using AI-generated styling."""
        try:
            # Get AI-generated styling definitions
-            styles = await self._get_html_styles(user_prompt, ai_service)
+            styles = await self._getHtmlStyles(userPrompt, aiService)

            # Validate JSON structure
-            if not isinstance(json_content, dict):
+            if not isinstance(jsonContent, dict):
                raise ValueError("JSON content must be a dictionary")

-            if "sections" not in json_content:
+            if "sections" not in jsonContent:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
-            document_title = json_content.get("metadata", {}).get("title", title)
+            documentTitle = jsonContent.get("metadata", {}).get("title", title)

            # Build HTML document
-            html_parts = []
+            htmlParts = []

            # HTML document structure
-            html_parts.append('<!DOCTYPE html>')
-            html_parts.append('<html lang="en">')
-            html_parts.append('<head>')
-            html_parts.append('<meta charset="UTF-8">')
-            html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
-            html_parts.append(f'<title>{document_title}</title>')
-            html_parts.append('<style>')
-            html_parts.append(self._generate_css_styles(styles))
-            html_parts.append('</style>')
-            html_parts.append('</head>')
-            html_parts.append('<body>')
+            htmlParts.append('<!DOCTYPE html>')
+            htmlParts.append('<html lang="en">')
+            htmlParts.append('<head>')
+            htmlParts.append('<meta charset="UTF-8">')
+            htmlParts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
+            htmlParts.append(f'<title>{documentTitle}</title>')
+            htmlParts.append('<style>')
+            htmlParts.append(self._generateCssStyles(styles))
+            htmlParts.append('</style>')
+            htmlParts.append('</head>')
+            htmlParts.append('<body>')

            # Document header
-            html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>')
+            htmlParts.append(f'<header><h1 class="document-title">{documentTitle}</h1></header>')

            # Main content
-            html_parts.append('<main>')
+            htmlParts.append('<main>')

            # Process each section
-            sections = json_content.get("sections", [])
+            sections = jsonContent.get("sections", [])
            for section in sections:
-                section_html = self._render_json_section(section, styles)
-                if section_html:
-                    html_parts.append(section_html)
+                sectionHtml = self._renderJsonSection(section, styles)
+                if sectionHtml:
+                    htmlParts.append(sectionHtml)

-            html_parts.append('</main>')
+            htmlParts.append('</main>')

            # Footer
-            html_parts.append('<footer>')
-            html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
-            html_parts.append('</footer>')
+            htmlParts.append('<footer>')
+            htmlParts.append(f'<p class="generated-info">Generated: {self._formatTimestamp()}</p>')
+            htmlParts.append('</footer>')

-            html_parts.append('</body>')
-            html_parts.append('</html>')
+            htmlParts.append('</body>')
+            htmlParts.append('</html>')

-            return '\n'.join(html_parts)
+            return '\n'.join(htmlParts)

        except Exception as e:
            self.logger.error(f"Error generating HTML from JSON: {str(e)}")
            raise Exception(f"HTML generation failed: {str(e)}")

-    async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+    async def _getHtmlStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
        """Get HTML styling definitions using base template AI styling."""
-        style_schema = {
+        styleSchema = {
            "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
            "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
            "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
@ -113,40 +113,40 @@ class RendererHtml(BaseRenderer):
|
|||
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
|
||||
}
|
||||
|
||||
style_template = self._create_ai_style_template("html", user_prompt, style_schema)
|
||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles())
|
||||
styleTemplate = self._createAiStyleTemplate("html", userPrompt, styleSchema)
|
||||
styles = await self._getAiStyles(aiService, styleTemplate, self._getDefaultHtmlStyles())
|
||||
|
||||
# Validate and fix contrast issues
|
||||
return self._validate_html_styles_contrast(styles)
|
||||
return self._validateHtmlStylesContrast(styles)
|
||||
|
||||
def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _validateHtmlStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Validate and fix contrast issues in AI-generated styles."""
|
||||
try:
|
||||
# Fix table header contrast
|
||||
if "table_header" in styles:
|
||||
header = styles["table_header"]
|
||||
bg_color = header.get("background", "#FFFFFF")
|
||||
text_color = header.get("color", "#000000")
|
||||
bgColor = header.get("background", "#FFFFFF")
|
||||
textColor = header.get("color", "#000000")
|
||||
|
||||
# If both are white or both are dark, fix it
|
||||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||||
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
||||
header["background"] = "#4F4F4F"
|
||||
header["color"] = "#FFFFFF"
|
||||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||||
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
||||
header["background"] = "#4F4F4F"
|
||||
header["color"] = "#FFFFFF"
|
||||
|
||||
# Fix table cell contrast
|
||||
if "table_cell" in styles:
|
||||
cell = styles["table_cell"]
|
||||
bg_color = cell.get("background", "#FFFFFF")
|
||||
text_color = cell.get("color", "#000000")
|
||||
bgColor = cell.get("background", "#FFFFFF")
|
||||
textColor = cell.get("color", "#000000")
|
||||
|
||||
# If both are white or both are dark, fix it
|
||||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||||
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
||||
cell["background"] = "#FFFFFF"
|
||||
cell["color"] = "#2F2F2F"
|
||||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||||
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
||||
cell["background"] = "#FFFFFF"
|
||||
cell["color"] = "#2F2F2F"
|
||||
|
||||
|
|
@ -154,10 +154,10 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||
return self._get_default_html_styles()
|
||||
return self._getDefaultHtmlStyles()
|
||||
|
||||
|
||||
def _get_default_html_styles(self) -> Dict[str, Any]:
|
||||
def _getDefaultHtmlStyles(self) -> Dict[str, Any]:
|
||||
"""Default HTML styles."""
|
||||
return {
|
||||
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
|
||||
|
|
@ -173,7 +173,7 @@ class RendererHtml(BaseRenderer):
|
|||
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
|
||||
}
|
||||
|
||||
def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
|
||||
def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
|
||||
"""Generate CSS from style definitions."""
|
||||
css_parts = []
|
||||
|
||||
|
|
@ -271,109 +271,109 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
return '\n'.join(css_parts)
|
||||
|
||||
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a single JSON section to HTML using AI-generated styles."""
|
||||
try:
|
||||
section_type = self._get_section_type(section)
|
||||
section_data = self._get_section_data(section)
|
||||
sectionType = self._getSectionType(section)
|
||||
sectionData = self._getSectionData(section)
|
||||
|
||||
if section_type == "table":
|
||||
if sectionType == "table":
|
||||
# Process the section data to extract table structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_table(processed_data, styles)
|
||||
elif section_type == "bullet_list":
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonTable(processedData, styles)
|
||||
elif sectionType == "bullet_list":
|
||||
# Process the section data to extract bullet list structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_bullet_list(processed_data, styles)
|
||||
elif section_type == "heading":
|
||||
return self._render_json_heading(section_data, styles)
|
||||
elif section_type == "paragraph":
|
||||
return self._render_json_paragraph(section_data, styles)
|
||||
elif section_type == "code_block":
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonBulletList(processedData, styles)
|
||||
elif sectionType == "heading":
|
||||
return self._renderJsonHeading(sectionData, styles)
|
||||
elif sectionType == "paragraph":
|
||||
return self._renderJsonParagraph(sectionData, styles)
|
||||
elif sectionType == "code_block":
|
||||
# Process the section data to extract code block structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_code_block(processed_data, styles)
|
||||
elif section_type == "image":
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonCodeBlock(processedData, styles)
|
||||
elif sectionType == "image":
|
||||
# Process the section data to extract image structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_image(processed_data, styles)
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonImage(processedData, styles)
|
||||
else:
|
||||
# Fallback to paragraph for unknown types
|
||||
return self._render_json_paragraph(section_data, styles)
|
||||
return self._renderJsonParagraph(sectionData, styles)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
||||
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
|
||||
return f'<div class="error">[Error rendering section: {str(e)}]</div>'
|
||||
|
||||
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON table to HTML using AI-generated styles."""
|
||||
try:
|
||||
headers = table_data.get("headers", [])
|
||||
rows = table_data.get("rows", [])
|
||||
headers = tableData.get("headers", [])
|
||||
rows = tableData.get("rows", [])
|
||||
|
||||
if not headers or not rows:
|
||||
return ""
|
||||
|
||||
html_parts = ['<table>']
|
||||
htmlParts = ['<table>']
|
||||
|
||||
# Table header
|
||||
html_parts.append('<thead><tr>')
|
||||
htmlParts.append('<thead><tr>')
|
||||
for header in headers:
|
||||
html_parts.append(f'<th>{header}</th>')
|
||||
html_parts.append('</tr></thead>')
|
||||
htmlParts.append(f'<th>{header}</th>')
|
||||
htmlParts.append('</tr></thead>')
|
||||
|
||||
# Table body
|
||||
html_parts.append('<tbody>')
|
||||
htmlParts.append('<tbody>')
|
||||
for row in rows:
|
||||
html_parts.append('<tr>')
|
||||
for cell_data in row:
|
||||
html_parts.append(f'<td>{cell_data}</td>')
|
||||
html_parts.append('</tr>')
|
||||
html_parts.append('</tbody>')
|
||||
htmlParts.append('<tr>')
|
||||
for cellData in row:
|
||||
htmlParts.append(f'<td>{cellData}</td>')
|
||||
htmlParts.append('</tr>')
|
||||
htmlParts.append('</tbody>')
|
||||
|
||||
html_parts.append('</table>')
|
||||
return '\n'.join(html_parts)
|
||||
htmlParts.append('</table>')
|
||||
return '\n'.join(htmlParts)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||
return ""
|
||||
|
||||
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON bullet list to HTML using AI-generated styles."""
|
||||
try:
|
||||
items = list_data.get("items", [])
|
||||
items = listData.get("items", [])
|
||||
|
||||
if not items:
|
||||
return ""
|
||||
|
||||
html_parts = ['<ul>']
|
||||
htmlParts = ['<ul>']
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
html_parts.append(f'<li>{item}</li>')
|
||||
htmlParts.append(f'<li>{item}</li>')
|
||||
elif isinstance(item, dict) and "text" in item:
|
||||
html_parts.append(f'<li>{item["text"]}</li>')
|
||||
html_parts.append('</ul>')
|
||||
htmlParts.append(f'<li>{item["text"]}</li>')
|
||||
htmlParts.append('</ul>')
|
||||
|
||||
return '\n'.join(html_parts)
|
||||
return '\n'.join(htmlParts)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||
return ""
|
||||
|
||||
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON heading to HTML using AI-generated styles."""
|
||||
try:
|
||||
# Normalize non-dict inputs
|
||||
if isinstance(heading_data, str):
|
||||
heading_data = {"text": heading_data, "level": 2}
|
||||
elif isinstance(heading_data, list):
|
||||
if isinstance(headingData, str):
|
||||
headingData = {"text": headingData, "level": 2}
|
||||
elif isinstance(headingData, list):
|
||||
# Render a list as bullet list under a default heading label
|
||||
return self._render_json_bullet_list({"items": heading_data}, styles)
|
||||
elif not isinstance(heading_data, dict):
|
||||
return self._renderJsonBulletList({"items": headingData}, styles)
|
||||
elif not isinstance(headingData, dict):
|
||||
return ""
|
||||
|
||||
level = heading_data.get("level", 1)
|
||||
text = heading_data.get("text", "")
|
||||
level = headingData.get("level", 1)
|
||||
text = headingData.get("text", "")
|
||||
|
||||
if text:
|
||||
level = max(1, min(6, level))
|
||||
|
|
@ -385,19 +385,19 @@ class RendererHtml(BaseRenderer):
|
|||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||
return ""
|
||||
|
||||
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON paragraph to HTML using AI-generated styles."""
|
||||
try:
|
||||
# Normalize non-dict inputs
|
||||
if isinstance(paragraph_data, str):
|
||||
paragraph_data = {"text": paragraph_data}
|
||||
elif isinstance(paragraph_data, list):
|
||||
if isinstance(paragraphData, str):
|
||||
paragraphData = {"text": paragraphData}
|
||||
elif isinstance(paragraphData, list):
|
||||
# Treat list as bullet list paragraph
|
||||
return self._render_json_bullet_list({"items": paragraph_data}, styles)
|
||||
elif not isinstance(paragraph_data, dict):
|
||||
return self._renderJsonBulletList({"items": paragraphData}, styles)
|
||||
elif not isinstance(paragraphData, dict):
|
||||
return ""
|
||||
|
||||
text = paragraph_data.get("text", "")
|
||||
text = paragraphData.get("text", "")
|
||||
|
||||
if text:
|
||||
return f'<p>{text}</p>'
|
||||
|
|
@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer):
|
|||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||
return ""
|
||||
|
||||
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON code block to HTML using AI-generated styles."""
|
||||
try:
|
||||
code = code_data.get("code", "")
|
||||
language = code_data.get("language", "")
|
||||
code = codeData.get("code", "")
|
||||
language = codeData.get("language", "")
|
||||
|
||||
if code:
|
||||
if language:
|
||||
|
|
@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer):
|
|||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||
return ""
|
||||
|
||||
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON image to HTML."""
|
||||
try:
|
||||
base64_data = image_data.get("base64Data", "")
|
||||
alt_text = image_data.get("altText", "Image")
|
||||
base64Data = imageData.get("base64Data", "")
|
||||
altText = imageData.get("altText", "Image")
|
||||
|
||||
if base64_data:
|
||||
return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">'
|
||||
if base64Data:
|
||||
return f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
|
||||
|
||||
return ""
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||
return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>'
|
||||
return f'<div class="error">[Image: {imageData.get("altText", "Image")}]</div>'
|
||||
|
|
|
|||
|
|
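Aside: the htmlParts rename above is a good spot to sanity-check the underlying pattern, which the refactor leaves unchanged: accumulate HTML fragments in a list and join once at the end rather than concatenating strings repeatedly. A minimal standalone sketch (function and variable names here are illustrative, not from this codebase):

    from typing import List

    def buildHtmlDocument(title: str, bodies: List[str]) -> str:
        # Accumulate fragments in a list; joining once avoids repeated string copies.
        htmlParts: List[str] = ['<!DOCTYPE html>', '<html lang="en">', '<head>',
                                f'<title>{title}</title>', '</head>', '<body>']
        htmlParts.extend(f'<section>{body}</section>' for body in bodies)
        htmlParts.extend(['</body>', '</html>'])
        return '\n'.join(htmlParts)

    print(buildHtmlDocument("Demo", ["Hello", "World"]))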
@@ -12,154 +12,156 @@ class RendererImage(BaseRenderer):
     """Renders content to image format using AI image generation."""

     @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
         """Return supported image formats."""
         return ['png', 'jpg', 'jpeg', 'image']

     @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
         """Return format aliases."""
         return ['img', 'picture', 'photo', 'graphic']

     @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
         """Return priority for image renderer."""
         return 90

-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
         """Render extracted JSON content to image format using AI image generation."""
         try:
             # Generate AI image from content
-            image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
+            imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)

-            return image_content, "image/png"
+            return imageContent, "image/png"

         except Exception as e:
             self.logger.error(f"Error rendering image: {str(e)}")
             # Re-raise the exception instead of using fallback
             raise Exception(f"Image rendering failed: {str(e)}")

-    async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+    async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
         """Generate AI image from extracted content."""
         try:
-            if not ai_service:
+            if not aiService:
                 raise ValueError("AI service is required for image generation")

             # Validate JSON structure
-            if not isinstance(extracted_content, dict):
+            if not isinstance(extractedContent, dict):
                 raise ValueError("Extracted content must be a dictionary")

-            if "sections" not in extracted_content:
+            if "sections" not in extractedContent:
                 raise ValueError("Extracted content must contain 'sections' field")

             # Use title from JSON metadata if available, otherwise use provided title
-            document_title = extracted_content.get("metadata", {}).get("title", title)
+            documentTitle = extractedContent.get("metadata", {}).get("title", title)

             # Create AI prompt for image generation
-            image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service)
+            imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

             # Save image generation prompt to debug
-            ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt")
+            aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")

             # Generate image using AI
-            image_result = await ai_service.aiObjects.generateImage(
-                prompt=image_prompt,
+            imageResult = await aiService.aiObjects.generateImage(
+                prompt=imagePrompt,
                 size="1024x1024",
                 quality="standard",
                 style="vivid"
             )

             # Save image generation response to debug
-            ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response")
+            aiService.services.utils.writeDebugFile(str(imageResult), "image_generation_response")

             # Extract base64 image data from result
-            if image_result and image_result.get("success", False):
-                image_data = image_result.get("image_data", "")
-                if image_data:
-                    return image_data
+            if imageResult and imageResult.get("success", False):
+                imageData = imageResult.get("image_data", "")
+                if imageData:
+                    return imageData
                 else:
                     raise ValueError("No image data returned from AI")
             else:
-                error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
-                raise ValueError(f"AI image generation failed: {error_msg}")
+                errorMsg = imageResult.get("error", "Unknown error") if imageResult else "No result"
+                raise ValueError(f"AI image generation failed: {errorMsg}")

         except Exception as e:
             self.logger.error(f"Error generating AI image: {str(e)}")
             raise Exception(f"AI image generation failed: {str(e)}")

-    async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+    async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
         """Create a detailed prompt for AI image generation based on the content."""
         try:
             # Start with base prompt
-            prompt_parts = []
+            promptParts = []

             # Add user's original intent if available
-            if user_prompt:
-                prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}")
+            if userPrompt:
+                sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
+                promptParts.append(f"User Request: {sanitized_prompt}")

             # Add document title
-            prompt_parts.append(f"Document Title: {title}")
+            promptParts.append(f"Document Title: {title}")

             # Analyze content and create visual description
-            sections = extracted_content.get("sections", [])
-            content_description = self._analyze_content_for_visual_description(sections)
+            sections = extractedContent.get("sections", [])
+            contentDescription = self._analyzeContentForVisualDescription(sections)

-            if content_description:
-                prompt_parts.append(f"Content to Visualize: {content_description}")
+            if contentDescription:
+                promptParts.append(f"Content to Visualize: {contentDescription}")

             # Add style guidance
-            style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
-            if style_guidance:
-                prompt_parts.append(f"Visual Style: {style_guidance}")
+            styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
+            if styleGuidance:
+                promptParts.append(f"Visual Style: {styleGuidance}")

             # Combine all parts
-            full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)
+            fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)

             # Add technical requirements
-            full_prompt += "\n\nTechnical Requirements:"
-            full_prompt += "\n- High quality, professional appearance"
-            full_prompt += "\n- Clear, readable text if any text is included"
-            full_prompt += "\n- Appropriate colors and layout"
-            full_prompt += "\n- Suitable for business/professional use"
+            fullPrompt += "\n\nTechnical Requirements:"
+            fullPrompt += "\n- High quality, professional appearance"
+            fullPrompt += "\n- Clear, readable text if any text is included"
+            fullPrompt += "\n- Appropriate colors and layout"
+            fullPrompt += "\n- Suitable for business/professional use"

             # Truncate prompt if it exceeds DALL-E's 4000 character limit
-            if len(full_prompt) > 4000:
+            if len(fullPrompt) > 4000:
                 # Use AI to compress the prompt intelligently
-                compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
-                if compressed_prompt and len(compressed_prompt) <= 4000:
-                    return compressed_prompt
+                compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
+                if compressedPrompt and len(compressedPrompt) <= 4000:
+                    return compressedPrompt

                 # Fallback to minimal prompt if AI compression fails or is still too long
-                minimal_prompt = f"Create a professional image representing: {title}"
-                if user_prompt:
-                    minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}"
+                minimalPrompt = f"Create a professional image representing: {title}"
+                if userPrompt:
+                    sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
+                    minimalPrompt += f" - {sanitized_prompt}"

                 # If even the minimal prompt is too long, truncate it
-                if len(minimal_prompt) > 4000:
-                    minimal_prompt = minimal_prompt[:3997] + "..."
+                if len(minimalPrompt) > 4000:
+                    minimalPrompt = minimalPrompt[:3997] + "..."

-                return minimal_prompt
+                return minimalPrompt

-            return full_prompt
+            return fullPrompt

         except Exception as e:
             self.logger.warning(f"Error creating image prompt: {str(e)}")
             # Fallback to simple prompt
             return f"Create a professional image representing: {title}"

-    async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
+    async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str:
         """Use AI to intelligently compress a long prompt while preserving key information."""
         try:
-            if not ai_service:
+            if not aiService:
                 return None

-            compression_prompt = f"""
+            compressionPrompt = f"""
 You are an expert at creating concise, effective prompts for AI image generation.

 The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.

-Original prompt ({len(long_prompt)} characters):
-{long_prompt}
+Original prompt ({len(longPrompt)} characters):
+{longPrompt}

 Please create a compressed version that:
 1. Keeps the most important visual elements and requirements

@@ -176,7 +178,7 @@ Return only the compressed prompt, no explanations.
             from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

             request = AiCallRequest(
-                prompt=compression_prompt,
+                prompt=compressionPrompt,
                 options=AiCallOptions(
                     operationType=OperationTypeEnum.DATA_GENERATE,
                     maxTokens=None,  # Let the model use its full context length

@@ -184,12 +186,12 @@ Return only the compressed prompt, no explanations.
                 )
             )

-            response = await ai_service.aiObjects.call(request)
+            response = await aiService.aiObjects.call(request)
             compressed = response.content.strip()

             # Validate the compressed prompt
             if compressed and len(compressed) <= 4000 and len(compressed) > 50:
-                self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
+                self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
                 return compressed
             else:
                 self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")

@@ -199,42 +201,42 @@ Return only the compressed prompt, no explanations.
             self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
             return None

-    def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
+    def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
         """Analyze content sections and create a visual description for AI."""
         try:
             descriptions = []

             for section in sections:
-                section_type = self._get_section_type(section)
-                section_data = self._get_section_data(section)
+                sectionType = self._getSectionType(section)
+                sectionData = self._getSectionData(section)

-                if section_type == "table":
-                    headers = section_data.get("headers", [])
-                    rows = section_data.get("rows", [])
+                if sectionType == "table":
+                    headers = sectionData.get("headers", [])
+                    rows = sectionData.get("rows", [])
                     if headers and rows:
                         descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")

-                elif section_type == "bullet_list":
-                    items = section_data.get("items", [])
+                elif sectionType == "bullet_list":
+                    items = sectionData.get("items", [])
                     if items:
                         descriptions.append(f"List with {len(items)} items")

-                elif section_type == "heading":
-                    text = section_data.get("text", "")
-                    level = section_data.get("level", 1)
+                elif sectionType == "heading":
+                    text = sectionData.get("text", "")
+                    level = sectionData.get("level", 1)
                     if text:
                         descriptions.append(f"Heading {level}: {text}")

-                elif section_type == "paragraph":
-                    text = section_data.get("text", "")
+                elif sectionType == "paragraph":
+                    text = sectionData.get("text", "")
                     if text and len(text) > 10:  # Only include substantial paragraphs
                         # Truncate long text
                         truncated = text[:100] + "..." if len(text) > 100 else text
                         descriptions.append(f"Text content: {truncated}")

-                elif section_type == "code_block":
-                    code = section_data.get("code", "")
-                    language = section_data.get("language", "")
+                elif sectionType == "code_block":
+                    code = sectionData.get("code", "")
+                    language = sectionData.get("language", "")
                     if code:
                         descriptions.append(f"Code block ({language}): {code[:50]}...")

@@ -244,42 +246,42 @@ Return only the compressed prompt, no explanations.
             self.logger.warning(f"Error analyzing content: {str(e)}")
             return "Document content"

-    def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
+    def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str:
         """Determine visual style guidance based on content and user prompt."""
         try:
-            style_elements = []
+            styleElements = []

             # Analyze user prompt for style hints
-            if user_prompt:
-                prompt_lower = user_prompt.lower()
+            if userPrompt:
+                promptLower = userPrompt.lower()

-                if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
-                    style_elements.append("modern, clean design")
-                elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
-                    style_elements.append("classic, formal design")
-                elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
-                    style_elements.append("creative, artistic design")
-                elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
-                    style_elements.append("corporate, professional design")
+                if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
+                    styleElements.append("modern, clean design")
+                elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
+                    styleElements.append("classic, formal design")
+                elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
+                    styleElements.append("creative, artistic design")
+                elif any(word in promptLower for word in ["corporate", "business", "professional"]):
+                    styleElements.append("corporate, professional design")

             # Analyze content type for additional style hints
-            sections = extracted_content.get("sections", [])
-            has_tables = any(self._get_section_type(s) == "table" for s in sections)
-            has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
-            has_code = any(self._get_section_type(s) == "code_block" for s in sections)
+            sections = extractedContent.get("sections", [])
+            hasTables = any(self._getSectionType(s) == "table" for s in sections)
+            hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
+            hasCode = any(self._getSectionType(s) == "code_block" for s in sections)

-            if has_tables:
-                style_elements.append("data-focused layout")
-            if has_lists:
-                style_elements.append("organized, structured presentation")
-            if has_code:
-                style_elements.append("technical, developer-friendly")
+            if hasTables:
+                styleElements.append("data-focused layout")
+            if hasLists:
+                styleElements.append("organized, structured presentation")
+            if hasCode:
+                styleElements.append("technical, developer-friendly")

             # Default style if no specific guidance
-            if not style_elements:
-                style_elements.append("professional, clean design")
+            if not styleElements:
+                styleElements.append("professional, clean design")

-            return ", ".join(style_elements)
+            return ", ".join(styleElements)

         except Exception as e:
             self.logger.warning(f"Error determining style guidance: {str(e)}")
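Aside: the truncation fallback in _createImageGeneratePrompt reserves three characters for the trailing ellipsis so the result never exceeds the DALL-E limit. A standalone sketch of that capping logic, assuming the same 4000-character limit as the code above (the function name is illustrative):

    def capPrompt(prompt: str, limit: int = 4000) -> str:
        # Return the prompt unchanged when it already fits.
        if len(prompt) <= limit:
            return prompt
        # Otherwise hard-truncate, keeping room for the "..." suffix.
        return prompt[:limit - 3] + "..."

    assert len(capPrompt("x" * 5000)) == 4000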
@@ -10,40 +10,40 @@ class RendererJson(BaseRenderer):
     """Renders content to JSON format with format-specific extraction."""

     @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
         """Return supported JSON formats."""
         return ['json']

     @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
         """Return format aliases."""
         return ['data']

     @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
         """Return priority for JSON renderer."""
         return 80

-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
         """Render extracted JSON content to JSON format."""
         try:
             # The extracted content should already be JSON from the AI
             # Just validate and format it
-            json_content = self._clean_json_content(extracted_content, title)
+            jsonContent = self._cleanJsonContent(extractedContent, title)

-            return json_content, "application/json"
+            return jsonContent, "application/json"

         except Exception as e:
             self.logger.error(f"Error rendering JSON: {str(e)}")
             # Return minimal JSON fallback
-            fallback_data = {
+            fallbackData = {
                 "title": title,
                 "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
                 "metadata": {"error": str(e)}
             }
-            return json.dumps(fallback_data, indent=2), "application/json"
+            return json.dumps(fallbackData, indent=2), "application/json"

-    def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
+    def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
         """Clean and validate JSON content from AI."""
         try:
             # Validate JSON structure

@@ -72,8 +72,8 @@ class RendererJson(BaseRenderer):
         except Exception as e:
             self.logger.warning(f"Error cleaning JSON content: {str(e)}")
             # Return minimal valid JSON
-            fallback_data = {
+            fallbackData = {
                 "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                 "metadata": {"title": title, "error": str(e)}
             }
-            return json.dumps(fallback_data, indent=2, ensure_ascii=False)
+            return json.dumps(fallback_data, indent=2, ensure_ascii=False)
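Aside: the fallback document RendererJson emits on error keeps the same shape as a normal extraction result ("sections" plus "metadata"), so downstream renderers can still consume it. A standalone sketch of that shape, with placeholder values in place of real data:

    import json

    fallbackData = {
        "sections": [{"content_type": "paragraph", "elements": [{"text": "placeholder"}]}],
        "metadata": {"title": "Demo", "error": "placeholder error"},
    }
    # ensure_ascii=False keeps non-ASCII content readable in the emitted JSON.
    print(json.dumps(fallbackData, indent=2, ensure_ascii=False))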
@@ -9,161 +9,161 @@ class RendererMarkdown(BaseRenderer):
     """Renders content to Markdown format with format-specific extraction."""

     @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
         """Return supported Markdown formats."""
         return ['md', 'markdown']

     @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
         """Return format aliases."""
         return ['mdown', 'mkd']

     @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
         """Return priority for markdown renderer."""
         return 95

-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
         """Render extracted JSON content to Markdown format."""
         try:
             # Generate markdown from JSON structure
-            markdown_content = self._generate_markdown_from_json(extracted_content, title)
+            markdownContent = self._generateMarkdownFromJson(extractedContent, title)

-            return markdown_content, "text/markdown"
+            return markdownContent, "text/markdown"

         except Exception as e:
             self.logger.error(f"Error rendering markdown: {str(e)}")
             # Return minimal markdown fallback
             return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

-    def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
+    def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
         """Generate markdown content from structured JSON document."""
         try:
             # Validate JSON structure
-            if not isinstance(json_content, dict):
+            if not isinstance(jsonContent, dict):
                 raise ValueError("JSON content must be a dictionary")

-            if "sections" not in json_content:
+            if "sections" not in jsonContent:
                 raise ValueError("JSON content must contain 'sections' field")

             # Use title from JSON metadata if available, otherwise use provided title
-            document_title = json_content.get("metadata", {}).get("title", title)
+            documentTitle = jsonContent.get("metadata", {}).get("title", title)

             # Build markdown content
-            markdown_parts = []
+            markdownParts = []

             # Document title
-            markdown_parts.append(f"# {document_title}")
-            markdown_parts.append("")
+            markdownParts.append(f"# {documentTitle}")
+            markdownParts.append("")

             # Process each section
-            sections = json_content.get("sections", [])
+            sections = jsonContent.get("sections", [])
             for section in sections:
-                section_markdown = self._render_json_section(section)
-                if section_markdown:
-                    markdown_parts.append(section_markdown)
-                    markdown_parts.append("")  # Add spacing between sections
+                sectionMarkdown = self._renderJsonSection(section)
+                if sectionMarkdown:
+                    markdownParts.append(sectionMarkdown)
+                    markdownParts.append("")  # Add spacing between sections

             # Add generation info
-            markdown_parts.append("---")
-            markdown_parts.append(f"*Generated: {self._format_timestamp()}*")
+            markdownParts.append("---")
+            markdownParts.append(f"*Generated: {self._formatTimestamp()}*")

-            return '\n'.join(markdown_parts)
+            return '\n'.join(markdownParts)

         except Exception as e:
             self.logger.error(f"Error generating markdown from JSON: {str(e)}")
             raise Exception(f"Markdown generation failed: {str(e)}")

-    def _render_json_section(self, section: Dict[str, Any]) -> str:
+    def _renderJsonSection(self, section: Dict[str, Any]) -> str:
         """Render a single JSON section to markdown."""
         try:
-            section_type = self._get_section_type(section)
-            section_data = self._get_section_data(section)
+            sectionType = self._getSectionType(section)
+            sectionData = self._getSectionData(section)

-            if section_type == "table":
+            if sectionType == "table":
                 # Process the section data to extract table structure
-                processed_data = self._process_section_by_type(section)
-                return self._render_json_table(processed_data)
-            elif section_type == "bullet_list":
+                processedData = self._processSectionByType(section)
+                return self._renderJsonTable(processedData)
+            elif sectionType == "bullet_list":
                 # Process the section data to extract bullet list structure
-                processed_data = self._process_section_by_type(section)
-                return self._render_json_bullet_list(processed_data)
-            elif section_type == "heading":
-                return self._render_json_heading(section_data)
-            elif section_type == "paragraph":
-                return self._render_json_paragraph(section_data)
-            elif section_type == "code_block":
+                processedData = self._processSectionByType(section)
+                return self._renderJsonBulletList(processedData)
+            elif sectionType == "heading":
+                return self._renderJsonHeading(sectionData)
+            elif sectionType == "paragraph":
+                return self._renderJsonParagraph(sectionData)
+            elif sectionType == "code_block":
                 # Process the section data to extract code block structure
-                processed_data = self._process_section_by_type(section)
-                return self._render_json_code_block(processed_data)
-            elif section_type == "image":
+                processedData = self._processSectionByType(section)
+                return self._renderJsonCodeBlock(processedData)
+            elif sectionType == "image":
                 # Process the section data to extract image structure
-                processed_data = self._process_section_by_type(section)
-                return self._render_json_image(processed_data)
+                processedData = self._processSectionByType(section)
+                return self._renderJsonImage(processedData)
             else:
                 # Fallback to paragraph for unknown types
-                return self._render_json_paragraph(section_data)
+                return self._renderJsonParagraph(sectionData)

         except Exception as e:
-            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
+            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
             return f"*[Error rendering section: {str(e)}]*"

-    def _render_json_table(self, table_data: Dict[str, Any]) -> str:
+    def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
         """Render a JSON table to markdown."""
         try:
-            headers = table_data.get("headers", [])
-            rows = table_data.get("rows", [])
+            headers = tableData.get("headers", [])
+            rows = tableData.get("rows", [])

             if not headers or not rows:
                 return ""

-            markdown_parts = []
+            markdownParts = []

             # Create table header
-            header_line = " | ".join(str(header) for header in headers)
-            markdown_parts.append(header_line)
+            headerLine = " | ".join(str(header) for header in headers)
+            markdownParts.append(headerLine)

             # Add separator line
-            separator_line = " | ".join("---" for _ in headers)
-            markdown_parts.append(separator_line)
+            separatorLine = " | ".join("---" for _ in headers)
+            markdownParts.append(separatorLine)

             # Add data rows
             for row in rows:
-                row_line = " | ".join(str(cell_data) for cell_data in row)
-                markdown_parts.append(row_line)
+                rowLine = " | ".join(str(cellData) for cellData in row)
+                markdownParts.append(rowLine)

-            return '\n'.join(markdown_parts)
+            return '\n'.join(markdownParts)

         except Exception as e:
             self.logger.warning(f"Error rendering table: {str(e)}")
             return ""

-    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
+    def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
         """Render a JSON bullet list to markdown."""
         try:
-            items = list_data.get("items", [])
+            items = listData.get("items", [])

             if not items:
                 return ""

-            markdown_parts = []
+            markdownParts = []
             for item in items:
                 if isinstance(item, str):
-                    markdown_parts.append(f"- {item}")
+                    markdownParts.append(f"- {item}")
                 elif isinstance(item, dict) and "text" in item:
-                    markdown_parts.append(f"- {item['text']}")
+                    markdownParts.append(f"- {item['text']}")

-            return '\n'.join(markdown_parts)
+            return '\n'.join(markdownParts)

         except Exception as e:
             self.logger.warning(f"Error rendering bullet list: {str(e)}")
             return ""

-    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
+    def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
         """Render a JSON heading to markdown."""
         try:
-            level = heading_data.get("level", 1)
-            text = heading_data.get("text", "")
+            level = headingData.get("level", 1)
+            text = headingData.get("text", "")

             if text:
                 level = max(1, min(6, level))

@@ -175,21 +175,21 @@ class RendererMarkdown(BaseRenderer):
             self.logger.warning(f"Error rendering heading: {str(e)}")
             return ""

-    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
+    def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
         """Render a JSON paragraph to markdown."""
         try:
-            text = paragraph_data.get("text", "")
+            text = paragraphData.get("text", "")
             return text if text else ""

         except Exception as e:
             self.logger.warning(f"Error rendering paragraph: {str(e)}")
             return ""

-    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
+    def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
         """Render a JSON code block to markdown."""
         try:
-            code = code_data.get("code", "")
-            language = code_data.get("language", "")
+            code = codeData.get("code", "")
+            language = codeData.get("language", "")

             if code:
                 if language:

@@ -203,19 +203,19 @@ class RendererMarkdown(BaseRenderer):
             self.logger.warning(f"Error rendering code block: {str(e)}")
             return ""

-    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
+    def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
         """Render a JSON image to markdown."""
         try:
-            alt_text = image_data.get("altText", "Image")
-            base64_data = image_data.get("base64Data", "")
+            altText = imageData.get("altText", "Image")
+            base64Data = imageData.get("base64Data", "")

-            if base64_data:
+            if base64Data:
                 # For base64 images, we can't embed them directly in markdown
                 # So we'll use a placeholder with the alt text
-                return f""
+                return f""
             else:
-                return f""
+                return f""

         except Exception as e:
             self.logger.warning(f"Error rendering image: {str(e)}")
-            return f""
+            return f""
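Aside: _renderJsonTable above builds a pipe-delimited markdown table: one header row, a "---" separator cell per column, then one line per data row. A standalone sketch of the same layout (function and variable names are illustrative, not repo code):

    from typing import Any, Dict, List

    def renderMarkdownTable(tableData: Dict[str, Any]) -> str:
        headers: List[Any] = tableData.get("headers", [])
        rows: List[List[Any]] = tableData.get("rows", [])
        if not headers or not rows:
            return ""
        lines = [" | ".join(str(h) for h in headers),
                 " | ".join("---" for _ in headers)]  # separator row
        lines += [" | ".join(str(cell) for cell in row) for row in rows]
        return "\n".join(lines)

    print(renderMarkdownTable({"headers": ["a", "b"], "rows": [[1, 2], [3, 4]]}))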
@ -22,32 +22,32 @@ class RendererPdf(BaseRenderer):
|
|||
"""Renders content to PDF format using reportlab."""
|
||||
|
||||
@classmethod
|
||||
def get_supported_formats(cls) -> List[str]:
|
||||
def getSupportedFormats(cls) -> List[str]:
|
||||
"""Return supported PDF formats."""
|
||||
return ['pdf']
|
||||
|
||||
@classmethod
|
||||
def get_format_aliases(cls) -> List[str]:
|
||||
def getFormatAliases(cls) -> List[str]:
|
||||
"""Return format aliases."""
|
||||
return ['document', 'print']
|
||||
|
||||
@classmethod
|
||||
def get_priority(cls) -> int:
|
||||
def getPriority(cls) -> int:
|
||||
"""Return priority for PDF renderer."""
|
||||
return 120
|
||||
|
||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
||||
try:
|
||||
if not REPORTLAB_AVAILABLE:
|
||||
# Fallback to HTML if reportlab not available
|
||||
from .rendererHtml import RendererHtml
|
||||
html_renderer = RendererHtml()
|
||||
html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
|
||||
html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
||||
return html_content, "text/html"
|
||||
|
||||
# Generate PDF using AI-analyzed styling
|
||||
pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
|
||||
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
return pdf_content, "application/pdf"
|
||||
|
||||
|
|
@ -56,11 +56,11 @@ class RendererPdf(BaseRenderer):
|
|||
# Return minimal fallback
|
||||
return f"PDF Generation Error: {str(e)}", "text/plain"
|
||||
|
||||
async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
||||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
||||
try:
|
||||
# Get AI-generated styling definitions
|
||||
styles = await self._get_pdf_styles(user_prompt, ai_service)
|
||||
styles = await self._getPdfStyles(userPrompt, aiService)
|
||||
|
||||
# Validate JSON structure
|
||||
if not isinstance(json_content, dict):
|
||||
|
|
@ -93,10 +93,10 @@ class RendererPdf(BaseRenderer):
|
|||
story = []
|
||||
|
||||
# Title page
|
||||
title_style = self._create_title_style(styles)
|
||||
title_style = self._createTitleStyle(styles)
|
||||
story.append(Paragraph(document_title, title_style))
|
||||
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
|
||||
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
|
||||
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._createNormalStyle(styles)))
|
||||
story.append(Spacer(1, 30)) # Add spacing before page break
|
||||
story.append(PageBreak())
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ class RendererPdf(BaseRenderer):
|
|||
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
|
||||
for i, section in enumerate(sections):
|
||||
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
|
||||
section_elements = self._render_json_section(section, styles)
|
||||
section_elements = self._renderJsonSection(section, styles)
|
||||
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
|
||||
story.extend(section_elements)
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ class RendererPdf(BaseRenderer):
|
|||
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
|
||||
raise Exception(f"PDF generation failed: {str(e)}")
|
||||
|
||||
async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
async def _getPdfStyles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
"""Get PDF styling definitions using base template AI styling."""
|
||||
style_schema = {
|
||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
||||
|
|
@ -136,21 +136,21 @@ class RendererPdf(BaseRenderer):
|
|||
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
||||
}
|
||||
|
||||
style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
|
||||
style_template = self._createAiStyleTemplate("pdf", user_prompt, style_schema)
|
||||
|
||||
# Use base template method like DOCX does (this works!)
|
||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())
|
||||
styles = await self._getAiStyles(ai_service, style_template, self._getDefaultPdfStyles())
|
||||
|
||||
if styles is None:
|
||||
return self._get_default_pdf_styles()
|
||||
return self._getDefaultPdfStyles()
|
||||
|
||||
# Convert colors to PDF format after getting styles
|
||||
styles = self._convert_colors_format(styles)
|
||||
styles = self._convertColorsFormat(styles)
|
||||
|
||||
# Validate and fix contrast issues
|
||||
return self._validate_pdf_styles_contrast(styles)
|
||||
return self._validatePdfStylesContrast(styles)
|
||||
|
||||
async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get AI styles with proper PDF color conversion."""
|
||||
if not ai_service:
|
||||
return default_styles
|
||||
|
|
@ -279,7 +279,7 @@ class RendererPdf(BaseRenderer):
|
|||
return default_styles
|
||||
|
||||
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
|
||||
styles = self._convert_colors_format(styles)
|
||||
styles = self._convertColorsFormat(styles)
|
||||
|
||||
return styles
|
||||
|
||||
|
|
@ -287,7 +287,7 @@ class RendererPdf(BaseRenderer):
|
|||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||
return default_styles
|
||||
|
||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert colors to proper format for PDF compatibility."""
|
||||
try:
|
||||
for style_name, style_config in styles.items():
|
||||
|
|
@ -304,7 +304,7 @@ class RendererPdf(BaseRenderer):
|
|||
self.logger.warning(f"Color conversion failed: {str(e)}")
|
||||
return styles
|
||||
|
||||
def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
|
||||
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
|
||||
"""Get a safe hex color value for PDF."""
|
||||
if isinstance(color_value, str) and color_value.startswith('#'):
|
||||
if len(color_value) == 7:
|
||||
|
|
@ -313,7 +313,7 @@ class RendererPdf(BaseRenderer):
|
|||
return color_value
|
||||
return default
|
||||
|
||||
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
+    def _validatePdfStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
         """Validate and fix contrast issues in AI-generated styles."""
         try:
             # Fix table header contrast

@@ -348,9 +348,9 @@ class RendererPdf(BaseRenderer):
         except Exception as e:
             self.logger.warning(f"Style validation failed: {str(e)}")
-            return self._get_default_pdf_styles()
+            return self._getDefaultPdfStyles()

-    def _get_default_pdf_styles(self) -> Dict[str, Any]:
+    def _getDefaultPdfStyles(self) -> Dict[str, Any]:
         """Default PDF styles."""
         return {
             "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},

@@ -363,27 +363,27 @@ class RendererPdf(BaseRenderer):
             "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
         }

-    def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
+    def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
         """Create title style from style definitions."""
         title_style_def = styles.get("title", {})

         # DEBUG: Show what color and spacing is being used for title
         title_color = title_style_def.get("color", "#1F4E79")
         title_space_after = title_style_def.get("space_after", 30)
-        self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER")
+        self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
         self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")

         return ParagraphStyle(
             'CustomTitle',
             fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
             spaceAfter=title_style_def.get("space_after", 30),
-            alignment=self._get_alignment(title_style_def.get("align", "center")),
-            textColor=self._hex_to_color(title_color),
+            alignment=self._getAlignment(title_style_def.get("align", "center")),
+            textColor=self._hexToColor(title_color),
             leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
             spaceBefore=0 # Ensure no space before title
         )

-    def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
+    def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
         """Create heading style from style definitions."""
         heading_key = f"heading{level}"
         heading_style_def = styles.get(heading_key, styles.get("heading1", {}))

@@ -393,11 +393,11 @@ class RendererPdf(BaseRenderer):
             fontSize=heading_style_def.get("font_size", 18 - level * 2),
             spaceAfter=heading_style_def.get("space_after", 12),
             spaceBefore=heading_style_def.get("space_before", 12),
-            alignment=self._get_alignment(heading_style_def.get("align", "left")),
-            textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
+            alignment=self._getAlignment(heading_style_def.get("align", "left")),
+            textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F"))
         )

-    def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
+    def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
         """Create normal paragraph style from style definitions."""
         paragraph_style_def = styles.get("paragraph", {})

@@ -405,12 +405,12 @@ class RendererPdf(BaseRenderer):
             'CustomNormal',
             fontSize=paragraph_style_def.get("font_size", 11),
             spaceAfter=paragraph_style_def.get("space_after", 6),
-            alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
-            textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
+            alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
+            textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
             leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
         )

-    def _get_alignment(self, align: str) -> int:
+    def _getAlignment(self, align: str) -> int:
         """Convert alignment string to reportlab alignment constant."""
         if not align or not isinstance(align, str):
             return TA_LEFT

@@ -426,7 +426,7 @@ class RendererPdf(BaseRenderer):
         }
         return align_map.get(align.lower().strip(), TA_LEFT)
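Note: the body of `align_map` sits in the lines elided by the hunk above. A minimal sketch of what the full method plausibly looks like; the map keys and the extra constants `TA_CENTER`, `TA_RIGHT`, `TA_JUSTIFY` from `reportlab.lib.enums` are assumptions, and only the signature, the guard, and the `TA_LEFT` fallback are visible in the diff:

    from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT, TA_JUSTIFY

    def _getAlignment(self, align: str) -> int:
        """Convert alignment string to reportlab alignment constant."""
        if not align or not isinstance(align, str):
            return TA_LEFT
        # Assumed mapping; the real dict is outside this hunk
        align_map = {
            "left": TA_LEFT,
            "center": TA_CENTER,
            "right": TA_RIGHT,
            "justify": TA_JUSTIFY,
        }
        return align_map.get(align.lower().strip(), TA_LEFT)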

-    def _get_table_alignment(self, align: str) -> str:
+    def _getTableAlignment(self, align: str) -> str:
         """Convert alignment string to ReportLab table alignment string."""
         if not align or not isinstance(align, str):
             return 'LEFT'

@@ -442,7 +442,7 @@ class RendererPdf(BaseRenderer):
         }
         return align_map.get(align.lower().strip(), 'LEFT')

-    def _hex_to_color(self, hex_color: str) -> colors.Color:
+    def _hexToColor(self, hex_color: str) -> colors.Color:
         """Convert hex color to reportlab color."""
         try:
             hex_color = hex_color.lstrip('#')

@@ -464,38 +464,38 @@ class RendererPdf(BaseRenderer):
         except:
             return colors.black
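The middle of `_hexToColor` is also elided. Given the visible pieces (strip the `#`, fall back to `colors.black`), the core is presumably a pairwise hex parse along these lines; the variable names and the exact parse are assumptions, not from the commit:

    def _hexToColor(self, hex_color: str) -> colors.Color:
        """Convert hex color to reportlab color."""
        try:
            hex_color = hex_color.lstrip('#')
            # Assumed parse: split "RRGGBB" into channel bytes, scale to 0..1
            r, g, b = (int(hex_color[i:i + 2], 16) / 255.0 for i in (0, 2, 4))
            return colors.Color(r, g, b)
        except:
            return colors.black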

-    def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a single JSON section to PDF elements using AI-generated styles."""
         try:
-            section_type = self._get_section_type(section)
-            elements = self._get_section_data(section)
+            section_type = self._getSectionType(section)
+            elements = self._getSectionData(section)

             # Process each element in the section
             all_elements = []
             for element in elements:
                 if section_type == "table":
-                    all_elements.extend(self._render_json_table(element, styles))
+                    all_elements.extend(self._renderJsonTable(element, styles))
                 elif section_type == "bullet_list":
-                    all_elements.extend(self._render_json_bullet_list(element, styles))
+                    all_elements.extend(self._renderJsonBulletList(element, styles))
                 elif section_type == "heading":
-                    all_elements.extend(self._render_json_heading(element, styles))
+                    all_elements.extend(self._renderJsonHeading(element, styles))
                 elif section_type == "paragraph":
-                    all_elements.extend(self._render_json_paragraph(element, styles))
+                    all_elements.extend(self._renderJsonParagraph(element, styles))
                 elif section_type == "code_block":
-                    all_elements.extend(self._render_json_code_block(element, styles))
+                    all_elements.extend(self._renderJsonCodeBlock(element, styles))
                 elif section_type == "image":
-                    all_elements.extend(self._render_json_image(element, styles))
+                    all_elements.extend(self._renderJsonImage(element, styles))
                 else:
                     # Fallback to paragraph for unknown types
-                    all_elements.extend(self._render_json_paragraph(element, styles))
+                    all_elements.extend(self._renderJsonParagraph(element, styles))

             return all_elements

         except Exception as e:
-            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
+            self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
             return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]

-    def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a JSON table to PDF elements using AI-generated styles."""
         try:
             headers = table_data.get("headers", [])

@@ -517,7 +517,7 @@ class RendererPdf(BaseRenderer):
             table_style = [
                 ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
                 ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
-                ('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))),
+                ('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
                 ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
                 ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
                 ('BOTTOMPADDING', (0, 0), (-1, 0), 12),

@@ -534,7 +534,7 @@ class RendererPdf(BaseRenderer):
             self.logger.warning(f"Error rendering table: {str(e)}")
             return []

-    def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a JSON bullet list to PDF elements using AI-generated styles."""
         try:
             items = list_data.get("items", [])

@@ -556,7 +556,7 @@ class RendererPdf(BaseRenderer):
             self.logger.warning(f"Error rendering bullet list: {str(e)}")
             return []

-    def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a JSON heading to PDF elements using AI-generated styles."""
         try:
             level = heading_data.get("level", 1)

@@ -564,7 +564,7 @@ class RendererPdf(BaseRenderer):

             if text:
                 level = max(1, min(6, level))
-                heading_style = self._create_heading_style(styles, level)
+                heading_style = self._createHeadingStyle(styles, level)
                 return [Paragraph(text, heading_style)]

             return []

@@ -573,13 +573,13 @@ class RendererPdf(BaseRenderer):
             self.logger.warning(f"Error rendering heading: {str(e)}")
             return []

-    def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a JSON paragraph to PDF elements using AI-generated styles."""
         try:
             text = paragraph_data.get("text", "")

             if text:
-                return [Paragraph(text, self._create_normal_style(styles))]
+                return [Paragraph(text, self._createNormalStyle(styles))]

             return []

@@ -587,7 +587,7 @@ class RendererPdf(BaseRenderer):
             self.logger.warning(f"Error rendering paragraph: {str(e)}")
             return []

-    def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a JSON code block to PDF elements using AI-generated styles."""
         try:
             code = code_data.get("code", "")

@@ -601,7 +601,7 @@ class RendererPdf(BaseRenderer):
                 lang_style = ParagraphStyle(
                     'CodeLanguage',
                     fontSize=code_style_def.get("font_size", 9),
-                    textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
+                    textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
                     fontName='Helvetica-Bold'
                 )
                 elements.append(Paragraph(f"Code ({language}):", lang_style))

@@ -609,9 +609,9 @@ class RendererPdf(BaseRenderer):
             code_style = ParagraphStyle(
                 'CodeBlock',
                 fontSize=code_style_def.get("font_size", 9),
-                textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
+                textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
                 fontName=code_style_def.get("font", "Courier"),
-                backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
+                backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
                 spaceAfter=code_style_def.get("space_after", 6)
             )
             elements.append(Paragraph(code, code_style))

@@ -624,7 +624,7 @@ class RendererPdf(BaseRenderer):
             self.logger.warning(f"Error rendering code block: {str(e)}")
             return []

-    def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
+    def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
         """Render a JSON image to PDF elements."""
         try:
             base64_data = image_data.get("base64Data", "")

@@ -632,10 +632,10 @@ class RendererPdf(BaseRenderer):

             if base64_data:
                 # For now, just add a placeholder since reportlab image handling is complex
-                return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]
+                return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]

             return []

         except Exception as e:
             self.logger.warning(f"Error rendering image: {str(e)}")
-            return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]
+            return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))]

@@ -12,23 +12,23 @@ class RendererPptx(BaseRenderer):

     def __init__(self):
         super().__init__()
-        self.supported_formats = ["pptx", "ppt"]
-        self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+        self.supportedFormats = ["pptx", "ppt"]
+        self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

     @classmethod
-    def get_supported_formats(cls) -> list:
+    def getSupportedFormats(cls) -> list:
         """Get list of supported output formats."""
         return ["pptx", "ppt"]

-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
         """
         Render content as PowerPoint presentation from JSON data.

         Args:
-            extracted_content: JSON content to render as presentation
+            extractedContent: JSON content to render as presentation
             title: Title for the presentation
-            user_prompt: User prompt for AI styling
-            ai_service: AI service for styling
+            userPrompt: User prompt for AI styling
+            aiService: AI service for styling
             **kwargs: Additional rendering options

         Returns:

@@ -43,7 +43,7 @@ class RendererPptx(BaseRenderer):
         import re

         # Get AI-generated styling definitions first
-        styles = await self._get_pptx_styles(user_prompt, ai_service)
+        styles = await self._getPptxStyles(userPrompt, aiService)

         # Create new presentation
         prs = Presentation()

@@ -58,13 +58,13 @@ class RendererPptx(BaseRenderer):
         prs.slide_height = Inches(7.5)

         # Generate slides from JSON content
-        slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
-        logger.info(f"Parsed {len(slides_data)} slides from JSON content")
+        slidesData = await self._parseJsonToSlides(extractedContent, title, styles)
+        logger.info(f"Parsed {len(slidesData)} slides from JSON content")

         # Debug: Show first 200 chars of content
-        logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")
+        logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")

-        for i, slide_data in enumerate(slides_data):
+        for i, slide_data in enumerate(slidesData):
             logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
             # Debug: Show slide content preview
             slide_content = slide_data.get('content', '')

@@ -74,8 +74,8 @@ class RendererPptx(BaseRenderer):
                 logger.warning(f" ⚠️ Slide {i+1} has NO content!")

             # Create slide with appropriate layout based on content
-            slide_layout_index = self._get_slide_layout_index(slide_data, styles)
-            slide_layout = prs.slide_layouts[slide_layout_index]
+            slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
+            slide_layout = prs.slide_layouts[slideLayoutIndex]
             slide = prs.slides.add_slide(slide_layout)

             # Set title with AI-generated styling

@@ -153,7 +153,7 @@ class RendererPptx(BaseRenderer):
             p.alignment = PP_ALIGN.LEFT

         # If no slides were created, create a default slide
-        if not slides_data:
+        if not slidesData:
             slide_layout = prs.slide_layouts[0] # Title slide layout
             slide = prs.slides.add_slide(slide_layout)

@@ -198,7 +198,7 @@ class RendererPptx(BaseRenderer):
             logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
             return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

-    def _parse_content_to_slides(self, content: str, title: str) -> list:
+    def _parseContentToSlides(self, content: str, title: str) -> list:
         """
         Parse content into slide data structure.

@@ -212,7 +212,7 @@ class RendererPptx(BaseRenderer):
         slides = []

         # Split content by slide markers or headers
-        slide_sections = self._split_content_into_slides(content)
+        slide_sections = self._splitContentIntoSlides(content)

         for i, section in enumerate(slide_sections):
             if section.strip():

@@ -239,7 +239,7 @@ class RendererPptx(BaseRenderer):

         return slides

-    def _split_content_into_slides(self, content: str) -> list:
+    def _splitContentIntoSlides(self, content: str) -> list:
         """
         Split content into individual slides based on headers and structure.

@@ -299,11 +299,11 @@ class RendererPptx(BaseRenderer):
         return [content.strip()]

-    def get_output_mime_type(self) -> str:
+    def getOutputMimeType(self) -> str:
         """Get MIME type for rendered output."""
-        return self.output_mime_type
+        return self.outputMimeType

-    async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+    async def _getPptxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
         """Get PowerPoint styling definitions using base template AI styling."""
         style_schema = {
             "title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},

@@ -323,21 +323,21 @@ class RendererPptx(BaseRenderer):
             "executive_ready": True
         }

-        style_template = self._create_professional_pptx_template(user_prompt, style_schema)
-        # Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion
-        styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles())
+        style_template = self._createProfessionalPptxTemplate(userPrompt, style_schema)
+        # Use our own _getAiStylesWithPptxColors method to ensure proper color conversion
+        styles = await self._getAiStylesWithPptxColors(aiService, style_template, self._getDefaultPptxStyles())

         # Validate PowerPoint-specific requirements
-        return self._validate_pptx_styles_readability(styles)
+        return self._validatePptxStylesReadability(styles)

-    def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
+    def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
         """Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
         import json
         schema_json = json.dumps(style_schema, indent=4)

         return f"""Customize the JSON below for professional PowerPoint slides.

-User Request: {user_prompt or "Create professional corporate slides"}
+User Request: {userPrompt or "Create professional corporate slides"}

 Rules:
 - Use professional colors (blues, grays, deep greens)

@@ -351,9 +351,9 @@ Return ONLY this JSON with your changes:

 JSON ONLY. NO OTHER TEXT."""

-    async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+    async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
         """Get AI styles with proper PowerPoint color conversion."""
-        if not ai_service:
+        if not aiService:
             return default_styles

         try:

@@ -365,11 +365,11 @@ JSON ONLY. NO OTHER TEXT."""
             request = AiCallRequest(prompt=style_template, context="", options=request_options)

             # Check if AI service is properly configured
-            if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
+            if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
                 self.logger.warning("AI service not properly configured, using defaults")
                 return default_styles

-            response = await ai_service.aiObjects.call(request)
+            response = await aiService.aiObjects.call(request)

             # Check if response is valid
             if not response:

@@ -445,7 +445,7 @@ JSON ONLY. NO OTHER TEXT."""
                 return default_styles

             # Convert colors to PowerPoint RGB format
-            styles = self._convert_colors_format(styles)
+            styles = self._convertColorsFormat(styles)

             return styles

@@ -453,7 +453,7 @@ JSON ONLY. NO OTHER TEXT."""
             self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
             return default_styles

-    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
         """Convert hex colors to RGB format for PowerPoint compatibility."""
         try:
             for style_name, style_config in styles.items():

@@ -477,7 +477,7 @@ JSON ONLY. NO OTHER TEXT."""
             self.logger.warning(f"Color conversion failed: {str(e)}")
             return styles

-    def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
+    def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
         """Get a safe RGB color tuple for PowerPoint."""
         if isinstance(color_value, tuple) and len(color_value) == 3:
             return color_value

@@ -495,7 +495,7 @@ JSON ONLY. NO OTHER TEXT."""
                 return (r, g, b)
         return default
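Reading between the hunks, the PowerPoint `_getSafeColor` apparently accepts either a ready-made RGB tuple or a hex string and always hands back a tuple. A minimal sketch of the elided middle, assuming the string path parses hex pairs (the intermediate names and checks are assumptions; only the tuple guard, `return (r, g, b)`, and `return default` are visible):

    def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
        """Get a safe RGB color tuple for PowerPoint."""
        if isinstance(color_value, tuple) and len(color_value) == 3:
            return color_value
        # Assumed branch: also accept "#RRGGBB" / "RRGGBB" strings
        if isinstance(color_value, str):
            value = color_value.lstrip('#')
            if len(value) == 6:
                r = int(value[0:2], 16)
                g = int(value[2:4], 16)
                b = int(value[4:6], 16)
                return (r, g, b)
        return default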

-    def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _validatePptxStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
         """Validate and fix readability issues in AI-generated styles."""
         try:
             # Ensure minimum font sizes for PowerPoint readability

@@ -519,9 +519,9 @@ JSON ONLY. NO OTHER TEXT."""

         except Exception as e:
             logger.warning(f"Style validation failed: {str(e)}")
-            return self._get_default_pptx_styles()
+            return self._getDefaultPptxStyles()

-    def _get_default_pptx_styles(self) -> Dict[str, Any]:
+    def _getDefaultPptxStyles(self) -> Dict[str, Any]:
         """Default PowerPoint styles with corporate professional color scheme."""
         return {
             "title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},

@@ -541,7 +541,7 @@ JSON ONLY. NO OTHER TEXT."""
             "executive_ready": True
         }

-    async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
+    async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
         Parse JSON content into slide data structure.

@@ -569,12 +569,12 @@ JSON ONLY. NO OTHER TEXT."""
         # Create title slide
         slides.append({
             "title": document_title,
-            "content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
+            "content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
         })

         # Process sections into slides based on content and user intent
         sections = json_content.get("sections", [])
-        slides.extend(self._create_slides_from_sections(sections, styles))
+        slides.extend(self._createSlidesFromSections(sections, styles))

         # If no content slides were created, create a default content slide
         if len(slides) == 1: # Only title slide

@@ -595,7 +595,7 @@ JSON ONLY. NO OTHER TEXT."""
             }
         ]

-    def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
         """Create a slide from a JSON section."""
         try:
             # Get section title from data or use default

@@ -616,15 +616,15 @@ JSON ONLY. NO OTHER TEXT."""
             content_parts = []

             if content_type == "table":
-                content_parts.append(self._format_table_for_slide(elements))
+                content_parts.append(self._formatTableForSlide(elements))
             elif content_type == "list":
-                content_parts.append(self._format_list_for_slide(elements))
+                content_parts.append(self._formatListForSlide(elements))
             elif content_type == "heading":
-                content_parts.append(self._format_heading_for_slide(elements))
+                content_parts.append(self._formatHeadingForSlide(elements))
             elif content_type == "paragraph":
-                content_parts.append(self._format_paragraph_for_slide(elements))
+                content_parts.append(self._formatParagraphForSlide(elements))
             elif content_type == "code":
-                content_parts.append(self._format_code_for_slide(elements))
+                content_parts.append(self._formatCodeForSlide(elements))
             else:
                 content_parts.append(self._format_paragraph_for_slide(elements))

@@ -640,7 +640,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error creating slide from section: {str(e)}")
             return None

-    def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str:
+    def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str:
         """Format table data for slide presentation."""
         try:
             # Extract table data from elements array

@@ -681,7 +681,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error formatting table for slide: {str(e)}")
             return ""

-    def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
+    def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
         """Format list data for slide presentation."""
         try:
             items = list_data.get("items", [])

@@ -713,7 +713,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error formatting list for slide: {str(e)}")
             return ""

-    def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
+    def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
         """Format heading data for slide presentation."""
         try:
             text = heading_data.get("text", "")

@@ -728,7 +728,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error formatting heading for slide: {str(e)}")
             return ""

-    def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
+    def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
         """Format paragraph data for slide presentation."""
         try:
             text = paragraph_data.get("text", "")

@@ -747,7 +747,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error formatting paragraph for slide: {str(e)}")
             return ""

-    def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
+    def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
         """Format code data for slide presentation."""
         try:
             code = code_data.get("code", "")

@@ -770,7 +770,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error formatting code for slide: {str(e)}")
             return ""

-    def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
+    def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
         """Determine the best professional slide layout based on content."""
         try:
             content = slide_data.get("content", "")

@@ -804,7 +804,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error determining slide layout: {str(e)}")
             return 1 # Default to title and content layout

-    def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
+    def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
         """Create slides from sections based on content density and user intent."""
         try:
             slides = []

@@ -834,7 +834,7 @@ JSON ONLY. NO OTHER TEXT."""
                     break
                 else:
                     # Add content to current slide
-                    formatted_content = self._format_section_content(section)
+                    formatted_content = self._formatSectionContent(section)
                     if formatted_content:
                         current_slide_content.append(formatted_content)

@@ -851,7 +851,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error creating slides from sections: {str(e)}")
             return []

-    def _format_section_content(self, section: Dict[str, Any]) -> str:
+    def _formatSectionContent(self, section: Dict[str, Any]) -> str:
         """Format section content for slide presentation."""
         try:
             content_type = section.get("content_type", "paragraph")

@@ -861,15 +861,15 @@ JSON ONLY. NO OTHER TEXT."""
             content_parts = []
             for element in elements:
                 if content_type == "table":
-                    content_parts.append(self._format_table_for_slide([element]))
+                    content_parts.append(self._formatTableForSlide([element]))
                 elif content_type == "list":
-                    content_parts.append(self._format_list_for_slide([element]))
+                    content_parts.append(self._formatListForSlide([element]))
                 elif content_type == "heading":
-                    content_parts.append(self._format_heading_for_slide([element]))
+                    content_parts.append(self._formatHeadingForSlide([element]))
                 elif content_type == "paragraph":
-                    content_parts.append(self._format_paragraph_for_slide([element]))
+                    content_parts.append(self._formatParagraphForSlide([element]))
                 elif content_type == "code":
-                    content_parts.append(self._format_code_for_slide([element]))
+                    content_parts.append(self._formatCodeForSlide([element]))
                 else:
                     content_parts.append(self._format_paragraph_for_slide([element]))

@@ -879,7 +879,7 @@ JSON ONLY. NO OTHER TEXT."""
             logger.warning(f"Error formatting section content: {str(e)}")
             return ""

-    def _format_timestamp(self) -> str:
+    def _formatTimestamp(self) -> str:
         """Format current timestamp for presentation generation."""
         from datetime import datetime, UTC
         return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class RendererText(BaseRenderer):
|
|||
"""Renders content to plain text format with format-specific extraction."""
|
||||
|
||||
@classmethod
|
||||
def get_supported_formats(cls) -> List[str]:
|
||||
def getSupportedFormats(cls) -> List[str]:
|
||||
"""Return supported text formats (excluding formats with dedicated renderers)."""
|
||||
return [
|
||||
'txt', 'text', 'plain',
|
||||
|
|
@ -32,7 +32,7 @@ class RendererText(BaseRenderer):
|
|||
]
|
||||
|
||||
@classmethod
|
||||
def get_format_aliases(cls) -> List[str]:
|
||||
def getFormatAliases(cls) -> List[str]:
|
||||
"""Return format aliases."""
|
||||
return [
|
||||
'ascii', 'utf8', 'utf-8', 'code', 'source',
|
||||
|
|
@ -41,166 +41,166 @@ class RendererText(BaseRenderer):
|
|||
]
|
||||
|
||||
@classmethod
|
||||
def get_priority(cls) -> int:
|
||||
def getPriority(cls) -> int:
|
||||
"""Return priority for text renderer."""
|
||||
return 90
|
||||
|
||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
"""Render extracted JSON content to plain text format."""
|
||||
try:
|
||||
# Generate text from JSON structure
|
||||
text_content = self._generate_text_from_json(extracted_content, title)
|
||||
textContent = self._generateTextFromJson(extractedContent, title)
|
||||
|
||||
return text_content, "text/plain"
|
||||
return textContent, "text/plain"
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering text: {str(e)}")
|
||||
# Return minimal text fallback
|
||||
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
||||
|
||||
def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
||||
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||
"""Generate text content from structured JSON document."""
|
||||
try:
|
||||
# Validate JSON structure
|
||||
if not isinstance(json_content, dict):
|
||||
if not isinstance(jsonContent, dict):
|
||||
raise ValueError("JSON content must be a dictionary")
|
||||
|
||||
if "sections" not in json_content:
|
||||
if "sections" not in jsonContent:
|
||||
raise ValueError("JSON content must contain 'sections' field")
|
||||
|
||||
# Use title from JSON metadata if available, otherwise use provided title
|
||||
document_title = json_content.get("metadata", {}).get("title", title)
|
||||
documentTitle = jsonContent.get("metadata", {}).get("title", title)
|
||||
|
||||
# Build text content
|
||||
text_parts = []
|
||||
textParts = []
|
||||
|
||||
# Document title
|
||||
text_parts.append(document_title)
|
||||
text_parts.append("=" * len(document_title))
|
||||
text_parts.append("")
|
||||
textParts.append(documentTitle)
|
||||
textParts.append("=" * len(documentTitle))
|
||||
textParts.append("")
|
||||
|
||||
# Process each section
|
||||
sections = json_content.get("sections", [])
|
||||
sections = jsonContent.get("sections", [])
|
||||
for section in sections:
|
||||
section_text = self._render_json_section(section)
|
||||
if section_text:
|
||||
text_parts.append(section_text)
|
||||
text_parts.append("") # Add spacing between sections
|
||||
sectionText = self._renderJsonSection(section)
|
||||
if sectionText:
|
||||
textParts.append(sectionText)
|
||||
textParts.append("") # Add spacing between sections
|
||||
|
||||
# Add generation info
|
||||
text_parts.append("")
|
||||
text_parts.append(f"Generated: {self._format_timestamp()}")
|
||||
textParts.append("")
|
||||
textParts.append(f"Generated: {self._formatTimestamp()}")
|
||||
|
||||
return '\n'.join(text_parts)
|
||||
return '\n'.join(textParts)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error generating text from JSON: {str(e)}")
|
||||
raise Exception(f"Text generation failed: {str(e)}")
|
||||
|
||||
def _render_json_section(self, section: Dict[str, Any]) -> str:
|
||||
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
|
||||
"""Render a single JSON section to text."""
|
||||
try:
|
||||
section_type = self._get_section_type(section)
|
||||
section_data = self._get_section_data(section)
|
||||
sectionType = self._getSectionType(section)
|
||||
sectionData = self._getSectionData(section)
|
||||
|
||||
if section_type == "table":
|
||||
if sectionType == "table":
|
||||
# Process the section data to extract table structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_table(processed_data)
|
||||
elif section_type == "bullet_list":
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonTable(processedData)
|
||||
elif sectionType == "bullet_list":
|
||||
# Process the section data to extract bullet list structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_bullet_list(processed_data)
|
||||
elif section_type == "heading":
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonBulletList(processedData)
|
||||
elif sectionType == "heading":
|
||||
# Render each heading element in the elements array
|
||||
# section_data is already the elements array from _get_section_data
|
||||
rendered_elements = []
|
||||
for element in section_data:
|
||||
rendered_elements.append(self._render_json_heading(element))
|
||||
return "\n".join(rendered_elements)
|
||||
elif section_type == "paragraph":
|
||||
# sectionData is already the elements array from _getSectionData
|
||||
renderedElements = []
|
||||
for element in sectionData:
|
||||
renderedElements.append(self._renderJsonHeading(element))
|
||||
return "\n".join(renderedElements)
|
||||
elif sectionType == "paragraph":
|
||||
# Render each paragraph element in the elements array
|
||||
# section_data is already the elements array from _get_section_data
|
||||
rendered_elements = []
|
||||
for element in section_data:
|
||||
rendered_elements.append(self._render_json_paragraph(element))
|
||||
return "\n".join(rendered_elements)
|
||||
elif section_type == "code_block":
|
||||
# sectionData is already the elements array from _getSectionData
|
||||
renderedElements = []
|
||||
for element in sectionData:
|
||||
renderedElements.append(self._renderJsonParagraph(element))
|
||||
return "\n".join(renderedElements)
|
||||
elif sectionType == "code_block":
|
||||
# Process the section data to extract code block structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_code_block(processed_data)
|
||||
elif section_type == "image":
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonCodeBlock(processedData)
|
||||
elif sectionType == "image":
|
||||
# Process the section data to extract image structure
|
||||
processed_data = self._process_section_by_type(section)
|
||||
return self._render_json_image(processed_data)
|
||||
processedData = self._processSectionByType(section)
|
||||
return self._renderJsonImage(processedData)
|
||||
else:
|
||||
# Fallback to paragraph for unknown types - render each element
|
||||
# section_data is already the elements array from _get_section_data
|
||||
rendered_elements = []
|
||||
for element in section_data:
|
||||
rendered_elements.append(self._render_json_paragraph(element))
|
||||
return "\n".join(rendered_elements)
|
||||
# sectionData is already the elements array from _getSectionData
|
||||
renderedElements = []
|
||||
for element in sectionData:
|
||||
renderedElements.append(self._renderJsonParagraph(element))
|
||||
return "\n".join(renderedElements)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
||||
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
|
||||
return f"[Error rendering section: {str(e)}]"
|
||||
|
||||
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
|
||||
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
|
||||
"""Render a JSON table to text."""
|
||||
try:
|
||||
headers = table_data.get("headers", [])
|
||||
rows = table_data.get("rows", [])
|
||||
headers = tableData.get("headers", [])
|
||||
rows = tableData.get("rows", [])
|
||||
|
||||
if not headers or not rows:
|
||||
return ""
|
||||
|
||||
text_parts = []
|
||||
textParts = []
|
||||
|
||||
# Create table header
|
||||
header_line = " | ".join(str(header) for header in headers)
|
||||
text_parts.append(header_line)
|
||||
headerLine = " | ".join(str(header) for header in headers)
|
||||
textParts.append(headerLine)
|
||||
|
||||
# Add separator line
|
||||
separator_line = " | ".join("-" * len(str(header)) for header in headers)
|
||||
text_parts.append(separator_line)
|
||||
separatorLine = " | ".join("-" * len(str(header)) for header in headers)
|
||||
textParts.append(separatorLine)
|
||||
|
||||
# Add data rows
|
||||
for row in rows:
|
||||
row_line = " | ".join(str(cell_data) for cell_data in row)
|
||||
text_parts.append(row_line)
|
||||
rowLine = " | ".join(str(cellData) for cellData in row)
|
||||
textParts.append(rowLine)
|
||||
|
||||
return '\n'.join(text_parts)
|
||||
return '\n'.join(textParts)
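For a sense of what this produces: with headers ["Name", "Qty"] and rows [["Widget", 3]] (values invented for illustration), the joined parts come out as

    Name | Qty
    ---- | ---
    Widget | 3

since the separator line repeats one dash per character of each header ("Name" yields "----", "Qty" yields "---").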

         except Exception as e:
             self.logger.warning(f"Error rendering table: {str(e)}")
             return ""

-    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
+    def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
         """Render a JSON bullet list to text."""
         try:
-            items = list_data.get("items", [])
+            items = listData.get("items", [])

             if not items:
                 return ""

-            text_parts = []
+            textParts = []
             for item in items:
                 if isinstance(item, str):
-                    text_parts.append(f"- {item}")
+                    textParts.append(f"- {item}")
                 elif isinstance(item, dict) and "text" in item:
-                    text_parts.append(f"- {item['text']}")
+                    textParts.append(f"- {item['text']}")

-            return '\n'.join(text_parts)
+            return '\n'.join(textParts)

         except Exception as e:
             self.logger.warning(f"Error rendering bullet list: {str(e)}")
             return ""

-    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
+    def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
         """Render a JSON heading to text."""
         try:
-            level = heading_data.get("level", 1)
-            text = heading_data.get("text", "")
+            level = headingData.get("level", 1)
+            text = headingData.get("text", "")

             if text:
                 level = max(1, min(6, level))

@@ -217,21 +217,21 @@ class RendererText(BaseRenderer):
             self.logger.warning(f"Error rendering heading: {str(e)}")
             return ""

-    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
+    def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
         """Render a JSON paragraph to text."""
         try:
-            text = paragraph_data.get("text", "")
+            text = paragraphData.get("text", "")
             return text if text else ""

         except Exception as e:
             self.logger.warning(f"Error rendering paragraph: {str(e)}")
             return ""

-    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
+    def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
         """Render a JSON code block to text."""
         try:
-            code = code_data.get("code", "")
-            language = code_data.get("language", "")
+            code = codeData.get("code", "")
+            language = codeData.get("language", "")

             if code:
                 if language:

@@ -245,12 +245,12 @@ class RendererText(BaseRenderer):
             self.logger.warning(f"Error rendering code block: {str(e)}")
             return ""

-    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
+    def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
         """Render a JSON image to text."""
         try:
-            alt_text = image_data.get("altText", "Image")
-            return f"[Image: {alt_text}]"
+            altText = imageData.get("altText", "Image")
+            return f"[Image: {altText}]"

         except Exception as e:
             self.logger.warning(f"Error rendering image: {str(e)}")
-            return f"[Image: {image_data.get('altText', 'Image')}]"
+            return f"[Image: {imageData.get('altText', 'Image')}]"

@@ -21,41 +21,41 @@ class RendererXlsx(BaseRenderer):
     """Renders content to Excel format using openpyxl."""

     @classmethod
-    def get_supported_formats(cls) -> List[str]:
+    def getSupportedFormats(cls) -> List[str]:
         """Return supported Excel formats."""
         return ['xlsx', 'xls', 'excel']

     @classmethod
-    def get_format_aliases(cls) -> List[str]:
+    def getFormatAliases(cls) -> List[str]:
         """Return format aliases."""
         return ['spreadsheet', 'workbook']

     @classmethod
-    def get_priority(cls) -> int:
+    def getPriority(cls) -> int:
         """Return priority for Excel renderer."""
         return 110

-    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
+    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
         """Render extracted JSON content to Excel format using AI-analyzed styling."""
         try:
             if not OPENPYXL_AVAILABLE:
                 # Fallback to CSV if openpyxl not available
                 from .rendererCsv import RendererCsv
-                csv_renderer = RendererCsv()
-                csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
-                return csv_content, "text/csv"
+                csvRenderer = RendererCsv()
+                csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
+                return csvContent, "text/csv"

             # Generate Excel using AI-analyzed styling
-            excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
+            excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)

-            return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+            return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

         except Exception as e:
             self.logger.error(f"Error rendering Excel: {str(e)}")
             # Return CSV fallback
             return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"

-    def _generate_excel(self, content: str, title: str) -> str:
+    def _generateExcel(self, content: str, title: str) -> str:
         """Generate Excel content using openpyxl."""
         try:
             # Create workbook

@@ -65,14 +65,14 @@ class RendererXlsx(BaseRenderer):
             wb.remove(wb.active)

             # Create sheets
-            summary_sheet = wb.create_sheet("Summary", 0)
-            data_sheet = wb.create_sheet("Data", 1)
-            analysis_sheet = wb.create_sheet("Analysis", 2)
+            summarySheet = wb.create_sheet("Summary", 0)
+            dataSheet = wb.create_sheet("Data", 1)
+            analysisSheet = wb.create_sheet("Analysis", 2)

             # Add content to sheets
-            self._populate_summary_sheet(summary_sheet, title)
-            self._populate_data_sheet(data_sheet, content)
-            self._populate_analysis_sheet(analysis_sheet, content)
+            self._populateSummarySheet(summarySheet, title)
+            self._populateDataSheet(dataSheet, content)
+            self._populateAnalysisSheet(analysisSheet, content)

             # Save to buffer
             buffer = io.BytesIO()

@@ -80,16 +80,16 @@ class RendererXlsx(BaseRenderer):
             buffer.seek(0)

             # Convert to base64
-            excel_bytes = buffer.getvalue()
-            excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
+            excelBytes = buffer.getvalue()
+            excelBase64 = base64.b64encode(excelBytes).decode('utf-8')

-            return excel_base64
+            return excelBase64

         except Exception as e:
             self.logger.error(f"Error generating Excel: {str(e)}")
             raise

-    def _populate_summary_sheet(self, sheet, title: str):
+    def _populateSummarySheet(self, sheet, title: str):
         """Populate the summary sheet."""
         try:
             # Title

@@ -99,7 +99,7 @@ class RendererXlsx(BaseRenderer):

             # Generation info
             sheet['A3'] = "Generated:"
-            sheet['B3'] = self._format_timestamp()
+            sheet['B3'] = self._formatTimestamp()
             sheet['A4'] = "Status:"
             sheet['B4'] = "Generated Successfully"

@@ -116,7 +116,7 @@ class RendererXlsx(BaseRenderer):
         except Exception as e:
             self.logger.warning(f"Could not populate summary sheet: {str(e)}")

-    def _populate_data_sheet(self, sheet, content: str):
+    def _populateDataSheet(self, sheet, content: str):
         """Populate the data sheet."""
         try:
             # Headers

@@ -138,8 +138,8 @@ class RendererXlsx(BaseRenderer):
                 # Check for table data (lines with |)
                 if '|' in line:
                     cells = [cell.strip() for cell in line.split('|') if cell.strip()]
-                    for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
-                        sheet.cell(row=row, column=col, value=cell_data)
+                    for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns
+                        sheet.cell(row=row, column=col, value=cellData)
                     row += 1
                 else:
                     # Regular content

@@ -153,7 +153,7 @@ class RendererXlsx(BaseRenderer):
         except Exception as e:
             self.logger.warning(f"Could not populate data sheet: {str(e)}")

-    def _populate_analysis_sheet(self, sheet, content: str):
+    def _populateAnalysisSheet(self, sheet, content: str):
         """Populate the analysis sheet."""
         try:
             # Title

@@ -169,17 +169,17 @@ class RendererXlsx(BaseRenderer):
             row += 1

             # Count different types of content
-            table_lines = sum(1 for line in lines if '|' in line)
-            list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
-            text_lines = len(lines) - table_lines - list_lines
+            tableLines = sum(1 for line in lines if '|' in line)
+            listLines = sum(1 for line in lines if line.startswith(('- ', '* ')))
+            textLines = len(lines) - tableLines - listLines

             sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
             row += 1
-            sheet[f'A{row}'] = f"Table Rows: {table_lines}"
+            sheet[f'A{row}'] = f"Table Rows: {tableLines}"
             row += 1
-            sheet[f'A{row}'] = f"List Items: {list_lines}"
+            sheet[f'A{row}'] = f"List Items: {listLines}"
             row += 1
-            sheet[f'A{row}'] = f"Text Lines: {text_lines}"
+            sheet[f'A{row}'] = f"Text Lines: {textLines}"
             row += 2

             # Recommendations

@@ -198,35 +198,35 @@ class RendererXlsx(BaseRenderer):
         except Exception as e:
             self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

-    async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
+    async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
         """Generate Excel content from structured JSON document using AI-generated styling."""
         try:
             # Debug output
-            self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER")
-            self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER")
+            self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
+            self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")

             # Get AI-generated styling definitions
-            styles = await self._get_excel_styles(user_prompt, ai_service)
+            styles = await self._getExcelStyles(userPrompt, aiService)

             # Validate JSON structure
-            if not isinstance(json_content, dict):
+            if not isinstance(jsonContent, dict):
                 raise ValueError("JSON content must be a dictionary")

-            if "sections" not in json_content:
+            if "sections" not in jsonContent:
                 raise ValueError("JSON content must contain 'sections' field")

             # Use title from JSON metadata if available, otherwise use provided title
-            document_title = json_content.get("metadata", {}).get("title", title)
+            document_title = jsonContent.get("metadata", {}).get("title", title)

             # Create workbook
             wb = Workbook()

             # Create sheets based on content
-            sheets = self._create_excel_sheets(wb, json_content, styles)
+            sheets = self._createExcelSheets(wb, jsonContent, styles)
             self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")

             # Populate sheets with content
-            self._populate_excel_sheets(sheets, json_content, styles)
+            self._populateExcelSheets(sheets, jsonContent, styles)

             # Save to buffer
             buffer = io.BytesIO()

@@ -234,24 +234,24 @@ class RendererXlsx(BaseRenderer):
             buffer.seek(0)

             # Convert to base64
-            excel_bytes = buffer.getvalue()
-            self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER")
+            excelBytes = buffer.getvalue()
+            self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")
             try:
-                excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
-                self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER")
+                excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
+                self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
             except Exception as b64_error:
                 self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
                 raise

-            return excel_base64
+            return excelBase64

         except Exception as e:
             self.logger.error(f"Error generating Excel from JSON: {str(e)}")
             raise Exception(f"Excel generation failed: {str(e)}")

-    async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
+    async def _getExcelStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
         """Get Excel styling definitions using base template AI styling."""
-        style_schema = {
+        styleSchema = {
             "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
             "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
             "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},

@@ -261,26 +261,26 @@ class RendererXlsx(BaseRenderer):
             "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
         }

-        style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
-        # Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion
-        styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles())
+        styleTemplate = self._createAiStyleTemplate("xlsx", userPrompt, styleSchema)
+        # Use our own _getAiStylesWithExcelColors method to ensure proper color conversion
+        styles = await self._getAiStylesWithExcelColors(aiService, styleTemplate, self._getDefaultExcelStyles())

         # Validate and fix contrast issues
-        return self._validate_excel_styles_contrast(styles)
+        return self._validateExcelStylesContrast(styles)

-    async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
+    async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
         """Get AI styles with proper Excel color conversion."""
-        if not ai_service:
-            return default_styles
+        if not aiService:
+            return defaultStyles

         try:
             from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

-            request_options = AiCallOptions()
-            request_options.operationType = OperationTypeEnum.DATA_GENERATE
+            requestOptions = AiCallOptions()
+            requestOptions.operationType = OperationTypeEnum.DATA_GENERATE

-            request = AiCallRequest(prompt=style_template, context="", options=request_options)
-            response = await ai_service.aiObjects.call(request)
+            request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
+            response = await aiService.aiObjects.call(request)

             import json
             import re

@@ -291,7 +291,7 @@ class RendererXlsx(BaseRenderer):
             # Check if result is empty
             if not result:
                 self.logger.warning("AI styling returned empty response, using defaults")
-                return default_styles
+                return defaultStyles

             # Extract JSON from markdown if present
             json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)

@@ -312,46 +312,46 @@ class RendererXlsx(BaseRenderer):
                 styles = json.loads(result)
             except json.JSONDecodeError as json_error:
                 self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
-                return default_styles
+                return defaultStyles

             # Convert colors to Excel aRGB format
-            styles = self._convert_colors_format(styles)
+            styles = self._convertColorsFormat(styles)

             return styles

         except Exception as e:
             self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
-            return default_styles
+            return defaultStyles

-    def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str:
+    def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
         """Get a safe aRGB color value for Excel (without # prefix)."""
-        if not isinstance(color_value, str):
+        if not isinstance(colorValue, str):
            return default

        # Remove # prefix if present
-        if color_value.startswith('#'):
-            color_value = color_value[1:]
+        if colorValue.startswith('#'):
+            colorValue = colorValue[1:]

-        if len(color_value) == 6:
+        if len(colorValue) == 6:
             # Convert RRGGBB to AARRGGBB
-            return f"FF{color_value}"
-        elif len(color_value) == 8:
+            return f"FF{colorValue}"
+        elif len(colorValue) == 8:
             # Already aRGB format
-            return color_value
+            return colorValue
         else:
             # Unexpected format, return default
             return default
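Behavior of the Excel `_getSafeColor` variant, for illustration (the input values are invented, the logic is straight from the diff above):

    self._getSafeColor("#1F4E79")    # -> "FF1F4E79" (alpha byte prepended, '#' dropped)
    self._getSafeColor("FF1F4E79")   # -> "FF1F4E79" (already 8-char aRGB, returned as-is)
    self._getSafeColor(42)           # -> "FF000000" (non-string falls back to default)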
|
||||
|
||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert hex colors to aRGB format for Excel compatibility."""
|
||||
try:
|
||||
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
|
||||
for style_name, style_config in styles.items():
|
||||
if isinstance(style_config, dict):
|
||||
for prop, value in style_config.items():
|
||||
for styleName, styleConfig in styles.items():
|
||||
if isinstance(styleConfig, dict):
|
||||
for prop, value in styleConfig.items():
|
||||
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
||||
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
|
||||
styles[style_name][prop] = f"FF{value[1:]}"
|
||||
styles[styleName][prop] = f"FF{value[1:]}"
|
||||
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
||||
pass # Already aRGB format
|
||||
elif isinstance(value, str) and value.startswith('#'):
|
||||
|
|
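Note: the `_getSafeColor` rename above keeps the behavior identical; only identifiers change. A minimal standalone sketch of the same aRGB normalization (a hypothetical free function, for illustration only, not part of the commit):

```python
def getSafeColor(colorValue: str, default: str = "FF000000") -> str:
    """Normalize a color to the aRGB string openpyxl expects (no '#' prefix)."""
    if not isinstance(colorValue, str):
        return default
    if colorValue.startswith('#'):
        colorValue = colorValue[1:]   # drop the '#' prefix
    if len(colorValue) == 6:
        return f"FF{colorValue}"      # RRGGBB -> opaque AARRGGBB
    if len(colorValue) == 8:
        return colorValue             # already aRGB
    return default                    # unexpected format

assert getSafeColor("#1F4E79") == "FF1F4E79"
assert getSafeColor("FF2F2F2F") == "FF2F2F2F"
assert getSafeColor(None) == "FF000000"
```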
@@ -360,34 +360,34 @@ class RendererXlsx(BaseRenderer):
        except Exception as e:
            return styles

-    def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _validateExcelStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
-                bg_color = header.get("background", "#FFFFFF")
-                text_color = header.get("text_color", "#000000")
+                bgColor = header.get("background", "#FFFFFF")
+                textColor = header.get("text_color", "#000000")

                # If both are white or both are dark, fix it
-                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
+                if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"
-                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
+                elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
-                bg_color = cell.get("background", "#FFFFFF")
-                text_color = cell.get("text_color", "#000000")
+                bgColor = cell.get("background", "#FFFFFF")
+                textColor = cell.get("text_color", "#000000")

                # If both are white or both are dark, fix it
-                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
+                if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"
-                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
+                elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"

@@ -395,9 +395,9 @@ class RendererXlsx(BaseRenderer):

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
-            return self._get_default_excel_styles()
+            return self._getDefaultExcelStyles()

-    def _get_default_excel_styles(self) -> Dict[str, Any]:
+    def _getDefaultExcelStyles(self) -> Dict[str, Any]:
        """Default Excel styles with aRGB color format."""
        return {
            "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},

@@ -409,104 +409,104 @@ class RendererXlsx(BaseRenderer):
            "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
        }

-    def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
+    def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
        """Create Excel sheets based on content structure and user intent."""
        sheets = {}

        # Get sheet names from AI styles or generate based on content
-        sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
-        self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER")
+        sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent))
+        self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")

        # Create sheets
-        for i, sheet_name in enumerate(sheet_names):
+        for i, sheetName in enumerate(sheetNames):
            if i == 0:
                # Use the default sheet for the first sheet
                sheet = wb.active
-                sheet.title = sheet_name
+                sheet.title = sheetName
            else:
                # Create additional sheets
-                sheet = wb.create_sheet(sheet_name, i)
-            sheets[sheet_name.lower()] = sheet
+                sheet = wb.create_sheet(sheetName, i)
+            sheets[sheetName.lower()] = sheet

        return sheets

-    def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
+    def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
        """Generate sheet names based on actual content structure."""
-        sections = json_content.get("sections", [])
+        sections = jsonContent.get("sections", [])

        # If no sections, create a single sheet
        if not sections:
            return ["Content"]

        # Generate sheet names based on content structure
-        sheet_names = []
+        sheetNames = []

        # Check if we have multiple table sections
-        table_sections = [s for s in sections if s.get("content_type") == "table"]
+        tableSections = [s for s in sections if s.get("content_type") == "table"]

-        if len(table_sections) > 1:
+        if len(tableSections) > 1:
            # Create separate sheets for each table
-            for i, section in enumerate(table_sections, 1):
-                section_title = section.get("title", f"Table {i}")
-                sheet_names.append(section_title[:31])  # Excel sheet name limit
+            for i, section in enumerate(tableSections, 1):
+                sectionTitle = section.get("title", f"Table {i}")
+                sheetNames.append(sectionTitle[:31])  # Excel sheet name limit
        else:
            # Single table or mixed content - create main sheet
-            document_title = json_content.get("metadata", {}).get("title", "Document")
-            sheet_names.append(document_title[:31])  # Excel sheet name limit
+            documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
+            sheetNames.append(documentTitle[:31])  # Excel sheet name limit

            # Add additional sheets for other content types
-            content_types = set()
+            contentTypes = set()
            for section in sections:
-                content_type = section.get("content_type", "paragraph")
-                content_types.add(content_type)
+                contentType = section.get("content_type", "paragraph")
+                contentTypes.add(contentType)

-            if "table" in content_types and len(table_sections) == 1:
-                sheet_names.append("Table Data")
-            if "list" in content_types:
-                sheet_names.append("Lists")
-            if "paragraph" in content_types or "heading" in content_types:
-                sheet_names.append("Text")
+            if "table" in contentTypes and len(tableSections) == 1:
+                sheetNames.append("Table Data")
+            if "list" in contentTypes:
+                sheetNames.append("Lists")
+            if "paragraph" in contentTypes or "heading" in contentTypes:
+                sheetNames.append("Text")

        # Limit to 4 sheets maximum
-        return sheet_names[:4]
+        return sheetNames[:4]

-    def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
+    def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Populate Excel sheets with content from JSON based on actual sheet names."""
        try:
            # Get the actual sheet names that were created
-            sheet_names = list(sheets.keys())
+            sheetNames = list(sheets.keys())

-            if not sheet_names:
+            if not sheetNames:
                return

-            sections = json_content.get("sections", [])
-            table_sections = [s for s in sections if s.get("content_type") == "table"]
+            sections = jsonContent.get("sections", [])
+            tableSections = [s for s in sections if s.get("content_type") == "table"]

-            if len(table_sections) > 1:
+            if len(tableSections) > 1:
                # Multiple tables - populate each sheet with its corresponding table
-                for i, section in enumerate(table_sections):
-                    if i < len(sheet_names):
-                        sheet_name = sheet_names[i]
-                        sheet = sheets[sheet_name]
-                        self._populate_table_sheet(sheet, section, styles, f"Table {i+1}")
+                for i, section in enumerate(tableSections):
+                    if i < len(sheetNames):
+                        sheetName = sheetNames[i]
+                        sheet = sheets[sheetName]
+                        self._populateTableSheet(sheet, section, styles, f"Table {i+1}")
            else:
                # Single table or mixed content - use original logic
-                first_sheet_name = sheet_names[0]
-                self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
+                firstSheetName = sheetNames[0]
+                self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)

                # If we have multiple sheets, distribute content by type
-                if len(sheet_names) > 1:
-                    self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
+                if len(sheetNames) > 1:
+                    self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])

        except Exception as e:
            self.logger.warning(f"Could not populate Excel sheets: {str(e)}")

-    def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str):
+    def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
        """Populate a sheet with a single table section."""
        try:
            # Sheet title
-            sheet['A1'] = sheet_title
-            sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
+            sheet['A1'] = sheetTitle
+            sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
            sheet['A1'].alignment = Alignment(horizontal="center")

            # Get table data from elements (canonical JSON format)
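Note: two Excel limits are enforced in `_generateSheetNamesFromContent` and are easy to miss in review: sheet names are truncated to 31 characters, and at most 4 sheets are kept. A small sketch of that trimming (a hypothetical helper, not part of the commit):

```python
def trimSheetNames(titles, maxSheets=4):
    """Apply Excel's 31-character sheet-name limit and cap the sheet count."""
    return [t[:31] for t in titles][:maxSheets]

names = trimSheetNames(
    ["Quarterly Revenue Breakdown By Region", "Lists", "Text", "Extra", "More"]
)
print(names)  # ['Quarterly Revenue Breakdown By ', 'Lists', 'Text', 'Extra']
```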
@@ -528,9 +528,9 @@ class RendererXlsx(BaseRenderer):
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=3, column=col, value=header)
                if header_style.get("bold"):
-                    cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
+                    cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
                if header_style.get("background"):
-                    cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
+                    cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")

            # Add rows
            cell_style = styles.get("table_cell", {})

@@ -538,7 +538,7 @@ class RendererXlsx(BaseRenderer):
                for col_idx, cell_value in enumerate(row_data, 1):
                    cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
                    if cell_style.get("text_color"):
-                        cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
+                        cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))

            # Auto-adjust column widths
            for col in range(1, len(headers) + 1):

@@ -547,17 +547,17 @@ class RendererXlsx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not populate table sheet: {str(e)}")

-    def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
+    def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
        """Populate the main sheet with document overview and all content."""
        try:
            # Document title
-            document_title = json_content.get("metadata", {}).get("title", "Generated Report")
-            sheet['A1'] = document_title
+            documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
+            sheet['A1'] = documentTitle

            # Safety check for title style
            title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
            try:
-                safe_color = self._get_safe_color(title_style["color"])
+                safe_color = self._getSafeColor(title_style["color"])
                sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
                sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
            except Exception as font_error:

@@ -567,12 +567,12 @@ class RendererXlsx(BaseRenderer):

            # Generation info
            sheet['A3'] = "Generated:"
-            sheet['B3'] = self._format_timestamp()
+            sheet['B3'] = self._formatTimestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Document metadata
-            metadata = json_content.get("metadata", {})
+            metadata = jsonContent.get("metadata", {})
            if metadata:
                sheet['A6'] = "Document Information:"
                sheet['A6'].font = Font(bold=True)

@@ -585,7 +585,7 @@ class RendererXlsx(BaseRenderer):
                    row += 1

            # Content overview
-            sections = json_content.get("sections", [])
+            sections = jsonContent.get("sections", [])
            sheet[f'A{row + 1}'] = "Content Overview:"
            sheet[f'A{row + 1}'].font = Font(bold=True)

@@ -605,7 +605,7 @@ class RendererXlsx(BaseRenderer):
            # Add all content to this sheet
            row += 2
            for section in sections:
-                row = self._add_section_to_sheet(sheet, section, styles, row)
+                row = self._addSectionToSheet(sheet, section, styles, row)
                row += 1  # Empty row between sections

            # Auto-adjust column widths

@@ -615,34 +615,34 @@ class RendererXlsx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not populate main sheet: {str(e)}")

-    def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
+    def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
        """Populate additional sheets based on content types."""
        try:
-            sections = json_content.get("sections", [])
+            sections = jsonContent.get("sections", [])

-            for sheet_name in sheet_names:
-                if sheet_name not in sheets:
+            for sheetName in sheetNames:
+                if sheetName not in sheets:
                    continue

-                sheet = sheets[sheet_name]
-                sheet_title = sheet_name.title()
-                sheet['A1'] = sheet_title
+                sheet = sheets[sheetName]
+                sheetTitle = sheetName.title()
+                sheet['A1'] = sheetTitle
                sheet['A1'].font = Font(size=16, bold=True)

                row = 3

                # Filter sections by content type
-                if sheet_name == "tables":
+                if sheetName == "tables":
                    filtered_sections = [s for s in sections if s.get("content_type") == "table"]
-                elif sheet_name == "lists":
+                elif sheetName == "lists":
                    filtered_sections = [s for s in sections if s.get("content_type") == "list"]
-                elif sheet_name == "text":
+                elif sheetName == "text":
                    filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
                else:
                    filtered_sections = sections

                for section in filtered_sections:
-                    row = self._add_section_to_sheet(sheet, section, styles, row)
+                    row = self._addSectionToSheet(sheet, section, styles, row)
                    row += 1  # Empty row between sections

                # Auto-adjust column widths

@@ -652,15 +652,15 @@ class RendererXlsx(BaseRenderer):
        except Exception as e:
            self.logger.warning(f"Could not populate content type sheets: {str(e)}")

-    def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+    def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a section to a sheet and return the next row."""
        try:
            # Add section title
            section_title = section.get("title")
            if section_title:
-                sheet[f'A{start_row}'] = f"# {section_title}"
-                sheet[f'A{start_row}'].font = Font(bold=True)
-                start_row += 1
+                sheet[f'A{startRow}'] = f"# {section_title}"
+                sheet[f'A{startRow}'].font = Font(bold=True)
+                startRow += 1

            # Process section based on type
            section_type = section.get("content_type", "paragraph")

@@ -669,23 +669,23 @@ class RendererXlsx(BaseRenderer):
            elements = section.get("elements", [])
            for element in elements:
                if section_type == "table":
-                    start_row = self._add_table_to_excel(sheet, element, styles, start_row)
+                    startRow = self._addTableToExcel(sheet, element, styles, startRow)
                elif section_type == "list":
-                    start_row = self._add_list_to_excel(sheet, element, styles, start_row)
+                    startRow = self._addListToExcel(sheet, element, styles, startRow)
                elif section_type == "paragraph":
-                    start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
+                    startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
                elif section_type == "heading":
-                    start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
+                    startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
                else:
-                    start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
+                    startRow = self._addParagraphToExcel(sheet, element, styles, startRow)

-            return start_row
+            return startRow

        except Exception as e:
            self.logger.warning(f"Could not add section to sheet: {str(e)}")
-            return start_row + 1
+            return startRow + 1

-    def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+    def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a table element to Excel sheet."""
        try:
            # In canonical JSON format, table elements have headers and rows directly

@@ -693,99 +693,99 @@ class RendererXlsx(BaseRenderer):
            rows = element.get("rows", [])

            if not headers and not rows:
-                return start_row
+                return startRow

            # Add headers
            header_style = styles.get("table_header", {})
            for col, header in enumerate(headers, 1):
-                cell = sheet.cell(row=start_row, column=col, value=header)
+                cell = sheet.cell(row=startRow, column=col, value=header)
                if header_style.get("bold"):
-                    cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
+                    cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
                if header_style.get("background"):
-                    cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
+                    cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")

-            start_row += 1
+            startRow += 1

            # Add rows
            cell_style = styles.get("table_cell", {})
            for row_data in rows:
                for col, cell_value in enumerate(row_data, 1):
-                    cell = sheet.cell(row=start_row, column=col, value=cell_value)
+                    cell = sheet.cell(row=startRow, column=col, value=cell_value)
                    if cell_style.get("text_color"):
-                        cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
-                start_row += 1
+                        cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
+                startRow += 1

-            return start_row
+            return startRow

        except Exception as e:
            self.logger.warning(f"Could not add table to Excel: {str(e)}")
-            return start_row + 1
+            return startRow + 1

-    def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+    def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a list element to Excel sheet."""
        try:
            list_items = element.get("items", [])

            list_style = styles.get("bullet_list", {})
            for item in list_items:
-                sheet.cell(row=start_row, column=1, value=f"• {item}")
+                sheet.cell(row=startRow, column=1, value=f"• {item}")
                if list_style.get("color"):
-                    sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"]))
-                start_row += 1
+                    sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
+                startRow += 1

-            return start_row
+            return startRow

        except Exception as e:
            self.logger.warning(f"Could not add list to Excel: {str(e)}")
-            return start_row + 1
+            return startRow + 1

-    def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+    def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a paragraph element to Excel sheet."""
        try:
            text = element.get("text", "")
            if text:
-                sheet.cell(row=start_row, column=1, value=text)
+                sheet.cell(row=startRow, column=1, value=text)

                paragraph_style = styles.get("paragraph", {})
                if paragraph_style.get("color"):
-                    sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"]))
+                    sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))

-                start_row += 1
+                startRow += 1

-            return start_row
+            return startRow

        except Exception as e:
            self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
-            return start_row + 1
+            return startRow + 1

-    def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
+    def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
        """Add a heading element to Excel sheet."""
        try:
            text = element.get("text", "")
            level = element.get("level", 1)

            if text:
-                sheet.cell(row=start_row, column=1, value=text)
+                sheet.cell(row=startRow, column=1, value=text)

                heading_style = styles.get("heading", {})
                font_size = heading_style.get("font_size", 14)
                if level > 1:
                    font_size = max(10, font_size - (level - 1) * 2)

-                sheet.cell(row=start_row, column=1).font = Font(
+                sheet.cell(row=startRow, column=1).font = Font(
                    size=font_size,
                    bold=True,
-                    color=self._get_safe_color(heading_style.get("color", "FF000000"))
+                    color=self._getSafeColor(heading_style.get("color", "FF000000"))
                )

-                start_row += 1
+                startRow += 1

-            return start_row
+            return startRow

        except Exception as e:
            self.logger.warning(f"Could not add heading to Excel: {str(e)}")
-            return start_row + 1
+            return startRow + 1

-    def _format_timestamp(self) -> str:
+    def _formatTimestamp(self) -> str:
        """Format current timestamp for document generation."""
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
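Note: `_formatTimestamp` uses the timezone-aware `datetime.now(UTC)` form, which requires Python 3.11+ (`datetime.UTC`). A minimal sketch of the same pattern:

```python
from datetime import datetime, UTC

# Timezone-aware "now", formatted the way the renderer stamps documents
print(datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC"))
# e.g. 2025-01-01 12:00:00 UTC
```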
@@ -1,25 +1,32 @@
"""
-JSON Schema definitions for AI-generated document structures.
-This module provides schemas that guide AI to generate structured JSON output.
+JSON Schema definitions for AI-generated document structures (unified).
+This module provides schemas that guide AI to generate structured JSON output
+that matches the master template in modules.datamodels.datamodelJson.
"""

from typing import Dict, Any


-def get_multi_document_subJsonSchema() -> Dict[str, Any]:
-    """Get the JSON schema for multi-document generation."""
+def getMultiDocumentSchema() -> Dict[str, Any]:
+    """Get the JSON schema for multi-document generation (unified)."""
    return {
        "type": "object",
        "required": ["metadata", "documents"],
        "properties": {
            "metadata": {
                "type": "object",
-                "required": ["title", "split_strategy"],
+                "required": ["split_strategy"],
                "properties": {
                    "title": {"type": "string", "description": "Document title"},
                    "split_strategy": {
                        "type": "string",
-                        "enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
+                        "enum": [
+                            "single_document",
+                            "per_entity",
+                            "by_section",
+                            "by_criteria",
+                            "by_data_type",
+                            "custom"
+                        ],
                        "description": "Strategy for splitting content into multiple files"
                    },
                    "splitCriteria": {

@@ -30,7 +37,6 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
                        "type": "string",
                        "description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
                    },
-                    "author": {"type": "string", "description": "Document author (optional)"},
                    "source_documents": {
                        "type": "array",
                        "items": {"type": "string"},

@@ -38,7 +44,7 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
                    },
                    "extraction_method": {
                        "type": "string",
-                        "default": "ai_extraction",
+                        "default": "ai_generation",
                        "description": "Method used for extraction"
                    }
                }

@@ -64,7 +70,15 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
                        "title": {"type": "string", "description": "Section title (optional)"},
                        "content_type": {
                            "type": "string",
-                            "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
+                            "enum": [
+                                "table",
+                                "bullet_list",
+                                "paragraph",
+                                "heading",
+                                "code_block",
+                                "image",
+                                "mixed"
+                            ],
                            "description": "Primary content type of this section"
                        },
                        "elements": {

@@ -76,7 +90,8 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
                                {"$ref": "#/definitions/bullet_list"},
                                {"$ref": "#/definitions/paragraph"},
                                {"$ref": "#/definitions/heading"},
-                                {"$ref": "#/definitions/code_block"}
+                                {"$ref": "#/definitions/code_block"},
+                                {"$ref": "#/definitions/image"}
                            ]
                        }
                    },

@@ -191,11 +206,20 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
                    "code": {"type": "string", "description": "Code content"},
                    "language": {"type": "string", "description": "Programming language (optional)"}
                }
-            }
+            },
+            "image": {
+                "type": "object",
+                "required": ["url"],
+                "properties": {
+                    "url": {"type": "string", "description": "Image URL or data URI"},
+                    "caption": {"type": "string", "description": "Image caption (optional)"},
+                    "alt": {"type": "string", "description": "Alt text (optional)"}
+                }
+            }
        }
    }

-def get_document_subJsonSchema() -> Dict[str, Any]:
+def getDocumentSchema() -> Dict[str, Any]:
    """Get the JSON schema for structured document generation (single document)."""
    return {
        "type": "object",

@@ -206,7 +230,6 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
                "required": ["title"],
                "properties": {
                    "title": {"type": "string", "description": "Document title"},
-                    "author": {"type": "string", "description": "Document author (optional)"},
                    "source_documents": {
                        "type": "array",
                        "items": {"type": "string"},

@@ -214,7 +237,7 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
                    },
                    "extraction_method": {
                        "type": "string",
-                        "default": "ai_extraction",
+                        "default": "ai_generation",
                        "description": "Method used for extraction"
                    }
                }

@@ -230,7 +253,15 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
                        "title": {"type": "string", "description": "Section title (optional)"},
                        "content_type": {
                            "type": "string",
-                            "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
+                            "enum": [
+                                "table",
+                                "bullet_list",
+                                "paragraph",
+                                "heading",
+                                "code_block",
+                                "image",
+                                "mixed"
+                            ],
                            "description": "Primary content type of this section"
                        },
                        "elements": {

@@ -242,7 +273,8 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
                                {"$ref": "#/definitions/bullet_list"},
                                {"$ref": "#/definitions/paragraph"},
                                {"$ref": "#/definitions/heading"},
-                                {"$ref": "#/definitions/code_block"}
+                                {"$ref": "#/definitions/code_block"},
+                                {"$ref": "#/definitions/image"}
                            ]
                        }
                    },

@@ -359,12 +391,21 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
                    "code": {"type": "string", "description": "Code content"},
                    "language": {"type": "string", "description": "Programming language (optional)"}
                }
-            }
+            },
+            "image": {
+                "type": "object",
+                "required": ["url"],
+                "properties": {
+                    "url": {"type": "string", "description": "Image URL or data URI"},
+                    "caption": {"type": "string", "description": "Image caption (optional)"},
+                    "alt": {"type": "string", "description": "Alt text (optional)"}
+                }
+            }
        }
    }


-def get_extraction_prompt_template() -> str:
+def getExtractionPromptTemplate() -> str:
    """Get the template for AI extraction prompts that request JSON output."""
    return """
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.

@@ -390,7 +431,7 @@ Return only the JSON structure following the schema. Do not include any text bef
"""


-def get_generation_prompt_template() -> str:
+def getGenerationPromptTemplate() -> str:
    """Get the template for AI generation prompts that work with JSON input."""
    return """
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.

@@ -416,31 +457,31 @@ Return only the enhanced JSON structure following the schema. Do not include any
"""


-def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
+def getAdaptiveJsonSchema(promptAnalysis: Dict[str, Any] = None) -> Dict[str, Any]:
    """Automatically select appropriate schema based on prompt analysis."""
-    if prompt_analysis and prompt_analysis.get("is_multi_file", False):
-        return get_multi_document_subJsonSchema()
+    if promptAnalysis and promptAnalysis.get("is_multi_file", False):
+        return getMultiDocumentSchema()
    else:
-        return get_document_subJsonSchema()
+        return getDocumentSchema()

-def validate_json_document(json_data: Dict[str, Any]) -> bool:
-    """Validate that the JSON data follows the document schema."""
+def validateJsonDocument(jsonData: Dict[str, Any]) -> bool:
+    """Validate that the JSON data follows the unified document schema."""
    try:
        # Basic validation - check required fields
-        if not isinstance(json_data, dict):
+        if not isinstance(jsonData, dict):
            return False

        # Check if it's multi-document or single-document structure
-        if "documents" in json_data:
+        if "documents" in jsonData:
            # Multi-document structure
-            if "metadata" not in json_data:
+            if "metadata" not in jsonData:
                return False

-            metadata = json_data["metadata"]
-            if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata:
+            metadata = jsonData["metadata"]
+            if not isinstance(metadata, dict) or "split_strategy" not in metadata:
                return False

-            documents = json_data["documents"]
+            documents = jsonData["documents"]
            if not isinstance(documents, list):
                return False

@@ -469,7 +510,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
                        return False

                    # Validate content_type
-                    valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
+                    valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
                    if section["content_type"] not in valid_types:
                        return False

@@ -477,16 +518,16 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
                    if not isinstance(section["elements"], list):
                        return False

-        elif "sections" in json_data:
+        elif "sections" in jsonData:
            # Single-document structure (existing validation)
-            if "metadata" not in json_data:
+            if "metadata" not in jsonData:
                return False

-            metadata = json_data["metadata"]
+            metadata = jsonData["metadata"]
            if not isinstance(metadata, dict) or "title" not in metadata:
                return False

-            sections = json_data["sections"]
+            sections = jsonData["sections"]
            if not isinstance(sections, list):
                return False

@@ -501,7 +542,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
                    return False

                # Validate content_type
-                valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
+                valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
                if section["content_type"] not in valid_types:
                    return False
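Note: the renamed schema helpers can be sanity-checked together. A sketch only, assuming just the functions shown in this diff; whether the validator accepts an empty `documents` list depends on per-document checks not visible in these hunks:

```python
# Multi-file prompts select the multi-document schema
schema = getAdaptiveJsonSchema({"is_multi_file": True})
print(schema["required"])  # ['metadata', 'documents']

# 'title' is no longer required for multi-document metadata after this change
minimal = {"metadata": {"split_strategy": "per_entity"}, "documents": []}
print(validateJsonDocument(minimal))  # expected True, assuming no per-document check rejects an empty list
```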
@@ -5,83 +5,10 @@ This module builds prompts for generating documents from extracted content.

import logging
from typing import Dict, Any
+from modules.datamodels.datamodelJson import jsonTemplateDocument

logger = logging.getLogger(__name__)

-# Centralized JSON structure template for document generation
-# Includes examples for all content types so AI knows the structure patterns
-TEMPLATE_JSON_DOCUMENT_GENERATION = """{
-    "metadata": {
-        "split_strategy": "single_document",
-        "source_documents": [],
-        "extraction_method": "ai_generation"
-    },
-    "documents": [
-        {
-            "id": "doc_1",
-            "title": "{{DOCUMENT_TITLE}}",
-            "filename": "document.json",
-            "sections": [
-                {
-                    "id": "section_heading_example",
-                    "content_type": "heading",
-                    "elements": [
-                        {"level": 1, "text": "Heading Text"}
-                    ],
-                    "order": 0
-                },
-                {
-                    "id": "section_paragraph_example",
-                    "content_type": "paragraph",
-                    "elements": [
-                        {"text": "Paragraph text content"}
-                    ],
-                    "order": 0
-                },
-                {
-                    "id": "section_list_example",
-                    "content_type": "list",
-                    "elements": [
-                        {
-                            "items": [
-                                {"text": "Item 1"},
-                                {"text": "Item 2"}
-                            ],
-                            "list_type": "numbered"
-                        }
-                    ],
-                    "order": 0
-                },
-                {
-                    "id": "section_table_example",
-                    "content_type": "table",
-                    "elements": [
-                        {
-                            "headers": ["Column 1", "Column 2"],
-                            "rows": [
-                                ["Row 1 Col 1", "Row 1 Col 2"],
-                                ["Row 2 Col 1", "Row 2 Col 2"]
-                            ],
-                            "caption": "Table caption"
-                        }
-                    ],
-                    "order": 0
-                },
-                {
-                    "id": "section_code_example",
-                    "content_type": "code",
-                    "elements": [
-                        {
-                            "code": "function example() { return true; }",
-                            "language": "javascript"
-                        }
-                    ],
-                    "order": 0
-                }
-            ]
-        }
-    ]
-}"""
-

async def buildGenerationPrompt(

@@ -106,99 +33,101 @@ async def buildGenerationPrompt(
        Complete generation prompt string
    """
    # Create a template - let AI generate title if not provided
-    title_value = title if title else "Generated Document"
-    json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)
+    titleValue = title if title else "Generated Document"
+    jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)

    # Build prompt based on whether this is a continuation or first call
    # Check if we have valid continuation context with actual JSON fragment
-    has_continuation = (
+    hasContinuation = (
        continuationContext
        and continuationContext.get("section_count", 0) > 0
        and continuationContext.get("last_raw_json", "")
        and continuationContext.get("last_raw_json", "").strip() != "{}"
    )

-    if has_continuation:
+    if hasContinuation:
        # CONTINUATION PROMPT - user already received first part, continue from where it stopped
-        last_raw_json = continuationContext.get("last_raw_json", "")
-        last_item_object = continuationContext.get("last_item_object", "")  # Full object like {"text": "value"}
-        last_items_from_fragment = continuationContext.get("last_items_from_fragment", "")
-        total_items_count = continuationContext.get("total_items_count", 0)
+        lastRawJson = continuationContext.get("last_raw_json", "")
+        lastItemObject = continuationContext.get("last_item_object", "")  # Full object like {"text": "value"}
+        lastItemsFromFragment = continuationContext.get("last_items_from_fragment", "")
+        totalItemsCount = continuationContext.get("total_items_count", 0)

        # Show the last few items to indicate where to continue (limit fragment size)
        # Extract just the ending portion of the JSON to show where it cut off
-        fragment_snippet = ""
-        if last_raw_json:
+        fragmentSnippet = ""
+        if lastRawJson:
            # Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
-            fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json
+            fragmentSnippet = lastRawJson[-1500:] if len(lastRawJson) > 1500 else lastRawJson
            # Add ellipsis if truncated
-            if len(last_raw_json) > 1500:
-                fragment_snippet = "..." + fragment_snippet
+            if len(lastRawJson) > 1500:
+                fragmentSnippet = "..." + fragmentSnippet

        # Build clear continuation guidance
-        continuation_guidance = []
+        continuationGuidance = []

-        if total_items_count > 0:
-            continuation_guidance.append(f"You have already generated {total_items_count} items.")
+        if totalItemsCount > 0:
+            continuationGuidance.append(f"You have already generated {totalItemsCount} items.")

        # Show the last complete item object (full object format)
-        if last_item_object:
-            continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")
+        if lastItemObject:
+            continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")

-        continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."
+        continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped."

-        generation_prompt = f"""User request: "{userPrompt}"
+        generationPrompt = f"""User request: "{userPrompt}"

The user already received part of the response. Continue generating the remaining content.

-{continuation_text}
+{continuationText}

Previous response ended here (JSON was cut off at this point):
```json
-{fragment_snippet if fragment_snippet else "(No fragment available)"}
+{fragmentSnippet if fragmentSnippet else "(No fragment available)"}
```

JSON structure template:
-{json_template}
+{jsonTemplate}

Instructions:
-- Return full JSON structure (metadata + documents + sections)
-- Continue from where it stopped - add NEW items only, do not repeat old items
-- Use the element structures shown in the template
-- Generate all remaining content needed to complete the user request
-- Fill with actual content (no comments, no "Add more..." text, no placeholders)
-- When fully complete, add "complete_response": true at root level
-- Return only valid JSON (no comments, no markdown blocks)
+- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
+- Arrays must contain ONLY JSON values; do not include comments or ellipses.
+- Use ONLY the element structures shown in the template.
+- Continue from where it stopped — add NEW items only; do not repeat existing items.
+- Generate all remaining content needed to complete the user request.
+- Fill with actual content (no placeholders or instructional text such as "Add more...").
+- When fully complete, add "complete_response": true at root level.
+- Output JSON only; no markdown fences or extra text before/after.

Continue generating:
"""
    else:
        # FIRST CALL - initial generation
-        generation_prompt = f"""User request: "{userPrompt}"
+        generationPrompt = f"""User request: "{userPrompt}"

-Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
+Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

-JSON structure template (reference only - shows the pattern):
-{json_template}
+JSON structure template:
+{jsonTemplate}

Instructions:
-- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
-- Do NOT continue from the template examples above - create your own sections
-- Generate complete content based on the user request
-- Use the element structures shown in the template (heading, paragraph, list, table, code)
-- Create your own section IDs (do not use the example IDs like "section_heading_example")
-- When fully complete, add "complete_response": true at root level
-- Return only valid JSON (no comments, no markdown blocks, no text before/after)
+- Start your response with {{"metadata": ...}} — return COMPLETE, STRICT JSON.
+- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
+- Arrays must contain ONLY JSON values; do not include comments or ellipses.
+- Do NOT reuse the example section IDs from the template; create your own.
+- Use ONLY the element structures shown in the template.
+- Generate complete content based on the user request.
+- When fully complete, add "complete_response": true at root level.
+- Output JSON only; no markdown fences or any additional text.

Generate your complete response starting from {{"metadata": ...}}:
"""

    # If we have extracted content, prepend it to the prompt
    if extracted_content:
-        generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
+        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}

-{generation_prompt}"""
+{generationPrompt}"""

-    return generation_prompt.strip()
+    return generationPrompt.strip()
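Note: the continuation check in buildGenerationPrompt is plain dict inspection, so it is easy to exercise in isolation. A sketch of the same predicate, extracted as a hypothetical helper:

```python
def hasContinuation(ctx: dict | None) -> bool:
    # Mirrors the predicate above: a continuation needs at least one prior
    # section and a non-trivial JSON fragment to continue from.
    return bool(
        ctx
        and ctx.get("section_count", 0) > 0
        and ctx.get("last_raw_json", "")
        and ctx.get("last_raw_json", "").strip() != "{}"
    )

print(hasContinuation(None))                                          # False
print(hasContinuation({"section_count": 2, "last_raw_json": "{}"}))   # False: trivial fragment
print(hasContinuation({"section_count": 2, "last_raw_json": '{"metadata": ...'}))  # True
```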
@@ -152,11 +152,11 @@ class NeutralizationService:
        try:
            # Auto-detect content type if not provided
            if textType is None:
-                textType = self.commonUtils.detect_content_type(text)
+                textType = self.commonUtils.detectContentType(text)

            # Check if content is binary data
-            if self.binaryProcessor.is_binary_content(text):
-                data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
+            if self.binaryProcessor.isBinaryContent(text):
+                data, mapping, replaced_fields, processed_info = self.binaryProcessor.processBinaryContent(text)
                neutralized_text = text if isinstance(data, str) else str(data)
                attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
                return NeutralizationResult(

@@ -169,13 +169,13 @@ class NeutralizationService:
            # Inline former _processData routing
            if textType in ['csv', 'json', 'xml']:
                if textType == 'csv':
-                    data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
+                    data, mapping, replaced_fields, processed_info = self.listProcessor.processCsvContent(text)
                elif textType == 'json':
-                    data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
+                    data, mapping, replaced_fields, processed_info = self.listProcessor.processJsonContent(text)
                else:  # xml
-                    data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
+                    data, mapping, replaced_fields, processed_info = self.listProcessor.processXmlContent(text)
            else:
-                data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
+                data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
            # Stringify data consistently
            if textType == 'csv':
                try:

@@ -6,7 +6,7 @@ Handles pattern matching and replacement for emails, phones, addresses, IDs and
import re
import uuid
from typing import Dict, List, Tuple, Any
-from modules.services.serviceNeutralization.subPatterns import DataPatterns, find_patterns_in_text
+from modules.services.serviceNeutralization.subPatterns import DataPatterns, findPatternsInText

class StringParser:
    """Handles string parsing and replacement operations"""

@@ -22,7 +22,7 @@ class StringParser:
        self.NamesToParse = NamesToParse or []
        self.mapping = {}

-    def is_placeholder(self, text: str) -> bool:
+    def _isPlaceholder(self, text: str) -> bool:
        """
        Check if text is already a placeholder in format [tag.uuid]

@@ -34,7 +34,7 @@ class StringParser:
        """
        return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))

-    def replace_pattern_matches(self, text: str) -> str:
+    def _replacePatternMatches(self, text: str) -> str:
        """
        Replace pattern-based matches (emails, phones, etc.) in text

@@ -44,37 +44,37 @@ class StringParser:
        Returns:
            str: Text with pattern matches replaced
        """
-        pattern_matches = find_patterns_in_text(text, self.data_patterns)
+        patternMatches = findPatternsInText(text, self.data_patterns)

        # Process pattern matches from right to left to avoid position shifts
-        for pattern_name, matched_text, start, end in reversed(pattern_matches):
+        for patternName, matchedText, start, end in reversed(patternMatches):
            # Skip if already a placeholder
-            if self.is_placeholder(matched_text):
+            if self._isPlaceholder(matchedText):
                continue

            # Skip if contains placeholder characters
-            if '[' in matched_text or ']' in matched_text:
+            if '[' in matchedText or ']' in matchedText:
                continue

-            if matched_text not in self.mapping:
+            if matchedText not in self.mapping:
                # Generate a UUID for the placeholder
-                placeholder_id = str(uuid.uuid4())
+                placeholderId = str(uuid.uuid4())
                # Create placeholder in format [type.uuid]
-                type_mapping = {
+                typeMapping = {
                    'email': 'email',
                    'phone': 'phone',
                    'address': 'address',
                    'id': 'id'
                }
-                placeholder_type = type_mapping.get(pattern_name, 'data')
-                self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
+                placeholderType = typeMapping.get(patternName, 'data')
+                self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]"

-            replacement = self.mapping[matched_text]
+            replacement = self.mapping[matchedText]
            text = text[:start] + replacement + text[end:]

        return text

-    def replace_custom_names(self, text: str) -> str:
+    def _replaceCustomNames(self, text: str) -> str:
        """
        Replace custom names from the user list in text

@@ -96,19 +96,19 @@ class StringParser:

        # Replace each match with a placeholder
        for match in reversed(matches):  # Process from right to left to avoid position shifts
-            matched_text = match.group()
-            if matched_text not in self.mapping:
+            matchedText = match.group()
+            if matchedText not in self.mapping:
                # Generate a UUID for the placeholder
-                placeholder_id = str(uuid.uuid4())
-                self.mapping[matched_text] = f"[name.{placeholder_id}]"
+                placeholderId = str(uuid.uuid4())
+                self.mapping[matchedText] = f"[name.{placeholderId}]"

-            replacement = self.mapping[matched_text]
+            replacement = self.mapping[matchedText]
            start, end = match.span()
            text = text[:start] + replacement + text[end:]

        return text

-    def process_string(self, text: str) -> str:
+    def processString(self, text: str) -> str:
        """
        Process a string by replacing patterns first, then custom names

@@ -118,18 +118,18 @@ class StringParser:
        Returns:
            str: Processed text with replacements
        """
-        if self.is_placeholder(text):
+        if self._isPlaceholder(text):
            return text

        # Step 1: Replace pattern-based matches FIRST
-        text = self.replace_pattern_matches(text)
+        text = self._replacePatternMatches(text)

        # Step 2: Replace custom names SECOND
-        text = self.replace_custom_names(text)
+        text = self._replaceCustomNames(text)

        return text

-    def process_json_value(self, value: Any) -> Any:
+    def processJsonValue(self, value: Any) -> Any:
        """
        Process a JSON value for anonymization

@@ -140,15 +140,15 @@ class StringParser:
            Any: Processed value
        """
        if isinstance(value, str):
-            return self.process_string(value)
+            return self.processString(value)
        elif isinstance(value, dict):
-            return {k: self.process_json_value(v) for k, v in value.items()}
+            return {k: self.processJsonValue(v) for k, v in value.items()}
        elif isinstance(value, list):
-            return [self.process_json_value(item) for item in value]
+            return [self.processJsonValue(item) for item in value]
        else:
            return value

-    def get_mapping(self) -> Dict[str, str]:
+    def getMapping(self) -> Dict[str, str]:
        """
        Get the current mapping of original values to placeholders

@@ -157,6 +157,6 @@ class StringParser:
        """
        return self.mapping.copy()

-    def clear_mapping(self):
+    def clearMapping(self):
        """Clear the current mapping"""
        self.mapping.clear()
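Note: after this rename the public StringParser surface is processString, processJsonValue, getMapping, and clearMapping; the pattern and custom-name replacement steps become private. A usage sketch (keyword use of NamesToParse is assumed from the constructor shown above; placeholder UUIDs are illustrative):

```python
from modules.services.serviceNeutralization.subParseString import StringParser

parser = StringParser(NamesToParse=["Alice Smith"])

# Patterns are replaced first, custom names second; output uses [type.uuid] placeholders
neutralized = parser.processString("Contact Alice Smith at alice@example.com")
print(neutralized)  # e.g. 'Contact [name.1b2c...] at [email.9f8e...]'

# Original -> placeholder pairs, kept for later de-neutralization
print(parser.getMapping())

parser.clearMapping()  # reset between documents
```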
@@ -316,7 +316,7 @@ class TextTablePatterns:
    """Patterns for identifying table-like structures in text"""

    @staticmethod
-    def get_patterns() -> List[Tuple[str, str]]:
+    def getPatterns() -> List[Tuple[str, str]]:
        return [
            # key: value pattern (with optional whitespace)
            (r'^([^:]+):\s*(.+)$', ':'),

@@ -329,15 +329,15 @@ class TextTablePatterns:
        ]

    @staticmethod
-    def is_table_line(line: str) -> bool:
+    def _isTableLine(line: str) -> bool:
        """Check if a line matches any table pattern"""
-        patterns = TextTablePatterns.get_patterns()
+        patterns = TextTablePatterns.getPatterns()
        return any(re.match(pattern[0], line.strip()) for pattern in patterns)

    @staticmethod
-    def extract_key_value(line: str) -> Optional[Tuple[str, str]]:
+    def extractKeyValue(line: str) -> Optional[Tuple[str, str]]:
        """Extract key and value from a table line"""
-        patterns = TextTablePatterns.get_patterns()
+        patterns = TextTablePatterns.getPatterns()
        for pattern, separator in patterns:
            match = re.match(pattern, line.strip())
            if match:

@@ -346,7 +346,7 @@ class TextTablePatterns:
                return key, value
        return None

-def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
+def getPatternForHeader(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
    """
    Find matching pattern for a header

@@ -368,7 +368,7 @@ def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
        return pattern
    return None

-def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]:
+def findPatternsInText(text: str, patterns: List[Pattern]) -> List[tuple]:
    """
    Find all pattern matches in text
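Note: as the StringParser loop earlier in this commit shows, findPatternsInText yields (patternName, matchedText, start, end) tuples that are consumed right-to-left so earlier offsets stay valid. A sketch of that consumption (the match list and offsets are illustrative):

```python
text = "mail me at a@b.com"
# Hypothetical result of findPatternsInText(text, patterns)
matches = [("email", "a@b.com", 11, 18)]

for patternName, matchedText, start, end in reversed(matches):
    # Replacing from the right keeps the remaining (start, end) spans valid
    text = text[:start] + f"[{patternName}.placeholder]" + text[end:]
print(text)  # mail me at [email.placeholder]
```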
@ -27,7 +27,7 @@ class BinaryProcessor:
|
|||
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
|
||||
}
|
||||
|
||||
def detect_binary_type(self, content: str) -> str:
|
||||
def _detectBinaryType(self, content: str) -> str:
|
||||
"""
|
||||
Detect if content is binary data and determine type
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ class BinaryProcessor:
|
|||
|
||||
return 'text'
|
||||
|
||||
def is_binary_content(self, content: str) -> bool:
|
||||
def isBinaryContent(self, content: str) -> bool:
|
||||
"""
|
||||
Check if content is binary data
|
||||
|
||||
|
|
@ -64,9 +64,9 @@ class BinaryProcessor:
|
|||
Returns:
|
||||
bool: True if content is binary
|
||||
"""
|
||||
return self.detect_binary_type(content) == 'binary'
|
||||
return self._detectBinaryType(content) == 'binary'
|
||||
|
||||
def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
|
||||
def processBinaryContent(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
|
||||
"""
|
||||
Process binary content for anonymization
|
||||
|
||||
|
|
@ -83,15 +83,15 @@ class BinaryProcessor:
|
|||
# 3. Handling metadata and embedded content
|
||||
# 4. Preserving binary integrity while removing sensitive data
|
||||
|
||||
processed_info = {
|
||||
processedInfo = {
|
||||
'type': 'binary',
|
||||
'status': 'not_implemented',
|
||||
'message': 'Binary data neutralization not yet implemented'
|
||||
}
|
||||
|
||||
return content, {}, [], processed_info
|
||||
return content, {}, [], processedInfo
|
||||
|
||||
def get_supported_types(self) -> Dict[str, list]:
|
||||
def getSupportedTypes(self) -> Dict[str, list]:
|
||||
"""
|
||||
Get list of supported binary file types
|
||||
|
||||
|
|
|
|||
|
|
@@ -33,7 +33,7 @@ class CommonUtils:
     """Common utility functions for data processing"""

     @staticmethod
-    def normalize_whitespace(text: str) -> str:
+    def normalizeWhitespace(text: str) -> str:
         """
         Normalize whitespace in text

@@ -48,7 +48,7 @@ class CommonUtils:
         return text.strip()

     @staticmethod
-    def is_table_line(line: str) -> bool:
+    def _isTableLine(line: str) -> bool:
         """
         Check if a line represents a table row

@@ -62,7 +62,7 @@ class CommonUtils:
                 re.match(r'^\s*[^\t]+\t[^\t]+$', line))

     @staticmethod
-    def detect_content_type(content: str) -> str:
+    def detectContentType(content: str) -> str:
         """
         Detect the type of content based on its structure

@@ -98,7 +98,7 @@ class CommonUtils:
         return 'text'

     @staticmethod
-    def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
+    def mergeMappings(*mappings: Dict[str, str]) -> Dict[str, str]:
         """
         Merge multiple mapping dictionaries

@@ -114,21 +114,21 @@ class CommonUtils:
         return merged

     @staticmethod
-    def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
+    def createPlaceholder(placeholderType: str, placeholderId: str) -> str:
         """
         Create a placeholder string in the format [type.uuid]

         Args:
-            placeholder_type: Type of placeholder (email, phone, name, etc.)
-            placeholder_id: Unique identifier for the placeholder
+            placeholderType: Type of placeholder (email, phone, name, etc.)
+            placeholderId: Unique identifier for the placeholder

         Returns:
             str: Formatted placeholder string
         """
-        return f"[{placeholder_type}.{placeholder_id}]"
+        return f"[{placeholderType}.{placeholderId}]"

     @staticmethod
-    def validate_placeholder(placeholder: str) -> bool:
+    def validatePlaceholder(placeholder: str) -> bool:
         """
         Validate if a string is a valid placeholder

@@ -141,7 +141,7 @@ class CommonUtils:
         return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))

     @staticmethod
-    def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
+    def extractPlaceholderInfo(placeholder: str) -> Optional[tuple]:
         """
         Extract type and ID from a placeholder
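The placeholder helpers form a small round-trip API: create, validate, extract. A minimal sketch of the invariant they maintain (uuid4 identifiers match how the processors in this commit generate placeholder IDs; the CommonUtils import path is not shown in these hunks):

import uuid
# import CommonUtils from its module; the exact path is not shown in these hunks

placeholder = CommonUtils.createPlaceholder("email", str(uuid.uuid4()))
# -> e.g. "[email.0f8fad5b-d9cb-469f-a165-70867728950e]"

# validatePlaceholder enforces the same shape via r'^\[[a-z]+\.[a-f0-9-]+\]$'
assert CommonUtils.validatePlaceholder(placeholder)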
@@ -10,7 +10,7 @@ from typing import Dict, List, Any, Union
 from dataclasses import dataclass
 from io import StringIO
 from modules.services.serviceNeutralization.subParseString import StringParser
-from modules.services.serviceNeutralization.subPatterns import get_pattern_for_header, HeaderPatterns
+from modules.services.serviceNeutralization.subPatterns import getPatternForHeader, HeaderPatterns

 @dataclass
 class TableData:

@@ -32,7 +32,7 @@ class ListProcessor:
         self.string_parser = StringParser(NamesToParse)
         self.header_patterns = HeaderPatterns.patterns

-    def anonymize_table(self, table: TableData) -> TableData:
+    def _anonymizeTable(self, table: TableData) -> TableData:
         """
         Anonymize table data based on headers

@@ -42,28 +42,28 @@ class ListProcessor:
         Returns:
             TableData: Anonymized table
         """
-        anonymized_table = TableData(
+        anonymizedTable = TableData(
             headers=table.headers.copy(),
             rows=[row.copy() for row in table.rows],
             source_type=table.source_type
         )

-        for i, header in enumerate(anonymized_table.headers):
-            pattern = get_pattern_for_header(header, self.header_patterns)
+        for i, header in enumerate(anonymizedTable.headers):
+            pattern = getPatternForHeader(header, self.header_patterns)
             if pattern:
-                for row in anonymized_table.rows:
+                for row in anonymizedTable.rows:
                     if row[i] is not None:
                         original = str(row[i])
                         if original not in self.string_parser.mapping:
                             # Generate a UUID for the placeholder
                             import uuid
-                            placeholder_id = str(uuid.uuid4())
+                            placeholderId = str(uuid.uuid4())
                             self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
                         row[i] = self.string_parser.mapping[original]

-        return anonymized_table
+        return anonymizedTable

-    def process_csv_content(self, content: str) -> tuple:
+    def processCsvContent(self, content: str) -> tuple:
         """
         Process CSV content

@@ -81,29 +81,29 @@ class ListProcessor:
         )

         if not table.rows:
-            return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
+            return None, self.string_parser.getMapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}

-        anonymized_table = self.anonymize_table(table)
+        anonymizedTable = self._anonymizeTable(table)

         # Track replaced fields
-        replaced_fields = []
-        for i, header in enumerate(anonymized_table.headers):
-            for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
-                if anon_row[i] != orig_row[i]:
-                    replaced_fields.append(header)
+        replacedFields = []
+        for i, header in enumerate(anonymizedTable.headers):
+            for origRow, anonRow in zip(table.rows, anonymizedTable.rows):
+                if anonRow[i] != origRow[i]:
+                    replacedFields.append(header)

         # Convert back to DataFrame
-        result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
+        result = pd.DataFrame(anonymizedTable.rows, columns=anonymizedTable.headers)

-        processed_info = {
+        processedInfo = {
             'type': 'table',
             'headers': table.headers,
             'row_count': len(table.rows)
         }

-        return result, self.string_parser.get_mapping(), replaced_fields, processed_info
+        return result, self.string_parser.getMapping(), replacedFields, processedInfo

-    def process_json_content(self, content: str) -> tuple:
+    def processJsonContent(self, content: str) -> tuple:
         """
         Process JSON content

@@ -116,13 +116,13 @@ class ListProcessor:
             data = json.loads(content)

             # Process JSON recursively using string parser
-            result = self.string_parser.process_json_value(data)
+            result = self.string_parser.processJsonValue(data)

-            processed_info = {'type': 'json'}
+            processedInfo = {'type': 'json'}

-            return result, self.string_parser.get_mapping(), [], processed_info
+            return result, self.string_parser.getMapping(), [], processedInfo

-    def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
+    def _anonymizeXmlElement(self, element: ET.Element, indent: str = '') -> str:
         """
         Recursively process XML element and return formatted string
@@ -134,69 +134,69 @@ class ListProcessor:
             Formatted XML string
         """
         # Process attributes
-        processed_attrs = {}
-        for attr_name, attr_value in element.attrib.items():
+        processedAttrs = {}
+        for attrName, attrValue in element.attrib.items():
             # Check if attribute name matches any header patterns
-            pattern = get_pattern_for_header(attr_name, self.header_patterns)
+            pattern = getPatternForHeader(attrName, self.header_patterns)
             if pattern:
-                if attr_value not in self.string_parser.mapping:
+                if attrValue not in self.string_parser.mapping:
                     # Generate a UUID for the placeholder
                     import uuid
-                    placeholder_id = str(uuid.uuid4())
+                    placeholderId = str(uuid.uuid4())
                     # Create placeholder in format [type.uuid]
-                    type_mapping = {
+                    typeMapping = {
                         'email': 'email',
                         'phone': 'phone',
                         'name': 'name',
                         'address': 'address',
                         'id': 'id'
                     }
-                    placeholder_type = type_mapping.get(pattern.name, 'data')
-                    self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
-                processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
+                    placeholderType = typeMapping.get(pattern.name, 'data')
+                    self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
+                processedAttrs[attrName] = self.string_parser.mapping[attrValue]
             else:
                 # Check if attribute value matches any data patterns
-                from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
-                matches = find_patterns_in_text(attr_value, DataPatterns.patterns)
+                from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
+                matches = findPatternsInText(attrValue, DataPatterns.patterns)
                 if matches:
-                    pattern_name = matches[0][0]
-                    pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
+                    patternName = matches[0][0]
+                    pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
                     if pattern:
-                        if attr_value not in self.string_parser.mapping:
+                        if attrValue not in self.string_parser.mapping:
                             # Generate a UUID for the placeholder
                             import uuid
-                            placeholder_id = str(uuid.uuid4())
+                            placeholderId = str(uuid.uuid4())
                             # Create placeholder in format [type.uuid]
-                            type_mapping = {
+                            typeMapping = {
                                 'email': 'email',
                                 'phone': 'phone',
                                 'name': 'name',
                                 'address': 'address',
                                 'id': 'id'
                             }
-                            placeholder_type = type_mapping.get(pattern_name, 'data')
-                            self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
-                        processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
+                            placeholderType = typeMapping.get(patternName, 'data')
+                            self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
+                        processedAttrs[attrName] = self.string_parser.mapping[attrValue]
                     else:
-                        processed_attrs[attr_name] = attr_value
+                        processedAttrs[attrName] = attrValue
                 else:
-                    processed_attrs[attr_name] = attr_value
+                    processedAttrs[attrName] = attrValue

-        attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
+        attrs = ' '.join(f'{k}="{v}"' for k, v in processedAttrs.items())
         attrs = f' {attrs}' if attrs else ''

         # Process text content
         text = element.text.strip() if element.text and element.text.strip() else ''
         if text:
             # Skip if already a placeholder
-            if not self.string_parser.is_placeholder(text):
+            if not self.string_parser._isPlaceholder(text):
                 # Check if text matches any patterns
-                from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
-                pattern_matches = find_patterns_in_text(text, DataPatterns.patterns)
+                from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
+                patternMatches = findPatternsInText(text, DataPatterns.patterns)

-                if pattern_matches:
-                    pattern_name = pattern_matches[0][0]
-                    pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
+                if patternMatches:
+                    patternName = patternMatches[0][0]
+                    pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
                     if pattern:
                         if text not in self.string_parser.mapping:
                             # Generate a UUID for the placeholder

@@ -210,8 +210,8 @@ class ListProcessor:
                                 'address': 'address',
                                 'id': 'id'
                             }
-                            placeholder_type = type_mapping.get(pattern_name, 'data')
-                            self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
+                            placeholderType = typeMapping.get(patternName, 'data')
+                            self.string_parser.mapping[text] = f"[{placeholderType}.{placeholderId}]"
                         text = self.string_parser.mapping[text]
                 else:
                     # Check if text matches any custom names from the user list

@@ -230,8 +230,8 @@ class ListProcessor:
         # Process child elements
         children = []
         for child in element:
-            child_str = self.anonymize_xml_element(child, indent + '  ')
-            children.append(child_str)
+            childStr = self._anonymizeXmlElement(child, indent + '  ')
+            children.append(childStr)

         # Build element string
         if not children and not text:

@@ -246,7 +246,7 @@ class ListProcessor:
             result.append(f"{indent}</{element.tag}>")
         return '\n'.join(result)

-    def process_xml_content(self, content: str) -> tuple:
+    def processXmlContent(self, content: str) -> tuple:
         """
         Process XML content

@@ -259,21 +259,21 @@ class ListProcessor:
             root = ET.fromstring(content)

             # Process XML recursively with proper formatting
-            result = self.anonymize_xml_element(root)
+            result = self._anonymizeXmlElement(root)

-            processed_info = {'type': 'xml'}
+            processedInfo = {'type': 'xml'}

-            return result, self.string_parser.get_mapping(), [], processed_info
+            return result, self.string_parser.getMapping(), [], processedInfo

-    def get_mapping(self) -> Dict[str, str]:
+    def getMapping(self) -> Dict[str, str]:
         """
         Get the current mapping of original values to placeholders

         Returns:
             Dict[str, str]: Mapping dictionary
         """
-        return self.string_parser.get_mapping()
+        return self.string_parser.getMapping()

-    def clear_mapping(self):
+    def clearMapping(self):
         """Clear the current mapping"""
-        self.string_parser.clear_mapping()
+        self.string_parser.clearMapping()
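The same placeholder step appears three times in _anonymizeXmlElement (attribute matched by header, attribute matched by value, element text). A hedged sketch of that step in isolation, using only names visible in the hunks; the helper itself is illustrative and is not part of the commit:

import uuid

def _placeholderFor(value: str, patternName: str, mapping: dict) -> str:
    # Illustrative helper, not in the commit: one placeholder per distinct value.
    if value not in mapping:
        typeMapping = {'email': 'email', 'phone': 'phone', 'name': 'name',
                       'address': 'address', 'id': 'id'}
        placeholderType = typeMapping.get(patternName, 'data')
        placeholderId = str(uuid.uuid4())
        mapping[value] = f"[{placeholderType}.{placeholderId}]"
    return mapping[value]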
@@ -25,7 +25,7 @@ class TextProcessor:
         """
         self.string_parser = StringParser(NamesToParse)

-    def extract_tables_from_text(self, content: str) -> tuple:
+    def _extractTablesFromText(self, content: str) -> tuple:
         """
         Extract tables and plain text from content

@@ -38,11 +38,11 @@ class TextProcessor:
         # For now, process the entire content as plain text
         # This can be extended later to detect table-like structures
         tables = []
-        plain_texts = [PlainText(content=content, source_type='text_plain')]
+        plainTexts = [PlainText(content=content, source_type='text_plain')]

-        return tables, plain_texts
+        return tables, plainTexts

-    def anonymize_plain_text(self, text: PlainText) -> PlainText:
+    def _anonymizePlainText(self, text: PlainText) -> PlainText:
         """
         Anonymize plain text content

@@ -53,11 +53,11 @@ class TextProcessor:
             PlainText: Anonymized text
         """
         # Use the string parser to process the content
-        anonymized_content = self.string_parser.process_string(text.content)
+        anonymizedContent = self.string_parser.processString(text.content)

-        return PlainText(content=anonymized_content, source_type=text.source_type)
+        return PlainText(content=anonymizedContent, source_type=text.source_type)

-    def process_text_content(self, content: str) -> tuple:
+    def processTextContent(self, content: str) -> tuple:
         """
         Process text content and return anonymized data

@@ -68,35 +68,35 @@ class TextProcessor:
             Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
         """
         # Extract tables and plain text sections
-        tables, plain_texts = self.extract_tables_from_text(content)
+        tables, plainTexts = self._extractTablesFromText(content)

         # Process plain text sections
-        anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]
+        anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts]

         # Combine all processed content
         result = content
-        for text, anonymized_text in zip(plain_texts, anonymized_texts):
-            if text.content != anonymized_text.content:
-                result = result.replace(text.content, anonymized_text.content)
+        for text, anonymizedText in zip(plainTexts, anonymizedTexts):
+            if text.content != anonymizedText.content:
+                result = result.replace(text.content, anonymizedText.content)

         # Get processing information
-        processed_info = {
+        processedInfo = {
             'type': 'text',
             'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
                        if tables else [])
         }

-        return result, self.string_parser.get_mapping(), [], processed_info
+        return result, self.string_parser.getMapping(), [], processedInfo

-    def get_mapping(self) -> Dict[str, str]:
+    def getMapping(self) -> Dict[str, str]:
         """
         Get the current mapping of original values to placeholders

         Returns:
             Dict[str, str]: Mapping dictionary
         """
-        return self.string_parser.get_mapping()
+        return self.string_parser.getMapping()

-    def clear_mapping(self):
+    def clearMapping(self):
         """Clear the current mapping"""
-        self.string_parser.clear_mapping()
+        self.string_parser.clearMapping()
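All processors return the same four-tuple, so callers only need the new method names. A hedged caller sketch (the TextProcessor import path and the NamesToParse constructor argument are inferred from the hunks; the sample input is made up):

processor = TextProcessor(NamesToParse=["Jane Doe"])  # constructor signature assumed

result, mapping, replacedFields, processedInfo = processor.processTextContent(
    "Contact Jane Doe for details."
)
# processedInfo['type'] == 'text'; mapping maps originals to [type.uuid] placeholders
processor.clearMapping()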
@@ -20,8 +20,8 @@ class SharepointService:
         Use setAccessTokenFromConnection() method to configure the access token before making API calls.
         """
         self.services = serviceCenter
-        self.access_token = None
-        self.base_url = "https://graph.microsoft.com/v1.0"
+        self.accessToken = None
+        self.baseUrl = "https://graph.microsoft.com/v1.0"

     def setAccessTokenFromConnection(self, userConnection) -> bool:
         """Set access token from UserConnection.

@@ -52,21 +52,21 @@ class SharepointService:
             logger.error(f"Error setting access token: {str(e)}")
             return False

-    async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
+    async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
         """Make a Microsoft Graph API call with proper error handling."""
         try:
-            if self.access_token is None:
+            if self.accessToken is None:
                 logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
                 return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."}

             headers = {
-                "Authorization": f"Bearer {self.access_token}",
+                "Authorization": f"Bearer {self.accessToken}",
                 "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
             }

             # Remove leading slash from endpoint to avoid double slash
-            clean_endpoint = endpoint.lstrip('/')
-            url = f"{self.base_url}/{clean_endpoint}"
+            cleanEndpoint = endpoint.lstrip('/')
+            url = f"{self.baseUrl}/{cleanEndpoint}"
             logger.debug(f"Making Graph API call: {method} {url}")

             timeout = aiohttp.ClientTimeout(total=30)

@@ -106,10 +106,10 @@ class SharepointService:
             logger.error(f"Error making Graph API call: {str(e)}")
             return {"error": f"Error making Graph API call: {str(e)}"}

-    async def discover_sites(self) -> List[Dict[str, Any]]:
+    async def discoverSites(self) -> List[Dict[str, Any]]:
         """Discover all SharePoint sites accessible to the user."""
         try:
-            result = await self._make_graph_api_call("sites?search=*")
+            result = await self._makeGraphApiCall("sites?search=*")

             if "error" in result:
                 logger.error(f"Error discovering SharePoint sites: {result['error']}")

@@ -118,9 +118,9 @@ class SharepointService:
             sites = result.get("value", [])
             logger.info(f"Discovered {len(sites)} SharePoint sites")

-            processed_sites = []
+            processedSites = []
             for site in sites:
-                site_info = {
+                siteInfo = {
                     "id": site.get("id"),
                     "displayName": site.get("displayName"),
                     "name": site.get("name"),

@@ -129,24 +129,24 @@ class SharepointService:
                     "createdDateTime": site.get("createdDateTime"),
                     "lastModifiedDateTime": site.get("lastModifiedDateTime")
                 }
-                processed_sites.append(site_info)
-                logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
+                processedSites.append(siteInfo)
+                logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")

-            return processed_sites
+            return processedSites

         except Exception as e:
             logger.error(f"Error discovering SharePoint sites: {str(e)}")
             return []

-    async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
+    async def findSiteByName(self, siteName: str) -> Optional[Dict[str, Any]]:
         """Find a specific SharePoint site by name using direct Graph API call."""
         try:
             # Try to get the site directly by name using Graph API
-            endpoint = f"sites/{site_name}"
-            result = await self._make_graph_api_call(endpoint)
+            endpoint = f"sites/{siteName}"
+            result = await self._makeGraphApiCall(endpoint)

             if result and "error" not in result:
-                site_info = {
+                siteInfo = {
                     "id": result.get("id"),
                     "displayName": result.get("displayName"),
                     "name": result.get("name"),

@@ -155,15 +155,15 @@ class SharepointService:
                     "createdDateTime": result.get("createdDateTime"),
                     "lastModifiedDateTime": result.get("lastModifiedDateTime")
                 }
-                logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
-                return site_info
+                logger.info(f"Found site directly: {siteInfo['displayName']} - {siteInfo['webUrl']}")
+                return siteInfo

         except Exception as e:
-            logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")
+            logger.debug(f"Direct site lookup failed for '{siteName}': {str(e)}")

         # Fallback to discovery if direct lookup fails
-        logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
-        sites = await self.discover_sites()
+        logger.info(f"Direct lookup failed, trying discovery for site: {siteName}")
+        sites = await self.discoverSites()
         if not sites:
             logger.warning("No sites discovered")
             return None
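A hedged async caller sketch for the renamed lookup methods (serviceCenter and userConnection are placeholders taken from the constructor and setter shown above; the site name is made up):

import asyncio

async def main():
    service = SharepointService(serviceCenter)             # constructor per the hunk above
    service.setAccessTokenFromConnection(userConnection)   # userConnection is a placeholder

    site = await service.findSiteByName("Team Site")       # direct lookup, then discoverSites() fallback
    if site:
        print(site["id"], site["webUrl"])

asyncio.run(main())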
@@ -174,46 +174,46 @@ class SharepointService:

         # Try exact match first
         for site in sites:
-            if site.get("displayName", "").strip().lower() == site_name.strip().lower():
+            if site.get("displayName", "").strip().lower() == siteName.strip().lower():
                 logger.info(f"Found exact match: {site.get('displayName')}")
                 return site

         # Try partial match
         for site in sites:
-            if site_name.lower() in site.get("displayName", "").lower():
+            if siteName.lower() in site.get("displayName", "").lower():
                 logger.info(f"Found partial match: {site.get('displayName')}")
                 return site

-        logger.warning(f"No site found matching: {site_name}")
+        logger.warning(f"No site found matching: {siteName}")
         return None

-    async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
+    async def findSiteByWebUrl(self, webUrl: str) -> Optional[Dict[str, Any]]:
         """Find a SharePoint site using its web URL (useful for guest sites)."""
         try:
             # Use the web URL format: sites/{hostname}:/sites/{site-path}
             # Extract hostname and site path from the web URL
-            if not web_url.startswith("https://"):
-                web_url = f"https://{web_url}"
+            if not webUrl.startswith("https://"):
+                webUrl = f"https://{webUrl}"

             # Parse the URL to extract hostname and site path
             from urllib.parse import urlparse
-            parsed = urlparse(web_url)
+            parsed = urlparse(webUrl)
             hostname = parsed.hostname
-            path_parts = parsed.path.strip('/').split('/')
+            pathParts = parsed.path.strip('/').split('/')

-            if len(path_parts) >= 2 and path_parts[0] == 'sites':
-                site_path = '/'.join(path_parts[1:])  # Everything after 'sites/'
+            if len(pathParts) >= 2 and pathParts[0] == 'sites':
+                sitePath = '/'.join(pathParts[1:])  # Everything after 'sites/'
             else:
-                logger.error(f"Invalid SharePoint URL format: {web_url}")
+                logger.error(f"Invalid SharePoint URL format: {webUrl}")
                 return None

-            endpoint = f"sites/{hostname}:/sites/{site_path}"
+            endpoint = f"sites/{hostname}:/sites/{sitePath}"
             logger.debug(f"Trying web URL format: {endpoint}")

-            result = await self._make_graph_api_call(endpoint)
+            result = await self._makeGraphApiCall(endpoint)

             if result and "error" not in result:
-                site_info = {
+                siteInfo = {
                     "id": result.get("id"),
                     "displayName": result.get("displayName"),
                     "name": result.get("name"),

@@ -222,33 +222,33 @@ class SharepointService:
                     "createdDateTime": result.get("createdDateTime"),
                     "lastModifiedDateTime": result.get("lastModifiedDateTime")
                 }
-                logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
-                return site_info
+                logger.info(f"Found site by web URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
+                return siteInfo
             else:
-                logger.warning(f"Site not found using web URL: {web_url}")
+                logger.warning(f"Site not found using web URL: {webUrl}")
                 return None

         except Exception as e:
             logger.error(f"Error finding site by web URL: {str(e)}")
             return None

-    async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
+    async def findSiteByUrl(self, hostname: str, sitePath: str) -> Optional[Dict[str, Any]]:
         """Find a SharePoint site using the site URL format."""
         try:
             # For guest sites, try different URL formats
-            url_formats = [
-                f"sites/{hostname}:/sites/{site_path}",  # Standard format
-                f"sites/{hostname}:/sites/{site_path}/",  # With trailing slash
-                f"sites/{hostname}:/sites/{site_path.lower()}",  # Lowercase
-                f"sites/{hostname}:/sites/{site_path.lower()}/",  # Lowercase with slash
+            urlFormats = [
+                f"sites/{hostname}:/sites/{sitePath}",  # Standard format
+                f"sites/{hostname}:/sites/{sitePath}/",  # With trailing slash
+                f"sites/{hostname}:/sites/{sitePath.lower()}",  # Lowercase
+                f"sites/{hostname}:/sites/{sitePath.lower()}/",  # Lowercase with slash
             ]

-            for endpoint in url_formats:
+            for endpoint in urlFormats:
                 logger.debug(f"Trying URL format: {endpoint}")
-                result = await self._make_graph_api_call(endpoint)
+                result = await self._makeGraphApiCall(endpoint)

                 if result and "error" not in result:
-                    site_info = {
+                    siteInfo = {
                         "id": result.get("id"),
                         "displayName": result.get("displayName"),
                         "name": result.get("name"),

@@ -257,29 +257,29 @@ class SharepointService:
                         "createdDateTime": result.get("createdDateTime"),
                         "lastModifiedDateTime": result.get("lastModifiedDateTime")
                     }
-                    logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
-                    return site_info
+                    logger.info(f"Found site by URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
+                    return siteInfo
                 else:
                     logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")

-            logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
+            logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{sitePath}")
             return None

         except Exception as e:
             logger.error(f"Error finding site by URL: {str(e)}")
             return None

-    async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
+    async def getFolderByPath(self, siteId: str, folderPath: str) -> Optional[Dict[str, Any]]:
         """Get folder information by path within a site."""
         try:
             # Clean the path
-            clean_path = folder_path.lstrip('/')
-            endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
+            cleanPath = folderPath.lstrip('/')
+            endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"

-            result = await self._make_graph_api_call(endpoint)
+            result = await self._makeGraphApiCall(endpoint)

             if "error" in result:
-                logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
+                logger.warning(f"Folder not found at path {folderPath}: {result['error']}")
                 return None

             return result

@@ -288,43 +288,43 @@ class SharepointService:
             logger.error(f"Error getting folder by path: {str(e)}")
             return None

-    async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
+    async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
         """Upload a file to SharePoint."""
         try:
             # Clean the path
-            clean_path = folder_path.lstrip('/')
-            upload_path = f"{clean_path.rstrip('/')}/{file_name}"
-            endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
+            cleanPath = folderPath.lstrip('/')
+            uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
+            endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"

             logger.info(f"Uploading file to: {endpoint}")

-            result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
+            result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)

             if "error" in result:
                 logger.error(f"Upload failed: {result['error']}")
                 return result

-            logger.info(f"File uploaded successfully: {file_name}")
+            logger.info(f"File uploaded successfully: {fileName}")
             return result

         except Exception as e:
             logger.error(f"Error uploading file: {str(e)}")
             return {"error": f"Error uploading file: {str(e)}"}

-    async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
+    async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
         """Download a file from SharePoint."""
         try:
-            if self.access_token is None:
+            if self.accessToken is None:
                 logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
                 return None

-            endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
+            endpoint = f"sites/{siteId}/drive/items/{fileId}/content"

-            headers = {"Authorization": f"Bearer {self.access_token}"}
+            headers = {"Authorization": f"Bearer {self.accessToken}"}
             timeout = aiohttp.ClientTimeout(total=30)

             async with aiohttp.ClientSession(timeout=timeout) as session:
-                async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response:
+                async with session.get(f"{self.baseUrl}/{endpoint}", headers=headers) as response:
                     if response.status == 200:
                         return await response.read()
                     else:
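The web-URL lookup reduces to a small parsing step before the Graph call. A standalone sketch of just that step, mirroring the code above (the sample URL is made up):

from urllib.parse import urlparse

webUrl = "https://contoso.sharepoint.com/sites/TeamSite"  # sample URL, not from the commit
parsed = urlparse(webUrl)
pathParts = parsed.path.strip('/').split('/')
if len(pathParts) >= 2 and pathParts[0] == 'sites':
    sitePath = '/'.join(pathParts[1:])
    endpoint = f"sites/{parsed.hostname}:/sites/{sitePath}"
    # -> "sites/contoso.sharepoint.com:/sites/TeamSite"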
@@ -335,32 +335,32 @@ class SharepointService:
             logger.error(f"Error downloading file: {str(e)}")
             return None

-    async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
+    async def listFolderContents(self, siteId: str, folderPath: str = "") -> List[Dict[str, Any]]:
         """List contents of a folder."""
         try:
-            if not folder_path or folder_path == "/":
-                endpoint = f"sites/{site_id}/drive/root/children"
+            if not folderPath or folderPath == "/":
+                endpoint = f"sites/{siteId}/drive/root/children"
             else:
-                clean_path = folder_path.lstrip('/')
-                endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
+                cleanPath = folderPath.lstrip('/')
+                endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"

-            result = await self._make_graph_api_call(endpoint)
+            result = await self._makeGraphApiCall(endpoint)

             if "error" in result:
                 logger.warning(f"Failed to list folder contents: {result['error']}")
                 return None

             items = result.get("value", [])
-            processed_items = []
+            processedItems = []

             for item in items:
                 # Determine if it's a folder or file
-                is_folder = 'folder' in item
+                isFolder = 'folder' in item

-                item_info = {
+                itemInfo = {
                     "id": item.get("id"),
                     "name": item.get("name"),
-                    "type": "folder" if is_folder else "file",
+                    "type": "folder" if isFolder else "file",
                     "size": item.get("size", 0),
                     "createdDateTime": item.get("createdDateTime"),
                     "lastModifiedDateTime": item.get("lastModifiedDateTime"),

@@ -368,42 +368,42 @@ class SharepointService:
                 }

                 if "file" in item:
-                    item_info["mimeType"] = item["file"].get("mimeType")
-                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
+                    itemInfo["mimeType"] = item["file"].get("mimeType")
+                    itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")

                 if "folder" in item:
-                    item_info["childCount"] = item["folder"].get("childCount", 0)
+                    itemInfo["childCount"] = item["folder"].get("childCount", 0)

-                processed_items.append(item_info)
+                processedItems.append(itemInfo)

-            return processed_items
+            return processedItems

         except Exception as e:
             logger.error(f"Error listing folder contents: {str(e)}")
             return []

-    async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
+    async def searchFiles(self, siteId: str, query: str) -> List[Dict[str, Any]]:
         """Search for files in a site."""
         try:
-            search_query = query.replace("'", "''")  # Escape single quotes for OData
-            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+            searchQuery = query.replace("'", "''")  # Escape single quotes for OData
+            endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"

-            result = await self._make_graph_api_call(endpoint)
+            result = await self._makeGraphApiCall(endpoint)

             if "error" in result:
                 logger.warning(f"Search failed: {result['error']}")
                 return []

             items = result.get("value", [])
-            processed_items = []
+            processedItems = []

             for item in items:
-                is_folder = 'folder' in item
+                isFolder = 'folder' in item

-                item_info = {
+                itemInfo = {
                     "id": item.get("id"),
                     "name": item.get("name"),
-                    "type": "folder" if is_folder else "file",
+                    "type": "folder" if isFolder else "file",
                     "size": item.get("size", 0),
                     "createdDateTime": item.get("createdDateTime"),
                     "lastModifiedDateTime": item.get("lastModifiedDateTime"),

@@ -412,64 +412,64 @@ class SharepointService:
                 }

                 if "file" in item:
-                    item_info["mimeType"] = item["file"].get("mimeType")
-                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
+                    itemInfo["mimeType"] = item["file"].get("mimeType")
+                    itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")

-                processed_items.append(item_info)
+                processedItems.append(itemInfo)

-            return processed_items
+            return processedItems

         except Exception as e:
             logger.error(f"Error searching files: {str(e)}")
             return []

-    async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
+    async def copyFileAsync(self, siteId: str, sourceFolder: str, sourceFile: str, destFolder: str, destFile: str) -> None:
         """Copy a file from source to destination folder (like original synchronizer)."""
         try:
             # First, download the source file
-            source_path = f"{source_folder}/{source_file}"
-            file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
+            sourcePath = f"{sourceFolder}/{sourceFile}"
+            fileContent = await self.downloadFileByPath(siteId=siteId, filePath=sourcePath)

-            if not file_content:
-                raise Exception(f"Failed to download source file: {source_path}")
+            if not fileContent:
+                raise Exception(f"Failed to download source file: {sourcePath}")

             # Upload to destination
-            await self.upload_file(
-                site_id=site_id,
-                folder_path=dest_folder,
-                file_name=dest_file,
-                content=file_content
+            await self.uploadFile(
+                siteId=siteId,
+                folderPath=destFolder,
+                fileName=destFile,
+                content=fileContent
             )

-            logger.info(f"File copied: {source_file} -> {dest_file}")
+            logger.info(f"File copied: {sourceFile} -> {destFile}")

         except Exception as e:
             # Provide more specific error information
-            error_msg = str(e)
-            if "itemNotFound" in error_msg or "404" in error_msg:
-                raise Exception(f"Source file not found (404): {source_path} - {error_msg}")
+            errorMsg = str(e)
+            if "itemNotFound" in errorMsg or "404" in errorMsg:
+                raise Exception(f"Source file not found (404): {sourcePath} - {errorMsg}")
             else:
-                raise Exception(f"Error copying file: {error_msg}")
+                raise Exception(f"Error copying file: {errorMsg}")

-    async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
+    async def downloadFileByPath(self, siteId: str, filePath: str) -> Optional[bytes]:
         """Download a file by its path within a site."""
         try:
-            if self.access_token is None:
+            if self.accessToken is None:
                 logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
                 return None

             # Clean the path
-            clean_path = file_path.strip('/')
-            endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content"
+            cleanPath = filePath.strip('/')
+            endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content"

             # Use direct HTTP call for file downloads (binary content)
             headers = {
-                "Authorization": f"Bearer {self.access_token}",
+                "Authorization": f"Bearer {self.accessToken}",
             }

             # Remove leading slash from endpoint to avoid double slash
-            clean_endpoint = endpoint.lstrip('/')
-            url = f"{self.base_url}/{clean_endpoint}"
+            cleanEndpoint = endpoint.lstrip('/')
+            url = f"{self.baseUrl}/{cleanEndpoint}"
             logger.debug(f"Downloading file: GET {url}")

             timeout = aiohttp.ClientTimeout(total=30)
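copyFileAsync is now a thin download-then-upload composition of the two renamed primitives. A hedged caller sketch (the site ID and paths are placeholders; `service` is an already-configured SharepointService as in the earlier sketch):

await service.copyFileAsync(
    siteId="contoso.sharepoint.com,guid1,guid2",  # placeholder site ID
    sourceFolder="Shared Documents/in",
    sourceFile="report.docx",
    destFolder="Shared Documents/out",
    destFile="report-copy.docx",
)
# Raises with a "Source file not found (404)" message when Graph reports itemNotFound.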
@@ -7,7 +7,7 @@ import logging
 from typing import Any, Optional, Dict, Callable, List
 from modules.shared.configuration import APP_CONFIG
 from modules.shared.eventManagement import eventManager
-from modules.shared.timezoneUtils import get_utc_timestamp
+from modules.shared.timezoneUtils import getUtcTimestamp
 from modules.shared import jsonUtils

 logger = logging.getLogger(__name__)

@@ -122,7 +122,7 @@ class UtilsService:
             float: Current UTC timestamp in seconds
         """
         try:
-            return get_utc_timestamp()
+            return getUtcTimestamp()
         except Exception as e:
             logger.error(f"Error getting UTC timestamp: {str(e)}")
             return 0.0

@@ -185,6 +185,75 @@ class UtilsService:
             # Silent fail to never break main flow
             pass

+    # ===== Prompt sanitization =====
+
+    def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
+        """
+        Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
+
+        This is the single source of truth for all prompt sanitization across the system.
+        Replaces all scattered sanitization functions with a unified approach.
+
+        Args:
+            content: The content to sanitize
+            contentType: Type of content ("text", "userinput", "json", "document")
+
+        Returns:
+            Safely sanitized content ready for AI prompt insertion
+        """
+        if not content:
+            return ""
+
+        try:
+            import re
+            # Convert to string if not already
+            content_str = str(content)
+
+            # Remove null bytes and control characters (except newlines and tabs)
+            sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
+
+            # Handle different content types with appropriate sanitization
+            if contentType == "userinput":
+                # Extra security for user-controlled content
+                # Escape curly braces to prevent placeholder injection
+                sanitized = sanitized.replace('{', '{{').replace('}', '}}')
+                # Escape quotes and wrap in single quotes
+                sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
+                return f"'{sanitized}'"
+
+            elif contentType == "json":
+                # For JSON content, escape quotes and backslashes
+                sanitized = sanitized.replace('\\', '\\\\')
+                sanitized = sanitized.replace('"', '\\"')
+                sanitized = sanitized.replace('\n', '\\n')
+                sanitized = sanitized.replace('\r', '\\r')
+                sanitized = sanitized.replace('\t', '\\t')
+
+            elif contentType == "document":
+                # For document content, escape special characters
+                sanitized = sanitized.replace('\\', '\\\\')
+                sanitized = sanitized.replace('"', '\\"')
+                sanitized = sanitized.replace("'", "\\'")
+                sanitized = sanitized.replace('\n', '\\n')
+                sanitized = sanitized.replace('\r', '\\r')
+                sanitized = sanitized.replace('\t', '\\t')
+
+            else:  # contentType == "text" or default
+                # Basic text sanitization
+                sanitized = sanitized.replace('\\', '\\\\')
+                sanitized = sanitized.replace('"', '\\"')
+                sanitized = sanitized.replace("'", "\\'")
+                sanitized = sanitized.replace('\n', '\\n')
+                sanitized = sanitized.replace('\r', '\\r')
+                sanitized = sanitized.replace('\t', '\\t')
+
+            return sanitized
+
+        except Exception as e:
+            logger.error(f"Error sanitizing prompt content: {str(e)}")
+            # Return a safe fallback
+            return "[ERROR: Content could not be safely sanitized]"
+
+    # ===== JSON utility wrappers =====
+
+    def jsonStripCodeFences(self, text: str) -> str:
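A short, hedged example of what the new sanitizer produces in its strictest mode (the output follows directly from the replacements above; the no-arg constructor is an assumption, since UtilsService's __init__ is not shown in this hunk):

utils = UtilsService()  # constructor assumed

print(utils.sanitizePromptContent('Ignore {system} "now"', contentType="userinput"))
# 'Ignore {{system}} \"now\"'
# (braces doubled, double quotes escaped, result wrapped in single quotes)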
@@ -34,54 +34,54 @@ class AttributeDefinition(BaseModel):
 MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {}


-def register_model_labels(model_name: str, model_label: Dict[str, str], labels: Dict[str, Dict[str, str]]):
+def registerModelLabels(modelName: str, modelLabel: Dict[str, str], labels: Dict[str, Dict[str, str]]):
     """
     Register labels for a model's attributes and the model itself.

     Args:
-        model_name: Name of the model class
-        model_label: Dictionary mapping language codes to model labels
+        modelName: Name of the model class
+        modelLabel: Dictionary mapping language codes to model labels
             e.g. {"en": "Prompt", "fr": "Invite"}
         labels: Dictionary mapping attribute names to their translations
             e.g. {"name": {"en": "Name", "fr": "Nom"}}
     """
-    MODEL_LABELS[model_name] = {"model": model_label, "attributes": labels}
+    MODEL_LABELS[modelName] = {"model": modelLabel, "attributes": labels}


-def get_model_labels(model_name: str, language: str = "en") -> Dict[str, str]:
+def getModelLabels(modelName: str, language: str = "en") -> Dict[str, str]:
     """
     Get labels for a model's attributes in the specified language.

     Args:
-        model_name: Name of the model class
+        modelName: Name of the model class
         language: Language code (default: "en")

     Returns:
         Dictionary mapping attribute names to their labels in the specified language
     """
-    model_data = MODEL_LABELS.get(model_name, {})
-    attribute_labels = model_data.get("attributes", {})
+    modelData = MODEL_LABELS.get(modelName, {})
+    attributeLabels = modelData.get("attributes", {})

     return {
         attr: translations.get(language, translations.get("en", attr))
-        for attr, translations in attribute_labels.items()
+        for attr, translations in attributeLabels.items()
     }


-def get_model_label(model_name: str, language: str = "en") -> str:
+def getModelLabel(modelName: str, language: str = "en") -> str:
     """
     Get the label for a model in the specified language.

     Args:
-        model_name: Name of the model class
+        modelName: Name of the model class
         language: Language code (default: "en")

     Returns:
         Model label in the specified language, or model name if no label exists
     """
-    model_data = MODEL_LABELS.get(model_name, {})
-    model_label = model_data.get("model", {})
-    return model_label.get(language, model_label.get("en", model_name))
+    modelData = MODEL_LABELS.get(modelName, {})
+    modelLabel = modelData.get("model", {})
+    return modelLabel.get(language, modelLabel.get("en", modelName))


 def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]:

@@ -100,8 +100,8 @@ def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguag

     attributes = []
     model_name = modelClass.__name__
-    labels = get_model_labels(model_name, userLanguage)
-    model_label = get_model_label(model_name, userLanguage)
+    labels = getModelLabels(model_name, userLanguage)
+    model_label = getModelLabel(model_name, userLanguage)

     # Pydantic v2 only
     fields = modelClass.model_fields
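The registry round-trip, using the exact example data from the docstrings above:

registerModelLabels(
    "Prompt",
    {"en": "Prompt", "fr": "Invite"},
    {"name": {"en": "Name", "fr": "Nom"}},
)

getModelLabel("Prompt", "fr")    # -> "Invite"
getModelLabels("Prompt", "fr")   # -> {"name": "Nom"}
getModelLabel("Unknown", "fr")   # -> "Unknown" (falls back to the model name)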
@@ -19,42 +19,42 @@ class DailyRotatingFileHandler(RotatingFileHandler):
     The log file name includes the current date and switches at midnight.
     """

-    def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
-        self.log_dir = log_dir
-        self.filename_prefix = filename_prefix
-        self.current_date = None
-        self.current_file = None
+    def __init__(self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs):
+        self.logDir = logDir
+        self.filenamePrefix = filenamePrefix
+        self.currentDate = None
+        self.currentFile = None

         # Initialize with today's file
-        self._update_file_if_needed()
+        self._updateFileIfNeeded()

         # Call parent constructor with current file
-        super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
+        super().__init__(self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs)

-    def _update_file_if_needed(self):
+    def _updateFileIfNeeded(self):
         """Update the log file if the date has changed"""
         today = datetime.now().strftime("%Y%m%d")

-        if self.current_date != today:
-            self.current_date = today
-            new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
+        if self.currentDate != today:
+            self.currentDate = today
+            newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")

-            if self.current_file != new_file:
-                self.current_file = new_file
+            if self.currentFile != newFile:
+                self.currentFile = newFile
                 return True
         return False

     def emit(self, record):
         """Emit a log record, switching files if date has changed"""
         # Check if we need to switch to a new file
-        if self._update_file_if_needed():
+        if self._updateFileIfNeeded():
             # Close current file and open new one
             if self.stream:
                 self.stream.close()
                 self.stream = None

             # Update the baseFilename for the parent class
-            self.baseFilename = self.current_file
+            self.baseFilename = self.currentFile
             # Reopen the stream
             if not self.delay:
                 self.stream = self._open()

@@ -68,9 +68,9 @@ class AuditLogger:

     def __init__(self):
         self.logger = None
-        self._setup_audit_logger()
+        self._setupAuditLogger()

-    def _setup_audit_logger(self):
+    def _setupAuditLogger(self):
         """Setup the audit logger with daily file rotation"""
         try:
             # Get log directory from config

@@ -96,10 +96,10 @@ class AuditLogger:
             backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))

             fileHandler = DailyRotatingFileHandler(
-                log_dir=logDir,
-                filename_prefix="log_audit",
-                max_bytes=rotationSize,
-                backup_count=backupCount
+                logDir=logDir,
+                filenamePrefix="log_audit",
+                maxBytes=rotationSize,
+                backupCount=backupCount
             )

             # Create formatter for audit log

@@ -120,9 +120,9 @@ class AuditLogger:
             self.logger = logging.getLogger(__name__)
             self.logger.error(f"Failed to setup audit logger: {str(e)}")

-    def log_event(self,
-                  user_id: str,
-                  mandate_id: str,
+    def logEvent(self,
+                 userId: str,
+                 mandateId: str,
                  category: str,
                  action: str,
                  details: str = "",

@@ -131,8 +131,8 @@ class AuditLogger:
         Log an audit event

         Args:
-            user_id: User identifier
-            mandate_id: Mandate identifier (can be empty if not applicable)
+            userId: User identifier
+            mandateId: Mandate identifier (can be empty if not applicable)
             category: Event category (e.g., 'key', 'access', 'data')
             action: Specific action (e.g., 'decode', 'login', 'logout')
             details: Additional details about the event

@@ -148,50 +148,50 @@ class AuditLogger:

             # Format the audit log entry
             # Format: timestamp | userid | mandateid | category | action | details
-            audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"
+            auditEntry = f"{userId} | {mandateId} | {category} | {action} | {details}"

             # Log the event
-            self.logger.info(audit_entry)
+            self.logger.info(auditEntry)

         except Exception as e:
             # Use standard logger as fallback
             logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")

-    def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
+    def logKeyAccess(self, userId: str, mandateId: str, keyName: str, action: str) -> None:
         """Log key access events (decode/encode)"""
-        self.log_event(
-            user_id=user_id,
-            mandate_id=mandate_id,
+        self.logEvent(
+            userId=userId,
+            mandateId=mandateId,
             category="key",
             action=action,
-            details=key_name
+            details=keyName
         )

-    def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
+    def logUserAccess(self, userId: str, mandateId: str, action: str, successInfo: str = "") -> None:
         """Log user access events (login/logout)"""
-        self.log_event(
-            user_id=user_id,
-            mandate_id=mandate_id,
+        self.logEvent(
+            userId=userId,
+            mandateId=mandateId,
             category="access",
             action=action,
-            details=success_info
+            details=successInfo
         )

-    def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
+    def logDataAccess(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
         """Log data access events"""
-        self.log_event(
-            user_id=user_id,
-            mandate_id=mandate_id,
+        self.logEvent(
+            userId=userId,
+            mandateId=mandateId,
             category="data",
             action=action,
             details=details
         )

-    def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
+    def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
         """Log security-related events"""
-        self.log_event(
-            user_id=user_id,
-            mandate_id=mandate_id,
+        self.logEvent(
+            userId=userId,
+            mandateId=mandateId,
             category="security",
             action=action,
             details=details
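The convenience wrappers all funnel into logEvent, which writes pipe-delimited entries; the timestamp is prepended by the formatter configured in _setupAuditLogger, not by logEvent itself. A hedged sketch of a call and the line it produces (identifiers are placeholders; `audit_logger` is the module-level instance imported elsewhere in this commit):

audit_logger.logKeyAccess(
    userId="u-123",
    mandateId="m-456",
    keyName="OPENAI_API_KEY",
    action="decode",
)
# Emits, after the formatter's timestamp:
#   u-123 | m-456 | key | decode | OPENAI_API_KEY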
@ -199,10 +199,10 @@ class Configuration:
|
|||
# Log audit event for secret key access
|
||||
try:
|
||||
from modules.shared.auditLogger import audit_logger
|
||||
audit_logger.log_key_access(
|
||||
user_id=user_id,
|
||||
mandate_id="system",
|
||||
key_name=key,
|
||||
audit_logger.logKeyAccess(
|
||||
userId=user_id,
|
||||
mandateId="system",
|
||||
keyName=key,
|
||||
action="decode"
|
||||
)
|
||||
except Exception:
|
||||
|
|
@ -211,9 +211,9 @@ class Configuration:
|
|||
|
||||
if value.startswith("{") and value.endswith("}"):
|
||||
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
|
||||
return handleSecretJson(value, user_id, key)
|
||||
return handleSecretJson(value, userId=user_id, keyName=key)
|
||||
else:
|
||||
return handleSecretText(value, user_id, key)
|
||||
return handleSecretText(value, userId=user_id, keyName=key)
|
||||
return value
|
||||
return default
|
||||
|
||||
|
|
@ -235,31 +235,31 @@ class Configuration:
|
|||
"""Set a configuration value (for testing/overrides)"""
|
||||
self._data[key] = value
|
||||
|
||||
def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
|
||||
def handleSecretText(value: str, userId: str = "system", keyName: str = "unknown") -> str:
|
||||
"""
|
||||
Handle secret values with encryption/decryption support.
|
||||
|
||||
Args:
|
||||
value: The secret value to handle (may be encrypted)
|
||||
user_id: The user ID making the request (default: "system")
|
||||
key_name: The name of the key being decrypted (default: "unknown")
|
||||
userId: The user ID making the request (default: "system")
|
||||
keyName: The name of the key being decrypted (default: "unknown")
|
||||
|
||||
Returns:
|
||||
str: Processed secret value (decrypted if encrypted)
|
||||
"""
|
||||
if _is_encrypted_value(value):
|
||||
return decrypt_value(value, user_id, key_name)
|
||||
if _isEncryptedValue(value):
|
||||
return decryptValue(value, userId, keyName)
|
||||
return value
|
||||
|
||||
def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
|
||||
def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown") -> str:
|
||||
"""
|
||||
Handle JSON secret values (like Google service account keys) with encryption/decryption support.
|
||||
Validates that the value is valid JSON after decryption.
|
||||
|
||||
Args:
|
||||
value: The JSON secret value to handle (may be encrypted)
|
||||
user_id: The user ID making the request (default: "system")
|
||||
key_name: The name of the key being decrypted (default: "unknown")
|
||||
userId: The user ID making the request (default: "system")
|
||||
keyName: The name of the key being decrypted (default: "unknown")
|
||||
|
||||
Returns:
|
||||
str: Processed JSON secret value (decrypted if encrypted)
|
||||
|
|
@ -268,15 +268,15 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
|
|||
ValueError: If the value is not valid JSON after decryption
|
||||
"""
|
||||
# Decrypt if encrypted
|
||||
if _is_encrypted_value(value):
|
||||
decrypted_value = decrypt_value(value, user_id, key_name)
|
||||
if _isEncryptedValue(value):
|
||||
decryptedValue = decryptValue(value, userId, keyName)
|
||||
else:
|
||||
decrypted_value = value
|
||||
decryptedValue = value
|
||||
|
||||
try:
|
||||
# Validate that it's valid JSON
|
||||
json.loads(decrypted_value)
|
||||
return decrypted_value
|
||||
json.loads(decryptedValue)
|
||||
return decryptedValue
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError(f"Invalid JSON in secret value: {e}")
|
||||
|
||||
|
|
@ -284,12 +284,12 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
|
|||
# Structure: {user_id: {key_name: [timestamps]}}
|
||||
_decryption_attempts = {}
|
||||
|
||||
def _get_master_key(env_type: str = None) -> bytes:
|
||||
def _getMasterKey(envType: str = None) -> bytes:
|
||||
"""
|
||||
Get the master key for the specified environment.
|
||||
|
||||
Args:
|
||||
env_type: The environment type (dev, int, prod, etc.). If None, uses current config.
|
||||
envType: The environment type (dev, int, prod, etc.). If None, uses current config.
|
||||
|
||||
Returns:
|
||||
bytes: The master key for encryption/decryption
|
||||
|
|
@@ -298,24 +298,24 @@ def _get_master_key(env_type: str = None) -> bytes:
        ValueError: If no master key is found
    """
    # Get the key location from config
-   key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
-   if env_type is None:
-       env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+   keyLocation = APP_CONFIG.get('APP_KEY_SYSVAR')
+   if envType is None:
+       envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')

-   if not key_location:
+   if not keyLocation:
        raise ValueError("APP_KEY_SYSVAR not configured")

    # First try to get from environment variable
-   master_key = os.environ.get(key_location)
+   masterKey = os.environ.get(keyLocation)

-   if master_key:
+   if masterKey:
        # If found in environment, use it directly
-       return master_key.encode('utf-8')
+       return masterKey.encode('utf-8')

    # If not in environment, try to read from file
-   if os.path.exists(key_location):
+   if os.path.exists(keyLocation):
        try:
-           with open(key_location, 'r') as f:
+           with open(keyLocation, 'r') as f:
                content = f.read().strip()

                # Parse the key file format: env = key
@@ -326,26 +326,26 @@ def _get_master_key(env_type: str = None) -> bytes:
                    continue

                if '=' in line:
-                   key_env, key_value = line.split('=', 1)
-                   key_env = key_env.strip()
-                   key_value = key_value.strip()
+                   keyEnv, keyValue = line.split('=', 1)
+                   keyEnv = keyEnv.strip()
+                   keyValue = keyValue.strip()

-                   if key_env == env_type:
-                       return key_value.encode('utf-8')
+                   if keyEnv == envType:
+                       return keyValue.encode('utf-8')

-           raise ValueError(f"No key found for environment '{env_type}' in {key_location}")
+           raise ValueError(f"No key found for environment '{envType}' in {keyLocation}")

        except Exception as e:
-           raise ValueError(f"Error reading key file {key_location}: {e}")
+           raise ValueError(f"Error reading key file {keyLocation}: {e}")

-   raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")
+   raise ValueError(f"Master key not found. Checked environment variable '{keyLocation}' and file path")

-def _derive_encryption_key(master_key: bytes) -> bytes:
+def _deriveEncryptionKey(masterKey: bytes) -> bytes:
    """
    Derive a 32-byte encryption key from the master key using PBKDF2.

    Args:
-       master_key: The master key bytes
+       masterKey: The master key bytes

    Returns:
        bytes: 32-byte derived key suitable for Fernet
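The `env = key` file format parsed above, as a self-contained sketch; the function name and sample contents are illustrative, not part of the codebase:

```python
# Illustrative parser for the "env = key" key file format used above.
def parseKeyFile(content: str, envType: str) -> bytes:
    for line in content.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue  # skip blank lines and comments
        if '=' in line:
            keyEnv, keyValue = line.split('=', 1)
            if keyEnv.strip() == envType:
                return keyValue.strip().encode('utf-8')
    raise ValueError(f"No key found for environment '{envType}'")

# parseKeyFile("dev = abc123\nprod = xyz789", "prod") -> b'xyz789'
```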
@@ -360,9 +360,9 @@ def _derive_encryption_key(master_key: bytes) -> bytes:
        iterations=100000,
    )

-   return base64.urlsafe_b64encode(kdf.derive(master_key))
+   return base64.urlsafe_b64encode(kdf.derive(masterKey))

-def _is_encrypted_value(value: str) -> bool:
+def _isEncryptedValue(value: str) -> bool:
    """
    Check if a value is encrypted (starts with environment-specific prefix).
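A standalone sketch of the PBKDF2-to-Fernet derivation this hunk renames; the salt is not visible in this diff, so a fixed placeholder is assumed here:

```python
# Minimal sketch of the PBKDF2 -> Fernet key derivation shown above.
# The salt is NOT visible in this hunk; a static placeholder is assumed.
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def deriveKey(masterKey: bytes, salt: bytes = b"static-salt-placeholder") -> bytes:
    kdf = PBKDF2HMAC(algorithm=hashes.SHA256(), length=32, salt=salt, iterations=100000)
    return base64.urlsafe_b64encode(kdf.derive(masterKey))  # 32 bytes, urlsafe-b64, as Fernet expects

fernet = Fernet(deriveKey(b"master-secret"))
token = fernet.encrypt(b"hello")
assert fernet.decrypt(token) == b"hello"
```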
@@ -382,64 +382,64 @@ def _is_encrypted_value(value: str) -> bool:
            value.startswith('TEST_ENC:') or
            value.startswith('STAGING_ENC:'))

-def _get_encryption_prefix(env_type: str) -> str:
+def _getEncryptionPrefix(envType: str) -> str:
    """
    Get the encryption prefix for the given environment type.

    Args:
-       env_type: The environment type (dev, int, prod, etc.)
+       envType: The environment type (dev, int, prod, etc.)

    Returns:
        str: The encryption prefix
    """
-   return f"{env_type.upper()}_ENC:"
+   return f"{envType.upper()}_ENC:"

-def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
+def _checkDecryptionRateLimit(userId: str, keyName: str, maxPerSecond: int = 10) -> bool:
    """
    Check if decryption is allowed based on rate limiting (max 10 per second per user per key).

    Args:
-       user_id: The user ID making the request
-       key_name: The name of the key being decrypted
-       max_per_second: Maximum decryptions per second (default: 10)
+       userId: The user ID making the request
+       keyName: The name of the key being decrypted
+       maxPerSecond: Maximum decryptions per second (default: 10)

    Returns:
        bool: True if allowed, False if rate limited
    """
-   current_time = time.time()
+   currentTime = time.time()

    # Initialize tracking for this user if not exists
-   if user_id not in _decryption_attempts:
-       _decryption_attempts[user_id] = {}
+   if userId not in _decryption_attempts:
+       _decryption_attempts[userId] = {}

    # Initialize tracking for this key if not exists
-   if key_name not in _decryption_attempts[user_id]:
-       _decryption_attempts[user_id][key_name] = []
+   if keyName not in _decryption_attempts[userId]:
+       _decryption_attempts[userId][keyName] = []

    # Clean old attempts (older than 1 second)
-   _decryption_attempts[user_id][key_name] = [
-       timestamp for timestamp in _decryption_attempts[user_id][key_name]
-       if current_time - timestamp < 1.0
+   _decryption_attempts[userId][keyName] = [
+       timestamp for timestamp in _decryption_attempts[userId][keyName]
+       if currentTime - timestamp < 1.0
    ]

    # Check if we're within rate limit
-   if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
-       logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
+   if len(_decryption_attempts[userId][keyName]) >= maxPerSecond:
+       logger.warning(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' ({maxPerSecond}/sec)")
        return False

    # Record this attempt
-   _decryption_attempts[user_id][key_name].append(current_time)
+   _decryption_attempts[userId][keyName].append(currentTime)
    return True

-def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
+def encryptValue(value: str, envType: str = None, userId: str = "system", keyName: str = "unknown") -> str:
    """
    Encrypt a value using the master key for the specified environment.

    Args:
        value: The plain text value to encrypt
-       env_type: The environment type (dev, int, prod). If None, uses current environment.
-       user_id: The user ID making the request (default: "system")
-       key_name: The name of the key being encrypted (default: "unknown")
+       envType: The environment type (dev, int, prod). If None, uses current environment.
+       userId: The user ID making the request (default: "system")
+       keyName: The name of the key being encrypted (default: "unknown")

    Returns:
        str: The encrypted value with prefix
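The same sliding-window idea in compact, standalone form; the module-level `_decryption_attempts` dict is replaced by a local `defaultdict` for the sketch:

```python
# Condensed form of the limiter above: keep timestamps from the last
# second per (user, key) and refuse once the window is full.
import time
from collections import defaultdict

attempts = defaultdict(lambda: defaultdict(list))

def allowDecryption(userId: str, keyName: str, maxPerSecond: int = 10) -> bool:
    now = time.time()
    window = [t for t in attempts[userId][keyName] if now - t < 1.0]  # drop stale entries
    if len(window) >= maxPerSecond:
        attempts[userId][keyName] = window
        return False
    window.append(now)  # record this attempt
    attempts[userId][keyName] = window
    return True
```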
@@ -447,48 +447,48 @@ def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key
    Raises:
        ValueError: If encryption fails
    """
-   if env_type is None:
-       env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+   if envType is None:
+       envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')

    try:
-       master_key = _get_master_key(env_type)
-       derived_key = _derive_encryption_key(master_key)
-       fernet = Fernet(derived_key)
+       masterKey = _getMasterKey(envType)
+       derivedKey = _deriveEncryptionKey(masterKey)
+       fernet = Fernet(derivedKey)

        # Encrypt the value
-       encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
-       encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')
+       encryptedBytes = fernet.encrypt(value.encode('utf-8'))
+       encryptedB64 = base64.urlsafe_b64encode(encryptedBytes).decode('utf-8')

        # Add environment prefix
-       prefix = _get_encryption_prefix(env_type)
-       encrypted_value = f"{prefix}{encrypted_b64}"
+       prefix = _getEncryptionPrefix(envType)
+       encryptedValue = f"{prefix}{encryptedB64}"

        # Log audit event for encryption
        try:
            from modules.shared.auditLogger import audit_logger
-           audit_logger.log_key_access(
-               user_id=user_id,
-               mandate_id="system",
-               key_name=key_name,
+           audit_logger.logKeyAccess(
+               userId=userId,
+               mandateId="system",
+               keyName=keyName,
                action="encrypt"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

-       return encrypted_value
+       return encryptedValue

    except Exception as e:
        raise ValueError(f"Encryption failed: {e}")

-def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
+def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str:
    """
    Decrypt a value using the master key for the current environment.

    Args:
-       encrypted_value: The encrypted value with prefix
-       user_id: The user ID making the request (default: "system")
-       key_name: The name of the key being decrypted (default: "unknown")
+       encryptedValue: The encrypted value with prefix
+       userId: The user ID making the request (default: "system")
+       keyName: The name of the key being decrypted (default: "unknown")

    Returns:
        str: The decrypted plain text value
@@ -496,59 +496,59 @@ def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str =
    Raises:
        ValueError: If decryption fails
    """
-   if not _is_encrypted_value(encrypted_value):
-       return encrypted_value  # Return as-is if not encrypted
+   if not _isEncryptedValue(encryptedValue):
+       return encryptedValue  # Return as-is if not encrypted

    # Check rate limiting (10 per second per user per key)
-   if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
-       raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")
+   if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
+       raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")

    try:
        # Extract environment type from prefix
-       if encrypted_value.startswith('DEV_ENC:'):
-           env_type = 'dev'
+       if encryptedValue.startswith('DEV_ENC:'):
+           envType = 'dev'
            prefix = 'DEV_ENC:'
-       elif encrypted_value.startswith('INT_ENC:'):
-           env_type = 'int'
+       elif encryptedValue.startswith('INT_ENC:'):
+           envType = 'int'
            prefix = 'INT_ENC:'
-       elif encrypted_value.startswith('PROD_ENC:'):
-           env_type = 'prod'
+       elif encryptedValue.startswith('PROD_ENC:'):
+           envType = 'prod'
            prefix = 'PROD_ENC:'
-       elif encrypted_value.startswith('TEST_ENC:'):
-           env_type = 'test'
+       elif encryptedValue.startswith('TEST_ENC:'):
+           envType = 'test'
            prefix = 'TEST_ENC:'
-       elif encrypted_value.startswith('STAGING_ENC:'):
-           env_type = 'staging'
+       elif encryptedValue.startswith('STAGING_ENC:'):
+           envType = 'staging'
            prefix = 'STAGING_ENC:'
        else:
            raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:")

-       encrypted_part = encrypted_value[len(prefix):]
+       encryptedPart = encryptedValue[len(prefix):]

        # Get master key for the specific environment and derive encryption key
-       master_key = _get_master_key(env_type)
-       derived_key = _derive_encryption_key(master_key)
-       fernet = Fernet(derived_key)
+       masterKey = _getMasterKey(envType)
+       derivedKey = _deriveEncryptionKey(masterKey)
+       fernet = Fernet(derivedKey)

        # Decode and decrypt
-       encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
-       decrypted_bytes = fernet.decrypt(encrypted_bytes)
-       decrypted_value = decrypted_bytes.decode('utf-8')
+       encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8'))
+       decryptedBytes = fernet.decrypt(encryptedBytes)
+       decryptedValue = decryptedBytes.decode('utf-8')

        # Log audit event for decryption
        try:
            from modules.shared.auditLogger import audit_logger
-           audit_logger.log_key_access(
-               user_id=user_id,
-               mandate_id="system",
-               key_name=key_name,
+           audit_logger.logKeyAccess(
+               userId=userId,
+               mandateId="system",
+               keyName=keyName,
                action="decrypt"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

-       return decrypted_value
+       return decryptedValue

    except Exception as e:
        raise ValueError(f"Decryption failed: {e}")
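A hypothetical round trip through the renamed API; it assumes APP_CONFIG resolves APP_KEY_SYSVAR to an environment variable that holds a dev master key, which is not shown in this diff:

```python
# Hypothetical round trip (key material and config are assumed, not real).
token = encryptValue("hunter2", envType="dev", userId="alice", keyName="DB_PASSWORD")
# token starts with "DEV_ENC:" because the environment prefix is added on encrypt
plain = decryptValue(token, userId="alice", keyName="DB_PASSWORD")
# plain == "hunter2"; a second call within the same second counts against the 10/sec limit
```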
@@ -120,8 +120,8 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
    debug_file = os.path.join(debug_dir, "debug_workflow.log")

    # Format the debug entry
-   from modules.shared.timezoneUtils import get_utc_timestamp
-   timestamp = get_utc_timestamp()
+   from modules.shared.timezoneUtils import getUtcTimestamp
+   timestamp = getUtcTimestamp()
    debug_entry = f"[{timestamp}] [{context}] {message}\n"

    # Write to debug file
@@ -102,7 +102,7 @@ def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
    return obj

-def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
+def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
    """
    Generic merger for root-level lists: take first dict as base; for each subsequent part:
    - if value is list and same key exists as list, extend it

@@ -112,7 +112,7 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
    """
    base: Optional[Dict[str, Any]] = None
    parsed: List[Dict[str, Any]] = []
-   for part in json_parts:
+   for part in jsonParts:
        if isinstance(part, (dict, list)):
            obj = part
        else:
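A toy illustration of the documented merge behaviour; the input values are invented:

```python
# Expected behaviour of mergeRootLists on a toy input: the first dict is
# the base, and list-valued keys from later parts are extended onto it.
parts = [
    {"sections": [1], "meta": "a"},
    {"sections": [2, 3]},
    '{"sections": [4]}',  # string parts are parsed as JSON first
]
# mergeRootLists(parts) -> {"sections": [1, 2, 3, 4], "meta": "a"}
```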
@@ -146,61 +146,61 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:

    # Strategy 1: Try to extract sections from the entire text first
    # This handles cases where the JSON structure is broken but content is intact
-   extracted_sections = _extractSectionsRegex(text)
-   if extracted_sections:
-       logger.info(f"Extracted {len(extracted_sections)} sections using regex")
+   extractedSections = _extractSectionsRegex(text)
+   if extractedSections:
+       logger.info(f"Extracted {len(extractedSections)} sections using regex")
        return {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
-           "documents": [{"sections": extracted_sections}]
+           "documents": [{"sections": extractedSections}]
        }

    # Strategy 2: Progressive parsing - try to find longest valid prefix
-   best_result = None
-   best_valid_length = 0
+   bestResult = None
+   bestValidLength = 0

    # Try different step sizes to find the best valid JSON
-   for step_size in [100, 50, 10, 1]:
-       for i in range(len(text), 0, -step_size):
-           test_str = text[:i]
-           closed_str = _closeJsonStructures(test_str)
-           obj, err, _ = tryParseJson(closed_str)
+   for stepSize in [100, 50, 10, 1]:
+       for i in range(len(text), 0, -stepSize):
+           testStr = text[:i]
+           closedStr = _closeJsonStructures(testStr)
+           obj, err, _ = tryParseJson(closedStr)
            if err is None and isinstance(obj, dict):
-               best_result = obj
-               best_valid_length = i
-               logger.debug(f"Progressive parsing success at length {i} (step: {step_size})")
+               bestResult = obj
+               bestValidLength = i
+               logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
                break
-       if best_result:
+       if bestResult:
            break

-   if best_result:
-       logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})")
+   if bestResult:
+       logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")

        # Check if we have sections in the result
-       sections = extractSectionsFromDocument(best_result)
+       sections = extractSectionsFromDocument(bestResult)
        if sections:
            logger.info(f"Progressive parsing found {len(sections)} sections")
-           return best_result
+           return bestResult
        else:
            # No sections found in progressive parsing, try to extract from broken part
            logger.info("Progressive parsing found no sections, trying to extract from broken part")
-           extracted_sections = _extractSectionsRegex(text[best_valid_length:])
-           if extracted_sections:
-               logger.info(f"Extracted {len(extracted_sections)} sections from broken part")
+           extractedSections = _extractSectionsRegex(text[bestValidLength:])
+           if extractedSections:
+               logger.info(f"Extracted {len(extractedSections)} sections from broken part")
                # Merge with the valid part
-               if "documents" not in best_result:
-                   best_result["documents"] = []
-               if not best_result["documents"]:
-                   best_result["documents"] = [{"sections": []}]
-               best_result["documents"][0]["sections"].extend(extracted_sections)
-               return best_result
+               if "documents" not in bestResult:
+                   bestResult["documents"] = []
+               if not bestResult["documents"]:
+                   bestResult["documents"] = [{"sections": []}]
+               bestResult["documents"][0]["sections"].extend(extractedSections)
+               return bestResult

    # Strategy 3: Structure closing - close incomplete structures
-   closed_str = _closeJsonStructures(text)
-   obj, err, _ = tryParseJson(closed_str)
+   closedStr = _closeJsonStructures(text)
+   obj, err, _ = tryParseJson(closedStr)
    if err is None and isinstance(obj, dict):
        logger.info("Repaired JSON using structure closing")
        return obj
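The same progressive-prefix idea as a standalone sketch; unlike the `_closeJsonStructures` hunk below, this version closes brackets in stack order, which is stricter than the bracket counting the module uses:

```python
import json

def closeStructures(prefix: str) -> str:
    # Track unclosed braces/brackets in order, ignoring bracket chars inside strings.
    stack = []
    inString = False
    for i, ch in enumerate(prefix):
        if inString:
            if ch == '"' and prefix[i - 1] != '\\':
                inString = False
        elif ch == '"':
            inString = True
        elif ch in '{[':
            stack.append('}' if ch == '{' else ']')
        elif ch in '}]' and stack:
            stack.pop()
    return prefix + ('"' if inString else '') + ''.join(reversed(stack))

def longestValidPrefix(text: str, step: int = 1):
    # Shrink from the end until an auto-closed prefix parses as JSON.
    for i in range(len(text), 0, -step):
        try:
            return json.loads(closeStructures(text[:i])), i
        except json.JSONDecodeError:
            continue
    return None, 0

obj, n = longestValidPrefix('{"documents": [{"sections": [1, 2')
# obj == {'documents': [{'sections': [1, 2]}]}
```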
@@ -217,16 +217,16 @@ def _closeJsonStructures(text: str) -> str:
        return text

    # Count open/close brackets and braces
-   open_braces = text.count('{')
-   close_braces = text.count('}')
-   open_brackets = text.count('[')
-   close_brackets = text.count(']')
+   openBraces = text.count('{')
+   closeBraces = text.count('}')
+   openBrackets = text.count('[')
+   closeBrackets = text.count(']')

    # Close incomplete structures
    result = text
-   for _ in range(open_braces - close_braces):
+   for _ in range(openBraces - closeBraces):
        result += '}'
-   for _ in range(open_brackets - close_brackets):
+   for _ in range(openBrackets - closeBrackets):
        result += ']'

    return result
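Worth noting: the counter-based closer appends every `}` before any `]`, so nesting that ends inside a list is not always repaired, which is why the progressive loop above keeps shrinking the prefix. A quick check:

```python
# The count-based closer repairs '{"a": {"b": 1' ...
text = '{"a": {"b": 1'
fixed = text + '}' * (text.count('{') - text.count('}')) + ']' * (text.count('[') - text.count(']'))
# fixed == '{"a": {"b": 1}}'  -> valid JSON
# ... but '{"a": [1' becomes '{"a": [1}]', which does not parse,
# because the ']' would need to come before the '}'.
```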
@@ -242,32 +242,32 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
    sections = []

    # Pattern to find section objects
-   section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
+   sectionPattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'

-   for match in re.finditer(section_pattern, text, re.IGNORECASE):
-       section_id = match.group(1)
-       content_type = match.group(2)
+   for match in re.finditer(sectionPattern, text, re.IGNORECASE):
+       sectionId = match.group(1)
+       contentType = match.group(2)
        order = int(match.group(3))

        # Try to extract elements array - look for the elements array after this section
-       elements_match = re.search(
+       elementsMatch = re.search(
            r'"elements"\s*:\s*\[(.*?)\]',
            text[match.end():match.end()+5000]  # Look ahead for elements (large range)
        )

        elements = []
-       if elements_match:
+       if elementsMatch:
            try:
-               elements_str = '[' + elements_match.group(1) + ']'
-               elements = json.loads(elements_str)
+               elementsStr = '[' + elementsMatch.group(1) + ']'
+               elements = json.loads(elementsStr)
            except:
                # If JSON parsing fails, try to extract individual items manually
-               elements_text = elements_match.group(1)
-               elements = _extractElementsFromText(elements_text, content_type)
+               elementsText = elementsMatch.group(1)
+               elements = _extractElementsFromText(elementsText, contentType)

        sections.append({
-           "id": section_id,
-           "content_type": content_type,
+           "id": sectionId,
+           "content_type": contentType,
            "elements": elements,
            "order": order
        })
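The section pattern applied to a truncated fragment; the fragment is invented for the demo:

```python
import re

# Demo of the section pattern on a broken-JSON fragment (made up).
pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
fragment = '{"id": "section_1", "content_type": "list", "order": 2, "elements": [{"text": "a"}'
m = re.search(pattern, fragment, re.IGNORECASE)
# m.groups() -> ('section_1', 'list', '2'); the elements array is recovered separately
```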
@@ -279,7 +279,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
    return sections

-def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]:
+def _extractElementsFromText(elementsText: str, contentType: str) -> List[Dict[str, Any]]:
    """
    Extract elements from text when JSON parsing fails.
    Generic approach that works for any content type.
@@ -290,11 +290,11 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict

    elements = []

-   if content_type == "list":
+   if contentType == "list":
        # Look for {"text": "..."} patterns, including incomplete ones
-       text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
+       text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
        # Also look for incomplete patterns like {"text": "36
-       incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+       incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)

        # Combine both complete and incomplete items
        all_items = text_items + incomplete_items
@@ -303,41 +303,41 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
-           unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+           unique_items = _removeLastIncompleteItem(unique_items, elementsText)

        elements = [{"text": item} for item in unique_items]

-   elif content_type == "paragraph":
+   elif contentType == "paragraph":
        # Look for {"text": "..."} patterns, including incomplete ones
-       text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
-       incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+       text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
+       incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)

        all_items = text_items + incomplete_items
        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
-           unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+           unique_items = _removeLastIncompleteItem(unique_items, elementsText)

        elements = [{"text": item} for item in unique_items]

-   elif content_type == "heading":
+   elif contentType == "heading":
        # Look for {"level": X, "text": "..."} patterns, including incomplete ones
-       heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text)
-       incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+       heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elementsText)
+       incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)

        all_items = heading_items + incomplete_heading_items
        unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
-           unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+           unique_items = _removeLastIncompleteItem(unique_items, elementsText)

        elements = [{"level": level, "text": text} for level, text in unique_items]

-   elif content_type == "table":
+   elif contentType == "table":
        # Look for table patterns
-       table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text)
+       table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elementsText)
        for headers_str, rows_str, caption in table_items:
            # Extract headers
            headers = re.findall(r'"([^"]+)"', headers_str)
@@ -354,31 +354,31 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
                "caption": caption
            })

-   elif content_type == "code":
+   elif contentType == "code":
        # Look for {"code": "...", "language": "..."} patterns, including incomplete ones
-       code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text)
-       incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+       code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elementsText)
+       incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)

        all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
        unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
-           unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+           unique_items = _removeLastIncompleteItem(unique_items, elementsText)

        elements = [{"code": code, "language": lang} for code, lang in unique_items]

    else:
        # Generic fallback - look for any text content, including incomplete
-       text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text)
-       incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
+       text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elementsText)
+       incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)

        all_items = text_items + incomplete_text_items
        unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))

        # Remove the last item if it appears to be incomplete/corrupted
        if unique_items:
-           unique_items = _removeLastIncompleteItem(unique_items, elements_text)
+           unique_items = _removeLastIncompleteItem(unique_items, elementsText)

        elements = [{"text": item} for item in unique_items]
@@ -6,7 +6,7 @@ Ensures all timestamps are properly handled as UTC.
from datetime import datetime, timezone
import time

-def get_utc_now() -> datetime:
+def getUtcNow() -> datetime:
    """
    Get current time in UTC with timezone info.
@@ -15,7 +15,7 @@ def get_utc_now() -> datetime:
    """
    return datetime.now(timezone.utc)

-def get_utc_timestamp() -> float:
+def getUtcTimestamp() -> float:
    """
    Get current UTC timestamp (seconds since epoch with millisecond precision).
@@ -24,14 +24,14 @@ def get_utc_timestamp() -> float:
    """
    return time.time()

-def create_expiration_timestamp(expires_in_seconds: int) -> float:
+def createExpirationTimestamp(expiresInSeconds: int) -> float:
    """
    Create a new expiration timestamp from seconds until expiration.

    Args:
-       expires_in_seconds (int): Seconds until expiration
+       expiresInSeconds (int): Seconds until expiration

    Returns:
        float: UTC timestamp in seconds
    """
-   return get_utc_timestamp() + expires_in_seconds
+   return getUtcTimestamp() + expiresInSeconds
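Typical use of the two helpers renamed in this hunk, as an illustrative sketch:

```python
# Token expiry bookkeeping with these helpers: store an absolute UTC
# timestamp and compare it against "now" later.
expiresAt = createExpirationTimestamp(3600)   # one hour from now
isExpired = getUtcTimestamp() >= expiresAt    # False right after creation
```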
@@ -22,13 +22,11 @@ class AdaptiveLearningEngine:
                                workflowId: str, attemptNumber: int):
        """Record validation result and learn from it"""
        try:
-           actionType = actionContext.get('actionType', 'unknown')
            actionName = actionContext.get('actionName', 'unknown')

            # Store validation history
            validationEntry = {
                'workflowId': workflowId,
-               'actionType': actionType,
                'actionName': actionName,
                'attemptNumber': attemptNumber,
                'validationResult': validationResult,
@@ -42,17 +40,17 @@ class AdaptiveLearningEngine:

            # Track patterns
            if validationResult.get('overallSuccess', False):
-               self.successPatterns[actionType].append(validationEntry)
+               self.successPatterns[actionName].append(validationEntry)
            else:
-               self.failurePatterns[actionType].append(validationEntry)
+               self.failurePatterns[actionName].append(validationEntry)

            # Update attempt count
-           self.actionAttempts[f"{workflowId}:{actionType}"] += 1
+           self.actionAttempts[f"{workflowId}:{actionName}"] += 1

            # Generate learning insights
-           self._generateLearningInsights(workflowId, actionType)
+           self._generateLearningInsights(workflowId, actionName)

-           logger.info(f"Recorded validation for {actionType} (attempt {attemptNumber}): "
+           logger.info(f"Recorded validation for {actionName} (attempt {attemptNumber}): "
                        f"Success={validationResult.get('overallSuccess', False)}, "
                        f"Quality={validationResult.get('qualityScore', 0.0)}")
@@ -86,21 +84,21 @@ class AdaptiveLearningEngine:
            logger.error(f"Error generating adaptive context: {str(e)}")
            return {}

-   def getAdaptiveContextForParameters(self, workflowId: str, actionType: str,
+   def getAdaptiveContextForParameters(self, workflowId: str, actionName: str,
                                        parametersContext: str) -> Dict[str, Any]:
        """Generate adaptive context for parameter selection prompt"""
        try:
-           # Get validation history for this specific action type
+           # Get validation history for this specific action name
            actionValidations = [
                v for v in self.validationHistory
-               if v['workflowId'] == workflowId and v['actionType'] == actionType
+               if v['workflowId'] == workflowId and v['actionName'] == actionName
            ][-3:]  # Last 3 attempts for this action

            # Analyze what went wrong in previous attempts
            failureAnalysis = self._analyzeParameterFailures(actionValidations)

            # Generate specific parameter guidance
-           parameterGuidance = self._generateParameterGuidance(actionType, parametersContext, failureAnalysis)
+           parameterGuidance = self._generateParameterGuidance(actionName, parametersContext, failureAnalysis)

            return {
                'actionValidations': actionValidations,
@@ -206,36 +204,28 @@ class AdaptiveLearningEngine:

        return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available."

-   def _generateParameterGuidance(self, actionType: str, parametersContext: str,
+   def _generateParameterGuidance(self, actionName: str, parametersContext: str,
                                   failureAnalysis: Dict[str, Any]) -> str:
-       """Generate specific parameter guidance based on previous failures"""
+       """Generate generic parameter guidance based on previous failures (no app-specific logic)."""
        if not failureAnalysis.get('hasFailures', False):
            return "No previous parameter failures. Use standard parameter values."

-       guidance_parts = []
+       guidanceParts = []

-       # Add attempt awareness
+       # Attempt awareness
        attemptNumber = failureAnalysis.get('attemptNumber', 1)
-       if attemptNumber >= 3:
-           guidance_parts.append(f"ATTEMPT #{attemptNumber}: Previous attempts failed. Adjust parameters based on validation feedback.")
+       if attemptNumber and attemptNumber >= 3:
+           guidanceParts.append(f"Attempt #{attemptNumber}: Adjust parameters based on validation feedback.")

-       # Add specific parameter guidance based on action type
-       if actionType == "outlook.composeAndSendEmailWithContext":
-           guidance_parts.append("EMAIL PARAMETER GUIDANCE:")
-           guidance_parts.append("- context: Be very specific about account (valueon), appointment time (Friday), and requirements")
-           guidance_parts.append("- emailStyle: Use 'formal' for business emails")
-           guidance_parts.append("- maxLength: Set to 2000+ for detailed emails with summaries")
-
-           # Add specific guidance based on common failures
-           commonIssues = failureAnalysis.get('commonIssues', {})
-           if any("account" in str(issue).lower() for issue in commonIssues.keys()):
-               guidance_parts.append("- context: MUST specify 'from valueon account' explicitly")
-           if any("attachment" in str(issue).lower() for issue in commonIssues.keys()):
-               guidance_parts.append("- documentList: Ensure PDF is properly referenced")
-           if any("summary" in str(issue).lower() for issue in commonIssues.keys()):
-               guidance_parts.append("- context: MUST request '10-12 sentence German summary' explicitly")
+       # Generic issues summary
+       commonIssues = failureAnalysis.get('commonIssues', {}) or {}
+       if commonIssues:
+           guidanceParts.append("Address the following parameter issues:")
+           for issueKey, issueDesc in commonIssues.items():
+               guidanceParts.append(f"- {issueKey}: {issueDesc}")

-       return "\n".join(guidance_parts) if guidance_parts else "Use standard parameter values."
+       # Keep guidance format stable
+       return "\n".join(guidanceParts) if guidanceParts else "Use standard parameter values."

    def _getEscalationLevel(self, workflowId: str) -> str:
        """Determine escalation level based on failure patterns"""
@@ -251,7 +241,7 @@ class AdaptiveLearningEngine:
        else:
            return "low"

-   def _generateLearningInsights(self, workflowId: str, actionType: str):
+   def _generateLearningInsights(self, workflowId: str, actionName: str):
        """Generate learning insights for a workflow"""
        if workflowId not in self.learningInsights:
            self.learningInsights[workflowId] = {}

@@ -263,7 +253,7 @@ class AdaptiveLearningEngine:
            'totalAttempts': len(workflowValidations),
            'successfulAttempts': len([v for v in workflowValidations if v['success']]),
            'failedAttempts': len([v for v in workflowValidations if not v['success']]),
-           'lastActionType': actionType,
+           'lastActionName': actionName,
            'escalationLevel': self._getEscalationLevel(workflowId)
        }
@@ -26,14 +26,14 @@ class ContentValidator:
        if isinstance(data, dict) and 'content' in data:
            content = data['content']
            # For large content, check size before converting to string
-           if hasattr(content, '__len__') and len(str(content)) > 100000:  # 100KB threshold
+           if hasattr(content, '__len__') and len(str(content)) > 10000:  # 10KB threshold
                # For very large content, return a size indicator instead
                return f"[Large document content - {len(str(content))} characters - truncated for validation]"
            return str(content)
        else:
            content = data
            # For large content, check size before converting to string
-           if hasattr(content, '__len__') and len(str(content)) > 100000:  # 100KB threshold
+           if hasattr(content, '__len__') and len(str(content)) > 10000:  # 10KB threshold
                return f"[Large document content - {len(str(content))} characters - truncated for validation]"
            return str(content)
        return ""
@@ -30,7 +30,7 @@ class IntentAnalyzer:
        analysisPrompt = f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered.

-USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')}
+USER REQUEST: {self.services.utils.sanitizePromptContent(userPrompt, 'userinput')}

CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
@@ -571,7 +571,7 @@ class ActionplanMode(BaseMode):
                actionData["execParameters"] = {}

            # Use generic field separation based on ActionItem model
-           simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+           simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

@@ -715,7 +715,7 @@ class ActionplanMode(BaseMode):
                actionData["execParameters"] = {}

            # Use generic field separation based on ActionItem model
-           simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+           simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@@ -98,7 +98,12 @@ class ReactMode(BaseMode):

        # NEW: Add content validation (against original cleaned user prompt / workflow intent)
        if getattr(self, 'workflowIntent', None) and result.documents:
-           validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
+           # Validate ONLY the produced JSON (structured content), not rendered files
+           from types import SimpleNamespace
+           validationDocs = []
+           if hasattr(result, 'content') and result.content:
+               validationDocs.append(SimpleNamespace(documentName='generated.json', documentData={'content': result.content}))
+           validationResult = await self.contentValidator.validateContent(validationDocs, self.workflowIntent)
            observation['contentValidation'] = validationResult
            quality_score = validationResult.get('qualityScore', 0.0)
            if quality_score is None:

@@ -106,9 +111,9 @@ class ReactMode(BaseMode):
            logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")

            # NEW: Record validation result for adaptive learning
+           actionValue = selection.get('action', 'unknown')
            actionContext = {
-               'actionType': selection.get('action', {}).get('action', 'unknown'),
-               'actionName': selection.get('action', {}).get('action', 'unknown'),
+               'actionName': actionValue,
                'workflowId': context.workflow_id
            }

@@ -747,7 +752,7 @@ Return only the user-friendly message, no technical details."""
                actionData["execParameters"] = {}

            # Use generic field separation based on ActionItem model
-           simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+           simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

@@ -838,7 +843,7 @@ Return only the user-friendly message, no technical details."""
                actionData["execParameters"] = {}

            # Use generic field separation based on ActionItem model
-           simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
+           simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
@@ -215,7 +215,7 @@ class WorkflowManager:
            "  }\n"
            "  ]\n"
            "}\n\n"
-           f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}"
+           f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
        )

        # Call AI analyzer (planning call - will use static parameters)
107
naming_violations_report.csv
Normal file

@@ -0,0 +1,107 @@
Module,Function Names,Parameter Names,Variable Names,Total
modules/workflows/methods/methodSharepoint.py,0,2,211,213
modules/workflows/methods/methodOutlook.py,0,3,131,134
modules/services/serviceAi/subDocumentProcessing.py,0,0,104,104
modules/features/syncDelta/mainSyncDelta.py,1,10,88,99
modules/shared/jsonUtils.py,0,3,88,91
modules/services/serviceGeneration/renderers/rendererDocx.py,3,8,79,90
modules/services/serviceWorkflow/mainServiceWorkflow.py,0,3,85,88
modules/services/serviceGeneration/renderers/rendererPptx.py,2,7,73,82
modules/services/serviceGeneration/renderers/rendererPdf.py,3,8,50,61
modules/connectors/connectorVoiceGoogle.py,1,2,52,55
modules/services/serviceGeneration/renderers/rendererHtml.py,3,6,46,55
modules/services/serviceGeneration/renderers/rendererBaseTemplate.py,3,21,27,51
modules/shared/configuration.py,2,17,30,49
modules/services/serviceExtraction/subMerger.py,2,5,31,38
modules/connectors/connectorDbPostgre.py,0,14,20,34
modules/interfaces/interfaceDbAppObjects.py,0,8,26,34
modules/routes/routeSecurityGoogle.py,0,0,32,32
modules/shared/attributeUtils.py,3,4,25,32
modules/interfaces/interfaceDbChatObjects.py,0,4,27,31
modules/routes/routeSecurityAdmin.py,0,2,28,30
modules/services/serviceNeutralization/subProcessList.py,7,0,22,29
modules/services/serviceGeneration/renderers/rendererText.py,3,7,19,29
modules/routes/routeSecurityMsft.py,0,0,27,27
modules/services/serviceGeneration/renderers/rendererMarkdown.py,3,7,17,27
modules/services/serviceGeneration/renderers/rendererXlsx.py,3,0,24,27
modules/services/serviceGeneration/renderers/rendererImage.py,3,2,21,26
modules/security/tokenManager.py,4,7,14,25
modules/workflows/workflowManager.py,0,0,25,25
modules/services/serviceGeneration/renderers/rendererCsv.py,3,5,17,25
modules/shared/auditLogger.py,5,16,3,24
modules/shared/debugLogger.py,0,0,24,24
modules/workflows/processing/shared/placeholderFactory.py,0,0,24,24
modules/interfaces/interfaceDbAppAccess.py,0,2,21,23
modules/connectors/connectorTicketsJira.py,0,0,22,22
modules/services/serviceGeneration/renderers/registry.py,7,3,12,22
modules/routes/routeDataConnections.py,1,1,19,21
modules/security/tokenRefreshService.py,0,2,19,21
modules/services/serviceExtraction/extractors/extractorPptx.py,0,1,16,17
modules/routes/routeSecurityLocal.py,0,0,16,16
modules/workflows/methods/methodBase.py,0,4,12,16
modules/services/serviceGeneration/mainServiceGeneration.py,0,4,11,15
modules/services/serviceUtils/mainServiceUtils.py,0,14,1,15
modules/features/neutralizePlayground/mainNeutralizePlayground.py,8,5,2,15
modules/interfaces/interfaceTicketObjects.py,0,5,9,14
modules/services/serviceNeutralization/subParseString.py,7,0,6,13
modules/workflows/processing/modes/modeReact.py,0,1,11,12
modules/interfaces/interfaceDbComponentAccess.py,0,2,9,11
modules/services/serviceAi/subCoreAi.py,0,0,11,11
modules/services/serviceExtraction/subRegistry.py,0,0,11,11
modules/services/serviceNeutralization/mainServiceNeutralization.py,0,2,9,11
modules/interfaces/interfaceAiObjects.py,0,0,10,10
modules/services/serviceAi/subSharedAiUtils.py,0,3,7,10
modules/connectors/connectorDbJson.py,0,3,6,9
modules/workflows/methods/methodAi.py,0,0,9,9
modules/services/serviceExtraction/subPromptBuilderExtraction.py,0,0,9,9
modules/services/serviceGeneration/subDocumentUtility.py,0,3,6,9
modules/services/serviceNeutralization/subProcessCommon.py,7,2,0,9
modules/services/serviceNeutralization/subProcessText.py,5,0,4,9
modules/interfaces/interfaceDbChatAccess.py,0,2,6,8
modules/security/auth.py,0,1,7,8
modules/aicore/aicorePluginAnthropic.py,0,0,7,7
modules/security/tokenRefreshMiddleware.py,0,2,4,6
modules/services/serviceGeneration/renderers/rendererJson.py,3,0,3,6
analyze_naming_violations.py,5,0,0,5
modules/aicore/aicorePluginOpenai.py,0,0,5,5
modules/routes/routeVoiceGoogle.py,0,0,5,5
modules/shared/eventManagement.py,2,3,0,5
modules/workflows/processing/adaptive/intentAnalyzer.py,0,0,5,5
modules/workflows/processing/shared/executionState.py,0,5,0,5
modules/services/serviceGeneration/subJsonSchema.py,0,0,5,5
modules/services/serviceNeutralization/subPatterns.py,5,0,0,5
modules/services/serviceNeutralization/subProcessBinary.py,4,0,1,5
modules/services/serviceExtraction/extractors/extractorXlsx.py,0,0,5,5
modules/interfaces/interfaceDbComponentObjects.py,0,3,1,4
modules/routes/routeDataNeutralization.py,0,0,4,4
modules/routes/routeWorkflows.py,0,0,4,4
modules/shared/timezoneUtils.py,3,1,0,4
modules/workflows/processing/adaptive/contentValidator.py,0,0,4,4
modules/workflows/processing/core/messageCreator.py,0,0,4,4
modules/services/serviceSharepoint/mainServiceSharepoint.py,0,0,4,4
modules/routes/routeDataUsers.py,0,0,3,3
modules/services/serviceExtraction/subPipeline.py,0,0,3,3
app.py,0,0,2,2
modules/datamodels/datamodelChat.py,0,1,1,2
modules/routes/routeAttributes.py,0,0,2,2
modules/routes/routeDataPrompts.py,0,0,2,2
modules/security/csrf.py,0,1,1,2
modules/security/jwtService.py,0,0,2,2
modules/workflows/processing/adaptive/learningEngine.py,0,0,2,2
modules/workflows/processing/modes/modeActionplan.py,0,0,2,2
modules/workflows/processing/shared/methodDiscovery.py,0,0,2,2
modules/services/serviceNormalization/mainServiceNormalization.py,0,0,2,2
modules/services/serviceExtraction/extractors/extractorImage.py,0,0,2,2
modules/aicore/aicoreBase.py,0,0,1,1
modules/aicore/aicoreModelSelector.py,0,0,1,1
modules/connectors/connectorTicketsClickup.py,0,0,1,1
modules/datamodels/datamodelDocument.py,0,1,0,1
modules/datamodels/datamodelSecurity.py,0,0,1,1
modules/routes/routeAdmin.py,0,0,1,1
modules/routes/routeDataFiles.py,0,0,1,1
modules/workflows/processing/workflowProcessor.py,0,0,1,1
modules/workflows/processing/adaptive/adaptiveLearningEngine.py,0,0,1,1
modules/workflows/processing/core/actionExecutor.py,0,0,1,1
modules/workflows/processing/core/taskPlanner.py,0,0,1,1
modules/workflows/processing/modes/modeBase.py,0,0,1,1
modules/services/serviceAi/subDocumentGeneration.py,0,0,1,1
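Since the report is plain CSV, a stdlib snippet is enough to rank the worst offenders; the file name is the one committed here:

```python
# Rank modules by total naming violations using only the standard library.
import csv

with open('naming_violations_report.csv', newline='') as f:
    rows = sorted(csv.DictReader(f), key=lambda r: int(r['Total']), reverse=True)
for r in rows[:5]:
    print(r['Module'], r['Total'])
```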
184
processDocumentsWithContinuation_usage_analysis.md
Normal file

@@ -0,0 +1,184 @@
# Analysis: `processDocumentsWithContinuation` and Subfunctions Usage

## Executive Summary

**FINDING**: The function `processDocumentsWithContinuation` in `subDocumentProcessing.py` is **NOT USED** anywhere in the active codebase. The continuation chain was only referenced by the deleted `subDocumentGeneration.py` module.

---

## Main Function: `processDocumentsWithContinuation`

**Location**: `gateway/modules/services/serviceAi/subDocumentProcessing.py:303`

**Status**: ❌ **NOT USED**

### Usage Search Results

- ❌ No actual code calls to `.processDocumentsWithContinuation(`
- ⚠️ Only mentioned in documentation files:
  - `wiki/poweron/appdoc/doc_system_function_relationship_ai.md` (documentation)
  - `gateway/callAiWithDocumentGeneration_usage_analysis.md` (previous analysis - noted it was called by deleted code)

### Why It's Not Used

The only caller was `subDocumentGeneration._processDocumentsUnified()`, which we just deleted. The current active codebase uses `subCoreAi.callAiDocuments()`, which has its own continuation logic via `_callAiWithLooping()`.

---

## Function Call Chain Analysis

```
processDocumentsWithContinuation (line 303) - ❌ NOT USED
├─> _buildContinuationPrompt (line 319, 324) - ❌ ONLY USED HERE
└─> _processWithContinuationLoop (line 322, 373) - ❌ ONLY USED HERE
    ├─> _buildContinuationIterationPrompt (line 393, 459) - ❌ ONLY USED HERE
    └─> processDocumentsPerChunkJsonWithPrompt (line 402) - ✅ USED ELSEWHERE
```

---

## Subfunction Analysis

### 1. `_buildContinuationPrompt`
**Location**: Line 324-371
**Status**: ✅ **USED** (but only internally)
**Called by**: `processDocumentsWithContinuation` (line 319)
**Effectively**: ❌ **UNUSED** (because parent function is unused)

**Internal Usage**:
- Called from `processDocumentsWithContinuation` at line 319

**Functionality**:
- Builds a prompt with continuation instructions
- Adds JSON structure requirements with a `"continue": true/false` flag
- Adds a `continuation_context` field specification

**Note**: This uses a different continuation pattern than `SubCoreAi._callAiWithLooping()`:
- This uses `"continue": true/false` + `"continuation_context"` for document sections
- SubCoreAi uses `buildContinuationContext()` with `last_raw_json`

---

### 2. `_processWithContinuationLoop`
**Location**: Line 373-457
**Status**: ✅ **USED** (but only internally)
**Called by**: `processDocumentsWithContinuation` (line 322)
**Effectively**: ❌ **UNUSED** (because parent function is unused)

**Internal Usage**:
- Called from `processDocumentsWithContinuation` at line 322

**External Dependencies**:
- Calls `self._buildContinuationIterationPrompt()` (line 393)
- Calls `self.processDocumentsPerChunkJsonWithPrompt()` (line 402)

**Functionality**:
- Implements the continuation loop (max 10 iterations)
- Accumulates sections across iterations
- Checks the `continue` flag and `continuation_context` to determine if more iterations are needed
- Builds the final result with accumulated sections

---

### 3. `_buildContinuationIterationPrompt`
**Location**: Line 459-498
**Status**: ✅ **USED** (but only internally)
**Called by**: `_processWithContinuationLoop` (line 393)
**Effectively**: ❌ **UNUSED** (because parent chain is unused)

**Internal Usage**:
- Called from `_processWithContinuationLoop` at line 393 (in loop, conditionally)

**Functionality**:
- Builds a prompt for a continuation iteration with context
- Includes a summary of previously generated content (last 3 sections)
- Includes continuation instructions with the last section ID, element index, and remaining requirements

---

### 4. `processDocumentsPerChunkJsonWithPrompt`
**Location**: Line 219-301
**Status**: ✅ **USED ELSEWHERE**
**Called by**:
- `_processWithContinuationLoop` (line 402)
- Also referenced in backup files (not active code)

**Internal Usage**:
- Called from `_processWithContinuationLoop` at line 402

**External Usage Search**:
- ✅ Used internally by the continuation loop
- ⚠️ Referenced in `local/backup/backup_mainServiceAi.py.txt` (backup file, not active)
- ❌ Not used by any other active code

**Functionality**:
- Processes documents with per-chunk AI calls
- Uses a custom prompt instead of the default extraction prompt
- Returns a merged JSON document

**Note**: This function itself is only used by the continuation loop. However, it's a more general function that could be useful, so it's not "dead code" - it's just currently only used by unused code.

---

## Summary Table

| Function | Line | Status | Called By | Effectively Used? |
|----------|------|--------|-----------|-------------------|
| `processDocumentsWithContinuation` | 303 | ❌ Not used | (external) | ❌ No |
| `_buildContinuationPrompt` | 324 | ✅ Used internally | `processDocumentsWithContinuation:319` | ❌ No |
| `_processWithContinuationLoop` | 373 | ✅ Used internally | `processDocumentsWithContinuation:322` | ❌ No |
| `_buildContinuationIterationPrompt` | 459 | ✅ Used internally | `_processWithContinuationLoop:393` | ❌ No |
| `processDocumentsPerChunkJsonWithPrompt` | 219 | ✅ Used internally | `_processWithContinuationLoop:402` | ⚠️ **ONLY USED BY UNUSED CODE** |

---

## Current Active Implementation

The active continuation logic is in `subCoreAi.callAiDocuments()` → `_callAiWithLooping()`:
- Uses `buildGenerationPrompt()` with a `continuationContext` parameter
- Uses `buildContinuationContext()` to build context from sections
- Different continuation pattern (uses `last_raw_json` instead of `continuation_context`)

---

## Dead Code Identification

**Completely Unused Chain** (can be safely removed):
1. ✅ `processDocumentsWithContinuation` - entry point, not called
2. ✅ `_buildContinuationPrompt` - only used by #1
3. ✅ `_processWithContinuationLoop` - only used by #1
4. ✅ `_buildContinuationIterationPrompt` - only used by #3

**Potentially Unused** (only used by dead code):
- ⚠️ `processDocumentsPerChunkJsonWithPrompt` - only caller is dead code, but the function is general-purpose

---

## Recommendations

1. **Remove Dead Code Chain**: All four functions (`processDocumentsWithContinuation`, `_buildContinuationPrompt`, `_processWithContinuationLoop`, `_buildContinuationIterationPrompt`) can be safely removed.

2. **For `processDocumentsPerChunkJsonWithPrompt`**:
   - **Option A**: Remove it if not needed (it's only used by the dead continuation chain)
   - **Option B**: Keep it if it might be useful for future custom prompt processing
   - **Recommendation**: Since it's a general-purpose function that could be useful, keep it but note that it's currently unused.

3. **If Keeping**: Document why this continuation logic exists but is unused, or mark it as a deprecated/legacy alternative to `_callAiWithLooping()`.

---

## Verification Commands

To verify these findings:

```bash
# Search for actual function calls (should return no results for the main function)
grep -r "\.processDocumentsWithContinuation(" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup

# Search for _buildContinuationPrompt usage (should only find the definition)
grep -r "_buildContinuationPrompt" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"

# Search for _processWithContinuationLoop usage (should only find the definition)
grep -r "_processWithContinuationLoop" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
```
@@ -39,7 +39,7 @@ else:

# Import encryption functions
try:
-   from modules.shared.configuration import encrypt_value
+   from modules.shared.configuration import encryptValue
except ImportError as e:
    print(f"Error: Could not import encryption functions from shared.configuration: {e}")
    print(f"Make sure you're running this script from the gateway directory")

@@ -254,7 +254,7 @@ def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_b
            print(f"  🔐 Encrypting {key}...")

            # Encrypt the value using the environment type from the file
-           encrypted_value = encrypt_value(value, file_env_type)
+           encrypted_value = encryptValue(value, file_env_type)

            # Replace the line in the file content
            new_line = f"{key} = {encrypted_value}\n"

@@ -30,7 +30,7 @@ from datetime import datetime
# Add the modules directory to the Python path
sys.path.insert(0, str(Path(__file__).parent / 'modules'))

-from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
+from modules.shared.configuration import encryptValue, decryptValue, isEncryptedValue

def get_env_type_from_file(file_path: Path) -> str:
    """

@@ -247,7 +247,7 @@ def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool =
            print(f"  🔐 Encrypting {key}...")

            # Encrypt the value using the environment type from the file
-           encrypted_value = encrypt_value(value, file_env_type)
+           encrypted_value = encryptValue(value, file_env_type)

            # Replace the line in the file content
            new_line = f"{key} = {encrypted_value}\n"

@@ -360,8 +360,8 @@ def main():

    # Handle decryption
    if args.decrypt:
-       if _is_encrypted_value(args.decrypt):
-           decrypted = decrypt_value(args.decrypt)
+       if isEncryptedValue(args.decrypt):
+           decrypted = decryptValue(args.decrypt)
            print(f"Decrypted value: {decrypted}")
        else:
            print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")

@@ -411,7 +411,7 @@ def main():
        return

    # Encrypt the value
-   encrypted_value = encrypt_value(value_to_encrypt, args.env)
+   encrypted_value = encryptValue(value_to_encrypt, args.env)

    print(f"\n✓ Encryption successful!")
    print(f"Environment: {args.env or 'current'}")