refactored whole codebase for camelCase part 1 of 2
This commit is contained in:
parent
26b2109844
commit
c44fc92568
86 changed files with 3969 additions and 5005 deletions
242
analyze_naming_violations.py
Normal file
242
analyze_naming_violations.py
Normal file
|
|
@ -0,0 +1,242 @@
|
||||||
|
"""
|
||||||
|
Script to analyze codebase for snake_case naming violations that should be camelStyle.
|
||||||
|
Excludes routes (decorated endpoint functions) and JSON field names.
|
||||||
|
"""
|
||||||
|
import ast
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Tuple
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Regular expressions for contexts that belong to external library interfaces.
# shouldExcludeName() skips a name when its context matches any of them.
EXCLUDE_PATTERNS = [
    # FastAPI route decorators
    r'@.*\.(get|post|put|delete|patch|options|head)',
    # Database ORM attributes
    r'self\.(db|db_|model|orm)',
    # ORM methods
    r'\.(objects|query|filter|get|all)',
    # FastAPI params
    r'(request|response|response_model|status_code)',
    # String literals
    r'(snake_case|kebab-case)',
]
|
||||||
|
|
||||||
|
# Names of external libraries whose attribute patterns should not be changed.
EXTERNAL_LIB_ATTRIBUTES = {
    'pydantic',
    'fastapi',
    'sqlalchemy',
    'psycopg',
    'requests',
    'aiohttp',
    'azure',
    'google',
    'openai',
    'anthropic',
    'reportlab',
    'docx',
    'pptx',
    'openpyxl',
    'json',
    'logging',
    'datetime',
    'typing',
}
|
||||||
|
|
||||||
|
def isRouteFile(filePath: str) -> bool:
    """Tell whether a path looks like it belongs to a route module.

    A path qualifies when it contains ``routes`` anywhere (case-sensitive), or
    when its base file name contains ``route`` (case-insensitive).
    """
    if 'routes' in filePath:
        return True
    baseName = os.path.basename(filePath)
    return 'route' in baseName.lower()
|
||||||
|
|
||||||
|
def shouldExcludeName(name: str, context: str = "") -> bool:
    """Decide whether a name is exempt from the naming analysis.

    Dunder names (``__x__``) are always exempt. Any other name is exempt when
    ``context`` matches one of EXCLUDE_PATTERNS (case-insensitive search).
    """
    isDunder = name.startswith('__') and name.endswith('__')
    if isDunder:
        return True

    # The context decides whether the name belongs to an external interface.
    return any(
        re.search(excludePattern, context, re.IGNORECASE)
        for excludePattern in EXCLUDE_PATTERNS
    )
|
||||||
|
|
||||||
|
def isSnakeCase(name: str) -> bool:
    """Tell whether a name counts as snake_case for this analysis.

    Empty names and names with a leading underscore never count. Otherwise a
    name counts when it contains an underscore and is not written entirely in
    upper case (ALL_CAPS constants are left alone).
    """
    if not name:
        return False
    if name[0] == '_':
        return False
    if '_' not in name:
        return False
    return not name.isupper()
|
||||||
|
|
||||||
|
def analyzeFile(filePath: str) -> Dict[str, List[str]]:
    """Analyze a Python file for snake_case naming violations.

    The file is parsed with ``ast`` and three kinds of names are checked:

    * function names (sync and async), except functions carrying a route
      decorator such as ``@router.get(...)``;
    * parameter names of every function, including positional-only,
      keyword-only, ``*args`` and ``**kwargs`` parameters (``self`` and
      ``cls`` are ignored);
    * variables bound by assignments, ``for`` loops and ``with`` statements
      (sync and async), including names inside tuple/list unpacking.

    Args:
        filePath: Path of the Python source file to analyze.

    Returns:
        A dict with the keys ``'functions'``, ``'parameters'`` and
        ``'variables'``, each holding a list of violation descriptions that
        include the line number. All lists are empty when the file cannot be
        decoded as UTF-8 or is not valid Python.
    """
    violations = {
        'functions': [],
        'parameters': [],
        'variables': []
    }

    try:
        with open(filePath, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content, filename=filePath)
    except (SyntaxError, UnicodeDecodeError):
        # Best effort: an unreadable or unparsable file reports no violations.
        return violations

    routeMethods = {'get', 'post', 'put', 'delete', 'patch', 'options', 'head'}

    def isViolation(name):
        """Return True when the name is snake_case and not excluded."""
        return isSnakeCase(name) and not shouldExcludeName(name)

    def isRouteDecorator(decorator):
        """Return True for ``@x.get`` / ``@x.get(...)`` style decorators."""
        target = decorator.func if isinstance(decorator, ast.Call) else decorator
        return isinstance(target, ast.Attribute) and target.attr in routeMethods

    def iterTargetNames(target):
        """Yield every plain name bound by an assignment-like target."""
        if isinstance(target, ast.Name):
            yield target.id
        elif isinstance(target, (ast.Tuple, ast.List)):
            for element in target.elts:
                yield from iterTargetNames(element)
        elif isinstance(target, ast.Starred):
            yield from iterTargetNames(target.value)

    def iterParameters(arguments):
        """Yield all parameter nodes of a function in declaration order."""
        yield from arguments.posonlyargs
        yield from arguments.args
        if arguments.vararg is not None:
            yield arguments.vararg
        yield from arguments.kwonlyargs
        if arguments.kwarg is not None:
            yield arguments.kwarg

    class NamingAnalyzer(ast.NodeVisitor):
        """AST visitor that appends violations to the enclosing result dict."""

        def _recordVariables(self, target, lineno):
            for varName in iterTargetNames(target):
                if isViolation(varName):
                    violations['variables'].append(f"{varName} (line {lineno})")

        def visit_FunctionDef(self, node):
            # Route endpoint names are exempt, but their parameters and
            # local variables are still checked.
            isRouteEndpoint = any(isRouteDecorator(d) for d in node.decorator_list)

            funcName = node.name
            if not isRouteEndpoint and isViolation(funcName):
                violations['functions'].append(f"{funcName} (line {node.lineno})")

            for arg in iterParameters(node.args):
                paramName = arg.arg
                if paramName in ('self', 'cls'):
                    continue
                if isViolation(paramName):
                    violations['parameters'].append(
                        f"{paramName} in {funcName} (line {node.lineno})"
                    )

            # Only the body is walked; decorators and defaults bind no names.
            for stmt in node.body:
                self.visit(stmt)

        # ``async def`` is a distinct node type; without this alias the
        # visitor would skip async function names and parameters entirely.
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Assign(self, node):
            for target in node.targets:
                self._recordVariables(target, node.lineno)

        def visit_For(self, node):
            self._recordVariables(node.target, node.lineno)
            self.generic_visit(node)

        visit_AsyncFor = visit_For

        def visit_With(self, node):
            for item in node.items:
                if item.optional_vars is not None:
                    self._recordVariables(item.optional_vars, node.lineno)
            self.generic_visit(node)

        visit_AsyncWith = visit_With

    NamingAnalyzer().visit(tree)

    return violations
|
||||||
|
|
||||||
|
def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]:
    """Run the naming analysis over every Python file below a directory.

    ``rootDir`` is tried as given, then relative to the resolved current
    directory; if neither exists the current directory itself is scanned.
    Files whose relative path contains ``test`` (any case), ``tool_`` or
    ``__pycache__`` are skipped.

    Returns a mapping from module path (forward slashes, relative to the
    scanned root) to its violation counts and, under ``'details'``, the raw
    result of analyzeFile(). Only modules with at least one violation appear.
    """
    def emptyEntry():
        return {
            'functions': 0,
            'parameters': 0,
            'variables': 0,
            'details': {
                'functions': [],
                'parameters': [],
                'variables': []
            }
        }

    results = defaultdict(emptyEntry)

    # Accept both absolute and relative roots, falling back to the cwd.
    rootPath = Path(rootDir)
    if not rootPath.exists():
        fallbackPath = Path('.').resolve() / rootDir
        rootPath = fallbackPath if fallbackPath.exists() else Path('.')

    for pyFile in rootPath.rglob('*.py'):
        filePath = str(pyFile.relative_to(rootPath))

        # Test files, tool scripts and bytecode caches are not analyzed.
        isSkipped = (
            'test' in filePath.lower()
            or 'tool_' in filePath
            or '__pycache__' in filePath
        )
        if isSkipped:
            continue

        violations = analyzeFile(str(pyFile))
        counts = {
            category: len(violations[category])
            for category in ('functions', 'parameters', 'variables')
        }

        if sum(counts.values()) > 0:
            entry = results[filePath.replace('\\', '/')]
            entry['functions'] = counts['functions']
            entry['parameters'] = counts['parameters']
            entry['variables'] = counts['variables']
            entry['details'] = violations

    return results
|
||||||
|
|
||||||
|
def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'):
    """Write the per-module violation counts to a CSV file and print a summary.

    One row is written per module whose total violation count is positive,
    ordered by that total in descending order. The output directory is
    created when it does not exist yet.

    Args:
        results: Mapping from module path to a dict holding the integer
            counts ``'functions'``, ``'parameters'`` and ``'variables'``.
        outputFile: Path of the CSV file to (over)write.
    """
    def moduleTotal(stats):
        return stats['functions'] + stats['parameters'] + stats['variables']

    # Without this, a missing target directory would abort the report with
    # FileNotFoundError after the whole analysis has already run.
    Path(outputFile).parent.mkdir(parents=True, exist_ok=True)

    with open(outputFile, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total'])

        # Sort by total violations, worst module first
        sortedResults = sorted(
            results.items(),
            key=lambda item: moduleTotal(item[1]),
            reverse=True
        )

        rowsWritten = 0
        for module, stats in sortedResults:
            total = moduleTotal(stats)
            if total > 0:
                writer.writerow([
                    module,
                    stats['functions'],
                    stats['parameters'],
                    stats['variables'],
                    total
                ])
                rowsWritten += 1

    # A row is written for every module with a positive total, so zero rows
    # means there was nothing to report (not that violations were lost).
    if rowsWritten == 0:
        print("No naming violations found; CSV contains only the header row.")

    print(f"CSV report generated: {outputFile}")
    # NOTE: this is the number of entries in ``results``.
    print(f"Total modules analyzed: {len(results)}")

    # Print summary
    totalFuncs = sum(r['functions'] for r in results.values())
    totalParams = sum(r['parameters'] for r in results.values())
    totalVars = sum(r['variables'] for r in results.values())
    print("\nSummary:")
    print(f"  Function names: {totalFuncs}")
    print(f"  Parameter names: {totalParams}")
    print(f"  Variable names: {totalVars}")
    print(f"  Total violations: {totalFuncs + totalParams + totalVars}")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: analyze the gateway package and store the report
    # as a CSV file inside the gateway directory.
    print("Analyzing codebase for naming violations...")
    results = analyzeCodebase('gateway')

    outputPath = Path('gateway', 'naming_violations_report.csv')
    generateCSV(results, str(outputPath))
|
||||||
|
|
||||||
62
app.py
62
app.py
|
|
@ -24,45 +24,45 @@ class DailyRotatingFileHandler(RotatingFileHandler):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs
|
self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs
|
||||||
):
|
):
|
||||||
self.log_dir = log_dir
|
self.logDir = logDir
|
||||||
self.filename_prefix = filename_prefix
|
self.filenamePrefix = filenamePrefix
|
||||||
self.current_date = None
|
self.currentDate = None
|
||||||
self.current_file = None
|
self.currentFile = None
|
||||||
|
|
||||||
# Initialize with today's file
|
# Initialize with today's file
|
||||||
self._update_file_if_needed()
|
self._updateFileIfNeeded()
|
||||||
|
|
||||||
# Call parent constructor with current file
|
# Call parent constructor with current file
|
||||||
super().__init__(
|
super().__init__(
|
||||||
self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs
|
self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
def _update_file_if_needed(self):
|
def _updateFileIfNeeded(self):
|
||||||
"""Update the log file if the date has changed"""
|
"""Update the log file if the date has changed"""
|
||||||
today = datetime.now().strftime("%Y%m%d")
|
today = datetime.now().strftime("%Y%m%d")
|
||||||
|
|
||||||
if self.current_date != today:
|
if self.currentDate != today:
|
||||||
self.current_date = today
|
self.currentDate = today
|
||||||
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
|
newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
|
||||||
|
|
||||||
if self.current_file != new_file:
|
if self.currentFile != newFile:
|
||||||
self.current_file = new_file
|
self.currentFile = newFile
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def emit(self, record):
|
def emit(self, record):
|
||||||
"""Emit a log record, switching files if date has changed"""
|
"""Emit a log record, switching files if date has changed"""
|
||||||
# Check if we need to switch to a new file
|
# Check if we need to switch to a new file
|
||||||
if self._update_file_if_needed():
|
if self._updateFileIfNeeded():
|
||||||
# Close current file and open new one
|
# Close current file and open new one
|
||||||
if self.stream:
|
if self.stream:
|
||||||
self.stream.close()
|
self.stream.close()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
# Update the baseFilename for the parent class
|
# Update the baseFilename for the parent class
|
||||||
self.baseFilename = self.current_file
|
self.baseFilename = self.currentFile
|
||||||
# Reopen the stream
|
# Reopen the stream
|
||||||
if not self.delay:
|
if not self.delay:
|
||||||
self.stream = self._open()
|
self.stream = self._open()
|
||||||
|
|
@ -200,10 +200,10 @@ def initLogging():
|
||||||
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
|
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
|
||||||
|
|
||||||
fileHandler = DailyRotatingFileHandler(
|
fileHandler = DailyRotatingFileHandler(
|
||||||
log_dir=logDir,
|
logDir=logDir,
|
||||||
filename_prefix="log_app",
|
filenamePrefix="log_app",
|
||||||
max_bytes=rotationSize,
|
maxBytes=rotationSize,
|
||||||
backup_count=backupCount,
|
backupCount=backupCount,
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
fileHandler.setFormatter(fileFormatter)
|
fileHandler.setFormatter(fileFormatter)
|
||||||
|
|
@ -252,7 +252,7 @@ def initLogging():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def make_sqlalchemy_db_url() -> str:
|
def makeSqlalchemyDbUrl() -> str:
|
||||||
host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost")
|
host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost")
|
||||||
port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432")
|
port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432")
|
||||||
db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway")
|
db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway")
|
||||||
|
|
@ -299,17 +299,17 @@ app = FastAPI(
|
||||||
|
|
||||||
# Configure OpenAPI security scheme for Swagger UI
|
# Configure OpenAPI security scheme for Swagger UI
|
||||||
# This adds the "Authorize" button to the /docs page
|
# This adds the "Authorize" button to the /docs page
|
||||||
security_scheme = HTTPBearer()
|
securityScheme = HTTPBearer()
|
||||||
app.openapi_schema = None # Reset schema to regenerate with security
|
app.openapi_schema = None # Reset schema to regenerate with security
|
||||||
|
|
||||||
|
|
||||||
def custom_openapi():
|
def customOpenapi():
|
||||||
if app.openapi_schema:
|
if app.openapi_schema:
|
||||||
return app.openapi_schema
|
return app.openapi_schema
|
||||||
|
|
||||||
from fastapi.openapi.utils import get_openapi
|
from fastapi.openapi.utils import get_openapi
|
||||||
|
|
||||||
openapi_schema = get_openapi(
|
openapiSchema = get_openapi(
|
||||||
title=app.title,
|
title=app.title,
|
||||||
version="1.0.0",
|
version="1.0.0",
|
||||||
description=app.description,
|
description=app.description,
|
||||||
|
|
@ -317,7 +317,7 @@ def custom_openapi():
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add security scheme definition
|
# Add security scheme definition
|
||||||
openapi_schema["components"]["securitySchemes"] = {
|
openapiSchema["components"]["securitySchemes"] = {
|
||||||
"BearerAuth": {
|
"BearerAuth": {
|
||||||
"type": "http",
|
"type": "http",
|
||||||
"scheme": "bearer",
|
"scheme": "bearer",
|
||||||
|
|
@ -328,20 +328,20 @@ def custom_openapi():
|
||||||
|
|
||||||
# Apply security globally to all endpoints
|
# Apply security globally to all endpoints
|
||||||
# Individual endpoints can override this if needed
|
# Individual endpoints can override this if needed
|
||||||
openapi_schema["security"] = [{"BearerAuth": []}]
|
openapiSchema["security"] = [{"BearerAuth": []}]
|
||||||
|
|
||||||
app.openapi_schema = openapi_schema
|
app.openapi_schema = openapiSchema
|
||||||
return app.openapi_schema
|
return app.openapi_schema
|
||||||
|
|
||||||
|
|
||||||
app.openapi = custom_openapi
|
app.openapi = customOpenapi
|
||||||
|
|
||||||
|
|
||||||
# Parse CORS origins from environment variable
|
# Parse CORS origins from environment variable
|
||||||
def get_allowed_origins():
|
def getAllowedOrigins():
|
||||||
origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
|
originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
|
||||||
# Split by comma and strip whitespace
|
# Split by comma and strip whitespace
|
||||||
origins = [origin.strip() for origin in origins_str.split(",")]
|
origins = [origin.strip() for origin in originsStr.split(",")]
|
||||||
logger.info(f"CORS allowed origins: {origins}")
|
logger.info(f"CORS allowed origins: {origins}")
|
||||||
return origins
|
return origins
|
||||||
|
|
||||||
|
|
@ -349,7 +349,7 @@ def get_allowed_origins():
|
||||||
# CORS configuration using environment variables
|
# CORS configuration using environment variables
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
CORSMiddleware,
|
CORSMiddleware,
|
||||||
allow_origins=get_allowed_origins(),
|
allow_origins=getAllowedOrigins(),
|
||||||
allow_credentials=True,
|
allow_credentials=True,
|
||||||
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from pydantic import BaseModel
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -232,7 +232,7 @@ class DatabaseConnector:
|
||||||
raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})")
|
raise ValueError(f"Record ID mismatch: file name ID ({recordId}) does not match record ID ({record['id']})")
|
||||||
|
|
||||||
# Add metadata
|
# Add metadata
|
||||||
currentTime = get_utc_timestamp()
|
currentTime = getUtcTimestamp()
|
||||||
if "_createdAt" not in record:
|
if "_createdAt" not in record:
|
||||||
record["_createdAt"] = currentTime
|
record["_createdAt"] = currentTime
|
||||||
record["_createdBy"] = self.userId
|
record["_createdBy"] = self.userId
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import uuid
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -287,7 +287,7 @@ class DatabaseConnector:
|
||||||
INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
|
INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt")
|
||||||
VALUES (%s, %s, %s)
|
VALUES (%s, %s, %s)
|
||||||
""",
|
""",
|
||||||
(table_name, initial_id, get_utc_timestamp()),
|
(table_name, initial_id, getUtcTimestamp()),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.connection.commit()
|
self.connection.commit()
|
||||||
|
|
@ -611,7 +611,7 @@ class DatabaseConnector:
|
||||||
raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")
|
raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}")
|
||||||
|
|
||||||
# Add metadata
|
# Add metadata
|
||||||
currentTime = get_utc_timestamp()
|
currentTime = getUtcTimestamp()
|
||||||
if "_createdAt" not in record:
|
if "_createdAt" not in record:
|
||||||
record["_createdAt"] = currentTime
|
record["_createdAt"] = currentTime
|
||||||
record["_createdBy"] = self.userId
|
record["_createdBy"] = self.userId
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ class ConnectorTicketClickup(TicketBase):
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
async def read_attributes(self) -> list[TicketFieldAttribute]:
|
async def readAttributes(self) -> list[TicketFieldAttribute]:
|
||||||
"""Fetch field attributes. Uses list custom fields if listId provided; else basic fields."""
|
"""Fetch field attributes. Uses list custom fields if listId provided; else basic fields."""
|
||||||
attributes: list[TicketFieldAttribute] = []
|
attributes: list[TicketFieldAttribute] = []
|
||||||
try:
|
try:
|
||||||
|
|
@ -65,7 +65,7 @@ class ConnectorTicketClickup(TicketBase):
|
||||||
logger.error(f"ClickUp read_attributes error: {e}")
|
logger.error(f"ClickUp read_attributes error: {e}")
|
||||||
return attributes
|
return attributes
|
||||||
|
|
||||||
async def read_tasks(self, *, limit: int = 0) -> list[dict]:
|
async def readTasks(self, *, limit: int = 0) -> list[dict]:
|
||||||
"""Read tasks from ClickUp, always returning full task records.
|
"""Read tasks from ClickUp, always returning full task records.
|
||||||
If list_id is set, read from that list; otherwise read from team.
|
If list_id is set, read from that list; otherwise read from team.
|
||||||
"""
|
"""
|
||||||
|
|
@ -102,7 +102,7 @@ class ConnectorTicketClickup(TicketBase):
|
||||||
logger.error(f"ClickUp read_tasks error: {e}")
|
logger.error(f"ClickUp read_tasks error: {e}")
|
||||||
return tasks
|
return tasks
|
||||||
|
|
||||||
async def write_tasks(self, tasklist: list[dict]) -> None:
|
async def writeTasks(self, tasklist: list[dict]) -> None:
|
||||||
"""Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}"""
|
"""Update tasks in ClickUp. Expects each item to contain {'ID' or 'id' or 'task_id', 'fields': {...}}"""
|
||||||
try:
|
try:
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ class ConnectorTicketJira(TicketBase):
|
||||||
self.ticketType = ticketType
|
self.ticketType = ticketType
|
||||||
|
|
||||||
|
|
||||||
async def read_attributes(self) -> list[TicketFieldAttribute]:
|
async def readAttributes(self) -> list[TicketFieldAttribute]:
|
||||||
"""
|
"""
|
||||||
Read field attributes from Jira by querying for a single issue
|
Read field attributes from Jira by querying for a single issue
|
||||||
and extracting the field mappings.
|
and extracting the field mappings.
|
||||||
|
|
@ -130,7 +130,7 @@ class ConnectorTicketJira(TicketBase):
|
||||||
logger.error(f"Error while calling fields API: {str(e)}")
|
logger.error(f"Error while calling fields API: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def read_tasks(self, *, limit: int = 0) -> list[dict]:
|
async def readTasks(self, *, limit: int = 0) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Read tasks from Jira with pagination support.
|
Read tasks from Jira with pagination support.
|
||||||
|
|
||||||
|
|
@ -253,7 +253,7 @@ class ConnectorTicketJira(TicketBase):
|
||||||
logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}")
|
logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def write_tasks(self, tasklist: list[dict]) -> None:
|
async def writeTasks(self, tasklist: list[dict]) -> None:
|
||||||
"""
|
"""
|
||||||
Write/update tasks to Jira.
|
Write/update tasks to Jira.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,18 +26,18 @@ class ConnectorGoogleSpeech:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get JSON key from config.ini
|
# Get JSON key from config.ini
|
||||||
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
|
apiKey = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
|
||||||
|
|
||||||
if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
|
if not apiKey or apiKey == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
|
||||||
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
|
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
|
||||||
|
|
||||||
# Parse the JSON key and set up authentication
|
# Parse the JSON key and set up authentication
|
||||||
try:
|
try:
|
||||||
credentials_info = json.loads(api_key)
|
credentialsInfo = json.loads(apiKey)
|
||||||
|
|
||||||
# Create credentials object directly (no file needed!)
|
# Create credentials object directly (no file needed!)
|
||||||
from google.oauth2 import service_account
|
from google.oauth2 import service_account
|
||||||
credentials = service_account.Credentials.from_service_account_info(credentials_info)
|
credentials = service_account.Credentials.from_service_account_info(credentialsInfo)
|
||||||
|
|
||||||
logger.info("✅ Using Google Speech credentials from config.ini")
|
logger.info("✅ Using Google Speech credentials from config.ini")
|
||||||
|
|
||||||
|
|
@ -55,8 +55,8 @@ class ConnectorGoogleSpeech:
|
||||||
logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
|
logger.error(f"❌ Failed to initialize Google Cloud clients: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def speech_to_text(self, audio_content: bytes, language: str = "de-DE",
|
async def speech_to_text(self, audioContent: bytes, language: str = "de-DE",
|
||||||
sample_rate: int = None, channels: int = None) -> Dict:
|
sampleRate: int = None, channels: int = None) -> Dict:
|
||||||
"""
|
"""
|
||||||
Convert speech to text using Google Cloud Speech-to-Text API.
|
Convert speech to text using Google Cloud Speech-to-Text API.
|
||||||
|
|
||||||
|
|
@ -71,8 +71,8 @@ class ConnectorGoogleSpeech:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Auto-detect audio format if not provided
|
# Auto-detect audio format if not provided
|
||||||
if sample_rate is None or channels is None:
|
if sampleRate is None or channels is None:
|
||||||
validation = self.validate_audio_format(audio_content)
|
validation = self.validate_audio_format(audioContent)
|
||||||
if not validation["valid"]:
|
if not validation["valid"]:
|
||||||
return {
|
return {
|
||||||
"success": False,
|
"success": False,
|
||||||
|
|
@ -80,59 +80,59 @@ class ConnectorGoogleSpeech:
|
||||||
"confidence": 0.0,
|
"confidence": 0.0,
|
||||||
"error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
"error": f"Invalid audio format: {validation.get('error', 'Unknown error')}"
|
||||||
}
|
}
|
||||||
sample_rate = validation["sample_rate"]
|
sampleRate = validation["sample_rate"]
|
||||||
channels = validation["channels"]
|
channels = validation["channels"]
|
||||||
audio_format = validation["format"]
|
audioFormat = validation["format"]
|
||||||
logger.info(f"Auto-detected audio: {audio_format}, {sample_rate}Hz, {channels}ch")
|
logger.info(f"Auto-detected audio: {audioFormat}, {sampleRate}Hz, {channels}ch")
|
||||||
|
|
||||||
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
|
logger.info(f"Processing audio with Google Cloud Speech-to-Text")
|
||||||
logger.info(f"Audio: {len(audio_content)} bytes, {sample_rate}Hz, {channels}ch")
|
logger.info(f"Audio: {len(audioContent)} bytes, {sampleRate}Hz, {channels}ch")
|
||||||
|
|
||||||
# Configure audio settings
|
# Configure audio settings
|
||||||
audio = speech.RecognitionAudio(content=audio_content)
|
audio = speech.RecognitionAudio(content=audioContent)
|
||||||
|
|
||||||
# Determine encoding based on detected format
|
# Determine encoding based on detected format
|
||||||
# Google Cloud Speech API has specific requirements for different formats
|
# Google Cloud Speech API has specific requirements for different formats
|
||||||
if audio_format == "webm_opus":
|
if audioFormat == "webm_opus":
|
||||||
# For WEBM OPUS, we need to ensure proper format
|
# For WEBM OPUS, we need to ensure proper format
|
||||||
encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
|
encoding = speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
|
||||||
# WEBM_OPUS requires specific sample rate handling - must match header
|
# WEBM_OPUS requires specific sample rate handling - must match header
|
||||||
if sample_rate != 48000:
|
if sampleRate != 48000:
|
||||||
logger.warning(f"WEBM_OPUS detected but sample rate is {sample_rate}, adjusting to 48000")
|
logger.warning(f"WEBM_OPUS detected but sample rate is {sampleRate}, adjusting to 48000")
|
||||||
sample_rate = 48000
|
sampleRate = 48000
|
||||||
# For WEBM_OPUS, don't specify sample_rate_hertz in config
|
# For WEBM_OPUS, don't specify sample_rate_hertz in config
|
||||||
# Google Cloud will read it from the WEBM header
|
# Google Cloud will read it from the WEBM header
|
||||||
use_sample_rate = False
|
useSampleRate = False
|
||||||
elif audio_format == "linear16":
|
elif audioFormat == "linear16":
|
||||||
# For LINEAR16 format (PCM)
|
# For LINEAR16 format (PCM)
|
||||||
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
# Ensure sample rate is reasonable
|
# Ensure sample rate is reasonable
|
||||||
if sample_rate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
|
if sampleRate not in [8000, 16000, 22050, 24000, 32000, 44100, 48000]:
|
||||||
logger.warning(f"Unusual sample rate {sample_rate}, adjusting to 16000")
|
logger.warning(f"Unusual sample rate {sampleRate}, adjusting to 16000")
|
||||||
sample_rate = 16000
|
sampleRate = 16000
|
||||||
use_sample_rate = True
|
useSampleRate = True
|
||||||
elif audio_format == "mp3":
|
elif audioFormat == "mp3":
|
||||||
# For MP3 format
|
# For MP3 format
|
||||||
encoding = speech.RecognitionConfig.AudioEncoding.MP3
|
encoding = speech.RecognitionConfig.AudioEncoding.MP3
|
||||||
use_sample_rate = True
|
useSampleRate = True
|
||||||
elif audio_format == "flac":
|
elif audioFormat == "flac":
|
||||||
# For FLAC format
|
# For FLAC format
|
||||||
encoding = speech.RecognitionConfig.AudioEncoding.FLAC
|
encoding = speech.RecognitionConfig.AudioEncoding.FLAC
|
||||||
use_sample_rate = True
|
useSampleRate = True
|
||||||
elif audio_format == "wav":
|
elif audioFormat == "wav":
|
||||||
# For WAV format
|
# For WAV format
|
||||||
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
use_sample_rate = True
|
useSampleRate = True
|
||||||
else:
|
else:
|
||||||
# For unknown formats, try LINEAR16 as fallback
|
# For unknown formats, try LINEAR16 as fallback
|
||||||
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
sample_rate = 16000 # Use standard sample rate
|
sampleRate = 16000 # Use standard sample rate
|
||||||
channels = 1 # Use mono
|
channels = 1 # Use mono
|
||||||
use_sample_rate = True
|
useSampleRate = True
|
||||||
logger.warning(f"Unknown audio format '{audio_format}', using LINEAR16 encoding with 16000Hz")
|
logger.warning(f"Unknown audio format '{audioFormat}', using LINEAR16 encoding with 16000Hz")
|
||||||
|
|
||||||
# Build config based on format requirements
|
# Build config based on format requirements
|
||||||
config_params = {
|
configParams = {
|
||||||
"encoding": encoding,
|
"encoding": encoding,
|
||||||
"audio_channel_count": channels,
|
"audio_channel_count": channels,
|
||||||
"language_code": language,
|
"language_code": language,
|
||||||
|
|
@ -145,13 +145,13 @@ class ConnectorGoogleSpeech:
|
||||||
}
|
}
|
||||||
|
|
||||||
# Only add sample_rate_hertz if needed (not for WEBM_OPUS)
|
# Only add sample_rate_hertz if needed (not for WEBM_OPUS)
|
||||||
if use_sample_rate:
|
if useSampleRate:
|
||||||
config_params["sample_rate_hertz"] = sample_rate
|
configParams["sample_rate_hertz"] = sampleRate
|
||||||
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sample_rate}, channels={channels}, language={language}")
|
logger.debug(f"Recognition config: encoding={encoding}, sample_rate={sampleRate}, channels={channels}, language={language}")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")
|
logger.debug(f"Recognition config: encoding={encoding}, sample_rate=auto (from header), channels={channels}, language={language}")
|
||||||
|
|
||||||
config = speech.RecognitionConfig(**config_params)
|
config = speech.RecognitionConfig(**configParams)
|
||||||
|
|
||||||
# Perform speech recognition
|
# Perform speech recognition
|
||||||
logger.info("Sending audio to Google Cloud Speech-to-Text...")
|
logger.info("Sending audio to Google Cloud Speech-to-Text...")
|
||||||
|
|
@ -162,12 +162,12 @@ class ConnectorGoogleSpeech:
|
||||||
response = self.speech_client.recognize(config=config, audio=audio)
|
response = self.speech_client.recognize(config=config, audio=audio)
|
||||||
logger.debug(f"Google Cloud response: {response}")
|
logger.debug(f"Google Cloud response: {response}")
|
||||||
|
|
||||||
except Exception as api_error:
|
except Exception as apiError:
|
||||||
logger.error(f"Google Cloud API error: {api_error}")
|
logger.error(f"Google Cloud API error: {apiError}")
|
||||||
# Try with different encoding as fallback
|
# Try with different encoding as fallback
|
||||||
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
|
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
|
||||||
logger.info("Trying fallback with LINEAR16 encoding...")
|
logger.info("Trying fallback with LINEAR16 encoding...")
|
||||||
fallback_config = speech.RecognitionConfig(
|
fallbackConfig = speech.RecognitionConfig(
|
||||||
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
||||||
sample_rate_hertz=16000, # Use standard sample rate
|
sample_rate_hertz=16000, # Use standard sample rate
|
||||||
audio_channel_count=1,
|
audio_channel_count=1,
|
||||||
|
|
@ -177,13 +177,13 @@ class ConnectorGoogleSpeech:
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self.speech_client.recognize(config=fallback_config, audio=audio)
|
response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
|
||||||
logger.debug(f"Google Cloud fallback response: {response}")
|
logger.debug(f"Google Cloud fallback response: {response}")
|
||||||
except Exception as fallback_error:
|
except Exception as fallbackError:
|
||||||
logger.error(f"Google Cloud fallback error: {fallback_error}")
|
logger.error(f"Google Cloud fallback error: {fallbackError}")
|
||||||
raise api_error
|
raise apiError
|
||||||
else:
|
else:
|
||||||
raise api_error
|
raise apiError
|
||||||
|
|
||||||
# Process results
|
# Process results
|
||||||
if response.results:
|
if response.results:
|
||||||
|
|
@ -234,18 +234,18 @@ class ConnectorGoogleSpeech:
|
||||||
|
|
||||||
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
|
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
|
||||||
# For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
|
# For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
|
||||||
if audio_format != "webm_opus":
|
if audioFormat != "webm_opus":
|
||||||
# Try LINEAR16 with detected sample rate for non-WEBM formats
|
# Try LINEAR16 with detected sample rate for non-WEBM formats
|
||||||
fallback_configs.append({
|
fallback_configs.append({
|
||||||
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
||||||
"sample_rate": sample_rate,
|
"sample_rate": sampleRate,
|
||||||
"channels": channels,
|
"channels": channels,
|
||||||
"use_sample_rate": True,
|
"use_sample_rate": True,
|
||||||
"description": f"LINEAR16 with {sample_rate}Hz"
|
"description": f"LINEAR16 with {sampleRate}Hz"
|
||||||
})
|
})
|
||||||
|
|
||||||
# For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
|
# For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
|
||||||
if audio_format == "webm_opus":
|
if audioFormat == "webm_opus":
|
||||||
# Try WEBM_OPUS without sample rate specification (let Google read from header)
|
# Try WEBM_OPUS without sample rate specification (let Google read from header)
|
||||||
fallback_configs.append({
|
fallback_configs.append({
|
||||||
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
|
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
|
||||||
|
|
@ -273,7 +273,7 @@ class ConnectorGoogleSpeech:
|
||||||
else:
|
else:
|
||||||
# For other formats, try standard sample rates
|
# For other formats, try standard sample rates
|
||||||
for std_rate in [16000, 8000, 22050, 44100]:
|
for std_rate in [16000, 8000, 22050, 44100]:
|
||||||
if std_rate != sample_rate:
|
if std_rate != sampleRate:
|
||||||
fallback_configs.append({
|
fallback_configs.append({
|
||||||
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
||||||
"sample_rate": std_rate,
|
"sample_rate": std_rate,
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -26,7 +26,7 @@ class ChatStat(BaseModel):
|
||||||
priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation")
|
priceUsd: Optional[float] = Field(None, description="Calculated price in USD for the operation")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ChatStat",
|
"ChatStat",
|
||||||
{"en": "Chat Statistics", "fr": "Statistiques de chat"},
|
{"en": "Chat Statistics", "fr": "Statistiques de chat"},
|
||||||
{
|
{
|
||||||
|
|
@ -51,7 +51,7 @@ class ChatLog(BaseModel):
|
||||||
message: str = Field(description="Log message")
|
message: str = Field(description="Log message")
|
||||||
type: str = Field(description="Log type (info, warning, error, etc.)")
|
type: str = Field(description="Log type (info, warning, error, etc.)")
|
||||||
timestamp: float = Field(
|
timestamp: float = Field(
|
||||||
default_factory=get_utc_timestamp,
|
default_factory=getUtcTimestamp,
|
||||||
description="When the log entry was created (UTC timestamp in seconds)",
|
description="When the log entry was created (UTC timestamp in seconds)",
|
||||||
)
|
)
|
||||||
status: Optional[str] = Field(None, description="Status of the log entry")
|
status: Optional[str] = Field(None, description="Status of the log entry")
|
||||||
|
|
@ -63,7 +63,7 @@ class ChatLog(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ChatLog",
|
"ChatLog",
|
||||||
{"en": "Chat Log", "fr": "Journal de chat"},
|
{"en": "Chat Log", "fr": "Journal de chat"},
|
||||||
{
|
{
|
||||||
|
|
@ -96,7 +96,7 @@ class ChatDocument(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ChatDocument",
|
"ChatDocument",
|
||||||
{"en": "Chat Document", "fr": "Document de chat"},
|
{"en": "Chat Document", "fr": "Document de chat"},
|
||||||
{
|
{
|
||||||
|
|
@ -133,7 +133,7 @@ class ContentMetadata(BaseModel):
|
||||||
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
|
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ContentMetadata",
|
"ContentMetadata",
|
||||||
{"en": "Content Metadata", "fr": "Métadonnées du contenu"},
|
{"en": "Content Metadata", "fr": "Métadonnées du contenu"},
|
||||||
{
|
{
|
||||||
|
|
@ -157,7 +157,7 @@ class ContentItem(BaseModel):
|
||||||
metadata: ContentMetadata = Field(description="Content metadata")
|
metadata: ContentMetadata = Field(description="Content metadata")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ContentItem",
|
"ContentItem",
|
||||||
{"en": "Content Item", "fr": "Élément de contenu"},
|
{"en": "Content Item", "fr": "Élément de contenu"},
|
||||||
{
|
{
|
||||||
|
|
@ -175,7 +175,7 @@ class ChatContentExtracted(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ChatContentExtracted",
|
"ChatContentExtracted",
|
||||||
{"en": "Extracted Content", "fr": "Contenu extrait"},
|
{"en": "Extracted Content", "fr": "Contenu extrait"},
|
||||||
{
|
{
|
||||||
|
|
@ -209,7 +209,7 @@ class ChatMessage(BaseModel):
|
||||||
description="Sequence number of the message (set automatically)"
|
description="Sequence number of the message (set automatically)"
|
||||||
)
|
)
|
||||||
publishedAt: float = Field(
|
publishedAt: float = Field(
|
||||||
default_factory=get_utc_timestamp,
|
default_factory=getUtcTimestamp,
|
||||||
description="When the message was published (UTC timestamp in seconds)",
|
description="When the message was published (UTC timestamp in seconds)",
|
||||||
)
|
)
|
||||||
success: Optional[bool] = Field(
|
success: Optional[bool] = Field(
|
||||||
|
|
@ -235,7 +235,7 @@ class ChatMessage(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ChatMessage",
|
"ChatMessage",
|
||||||
{"en": "Chat Message", "fr": "Message de chat"},
|
{"en": "Chat Message", "fr": "Message de chat"},
|
||||||
{
|
{
|
||||||
|
|
@ -331,14 +331,14 @@ class ChatWorkflow(BaseModel):
|
||||||
frontend_required=False,
|
frontend_required=False,
|
||||||
)
|
)
|
||||||
lastActivity: float = Field(
|
lastActivity: float = Field(
|
||||||
default_factory=get_utc_timestamp,
|
default_factory=getUtcTimestamp,
|
||||||
description="Timestamp of last activity (UTC timestamp in seconds)",
|
description="Timestamp of last activity (UTC timestamp in seconds)",
|
||||||
frontend_type="timestamp",
|
frontend_type="timestamp",
|
||||||
frontend_readonly=True,
|
frontend_readonly=True,
|
||||||
frontend_required=False,
|
frontend_required=False,
|
||||||
)
|
)
|
||||||
startedAt: float = Field(
|
startedAt: float = Field(
|
||||||
default_factory=get_utc_timestamp,
|
default_factory=getUtcTimestamp,
|
||||||
description="When the workflow started (UTC timestamp in seconds)",
|
description="When the workflow started (UTC timestamp in seconds)",
|
||||||
frontend_type="timestamp",
|
frontend_type="timestamp",
|
||||||
frontend_readonly=True,
|
frontend_readonly=True,
|
||||||
|
|
@ -395,7 +395,7 @@ class ChatWorkflow(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ChatWorkflow",
|
"ChatWorkflow",
|
||||||
{"en": "Chat Workflow", "fr": "Flux de travail de chat"},
|
{"en": "Chat Workflow", "fr": "Flux de travail de chat"},
|
||||||
{
|
{
|
||||||
|
|
@ -426,7 +426,7 @@ class UserInputRequest(BaseModel):
|
||||||
userLanguage: str = Field(default="en", description="User's preferred language")
|
userLanguage: str = Field(default="en", description="User's preferred language")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"UserInputRequest",
|
"UserInputRequest",
|
||||||
{"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
|
{"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
|
||||||
{
|
{
|
||||||
|
|
@ -445,7 +445,7 @@ class ActionDocument(BaseModel):
|
||||||
mimeType: str = Field(description="MIME type of the document")
|
mimeType: str = Field(description="MIME type of the document")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ActionDocument",
|
"ActionDocument",
|
||||||
{"en": "Action Document", "fr": "Document d'action"},
|
{"en": "Action Document", "fr": "Document d'action"},
|
||||||
{
|
{
|
||||||
|
|
@ -485,7 +485,7 @@ class ActionResult(BaseModel):
|
||||||
return cls(success=False, documents=documents or [], error=error)
|
return cls(success=False, documents=documents or [], error=error)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ActionResult",
|
"ActionResult",
|
||||||
{"en": "Action Result", "fr": "Résultat de l'action"},
|
{"en": "Action Result", "fr": "Résultat de l'action"},
|
||||||
{
|
{
|
||||||
|
|
@ -504,7 +504,7 @@ class ActionSelection(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ActionSelection",
|
"ActionSelection",
|
||||||
{"en": "Action Selection", "fr": "Sélection d'action"},
|
{"en": "Action Selection", "fr": "Sélection d'action"},
|
||||||
{
|
{
|
||||||
|
|
@ -520,7 +520,7 @@ class ActionParameters(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ActionParameters",
|
"ActionParameters",
|
||||||
{"en": "Action Parameters", "fr": "Paramètres d'action"},
|
{"en": "Action Parameters", "fr": "Paramètres d'action"},
|
||||||
{
|
{
|
||||||
|
|
@ -535,7 +535,7 @@ class ObservationPreview(BaseModel):
|
||||||
snippet: str = Field(description="Short snippet or summary")
|
snippet: str = Field(description="Short snippet or summary")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ObservationPreview",
|
"ObservationPreview",
|
||||||
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
|
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
|
||||||
{
|
{
|
||||||
|
|
@ -558,7 +558,7 @@ class Observation(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"Observation",
|
"Observation",
|
||||||
{"en": "Observation", "fr": "Observation"},
|
{"en": "Observation", "fr": "Observation"},
|
||||||
{
|
{
|
||||||
|
|
@ -579,7 +579,7 @@ class TaskStatus(str, Enum):
|
||||||
CANCELLED = "cancelled"
|
CANCELLED = "cancelled"
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"TaskStatus",
|
"TaskStatus",
|
||||||
{"en": "Task Status", "fr": "Statut de la tâche"},
|
{"en": "Task Status", "fr": "Statut de la tâche"},
|
||||||
{
|
{
|
||||||
|
|
@ -599,7 +599,7 @@ class DocumentExchange(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"DocumentExchange",
|
"DocumentExchange",
|
||||||
{"en": "Document Exchange", "fr": "Échange de documents"},
|
{"en": "Document Exchange", "fr": "Échange de documents"},
|
||||||
{
|
{
|
||||||
|
|
@ -650,7 +650,7 @@ class ActionItem(BaseModel):
|
||||||
self.error = error_message
|
self.error = error_message
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ActionItem",
|
"ActionItem",
|
||||||
{"en": "Task Action", "fr": "Action de tâche"},
|
{"en": "Task Action", "fr": "Action de tâche"},
|
||||||
{
|
{
|
||||||
|
|
@ -683,7 +683,7 @@ class TaskResult(BaseModel):
|
||||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"TaskResult",
|
"TaskResult",
|
||||||
{"en": "Task Result", "fr": "Résultat de tâche"},
|
{"en": "Task Result", "fr": "Résultat de tâche"},
|
||||||
{
|
{
|
||||||
|
|
@ -728,7 +728,7 @@ class TaskItem(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"TaskItem",
|
"TaskItem",
|
||||||
{"en": "Task", "fr": "Tâche"},
|
{"en": "Task", "fr": "Tâche"},
|
||||||
{
|
{
|
||||||
|
|
@ -758,7 +758,7 @@ class TaskStep(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"TaskStep",
|
"TaskStep",
|
||||||
{"en": "Task Step", "fr": "Étape de tâche"},
|
{"en": "Task Step", "fr": "Étape de tâche"},
|
||||||
{
|
{
|
||||||
|
|
@ -805,7 +805,7 @@ class TaskHandover(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"TaskHandover",
|
"TaskHandover",
|
||||||
{"en": "Task Handover", "fr": "Transfert de tâche"},
|
{"en": "Task Handover", "fr": "Transfert de tâche"},
|
||||||
{
|
{
|
||||||
|
|
@ -879,7 +879,7 @@ class ReviewResult(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"ReviewResult",
|
"ReviewResult",
|
||||||
{"en": "Review Result", "fr": "Résultat de l'évaluation"},
|
{"en": "Review Result", "fr": "Résultat de l'évaluation"},
|
||||||
{
|
{
|
||||||
|
|
@ -904,7 +904,7 @@ class TaskPlan(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"TaskPlan",
|
"TaskPlan",
|
||||||
{"en": "Task Plan", "fr": "Plan de tâches"},
|
{"en": "Task Plan", "fr": "Plan de tâches"},
|
||||||
{
|
{
|
||||||
|
|
@ -927,7 +927,7 @@ class PromptPlaceholder(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"PromptPlaceholder",
|
"PromptPlaceholder",
|
||||||
{"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
|
{"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
|
||||||
{
|
{
|
||||||
|
|
@ -943,7 +943,7 @@ class PromptBundle(BaseModel):
|
||||||
placeholders: List[PromptPlaceholder] = Field(default_factory=list)
|
placeholders: List[PromptPlaceholder] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"PromptBundle",
|
"PromptBundle",
|
||||||
{"en": "Prompt Bundle", "fr": "Lot d'invite"},
|
{"en": "Prompt Bundle", "fr": "Lot d'invite"},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -81,11 +81,11 @@ class StructuredDocument(BaseModel):
|
||||||
summary: Optional[str] = Field(default=None, description="Document summary")
|
summary: Optional[str] = Field(default=None, description="Document summary")
|
||||||
tags: List[str] = Field(default_factory=list, description="Document tags")
|
tags: List[str] = Field(default_factory=list, description="Document tags")
|
||||||
|
|
||||||
def get_sections_by_type(self, content_type: str) -> List[DocumentSection]:
|
def getSectionsByType(self, content_type: str) -> List[DocumentSection]:
|
||||||
"""Get all sections of a specific content type."""
|
"""Get all sections of a specific content type."""
|
||||||
return [section for section in self.sections if section.content_type == content_type]
|
return [section for section in self.sections if section.content_type == content_type]
|
||||||
|
|
||||||
def get_all_tables(self) -> List[TableData]:
|
def getAllTables(self) -> List[TableData]:
|
||||||
"""Get all table data from the document."""
|
"""Get all table data from the document."""
|
||||||
tables = []
|
tables = []
|
||||||
for section in self.sections:
|
for section in self.sections:
|
||||||
|
|
@ -94,7 +94,7 @@ class StructuredDocument(BaseModel):
|
||||||
tables.append(element)
|
tables.append(element)
|
||||||
return tables
|
return tables
|
||||||
|
|
||||||
def get_all_lists(self) -> List[BulletList]:
|
def getAllLists(self) -> List[BulletList]:
|
||||||
"""Get all lists from the document."""
|
"""Get all lists from the document."""
|
||||||
lists = []
|
lists = []
|
||||||
for section in self.sections:
|
for section in self.sections:
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,8 @@
|
||||||
|
|
||||||
from typing import Dict, Any, Optional, Union
|
from typing import Dict, Any, Optional, Union
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
import uuid
|
import uuid
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
|
|
@ -15,9 +15,9 @@ class FileItem(BaseModel):
|
||||||
mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
mimeType: str = Field(description="MIME type of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
fileHash: str = Field(description="Hash of the file", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False)
|
fileSize: int = Field(description="Size of the file in bytes", frontend_type="integer", frontend_readonly=True, frontend_required=False)
|
||||||
creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the file was created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"FileItem",
|
"FileItem",
|
||||||
{"en": "File Item", "fr": "Élément de fichier"},
|
{"en": "File Item", "fr": "Élément de fichier"},
|
||||||
{
|
{
|
||||||
|
|
@ -45,7 +45,7 @@ class FilePreview(BaseModel):
|
||||||
if isinstance(data.get("content"), bytes):
|
if isinstance(data.get("content"), bytes):
|
||||||
data["content"] = base64.b64encode(data["content"]).decode("utf-8")
|
data["content"] = base64.b64encode(data["content"]).decode("utf-8")
|
||||||
return data
|
return data
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"FilePreview",
|
"FilePreview",
|
||||||
{"en": "File Preview", "fr": "Aperçu du fichier"},
|
{"en": "File Preview", "fr": "Aperçu du fichier"},
|
||||||
{
|
{
|
||||||
|
|
@ -62,7 +62,7 @@ class FileData(BaseModel):
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
||||||
data: str = Field(description="File data content")
|
data: str = Field(description="File data content")
|
||||||
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
|
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"FileData",
|
"FileData",
|
||||||
{"en": "File Data", "fr": "Données de fichier"},
|
{"en": "File Data", "fr": "Données de fichier"},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
90
modules/datamodels/datamodelJson.py
Normal file
90
modules/datamodels/datamodelJson.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
"""
|
||||||
|
Unified JSON document schema and helpers used by both generation prompts and renderers.
|
||||||
|
|
||||||
|
This defines a single canonical template and the supported section types.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
# Canonical list of supported section types across the system
|
||||||
|
supportedSectionTypes: List[str] = [
|
||||||
|
"table",
|
||||||
|
"bullet_list",
|
||||||
|
"heading",
|
||||||
|
"paragraph",
|
||||||
|
"code_block",
|
||||||
|
"image",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Canonical JSON template used for AI generation (documents array + sections)
|
||||||
|
# Rendering pipelines can select the first document and read its sections.
|
||||||
|
jsonTemplateDocument: str = """{
|
||||||
|
"metadata": {
|
||||||
|
"split_strategy": "single_document",
|
||||||
|
"source_documents": [],
|
||||||
|
"extraction_method": "ai_generation"
|
||||||
|
},
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"id": "doc_1",
|
||||||
|
"title": "{{DOCUMENT_TITLE}}",
|
||||||
|
"filename": "document.json",
|
||||||
|
"sections": [
|
||||||
|
{
|
||||||
|
"id": "section_heading_example",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [
|
||||||
|
{"level": 1, "text": "Heading Text"}
|
||||||
|
],
|
||||||
|
"order": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_paragraph_example",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [
|
||||||
|
{"text": "Paragraph text content"}
|
||||||
|
],
|
||||||
|
"order": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_bullet_list_example",
|
||||||
|
"content_type": "bullet_list",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"items": ["Item 1", "Item 2"]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_table_example",
|
||||||
|
"content_type": "table",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"headers": ["Column 1", "Column 2"],
|
||||||
|
"rows": [
|
||||||
|
["Row 1 Col 1", "Row 1 Col 2"],
|
||||||
|
["Row 2 Col 1", "Row 2 Col 2"]
|
||||||
|
],
|
||||||
|
"caption": "Table caption"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_code_example",
|
||||||
|
"content_type": "code_block",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"code": "function example() { return true; }",
|
||||||
|
"language": "javascript"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}"""
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
|
|
||||||
|
|
||||||
class DataNeutraliserConfig(BaseModel):
|
class DataNeutraliserConfig(BaseModel):
|
||||||
|
|
@ -14,7 +14,7 @@ class DataNeutraliserConfig(BaseModel):
|
||||||
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
|
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
|
||||||
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
||||||
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"DataNeutraliserConfig",
|
"DataNeutraliserConfig",
|
||||||
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
|
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
|
||||||
{
|
{
|
||||||
|
|
@ -35,7 +35,7 @@ class DataNeutralizerAttributes(BaseModel):
|
||||||
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
||||||
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"DataNeutralizerAttributes",
|
"DataNeutralizerAttributes",
|
||||||
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
|
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,8 @@
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
from .datamodelUam import AuthAuthority
|
from .datamodelUam import AuthAuthority
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import uuid
|
import uuid
|
||||||
|
|
@ -51,7 +51,7 @@ class Token(BaseModel):
|
||||||
use_enum_values = True
|
use_enum_values = True
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"Token",
|
"Token",
|
||||||
{"en": "Token", "fr": "Jeton"},
|
{"en": "Token", "fr": "Jeton"},
|
||||||
{
|
{
|
||||||
|
|
@ -95,7 +95,7 @@ class AuthEvent(BaseModel):
|
||||||
frontend_required=True,
|
frontend_required=True,
|
||||||
)
|
)
|
||||||
timestamp: float = Field(
|
timestamp: float = Field(
|
||||||
default_factory=get_utc_timestamp,
|
default_factory=getUtcTimestamp,
|
||||||
description="Unix timestamp when the event occurred",
|
description="Unix timestamp when the event occurred",
|
||||||
frontend_type="datetime",
|
frontend_type="datetime",
|
||||||
frontend_readonly=True,
|
frontend_readonly=True,
|
||||||
|
|
@ -131,7 +131,7 @@ class AuthEvent(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"AuthEvent",
|
"AuthEvent",
|
||||||
{"en": "Authentication Event", "fr": "Événement d'authentification"},
|
{"en": "Authentication Event", "fr": "Événement d'authentification"},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -11,12 +11,12 @@ class TicketFieldAttribute(BaseModel):
|
||||||
|
|
||||||
class TicketBase(ABC):
|
class TicketBase(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def read_attributes(self) -> list[TicketFieldAttribute]: ...
|
async def readAttributes(self) -> list[TicketFieldAttribute]: ...
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def read_tasks(self, *, limit: int = 0) -> list[dict]: ...
|
async def readTasks(self, *, limit: int = 0) -> list[dict]: ...
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def write_tasks(self, tasklist: list[dict]) -> None: ...
|
async def writeTasks(self, tasklist: list[dict]) -> None: ...
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,8 @@ import uuid
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pydantic import BaseModel, Field, EmailStr
|
from pydantic import BaseModel, Field, EmailStr
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
|
|
||||||
class AuthAuthority(str, Enum):
|
class AuthAuthority(str, Enum):
|
||||||
|
|
@ -34,7 +34,7 @@ class Mandate(BaseModel):
|
||||||
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
|
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
|
||||||
])
|
])
|
||||||
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
|
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"Mandate",
|
"Mandate",
|
||||||
{"en": "Mandate", "fr": "Mandat"},
|
{"en": "Mandate", "fr": "Mandat"},
|
||||||
{
|
{
|
||||||
|
|
@ -62,8 +62,8 @@ class UserConnection(BaseModel):
|
||||||
{"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}},
|
{"value": "expired", "label": {"en": "Expired", "fr": "Expiré"}},
|
||||||
{"value": "pending", "label": {"en": "Pending", "fr": "En attente"}},
|
{"value": "pending", "label": {"en": "Pending", "fr": "En attente"}},
|
||||||
])
|
])
|
||||||
connectedAt: float = Field(default_factory=get_utc_timestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
connectedAt: float = Field(default_factory=getUtcTimestamp, description="When the connection was established (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
lastChecked: float = Field(default_factory=get_utc_timestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
lastChecked: float = Field(default_factory=getUtcTimestamp, description="When the connection was last verified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
expiresAt: Optional[float] = Field(None, description="When the connection expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[
|
tokenStatus: Optional[str] = Field(None, description="Current token status: active, expired, none", frontend_type="select", frontend_readonly=True, frontend_required=False, frontend_options=[
|
||||||
{"value": "active", "label": {"en": "Active", "fr": "Actif"}},
|
{"value": "active", "label": {"en": "Active", "fr": "Actif"}},
|
||||||
|
|
@ -71,7 +71,7 @@ class UserConnection(BaseModel):
|
||||||
{"value": "none", "label": {"en": "None", "fr": "Aucun"}},
|
{"value": "none", "label": {"en": "None", "fr": "Aucun"}},
|
||||||
])
|
])
|
||||||
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"UserConnection",
|
"UserConnection",
|
||||||
{"en": "User Connection", "fr": "Connexion utilisateur"},
|
{"en": "User Connection", "fr": "Connexion utilisateur"},
|
||||||
{
|
{
|
||||||
|
|
@ -113,7 +113,7 @@ class User(BaseModel):
|
||||||
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
|
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
|
||||||
])
|
])
|
||||||
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"User",
|
"User",
|
||||||
{"en": "User", "fr": "Utilisateur"},
|
{"en": "User", "fr": "Utilisateur"},
|
||||||
{
|
{
|
||||||
|
|
@ -131,7 +131,7 @@ register_model_labels(
|
||||||
|
|
||||||
class UserInDB(User):
|
class UserInDB(User):
|
||||||
hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
|
hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"UserInDB",
|
"UserInDB",
|
||||||
{"en": "User Access", "fr": "Accès de l'utilisateur"},
|
{"en": "User Access", "fr": "Accès de l'utilisateur"},
|
||||||
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
|
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
"""Utility datamodels: Prompt."""
|
"""Utility datamodels: Prompt."""
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -10,7 +10,7 @@ class Prompt(BaseModel):
|
||||||
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
|
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
|
||||||
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"Prompt",
|
"Prompt",
|
||||||
{"en": "Prompt", "fr": "Invite"},
|
{"en": "Prompt", "fr": "Invite"},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
"""Voice settings datamodel."""
|
"""Voice settings datamodel."""
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from modules.shared.attributeUtils import register_model_labels
|
from modules.shared.attributeUtils import registerModelLabels
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -15,11 +15,11 @@ class VoiceSettings(BaseModel):
|
||||||
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True)
|
ttsVoice: str = Field(default="de-DE-KatjaNeural", description="Text-to-Speech voice", frontend_type="select", frontend_readonly=False, frontend_required=True)
|
||||||
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
|
translationEnabled: bool = Field(default=True, description="Whether translation is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
|
||||||
targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False)
|
targetLanguage: str = Field(default="en-US", description="Target language for translation", frontend_type="select", frontend_readonly=False, frontend_required=False)
|
||||||
creationDate: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
creationDate: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were created (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
lastModified: float = Field(default_factory=get_utc_timestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
lastModified: float = Field(default_factory=getUtcTimestamp, description="Date when the settings were last modified (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
registerModelLabels(
|
||||||
"VoiceSettings",
|
"VoiceSettings",
|
||||||
{"en": "Voice Settings", "fr": "Paramètres vocaux"},
|
{"en": "Voice Settings", "fr": "Paramètres vocaux"},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -43,10 +43,6 @@ class NeutralizationPlayground:
|
||||||
'errors': errors,
|
'errors': errors,
|
||||||
}
|
}
|
||||||
|
|
||||||
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
|
|
||||||
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
|
|
||||||
processor = SharepointProcessor(self.currentUser, self.services)
|
|
||||||
return await processor.processSharepointFiles(sourcePath, targetPath)
|
|
||||||
|
|
||||||
# Cleanup attributes
|
# Cleanup attributes
|
||||||
def cleanAttributes(self, fileId: str) -> bool:
|
def cleanAttributes(self, fileId: str) -> bool:
|
||||||
|
|
@ -77,49 +73,51 @@ class NeutralizationPlayground:
|
||||||
}
|
}
|
||||||
|
|
||||||
# Additional methods needed by the route
|
# Additional methods needed by the route
|
||||||
def get_config(self) -> Optional[DataNeutraliserConfig]:
|
def getConfig(self) -> Optional[DataNeutraliserConfig]:
|
||||||
"""Get neutralization configuration"""
|
"""Get neutralization configuration"""
|
||||||
return self.services.neutralization.getConfig()
|
return self.services.neutralization.getConfig()
|
||||||
|
|
||||||
def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
|
def saveConfig(self, configData: Dict[str, Any]) -> DataNeutraliserConfig:
|
||||||
"""Save neutralization configuration"""
|
"""Save neutralization configuration"""
|
||||||
return self.services.neutralization.saveConfig(config_data)
|
return self.services.neutralization.saveConfig(configData)
|
||||||
|
|
||||||
def neutralize_text(self, text: str, file_id: str = None) -> Dict[str, Any]:
|
def neutralizeText(self, text: str, fileId: str = None) -> Dict[str, Any]:
|
||||||
"""Neutralize text content"""
|
"""Neutralize text content"""
|
||||||
return self.services.neutralization.processText(text)
|
return self.services.neutralization.processText(text)
|
||||||
|
|
||||||
def resolve_text(self, text: str) -> str:
|
def resolveText(self, text: str) -> str:
|
||||||
"""Resolve UIDs in neutralized text back to original text"""
|
"""Resolve UIDs in neutralized text back to original text"""
|
||||||
return self.services.neutralization.resolveText(text)
|
return self.services.neutralization.resolveText(text)
|
||||||
|
|
||||||
def get_attributes(self, file_id: str = None) -> List[DataNeutralizerAttributes]:
|
def getAttributes(self, fileId: str = None) -> List[DataNeutralizerAttributes]:
|
||||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
"""Get neutralization attributes, optionally filtered by file ID"""
|
||||||
try:
|
try:
|
||||||
all_attributes = self.services.neutralization.getAttributes()
|
allAttributes = self.services.neutralization.getAttributes()
|
||||||
if file_id:
|
if fileId:
|
||||||
return [attr for attr in all_attributes if attr.fileId == file_id]
|
return [attr for attr in allAttributes if attr.fileId == fileId]
|
||||||
return all_attributes
|
return allAttributes
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error getting attributes: {str(e)}")
|
logger.error(f"Error getting attributes: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
|
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
|
||||||
"""Process files from SharePoint source path and store neutralized files in target path"""
|
"""Process files from SharePoint source path and store neutralized files in target path"""
|
||||||
return await self.processSharepointFiles(source_path, target_path)
|
from modules.services.serviceSharepoint.mainServiceSharepoint import SharepointService
|
||||||
|
processor = SharepointProcessor(self.currentUser, self.services)
|
||||||
|
return await processor.processSharepointFiles(sourcePath, targetPath)
|
||||||
|
|
||||||
def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
def batchNeutralizeFiles(self, filesData: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||||
"""Process multiple files for neutralization"""
|
"""Process multiple files for neutralization"""
|
||||||
file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')]
|
fileIds = [fileData.get('fileId') for fileData in filesData if fileData.get('fileId')]
|
||||||
return self.processFiles(file_ids)
|
return self.processFiles(fileIds)
|
||||||
|
|
||||||
def get_processing_stats(self) -> Dict[str, Any]:
|
def getProcessingStats(self) -> Dict[str, Any]:
|
||||||
"""Get neutralization processing statistics"""
|
"""Get neutralization processing statistics"""
|
||||||
return self.getStats()
|
return self.getStats()
|
||||||
|
|
||||||
def cleanup_file_attributes(self, file_id: str) -> bool:
|
def cleanupFileAttributes(self, fileId: str) -> bool:
|
||||||
"""Clean up neutralization attributes for a specific file"""
|
"""Clean up neutralization attributes for a specific file"""
|
||||||
return self.cleanAttributes(file_id)
|
return self.cleanAttributes(fileId)
|
||||||
|
|
||||||
|
|
||||||
# Internal SharePoint helper module separated to keep feature logic tidy
|
# Internal SharePoint helper module separated to keep feature logic tidy
|
||||||
|
|
@ -208,7 +206,7 @@ class SharepointProcessor:
|
||||||
siteUrl, _ = self._parseSharepointPath(sharepointPath)
|
siteUrl, _ = self._parseSharepointPath(sharepointPath)
|
||||||
if not siteUrl:
|
if not siteUrl:
|
||||||
return False
|
return False
|
||||||
siteInfo = await self.services.sharepoint.find_site_by_web_url(siteUrl)
|
siteInfo = await self.services.sharepoint.findSiteByWebUrl(siteUrl)
|
||||||
return siteInfo is not None
|
return siteInfo is not None
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
@ -219,17 +217,17 @@ class SharepointProcessor:
|
||||||
targetSite, targetFolder = self._parseSharepointPath(targetPath)
|
targetSite, targetFolder = self._parseSharepointPath(targetPath)
|
||||||
if not sourceSite or not targetSite:
|
if not sourceSite or not targetSite:
|
||||||
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
|
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
|
||||||
sourceSiteInfo = await self.services.sharepoint.find_site_by_web_url(sourceSite)
|
sourceSiteInfo = await self.services.sharepoint.findSiteByWebUrl(sourceSite)
|
||||||
if not sourceSiteInfo:
|
if not sourceSiteInfo:
|
||||||
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
|
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
|
||||||
targetSiteInfo = await self.services.sharepoint.find_site_by_web_url(targetSite)
|
targetSiteInfo = await self.services.sharepoint.findSiteByWebUrl(targetSite)
|
||||||
if not targetSiteInfo:
|
if not targetSiteInfo:
|
||||||
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
|
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
|
||||||
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
|
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
|
||||||
files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
|
files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], sourceFolder)
|
||||||
if not files:
|
if not files:
|
||||||
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
|
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
|
||||||
files = await self.services.sharepoint.list_folder_contents(sourceSiteInfo['id'], '')
|
files = await self.services.sharepoint.listFolderContents(sourceSiteInfo['id'], '')
|
||||||
if files:
|
if files:
|
||||||
folders = [f for f in files if f.get('type') == 'folder']
|
folders = [f for f in files if f.get('type') == 'folder']
|
||||||
folderNames = [f.get('name') for f in folders]
|
folderNames = [f.get('name') for f in folders]
|
||||||
|
|
@ -251,7 +249,7 @@ class SharepointProcessor:
|
||||||
|
|
||||||
async def _processSingle(fileInfo: Dict[str, Any]):
|
async def _processSingle(fileInfo: Dict[str, Any]):
|
||||||
try:
|
try:
|
||||||
fileContent = await self.services.sharepoint.download_file(sourceSiteInfo['id'], fileInfo['id'])
|
fileContent = await self.services.sharepoint.downloadFile(sourceSiteInfo['id'], fileInfo['id'])
|
||||||
if not fileContent:
|
if not fileContent:
|
||||||
return {'error': f"Failed to download file: {fileInfo['name']}"}
|
return {'error': f"Failed to download file: {fileInfo['name']}"}
|
||||||
try:
|
try:
|
||||||
|
|
@ -260,7 +258,7 @@ class SharepointProcessor:
|
||||||
textContent = fileContent.decode('latin-1')
|
textContent = fileContent.decode('latin-1')
|
||||||
result = self.services.neutralization.processText(textContent)
|
result = self.services.neutralization.processText(textContent)
|
||||||
neutralizedFilename = f"neutralized_{fileInfo['name']}"
|
neutralizedFilename = f"neutralized_{fileInfo['name']}"
|
||||||
uploadResult = await self.services.sharepoint.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
|
uploadResult = await self.services.sharepoint.uploadFile(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
|
||||||
if 'error' in uploadResult:
|
if 'error' in uploadResult:
|
||||||
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
|
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -204,9 +204,9 @@ class ManagerSyncDelta:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
|
f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
|
||||||
)
|
)
|
||||||
resolved = await self.services.sharepoint.find_site_by_url(
|
resolved = await self.services.sharepoint.findSiteByUrl(
|
||||||
hostname=self.SHAREPOINT_HOSTNAME,
|
hostname=self.SHAREPOINT_HOSTNAME,
|
||||||
site_path=self.SHAREPOINT_SITE_PATH
|
sitePath=self.SHAREPOINT_SITE_PATH
|
||||||
)
|
)
|
||||||
|
|
||||||
if not resolved:
|
if not resolved:
|
||||||
|
|
@ -223,9 +223,9 @@ class ManagerSyncDelta:
|
||||||
|
|
||||||
# Test site access by listing root of the drive
|
# Test site access by listing root of the drive
|
||||||
logger.info("Testing site access using resolved site ID...")
|
logger.info("Testing site access using resolved site ID...")
|
||||||
test_result = await self.services.sharepoint.list_folder_contents(
|
test_result = await self.services.sharepoint.listFolderContents(
|
||||||
site_id=self.targetSite["id"],
|
siteId=self.targetSite["id"],
|
||||||
folder_path=""
|
folderPath=""
|
||||||
)
|
)
|
||||||
|
|
||||||
if test_result is not None:
|
if test_result is not None:
|
||||||
|
|
@ -293,8 +293,8 @@ class ManagerSyncDelta:
|
||||||
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
|
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
|
||||||
try:
|
try:
|
||||||
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
|
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
|
||||||
excel_content = await self.services.sharepoint.download_file_by_path(
|
excel_content = await self.services.sharepoint.downloadFileByPath(
|
||||||
site_id=self.targetSite['id'], file_path=file_path
|
siteId=self.targetSite['id'], filePath=file_path
|
||||||
)
|
)
|
||||||
existing_data, existing_headers = self.parseExcelContent(excel_content)
|
existing_data, existing_headers = self.parseExcelContent(excel_content)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -307,16 +307,16 @@ class ManagerSyncDelta:
|
||||||
|
|
||||||
await self.backupSharepointFile(filename=sync_file_name)
|
await self.backupSharepointFile(filename=sync_file_name)
|
||||||
excel_bytes = self.createExcelContent(merged_data, existing_headers)
|
excel_bytes = self.createExcelContent(merged_data, existing_headers)
|
||||||
await self.services.sharepoint.upload_file(
|
await self.services.sharepoint.uploadFile(
|
||||||
site_id=self.targetSite['id'],
|
siteId=self.targetSite['id'],
|
||||||
folder_path=self.SHAREPOINT_MAIN_FOLDER,
|
folderPath=self.SHAREPOINT_MAIN_FOLDER,
|
||||||
file_name=sync_file_name,
|
fileName=sync_file_name,
|
||||||
content=excel_bytes,
|
content=excel_bytes,
|
||||||
)
|
)
|
||||||
# Import back to tickets
|
# Import back to tickets
|
||||||
try:
|
try:
|
||||||
excel_content = await self.services.sharepoint.download_file_by_path(
|
excel_content = await self.services.sharepoint.downloadFileByPath(
|
||||||
site_id=self.targetSite['id'], file_path=file_path
|
siteId=self.targetSite['id'], filePath=file_path
|
||||||
)
|
)
|
||||||
excel_rows, _ = self.parseExcelContent(excel_content)
|
excel_rows, _ = self.parseExcelContent(excel_content)
|
||||||
self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets")
|
self._log_audit_event("SYNC_IMPORT", "INFO", f"Importing {len(excel_rows)} Excel rows back to tickets")
|
||||||
|
|
@ -333,8 +333,8 @@ class ManagerSyncDelta:
|
||||||
existing_data: list[dict] = []
|
existing_data: list[dict] = []
|
||||||
try:
|
try:
|
||||||
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
|
file_path = f"{self.SHAREPOINT_MAIN_FOLDER}/{sync_file_name}"
|
||||||
csv_content = await self.services.sharepoint.download_file_by_path(
|
csv_content = await self.services.sharepoint.downloadFileByPath(
|
||||||
site_id=self.targetSite['id'], file_path=file_path
|
siteId=self.targetSite['id'], filePath=file_path
|
||||||
)
|
)
|
||||||
csv_lines = csv_content.decode('utf-8').split('\n')
|
csv_lines = csv_content.decode('utf-8').split('\n')
|
||||||
if len(csv_lines) >= 2:
|
if len(csv_lines) >= 2:
|
||||||
|
|
@ -348,16 +348,16 @@ class ManagerSyncDelta:
|
||||||
await self.backupSharepointFile(filename=sync_file_name)
|
await self.backupSharepointFile(filename=sync_file_name)
|
||||||
merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data)
|
merged_data, _ = self.mergeJiraWithExistingDetailed(data_list, existing_data)
|
||||||
csv_bytes = self.createCsvContent(merged_data, existing_headers)
|
csv_bytes = self.createCsvContent(merged_data, existing_headers)
|
||||||
await self.services.sharepoint.upload_file(
|
await self.services.sharepoint.uploadFile(
|
||||||
site_id=self.targetSite['id'],
|
siteId=self.targetSite['id'],
|
||||||
folder_path=self.SHAREPOINT_MAIN_FOLDER,
|
folderPath=self.SHAREPOINT_MAIN_FOLDER,
|
||||||
file_name=sync_file_name,
|
fileName=sync_file_name,
|
||||||
content=csv_bytes,
|
content=csv_bytes,
|
||||||
)
|
)
|
||||||
# Import from CSV
|
# Import from CSV
|
||||||
try:
|
try:
|
||||||
csv_content = await self.services.sharepoint.download_file_by_path(
|
csv_content = await self.services.sharepoint.downloadFileByPath(
|
||||||
site_id=self.targetSite['id'], file_path=file_path
|
siteId=self.targetSite['id'], filePath=file_path
|
||||||
)
|
)
|
||||||
df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python')
|
df = pd.read_csv(io.BytesIO(csv_content), skiprows=2, quoting=1, escapechar='\\', on_bad_lines='skip', engine='python')
|
||||||
csv_rows = df.to_dict('records')
|
csv_rows = df.to_dict('records')
|
||||||
|
|
@ -388,12 +388,12 @@ class ManagerSyncDelta:
|
||||||
try:
|
try:
|
||||||
timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S")
|
timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y%m%d_%H%M%S")
|
||||||
backup_filename = f"backup_{timestamp}_{filename}"
|
backup_filename = f"backup_{timestamp}_{filename}"
|
||||||
await self.services.sharepoint.copy_file_async(
|
await self.services.sharepoint.copyFileAsync(
|
||||||
site_id=self.targetSite['id'],
|
siteId=self.targetSite['id'],
|
||||||
source_folder=self.SHAREPOINT_MAIN_FOLDER,
|
sourceFolder=self.SHAREPOINT_MAIN_FOLDER,
|
||||||
source_file=filename,
|
sourceFile=filename,
|
||||||
dest_folder=self.SHAREPOINT_BACKUP_FOLDER,
|
destFolder=self.SHAREPOINT_BACKUP_FOLDER,
|
||||||
dest_file=backup_filename,
|
destFile=backup_filename,
|
||||||
)
|
)
|
||||||
self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}")
|
self._log_audit_event("SYNC_BACKUP", "SUCCESS", f"Backed up file: {filename} -> {backup_filename}")
|
||||||
return True
|
return True
|
||||||
|
|
@ -679,7 +679,7 @@ class ManagerSyncDelta:
|
||||||
connectorType=connectorType,
|
connectorType=connectorType,
|
||||||
connectorParams=connectorParams,
|
connectorParams=connectorParams,
|
||||||
)
|
)
|
||||||
attributes = await ticket_interface.connector_ticket.read_attributes()
|
attributes = await ticket_interface.connector_ticket.readAttributes()
|
||||||
if not attributes:
|
if not attributes:
|
||||||
logger.warning("No ticket attributes returned; nothing to write.")
|
logger.warning("No ticket attributes returned; nothing to write.")
|
||||||
return False
|
return False
|
||||||
|
|
@ -713,7 +713,7 @@ class ManagerSyncDelta:
|
||||||
connectorType=connectorType,
|
connectorType=connectorType,
|
||||||
connectorParams=connectorParams,
|
connectorParams=connectorParams,
|
||||||
)
|
)
|
||||||
tickets = await ticket_interface.connector_ticket.read_tasks(limit=sampleLimit)
|
tickets = await ticket_interface.connector_ticket.readTasks(limit=sampleLimit)
|
||||||
if not tickets:
|
if not tickets:
|
||||||
logger.warning("No tickets returned; nothing to write.")
|
logger.warning("No tickets returned; nothing to write.")
|
||||||
return False
|
return False
|
||||||
|
|
|
||||||
|
|
@ -54,8 +54,6 @@ class AiObjects:
|
||||||
# No need to manually create connectors - they're auto-discovered
|
# No need to manually create connectors - they're auto-discovered
|
||||||
return cls()
|
return cls()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
|
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
|
||||||
"""Select the best model using dynamic model selection system."""
|
"""Select the best model using dynamic model selection system."""
|
||||||
# Get available models from the dynamic registry
|
# Get available models from the dynamic registry
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import uuid
|
||||||
|
|
||||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
from modules.interfaces.interfaceDbAppAccess import AppAccess
|
from modules.interfaces.interfaceDbAppAccess import AppAccess
|
||||||
from modules.datamodels.datamodelUam import (
|
from modules.datamodels.datamodelUam import (
|
||||||
User,
|
User,
|
||||||
|
|
@ -604,8 +604,8 @@ class AppObjects:
|
||||||
externalUsername=externalUsername,
|
externalUsername=externalUsername,
|
||||||
externalEmail=externalEmail,
|
externalEmail=externalEmail,
|
||||||
status=status,
|
status=status,
|
||||||
connectedAt=get_utc_timestamp(),
|
connectedAt=getUtcTimestamp(),
|
||||||
lastChecked=get_utc_timestamp(),
|
lastChecked=getUtcTimestamp(),
|
||||||
expiresAt=None, # Optional field, set to None by default
|
expiresAt=None, # Optional field, set to None by default
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -755,7 +755,7 @@ class AppObjects:
|
||||||
if not token.id:
|
if not token.id:
|
||||||
token.id = str(uuid.uuid4())
|
token.id = str(uuid.uuid4())
|
||||||
if not token.createdAt:
|
if not token.createdAt:
|
||||||
token.createdAt = get_utc_timestamp()
|
token.createdAt = getUtcTimestamp()
|
||||||
|
|
||||||
# If replace_existing is True, delete old access tokens for this user and authority first
|
# If replace_existing is True, delete old access tokens for this user and authority first
|
||||||
if replace_existing:
|
if replace_existing:
|
||||||
|
|
@ -822,7 +822,7 @@ class AppObjects:
|
||||||
if not token.id:
|
if not token.id:
|
||||||
token.id = str(uuid.uuid4())
|
token.id = str(uuid.uuid4())
|
||||||
if not token.createdAt:
|
if not token.createdAt:
|
||||||
token.createdAt = get_utc_timestamp()
|
token.createdAt = getUtcTimestamp()
|
||||||
|
|
||||||
# Convert to dict and ensure all fields are properly set
|
# Convert to dict and ensure all fields are properly set
|
||||||
token_dict = token.model_dump()
|
token_dict = token.model_dump()
|
||||||
|
|
@ -932,7 +932,7 @@ class AppObjects:
|
||||||
return True
|
return True
|
||||||
tokenUpdate = {
|
tokenUpdate = {
|
||||||
"status": TokenStatus.REVOKED,
|
"status": TokenStatus.REVOKED,
|
||||||
"revokedAt": get_utc_timestamp(),
|
"revokedAt": getUtcTimestamp(),
|
||||||
"revokedBy": revokedBy,
|
"revokedBy": revokedBy,
|
||||||
"reason": reason or "revoked",
|
"reason": reason or "revoked",
|
||||||
}
|
}
|
||||||
|
|
@ -970,7 +970,7 @@ class AppObjects:
|
||||||
t["id"],
|
t["id"],
|
||||||
{
|
{
|
||||||
"status": TokenStatus.REVOKED,
|
"status": TokenStatus.REVOKED,
|
||||||
"revokedAt": get_utc_timestamp(),
|
"revokedAt": getUtcTimestamp(),
|
||||||
"revokedBy": revokedBy,
|
"revokedBy": revokedBy,
|
||||||
"reason": reason or "session logout",
|
"reason": reason or "session logout",
|
||||||
},
|
},
|
||||||
|
|
@ -1008,7 +1008,7 @@ class AppObjects:
|
||||||
t["id"],
|
t["id"],
|
||||||
{
|
{
|
||||||
"status": TokenStatus.REVOKED,
|
"status": TokenStatus.REVOKED,
|
||||||
"revokedAt": get_utc_timestamp(),
|
"revokedAt": getUtcTimestamp(),
|
||||||
"revokedBy": revokedBy,
|
"revokedBy": revokedBy,
|
||||||
"reason": reason or "admin revoke",
|
"reason": reason or "admin revoke",
|
||||||
},
|
},
|
||||||
|
|
@ -1022,7 +1022,7 @@ class AppObjects:
|
||||||
def cleanupExpiredTokens(self) -> int:
|
def cleanupExpiredTokens(self) -> int:
|
||||||
"""Clean up expired tokens for all connections, returns count of cleaned tokens"""
|
"""Clean up expired tokens for all connections, returns count of cleaned tokens"""
|
||||||
try:
|
try:
|
||||||
current_time = get_utc_timestamp()
|
current_time = getUtcTimestamp()
|
||||||
cleaned_count = 0
|
cleaned_count = 0
|
||||||
|
|
||||||
# Get all tokens
|
# Get all tokens
|
||||||
|
|
@ -1100,7 +1100,7 @@ class AppObjects:
|
||||||
# Update existing config
|
# Update existing config
|
||||||
update_data = existing_config.model_dump()
|
update_data = existing_config.model_dump()
|
||||||
update_data.update(config_data)
|
update_data.update(config_data)
|
||||||
update_data["updatedAt"] = get_utc_timestamp()
|
update_data["updatedAt"] = getUtcTimestamp()
|
||||||
|
|
||||||
updated_config = DataNeutraliserConfig(**update_data)
|
updated_config = DataNeutraliserConfig(**update_data)
|
||||||
self.db.recordModify(
|
self.db.recordModify(
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ from modules.datamodels.datamodelUam import User
|
||||||
|
|
||||||
# DYNAMIC PART: Connectors to the Interface
|
# DYNAMIC PART: Connectors to the Interface
|
||||||
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
from modules.connectors.connectorDbPostgre import DatabaseConnector
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
# Basic Configurations
|
# Basic Configurations
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
@ -66,56 +66,56 @@ class ChatObjects:
|
||||||
|
|
||||||
# ===== Generic Utility Methods =====
|
# ===== Generic Utility Methods =====
|
||||||
|
|
||||||
def _is_object_field(self, field_type) -> bool:
|
def _isObjectField(self, fieldType) -> bool:
|
||||||
"""Check if a field type represents a complex object (not a simple type)."""
|
"""Check if a field type represents a complex object (not a simple type)."""
|
||||||
# Simple scalar types
|
# Simple scalar types
|
||||||
if field_type in (str, int, float, bool, type(None)):
|
if fieldType in (str, int, float, bool, type(None)):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Everything else is an object
|
# Everything else is an object
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
|
def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
|
||||||
"""Separate simple fields from object fields based on Pydantic model structure."""
|
"""Separate simple fields from object fields based on Pydantic model structure."""
|
||||||
simple_fields = {}
|
simpleFields = {}
|
||||||
object_fields = {}
|
objectFields = {}
|
||||||
|
|
||||||
# Get field information from the Pydantic model
|
# Get field information from the Pydantic model
|
||||||
model_fields = model_class.model_fields
|
modelFields = model_class.model_fields
|
||||||
|
|
||||||
for field_name, value in data.items():
|
for fieldName, value in data.items():
|
||||||
# Check if this field should be stored as JSONB in the database
|
# Check if this field should be stored as JSONB in the database
|
||||||
if field_name in model_fields:
|
if fieldName in modelFields:
|
||||||
field_info = model_fields[field_name]
|
fieldInfo = modelFields[fieldName]
|
||||||
# Pydantic v2 only
|
# Pydantic v2 only
|
||||||
field_type = field_info.annotation
|
fieldType = fieldInfo.annotation
|
||||||
|
|
||||||
# Always route relational/object fields to object_fields for separate handling
|
# Always route relational/object fields to object_fields for separate handling
|
||||||
if field_name in ['documents', 'stats']:
|
if fieldName in ['documents', 'stats']:
|
||||||
object_fields[field_name] = value
|
objectFields[fieldName] = value
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check if this is a JSONB field (Dict, List, or complex types)
|
# Check if this is a JSONB field (Dict, List, or complex types)
|
||||||
if (field_type == dict or
|
if (fieldType == dict or
|
||||||
field_type == list or
|
fieldType == list or
|
||||||
(hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or
|
(hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list)) or
|
||||||
field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
|
fieldName in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
|
||||||
# Store as JSONB - include in simple_fields for database storage
|
# Store as JSONB - include in simple_fields for database storage
|
||||||
simple_fields[field_name] = value
|
simpleFields[fieldName] = value
|
||||||
elif isinstance(value, (str, int, float, bool, type(None))):
|
elif isinstance(value, (str, int, float, bool, type(None))):
|
||||||
# Simple scalar types
|
# Simple scalar types
|
||||||
simple_fields[field_name] = value
|
simpleFields[fieldName] = value
|
||||||
else:
|
else:
|
||||||
# Complex objects that should be filtered out
|
# Complex objects that should be filtered out
|
||||||
object_fields[field_name] = value
|
objectFields[fieldName] = value
|
||||||
else:
|
else:
|
||||||
# Field not in model - treat as scalar if simple, otherwise filter out
|
# Field not in model - treat as scalar if simple, otherwise filter out
|
||||||
if isinstance(value, (str, int, float, bool, type(None))):
|
if isinstance(value, (str, int, float, bool, type(None))):
|
||||||
simple_fields[field_name] = value
|
simpleFields[fieldName] = value
|
||||||
else:
|
else:
|
||||||
object_fields[field_name] = value
|
objectFields[fieldName] = value
|
||||||
|
|
||||||
return simple_fields, object_fields
|
return simpleFields, objectFields
|
||||||
|
|
||||||
def _initializeServices(self):
|
def _initializeServices(self):
|
||||||
pass
|
pass
|
||||||
|
|
@ -240,8 +240,8 @@ class ChatObjects:
|
||||||
currentAction=workflow.get("currentAction", 0),
|
currentAction=workflow.get("currentAction", 0),
|
||||||
totalTasks=workflow.get("totalTasks", 0),
|
totalTasks=workflow.get("totalTasks", 0),
|
||||||
totalActions=workflow.get("totalActions", 0),
|
totalActions=workflow.get("totalActions", 0),
|
||||||
lastActivity=workflow.get("lastActivity", get_utc_timestamp()),
|
lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
|
||||||
startedAt=workflow.get("startedAt", get_utc_timestamp()),
|
startedAt=workflow.get("startedAt", getUtcTimestamp()),
|
||||||
logs=logs,
|
logs=logs,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
stats=stats,
|
stats=stats,
|
||||||
|
|
@ -257,7 +257,7 @@ class ChatObjects:
|
||||||
raise PermissionError("No permission to create workflows")
|
raise PermissionError("No permission to create workflows")
|
||||||
|
|
||||||
# Set timestamp if not present
|
# Set timestamp if not present
|
||||||
currentTime = get_utc_timestamp()
|
currentTime = getUtcTimestamp()
|
||||||
if "startedAt" not in workflowData:
|
if "startedAt" not in workflowData:
|
||||||
workflowData["startedAt"] = currentTime
|
workflowData["startedAt"] = currentTime
|
||||||
|
|
||||||
|
|
@ -265,10 +265,10 @@ class ChatObjects:
|
||||||
workflowData["lastActivity"] = currentTime
|
workflowData["lastActivity"] = currentTime
|
||||||
|
|
||||||
# Use generic field separation based on ChatWorkflow model
|
# Use generic field separation based on ChatWorkflow model
|
||||||
simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
|
simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
|
||||||
|
|
||||||
# Create workflow in database
|
# Create workflow in database
|
||||||
created = self.db.recordCreate(ChatWorkflow, simple_fields)
|
created = self.db.recordCreate(ChatWorkflow, simpleFields)
|
||||||
|
|
||||||
|
|
||||||
# Convert to ChatWorkflow model (empty related data for new workflow)
|
# Convert to ChatWorkflow model (empty related data for new workflow)
|
||||||
|
|
@ -302,13 +302,13 @@ class ChatObjects:
|
||||||
raise PermissionError(f"No permission to update workflow {workflowId}")
|
raise PermissionError(f"No permission to update workflow {workflowId}")
|
||||||
|
|
||||||
# Use generic field separation based on ChatWorkflow model
|
# Use generic field separation based on ChatWorkflow model
|
||||||
simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData)
|
simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData)
|
||||||
|
|
||||||
# Set update time for main workflow
|
# Set update time for main workflow
|
||||||
simple_fields["lastActivity"] = get_utc_timestamp()
|
simpleFields["lastActivity"] = getUtcTimestamp()
|
||||||
|
|
||||||
# Update main workflow in database
|
# Update main workflow in database
|
||||||
updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields)
|
updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields)
|
||||||
|
|
||||||
# Removed cascade writes for logs/messages/stats during workflow update.
|
# Removed cascade writes for logs/messages/stats during workflow update.
|
||||||
# CUD for child entities must be executed via dedicated service methods.
|
# CUD for child entities must be executed via dedicated service methods.
|
||||||
|
|
@ -423,7 +423,7 @@ class ChatObjects:
|
||||||
role=msg.get("role", "assistant"),
|
role=msg.get("role", "assistant"),
|
||||||
status=msg.get("status", "step"),
|
status=msg.get("status", "step"),
|
||||||
sequenceNr=msg.get("sequenceNr", 0),
|
sequenceNr=msg.get("sequenceNr", 0),
|
||||||
publishedAt=msg.get("publishedAt", get_utc_timestamp()),
|
publishedAt=msg.get("publishedAt", getUtcTimestamp()),
|
||||||
success=msg.get("success"),
|
success=msg.get("success"),
|
||||||
actionId=msg.get("actionId"),
|
actionId=msg.get("actionId"),
|
||||||
actionMethod=msg.get("actionMethod"),
|
actionMethod=msg.get("actionMethod"),
|
||||||
|
|
@ -490,20 +490,30 @@ class ChatObjects:
|
||||||
messageData["actionNumber"] = workflow.currentAction
|
messageData["actionNumber"] = workflow.currentAction
|
||||||
|
|
||||||
# Use generic field separation based on ChatMessage model
|
# Use generic field separation based on ChatMessage model
|
||||||
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
|
simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
|
||||||
|
|
||||||
# Handle documents separately - they will be stored in normalized documents table
|
# Handle documents separately - they will be stored in normalized documents table
|
||||||
documents_to_create = object_fields.get("documents", [])
|
documents_to_create = objectFields.get("documents", [])
|
||||||
|
|
||||||
# Create message in normalized table using only simple fields
|
# Create message in normalized table using only simple fields
|
||||||
createdMessage = self.db.recordCreate(ChatMessage, simple_fields)
|
createdMessage = self.db.recordCreate(ChatMessage, simpleFields)
|
||||||
|
|
||||||
|
|
||||||
# Create documents in normalized documents table
|
# Create documents in normalized documents table
|
||||||
created_documents = []
|
created_documents = []
|
||||||
for doc_data in documents_to_create:
|
for doc_data in documents_to_create:
|
||||||
# Use the document data directly
|
# Normalize to plain dict before assignment
|
||||||
doc_dict = doc_data
|
if isinstance(doc_data, ChatDocument):
|
||||||
|
doc_dict = doc_data.model_dump()
|
||||||
|
elif isinstance(doc_data, dict):
|
||||||
|
doc_dict = dict(doc_data)
|
||||||
|
else:
|
||||||
|
# Attempt to coerce to ChatDocument then dump
|
||||||
|
try:
|
||||||
|
doc_dict = ChatDocument(**doc_data).model_dump()
|
||||||
|
except Exception:
|
||||||
|
logger.error("Invalid document data type for message creation")
|
||||||
|
continue
|
||||||
|
|
||||||
doc_dict["messageId"] = createdMessage["id"]
|
doc_dict["messageId"] = createdMessage["id"]
|
||||||
created_doc = self.createDocument(doc_dict)
|
created_doc = self.createDocument(doc_dict)
|
||||||
|
|
@ -522,8 +532,8 @@ class ChatObjects:
|
||||||
role=createdMessage.get("role", "assistant"),
|
role=createdMessage.get("role", "assistant"),
|
||||||
status=createdMessage.get("status", "step"),
|
status=createdMessage.get("status", "step"),
|
||||||
sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number
|
sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number
|
||||||
publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()),
|
publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()),
|
||||||
stats=object_fields.get("stats"), # Use stats from object_fields
|
stats=objectFields.get("stats"), # Use stats from objectFields
|
||||||
roundNumber=createdMessage.get("roundNumber"),
|
roundNumber=createdMessage.get("roundNumber"),
|
||||||
taskNumber=createdMessage.get("taskNumber"),
|
taskNumber=createdMessage.get("taskNumber"),
|
||||||
actionNumber=createdMessage.get("actionNumber"),
|
actionNumber=createdMessage.get("actionNumber"),
|
||||||
|
|
@ -588,31 +598,41 @@ class ChatObjects:
|
||||||
raise PermissionError(f"No permission to modify workflow {workflowId}")
|
raise PermissionError(f"No permission to modify workflow {workflowId}")
|
||||||
|
|
||||||
# Use generic field separation based on ChatMessage model
|
# Use generic field separation based on ChatMessage model
|
||||||
simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
|
simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData)
|
||||||
|
|
||||||
# Ensure required fields present
|
# Ensure required fields present
|
||||||
for key in ["role", "agentName"]:
|
for key in ["role", "agentName"]:
|
||||||
if key not in simple_fields and key not in existingMessage:
|
if key not in simpleFields and key not in existingMessage:
|
||||||
simple_fields[key] = "assistant" if key == "role" else ""
|
simpleFields[key] = "assistant" if key == "role" else ""
|
||||||
|
|
||||||
# Ensure ID is in the dataset
|
# Ensure ID is in the dataset
|
||||||
if 'id' not in simple_fields:
|
if 'id' not in simpleFields:
|
||||||
simple_fields['id'] = messageId
|
simpleFields['id'] = messageId
|
||||||
|
|
||||||
# Convert createdAt to startedAt if needed
|
# Convert createdAt to startedAt if needed
|
||||||
if "createdAt" in simple_fields and "startedAt" not in simple_fields:
|
if "createdAt" in simpleFields and "startedAt" not in simpleFields:
|
||||||
simple_fields["startedAt"] = simple_fields["createdAt"]
|
simpleFields["startedAt"] = simpleFields["createdAt"]
|
||||||
del simple_fields["createdAt"]
|
del simpleFields["createdAt"]
|
||||||
|
|
||||||
# Update the message with simple fields only
|
# Update the message with simple fields only
|
||||||
updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields)
|
updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields)
|
||||||
|
|
||||||
# Handle object field updates (documents, stats) inline
|
# Handle object field updates (documents, stats) inline
|
||||||
if 'documents' in object_fields:
|
if 'documents' in objectFields:
|
||||||
documents_data = object_fields['documents']
|
documents_data = objectFields['documents']
|
||||||
try:
|
try:
|
||||||
for doc_data in documents_data:
|
for doc_data in documents_data:
|
||||||
doc_dict = doc_data
|
# Normalize to dict before mutation
|
||||||
|
if isinstance(doc_data, ChatDocument):
|
||||||
|
doc_dict = doc_data.model_dump()
|
||||||
|
elif isinstance(doc_data, dict):
|
||||||
|
doc_dict = dict(doc_data)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
doc_dict = ChatDocument(**doc_data).model_dump()
|
||||||
|
except Exception:
|
||||||
|
logger.error("Invalid document data type for message update")
|
||||||
|
continue
|
||||||
doc_dict["messageId"] = messageId
|
doc_dict["messageId"] = messageId
|
||||||
self.createDocument(doc_dict)
|
self.createDocument(doc_dict)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -732,11 +752,9 @@ class ChatObjects:
|
||||||
def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
|
def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
|
||||||
"""Creates a document for a message in normalized table."""
|
"""Creates a document for a message in normalized table."""
|
||||||
try:
|
try:
|
||||||
# Validate document data
|
# Validate and normalize document data to dict
|
||||||
document = ChatDocument(**documentData)
|
document = ChatDocument(**documentData)
|
||||||
|
created = self.db.recordCreate(ChatDocument, document.model_dump())
|
||||||
# Create document in normalized table
|
|
||||||
created = self.db.recordCreate(ChatDocument, document)
|
|
||||||
|
|
||||||
|
|
||||||
return ChatDocument(**created)
|
return ChatDocument(**created)
|
||||||
|
|
@ -785,7 +803,7 @@ class ChatObjects:
|
||||||
|
|
||||||
# Make sure required fields are present
|
# Make sure required fields are present
|
||||||
if "timestamp" not in logData:
|
if "timestamp" not in logData:
|
||||||
logData["timestamp"] = get_utc_timestamp()
|
logData["timestamp"] = getUtcTimestamp()
|
||||||
|
|
||||||
# Add status information if not present
|
# Add status information if not present
|
||||||
if "status" not in logData and "type" in logData:
|
if "status" not in logData and "type" in logData:
|
||||||
|
|
@ -882,7 +900,7 @@ class ChatObjects:
|
||||||
messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
|
messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
# Apply timestamp filtering in Python
|
# Apply timestamp filtering in Python
|
||||||
msg_timestamp = msg.get("publishedAt", get_utc_timestamp())
|
msg_timestamp = msg.get("publishedAt", getUtcTimestamp())
|
||||||
if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
|
if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -900,7 +918,7 @@ class ChatObjects:
|
||||||
role=msg.get("role", "assistant"),
|
role=msg.get("role", "assistant"),
|
||||||
status=msg.get("status", "step"),
|
status=msg.get("status", "step"),
|
||||||
sequenceNr=msg.get("sequenceNr", 0),
|
sequenceNr=msg.get("sequenceNr", 0),
|
||||||
publishedAt=msg.get("publishedAt", get_utc_timestamp()),
|
publishedAt=msg.get("publishedAt", getUtcTimestamp()),
|
||||||
success=msg.get("success"),
|
success=msg.get("success"),
|
||||||
actionId=msg.get("actionId"),
|
actionId=msg.get("actionId"),
|
||||||
actionMethod=msg.get("actionMethod"),
|
actionMethod=msg.get("actionMethod"),
|
||||||
|
|
@ -923,7 +941,7 @@ class ChatObjects:
|
||||||
logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
|
logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
|
||||||
for log in logs:
|
for log in logs:
|
||||||
# Apply timestamp filtering in Python
|
# Apply timestamp filtering in Python
|
||||||
log_timestamp = log.get("timestamp", get_utc_timestamp())
|
log_timestamp = log.get("timestamp", getUtcTimestamp())
|
||||||
if afterTimestamp is not None and log_timestamp <= afterTimestamp:
|
if afterTimestamp is not None and log_timestamp <= afterTimestamp:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -938,7 +956,7 @@ class ChatObjects:
|
||||||
stats = self.getStats(workflowId)
|
stats = self.getStats(workflowId)
|
||||||
for stat in stats:
|
for stat in stats:
|
||||||
# Apply timestamp filtering in Python
|
# Apply timestamp filtering in Python
|
||||||
stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else get_utc_timestamp()
|
stat_timestamp = stat.createdAt if hasattr(stat, 'createdAt') else getUtcTimestamp()
|
||||||
if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
|
if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from modules.datamodels.datamodelUtils import Prompt
|
||||||
from modules.datamodels.datamodelVoice import VoiceSettings
|
from modules.datamodels.datamodelVoice import VoiceSettings
|
||||||
from modules.datamodels.datamodelUam import User, Mandate
|
from modules.datamodels.datamodelUam import User, Mandate
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -466,7 +466,7 @@ class ComponentObjects:
|
||||||
# Ensure proper values, use defaults for invalid data
|
# Ensure proper values, use defaults for invalid data
|
||||||
creationDate = file.get("creationDate")
|
creationDate = file.get("creationDate")
|
||||||
if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0:
|
if creationDate is None or not isinstance(creationDate, (int, float)) or creationDate <= 0:
|
||||||
creationDate = get_utc_timestamp()
|
creationDate = getUtcTimestamp()
|
||||||
|
|
||||||
fileName = file.get("fileName")
|
fileName = file.get("fileName")
|
||||||
if not fileName or fileName == "None":
|
if not fileName or fileName == "None":
|
||||||
|
|
@ -503,7 +503,7 @@ class ComponentObjects:
|
||||||
# Get creation date from record or use current time
|
# Get creation date from record or use current time
|
||||||
creationDate = file.get("creationDate")
|
creationDate = file.get("creationDate")
|
||||||
if not creationDate:
|
if not creationDate:
|
||||||
creationDate = get_utc_timestamp()
|
creationDate = getUtcTimestamp()
|
||||||
|
|
||||||
return FileItem(
|
return FileItem(
|
||||||
id=file.get("id"),
|
id=file.get("id"),
|
||||||
|
|
@ -881,9 +881,9 @@ class ComponentObjects:
|
||||||
# Ensure timestamps are set for validation
|
# Ensure timestamps are set for validation
|
||||||
settings_data = filteredSettings[0]
|
settings_data = filteredSettings[0]
|
||||||
if not settings_data.get("creationDate"):
|
if not settings_data.get("creationDate"):
|
||||||
settings_data["creationDate"] = get_utc_timestamp()
|
settings_data["creationDate"] = getUtcTimestamp()
|
||||||
if not settings_data.get("lastModified"):
|
if not settings_data.get("lastModified"):
|
||||||
settings_data["lastModified"] = get_utc_timestamp()
|
settings_data["lastModified"] = getUtcTimestamp()
|
||||||
|
|
||||||
return VoiceSettings(**settings_data)
|
return VoiceSettings(**settings_data)
|
||||||
|
|
||||||
|
|
@ -931,7 +931,7 @@ class ComponentObjects:
|
||||||
raise ValueError(f"Voice settings not found for user {userId}")
|
raise ValueError(f"Voice settings not found for user {userId}")
|
||||||
|
|
||||||
# Update lastModified timestamp
|
# Update lastModified timestamp
|
||||||
updateData["lastModified"] = get_utc_timestamp()
|
updateData["lastModified"] = getUtcTimestamp()
|
||||||
|
|
||||||
# Update voice settings record
|
# Update voice settings record
|
||||||
success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData)
|
success = self.db.recordModify(VoiceSettings, existingSettings.id, updateData)
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ class TicketInterface:
|
||||||
self.task_sync_definition = task_sync_definition
|
self.task_sync_definition = task_sync_definition
|
||||||
|
|
||||||
async def exportTicketsAsList(self) -> list[dict]:
|
async def exportTicketsAsList(self) -> list[dict]:
|
||||||
tickets: list[dict] = await self.connector_ticket.read_tasks(limit=0)
|
tickets: list[dict] = await self.connector_ticket.readTasks(limit=0)
|
||||||
transformed_tasks = self._transformTicketRecords(tickets, includePut=True)
|
transformed_tasks = self._transformTicketRecords(tickets, includePut=True)
|
||||||
# Return plain dictionaries filtered by presence of ID
|
# Return plain dictionaries filtered by presence of ID
|
||||||
rows: list[dict] = []
|
rows: list[dict] = []
|
||||||
|
|
@ -57,7 +57,7 @@ class TicketInterface:
|
||||||
if fields:
|
if fields:
|
||||||
updates.append({"ID": task_id, "fields": fields})
|
updates.append({"ID": task_id, "fields": fields})
|
||||||
if updates:
|
if updates:
|
||||||
await self.connector_ticket.write_tasks(updates)
|
await self.connector_ticket.writeTasks(updates)
|
||||||
|
|
||||||
def _transformTicketRecords(
|
def _transformTicketRecords(
|
||||||
self, tasks: list[dict], includePut: bool = False
|
self, tasks: list[dict], includePut: bool = False
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from typing import Dict, Any, Optional, List
|
||||||
from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech
|
from modules.connectors.connectorVoiceGoogle import ConnectorGoogleSpeech
|
||||||
from modules.datamodels.datamodelVoice import VoiceSettings
|
from modules.datamodels.datamodelVoice import VoiceSettings
|
||||||
from modules.datamodels.datamodelUam import User
|
from modules.datamodels.datamodelUam import User
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -269,7 +269,7 @@ class VoiceObjects:
|
||||||
logger.info(f"Creating voice settings: {settingsData}")
|
logger.info(f"Creating voice settings: {settingsData}")
|
||||||
|
|
||||||
# Add timestamps
|
# Add timestamps
|
||||||
currentTime = get_utc_timestamp()
|
currentTime = getUtcTimestamp()
|
||||||
settingsData["creationDate"] = currentTime
|
settingsData["creationDate"] = currentTime
|
||||||
settingsData["lastModified"] = currentTime
|
settingsData["lastModified"] = currentTime
|
||||||
|
|
||||||
|
|
@ -298,7 +298,7 @@ class VoiceObjects:
|
||||||
logger.info(f"Updating voice settings for user {userId}: {settingsData}")
|
logger.info(f"Updating voice settings for user {userId}: {settingsData}")
|
||||||
|
|
||||||
# Add last modified timestamp
|
# Add last modified timestamp
|
||||||
settingsData["lastModified"] = get_utc_timestamp()
|
settingsData["lastModified"] = getUtcTimestamp()
|
||||||
|
|
||||||
# Create updated VoiceSettings object
|
# Create updated VoiceSettings object
|
||||||
voiceSettings = VoiceSettings(**settingsData)
|
voiceSettings = VoiceSettings(**settingsData)
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ from modules.datamodels.datamodelUam import User, UserConnection, AuthAuthority,
|
||||||
from modules.datamodels.datamodelSecurity import Token
|
from modules.datamodels.datamodelSecurity import Token
|
||||||
from modules.security.auth import getCurrentUser, limiter
|
from modules.security.auth import getCurrentUser, limiter
|
||||||
from modules.interfaces.interfaceDbAppObjects import getInterface
|
from modules.interfaces.interfaceDbAppObjects import getInterface
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -64,7 +64,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str,
|
||||||
if not expires_at:
|
if not expires_at:
|
||||||
return "none", None
|
return "none", None
|
||||||
|
|
||||||
current_time = get_utc_timestamp()
|
current_time = getUtcTimestamp()
|
||||||
|
|
||||||
# Add 5 minute buffer for proactive refresh
|
# Add 5 minute buffer for proactive refresh
|
||||||
buffer_time = 5 * 60 # 5 minutes in seconds
|
buffer_time = 5 * 60 # 5 minutes in seconds
|
||||||
|
|
@ -247,7 +247,7 @@ async def update_connection(
|
||||||
setattr(connection, field, value)
|
setattr(connection, field, value)
|
||||||
|
|
||||||
# Update lastChecked timestamp using UTC timestamp
|
# Update lastChecked timestamp using UTC timestamp
|
||||||
connection.lastChecked = get_utc_timestamp()
|
connection.lastChecked = getUtcTimestamp()
|
||||||
|
|
||||||
# Update connection - models now handle timestamp serialization automatically
|
# Update connection - models now handle timestamp serialization automatically
|
||||||
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
|
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
|
||||||
|
|
@ -382,7 +382,7 @@ async def disconnect_service(
|
||||||
|
|
||||||
# Update connection status
|
# Update connection status
|
||||||
connection.status = ConnectionStatus.INACTIVE
|
connection.status = ConnectionStatus.INACTIVE
|
||||||
connection.lastChecked = get_utc_timestamp()
|
connection.lastChecked = getUtcTimestamp()
|
||||||
|
|
||||||
# Update connection record - models now handle timestamp serialization automatically
|
# Update connection record - models now handle timestamp serialization automatically
|
||||||
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
|
interface.db.recordModify(UserConnection, connectionId, connection.model_dump())
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ async def get_neutralization_config(
|
||||||
"""Get data neutralization configuration"""
|
"""Get data neutralization configuration"""
|
||||||
try:
|
try:
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
config = service.get_config()
|
config = service.getConfig()
|
||||||
|
|
||||||
if not config:
|
if not config:
|
||||||
# Return default config instead of 404
|
# Return default config instead of 404
|
||||||
|
|
@ -69,7 +69,7 @@ async def save_neutralization_config(
|
||||||
"""Save or update data neutralization configuration"""
|
"""Save or update data neutralization configuration"""
|
||||||
try:
|
try:
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
config = service.save_config(config_data)
|
config = service.saveConfig(config_data)
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
@ -99,7 +99,7 @@ async def neutralize_text(
|
||||||
)
|
)
|
||||||
|
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
result = service.neutralize_text(text, file_id)
|
result = service.neutralizeText(text, file_id)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -130,7 +130,7 @@ async def resolve_text(
|
||||||
)
|
)
|
||||||
|
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
resolved_text = service.resolve_text(text)
|
resolved_text = service.resolveText(text)
|
||||||
|
|
||||||
return {"resolved_text": resolved_text}
|
return {"resolved_text": resolved_text}
|
||||||
|
|
||||||
|
|
@ -153,7 +153,7 @@ async def get_neutralization_attributes(
|
||||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
"""Get neutralization attributes, optionally filtered by file ID"""
|
||||||
try:
|
try:
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
attributes = service.get_attributes(fileId)
|
attributes = service.getAttributes(fileId)
|
||||||
|
|
||||||
return attributes
|
return attributes
|
||||||
|
|
||||||
|
|
@ -183,7 +183,7 @@ async def process_sharepoint_files(
|
||||||
)
|
)
|
||||||
|
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
result = await service.process_sharepoint_files(source_path, target_path)
|
result = await service.processSharepointFiles(source_path, target_path)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -212,7 +212,7 @@ async def batch_process_files(
|
||||||
)
|
)
|
||||||
|
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
result = service.batch_neutralize_files(files_data)
|
result = service.batchNeutralizeFiles(files_data)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -234,7 +234,7 @@ async def get_neutralization_stats(
|
||||||
"""Get neutralization processing statistics"""
|
"""Get neutralization processing statistics"""
|
||||||
try:
|
try:
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
stats = service.get_processing_stats()
|
stats = service.getProcessingStats()
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
|
@ -255,7 +255,7 @@ async def cleanup_file_attributes(
|
||||||
"""Clean up neutralization attributes for a specific file"""
|
"""Clean up neutralization attributes for a specific file"""
|
||||||
try:
|
try:
|
||||||
service = NeutralizationPlayground(currentUser)
|
service = NeutralizationPlayground(currentUser)
|
||||||
success = service.cleanup_file_attributes(fileId)
|
success = service.cleanupFileAttributes(fileId)
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
return {"message": f"Successfully cleaned up attributes for file {fileId}"}
|
return {"message": f"Successfully cleaned up attributes for file {fileId}"}
|
||||||
|
|
|
||||||
|
|
@ -181,9 +181,9 @@ async def reset_user_password(
|
||||||
# Log password reset
|
# Log password reset
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_security_event(
|
audit_logger.logSecurityEvent(
|
||||||
user_id=str(currentUser.id),
|
userId=str(currentUser.id),
|
||||||
mandate_id=str(currentUser.mandateId),
|
mandateId=str(currentUser.mandateId),
|
||||||
action="password_reset",
|
action="password_reset",
|
||||||
details=f"Reset password for user {userId}"
|
details=f"Reset password for user {userId}"
|
||||||
)
|
)
|
||||||
|
|
@ -257,9 +257,9 @@ async def change_password(
|
||||||
# Log password change
|
# Log password change
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_security_event(
|
audit_logger.logSecurityEvent(
|
||||||
user_id=str(currentUser.id),
|
userId=str(currentUser.id),
|
||||||
mandate_id=str(currentUser.mandateId),
|
mandateId=str(currentUser.mandateId),
|
||||||
action="password_change",
|
action="password_change",
|
||||||
details="User changed their own password"
|
details="User changed their own password"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
|
from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
|
||||||
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
|
from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatus, UserConnection
|
||||||
from modules.security.auth import getCurrentUser, limiter
|
from modules.security.auth import getCurrentUser, limiter
|
||||||
from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
|
from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -356,7 +356,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
tokenRefresh=token_response.get("refresh_token", ""),
|
tokenRefresh=token_response.get("refresh_token", ""),
|
||||||
tokenType="bearer",
|
tokenType="bearer",
|
||||||
expiresAt=jwt_expires_at.timestamp(),
|
expiresAt=jwt_expires_at.timestamp(),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save access token (no connectionId)
|
# Save access token (no connectionId)
|
||||||
|
|
@ -460,8 +460,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
logger.info(f"Updating connection {connection_id} for user {user.username}")
|
logger.info(f"Updating connection {connection_id} for user {user.username}")
|
||||||
# Update connection with external service details
|
# Update connection with external service details
|
||||||
connection.status = ConnectionStatus.ACTIVE
|
connection.status = ConnectionStatus.ACTIVE
|
||||||
connection.lastChecked = get_utc_timestamp()
|
connection.lastChecked = getUtcTimestamp()
|
||||||
connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
|
connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
|
||||||
connection.externalId = user_info.get("id")
|
connection.externalId = user_info.get("id")
|
||||||
connection.externalUsername = user_info.get("email")
|
connection.externalUsername = user_info.get("email")
|
||||||
connection.externalEmail = user_info.get("email")
|
connection.externalEmail = user_info.get("email")
|
||||||
|
|
@ -479,8 +479,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
tokenAccess=token_response["access_token"],
|
tokenAccess=token_response["access_token"],
|
||||||
tokenRefresh=token_response.get("refresh_token", ""),
|
tokenRefresh=token_response.get("refresh_token", ""),
|
||||||
tokenType=token_response.get("token_type", "bearer"),
|
tokenType=token_response.get("token_type", "bearer"),
|
||||||
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
|
expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
interface.saveConnectionToken(token)
|
interface.saveConnectionToken(token)
|
||||||
|
|
||||||
|
|
@ -498,8 +498,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
id: '{connection.id}',
|
id: '{connection.id}',
|
||||||
status: 'connected',
|
status: 'connected',
|
||||||
type: 'google',
|
type: 'google',
|
||||||
lastChecked: {get_utc_timestamp()},
|
lastChecked: {getUtcTimestamp()},
|
||||||
expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
|
expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
|
||||||
}}
|
}}
|
||||||
}}, '*');
|
}}, '*');
|
||||||
// Wait for message to be sent before closing
|
// Wait for message to be sent before closing
|
||||||
|
|
@ -592,11 +592,11 @@ async def logout(
|
||||||
# Log successful logout
|
# Log successful logout
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_user_access(
|
audit_logger.logUserAccess(
|
||||||
user_id=str(currentUser.id),
|
userId=str(currentUser.id),
|
||||||
mandate_id=str(currentUser.mandateId),
|
mandateId=str(currentUser.mandateId),
|
||||||
action="logout",
|
action="logout",
|
||||||
success_info="google_auth_logout"
|
successInfo="google_auth_logout"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
|
|
@ -726,12 +726,12 @@ async def refresh_token(
|
||||||
|
|
||||||
# Update the connection status and timing
|
# Update the connection status and timing
|
||||||
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
|
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
|
||||||
google_connection.lastChecked = get_utc_timestamp()
|
google_connection.lastChecked = getUtcTimestamp()
|
||||||
google_connection.status = ConnectionStatus.ACTIVE
|
google_connection.status = ConnectionStatus.ACTIVE
|
||||||
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump())
|
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.model_dump())
|
||||||
|
|
||||||
# Calculate time until expiration
|
# Calculate time until expiration
|
||||||
current_time = get_utc_timestamp()
|
current_time = getUtcTimestamp()
|
||||||
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
|
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -131,11 +131,11 @@ async def login(
|
||||||
# Log successful login
|
# Log successful login
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_user_access(
|
audit_logger.logUserAccess(
|
||||||
user_id=str(user.id),
|
userId=str(user.id),
|
||||||
mandate_id=str(user.mandateId),
|
mandateId=str(user.mandateId),
|
||||||
action="login",
|
action="login",
|
||||||
success_info="local_auth_success"
|
successInfo="local_auth_success"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
|
|
@ -159,11 +159,11 @@ async def login(
|
||||||
# Log failed login attempt
|
# Log failed login attempt
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_user_access(
|
audit_logger.logUserAccess(
|
||||||
user_id="unknown",
|
userId="unknown",
|
||||||
mandate_id="unknown",
|
mandateId="unknown",
|
||||||
action="login",
|
action="login",
|
||||||
success_info=f"failed: {error_msg}"
|
successInfo=f"failed: {error_msg}"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
|
|
@ -367,11 +367,11 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
|
||||||
# Log successful logout
|
# Log successful logout
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_user_access(
|
audit_logger.logUserAccess(
|
||||||
user_id=str(currentUser.id),
|
userId=str(currentUser.id),
|
||||||
mandate_id=str(currentUser.mandateId),
|
mandateId=str(currentUser.mandateId),
|
||||||
action="logout",
|
action="logout",
|
||||||
success_info=f"revoked_tokens: {revoked}"
|
successInfo=f"revoked_tokens: {revoked}"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from modules.datamodels.datamodelUam import AuthAuthority, User, ConnectionStatu
|
||||||
from modules.datamodels.datamodelSecurity import Token
|
from modules.datamodels.datamodelSecurity import Token
|
||||||
from modules.security.auth import getCurrentUser, limiter
|
from modules.security.auth import getCurrentUser, limiter
|
||||||
from modules.security.jwtService import createAccessToken
|
from modules.security.jwtService import createAccessToken
|
||||||
from modules.shared.timezoneUtils import create_expiration_timestamp, get_utc_timestamp
|
from modules.shared.timezoneUtils import createExpirationTimestamp, getUtcTimestamp
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -199,8 +199,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
tokenAccess=token_response["access_token"],
|
tokenAccess=token_response["access_token"],
|
||||||
tokenRefresh=token_response.get("refresh_token", ""),
|
tokenRefresh=token_response.get("refresh_token", ""),
|
||||||
tokenType=token_response.get("token_type", "bearer"),
|
tokenType=token_response.get("token_type", "bearer"),
|
||||||
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
|
expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save access token (no connectionId)
|
# Save access token (no connectionId)
|
||||||
|
|
@ -225,7 +225,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
tokenAccess=jwt_token,
|
tokenAccess=jwt_token,
|
||||||
tokenType="bearer",
|
tokenType="bearer",
|
||||||
expiresAt=jwt_expires_at.timestamp(),
|
expiresAt=jwt_expires_at.timestamp(),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save JWT access token
|
# Save JWT access token
|
||||||
|
|
@ -332,8 +332,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
logger.info(f"Updating connection {connection_id} for user {user.username}")
|
logger.info(f"Updating connection {connection_id} for user {user.username}")
|
||||||
# Update connection with external service details
|
# Update connection with external service details
|
||||||
connection.status = ConnectionStatus.ACTIVE
|
connection.status = ConnectionStatus.ACTIVE
|
||||||
connection.lastChecked = get_utc_timestamp()
|
connection.lastChecked = getUtcTimestamp()
|
||||||
connection.expiresAt = get_utc_timestamp() + token_response.get("expires_in", 0)
|
connection.expiresAt = getUtcTimestamp() + token_response.get("expires_in", 0)
|
||||||
connection.externalId = user_info.get("id")
|
connection.externalId = user_info.get("id")
|
||||||
connection.externalUsername = user_info.get("userPrincipalName")
|
connection.externalUsername = user_info.get("userPrincipalName")
|
||||||
connection.externalEmail = user_info.get("mail")
|
connection.externalEmail = user_info.get("mail")
|
||||||
|
|
@ -351,8 +351,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
tokenAccess=token_response["access_token"],
|
tokenAccess=token_response["access_token"],
|
||||||
tokenRefresh=token_response.get("refresh_token", ""),
|
tokenRefresh=token_response.get("refresh_token", ""),
|
||||||
tokenType=token_response.get("token_type", "bearer"),
|
tokenType=token_response.get("token_type", "bearer"),
|
||||||
expiresAt=create_expiration_timestamp(token_response.get("expires_in", 0)),
|
expiresAt=createExpirationTimestamp(token_response.get("expires_in", 0)),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -373,8 +373,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
id: '{connection.id}',
|
id: '{connection.id}',
|
||||||
status: 'connected',
|
status: 'connected',
|
||||||
type: 'msft',
|
type: 'msft',
|
||||||
lastChecked: {get_utc_timestamp()},
|
lastChecked: {getUtcTimestamp()},
|
||||||
expiresAt: {create_expiration_timestamp(token_response.get("expires_in", 0))}
|
expiresAt: {createExpirationTimestamp(token_response.get("expires_in", 0))}
|
||||||
}}
|
}}
|
||||||
}}, '*');
|
}}, '*');
|
||||||
// Wait for message to be sent before closing
|
// Wait for message to be sent before closing
|
||||||
|
|
@ -467,11 +467,11 @@ async def logout(
|
||||||
# Log successful logout
|
# Log successful logout
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_user_access(
|
audit_logger.logUserAccess(
|
||||||
user_id=str(currentUser.id),
|
userId=str(currentUser.id),
|
||||||
mandate_id=str(currentUser.mandateId),
|
mandateId=str(currentUser.mandateId),
|
||||||
action="logout",
|
action="logout",
|
||||||
success_info="microsoft_auth_logout"
|
successInfo="microsoft_auth_logout"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
|
|
@ -575,27 +575,27 @@ async def refresh_token(
|
||||||
from modules.security.tokenManager import TokenManager
|
from modules.security.tokenManager import TokenManager
|
||||||
token_manager = TokenManager()
|
token_manager = TokenManager()
|
||||||
|
|
||||||
refreshed_token = token_manager.refresh_token(current_token)
|
refreshedToken = token_manager.refreshToken(current_token)
|
||||||
if refreshed_token:
|
if refreshedToken:
|
||||||
# Save the new connection token (which will automatically replace old ones)
|
# Save the new connection token (which will automatically replace old ones)
|
||||||
appInterface.saveConnectionToken(refreshed_token)
|
appInterface.saveConnectionToken(refreshedToken)
|
||||||
|
|
||||||
# Update the connection's expiration time
|
# Update the connection's expiration time
|
||||||
msft_connection.expiresAt = float(refreshed_token.expiresAt)
|
msft_connection.expiresAt = float(refreshedToken.expiresAt)
|
||||||
msft_connection.lastChecked = get_utc_timestamp()
|
msft_connection.lastChecked = getUtcTimestamp()
|
||||||
msft_connection.status = ConnectionStatus.ACTIVE
|
msft_connection.status = ConnectionStatus.ACTIVE
|
||||||
|
|
||||||
# Save updated connection
|
# Save updated connection
|
||||||
appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump())
|
appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.model_dump())
|
||||||
|
|
||||||
# Calculate time until expiration
|
# Calculate time until expiration
|
||||||
current_time = get_utc_timestamp()
|
current_time = getUtcTimestamp()
|
||||||
expires_in = int(refreshed_token.expiresAt - current_time)
|
expiresIn = int(refreshedToken.expiresAt - current_time)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"message": "Token refreshed successfully",
|
"message": "Token refreshed successfully",
|
||||||
"expires_at": refreshed_token.expiresAt,
|
"expires_at": refreshedToken.expiresAt,
|
||||||
"expires_in_seconds": expires_in
|
"expires_in_seconds": expiresIn
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
|
||||||
|
|
@ -18,26 +18,26 @@ logger = logging.getLogger(__name__)
|
||||||
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
|
router = APIRouter(prefix="/voice-google", tags=["Voice Google"])
|
||||||
|
|
||||||
# Store active WebSocket connections
|
# Store active WebSocket connections
|
||||||
active_connections: Dict[str, WebSocket] = {}
|
activeConnections: Dict[str, WebSocket] = {}
|
||||||
|
|
||||||
class ConnectionManager:
|
class ConnectionManager:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.active_connections: List[WebSocket] = []
|
self.activeConnections: List[WebSocket] = []
|
||||||
|
|
||||||
async def connect(self, websocket: WebSocket, connection_id: str):
|
async def connect(self, websocket: WebSocket, connectionId: str):
|
||||||
await websocket.accept()
|
await websocket.accept()
|
||||||
self.active_connections.append(websocket)
|
self.activeConnections.append(websocket)
|
||||||
active_connections[connection_id] = websocket
|
activeConnections[connectionId] = websocket
|
||||||
logger.info(f"WebSocket connected: {connection_id}")
|
logger.info(f"WebSocket connected: {connectionId}")
|
||||||
|
|
||||||
def disconnect(self, websocket: WebSocket, connection_id: str):
|
def disconnect(self, websocket: WebSocket, connectionId: str):
|
||||||
if websocket in self.active_connections:
|
if websocket in self.activeConnections:
|
||||||
self.active_connections.remove(websocket)
|
self.activeConnections.remove(websocket)
|
||||||
if connection_id in active_connections:
|
if connectionId in activeConnections:
|
||||||
del active_connections[connection_id]
|
del activeConnections[connectionId]
|
||||||
logger.info(f"WebSocket disconnected: {connection_id}")
|
logger.info(f"WebSocket disconnected: {connectionId}")
|
||||||
|
|
||||||
async def send_personal_message(self, message: dict, websocket: WebSocket):
|
async def sendPersonalMessage(self, message: dict, websocket: WebSocket):
|
||||||
try:
|
try:
|
||||||
await websocket.send_text(json.dumps(message))
|
await websocket.send_text(json.dumps(message))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -45,10 +45,10 @@ class ConnectionManager:
|
||||||
|
|
||||||
manager = ConnectionManager()
|
manager = ConnectionManager()
|
||||||
|
|
||||||
def get_voice_interface(current_user: User) -> VoiceObjects:
|
def _getVoiceInterface(currentUser: User) -> VoiceObjects:
|
||||||
"""Get voice interface instance with user context."""
|
"""Get voice interface instance with user context."""
|
||||||
try:
|
try:
|
||||||
return getVoiceInterface(current_user)
|
return getVoiceInterface(currentUser)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to initialize voice interface: {e}")
|
logger.error(f"Failed to initialize voice interface: {e}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -58,23 +58,23 @@ def get_voice_interface(current_user: User) -> VoiceObjects:
|
||||||
|
|
||||||
@router.post("/speech-to-text")
|
@router.post("/speech-to-text")
|
||||||
async def speech_to_text(
|
async def speech_to_text(
|
||||||
audio_file: UploadFile = File(...),
|
audioFile: UploadFile = File(...),
|
||||||
language: str = Form("de-DE"),
|
language: str = Form("de-DE"),
|
||||||
current_user: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser)
|
||||||
):
|
):
|
||||||
"""Convert speech to text using Google Cloud Speech-to-Text API."""
|
"""Convert speech to text using Google Cloud Speech-to-Text API."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"🎤 Speech-to-text request: {audio_file.filename}, language: {language}")
|
logger.info(f"🎤 Speech-to-text request: {audioFile.filename}, language: {language}")
|
||||||
|
|
||||||
# Read audio file
|
# Read audio file
|
||||||
audio_content = await audio_file.read()
|
audioContent = await audioFile.read()
|
||||||
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
|
logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
|
||||||
|
|
||||||
# Get voice interface
|
# Get voice interface
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
|
|
||||||
# Validate audio format
|
# Validate audio format
|
||||||
validation = voice_interface.validateAudioFormat(audio_content)
|
validation = voiceInterface.validateAudioFormat(audioContent)
|
||||||
|
|
||||||
if not validation["valid"]:
|
if not validation["valid"]:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -83,8 +83,8 @@ async def speech_to_text(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Perform speech recognition
|
# Perform speech recognition
|
||||||
result = await voice_interface.speechToText(
|
result = await voiceInterface.speechToText(
|
||||||
audioContent=audio_content,
|
audioContent=audioContent,
|
||||||
language=language
|
language=language
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -95,7 +95,7 @@ async def speech_to_text(
|
||||||
"confidence": result["confidence"],
|
"confidence": result["confidence"],
|
||||||
"language": result["language"],
|
"language": result["language"],
|
||||||
"audio_info": {
|
"audio_info": {
|
||||||
"size": len(audio_content),
|
"size": len(audioContent),
|
||||||
"format": validation["format"],
|
"format": validation["format"],
|
||||||
"estimated_duration": validation.get("estimated_duration", 0)
|
"estimated_duration": validation.get("estimated_duration", 0)
|
||||||
}
|
}
|
||||||
|
|
@ -118,13 +118,13 @@ async def speech_to_text(
|
||||||
@router.post("/translate")
|
@router.post("/translate")
|
||||||
async def translate_text(
|
async def translate_text(
|
||||||
text: str = Form(...),
|
text: str = Form(...),
|
||||||
source_language: str = Form("de"),
|
sourceLanguage: str = Form("de"),
|
||||||
target_language: str = Form("en"),
|
targetLanguage: str = Form("en"),
|
||||||
current_user: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser)
|
||||||
):
|
):
|
||||||
"""Translate text using Google Cloud Translation API."""
|
"""Translate text using Google Cloud Translation API."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"🌐 Translation request: '{text}' ({source_language} -> {target_language})")
|
logger.info(f"🌐 Translation request: '{text}' ({sourceLanguage} -> {targetLanguage})")
|
||||||
|
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -133,13 +133,13 @@ async def translate_text(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get voice interface
|
# Get voice interface
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
|
|
||||||
# Perform translation
|
# Perform translation
|
||||||
result = await voice_interface.translateText(
|
result = await voiceInterface.translateText(
|
||||||
text=text,
|
text=text,
|
||||||
sourceLanguage=source_language,
|
sourceLanguage=sourceLanguage,
|
||||||
targetLanguage=target_language
|
targetLanguage=targetLanguage
|
||||||
)
|
)
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
|
|
@ -167,21 +167,21 @@ async def translate_text(
|
||||||
|
|
||||||
@router.post("/realtime-interpreter")
|
@router.post("/realtime-interpreter")
|
||||||
async def realtime_interpreter(
|
async def realtime_interpreter(
|
||||||
audio_file: UploadFile = File(...),
|
audioFile: UploadFile = File(...),
|
||||||
from_language: str = Form("de-DE"),
|
fromLanguage: str = Form("de-DE"),
|
||||||
to_language: str = Form("en-US"),
|
toLanguage: str = Form("en-US"),
|
||||||
connection_id: str = Form(None),
|
connectionId: str = Form(None),
|
||||||
current_user: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser)
|
||||||
):
|
):
|
||||||
"""Real-time interpreter: speech to translated text using Google Cloud APIs."""
|
"""Real-time interpreter: speech to translated text using Google Cloud APIs."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"🔄 Real-time interpreter request: {audio_file.filename}")
|
logger.info(f"🔄 Real-time interpreter request: {audioFile.filename}")
|
||||||
logger.info(f" From: {from_language} -> To: {to_language}")
|
logger.info(f" From: {fromLanguage} -> To: {toLanguage}")
|
||||||
logger.info(f" MIME type: {audio_file.content_type}")
|
logger.info(f" MIME type: {audioFile.content_type}")
|
||||||
|
|
||||||
# Read audio file
|
# Read audio file
|
||||||
audio_content = await audio_file.read()
|
audioContent = await audioFile.read()
|
||||||
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
|
logger.info(f"📊 Audio file size: {len(audioContent)} bytes")
|
||||||
|
|
||||||
# Save audio file for debugging with correct extension
|
# Save audio file for debugging with correct extension
|
||||||
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
|
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
|
||||||
|
|
@ -192,10 +192,10 @@ async def realtime_interpreter(
|
||||||
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
|
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
|
||||||
|
|
||||||
# Get voice interface
|
# Get voice interface
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
|
|
||||||
# Validate audio format
|
# Validate audio format
|
||||||
validation = voice_interface.validateAudioFormat(audio_content)
|
validation = voiceInterface.validateAudioFormat(audioContent)
|
||||||
|
|
||||||
if not validation["valid"]:
|
if not validation["valid"]:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -204,10 +204,10 @@ async def realtime_interpreter(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Perform complete pipeline: Speech-to-Text + Translation
|
# Perform complete pipeline: Speech-to-Text + Translation
|
||||||
result = await voice_interface.speechToTranslatedText(
|
result = await voiceInterface.speechToTranslatedText(
|
||||||
audioContent=audio_content,
|
audioContent=audioContent,
|
||||||
fromLanguage=from_language,
|
fromLanguage=fromLanguage,
|
||||||
toLanguage=to_language
|
toLanguage=toLanguage
|
||||||
)
|
)
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
|
|
@ -223,7 +223,7 @@ async def realtime_interpreter(
|
||||||
"source_language": result["source_language"],
|
"source_language": result["source_language"],
|
||||||
"target_language": result["target_language"],
|
"target_language": result["target_language"],
|
||||||
"audio_info": {
|
"audio_info": {
|
||||||
"size": len(audio_content),
|
"size": len(audioContent),
|
||||||
"format": validation["format"],
|
"format": validation["format"],
|
||||||
"estimated_duration": validation.get("estimated_duration", 0)
|
"estimated_duration": validation.get("estimated_duration", 0)
|
||||||
}
|
}
|
||||||
|
|
@ -249,7 +249,7 @@ async def text_to_speech(
|
||||||
text: str = Form(...),
|
text: str = Form(...),
|
||||||
language: str = Form("de-DE"),
|
language: str = Form("de-DE"),
|
||||||
voice: str = Form(None),
|
voice: str = Form(None),
|
||||||
current_user: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser)
|
||||||
):
|
):
|
||||||
"""Convert text to speech using Google Cloud Text-to-Speech."""
|
"""Convert text to speech using Google Cloud Text-to-Speech."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -261,8 +261,8 @@ async def text_to_speech(
|
||||||
detail="Empty text provided for text-to-speech"
|
detail="Empty text provided for text-to-speech"
|
||||||
)
|
)
|
||||||
|
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
result = await voice_interface.textToSpeech(
|
result = await voiceInterface.textToSpeech(
|
||||||
text=text,
|
text=text,
|
||||||
languageCode=language,
|
languageCode=language,
|
||||||
voiceName=voice
|
voiceName=voice
|
||||||
|
|
@ -294,13 +294,13 @@ async def text_to_speech(
|
||||||
)
|
)
|
||||||
|
|
||||||
@router.get("/languages")
|
@router.get("/languages")
|
||||||
async def get_available_languages(current_user: User = Depends(getCurrentUser)):
|
async def get_available_languages(currentUser: User = Depends(getCurrentUser)):
|
||||||
"""Get available languages from Google Cloud Text-to-Speech."""
|
"""Get available languages from Google Cloud Text-to-Speech."""
|
||||||
try:
|
try:
|
||||||
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
logger.info("🌐 Getting available languages from Google Cloud TTS")
|
||||||
|
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
result = await voice_interface.getAvailableLanguages()
|
result = await voiceInterface.getAvailableLanguages()
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
return {
|
return {
|
||||||
|
|
@ -324,21 +324,21 @@ async def get_available_languages(current_user: User = Depends(getCurrentUser)):
|
||||||
|
|
||||||
@router.get("/voices")
|
@router.get("/voices")
|
||||||
async def get_available_voices(
|
async def get_available_voices(
|
||||||
language_code: Optional[str] = None,
|
languageCode: Optional[str] = None,
|
||||||
current_user: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser)
|
||||||
):
|
):
|
||||||
"""Get available voices from Google Cloud Text-to-Speech."""
|
"""Get available voices from Google Cloud Text-to-Speech."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"🎤 Getting available voices, language filter: {language_code}")
|
logger.info(f"🎤 Getting available voices, language filter: {languageCode}")
|
||||||
|
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
result = await voice_interface.getAvailableVoices(languageCode=language_code)
|
result = await voiceInterface.getAvailableVoices(languageCode=languageCode)
|
||||||
|
|
||||||
if result["success"]:
|
if result["success"]:
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"voices": result["voices"],
|
"voices": result["voices"],
|
||||||
"language_filter": language_code
|
"language_filter": languageCode
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -356,11 +356,11 @@ async def get_available_voices(
|
||||||
)
|
)
|
||||||
|
|
||||||
@router.get("/health")
|
@router.get("/health")
|
||||||
async def health_check(current_user: User = Depends(getCurrentUser)):
|
async def health_check(currentUser: User = Depends(getCurrentUser)):
|
||||||
"""Health check for Google Cloud voice services."""
|
"""Health check for Google Cloud voice services."""
|
||||||
try:
|
try:
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
test_result = await voice_interface.healthCheck()
|
test_result = await voiceInterface.healthCheck()
|
||||||
|
|
||||||
return test_result
|
return test_result
|
||||||
|
|
||||||
|
|
@ -372,16 +372,16 @@ async def health_check(current_user: User = Depends(getCurrentUser)):
|
||||||
}
|
}
|
||||||
|
|
||||||
@router.get("/settings")
|
@router.get("/settings")
|
||||||
async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
|
async def get_voice_settings(currentUser: User = Depends(getCurrentUser)):
|
||||||
"""Get voice settings for the current user."""
|
"""Get voice settings for the current user."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Getting voice settings for user: {current_user.id}")
|
logger.info(f"Getting voice settings for user: {currentUser.id}")
|
||||||
|
|
||||||
# Get voice interface
|
# Get voice interface
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
|
|
||||||
# Get or create voice settings for the user
|
# Get or create voice settings for the user
|
||||||
voice_settings = voice_interface.getOrCreateVoiceSettings(current_user.id)
|
voice_settings = voiceInterface.getOrCreateVoiceSettings(currentUser.id)
|
||||||
|
|
||||||
if voice_settings:
|
if voice_settings:
|
||||||
# Return user settings
|
# Return user settings
|
||||||
|
|
@ -425,16 +425,16 @@ async def get_voice_settings(current_user: User = Depends(getCurrentUser)):
|
||||||
@router.post("/settings")
|
@router.post("/settings")
|
||||||
async def save_voice_settings(
|
async def save_voice_settings(
|
||||||
settings: Dict[str, Any] = Body(...),
|
settings: Dict[str, Any] = Body(...),
|
||||||
current_user: User = Depends(getCurrentUser)
|
currentUser: User = Depends(getCurrentUser)
|
||||||
):
|
):
|
||||||
"""Save voice settings for the current user."""
|
"""Save voice settings for the current user."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"Saving voice settings for user: {current_user.id}")
|
logger.info(f"Saving voice settings for user: {currentUser.id}")
|
||||||
logger.info(f"Settings: {settings}")
|
logger.info(f"Settings: {settings}")
|
||||||
|
|
||||||
# Validate required settings
|
# Validate required settings
|
||||||
required_fields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
|
requiredFields = ["sttLanguage", "ttsLanguage", "ttsVoice"]
|
||||||
for field in required_fields:
|
for field in requiredFields:
|
||||||
if field not in settings:
|
if field not in settings:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
|
|
@ -448,23 +448,23 @@ async def save_voice_settings(
|
||||||
settings["targetLanguage"] = "en-US"
|
settings["targetLanguage"] = "en-US"
|
||||||
|
|
||||||
# Get voice interface
|
# Get voice interface
|
||||||
voice_interface = get_voice_interface(current_user)
|
voiceInterface = _getVoiceInterface(currentUser)
|
||||||
|
|
||||||
# Check if settings already exist for this user
|
# Check if settings already exist for this user
|
||||||
existing_settings = voice_interface.getVoiceSettings(current_user.id)
|
existing_settings = voiceInterface.getVoiceSettings(currentUser.id)
|
||||||
|
|
||||||
if existing_settings:
|
if existing_settings:
|
||||||
# Update existing settings
|
# Update existing settings
|
||||||
logger.info(f"Updating existing voice settings for user {current_user.id}")
|
logger.info(f"Updating existing voice settings for user {currentUser.id}")
|
||||||
updated_settings = voice_interface.updateVoiceSettings(current_user.id, settings)
|
updated_settings = voiceInterface.updateVoiceSettings(currentUser.id, settings)
|
||||||
logger.info(f"Voice settings updated for user {current_user.id}: {updated_settings}")
|
logger.info(f"Voice settings updated for user {currentUser.id}: {updated_settings}")
|
||||||
else:
|
else:
|
||||||
# Create new settings
|
# Create new settings
|
||||||
logger.info(f"Creating new voice settings for user {current_user.id}")
|
logger.info(f"Creating new voice settings for user {currentUser.id}")
|
||||||
# Add userId to settings
|
# Add userId to settings
|
||||||
settings["userId"] = current_user.id
|
settings["userId"] = currentUser.id
|
||||||
created_settings = voice_interface.createVoiceSettings(settings)
|
created_settings = voiceInterface.createVoiceSettings(settings)
|
||||||
logger.info(f"Voice settings created for user {current_user.id}: {created_settings}")
|
logger.info(f"Voice settings created for user {currentUser.id}: {created_settings}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
|
|
@ -486,25 +486,25 @@ async def save_voice_settings(
|
||||||
@router.websocket("/ws/realtime-interpreter")
|
@router.websocket("/ws/realtime-interpreter")
|
||||||
async def websocket_realtime_interpreter(
|
async def websocket_realtime_interpreter(
|
||||||
websocket: WebSocket,
|
websocket: WebSocket,
|
||||||
user_id: str = "default",
|
userId: str = "default",
|
||||||
from_language: str = "de-DE",
|
fromLanguage: str = "de-DE",
|
||||||
to_language: str = "en-US"
|
toLanguage: str = "en-US"
|
||||||
):
|
):
|
||||||
"""WebSocket endpoint for real-time voice interpretation"""
|
"""WebSocket endpoint for real-time voice interpretation"""
|
||||||
connection_id = f"realtime_{user_id}_{from_language}_{to_language}"
|
connectionId = f"realtime_{userId}_{fromLanguage}_{toLanguage}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await manager.connect(websocket, connection_id)
|
await manager.connect(websocket, connectionId)
|
||||||
|
|
||||||
# Send connection confirmation
|
# Send connection confirmation
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "connected",
|
"type": "connected",
|
||||||
"connection_id": connection_id,
|
"connection_id": connectionId,
|
||||||
"message": "Connected to real-time interpreter"
|
"message": "Connected to real-time interpreter"
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
# Initialize voice interface
|
# Initialize voice interface
|
||||||
voice_interface = get_voice_interface(User(id=user_id))
|
voiceInterface = _getVoiceInterface(User(id=userId))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# Receive message from client
|
# Receive message from client
|
||||||
|
|
@ -515,7 +515,7 @@ async def websocket_realtime_interpreter(
|
||||||
# Process audio chunk
|
# Process audio chunk
|
||||||
try:
|
try:
|
||||||
# Decode base64 audio data
|
# Decode base64 audio data
|
||||||
audio_data = base64.b64decode(message["data"])
|
audioData = base64.b64decode(message["data"])
|
||||||
|
|
||||||
# For now, just acknowledge receipt
|
# For now, just acknowledge receipt
|
||||||
# In a full implementation, this would:
|
# In a full implementation, this would:
|
||||||
|
|
@ -524,9 +524,9 @@ async def websocket_realtime_interpreter(
|
||||||
# 3. Send partial results back
|
# 3. Send partial results back
|
||||||
# 4. Handle translation
|
# 4. Handle translation
|
||||||
|
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "audio_received",
|
"type": "audio_received",
|
||||||
"chunk_size": len(audio_data),
|
"chunk_size": len(audioData),
|
||||||
"timestamp": message.get("timestamp")
|
"timestamp": message.get("timestamp")
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
|
|
@ -539,7 +539,7 @@ async def websocket_realtime_interpreter(
|
||||||
|
|
||||||
elif message["type"] == "ping":
|
elif message["type"] == "ping":
|
||||||
# Respond to ping
|
# Respond to ping
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "pong",
|
"type": "pong",
|
||||||
"timestamp": message.get("timestamp")
|
"timestamp": message.get("timestamp")
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
@ -548,32 +548,32 @@ async def websocket_realtime_interpreter(
|
||||||
logger.warning(f"Unknown message type: {message['type']}")
|
logger.warning(f"Unknown message type: {message['type']}")
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
manager.disconnect(websocket, connection_id)
|
manager.disconnect(websocket, connectionId)
|
||||||
logger.info(f"Client disconnected: {connection_id}")
|
logger.info(f"Client disconnected: {connectionId}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"WebSocket error: {e}")
|
logger.error(f"WebSocket error: {e}")
|
||||||
manager.disconnect(websocket, connection_id)
|
manager.disconnect(websocket, connectionId)
|
||||||
|
|
||||||
@router.websocket("/ws/speech-to-text")
|
@router.websocket("/ws/speech-to-text")
|
||||||
async def websocket_speech_to_text(
|
async def websocket_speech_to_text(
|
||||||
websocket: WebSocket,
|
websocket: WebSocket,
|
||||||
user_id: str = "default",
|
userId: str = "default",
|
||||||
language: str = "de-DE"
|
language: str = "de-DE"
|
||||||
):
|
):
|
||||||
"""WebSocket endpoint for real-time speech-to-text"""
|
"""WebSocket endpoint for real-time speech-to-text"""
|
||||||
connection_id = f"stt_{user_id}_{language}"
|
connectionId = f"stt_{userId}_{language}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await manager.connect(websocket, connection_id)
|
await manager.connect(websocket, connectionId)
|
||||||
|
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "connected",
|
"type": "connected",
|
||||||
"connection_id": connection_id,
|
"connection_id": connectionId,
|
||||||
"message": "Connected to speech-to-text"
|
"message": "Connected to speech-to-text"
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
# Initialize voice interface
|
# Initialize voice interface
|
||||||
voice_interface = get_voice_interface(User(id=user_id))
|
voiceInterface = _getVoiceInterface(User(id=userId))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = await websocket.receive_text()
|
data = await websocket.receive_text()
|
||||||
|
|
@ -581,12 +581,12 @@ async def websocket_speech_to_text(
|
||||||
|
|
||||||
if message["type"] == "audio_chunk":
|
if message["type"] == "audio_chunk":
|
||||||
try:
|
try:
|
||||||
audio_data = base64.b64decode(message["data"])
|
audioData = base64.b64decode(message["data"])
|
||||||
|
|
||||||
# Process audio chunk
|
# Process audio chunk
|
||||||
# This would integrate with Google Cloud Speech-to-Text streaming API
|
# This would integrate with Google Cloud Speech-to-Text streaming API
|
||||||
|
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "transcription_result",
|
"type": "transcription_result",
|
||||||
"text": "Audio chunk received", # Placeholder
|
"text": "Audio chunk received", # Placeholder
|
||||||
"confidence": 0.95,
|
"confidence": 0.95,
|
||||||
|
|
@ -595,39 +595,39 @@ async def websocket_speech_to_text(
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing audio: {e}")
|
logger.error(f"Error processing audio: {e}")
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "error",
|
"type": "error",
|
||||||
"error": f"Failed to process audio: {str(e)}"
|
"error": f"Failed to process audio: {str(e)}"
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
elif message["type"] == "ping":
|
elif message["type"] == "ping":
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "pong",
|
"type": "pong",
|
||||||
"timestamp": message.get("timestamp")
|
"timestamp": message.get("timestamp")
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
manager.disconnect(websocket, connection_id)
|
manager.disconnect(websocket, connectionId)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"WebSocket error: {e}")
|
logger.error(f"WebSocket error: {e}")
|
||||||
manager.disconnect(websocket, connection_id)
|
manager.disconnect(websocket, connectionId)
|
||||||
|
|
||||||
@router.websocket("/ws/text-to-speech")
|
@router.websocket("/ws/text-to-speech")
|
||||||
async def websocket_text_to_speech(
|
async def websocket_text_to_speech(
|
||||||
websocket: WebSocket,
|
websocket: WebSocket,
|
||||||
user_id: str = "default",
|
userId: str = "default",
|
||||||
language: str = "de-DE",
|
language: str = "de-DE",
|
||||||
voice: str = "de-DE-Wavenet-A"
|
voice: str = "de-DE-Wavenet-A"
|
||||||
):
|
):
|
||||||
"""WebSocket endpoint for real-time text-to-speech"""
|
"""WebSocket endpoint for real-time text-to-speech"""
|
||||||
connection_id = f"tts_{user_id}_{language}_{voice}"
|
connectionId = f"tts_{userId}_{language}_{voice}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await manager.connect(websocket, connection_id)
|
await manager.connect(websocket, connectionId)
|
||||||
|
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "connected",
|
"type": "connected",
|
||||||
"connection_id": connection_id,
|
"connection_id": connectionId,
|
||||||
"message": "Connected to text-to-speech"
|
"message": "Connected to text-to-speech"
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
|
|
@ -643,7 +643,7 @@ async def websocket_text_to_speech(
|
||||||
# This would integrate with Google Cloud Text-to-Speech API
|
# This would integrate with Google Cloud Text-to-Speech API
|
||||||
|
|
||||||
# For now, send a placeholder response
|
# For now, send a placeholder response
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "audio_data",
|
"type": "audio_data",
|
||||||
"audio": "base64_encoded_audio_here", # Placeholder
|
"audio": "base64_encoded_audio_here", # Placeholder
|
||||||
"format": "mp3"
|
"format": "mp3"
|
||||||
|
|
@ -651,19 +651,19 @@ async def websocket_text_to_speech(
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing text-to-speech: {e}")
|
logger.error(f"Error processing text-to-speech: {e}")
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "error",
|
"type": "error",
|
||||||
"error": f"Failed to process text: {str(e)}"
|
"error": f"Failed to process text: {str(e)}"
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
elif message["type"] == "ping":
|
elif message["type"] == "ping":
|
||||||
await manager.send_personal_message({
|
await manager.sendPersonalMessage({
|
||||||
"type": "pong",
|
"type": "pong",
|
||||||
"timestamp": message.get("timestamp")
|
"timestamp": message.get("timestamp")
|
||||||
}, websocket)
|
}, websocket)
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
manager.disconnect(websocket, connection_id)
|
manager.disconnect(websocket, connectionId)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"WebSocket error: {e}")
|
logger.error(f"WebSocket error: {e}")
|
||||||
manager.disconnect(websocket, connection_id)
|
manager.disconnect(websocket, connectionId)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from fastapi import Response
|
||||||
from jose import jwt
|
from jose import jwt
|
||||||
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timezoneUtils import get_utc_now
|
from modules.shared.timezoneUtils import getUtcNow
|
||||||
|
|
||||||
# Config
|
# Config
|
||||||
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
|
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
|
||||||
|
|
@ -30,7 +30,7 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T
|
||||||
import uuid
|
import uuid
|
||||||
toEncode["jti"] = str(uuid.uuid4())
|
toEncode["jti"] = str(uuid.uuid4())
|
||||||
|
|
||||||
expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
|
expire = getUtcNow() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
|
||||||
toEncode.update({"exp": expire})
|
toEncode.update({"exp": expire})
|
||||||
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
||||||
return encodedJwt, expire
|
return encodedJwt, expire
|
||||||
|
|
@ -44,7 +44,7 @@ def createRefreshToken(data: dict) -> Tuple[str, "datetime"]:
|
||||||
toEncode["jti"] = str(uuid.uuid4())
|
toEncode["jti"] = str(uuid.uuid4())
|
||||||
toEncode["type"] = "refresh"
|
toEncode["type"] = "refresh"
|
||||||
|
|
||||||
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
|
expire = getUtcNow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
|
||||||
toEncode.update({"exp": expire})
|
toEncode.update({"exp": expire})
|
||||||
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
||||||
return encodedJwt, expire
|
return encodedJwt, expire
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from typing import Optional, Dict, Any, Callable
|
||||||
from modules.datamodels.datamodelSecurity import Token
|
from modules.datamodels.datamodelSecurity import Token
|
||||||
from modules.datamodels.datamodelUam import AuthAuthority
|
from modules.datamodels.datamodelUam import AuthAuthority
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp, createExpirationTimestamp
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -27,54 +27,54 @@ class TokenManager:
|
||||||
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID")
|
self.google_client_id = APP_CONFIG.get("Service_GOOGLE_CLIENT_ID")
|
||||||
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
|
self.google_client_secret = APP_CONFIG.get("Service_GOOGLE_CLIENT_SECRET")
|
||||||
|
|
||||||
def refresh_microsoft_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
|
def refreshMicrosoftToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
|
||||||
"""Refresh Microsoft OAuth token using refresh token"""
|
"""Refresh Microsoft OAuth token using refresh token"""
|
||||||
try:
|
try:
|
||||||
logger.debug(f"refresh_microsoft_token: Starting Microsoft token refresh for user {user_id}")
|
logger.debug(f"refreshMicrosoftToken: Starting Microsoft token refresh for user {userId}")
|
||||||
logger.debug(f"refresh_microsoft_token: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
|
logger.debug(f"refreshMicrosoftToken: Configuration check - client_id: {bool(self.msft_client_id)}, client_secret: {bool(self.msft_client_secret)}")
|
||||||
|
|
||||||
if not self.msft_client_id or not self.msft_client_secret:
|
if not self.msft_client_id or not self.msft_client_secret:
|
||||||
logger.error("Microsoft OAuth configuration not found")
|
logger.error("Microsoft OAuth configuration not found")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Microsoft token refresh endpoint
|
# Microsoft token refresh endpoint
|
||||||
token_url = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
|
tokenUrl = f"https://login.microsoftonline.com/{self.msft_tenant_id}/oauth2/v2.0/token"
|
||||||
logger.debug(f"refresh_microsoft_token: Using token URL: {token_url}")
|
logger.debug(f"refreshMicrosoftToken: Using token URL: {tokenUrl}")
|
||||||
|
|
||||||
# Prepare refresh request
|
# Prepare refresh request
|
||||||
data = {
|
data = {
|
||||||
"client_id": self.msft_client_id,
|
"client_id": self.msft_client_id,
|
||||||
"client_secret": self.msft_client_secret,
|
"client_secret": self.msft_client_secret,
|
||||||
"grant_type": "refresh_token",
|
"grant_type": "refresh_token",
|
||||||
"refresh_token": refresh_token,
|
"refresh_token": refreshToken,
|
||||||
"scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read"
|
"scope": "Mail.ReadWrite Mail.Send Mail.ReadWrite.Shared User.Read"
|
||||||
}
|
}
|
||||||
logger.debug(f"refresh_microsoft_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
|
logger.debug(f"refreshMicrosoftToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
|
||||||
|
|
||||||
# Make refresh request
|
# Make refresh request
|
||||||
with httpx.Client(timeout=30.0) as client:
|
with httpx.Client(timeout=30.0) as client:
|
||||||
logger.debug(f"refresh_microsoft_token: Making HTTP request to Microsoft OAuth endpoint")
|
logger.debug(f"refreshMicrosoftToken: Making HTTP request to Microsoft OAuth endpoint")
|
||||||
response = client.post(token_url, data=data)
|
response = client.post(tokenUrl, data=data)
|
||||||
logger.debug(f"refresh_microsoft_token: HTTP response status: {response.status_code}")
|
logger.debug(f"refreshMicrosoftToken: HTTP response status: {response.status_code}")
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
token_data = response.json()
|
tokenData = response.json()
|
||||||
logger.debug(f"refresh_microsoft_token: Token refresh successful, creating new token")
|
logger.debug(f"refreshMicrosoftToken: Token refresh successful, creating new token")
|
||||||
|
|
||||||
# Create new token
|
# Create new token
|
||||||
new_token = Token(
|
newToken = Token(
|
||||||
userId=user_id,
|
userId=userId,
|
||||||
authority=AuthAuthority.MSFT,
|
authority=AuthAuthority.MSFT,
|
||||||
connectionId=old_token.connectionId, # Preserve connection ID
|
connectionId=oldToken.connectionId, # Preserve connection ID
|
||||||
tokenAccess=token_data["access_token"],
|
tokenAccess=tokenData["access_token"],
|
||||||
tokenRefresh=token_data.get("refresh_token", refresh_token), # Keep old refresh token if new one not provided
|
tokenRefresh=tokenData.get("refresh_token", refreshToken), # Keep old refresh token if new one not provided
|
||||||
tokenType=token_data.get("token_type", "bearer"),
|
tokenType=tokenData.get("token_type", "bearer"),
|
||||||
expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
|
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"refresh_microsoft_token: New token created with ID: {new_token.id}")
|
logger.debug(f"refreshMicrosoftToken: New token created with ID: {newToken.id}")
|
||||||
return new_token
|
return newToken
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}")
|
logger.error(f"Failed to refresh Microsoft token: {response.status_code} - {response.text}")
|
||||||
return None
|
return None
|
||||||
|
|
@ -83,70 +83,70 @@ class TokenManager:
|
||||||
logger.error(f"Error refreshing Microsoft token: {str(e)}")
|
logger.error(f"Error refreshing Microsoft token: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def refresh_google_token(self, refresh_token: str, user_id: str, old_token: Token) -> Optional[Token]:
|
def refreshGoogleToken(self, refreshToken: str, userId: str, oldToken: Token) -> Optional[Token]:
|
||||||
"""Refresh Google OAuth token using refresh token"""
|
"""Refresh Google OAuth token using refresh token"""
|
||||||
try:
|
try:
|
||||||
logger.debug(f"refresh_google_token: Starting Google token refresh for user {user_id}")
|
logger.debug(f"refreshGoogleToken: Starting Google token refresh for user {userId}")
|
||||||
logger.debug(f"refresh_google_token: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
|
logger.debug(f"refreshGoogleToken: Configuration check - client_id: {bool(self.google_client_id)}, client_secret: {bool(self.google_client_secret)}")
|
||||||
|
|
||||||
if not self.google_client_id or not self.google_client_secret:
|
if not self.google_client_id or not self.google_client_secret:
|
||||||
logger.error("Google OAuth configuration not found")
|
logger.error("Google OAuth configuration not found")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Google token refresh endpoint
|
# Google token refresh endpoint
|
||||||
token_url = "https://oauth2.googleapis.com/token"
|
tokenUrl = "https://oauth2.googleapis.com/token"
|
||||||
logger.debug(f"refresh_google_token: Using token URL: {token_url}")
|
logger.debug(f"refreshGoogleToken: Using token URL: {tokenUrl}")
|
||||||
|
|
||||||
# Prepare refresh request
|
# Prepare refresh request
|
||||||
data = {
|
data = {
|
||||||
"client_id": self.google_client_id,
|
"client_id": self.google_client_id,
|
||||||
"client_secret": self.google_client_secret,
|
"client_secret": self.google_client_secret,
|
||||||
"grant_type": "refresh_token",
|
"grant_type": "refresh_token",
|
||||||
"refresh_token": refresh_token
|
"refresh_token": refreshToken
|
||||||
}
|
}
|
||||||
logger.debug(f"refresh_google_token: Refresh request data prepared (refresh_token length: {len(refresh_token) if refresh_token else 0})")
|
logger.debug(f"refreshGoogleToken: Refresh request data prepared (refreshToken length: {len(refreshToken) if refreshToken else 0})")
|
||||||
|
|
||||||
# Make refresh request
|
# Make refresh request
|
||||||
with httpx.Client(timeout=30.0) as client:
|
with httpx.Client(timeout=30.0) as client:
|
||||||
logger.debug(f"refresh_google_token: Making HTTP request to Google OAuth endpoint")
|
logger.debug(f"refreshGoogleToken: Making HTTP request to Google OAuth endpoint")
|
||||||
response = client.post(token_url, data=data)
|
response = client.post(tokenUrl, data=data)
|
||||||
logger.debug(f"refresh_google_token: HTTP response status: {response.status_code}")
|
logger.debug(f"refreshGoogleToken: HTTP response status: {response.status_code}")
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
token_data = response.json()
|
tokenData = response.json()
|
||||||
logger.debug(f"refresh_google_token: Token refresh successful, creating new token")
|
logger.debug(f"refreshGoogleToken: Token refresh successful, creating new token")
|
||||||
|
|
||||||
# Validate the response contains required fields
|
# Validate the response contains required fields
|
||||||
if "access_token" not in token_data:
|
if "access_token" not in tokenData:
|
||||||
logger.error("Google token refresh response missing access_token")
|
logger.error("Google token refresh response missing access_token")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Create new token
|
# Create new token
|
||||||
new_token = Token(
|
newToken = Token(
|
||||||
userId=user_id,
|
userId=userId,
|
||||||
authority=AuthAuthority.GOOGLE,
|
authority=AuthAuthority.GOOGLE,
|
||||||
connectionId=old_token.connectionId, # Preserve connection ID
|
connectionId=oldToken.connectionId, # Preserve connection ID
|
||||||
tokenAccess=token_data["access_token"],
|
tokenAccess=tokenData["access_token"],
|
||||||
tokenRefresh=token_data.get("refresh_token", refresh_token), # Use new refresh token if provided
|
tokenRefresh=tokenData.get("refresh_token", refreshToken), # Use new refresh token if provided
|
||||||
tokenType=token_data.get("token_type", "bearer"),
|
tokenType=tokenData.get("token_type", "bearer"),
|
||||||
expiresAt=create_expiration_timestamp(token_data.get("expires_in", 3600)),
|
expiresAt=createExpirationTimestamp(tokenData.get("expires_in", 3600)),
|
||||||
createdAt=get_utc_timestamp()
|
createdAt=getUtcTimestamp()
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"refresh_google_token: New token created with ID: {new_token.id}")
|
logger.debug(f"refreshGoogleToken: New token created with ID: {newToken.id}")
|
||||||
return new_token
|
return newToken
|
||||||
else:
|
else:
|
||||||
error_details = response.text
|
errorDetails = response.text
|
||||||
logger.error(f"Failed to refresh Google token: {response.status_code} - {error_details}")
|
logger.error(f"Failed to refresh Google token: {response.status_code} - {errorDetails}")
|
||||||
|
|
||||||
# Handle specific error cases
|
# Handle specific error cases
|
||||||
if response.status_code == 400:
|
if response.status_code == 400:
|
||||||
try:
|
try:
|
||||||
error_data = response.json()
|
errorData = response.json()
|
||||||
error_code = error_data.get("error")
|
errorCode = errorData.get("error")
|
||||||
if error_code == "invalid_grant":
|
if errorCode == "invalid_grant":
|
||||||
logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate")
|
logger.warning("Google refresh token is invalid or expired - user needs to re-authenticate")
|
||||||
elif error_code == "invalid_client":
|
elif errorCode == "invalid_client":
|
||||||
logger.error("Google OAuth client configuration is invalid")
|
logger.error("Google OAuth client configuration is invalid")
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
@ -157,55 +157,55 @@ class TokenManager:
|
||||||
logger.error(f"Error refreshing Google token: {str(e)}")
|
logger.error(f"Error refreshing Google token: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def refresh_token(self, old_token: Token) -> Optional[Token]:
|
def refreshToken(self, oldToken: Token) -> Optional[Token]:
|
||||||
"""Refresh an expired token using the appropriate OAuth service"""
|
"""Refresh an expired token using the appropriate OAuth service"""
|
||||||
try:
|
try:
|
||||||
logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}")
|
logger.debug(f"refreshToken: Starting refresh for token {oldToken.id}, authority: {oldToken.authority}")
|
||||||
logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}")
|
logger.debug(f"refreshToken: Token details: userId={oldToken.userId}, connectionId={oldToken.connectionId}, hasRefreshToken={bool(oldToken.tokenRefresh)}")
|
||||||
|
|
||||||
# Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
|
# Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
|
||||||
# Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
|
# Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
|
||||||
try:
|
try:
|
||||||
now_ts = get_utc_timestamp()
|
nowTs = getUtcTimestamp()
|
||||||
created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0
|
createdTs = float(oldToken.createdAt) if oldToken.createdAt is not None else 0.0
|
||||||
seconds_since_last_refresh = now_ts - created_ts
|
secondsSinceLastRefresh = nowTs - createdTs
|
||||||
if seconds_since_last_refresh < 10 * 60:
|
if secondsSinceLastRefresh < 10 * 60:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. "
|
f"refreshToken: Skipping refresh for connection {oldToken.connectionId} due to cooldown. "
|
||||||
f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)."
|
f"Last refresh {int(secondsSinceLastRefresh)}s ago (< 600s)."
|
||||||
)
|
)
|
||||||
# Return the existing token to avoid caller errors while preventing provider rate limits
|
# Return the existing token to avoid caller errors while preventing provider rate limits
|
||||||
return old_token
|
return oldToken
|
||||||
except Exception:
|
except Exception:
|
||||||
# If any issue reading timestamps, proceed with normal refresh to be safe
|
# If any issue reading timestamps, proceed with normal refresh to be safe
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if not old_token.tokenRefresh:
|
if not oldToken.tokenRefresh:
|
||||||
logger.warning(f"No refresh token available for {old_token.authority}")
|
logger.warning(f"No refresh token available for {oldToken.authority}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Route to appropriate refresh method
|
# Route to appropriate refresh method
|
||||||
if old_token.authority == AuthAuthority.MSFT:
|
if oldToken.authority == AuthAuthority.MSFT:
|
||||||
logger.debug(f"refresh_token: Refreshing Microsoft token")
|
logger.debug(f"refreshToken: Refreshing Microsoft token")
|
||||||
return self.refresh_microsoft_token(old_token.tokenRefresh, old_token.userId, old_token)
|
return self.refreshMicrosoftToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
|
||||||
elif old_token.authority == AuthAuthority.GOOGLE:
|
elif oldToken.authority == AuthAuthority.GOOGLE:
|
||||||
logger.debug(f"refresh_token: Refreshing Google token")
|
logger.debug(f"refreshToken: Refreshing Google token")
|
||||||
return self.refresh_google_token(old_token.tokenRefresh, old_token.userId, old_token)
|
return self.refreshGoogleToken(oldToken.tokenRefresh, oldToken.userId, oldToken)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Unknown authority for token refresh: {old_token.authority}")
|
logger.warning(f"Unknown authority for token refresh: {oldToken.authority}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error refreshing token: {str(e)}")
|
logger.error(f"Error refreshing token: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
|
def ensureFreshToken(self, token: Token, *, secondsBeforeExpiry: int = 30 * 60, saveCallback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
|
||||||
"""Ensure a token is fresh; refresh if expiring within threshold.
|
"""Ensure a token is fresh; refresh if expiring within threshold.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
token: Existing token to validate/refresh.
|
token: Existing token to validate/refresh.
|
||||||
seconds_before_expiry: Threshold window to proactively refresh.
|
secondsBeforeExpiry: Threshold window to proactively refresh.
|
||||||
save_callback: Optional function to persist a refreshed token.
|
saveCallback: Optional function to persist a refreshed token.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A fresh token (refreshed or original) or None if refresh failed.
|
A fresh token (refreshed or original) or None if refresh failed.
|
||||||
|
|
@ -214,31 +214,31 @@ class TokenManager:
|
||||||
if token is None:
|
if token is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
now_ts = get_utc_timestamp()
|
nowTs = getUtcTimestamp()
|
||||||
expires_at = token.expiresAt or 0
|
expiresAt = token.expiresAt or 0
|
||||||
|
|
||||||
# If token expires within the threshold, try to refresh
|
# If token expires within the threshold, try to refresh
|
||||||
if expires_at and expires_at < (now_ts + seconds_before_expiry):
|
if expiresAt and expiresAt < (nowTs + secondsBeforeExpiry):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
|
f"ensureFreshToken: Token for connection {token.connectionId} expiring soon "
|
||||||
f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
|
f"(in {max(0, expiresAt - nowTs)}s). Attempting proactive refresh."
|
||||||
)
|
)
|
||||||
refreshed = self.refresh_token(token)
|
refreshed = self.refreshToken(token)
|
||||||
if refreshed:
|
if refreshed:
|
||||||
if save_callback is not None:
|
if saveCallback is not None:
|
||||||
try:
|
try:
|
||||||
save_callback(refreshed)
|
saveCallback(refreshed)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
|
logger.warning(f"ensureFreshToken: Failed to persist refreshed token: {e}")
|
||||||
return refreshed
|
return refreshed
|
||||||
else:
|
else:
|
||||||
logger.warning("ensure_fresh_token: Token refresh failed")
|
logger.warning("ensureFreshToken: Token refresh failed")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Token is sufficiently fresh
|
# Token is sufficiently fresh
|
||||||
return token
|
return token
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
|
logger.error(f"ensureFreshToken: Error ensuring fresh token: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
|
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
|
||||||
|
|
@ -256,10 +256,10 @@ class TokenManager:
|
||||||
token = interfaceDbApp.getConnectionToken(connectionId)
|
token = interfaceDbApp.getConnectionToken(connectionId)
|
||||||
if not token:
|
if not token:
|
||||||
return None
|
return None
|
||||||
return self.ensure_fresh_token(
|
return self.ensureFreshToken(
|
||||||
token,
|
token,
|
||||||
seconds_before_expiry=secondsBeforeExpiry,
|
secondsBeforeExpiry=secondsBeforeExpiry,
|
||||||
save_callback=lambda t: interfaceDbApp.saveConnectionToken(t)
|
saveCallback=lambda t: interfaceDbApp.saveConnectionToken(t)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
|
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
import asyncio
|
import asyncio
|
||||||
from modules.security.tokenRefreshService import token_refresh_service
|
from modules.security.tokenRefreshService import token_refresh_service
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -135,7 +135,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
|
||||||
try:
|
try:
|
||||||
# Perform proactive refresh in background
|
# Perform proactive refresh in background
|
||||||
asyncio.create_task(self._proactive_refresh_tokens(user_id))
|
asyncio.create_task(self._proactive_refresh_tokens(user_id))
|
||||||
self.last_check[user_id] = get_utc_timestamp()
|
self.last_check[user_id] = getUtcTimestamp()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Error scheduling proactive refresh: {str(e)}")
|
logger.warning(f"Error scheduling proactive refresh: {str(e)}")
|
||||||
|
|
@ -160,7 +160,7 @@ class ProactiveTokenRefreshMiddleware(BaseHTTPMiddleware):
|
||||||
Check if we should perform proactive refresh for this user
|
Check if we should perform proactive refresh for this user
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
current_time = get_utc_timestamp()
|
current_time = getUtcTimestamp()
|
||||||
last_check = self.last_check.get(user_id, 0)
|
last_check = self.last_check.get(user_id, 0)
|
||||||
|
|
||||||
# Check every 5 minutes
|
# Check every 5 minutes
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ to ensure users don't experience token expiration issues.
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from modules.datamodels.datamodelUam import UserConnection, AuthAuthority
|
from modules.datamodels.datamodelUam import UserConnection, AuthAuthority
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -24,7 +24,7 @@ class TokenRefreshService:
|
||||||
|
|
||||||
def _is_rate_limited(self, connection_id: str) -> bool:
|
def _is_rate_limited(self, connection_id: str) -> bool:
|
||||||
"""Check if connection is rate limited for refresh attempts"""
|
"""Check if connection is rate limited for refresh attempts"""
|
||||||
now = get_utc_timestamp()
|
now = getUtcTimestamp()
|
||||||
if connection_id not in self.rate_limit_map:
|
if connection_id not in self.rate_limit_map:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -39,7 +39,7 @@ class TokenRefreshService:
|
||||||
|
|
||||||
def _record_refresh_attempt(self, connection_id: str) -> None:
|
def _record_refresh_attempt(self, connection_id: str) -> None:
|
||||||
"""Record a refresh attempt for rate limiting"""
|
"""Record a refresh attempt for rate limiting"""
|
||||||
now = get_utc_timestamp()
|
now = getUtcTimestamp()
|
||||||
if connection_id not in self.rate_limit_map:
|
if connection_id not in self.rate_limit_map:
|
||||||
self.rate_limit_map[connection_id] = []
|
self.rate_limit_map[connection_id] = []
|
||||||
self.rate_limit_map[connection_id].append(now)
|
self.rate_limit_map[connection_id].append(now)
|
||||||
|
|
@ -60,14 +60,14 @@ class TokenRefreshService:
|
||||||
token_manager = TokenManager()
|
token_manager = TokenManager()
|
||||||
|
|
||||||
# Attempt to refresh the token
|
# Attempt to refresh the token
|
||||||
refreshed_token = token_manager.refresh_token(current_token)
|
refreshedToken = token_manager.refreshToken(current_token)
|
||||||
if refreshed_token:
|
if refreshedToken:
|
||||||
# Save the refreshed token
|
# Save the refreshed token
|
||||||
interface.saveConnectionToken(refreshed_token)
|
interface.saveConnectionToken(refreshedToken)
|
||||||
|
|
||||||
# Update connection status
|
# Update connection status
|
||||||
interface.db.recordModify(UserConnection, connection.id, {
|
interface.db.recordModify(UserConnection, connection.id, {
|
||||||
"lastChecked": get_utc_timestamp(),
|
"lastChecked": getUtcTimestamp(),
|
||||||
"expiresAt": refreshed_token.expiresAt
|
"expiresAt": refreshedToken.expiresAt
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
@ -75,9 +75,9 @@ class TokenRefreshService:
|
||||||
|
|
||||||
# Log audit event
|
# Log audit event
|
||||||
try:
|
try:
|
||||||
audit_logger.log_security_event(
|
audit_logger.logSecurityEvent(
|
||||||
user_id=str(connection.userId),
|
userId=str(connection.userId),
|
||||||
mandate_id="system",
|
mandateId="system",
|
||||||
action="token_refresh",
|
action="token_refresh",
|
||||||
details=f"Google token refreshed for connection {connection.id}"
|
details=f"Google token refreshed for connection {connection.id}"
|
||||||
)
|
)
|
||||||
|
|
@ -109,14 +109,14 @@ class TokenRefreshService:
|
||||||
token_manager = TokenManager()
|
token_manager = TokenManager()
|
||||||
|
|
||||||
# Attempt to refresh the token
|
# Attempt to refresh the token
|
||||||
refreshed_token = token_manager.refresh_token(current_token)
|
refreshedToken = token_manager.refreshToken(current_token)
|
||||||
if refreshed_token:
|
if refreshedToken:
|
||||||
# Save the refreshed token
|
# Save the refreshed token
|
||||||
interface.saveConnectionToken(refreshed_token)
|
interface.saveConnectionToken(refreshedToken)
|
||||||
|
|
||||||
# Update connection status
|
# Update connection status
|
||||||
interface.db.recordModify(UserConnection, connection.id, {
|
interface.db.recordModify(UserConnection, connection.id, {
|
||||||
"lastChecked": get_utc_timestamp(),
|
"lastChecked": getUtcTimestamp(),
|
||||||
"expiresAt": refreshed_token.expiresAt
|
"expiresAt": refreshedToken.expiresAt
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
@ -124,9 +124,9 @@ class TokenRefreshService:
|
||||||
|
|
||||||
# Log audit event
|
# Log audit event
|
||||||
try:
|
try:
|
||||||
audit_logger.log_security_event(
|
audit_logger.logSecurityEvent(
|
||||||
user_id=str(connection.userId),
|
userId=str(connection.userId),
|
||||||
mandate_id="system",
|
mandateId="system",
|
||||||
action="token_refresh",
|
action="token_refresh",
|
||||||
details=f"Microsoft token refreshed for connection {connection.id}"
|
details=f"Microsoft token refreshed for connection {connection.id}"
|
||||||
)
|
)
|
||||||
|
|
@ -234,7 +234,7 @@ class TokenRefreshService:
|
||||||
refreshed_count = 0
|
refreshed_count = 0
|
||||||
failed_count = 0
|
failed_count = 0
|
||||||
rate_limited_count = 0
|
rate_limited_count = 0
|
||||||
current_time = get_utc_timestamp()
|
current_time = getUtcTimestamp()
|
||||||
five_minutes = 5 * 60 # 5 minutes in seconds
|
five_minutes = 5 * 60 # 5 minutes in seconds
|
||||||
|
|
||||||
# Process each connection
|
# Process each connection
|
||||||
|
|
|
||||||
|
|
@ -11,18 +11,18 @@ class PublicService:
|
||||||
- Optional name_filter predicate for allow-list patterns
|
- Optional nameFilter predicate for allow-list patterns
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
|
def __init__(self, target: Any, functionsOnly: bool = True, nameFilter=None):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._functions_only = functions_only
|
self._functionsOnly = functionsOnly
|
||||||
self._name_filter = name_filter
|
self._nameFilter = nameFilter
|
||||||
|
|
||||||
def __getattr__(self, name: str):
|
def __getattr__(self, name: str):
|
||||||
if name.startswith('_'):
|
if name.startswith('_'):
|
||||||
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
|
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
|
||||||
if self._name_filter and not self._name_filter(name):
|
if self._nameFilter and not self._nameFilter(name):
|
||||||
raise AttributeError(f"'{name}' not exposed by policy")
|
raise AttributeError(f"'{name}' not exposed by policy")
|
||||||
attr = getattr(self._target, name)
|
attr = getattr(self._target, name)
|
||||||
if self._functions_only and not callable(attr):
|
if self._functionsOnly and not callable(attr):
|
||||||
raise AttributeError(f"'{name}' is not a function")
|
raise AttributeError(f"'{name}' is not a function")
|
||||||
return attr
|
return attr
|
||||||
|
|
||||||
|
|
@ -30,8 +30,8 @@ class PublicService:
|
||||||
names = [
|
names = [
|
||||||
n for n in dir(self._target)
|
n for n in dir(self._target)
|
||||||
if not n.startswith('_')
|
if not n.startswith('_')
|
||||||
and (not self._functions_only or callable(getattr(self._target, n, None)))
|
and (not self._functionsOnly or callable(getattr(self._target, n, None)))
|
||||||
and (self._name_filter(n) if self._name_filter else True)
|
and (self._nameFilter(n) if self._nameFilter else True)
|
||||||
]
|
]
|
||||||
return sorted(names)
|
return sorted(names)
|
||||||
|
|
||||||
|
|
@ -70,7 +70,7 @@ class Services:
|
||||||
self.sharepoint = PublicService(SharepointService(self))
|
self.sharepoint = PublicService(SharepointService(self))
|
||||||
|
|
||||||
from .serviceAi.mainServiceAi import AiService
|
from .serviceAi.mainServiceAi import AiService
|
||||||
self.ai = PublicService(AiService(self))
|
self.ai = PublicService(AiService(self), functionsOnly=False)
|
||||||
|
|
||||||
from .serviceTicket.mainServiceTicket import TicketService
|
from .serviceTicket.mainServiceTicket import TicketService
|
||||||
self.ticket = PublicService(TicketService(self))
|
self.ticket = PublicService(TicketService(self))
|
||||||
|
|
|
||||||
|
|
@ -1,30 +1,26 @@
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List, Optional, Union
|
import time
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
||||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
|
||||||
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
||||||
from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
|
from modules.shared.jsonUtils import (
|
||||||
from modules.services.serviceAi.subSharedAiUtils import sanitizePromptContent
|
extractJsonString,
|
||||||
|
repairBrokenJson,
|
||||||
|
extractSectionsFromDocument,
|
||||||
|
buildContinuationContext
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Rebuild the model to resolve forward references
|
||||||
|
AiCallRequest.model_rebuild()
|
||||||
|
|
||||||
class AiService:
|
class AiService:
|
||||||
"""Lightweight AI service orchestrator that delegates to specialized sub-modules.
|
"""AI service with core operations integrated."""
|
||||||
|
|
||||||
Manager delegates to specialized sub-modules:
|
|
||||||
- SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
|
|
||||||
- SubDocumentProcessing: Document chunking, processing, and merging logic
|
|
||||||
- SubDocumentGeneration: Single-file and multi-file document generation
|
|
||||||
|
|
||||||
The main service acts as a coordinator:
|
|
||||||
1. Manages lazy initialization of sub-modules
|
|
||||||
2. Delegates operations to appropriate sub-modules
|
|
||||||
3. Maintains the same public API for backward compatibility
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, serviceCenter=None) -> None:
|
def __init__(self, serviceCenter=None) -> None:
|
||||||
"""Initialize AI service with service center access.
|
"""Initialize AI service with service center access.
|
||||||
|
|
@ -34,64 +30,638 @@ class AiService:
|
||||||
"""
|
"""
|
||||||
self.services = serviceCenter
|
self.services = serviceCenter
|
||||||
# Only depend on interfaces
|
# Only depend on interfaces
|
||||||
self.aiObjects = None # Will be initialized in create()
|
self.aiObjects = None # Will be initialized in create() or _ensureAiObjectsInitialized()
|
||||||
self._extractionService = None # Lazy initialization
|
# Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
|
||||||
self._coreAi = None # Lazy initialization
|
self.extractionService = None
|
||||||
self._documentProcessor = None # Lazy initialization
|
self.documentProcessor = None
|
||||||
self._documentGenerator = None # Lazy initialization
|
|
||||||
|
|
||||||
@property
|
def _initializeSubmodules(self):
|
||||||
def extractionService(self):
|
"""Initialize all submodules after aiObjects is ready."""
|
||||||
"""Lazy initialization of extraction service."""
|
if self.aiObjects is None:
|
||||||
if self._extractionService is None:
|
raise RuntimeError("aiObjects must be initialized before initializing submodules")
|
||||||
logger.info("Lazy initializing ExtractionService...")
|
|
||||||
self._extractionService = ExtractionService(self.services)
|
|
||||||
return self._extractionService
|
|
||||||
|
|
||||||
@property
|
if self.extractionService is None:
|
||||||
def coreAi(self):
|
logger.info("Initializing ExtractionService...")
|
||||||
"""Lazy initialization of core AI service."""
|
self.extractionService = ExtractionService(self.services)
|
||||||
if self._coreAi is None:
|
|
||||||
if self.aiObjects is None:
|
|
||||||
raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
|
|
||||||
logger.info("Lazy initializing SubCoreAi...")
|
|
||||||
self._coreAi = SubCoreAi(self.services, self.aiObjects)
|
|
||||||
return self._coreAi
|
|
||||||
|
|
||||||
@property
|
if self.documentProcessor is None:
|
||||||
def documentProcessor(self):
|
logger.info("Initializing SubDocumentProcessing...")
|
||||||
"""Lazy initialization of document processing service."""
|
self.documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
|
||||||
if self._documentProcessor is None:
|
|
||||||
logger.info("Lazy initializing SubDocumentProcessing...")
|
|
||||||
self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
|
|
||||||
return self._documentProcessor
|
|
||||||
|
|
||||||
|
|
||||||
@property
|
|
||||||
def documentGenerator(self):
|
|
||||||
"""Lazy initialization of document generation service."""
|
|
||||||
if self._documentGenerator is None:
|
|
||||||
logger.info("Lazy initializing SubDocumentGeneration...")
|
|
||||||
self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
|
|
||||||
return self._documentGenerator
|
|
||||||
|
|
||||||
async def _ensureAiObjectsInitialized(self):
|
async def _ensureAiObjectsInitialized(self):
|
||||||
"""Ensure aiObjects is initialized."""
|
"""Ensure aiObjects is initialized and submodules are ready."""
|
||||||
if self.aiObjects is None:
|
if self.aiObjects is None:
|
||||||
logger.info("Lazy initializing AiObjects...")
|
logger.info("Lazy initializing AiObjects...")
|
||||||
self.aiObjects = await AiObjects.create()
|
self.aiObjects = await AiObjects.create()
|
||||||
logger.info("AiObjects initialization completed")
|
logger.info("AiObjects initialization completed")
|
||||||
|
# Initialize submodules after aiObjects is ready
|
||||||
|
self._initializeSubmodules()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def create(cls, serviceCenter=None) -> "AiService":
|
async def create(cls, serviceCenter=None) -> "AiService":
|
||||||
"""Create AiService instance with all connectors initialized."""
|
"""Create AiService instance with all connectors and submodules initialized."""
|
||||||
logger.info("AiService.create() called")
|
logger.info("AiService.create() called")
|
||||||
instance = cls(serviceCenter)
|
instance = cls(serviceCenter)
|
||||||
logger.info("AiService created, about to call AiObjects.create()...")
|
logger.info("AiService created, about to call AiObjects.create()...")
|
||||||
instance.aiObjects = await AiObjects.create()
|
instance.aiObjects = await AiObjects.create()
|
||||||
logger.info("AiObjects.create() completed")
|
logger.info("AiObjects.create() completed")
|
||||||
|
# Initialize all submodules after aiObjects is ready
|
||||||
|
instance._initializeSubmodules()
|
||||||
|
logger.info("AiService submodules initialized")
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
|
# Helper methods
|
||||||
|
|
||||||
|
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
|
||||||
|
"""
|
||||||
|
Build full prompt by replacing placeholders with their content.
|
||||||
|
Uses the new {{KEY:placeholder}} format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The base prompt template
|
||||||
|
placeholders: Dictionary of placeholder key-value pairs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Prompt with placeholders replaced
|
||||||
|
"""
|
||||||
|
if not placeholders:
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
full_prompt = prompt
|
||||||
|
for placeholder, content in placeholders.items():
|
||||||
|
# Skip placeholders whose content is None (empty strings are still substituted)
|
||||||
|
if content is None:
|
||||||
|
continue
|
||||||
|
# Replace {{KEY:placeholder}}
|
||||||
|
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
|
||||||
|
|
||||||
|
return full_prompt
|
||||||
|
|
||||||
|
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
|
||||||
|
"""Analyze prompt to determine appropriate AiCallOptions parameters."""
|
||||||
|
try:
|
||||||
|
# Get dynamic enum values from Pydantic models
|
||||||
|
operationTypes = [e.value for e in OperationTypeEnum]
|
||||||
|
priorities = [e.value for e in PriorityEnum]
|
||||||
|
processingModes = [e.value for e in ProcessingModeEnum]
|
||||||
|
|
||||||
|
# Create analysis prompt for AI to determine operation type and parameters
|
||||||
|
analysisPrompt = f"""
|
||||||
|
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
|
||||||
|
|
||||||
|
PROMPT TO ANALYZE:
|
||||||
|
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}
|
||||||
|
|
||||||
|
Based on the prompt content, determine:
|
||||||
|
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
|
||||||
|
2. priority: Choose from: {', '.join(priorities)}
|
||||||
|
3. processingMode: Choose from: {', '.join(processingModes)}
|
||||||
|
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
|
||||||
|
5. compressContext: true/false (true to summarize context, false to process fully)
|
||||||
|
|
||||||
|
Respond with ONLY a JSON object in this exact format:
|
||||||
|
{{
|
||||||
|
"operationType": "dataAnalyse",
|
||||||
|
"priority": "balanced",
|
||||||
|
"processingMode": "basic",
|
||||||
|
"compressPrompt": true,
|
||||||
|
"compressContext": true
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Use AI to analyze the prompt
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=analysisPrompt,
|
||||||
|
options=AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||||
|
priority=PriorityEnum.SPEED,
|
||||||
|
processingMode=ProcessingModeEnum.BASIC,
|
||||||
|
compressPrompt=True,
|
||||||
|
compressContext=False
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
|
||||||
|
# Parse AI response
|
||||||
|
try:
|
||||||
|
jsonStart = response.content.find('{')
|
||||||
|
jsonEnd = response.content.rfind('}') + 1
|
||||||
|
if jsonStart != -1 and jsonEnd > jsonStart:
|
||||||
|
analysis = json.loads(response.content[jsonStart:jsonEnd])
|
||||||
|
|
||||||
|
# Map string values to enums
|
||||||
|
operationType = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
|
||||||
|
priority = PriorityEnum(analysis.get('priority', 'balanced'))
|
||||||
|
processingMode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
|
||||||
|
|
||||||
|
return AiCallOptions(
|
||||||
|
operationType=operationType,
|
||||||
|
priority=priority,
|
||||||
|
processingMode=processingMode,
|
||||||
|
compressPrompt=analysis.get('compressPrompt', True),
|
||||||
|
compressContext=analysis.get('compressContext', True)
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to parse AI analysis response: {e}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Prompt analysis failed: {e}")
|
||||||
|
|
||||||
|
# Fallback to default options
|
||||||
|
return AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||||
|
priority=PriorityEnum.BALANCED,
|
||||||
|
processingMode=ProcessingModeEnum.BASIC
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call",
    promptBuilder: Optional[callable] = None,
    promptArgs: Optional[Dict[str, Any]] = None,
    operationId: Optional[str] = None
) -> str:
    """
    Shared core function for AI calls with repair-based looping system.

    Each iteration sends a prompt to the AI, extracts sections from the
    (possibly broken) JSON answer via _extractSectionsFromResponse and asks
    _shouldContinueGeneration whether another round is needed. Sections of
    all iterations are accumulated and turned into one JSON string by
    _buildFinalResultFromSections.

    The loop stops when:
    - the AI returns an empty response,
    - no sections could be extracted (except for an incomplete continuation
      fragment, which is retried),
    - _shouldContinueGeneration reports completion,
    - an exception is raised inside the AI call / parsing step (logged,
      the sections collected so far are still returned),
    - maxIterations is reached.

    Args:
        prompt: The prompt to send to AI (used as-is for the first iteration)
        options: AI call configuration options
        debugPrefix: Prefix for debug file names
        promptBuilder: Optional async function to rebuild prompts for continuation;
            it is awaited with **promptArgs plus continuationContext
        promptArgs: Optional arguments for prompt builder
        operationId: Optional operation ID for progress tracking

    Returns:
        Complete AI response after all iterations (JSON string built from the
        accumulated sections, or "" when no sections were collected)
    """
    maxIterations = 50  # Prevent infinite loops
    iteration = 0
    allSections = []  # Accumulate all sections across iterations
    lastRawResponse = None  # Store last raw JSON response for continuation

    while iteration < maxIterations:
        iteration += 1

        # Debug files of follow-up iterations carry the iteration number
        debugSuffix = "" if iteration == 1 else f"_iteration_{iteration}"

        # Update progress for iteration start
        if operationId:
            if iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
            else:
                # For continuation iterations, show progress incrementally
                # (from 0.5 to 0.9 over maxIterations iterations)
                baseProgress = 0.5 + (min(iteration - 1, maxIterations) / maxIterations * 0.4)
                self.services.workflow.progressLogUpdate(operationId, baseProgress, f"Continuing generation (iteration {iteration})")

        # Build iteration prompt
        if len(allSections) > 0 and promptBuilder and promptArgs:
            # This is a continuation - build continuation context with raw JSON and rebuild prompt
            continuationContext = buildContinuationContext(allSections, lastRawResponse)
            if not lastRawResponse:
                logger.warning(f"Iteration {iteration}: No previous response available for continuation!")

            # Rebuild prompt with continuation context using the provided prompt builder
            iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
        else:
            # First iteration (or no prompt builder available) - use original prompt
            iterationPrompt = prompt

        # Make AI call
        try:
            if operationId and iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")
            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )

            # Write the ACTUAL prompt sent to AI
            self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt{debugSuffix}")

            response = await self.aiObjects.call(request)
            result = response.content

            # Update progress after AI call
            if operationId:
                if iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
                else:
                    progress = 0.6 + (min(iteration - 1, 10) * 0.03)
                    self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")

            # Write raw AI response to debug file
            self.services.utils.writeDebugFile(result, f"{debugPrefix}_response{debugSuffix}")

            # Emit stats for this iteration
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.call.{debugPrefix}.iteration_{iteration}"
            )

            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break

            # Store raw response for continuation (even if broken)
            lastRawResponse = result

            # Extract sections from response (handles both valid and broken JSON).
            # The "complete_response" flag is evaluated in _shouldContinueGeneration,
            # so no separate check of the raw text is needed here.
            extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

            # Update progress after parsing
            if operationId and extractedSections:
                self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")

            if not extractedSections:
                # If we're in continuation mode and JSON was incomplete, don't stop - continue to allow retry
                if iteration > 1 and not wasJsonComplete:
                    logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                    continue
                # Otherwise, stop if no sections
                logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                break

            # Add new sections to accumulator
            allSections.extend(extractedSections)

            # Check if we should continue (completion detection)
            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                continue

            # Done - build final result
            if operationId:
                self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
            break

        except Exception as e:
            # Best effort: keep whatever was accumulated so far
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            break
    else:
        # Only reached when the loop ran out of iterations without a break,
        # i.e. generation still wanted to continue after the last allowed round.
        logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")

    # Build final result from accumulated sections
    finalResult = self._buildFinalResultFromSections(allSections)

    # Write final result to debug file
    self.services.utils.writeDebugFile(finalResult, f"{debugPrefix}_final_result")

    return finalResult
|
||||||
|
|
||||||
|
def _extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool]:
    """
    Pull document sections out of a raw AI response.

    The response is parsed as JSON first. When parsing fails, the raw text is
    written to a debug file and handed to repairBrokenJson.

    Returns (sections, wasJsonComplete):
    - valid JSON with "complete_response" equal to true -> (sections, True)
    - valid JSON, no sections, iteration > 1            -> (sections, False)
    - valid JSON otherwise                              -> (sections, True)
    - broken JSON that could be repaired                -> (sections, False)
    - repair failed or any other parsing error          -> ([], False)
    """
    try:
        document = json.loads(extractJsonString(result))

        # Explicit completion marker set by the AI
        aiMarkedComplete = document.get("complete_response", False) == True

        sections = extractSectionsFromDocument(document)

        if aiMarkedComplete:
            return sections, True

        # A continuation response (iteration > 1) without any extractable
        # section is treated as an incomplete fragment so the loop goes on.
        if len(sections) == 0 and iteration > 1:
            return sections, False

        # Valid JSON: complete, whether it carried sections or was empty
        return sections, True

    except json.JSONDecodeError:
        # Broken JSON is expected during iterative generation - keep it for debugging
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")

        repairedDocument = repairBrokenJson(result)
        if not repairedDocument:
            logger.error(f"Iteration {iteration}: All repair strategies failed")
            return [], False

        # Repaired, but the original JSON was not complete
        return extractSectionsFromDocument(repairedDocument), False

    except Exception as parseError:
        logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(parseError)}")
        return [], False
|
||||||
|
|
||||||
|
def _shouldContinueGeneration(
|
||||||
|
self,
|
||||||
|
allSections: List[Dict[str, Any]],
|
||||||
|
iteration: int,
|
||||||
|
wasJsonComplete: bool,
|
||||||
|
rawResponse: str = None
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Determine if generation should continue based on JSON completeness and complete_response flag.
|
||||||
|
Returns True if we should continue, False if done.
|
||||||
|
"""
|
||||||
|
if len(allSections) == 0:
|
||||||
|
return True # No sections yet, continue
|
||||||
|
|
||||||
|
# Check for complete_response flag in raw response
|
||||||
|
if rawResponse:
|
||||||
|
import re
|
||||||
|
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# If JSON was complete (and no complete_response flag), we're done
|
||||||
|
# If JSON was broken and repaired, continue to get more content
|
||||||
|
if wasJsonComplete:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _buildFinalResultFromSections(
|
||||||
|
self,
|
||||||
|
allSections: List[Dict[str, Any]]
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Build final JSON result from accumulated sections.
|
||||||
|
"""
|
||||||
|
if not allSections:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Build documents structure
|
||||||
|
# Assuming single document for now
|
||||||
|
documents = [{
|
||||||
|
"id": "doc_1",
|
||||||
|
"title": "Generated Document", # This should come from prompt
|
||||||
|
"filename": "document.json",
|
||||||
|
"sections": allSections
|
||||||
|
}]
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"metadata": {
|
||||||
|
"split_strategy": "single_document",
|
||||||
|
"source_documents": [],
|
||||||
|
"extraction_method": "ai_generation"
|
||||||
|
},
|
||||||
|
"documents": documents
|
||||||
|
}
|
||||||
|
|
||||||
|
return json.dumps(result, indent=2)
|
||||||
|
|
||||||
|
# Public API Methods
|
||||||
|
|
||||||
|
# Planning AI Call
|
||||||
|
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
    """
    Single-shot AI call for planning prompts (task planning, action planning,
    action selection, etc.).

    The call options are fixed (PLAN / QUALITY / DETAILED, no compression)
    and the response is returned as-is, without the section-based looping
    used for document generation. Prompt and response are written to the
    "plan_prompt" / "plan_response" debug files.

    Args:
        prompt: The planning prompt
        placeholders: Optional list of placeholder replacements (label -> content)

    Returns:
        Raw response content of the AI call ("" when the content is empty)
    """
    await self._ensureAiObjectsInitialized()

    # Static parameters for every planning call
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )

    # Resolve placeholders, if any were supplied
    fullPrompt = prompt
    if placeholders:
        contentByLabel = {}
        for placeholder in placeholders:
            contentByLabel[placeholder.label] = placeholder.content
        fullPrompt = self._buildPromptWithPlaceholders(prompt, contentByLabel)

    # Planning must return raw single-shot JSON, not section-based output
    planRequest = AiCallRequest(prompt=fullPrompt, context="", options=planningOptions)

    # Persist prompt/response for analysis
    self.services.utils.writeDebugFile(fullPrompt, "plan_prompt")
    planResponse = await self.aiObjects.call(planRequest)
    planText = planResponse.content or ""
    self.services.utils.writeDebugFile(planText, "plan_response")
    return planText
|
||||||
|
|
||||||
|
# Document Generation AI Call
|
||||||
|
async def callAiDocuments(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]] = None,
    options: Optional[AiCallOptions] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
    """
    Document generation AI call for all non-planning calls.

    Flow:
    1. If no options (or no operationType) are given, the prompt is analyzed
       by _analyzePromptAndCreateOptions to derive them.
    2. Image requests (operationType IMAGE_GENERATE or an image outputFormat)
       are routed to generateImage and its result is returned directly.
    3. With outputFormat: optional content extraction from documents, looped
       JSON generation via _callAiWithLooping, then rendering through
       GenerationService.renderReport.
    4. Without outputFormat: plain text call, via documentProcessor.callAiText
       when documents are given, otherwise via _callAiWithLooping.

    Progress is reported under a dedicated operation ID that is started here
    and finished on every exit path.

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration options. NOTE: when outputFormat is
            set, compressPrompt/compressContext are switched off on this
            object in place.
        outputFormat: Optional output format for document generation
        title: Optional title for generated documents

    Returns:
        AI response as string, or dict with documents if outputFormat is specified.
        JSON-parse and rendering failures are returned as
        {"success": False, "error": ...} instead of being raised.

    Raises:
        Exception: any other error is logged and re-raised after the progress
            operation has been marked as failed.
    """
    await self._ensureAiObjectsInitialized()

    # Create separate operationId for detailed progress tracking
    workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
    aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"

    # Start progress tracking for this operation
    self.services.workflow.progressLogStart(
        aiOperationId,
        "AI call with documents",
        "Document Generation",
        f"Format: {outputFormat or 'text'}"
    )

    try:
        # Ensure AI connectors are initialized before delegating to documentProcessor/generator
        if hasattr(self.services, 'ai') and hasattr(self.services.ai, '_ensureAiObjectsInitialized'):
            await self.services.ai._ensureAiObjectsInitialized()
        if options is None or (hasattr(options, 'operationType') and options.operationType is None):
            # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
            self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
            options = await self._analyzePromptAndCreateOptions(prompt)

        # Route image-generation requests directly to image pipeline to avoid JSON loop
        imgFormats = {"png", "jpg", "jpeg", "webp", "image", "base64"}
        opType = getattr(options, "operationType", None)
        fmt = outputFormat.lower() if outputFormat else None
        isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE) or (fmt in imgFormats)
        if isImageRequest:
            self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
            imageResponse = await self.generateImage(prompt, options=options)
            self.services.workflow.progressLogUpdate(aiOperationId, 0.9, "Image generated")
            self.services.workflow.progressLogFinish(aiOperationId, True)
            return imageResponse

        # Handle document generation with specific output format using unified approach
        if outputFormat:
            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
            # Compressing would truncate the template structure and confuse the AI
            if not options:
                options = AiCallOptions()
            options.compressPrompt = False  # JSON templates must NOT be truncated
            options.compressContext = False  # Context also should not be compressed

            # Use unified generation method for all document generation
            if documents and len(documents) > 0:
                self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
                extractedContent = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
            else:
                self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
                extractedContent = None

            self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
            from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
            # First call without continuation context
            generationPrompt = await buildGenerationPrompt(outputFormat, prompt, title, extractedContent, None)

            # Prepare prompt builder arguments for continuation.
            # The keys are keyword arguments of buildGenerationPrompt and must stay as they are.
            promptArgs = {
                "outputFormat": outputFormat,
                "userPrompt": prompt,
                "title": title,
                "extracted_content": extractedContent
            }

            self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
            generatedJson = await self._callAiWithLooping(
                generationPrompt,
                options,
                "document_generation",
                buildGenerationPrompt,
                promptArgs,
                aiOperationId
            )

            self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
            # Parse the generated JSON (extract fenced/embedded JSON first)
            try:
                extractedJson = self.services.utils.jsonExtractString(generatedJson)
                generatedData = json.loads(extractedJson)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse generated JSON: {str(e)}")
                logger.error(f"JSON content length: {len(generatedJson)}")
                logger.error(f"JSON content preview (last 200 chars): ...{generatedJson[-200:]}")
                # e.pos indexes the string that was actually parsed (e.doc), which is the
                # extracted JSON and not necessarily the raw generated text
                logger.error(f"JSON content around error position: {e.doc[max(0, e.pos-50):e.pos+50]}")

                # Write the problematic JSON to debug file
                self.services.utils.writeDebugFile(generatedJson, "failed_json_parsing")

                self.services.workflow.progressLogFinish(aiOperationId, False)
                return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}

            self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
            # Render to final format using the existing renderer
            try:
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)
                renderedContent, mimeType = await generationService.renderReport(
                    generatedData, outputFormat, title or "Generated Document", prompt, self
                )

                # Build result in the expected format
                result = {
                    "success": True,
                    "content": generatedData,
                    "documents": [{
                        "documentName": f"generated.{outputFormat}",
                        "documentData": renderedContent,
                        "mimeType": mimeType,
                        "title": title or "Generated Document"
                    }],
                    "is_multi_file": False,
                    "format": outputFormat,
                    "title": title,
                    "split_strategy": "single",
                    "total_documents": 1,
                    "processed_documents": 1
                }

                # Log AI response for debugging
                self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)

                self.services.workflow.progressLogFinish(aiOperationId, True)
                return result

            except Exception as e:
                logger.error(f"Error rendering document: {str(e)}")
                self.services.workflow.progressLogFinish(aiOperationId, False)
                return {"success": False, "error": f"Rendering failed: {str(e)}"}

        # Handle text calls (no output format specified)
        self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
        if documents:
            # Use document processing for text calls with documents
            result = await self.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
        else:
            # Use shared core function for direct text calls
            result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)

        self.services.workflow.progressLogFinish(aiOperationId, True)
        return result

    except Exception as e:
        logger.error(f"Error in callAiDocuments: {str(e)}")
        self.services.workflow.progressLogFinish(aiOperationId, False)
        raise
|
||||||
|
|
||||||
# AI Image Analysis
|
# AI Image Analysis
|
||||||
async def readImage(
|
async def readImage(
|
||||||
self,
|
self,
|
||||||
|
|
@ -102,7 +672,64 @@ class AiService:
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Call AI for image analysis using interface.call() with contentParts."""
|
"""Call AI for image analysis using interface.call() with contentParts."""
|
||||||
await self._ensureAiObjectsInitialized()
|
await self._ensureAiObjectsInitialized()
|
||||||
return await self.coreAi.readImage(prompt, imageData, mimeType, options)
|
|
||||||
|
try:
|
||||||
|
# Check if imageData is valid
|
||||||
|
if not imageData:
|
||||||
|
error_msg = "No image data provided"
|
||||||
|
logger.error(f"Error in AI image analysis: {error_msg}")
|
||||||
|
return f"Error: {error_msg}"
|
||||||
|
|
||||||
|
|
||||||
|
# Always use IMAGE_ANALYSE operation type for image processing
|
||||||
|
if options is None:
|
||||||
|
options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
|
||||||
|
else:
|
||||||
|
# Override the operation type to ensure image analysis
|
||||||
|
options.operationType = OperationTypeEnum.IMAGE_ANALYSE
|
||||||
|
|
||||||
|
# Create content parts with image data
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# ContentPart.data must be a string - convert bytes to base64 if needed
|
||||||
|
if isinstance(imageData, bytes):
|
||||||
|
imageDataStr = base64.b64encode(imageData).decode('utf-8')
|
||||||
|
else:
|
||||||
|
# Already a base64 string
|
||||||
|
imageDataStr = imageData
|
||||||
|
|
||||||
|
imagePart = ContentPart(
|
||||||
|
id="image_0",
|
||||||
|
parentId=None,
|
||||||
|
label="Image",
|
||||||
|
typeGroup="image",
|
||||||
|
mimeType=mimeType or "image/jpeg",
|
||||||
|
data=imageDataStr, # Must be a string (base64 encoded)
|
||||||
|
metadata={"imageAnalysis": True}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create request with content parts
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=prompt,
|
||||||
|
context="",
|
||||||
|
options=options,
|
||||||
|
contentParts=[imagePart]
|
||||||
|
)
|
||||||
|
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
result = response.content
|
||||||
|
|
||||||
|
# Check if result is valid
|
||||||
|
if not result or (isinstance(result, str) and not result.strip()):
|
||||||
|
error_msg = f"No response from AI image analysis (result: {repr(result)})"
|
||||||
|
logger.error(f"Error in AI image analysis: {error_msg}")
|
||||||
|
return f"Error: {error_msg}"
|
||||||
|
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in AI image analysis: {str(e)}")
|
||||||
|
return f"Error: {str(e)}"
|
||||||
|
|
||||||
# AI Image Generation
|
# AI Image Generation
|
||||||
async def generateImage(
|
async def generateImage(
|
||||||
|
|
@ -115,34 +742,19 @@ class AiService:
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Generate an image using AI using interface.generateImage()."""
|
"""Generate an image using AI using interface.generateImage()."""
|
||||||
await self._ensureAiObjectsInitialized()
|
await self._ensureAiObjectsInitialized()
|
||||||
return await self.coreAi.generateImage(prompt, size, quality, style, options)
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
|
||||||
|
|
||||||
# Core AI Methods - Delegating to SubCoreAi
|
# Emit stats for image generation
|
||||||
async def callAiPlanning(
|
self.services.workflow.storeWorkflowStat(
|
||||||
self,
|
self.services.currentWorkflow,
|
||||||
prompt: str,
|
response,
|
||||||
placeholders: Optional[List[PromptPlaceholder]] = None
|
f"ai.generate.image"
|
||||||
) -> str:
|
)
|
||||||
"""Planning AI call for task planning, action planning, action selection, etc."""
|
|
||||||
await self._ensureAiObjectsInitialized()
|
|
||||||
# Always use "json" for planning calls since they return JSON
|
|
||||||
return await self.coreAi.callAiPlanning(prompt, placeholders)
|
|
||||||
|
|
||||||
async def callAiDocuments(
|
|
||||||
self,
|
|
||||||
prompt: str,
|
|
||||||
documents: Optional[List[ChatDocument]] = None,
|
|
||||||
options: Optional[AiCallOptions] = None,
|
|
||||||
outputFormat: Optional[str] = None,
|
|
||||||
title: Optional[str] = None
|
|
||||||
) -> Union[str, Dict[str, Any]]:
|
|
||||||
"""Document generation AI call for all non-planning calls."""
|
|
||||||
await self._ensureAiObjectsInitialized()
|
|
||||||
return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)
|
|
||||||
|
|
||||||
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
|
|
||||||
"""Sanitize prompt content to prevent injection attacks and ensure safe presentation."""
|
|
||||||
return sanitizePromptContent(content, contentType)
|
|
||||||
|
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in AI image generation: {str(e)}")
|
||||||
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,687 +0,0 @@
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
|
||||||
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
|
||||||
from modules.services.serviceAi.subSharedAiUtils import (
|
|
||||||
buildPromptWithPlaceholders,
|
|
||||||
extractTextFromContentParts,
|
|
||||||
reduceText,
|
|
||||||
determineCallType
|
|
||||||
)
|
|
||||||
from modules.shared.jsonUtils import (
|
|
||||||
extractJsonString,
|
|
||||||
repairBrokenJson,
|
|
||||||
extractSectionsFromDocument,
|
|
||||||
buildContinuationContext
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Repair-based looping system - no longer needs LOOP_INSTRUCTION_TEXT
|
|
||||||
# Sections are accumulated and repair mechanism handles broken JSON automatically
|
|
||||||
|
|
||||||
# Rebuild the model to resolve forward references
|
|
||||||
AiCallRequest.model_rebuild()
|
|
||||||
|
|
||||||
|
|
||||||
class SubCoreAi:
|
|
||||||
"""Core AI operations including image analysis, text generation, and planning calls."""
|
|
||||||
|
|
||||||
def __init__(self, services, aiObjects):
|
|
||||||
"""Initialize core AI operations.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
services: Service center instance for accessing other services
|
|
||||||
aiObjects: Initialized AiObjects instance
|
|
||||||
"""
|
|
||||||
self.services = services
|
|
||||||
self.aiObjects = aiObjects
|
|
||||||
|
|
||||||
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
|
|
||||||
"""Analyze prompt to determine appropriate AiCallOptions parameters."""
|
|
||||||
try:
|
|
||||||
# Get dynamic enum values from Pydantic models
|
|
||||||
operation_types = [e.value for e in OperationTypeEnum]
|
|
||||||
priorities = [e.value for e in PriorityEnum]
|
|
||||||
processing_modes = [e.value for e in ProcessingModeEnum]
|
|
||||||
|
|
||||||
# Create analysis prompt for AI to determine operation type and parameters
|
|
||||||
analysisPrompt = f"""
|
|
||||||
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
|
|
||||||
|
|
||||||
PROMPT TO ANALYZE:
|
|
||||||
{self.services.ai.sanitizePromptContent(prompt, 'userinput')}
|
|
||||||
|
|
||||||
Based on the prompt content, determine:
|
|
||||||
1. operationType: Choose the most appropriate from: {', '.join(operation_types)}
|
|
||||||
2. priority: Choose from: {', '.join(priorities)}
|
|
||||||
3. processingMode: Choose from: {', '.join(processing_modes)}
|
|
||||||
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
|
|
||||||
5. compressContext: true/false (true to summarize context, false to process fully)
|
|
||||||
|
|
||||||
Respond with ONLY a JSON object in this exact format:
|
|
||||||
{{
|
|
||||||
"operationType": "dataAnalyse",
|
|
||||||
"priority": "balanced",
|
|
||||||
"processingMode": "basic",
|
|
||||||
"compressPrompt": true,
|
|
||||||
"compressContext": true
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Use AI to analyze the prompt
|
|
||||||
request = AiCallRequest(
|
|
||||||
prompt=analysisPrompt,
|
|
||||||
options=AiCallOptions(
|
|
||||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
|
||||||
priority=PriorityEnum.SPEED,
|
|
||||||
processingMode=ProcessingModeEnum.BASIC,
|
|
||||||
compressPrompt=True,
|
|
||||||
compressContext=False
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
response = await self.aiObjects.call(request)
|
|
||||||
|
|
||||||
# Parse AI response
|
|
||||||
try:
|
|
||||||
import json
|
|
||||||
json_start = response.content.find('{')
|
|
||||||
json_end = response.content.rfind('}') + 1
|
|
||||||
if json_start != -1 and json_end > json_start:
|
|
||||||
analysis = json.loads(response.content[json_start:json_end])
|
|
||||||
|
|
||||||
# Map string values to enums
|
|
||||||
operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
|
|
||||||
priority = PriorityEnum(analysis.get('priority', 'balanced'))
|
|
||||||
processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
|
|
||||||
|
|
||||||
return AiCallOptions(
|
|
||||||
operationType=operation_type,
|
|
||||||
priority=priority,
|
|
||||||
processingMode=processing_mode,
|
|
||||||
compressPrompt=analysis.get('compressPrompt', True),
|
|
||||||
compressContext=analysis.get('compressContext', True)
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to parse AI analysis response: {e}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Prompt analysis failed: {e}")
|
|
||||||
|
|
||||||
# Fallback to default options
|
|
||||||
return AiCallOptions(
|
|
||||||
operationType=OperationTypeEnum.DATA_ANALYSE,
|
|
||||||
priority=PriorityEnum.BALANCED,
|
|
||||||
processingMode=ProcessingModeEnum.BASIC
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Shared Core Function for AI Calls with Looping and Repair
|
|
||||||
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call",
    promptBuilder: Optional[callable] = None,
    promptArgs: Optional[Dict[str, Any]] = None,
    operationId: Optional[str] = None
) -> str:
    """
    Shared core function for AI calls with repair-based looping system.

    Sends the prompt to the AI, extracts sections from every response
    (repairing broken JSON where possible) and keeps requesting
    continuations until the response is judged complete, an iteration
    fails, or the iteration limit is reached.

    Args:
        prompt: The prompt to send to AI on the first iteration
        options: AI call configuration options
        debugPrefix: Prefix for debug file names and workflow stat keys
        promptBuilder: Optional async function used to rebuild the prompt for
            continuation; it is awaited with **promptArgs plus a
            continuationContext keyword argument
        promptArgs: Optional keyword arguments for promptBuilder
        operationId: Optional operation ID for progress tracking

    Returns:
        Final result built from all sections accumulated across iterations

    Note:
        Errors raised while rebuilding the continuation prompt propagate to
        the caller; errors raised by the AI call or by response handling are
        logged and end the loop.
    """
    # Loop-invariant import, resolved once instead of on every iteration
    from modules.datamodels.datamodelAi import AiCallRequest

    max_iterations = 50  # Prevent infinite loops
    iteration = 0
    allSections = []  # Accumulate all sections across iterations
    lastRawResponse = None  # Store last raw JSON response for continuation

    while iteration < max_iterations:
        iteration += 1

        # Update progress for iteration start
        if operationId:
            if iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.5, f"Starting AI call iteration {iteration}")
            else:
                # For continuation iterations, progress moves from 0.5 towards 0.9
                base_progress = 0.5 + (min(iteration - 1, max_iterations) / max_iterations * 0.4)
                self.services.workflow.progressLogUpdate(operationId, base_progress, f"Continuing generation (iteration {iteration})")

        # Build iteration prompt
        if len(allSections) > 0 and promptBuilder and promptArgs:
            # Continuation: hand the accumulated sections and the last raw
            # response to the prompt builder
            continuationContext = buildContinuationContext(allSections, lastRawResponse)
            if not lastRawResponse:
                logger.warning(f"Iteration {iteration}: No previous response available for continuation!")

            iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
        else:
            # First iteration, or no prompt builder available: use original prompt.
            # NOTE(review): without a promptBuilder an incomplete response causes the
            # unchanged prompt to be sent again - confirm this retry is intended.
            iterationPrompt = prompt

        # Make AI call
        try:
            if operationId and iteration == 1:
                self.services.workflow.progressLogUpdate(operationId, 0.51, "Calling AI model")

            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )

            # Write the ACTUAL prompt sent to AI
            if iteration == 1:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

            response = await self.aiObjects.call(request)
            result = response.content

            # Update progress after AI call
            if operationId:
                if iteration == 1:
                    self.services.workflow.progressLogUpdate(operationId, 0.6, f"AI response received (iteration {iteration})")
                else:
                    progress = 0.6 + (min(iteration - 1, 10) * 0.03)
                    self.services.workflow.progressLogUpdate(operationId, progress, f"Processing response (iteration {iteration})")

            # Write raw AI response to debug file
            if iteration == 1:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
            else:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")

            # Emit stats for this iteration
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.call.{debugPrefix}.iteration_{iteration}"
            )

            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break

            # Store raw response for continuation (even if broken)
            lastRawResponse = result

            # Extract sections from response (handles both valid and broken JSON).
            # The complete_response flag is evaluated in _shouldContinueGeneration.
            extractedSections, wasJsonComplete = self._extractSectionsFromResponse(result, iteration, debugPrefix)

            # Update progress after parsing
            if operationId and extractedSections:
                self.services.workflow.progressLogUpdate(operationId, 0.65 + (min(iteration - 1, 10) * 0.025), f"Extracted {len(extractedSections)} sections (iteration {iteration})")

            if not extractedSections:
                # In continuation mode an incomplete fragment gets another attempt
                if iteration > 1 and not wasJsonComplete:
                    logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                    continue
                # Otherwise, stop if no sections
                logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                break

            # Add new sections to accumulator
            allSections.extend(extractedSections)

            # Check if we should continue (completion detection)
            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                continue

            # Done - build final result
            if operationId:
                self.services.workflow.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
            break

        except Exception as e:
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            break
    else:
        # Reached only when the loop ran out of iterations without a break,
        # so a generation that finishes on the last allowed iteration is not
        # reported as truncated.
        logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")

    # Build final result from accumulated sections
    final_result = self._buildFinalResultFromSections(allSections)

    # Write final result to debug file
    self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")

    return final_result
|
|
||||||
|
|
||||||
def _extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool]:
    """
    Extract sections from AI response, handling both valid and broken JSON.

    Valid JSON is parsed directly; broken JSON is written to a debug file
    and passed through the repair mechanism.

    Args:
        result: Raw AI response text
        iteration: 1-based iteration number of the calling loop
        debugPrefix: Prefix for the debug file written when JSON is broken

    Returns:
        (sections, wasJsonComplete). wasJsonComplete is True when the JSON
        parsed cleanly, except for a continuation fragment (iteration > 1)
        that yields no sections and is not flagged "complete_response": true.
        It is False when the JSON had to be repaired or could not be parsed.
    """
    try:
        # First, try to parse as valid JSON
        extracted = extractJsonString(result)
        parsedResult = json.loads(extracted)

        # Only a JSON object can carry the completion flag; a list or scalar
        # top level must not crash on .get() and be reported as a parse error.
        isComplete = (
            isinstance(parsedResult, dict)
            and parsedResult.get("complete_response", False) is True
        )

        # Extract sections from parsed JSON
        sections = extractSectionsFromDocument(parsedResult)

        # If AI marked as complete, always return as complete
        if isComplete:
            return sections, True

        # Continuation responses are expected to be fragments: one without
        # extractable sections means another iteration is needed
        if len(sections) == 0 and iteration > 1:
            return sections, False

        # JSON was complete (possibly empty on the first iteration)
        return sections, True

    except json.JSONDecodeError:
        # Broken JSON - try repair mechanism (normal in iterative generation)
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")

        repairedJson = repairBrokenJson(result)
        if repairedJson:
            # JSON was broken but repaired
            return extractSectionsFromDocument(repairedJson), False

        # Repair failed - log error
        logger.error(f"Iteration {iteration}: All repair strategies failed")
        return [], False

    except Exception as e:
        logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
        return [], False
|
|
||||||
|
|
||||||
def _shouldContinueGeneration(
|
|
||||||
self,
|
|
||||||
allSections: List[Dict[str, Any]],
|
|
||||||
iteration: int,
|
|
||||||
wasJsonComplete: bool,
|
|
||||||
rawResponse: str = None
|
|
||||||
) -> bool:
|
|
||||||
"""
|
|
||||||
Determine if generation should continue based on JSON completeness and complete_response flag.
|
|
||||||
Returns True if we should continue, False if done.
|
|
||||||
"""
|
|
||||||
if len(allSections) == 0:
|
|
||||||
return True # No sections yet, continue
|
|
||||||
|
|
||||||
# Check for complete_response flag in raw response
|
|
||||||
if rawResponse:
|
|
||||||
import re
|
|
||||||
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# If JSON was complete (and no complete_response flag), we're done
|
|
||||||
# If JSON was broken and repaired, continue to get more content
|
|
||||||
if wasJsonComplete:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
def _buildFinalResultFromSections(
|
|
||||||
self,
|
|
||||||
allSections: List[Dict[str, Any]]
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Build final JSON result from accumulated sections.
|
|
||||||
"""
|
|
||||||
if not allSections:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# Build documents structure
|
|
||||||
# Assuming single document for now
|
|
||||||
documents = [{
|
|
||||||
"id": "doc_1",
|
|
||||||
"title": "Generated Document", # This should come from prompt
|
|
||||||
"filename": "document.json",
|
|
||||||
"sections": allSections
|
|
||||||
}]
|
|
||||||
|
|
||||||
result = {
|
|
||||||
"metadata": {
|
|
||||||
"split_strategy": "single_document",
|
|
||||||
"source_documents": [],
|
|
||||||
"extraction_method": "ai_generation"
|
|
||||||
},
|
|
||||||
"documents": documents
|
|
||||||
}
|
|
||||||
|
|
||||||
return json.dumps(result, indent=2)
|
|
||||||
|
|
||||||
# Old _buildContinuationPrompt and _mergeJsonContent methods removed
|
|
||||||
# Now handled by repair mechanism in jsonUtils.py and section accumulation
|
|
||||||
|
|
||||||
|
|
||||||
# Planning AI Call
|
|
||||||
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
    """
    Run a planning AI call (task planning, action planning, action selection).

    The call options are fixed: PLAN operation, QUALITY priority, DETAILED
    processing mode, with prompt and context compression both disabled.

    Args:
        prompt: The planning prompt
        placeholders: Optional placeholder replacements; each item's label
            maps to its content

    Returns:
        Result of the shared looping AI call, using the "plan" debug prefix
    """
    # Static parameters for every planning call
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )

    # Resolve placeholders only when some were supplied
    if not placeholders:
        resolvedPrompt = prompt
    else:
        replacements = {}
        for placeholder in placeholders:
            replacements[placeholder.label] = placeholder.content
        resolvedPrompt = buildPromptWithPlaceholders(prompt, replacements)

    # Delegate to the shared core function
    return await self._callAiWithLooping(resolvedPrompt, planningOptions, "plan")
|
|
||||||
|
|
||||||
# Document Generation AI Call
|
|
||||||
async def callAiDocuments(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]] = None,
    options: Optional[AiCallOptions] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
    """
    Document generation AI call for all non-planning calls.

    With outputFormat set, content is optionally extracted from the given
    documents, a generation prompt is built, JSON is generated through the
    looping AI call and then rendered to the requested format. Without
    outputFormat the call is a plain text call.

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration options; when None (or when its
            operationType is None) the options are derived from the prompt.
            NOTE(review): for structured output the passed-in options object
            is modified in place (compression switched off).
        outputFormat: Optional output format for document generation
        title: Optional title for generated documents

    Returns:
        AI response as string for text calls. With outputFormat: a dict with
        "success": True plus content/documents, or
        {"success": False, "error": ...} when JSON parsing or rendering fails.

    Raises:
        Exception: any other error is logged, the progress operation is
            finished as failed, and the error is re-raised.
    """
    import time

    # Create separate operationId for detailed progress tracking
    workflowId = self.services.currentWorkflow.id if self.services.currentWorkflow else f"no-workflow-{int(time.time())}"
    aiOperationId = f"ai_documents_{workflowId}_{int(time.time())}"

    # Start progress tracking for this operation
    self.services.workflow.progressLogStart(
        aiOperationId,
        "AI call with documents",
        "Document Generation",
        f"Format: {outputFormat or 'text'}"
    )

    try:
        if options is None or (hasattr(options, 'operationType') and options.operationType is None):
            # Use AI to determine parameters ONLY when truly needed (options=None OR operationType=None)
            self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
            options = await self._analyzePromptAndCreateOptions(prompt)

        if outputFormat:
            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt.
            # Compressing would truncate the template structure and confuse the AI.
            if not options:
                options = AiCallOptions()
            options.compressPrompt = False  # JSON templates must NOT be truncated
            options.compressContext = False  # Context also should not be compressed

            # Use unified generation method for all document generation
            if documents and len(documents) > 0:
                self.services.workflow.progressLogUpdate(aiOperationId, 0.2, f"Extracting content from {len(documents)} documents")
                extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
            else:
                self.services.workflow.progressLogUpdate(aiOperationId, 0.2, "Preparing for direct generation")
                extracted_content = None

            self.services.workflow.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
            from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
            # First call without continuation context
            generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content, None)

            # Prepare prompt builder arguments for continuation
            promptArgs = {
                "outputFormat": outputFormat,
                "userPrompt": prompt,
                "title": title,
                "extracted_content": extracted_content
            }

            self.services.workflow.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
            generated_json = await self._callAiWithLooping(
                generation_prompt,
                options,
                "document_generation",
                buildGenerationPrompt,
                promptArgs,
                aiOperationId
            )

            self.services.workflow.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
            # Parse the generated JSON (extract fenced/embedded JSON first)
            extracted_json = generated_json
            try:
                extracted_json = self.services.utils.jsonExtractString(generated_json)
                generated_data = json.loads(extracted_json)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse generated JSON: {str(e)}")
                logger.error(f"JSON content length: {len(generated_json)}")
                logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
                # e.pos is an offset into the string handed to json.loads, i.e. the
                # extracted JSON, so the context must be sliced from that string
                logger.error(f"JSON content around error position: {extracted_json[max(0, e.pos-50):e.pos+50]}")

                # Write the problematic JSON to debug file
                self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")

                self.services.workflow.progressLogFinish(aiOperationId, False)
                return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}

            self.services.workflow.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
            # Render to final format using the existing renderer
            try:
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)
                rendered_content, mime_type = await generationService.renderReport(
                    generated_data, outputFormat, title or "Generated Document", prompt, self
                )

                # Build result in the expected format
                result = {
                    "success": True,
                    "content": generated_data,
                    "documents": [{
                        "documentName": f"generated.{outputFormat}",
                        "documentData": rendered_content,
                        "mimeType": mime_type,
                        "title": title or "Generated Document"
                    }],
                    "is_multi_file": False,
                    "format": outputFormat,
                    "title": title,
                    "split_strategy": "single",
                    "total_documents": 1,
                    "processed_documents": 1
                }

                # Log AI response for debugging
                self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)

                self.services.workflow.progressLogFinish(aiOperationId, True)
                return result

            except Exception as e:
                logger.error(f"Error rendering document: {str(e)}")
                self.services.workflow.progressLogFinish(aiOperationId, False)
                return {"success": False, "error": f"Rendering failed: {str(e)}"}

        # Handle text calls (no output format specified)
        self.services.workflow.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
        if documents:
            # Use document processing for text calls with documents
            result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options, aiOperationId)
        else:
            # Use shared core function for direct text calls
            result = await self._callAiWithLooping(prompt, options, "text", None, None, aiOperationId)

        self.services.workflow.progressLogFinish(aiOperationId, True)
        return result

    except Exception as e:
        logger.error(f"Error in callAiDocuments: {str(e)}")
        self.services.workflow.progressLogFinish(aiOperationId, False)
        raise
|
|
||||||
|
|
||||||
|
|
||||||
# AI Image Analysis
|
|
||||||
async def readImage(
    self,
    prompt: str,
    imageData: Union[str, bytes],
    mimeType: str = None,
    options: Optional[AiCallOptions] = None,
) -> str:
    """
    Analyse an image with the AI by sending it as a content part.

    Args:
        prompt: Instruction for the image analysis
        imageData: Image as raw bytes (base64-encoded here) or as a string
            that is passed through unchanged
        mimeType: MIME type of the image; "image/jpeg" is used when falsy
        options: Optional call options; the operation type is always forced
            to IMAGE_ANALYSE (set on the passed-in object when one is given)

    Returns:
        The AI response content, or a string starting with "Error: " when
        no image data was given, the response was empty, or an exception
        occurred.
    """
    try:
        # Reject missing image data up front
        if not imageData:
            error_msg = "No image data provided"
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"

        # Image processing always runs as IMAGE_ANALYSE
        if options is not None:
            options.operationType = OperationTypeEnum.IMAGE_ANALYSE
        else:
            options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)

        from modules.datamodels.datamodelExtraction import ContentPart
        import base64

        # ContentPart.data must be a string: bytes are base64-encoded,
        # strings are taken as they are
        encodedImage = (
            base64.b64encode(imageData).decode('utf-8')
            if isinstance(imageData, bytes)
            else imageData
        )

        imagePart = ContentPart(
            id="image_0",
            parentId=None,
            label="Image",
            typeGroup="image",
            mimeType=mimeType or "image/jpeg",
            data=encodedImage,
            metadata={"imageAnalysis": True}
        )

        # Build the request around the single image part
        from modules.datamodels.datamodelAi import AiCallRequest
        analysisRequest = AiCallRequest(
            prompt=prompt,
            context="",
            options=options,
            contentParts=[imagePart]
        )

        aiResponse = await self.aiObjects.call(analysisRequest)
        result = aiResponse.content

        # A usable result is truthy and, if it is a string, not blank
        isBlankString = isinstance(result, str) and not result.strip()
        if result and not isBlankString:
            return result

        error_msg = f"No response from AI image analysis (result: {repr(result)})"
        logger.error(f"Error in AI image analysis: {error_msg}")
        return f"Error: {error_msg}"

    except Exception as e:
        logger.error(f"Error in AI image analysis: {str(e)}")
        return f"Error: {str(e)}"
|
|
||||||
|
|
||||||
# AI Image Generation
|
|
||||||
async def generateImage(
    self,
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid",
    options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
    """
    Generate an image through the AI interface and record a workflow stat.

    Args:
        prompt: Description of the image to generate
        size: Requested image size
        quality: Requested quality setting
        style: Requested style setting
        options: Optional AI call options passed through to the interface

    Returns:
        Dict with success, content, modelName, priceUsd and processingTime
        when the response exposes a content attribute; otherwise the raw
        response. On any exception: {"success": False, "error": <message>}.
    """
    try:
        imageResponse = await self.aiObjects.generateImage(prompt, size, quality, style, options)

        # Emit stats for image generation
        statKey = "ai.generate.image"
        self.services.workflow.storeWorkflowStat(self.services.currentWorkflow, imageResponse, statKey)

        # Responses without a content attribute are handed back untouched
        if not hasattr(imageResponse, 'content'):
            return imageResponse

        # Dict format kept for backward compatibility
        return {
            "success": True,
            "content": imageResponse.content,
            "modelName": imageResponse.modelName,
            "priceUsd": imageResponse.priceUsd,
            "processingTime": imageResponse.processingTime
        }
    except Exception as e:
        logger.error(f"Error in AI image generation: {str(e)}")
        return {"success": False, "error": str(e)}
|
|
||||||
|
|
@ -1,500 +0,0 @@
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
from typing import Dict, Any, List, Optional
|
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class SubDocumentGeneration:
|
|
||||||
"""Document generation operations including single-file and multi-file generation."""
|
|
||||||
|
|
||||||
def __init__(self, services, aiObjects, documentProcessor):
|
|
||||||
"""Initialize document generation service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
services: Service center instance for accessing other services
|
|
||||||
aiObjects: Initialized AiObjects instance
|
|
||||||
documentProcessor: Document processing service instance
|
|
||||||
"""
|
|
||||||
self.services = services
|
|
||||||
self.aiObjects = aiObjects
|
|
||||||
self.documentProcessor = documentProcessor
|
|
||||||
|
|
||||||
async def callAiWithDocumentGeneration(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions,
    outputFormat: str,
    title: Optional[str]
) -> Dict[str, Any]:
    """
    Generate documents through the unified pipeline.

    Steps: obtain the adaptive extraction prompt from the generation
    service, run the unified document processing, then build the unified
    result structure.

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration options
        outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
        title: Optional title for generated documents

    Returns:
        Unified result dict; on any exception the error is logged and the
        error result built from its message is returned instead.
    """
    try:
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService

        # Step 1: extraction prompt tailored to the requested output
        promptService = GenerationService(self.services)
        promptKwargs = {
            "outputFormat": outputFormat,
            "userPrompt": prompt,
            "title": title,
            "aiService": self,
        }
        extractionPrompt = await promptService.getAdaptiveExtractionPrompt(**promptKwargs)

        # Step 2: unified pipeline (always the multi-file approach)
        unifiedResponse = await self._processDocumentsUnified(documents, extractionPrompt, options)

        # Step 3: unified result structure
        unifiedResult = await self._buildUnifiedResult(unifiedResponse, outputFormat, title)
        return unifiedResult

    except Exception as e:
        logger.error(f"Error in unified document generation: {str(e)}")
        return self._buildErrorResult(str(e), outputFormat, title)
|
|
||||||
|
|
||||||
async def _processDocumentsUnified(
    self,
    documents: Optional[List[ChatDocument]],
    extractionPrompt: str,
    options: AiCallOptions
) -> Dict[str, Any]:
    """
    Unified document processing that handles both single and multi-file cases.

    Runs the document processor with continuation logic, writes the prompt
    and the response to debug files, validates the response structure and
    tries to emit the raw data as a chat message, reporting progress along
    the way.

    Args:
        documents: Optional list of documents to process
        extractionPrompt: Prompt passed to the document processor
        options: AI call configuration options

    Returns:
        The validated AI response.

    Raises:
        ValueError: when the response is not a valid unified document
            structure.
        Exception: any error from processing is logged, the progress
            operation is finished as failed, and the error is re-raised.
    """
    # Init progress logger. The workflow may be absent, so fall back to a
    # time-based id instead of failing on workflow.id before the try block.
    workflow = self.services.currentWorkflow
    workflowId = workflow.id if workflow else f"no-workflow-{int(time.time())}"
    operationId = f"docGenUnified_{workflowId}_{int(time.time())}"

    try:
        # Start progress tracking
        documentCount = len(documents) if documents else 0
        self.services.workflow.progressLogStart(
            operationId,
            "Generate",
            "Unified Document Generation",
            f"Processing {documentCount} documents"
        )

        # Update progress - generating extraction prompt
        self.services.workflow.progressLogUpdate(operationId, 0.1, "Generating prompt")

        # Write prompt to debug file
        self.services.utils.writeDebugFile(extractionPrompt, "extraction_prompt", documents)

        # Process with unified JSON pipeline using continuation logic
        aiResponse = await self.documentProcessor.processDocumentsWithContinuation(
            documents, extractionPrompt, options
        )

        # Update progress - AI processing completed
        self.services.workflow.progressLogUpdate(operationId, 0.6, "Processing done")

        # Write AI response to debug file (dicts as pretty JSON, anything else via str)
        if isinstance(aiResponse, dict):
            response_json = json.dumps(aiResponse, indent=2, ensure_ascii=False)
        else:
            response_json = str(aiResponse)
        self.services.utils.writeDebugFile(response_json, "ai_response", documents)

        # Validate response structure
        if not self._validateUnifiedResponseStructure(aiResponse):
            raise ValueError("AI response is not valid unified document structure")

        # Emit raw extracted data as a chat message attachment (best effort:
        # a failure here must not fail the generation, but its cause is logged)
        try:
            await self._postRawDataChatMessage(aiResponse, label="raw_extraction_unified")
        except Exception as e:
            logger.warning(f"Failed to emit raw extraction chat message (unified): {str(e)}")

        # Complete progress tracking
        self.services.workflow.progressLogFinish(operationId, True)

        return aiResponse

    except Exception as e:
        logger.error(f"Error in unified document processing: {str(e)}")
        self.services.workflow.progressLogFinish(operationId, False)
        raise
|
|
||||||
|
|
||||||
def _validateUnifiedResponseStructure(self, response: Dict[str, Any]) -> bool:
    """
    Check that an AI response carries a usable document structure.

    Two shapes are accepted:
    - multi-file: "documents" is a non-empty list and at least one entry passes
      self._validateDocumentStructure
    - single-file: "sections" is a non-empty list; the response is then converted
      in place by adding a one-element "documents" list that wraps those sections

    Args:
        response: Parsed AI response. Mutated in the single-file case.

    Returns:
        True if a valid structure was found (or produced by the conversion),
        False otherwise. Unexpected exceptions are logged and reported as False.
    """
    try:
        if not isinstance(response, dict):
            logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
            return False

        # Check for documents array (multi-file structure)
        hasDocuments = "documents" in response
        isDocumentsList = isinstance(response.get("documents"), list)

        # Check for sections array (single-file structure)
        hasSections = "sections" in response
        isSectionsList = isinstance(response.get("sections"), list)

        if hasDocuments and isDocumentsList:
            # Multi-file structure
            documents = response["documents"]
            if not documents:
                logger.warning("Unified validation failed: documents array is empty")
                return False

            # Validate each document individually; one bad document does not fail the rest
            validDocuments = 0
            for i, doc in enumerate(documents):
                if self._validateDocumentStructure(doc, i):
                    validDocuments += 1
                else:
                    logger.warning(f"Document {i} failed validation, but continuing with others")

            # Process succeeds if at least one document is valid
            if validDocuments == 0:
                logger.error("Unified validation failed: no valid documents found")
                return False

            logger.info(f"Unified validation passed: {validDocuments}/{len(documents)} documents valid")
            return True

        if hasSections and isSectionsList:
            # Single-file structure - convert to multi-file format
            logger.info("Converting single-file structure to multi-file format")
            sections = response["sections"]
            if not sections:
                logger.warning("Unified validation failed: sections array is empty")
                return False

            # "metadata" may be present but null or a non-dict value; only read the
            # title from it when it really is a dict, otherwise use the default title
            # instead of failing the whole validation with an AttributeError.
            metadata = response.get("metadata")
            if isinstance(metadata, dict):
                documentTitle = metadata.get("title", "Generated Document")
            else:
                documentTitle = "Generated Document"

            # Convert to documents array format
            response["documents"] = [{
                "id": "document_1",
                "title": documentTitle,
                "filename": "document_1",
                "sections": sections
            }]

            logger.info("Successfully converted single-file structure to multi-file format")
            return True

        # No valid structure found - fail with clear error details
        logger.error("Unified validation failed: No valid structure found")
        logger.error(f"Response type: {type(response)}")
        # response is known to be a dict here, so its keys can be listed directly
        logger.error(f"Available keys: {list(response.keys())}")
        logger.error(f"hasDocuments={hasDocuments}, isDocumentsList={isDocumentsList}")
        logger.error(f"hasSections={hasSections}, isSectionsList={isSectionsList}")
        logger.error(f"Full response: {response}")
        return False

    except Exception as e:
        logger.warning(f"Unified response validation failed with exception: {str(e)}")
        return False
|
|
||||||
|
|
||||||
def _validateDocumentStructure(self, document: Dict[str, Any], documentIndex: int) -> bool:
    """
    Decide whether a single document entry is structurally usable.

    A document passes when it is a dict that has a "title" key and a "sections"
    key whose value is a non-empty list. Every failure is logged and reported as
    False (exceptions included), so the caller can keep going with other documents.

    Args:
        document: The document entry to inspect.
        documentIndex: Position of the document, used only in log messages.

    Returns:
        True if the document is valid, False otherwise.
    """
    try:
        if not isinstance(document, dict):
            logger.error(f"Document {documentIndex} validation failed: not a dict, got {type(document)}")
            logger.error(f"Document {documentIndex} content: {document}")
            return False

        # Required-field checks
        titlePresent = "title" in document
        sectionsPresent = "sections" in document
        sectionsAreList = isinstance(document.get("sections"), list)

        for debugLine in (
            f"Document {documentIndex} structure check:",
            f" - hasTitle: {titlePresent}",
            f" - hasSections: {sectionsPresent}",
            f" - isSectionsList: {sectionsAreList}",
            f" - available keys: {list(document.keys())}",
        ):
            logger.debug(debugLine)

        if not titlePresent or not sectionsPresent or not sectionsAreList:
            for errorLine in (
                f"Document {documentIndex} validation failed:",
                f" - title present: {titlePresent}",
                f" - sections present: {sectionsPresent}",
                f" - sections is list: {sectionsAreList}",
                f" - document content: {document}",
            ):
                logger.error(errorLine)
            return False

        if not document.get("sections", []):
            logger.error(f"Document {documentIndex} validation failed: sections array is empty")
            logger.error(f" - document content: {document}")
            return False

        logger.info(f"Document {documentIndex} validation passed")
        return True

    except Exception as e:
        logger.error(f"Document {documentIndex} validation failed with exception: {str(e)}")
        logger.error(f" - document content: {document}")
        return False
|
|
||||||
|
|
||||||
async def _buildUnifiedResult(
    self,
    aiResponse: Dict[str, Any],
    outputFormat: str,
    title: str
) -> Dict[str, Any]:
    """
    Assemble the array-based result for a validated AI response.

    Each entry of aiResponse["documents"] is passed to self._processDocument.
    Entries that raise are logged and skipped. If nothing could be processed,
    or anything else fails, the result of self._buildErrorResult is returned
    instead of raising.

    Args:
        aiResponse: AI response holding the "documents" list.
        outputFormat: Target format passed on to document processing.
        title: Title passed on to document processing and echoed in the result.

    Returns:
        Result dict with "success", "content", "documents", "is_multi_file",
        "format", "title", "total_documents" and "processed_documents".
    """
    try:
        rendered = []
        for index, docData in enumerate(aiResponse.get("documents", [])):
            try:
                rendered.append(await self._processDocument(docData, outputFormat, title, index))
            except Exception as e:
                # A single failing document must not abort the remaining ones
                logger.warning(f"Failed to process document {index}: {str(e)}, skipping")

        if not rendered:
            raise Exception("No documents could be processed successfully")

        renderedCount = len(rendered)
        return {
            "success": True,
            "content": aiResponse,  # Always multi-document structure
            "documents": rendered,  # Always array
            "is_multi_file": renderedCount > 1,
            "format": outputFormat,
            "title": title,
            "total_documents": renderedCount,
            "processed_documents": renderedCount
        }

    except Exception as e:
        logger.error(f"Error building unified result: {str(e)}")
        return self._buildErrorResult(str(e), outputFormat, title)
|
|
||||||
|
|
||||||
async def _processDocument(
    self,
    docData: Dict[str, Any],
    outputFormat: str,
    title: str,
    documentIndex: int
) -> Dict[str, Any]:
    """
    Enhance one extracted document through an AI call, then render it.

    Steps:
    1. If the document has sections, build a generation prompt, send it together
       with the JSON-serialized document as context, and parse the reply as JSON.
       Any failure in this step falls back to the original docData.
    2. Render the (possibly enhanced) content via GenerationService.renderReport.
    3. Derive the output filename from docData["filename"] (or a numbered default)
       plus an extension for the output format.

    Args:
        docData: Extracted document structure.
        outputFormat: Target format, also used as the file extension.
        title: Fallback title; also passed as the user prompt.
        documentIndex: Zero-based position of the document.

    Returns:
        Dict with "documentName", "documentData", "mimeType", "title" and
        "documentIndex".

    Raises:
        Exception: Any error outside the enhancement step is logged and re-raised.
    """
    try:
        # Get generation service
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        generationService = GenerationService(self.services)

        documentTitle = docData.get("title", title)

        # Use AI generation to enhance the extracted JSON before rendering
        enhancedContent = docData  # Default to original
        if docData.get("sections"):
            try:
                # Get generation prompt directly
                from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
                generationPrompt = await buildGenerationPrompt(
                    outputFormat=outputFormat,
                    userPrompt=title,
                    title=documentTitle
                )

                # Prepare the AI call
                from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
                requestOptions = AiCallOptions()
                requestOptions.operationType = OperationTypeEnum.DATA_GENERATE

                # Create context with the extracted JSON content
                context = f"Extracted JSON content:\n{json.dumps(docData, indent=2)}"

                request = AiCallRequest(
                    prompt=generationPrompt,
                    context=context,
                    options=requestOptions
                )

                # Write document generation prompt to debug file
                self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt")

                # Call AI to enhance the content
                response = await self.aiObjects.call(request)

                # Guard against a missing response before touching .content, so the
                # "empty response" branch below is reached instead of an AttributeError
                responseText = (response.content if response else None) or ''

                # Write document generation response to debug file
                self.services.utils.writeDebugFile(responseText, "document_generation_enhancement_response")

                if responseText:
                    # Parse the AI response as JSON
                    try:
                        result = responseText.strip()

                        # Extract JSON from markdown if present
                        jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
                        if jsonMatch:
                            result = jsonMatch.group(1).strip()
                        elif result.startswith('```json'):
                            result = re.sub(r'^```json\s*', '', result)
                            result = re.sub(r'\s*```$', '', result)
                        elif result.startswith('```'):
                            result = re.sub(r'^```\s*', '', result)
                            result = re.sub(r'\s*```$', '', result)

                        # Try to parse JSON
                        parsedContent = json.loads(result)
                        if isinstance(parsedContent, dict):
                            enhancedContent = parsedContent
                            logger.info(f"AI enhanced JSON content successfully for document {documentIndex}")
                        else:
                            # Valid JSON that is not an object (list, string, number)
                            # cannot stand in for the document dict; keep the original
                            logger.warning(
                                f"AI generation returned non-object JSON ({type(parsedContent).__name__}) "
                                f"for document {documentIndex}, using original content"
                            )

                    except json.JSONDecodeError as e:
                        logger.warning(f"AI generation returned invalid JSON for document {documentIndex}: {str(e)}, using original content")
                        enhancedContent = docData
                else:
                    logger.warning(f"AI generation returned empty response for document {documentIndex}, using original content")
                    enhancedContent = docData

            except Exception as e:
                # Enhancement is best-effort: render the original content instead
                logger.warning(f"AI generation failed for document {documentIndex}: {str(e)}, using original content")
                enhancedContent = docData

        # Render the enhanced JSON content
        renderedContent, mimeType = await generationService.renderReport(
            extractedContent=enhancedContent,
            outputFormat=outputFormat,
            title=documentTitle,
            userPrompt=title,
            aiService=self
        )

        # Generate proper filename; a missing, None or empty "filename" falls back
        # to the numbered default instead of crashing or producing a bare ".ext"
        baseFilename = docData.get("filename") or f"document_{documentIndex + 1}"
        baseFilename = str(baseFilename)
        if '.' in baseFilename:
            baseFilename = baseFilename.rsplit('.', 1)[0]

        # Known formats get their lower-case extension; anything else is used as given
        normalizedFormat = outputFormat.lower()
        extension = normalizedFormat if normalizedFormat in ("docx", "pdf", "html") else outputFormat
        filename = f"{baseFilename}.{extension}"

        return {
            "documentName": filename,
            "documentData": renderedContent,
            "mimeType": mimeType,
            "title": documentTitle,
            "documentIndex": documentIndex
        }

    except Exception as e:
        logger.error(f"Error processing document {documentIndex}: {str(e)}")
        raise
|
|
||||||
|
|
||||||
def _buildErrorResult(self, errorMessage: str, outputFormat: str, title: str) -> Dict[str, Any]:
    """
    Produce the failure variant of the unified result structure.

    Args:
        errorMessage: Text stored under "error".
        outputFormat: Echoed under "format".
        title: Echoed under "title".

    Returns:
        Dict with "success" False, empty "content" and "documents", zero
        counters and "split_strategy" set to "error".
    """
    failure: Dict[str, Any] = {"success": False, "error": errorMessage}
    failure.update({"content": {}, "documents": [], "is_multi_file": False})
    failure.update({"format": outputFormat, "title": title, "split_strategy": "error"})
    failure.update({"total_documents": 0, "processed_documents": 0})
    return failure
|
|
||||||
|
|
||||||
async def _callAiJson(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions
) -> Dict[str, Any]:
    """
    Run an AI call over documents and return the JSON result.

    Delegates to self.documentProcessor.processDocumentsPerChunkJson and hands
    its result back unchanged.

    Args:
        prompt: Prompt forwarded to the document processor.
        documents: Documents forwarded to the document processor (may be None).
        options: Call options forwarded to the document processor.

    Returns:
        Whatever processDocumentsPerChunkJson returns (a structured JSON document).
    """
    # Process documents with JSON merging
    processor = self.documentProcessor
    jsonResult = await processor.processDocumentsPerChunkJson(documents, prompt, options)
    return jsonResult
|
|
||||||
|
|
||||||
|
|
||||||
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
    """
    Attach the raw extracted JSON to a chat message so the user keeps access to
    the data even if downstream processing fails.

    The payload is serialized to UTF-8 JSON, stored as a file through
    services.interfaceDbComponent, wrapped in a ChatDocument and stored with a
    new assistant message via services.workflow.storeMessageWithDocuments.

    This is best-effort: any failure is logged as a warning and swallowed, so the
    method never raises.

    Args:
        payload: JSON-serializable data to persist.
        label: Prefix of the generated file name; also used as "documentsLabel".
    """
    try:
        services = self.services
        workflow = services.currentWorkflow

        # Serialize payload
        timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
        contentText = json.dumps(payload, ensure_ascii=False, indent=2)
        contentBytes = contentText.encode('utf-8')

        # Store as file via component storage
        fileName = f"{label}_{timestamp}.json"
        fileItem = services.interfaceDbComponent.createFile(
            name=fileName,
            mimeType="application/json",
            content=contentBytes
        )
        services.interfaceDbComponent.createFileData(fileItem.id, contentBytes)

        # Lookup file info for ChatDocument; fall back to local values when the
        # lookup returns nothing
        fileInfo = services.workflow.getFileInfo(fileItem.id) or {}
        doc = ChatDocument(
            messageId="",  # set after message creation
            fileId=fileItem.id,
            fileName=fileInfo.get("fileName", fileName),
            fileSize=fileInfo.get("size", len(contentBytes)),
            mimeType=fileInfo.get("mimeType", "application/json")
        )

        # Create message referencing the file; the document itself is passed
        # separately to storeMessageWithDocuments below
        messageData = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": "Raw extraction data saved",
            "status": "data",
            "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
            "publishedAt": services.utils.timestampGetUtc(),
            "documentsLabel": label,
            "documents": []
        }

        # Store message with document included from the start
        # NOTE(review): this passes services.workflow.workflow while the fields above
        # come from services.currentWorkflow - confirm both refer to the same workflow.
        services.workflow.storeMessageWithDocuments(services.workflow.workflow, messageData, [doc])
    except Exception as e:
        # Non-fatal, but no longer silent: without this log line a storage or chat
        # failure would leave no trace at all
        logger.warning(f"Failed to store raw data chat message '{label}': {str(e)}")
        return
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,165 +0,0 @@
|
||||||
"""
|
|
||||||
Shared utilities for AI services to eliminate code duplication.
|
|
||||||
|
|
||||||
This module contains common functions used across multiple AI service modules
|
|
||||||
to maintain DRY principles and ensure consistency.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List, Optional, Union
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def buildPromptWithPlaceholders(prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
    """
    Build the full prompt by replacing {{KEY:name}} markers with their content.

    All markers are substituted in a single pass over the template. Inserted
    content is never re-scanned, so a value that itself contains a
    {{KEY:other}} marker is kept literally and cannot pull in another
    placeholder's content, whatever the dictionary order is.

    Args:
        prompt: The base prompt template.
        placeholders: Mapping of placeholder name to content. Entries whose
            content is None are skipped and their markers stay in the prompt.

    Returns:
        Prompt with placeholders replaced; the unchanged prompt when there is
        nothing to replace.
    """
    if not placeholders:
        return prompt

    # Only None is skipped; empty strings still replace their marker
    replacements = {
        str(name): str(content)
        for name, content in placeholders.items()
        if content is not None
    }
    if not replacements:
        return prompt

    # Longest names first so a name that is a prefix of another cannot shadow it
    alternatives = "|".join(
        re.escape(name) for name in sorted(replacements, key=len, reverse=True)
    )
    markerPattern = re.compile(r"\{\{KEY:(" + alternatives + r")\}\}")
    return markerPattern.sub(lambda match: replacements[match.group(1)], prompt)
|
|
||||||
|
|
||||||
|
|
||||||
def sanitizePromptContent(content: str, contentType: str = "text") -> str:
    """
    Sanitize content before it is inserted into an AI prompt.

    Control characters other than newline, carriage return and tab are removed
    first. The rest depends on contentType:
    - "userinput": backslashes are escaped, curly braces are doubled (to prevent
      placeholder injection), quotes are escaped and the result is wrapped in
      single quotes
    - "json": backslashes and double quotes are escaped, newlines/tabs are
      written as escape sequences
    - "document", "text" and any other value: as "json", plus single quotes
      are escaped

    Args:
        content: The content to sanitize; falsy values give "".
        contentType: Type of content ("text", "userinput", "json", "document").

    Returns:
        The sanitized string, or a fixed error marker string if sanitization
        itself raises.
    """
    if not content:
        return ""

    try:
        # Remove null bytes and control characters (except newlines and tabs)
        sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', str(content))

        if contentType == "userinput":
            # Backslashes must be escaped before quotes: otherwise an input of \'
            # turns into \\' and the quote ends the surrounding literal again
            sanitized = sanitized.replace('\\', '\\\\')
            # Escape curly braces to prevent placeholder injection
            sanitized = sanitized.replace('{', '{{').replace('}', '}}')
            # Escape quotes and wrap in single quotes
            sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
            return f"'{sanitized}'"

        # Shared escaping for "json", "document" and "text" (the default)
        sanitized = sanitized.replace('\\', '\\\\').replace('"', '\\"')
        if contentType != "json":
            # JSON strings do not escape single quotes; every other type does
            sanitized = sanitized.replace("'", "\\'")
        return sanitized.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')

    except Exception as e:
        logger.error(f"Error sanitizing prompt content: {str(e)}")
        # Return a safe fallback
        return "[ERROR: Content could not be safely sanitized]"
|
|
||||||
|
|
||||||
|
|
||||||
def extractTextFromContentParts(extracted_content) -> str:
    """
    Collect the textual payload of an extraction result.

    Args:
        extracted_content: Object exposing a `parts` iterable; each part may
            carry `typeGroup` and `data` attributes.

    Returns:
        The non-empty `data` of all parts whose `typeGroup` is "text", "table"
        or "structure", joined by blank lines. Returns "" when the input is
        falsy or has no `parts` attribute.
    """
    if not extracted_content or not hasattr(extracted_content, 'parts'):
        return ""

    textualGroups = ['text', 'table', 'structure']
    collected = [
        part.data
        for part in extracted_content.parts
        if hasattr(part, 'typeGroup')
        and part.typeGroup in textualGroups
        and hasattr(part, 'data')
        and part.data
    ]
    return "\n\n".join(collected)
|
|
||||||
|
|
||||||
|
|
||||||
def reduceText(text: str, reduction_factor: float) -> str:
    """
    Truncate text to a fraction of its length.

    Args:
        text: Text to reduce.
        reduction_factor: Fraction of the text to keep. Values of 1.0 or more
            return the text unchanged; values below 0.0 are treated as 0.0.

    Returns:
        The original text when nothing is cut, otherwise the kept prefix
        followed by the "... [reduced]" truncation indicator.
    """
    if reduction_factor >= 1.0:
        return text

    # Clamp at zero: a negative factor would give a negative slice bound, which
    # cuts from the end and keeps most of the text instead of none of it
    keptFraction = max(reduction_factor, 0.0)
    targetLength = int(len(text) * keptFraction)
    return text[:targetLength] + "... [reduced]"
|
|
||||||
|
|
||||||
|
|
||||||
def determineCallType(documents: Optional[List], operation_type: str) -> str:
    """
    Pick the call type for an AI request.

    Args:
        documents: Documents attached to the request; None or an empty list
            counts as "no documents".
        operation_type: Type of operation being performed.

    Returns:
        "plan" only when there are no documents and operation_type is "plan";
        "text" in every other case.
    """
    noDocuments = documents is None or len(documents) == 0
    wantsPlan = operation_type == "plan"
    return "plan" if noDocuments and wantsPlan else "text"
|
|
||||||
|
|
@ -19,6 +19,16 @@ class ExtractionService:
|
||||||
self.services = services
|
self.services = services
|
||||||
self._extractorRegistry = ExtractorRegistry()
|
self._extractorRegistry = ExtractorRegistry()
|
||||||
self._chunkerRegistry = ChunkerRegistry()
|
self._chunkerRegistry = ChunkerRegistry()
|
||||||
|
# Ensure AI connectors are discovered so pricing models are available
|
||||||
|
try:
|
||||||
|
# If internal model is missing, trigger discovery and registration
|
||||||
|
if modelRegistry.getModel("internal-extractor") is None:
|
||||||
|
discovered = modelRegistry.discoverConnectors()
|
||||||
|
for connector in discovered:
|
||||||
|
modelRegistry.registerConnector(connector)
|
||||||
|
except Exception:
|
||||||
|
# Propagate actual errors during use; init should be fast and side-effect free otherwise
|
||||||
|
pass
|
||||||
|
|
||||||
def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
|
def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
|
||||||
"""
|
"""
|
||||||
|
|
@ -82,12 +92,12 @@ class ExtractionService:
|
||||||
p.metadata["documentMimeType"] = documentData["mimeType"]
|
p.metadata["documentMimeType"] = documentData["mimeType"]
|
||||||
|
|
||||||
# Log chunking information
|
# Log chunking information
|
||||||
chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
|
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
|
||||||
if chunked_parts:
|
if chunkedParts:
|
||||||
logger.debug(f"=== CHUNKING RESULTS ===")
|
logger.debug(f"=== CHUNKING RESULTS ===")
|
||||||
logger.debug(f"Total parts: {len(ec.parts)}")
|
logger.debug(f"Total parts: {len(ec.parts)}")
|
||||||
logger.debug(f"Chunked parts: {len(chunked_parts)}")
|
logger.debug(f"Chunked parts: {len(chunkedParts)}")
|
||||||
for chunk in chunked_parts:
|
for chunk in chunkedParts:
|
||||||
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
|
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
|
||||||
else:
|
else:
|
||||||
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
|
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
|
||||||
|
|
@ -101,8 +111,11 @@ class ExtractionService:
|
||||||
# Emit stats for extraction operation
|
# Emit stats for extraction operation
|
||||||
|
|
||||||
# Use internal extraction model for pricing
|
# Use internal extraction model for pricing
|
||||||
modelName = "internal_extraction"
|
modelName = "internal-extractor"
|
||||||
model = modelRegistry.getModel(modelName)
|
model = modelRegistry.getModel(modelName)
|
||||||
|
# Hard fail if model is missing; caller must ensure connectors are registered
|
||||||
|
if model is None or model.calculatePriceUsd is None:
|
||||||
|
raise RuntimeError(f"Pricing model not available: {modelName}")
|
||||||
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
|
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
|
||||||
|
|
||||||
# Create AiCallResponse with real calculation
|
# Create AiCallResponse with real calculation
|
||||||
|
|
|
||||||
|
|
@ -20,13 +20,13 @@ class IntelligentTokenAwareMerger:
|
||||||
4. Minimize total number of AI calls
|
4. Minimize total number of AI calls
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, modelCapabilities: Dict[str, Any]):
    """
    Read the token budget settings from the model capabilities.

    Args:
        modelCapabilities: Mapping that may contain "maxTokens" (default 4000),
            "safetyMargin" (default 0.1) and "charsPerToken" (default 4).

    Sets maxTokens, safetyMargin, charsPerToken and effectiveMaxTokens, the
    latter being maxTokens reduced by the safety margin and truncated to int.
    """
    readCapability = modelCapabilities.get
    self.maxTokens = readCapability("maxTokens", 4000)
    self.safetyMargin = readCapability("safetyMargin", 0.1)
    self.charsPerToken = readCapability("charsPerToken", 4)  # Rough estimation
    usableShare = 1 - self.safetyMargin
    self.effectiveMaxTokens = int(self.maxTokens * usableShare)
|
||||||
|
|
||||||
def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
|
def mergeChunksIntelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
|
||||||
"""
|
"""
|
||||||
Merge chunks intelligently based on token limits.
|
Merge chunks intelligently based on token limits.
|
||||||
|
|
||||||
|
|
@ -40,125 +40,125 @@ class IntelligentTokenAwareMerger:
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}")
|
logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, maxTokens={self.effectiveMaxTokens}")
|
||||||
|
|
||||||
# Calculate tokens for prompt
|
# Calculate tokens for prompt
|
||||||
prompt_tokens = self._estimate_tokens(prompt)
|
promptTokens = self._estimateTokens(prompt)
|
||||||
available_tokens = self.effective_max_tokens - prompt_tokens
|
availableTokens = self.effectiveMaxTokens - promptTokens
|
||||||
|
|
||||||
logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}")
|
logger.info(f"📊 Prompt tokens: {promptTokens}, Available for content: {availableTokens}")
|
||||||
|
|
||||||
# Group chunks by document and type for semantic coherence
|
# Group chunks by document and type for semantic coherence
|
||||||
grouped_chunks = self._group_chunks_by_document_and_type(chunks)
|
groupedChunks = self._groupChunksByDocumentAndType(chunks)
|
||||||
|
|
||||||
merged_parts = []
|
mergedParts = []
|
||||||
|
|
||||||
for group_key, group_chunks in grouped_chunks.items():
|
for groupKey, groupChunks in groupedChunks.items():
|
||||||
logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)")
|
logger.info(f"📁 Processing group: {groupKey} ({len(groupChunks)} chunks)")
|
||||||
|
|
||||||
# Merge chunks within this group optimally
|
# Merge chunks within this group optimally
|
||||||
group_merged = self._merge_group_optimally(group_chunks, available_tokens)
|
groupMerged = self._mergeGroupOptimally(groupChunks, availableTokens)
|
||||||
merged_parts.extend(group_merged)
|
mergedParts.extend(groupMerged)
|
||||||
|
|
||||||
logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts")
|
logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(mergedParts)} parts")
|
||||||
return merged_parts
|
return mergedParts
|
||||||
|
|
||||||
def _groupChunksByDocumentAndType(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
    """
    Bucket chunks by source document and type group.

    The bucket key is "<documentId>_<typeGroup>", where documentId comes from
    the chunk metadata ("unknown" when absent). Chunks keep their input order
    inside each bucket.

    Args:
        chunks: Chunks to group.

    Returns:
        Mapping of bucket key to the chunks that share it.
    """
    buckets: Dict[str, List[ContentPart]] = {}
    for part in chunks:
        bucketKey = f"{part.metadata.get('documentId', 'unknown')}_{part.typeGroup}"
        buckets.setdefault(bucketKey, []).append(part)
    return buckets
|
||||||
|
|
||||||
def _mergeGroupOptimally(self, chunks: List[ContentPart], availableTokens: int) -> List[ContentPart]:
    """
    Pack the chunks of one group into as few parts as the token budget allows.

    Chunks are visited smallest first (by self._estimateTokens of their data)
    and accumulated into a batch. The batch is turned into a part via
    self._createMergedPart whenever the next chunk would push it over
    availableTokens. A chunk that on its own reaches 90% of availableTokens is
    emitted unmerged, after flushing the current batch.

    Args:
        chunks: Chunks belonging to the same group.
        availableTokens: Token budget for the content of one part.

    Returns:
        The resulting parts; an empty list for empty input.
    """
    if not chunks:
        return []

    estimate = self._estimateTokens
    packed = []
    batch = []
    batchTokens = 0
    oversizeThreshold = availableTokens * 0.9  # 90% of available tokens

    # Smallest first for better packing
    for part in sorted(chunks, key=lambda item: estimate(item.data)):
        partTokens = estimate(part.data)
        isOversized = partTokens >= oversizeThreshold
        wouldOverflow = batchTokens + partTokens > availableTokens

        # Close the running batch before an oversized chunk or an overflow
        if batch and (isOversized or wouldOverflow):
            packed.append(self._createMergedPart(batch, batchTokens))
            batch, batchTokens = [], 0

        if isOversized:
            # Large chunks are passed through on their own
            packed.append(part)
            logger.debug(f"🔍 Large chunk processed individually: {partTokens} tokens")
            continue

        batch.append(part)
        batchTokens += partTokens

    # Finalize remaining batch
    if batch:
        packed.append(self._createMergedPart(batch, batchTokens))

    logger.info(f"📦 Group merged: {len(chunks)} → {len(packed)} parts")
    return packed
|
||||||
|
|
||||||
def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart:
|
def _createMergedPart(self, chunks: List[ContentPart], totalTokens: int) -> ContentPart:
|
||||||
"""Create a merged ContentPart from multiple chunks."""
|
"""Create a merged ContentPart from multiple chunks."""
|
||||||
if len(chunks) == 1:
|
if len(chunks) == 1:
|
||||||
return chunks[0] # No need to merge single chunk
|
return chunks[0] # No need to merge single chunk
|
||||||
|
|
||||||
# Combine data with semantic separators
|
# Combine data with semantic separators
|
||||||
combined_data = self._combine_chunk_data(chunks)
|
combinedData = self._combineChunkData(chunks)
|
||||||
|
|
||||||
# Use metadata from first chunk as base
|
# Use metadata from first chunk as base
|
||||||
base_chunk = chunks[0]
|
baseChunk = chunks[0]
|
||||||
merged_metadata = base_chunk.metadata.copy()
|
mergedMetadata = baseChunk.metadata.copy()
|
||||||
merged_metadata.update({
|
mergedMetadata.update({
|
||||||
"merged": True,
|
"merged": True,
|
||||||
"originalChunkCount": len(chunks),
|
"originalChunkCount": len(chunks),
|
||||||
"totalTokens": total_tokens,
|
"totalTokens": totalTokens,
|
||||||
"originalChunkIds": [c.id for c in chunks],
|
"originalChunkIds": [c.id for c in chunks],
|
||||||
"size": len(combined_data.encode('utf-8'))
|
"size": len(combinedData.encode('utf-8'))
|
||||||
})
|
})
|
||||||
|
|
||||||
merged_part = ContentPart(
|
mergedPart = ContentPart(
|
||||||
id=makeId(),
|
id=makeId(),
|
||||||
parentId=base_chunk.parentId,
|
parentId=baseChunk.parentId,
|
||||||
label=f"merged_{len(chunks)}_chunks",
|
label=f"merged_{len(chunks)}_chunks",
|
||||||
typeGroup=base_chunk.typeGroup,
|
typeGroup=baseChunk.typeGroup,
|
||||||
mimeType=base_chunk.mimeType,
|
mimeType=baseChunk.mimeType,
|
||||||
data=combined_data,
|
data=combinedData,
|
||||||
metadata=merged_metadata
|
metadata=mergedMetadata
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens")
|
logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {totalTokens} tokens")
|
||||||
return merged_part
|
return mergedPart
|
||||||
|
|
||||||
def _combine_chunk_data(self, chunks: List[ContentPart]) -> str:
|
def _combineChunkData(self, chunks: List[ContentPart]) -> str:
|
||||||
"""Combine chunk data with appropriate separators."""
|
"""Combine chunk data with appropriate separators."""
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -173,37 +173,37 @@ class IntelligentTokenAwareMerger:
|
||||||
|
|
||||||
return separator.join([chunk.data for chunk in chunks])
|
return separator.join([chunk.data for chunk in chunks])
|
||||||
|
|
||||||
def _estimate_tokens(self, text: str) -> int:
|
def _estimateTokens(self, text: str) -> int:
|
||||||
"""Estimate token count for text."""
|
"""Estimate token count for text."""
|
||||||
if not text:
|
if not text:
|
||||||
return 0
|
return 0
|
||||||
return len(text) // self.chars_per_token
|
return len(text) // self.charsPerToken
|
||||||
|
|
||||||
def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]:
|
def calculateOptimizationStats(self, originalChunks: List[ContentPart], mergedParts: List[ContentPart]) -> Dict[str, Any]:
|
||||||
"""Calculate optimization statistics with detailed analysis."""
|
"""Calculate optimization statistics with detailed analysis."""
|
||||||
original_calls = len(original_chunks)
|
originalCalls = len(originalChunks)
|
||||||
optimized_calls = len(merged_parts)
|
optimizedCalls = len(mergedParts)
|
||||||
reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0
|
reductionPercent = ((originalCalls - optimizedCalls) / originalCalls * 100) if originalCalls > 0 else 0
|
||||||
|
|
||||||
# Analyze chunk sizes
|
# Analyze chunk sizes
|
||||||
large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9]
|
largeChunks = [c for c in originalChunks if self._estimateTokens(c.data) >= self.effectiveMaxTokens * 0.9]
|
||||||
small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9]
|
smallChunks = [c for c in originalChunks if self._estimateTokens(c.data) < self.effectiveMaxTokens * 0.9]
|
||||||
|
|
||||||
# Calculate theoretical maximum optimization (if all small chunks could be merged)
|
# Calculate theoretical maximum optimization (if all small chunks could be merged)
|
||||||
theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3) # Assume 3 small chunks per call
|
theoreticalMinCalls = len(largeChunks) + max(1, len(smallChunks) // 3) # Assume 3 small chunks per call
|
||||||
theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0
|
theoreticalReduction = ((originalCalls - theoreticalMinCalls) / originalCalls * 100) if originalCalls > 0 else 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"original_ai_calls": original_calls,
|
"original_ai_calls": originalCalls,
|
||||||
"optimized_ai_calls": optimized_calls,
|
"optimized_ai_calls": optimizedCalls,
|
||||||
"reduction_percent": round(reduction_percent, 1),
|
"reduction_percent": round(reductionPercent, 1),
|
||||||
"cost_savings": f"{reduction_percent:.1f}%",
|
"cost_savings": f"{reductionPercent:.1f}%",
|
||||||
"efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "∞",
|
"efficiency_gain": f"{originalCalls / optimizedCalls:.1f}x" if optimizedCalls > 0 else "∞",
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"large_chunks": len(large_chunks),
|
"large_chunks": len(largeChunks),
|
||||||
"small_chunks": len(small_chunks),
|
"small_chunks": len(smallChunks),
|
||||||
"theoretical_min_calls": theoretical_min_calls,
|
"theoretical_min_calls": theoreticalMinCalls,
|
||||||
"theoretical_reduction": round(theoretical_reduction, 1),
|
"theoretical_reduction": round(theoreticalReduction, 1),
|
||||||
"optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low"
|
"optimization_potential": "high" if reductionPercent > 50 else "moderate" if reductionPercent > 20 else "low"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -96,10 +96,10 @@ def _applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[Con
|
||||||
subMerger = IntelligentTokenAwareMerger(model_capabilities)
|
subMerger = IntelligentTokenAwareMerger(model_capabilities)
|
||||||
|
|
||||||
# Use intelligent merging for all parts
|
# Use intelligent merging for all parts
|
||||||
merged = subMerger.merge_chunks_intelligently(parts, strategy.prompt or "")
|
merged = subMerger.mergeChunksIntelligently(parts, strategy.prompt or "")
|
||||||
|
|
||||||
# Calculate and log optimization stats
|
# Calculate and log optimization stats
|
||||||
stats = subMerger.calculate_optimization_stats(parts, merged)
|
stats = subMerger.calculateOptimizationStats(parts, merged)
|
||||||
logger.info(f"🧠 Intelligent merging stats: {stats}")
|
logger.info(f"🧠 Intelligent merging stats: {stats}")
|
||||||
logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
|
logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,7 @@ async def buildExtractionPrompt(
|
||||||
|
|
||||||
# Build base prompt
|
# Build base prompt
|
||||||
adaptive_prompt = f"""
|
adaptive_prompt = f"""
|
||||||
{services.ai.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
|
{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
|
||||||
|
|
||||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,13 +37,13 @@ class GenerationService:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Process each document from the AI action result
|
# Process each document from the AI action result
|
||||||
processed_documents = []
|
processedDocuments = []
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
processed_doc = self.processSingleDocument(doc, action)
|
processedDoc = self.processSingleDocument(doc, action)
|
||||||
if processed_doc:
|
if processedDoc:
|
||||||
processed_documents.append(processed_doc)
|
processedDocuments.append(processedDoc)
|
||||||
|
|
||||||
return processed_documents
|
return processedDocuments
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing action result documents: {str(e)}")
|
logger.error(f"Error processing action result documents: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
@ -77,20 +77,20 @@ class GenerationService:
|
||||||
try:
|
try:
|
||||||
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
|
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
|
||||||
|
|
||||||
created_documents = []
|
createdDocuments = []
|
||||||
for i, doc_data in enumerate(processed_docs):
|
for i, doc_data in enumerate(processed_docs):
|
||||||
try:
|
try:
|
||||||
document_name = doc_data['fileName']
|
documentName = doc_data['fileName']
|
||||||
document_data = doc_data['content']
|
documentData = doc_data['content']
|
||||||
mime_type = doc_data['mimeType']
|
mimeType = doc_data['mimeType']
|
||||||
|
|
||||||
# Convert document data to string content
|
# Convert document data to string content
|
||||||
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
|
content = convertDocumentDataToString(documentData, getFileExtension(documentName))
|
||||||
|
|
||||||
# Skip empty or minimal content
|
# Skip empty or minimal content
|
||||||
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
|
minimalContentPatterns = ['{}', '[]', 'null', '""', "''"]
|
||||||
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
|
if not content or content.strip() == "" or content.strip() in minimalContentPatterns:
|
||||||
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
|
logger.warning(f"Empty or minimal content for document {documentName}, skipping")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Normalize file extension based on mime type if missing or incorrect
|
# Normalize file extension based on mime type if missing or incorrect
|
||||||
|
|
@ -105,35 +105,35 @@ class GenerationService:
|
||||||
"text/plain": ".txt",
|
"text/plain": ".txt",
|
||||||
"application/json": ".json",
|
"application/json": ".json",
|
||||||
}
|
}
|
||||||
expected_ext = mime_to_ext.get(mime_type)
|
expectedExt = mime_to_ext.get(mimeType)
|
||||||
if expected_ext:
|
if expectedExt:
|
||||||
if not document_name.lower().endswith(expected_ext):
|
if not documentName.lower().endswith(expectedExt):
|
||||||
# Append/replace extension to match mime type
|
# Append/replace extension to match mime type
|
||||||
if "." in document_name:
|
if "." in documentName:
|
||||||
document_name = document_name.rsplit(".", 1)[0] + expected_ext
|
documentName = documentName.rsplit(".", 1)[0] + expectedExt
|
||||||
else:
|
else:
|
||||||
document_name = document_name + expected_ext
|
documentName = documentName + expectedExt
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
|
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
|
||||||
base64encoded = False
|
base64encoded = False
|
||||||
try:
|
try:
|
||||||
binary_mime_types = {
|
binaryMimeTypes = {
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
}
|
}
|
||||||
if isinstance(document_data, str) and mime_type in binary_mime_types:
|
if isinstance(documentData, str) and mimeType in binaryMimeTypes:
|
||||||
base64encoded = True
|
base64encoded = True
|
||||||
except Exception:
|
except Exception:
|
||||||
base64encoded = False
|
base64encoded = False
|
||||||
|
|
||||||
# Create document with file in one step using interfaces directly
|
# Create document with file in one step using interfaces directly
|
||||||
document = self._createDocument(
|
document = self._createDocument(
|
||||||
fileName=document_name,
|
fileName=documentName,
|
||||||
mimeType=mime_type,
|
mimeType=mimeType,
|
||||||
content=content,
|
content=content,
|
||||||
base64encoded=base64encoded,
|
base64encoded=base64encoded,
|
||||||
messageId=message_id
|
messageId=message_id
|
||||||
|
|
@ -141,14 +141,14 @@ class GenerationService:
|
||||||
if document:
|
if document:
|
||||||
# Set workflow context on the document if possible
|
# Set workflow context on the document if possible
|
||||||
self._setDocumentWorkflowContext(document, action, workflow)
|
self._setDocumentWorkflowContext(document, action, workflow)
|
||||||
created_documents.append(document)
|
createdDocuments.append(document)
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to create ChatDocument object for {document_name}")
|
logger.error(f"Failed to create ChatDocument object for {documentName}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
|
logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return created_documents
|
return createdDocuments
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error creating documents from action result: {str(e)}")
|
logger.error(f"Error creating documents from action result: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
@ -157,28 +157,28 @@ class GenerationService:
|
||||||
"""Set workflow context on a document for proper routing and labeling"""
|
"""Set workflow context on a document for proper routing and labeling"""
|
||||||
try:
|
try:
|
||||||
# Get current workflow context directly from workflow object
|
# Get current workflow context directly from workflow object
|
||||||
workflow_context = self._getWorkflowContext(workflow)
|
workflowContext = self._getWorkflowContext(workflow)
|
||||||
workflow_stats = self._getWorkflowStats(workflow)
|
workflowStats = self._getWorkflowStats(workflow)
|
||||||
|
|
||||||
current_round = workflow_context.get('currentRound', 0)
|
currentRound = workflowContext.get('currentRound', 0)
|
||||||
current_task = workflow_context.get('currentTask', 0)
|
currentTask = workflowContext.get('currentTask', 0)
|
||||||
current_action = workflow_context.get('currentAction', 0)
|
currentAction = workflowContext.get('currentAction', 0)
|
||||||
|
|
||||||
# Try to set workflow context attributes if they exist
|
# Try to set workflow context attributes if they exist
|
||||||
if hasattr(document, 'roundNumber'):
|
if hasattr(document, 'roundNumber'):
|
||||||
document.roundNumber = current_round
|
document.roundNumber = currentRound
|
||||||
if hasattr(document, 'taskNumber'):
|
if hasattr(document, 'taskNumber'):
|
||||||
document.taskNumber = current_task
|
document.taskNumber = currentTask
|
||||||
if hasattr(document, 'actionNumber'):
|
if hasattr(document, 'actionNumber'):
|
||||||
document.actionNumber = current_action
|
document.actionNumber = currentAction
|
||||||
if hasattr(document, 'actionId'):
|
if hasattr(document, 'actionId'):
|
||||||
document.actionId = action.id if hasattr(action, 'id') else None
|
document.actionId = action.id if hasattr(action, 'id') else None
|
||||||
|
|
||||||
# Set additional workflow metadata if available
|
# Set additional workflow metadata if available
|
||||||
if hasattr(document, 'workflowId'):
|
if hasattr(document, 'workflowId'):
|
||||||
document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
|
document.workflowId = workflowStats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
|
||||||
if hasattr(document, 'workflowStatus'):
|
if hasattr(document, 'workflowStatus'):
|
||||||
document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
|
document.workflowStatus = workflowStats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -355,17 +355,17 @@ class GenerationService:
|
||||||
def _getFormatRenderer(self, output_format: str):
|
def _getFormatRenderer(self, output_format: str):
|
||||||
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
||||||
try:
|
try:
|
||||||
from .renderers.registry import get_renderer
|
from .renderers.registry import getRenderer
|
||||||
renderer = get_renderer(output_format, services=self.services)
|
renderer = getRenderer(output_format, services=self.services)
|
||||||
|
|
||||||
if renderer:
|
if renderer:
|
||||||
return renderer
|
return renderer
|
||||||
|
|
||||||
# Fallback to text renderer if no specific renderer found
|
# Fallback to text renderer if no specific renderer found
|
||||||
logger.warning(f"No renderer found for format {output_format}, falling back to text")
|
logger.warning(f"No renderer found for format {output_format}, falling back to text")
|
||||||
fallback_renderer = get_renderer('text', services=self.services)
|
fallbackRenderer = getRenderer('text', services=self.services)
|
||||||
if fallback_renderer:
|
if fallbackRenderer:
|
||||||
return fallback_renderer
|
return fallbackRenderer
|
||||||
|
|
||||||
logger.error("Even text renderer fallback failed")
|
logger.error("Even text renderer fallback failed")
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ class RendererRegistry:
|
||||||
self._format_mappings: Dict[str, str] = {}
|
self._format_mappings: Dict[str, str] = {}
|
||||||
self._discovered = False
|
self._discovered = False
|
||||||
|
|
||||||
def discover_renderers(self) -> None:
|
def discoverRenderers(self) -> None:
|
||||||
"""Automatically discover and register all renderers by scanning files."""
|
"""Automatically discover and register all renderers by scanning files."""
|
||||||
if self._discovered:
|
if self._discovered:
|
||||||
return
|
return
|
||||||
|
|
@ -28,38 +28,38 @@ class RendererRegistry:
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Get the directory containing this registry file
|
# Get the directory containing this registry file
|
||||||
current_dir = Path(__file__).parent
|
currentDir = Path(__file__).parent
|
||||||
renderers_dir = current_dir
|
renderersDir = currentDir
|
||||||
|
|
||||||
# Get the package name dynamically
|
# Get the package name dynamically
|
||||||
package_name = __name__.rsplit('.', 1)[0]
|
packageName = __name__.rsplit('.', 1)[0]
|
||||||
|
|
||||||
# Scan all Python files in the renderers directory
|
# Scan all Python files in the renderers directory
|
||||||
for file_path in renderers_dir.glob("*.py"):
|
for filePath in renderersDir.glob("*.py"):
|
||||||
if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
|
if filePath.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract module name from filename
|
# Extract module name from filename
|
||||||
module_name = file_path.stem
|
moduleName = filePath.stem
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Import the module dynamically
|
# Import the module dynamically
|
||||||
full_module_name = f"{package_name}.{module_name}"
|
fullModuleName = f"{packageName}.{moduleName}"
|
||||||
module = importlib.import_module(full_module_name)
|
module = importlib.import_module(fullModuleName)
|
||||||
|
|
||||||
# Look for renderer classes in the module
|
# Look for renderer classes in the module
|
||||||
for attr_name in dir(module):
|
for attrName in dir(module):
|
||||||
attr = getattr(module, attr_name)
|
attr = getattr(module, attrName)
|
||||||
if (isinstance(attr, type) and
|
if (isinstance(attr, type) and
|
||||||
issubclass(attr, BaseRenderer) and
|
issubclass(attr, BaseRenderer) and
|
||||||
attr != BaseRenderer and
|
attr != BaseRenderer and
|
||||||
hasattr(attr, 'get_supported_formats')):
|
hasattr(attr, 'getSupportedFormats')):
|
||||||
|
|
||||||
# Register the renderer
|
# Register the renderer
|
||||||
self._register_renderer_class(attr)
|
self._registerRendererClass(attr)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
|
logger.warning(f"Could not load renderer from {moduleName}: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self._discovered = True
|
self._discovered = True
|
||||||
|
|
@ -68,72 +68,72 @@ class RendererRegistry:
|
||||||
logger.error(f"Error during renderer discovery: {str(e)}")
|
logger.error(f"Error during renderer discovery: {str(e)}")
|
||||||
self._discovered = True # Mark as discovered to avoid repeated attempts
|
self._discovered = True # Mark as discovered to avoid repeated attempts
|
||||||
|
|
||||||
def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
|
def _registerRendererClass(self, rendererClass: Type[BaseRenderer]) -> None:
|
||||||
"""Register a renderer class with its supported formats."""
|
"""Register a renderer class with its supported formats."""
|
||||||
try:
|
try:
|
||||||
# Get supported formats from the renderer class
|
# Get supported formats from the renderer class
|
||||||
supported_formats = renderer_class.get_supported_formats()
|
supportedFormats = rendererClass.getSupportedFormats()
|
||||||
|
|
||||||
for format_name in supported_formats:
|
for formatName in supportedFormats:
|
||||||
# Register primary format
|
# Register primary format
|
||||||
self._renderers[format_name.lower()] = renderer_class
|
self._renderers[formatName.lower()] = rendererClass
|
||||||
|
|
||||||
# Register aliases if any
|
# Register aliases if any
|
||||||
if hasattr(renderer_class, 'get_format_aliases'):
|
if hasattr(rendererClass, 'getFormatAliases'):
|
||||||
aliases = renderer_class.get_format_aliases()
|
aliases = rendererClass.getFormatAliases()
|
||||||
for alias in aliases:
|
for alias in aliases:
|
||||||
self._format_mappings[alias.lower()] = format_name.lower()
|
self._format_mappings[alias.lower()] = formatName.lower()
|
||||||
|
|
||||||
logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")
|
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
|
logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")
|
||||||
|
|
||||||
def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]:
|
def getRenderer(self, outputFormat: str, services=None) -> Optional[BaseRenderer]:
|
||||||
"""Get a renderer instance for the specified format."""
|
"""Get a renderer instance for the specified format."""
|
||||||
if not self._discovered:
|
if not self._discovered:
|
||||||
self.discover_renderers()
|
self.discoverRenderers()
|
||||||
|
|
||||||
# Normalize format name
|
# Normalize format name
|
||||||
format_name = output_format.lower().strip()
|
formatName = outputFormat.lower().strip()
|
||||||
|
|
||||||
# Check for aliases first
|
# Check for aliases first
|
||||||
if format_name in self._format_mappings:
|
if formatName in self._format_mappings:
|
||||||
format_name = self._format_mappings[format_name]
|
formatName = self._format_mappings[formatName]
|
||||||
|
|
||||||
# Get renderer class
|
# Get renderer class
|
||||||
renderer_class = self._renderers.get(format_name)
|
rendererClass = self._renderers.get(formatName)
|
||||||
|
|
||||||
if renderer_class:
|
if rendererClass:
|
||||||
try:
|
try:
|
||||||
return renderer_class(services=services)
|
return rendererClass(services=services)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
|
logger.error(f"Error creating renderer instance for {formatName}: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
logger.warning(f"No renderer found for format: {output_format}")
|
logger.warning(f"No renderer found for format: {outputFormat}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_supported_formats(self) -> List[str]:
|
def getSupportedFormats(self) -> List[str]:
|
||||||
"""Get list of all supported formats."""
|
"""Get list of all supported formats."""
|
||||||
if not self._discovered:
|
if not self._discovered:
|
||||||
self.discover_renderers()
|
self.discoverRenderers()
|
||||||
|
|
||||||
formats = list(self._renderers.keys())
|
formats = list(self._renderers.keys())
|
||||||
formats.extend(self._format_mappings.keys())
|
formats.extend(self._format_mappings.keys())
|
||||||
return sorted(set(formats))
|
return sorted(set(formats))
|
||||||
|
|
||||||
def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
|
def getRendererInfo(self) -> Dict[str, Dict[str, str]]:
|
||||||
"""Get information about all registered renderers."""
|
"""Get information about all registered renderers."""
|
||||||
if not self._discovered:
|
if not self._discovered:
|
||||||
self.discover_renderers()
|
self.discoverRenderers()
|
||||||
|
|
||||||
info = {}
|
info = {}
|
||||||
for format_name, renderer_class in self._renderers.items():
|
for formatName, rendererClass in self._renderers.items():
|
||||||
info[format_name] = {
|
info[formatName] = {
|
||||||
'class_name': renderer_class.__name__,
|
'class_name': rendererClass.__name__,
|
||||||
'module': renderer_class.__module__,
|
'module': rendererClass.__module__,
|
||||||
'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
|
'description': getattr(rendererClass, '__doc__', 'No description').strip().split('\n')[0] if rendererClass.__doc__ else 'No description'
|
||||||
}
|
}
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
@ -141,14 +141,14 @@ class RendererRegistry:
|
||||||
# Global registry instance
|
# Global registry instance
|
||||||
_registry = RendererRegistry()
|
_registry = RendererRegistry()
|
||||||
|
|
||||||
def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]:
|
def getRenderer(outputFormat: str, services=None) -> Optional[BaseRenderer]:
|
||||||
"""Get a renderer instance for the specified format."""
|
"""Get a renderer instance for the specified format."""
|
||||||
return _registry.get_renderer(output_format, services)
|
return _registry.getRenderer(outputFormat, services)
|
||||||
|
|
||||||
def get_supported_formats() -> List[str]:
|
def getSupportedFormats() -> List[str]:
|
||||||
"""Get list of all supported formats."""
|
"""Get list of all supported formats."""
|
||||||
return _registry.get_supported_formats()
|
return _registry.getSupportedFormats()
|
||||||
|
|
||||||
def get_renderer_info() -> Dict[str, Dict[str, str]]:
|
def getRendererInfo() -> Dict[str, Dict[str, str]]:
|
||||||
"""Get information about all registered renderers."""
|
"""Get information about all registered renderers."""
|
||||||
return _registry.get_renderer_info()
|
return _registry.getRendererInfo()
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ Base renderer class for all format renderers.
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Dict, Any, Tuple, List
|
from typing import Dict, Any, Tuple, List
|
||||||
|
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
@ -23,7 +24,7 @@ class BaseRenderer(ABC):
|
||||||
self.services = services # Add services attribute
|
self.services = services # Add services attribute
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Return list of supported format names for this renderer.
|
Return list of supported format names for this renderer.
|
||||||
Override this method in subclasses to specify supported formats.
|
Override this method in subclasses to specify supported formats.
|
||||||
|
|
@ -31,7 +32,7 @@ class BaseRenderer(ABC):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Return list of format aliases for this renderer.
|
Return list of format aliases for this renderer.
|
||||||
Override this method in subclasses to specify format aliases.
|
Override this method in subclasses to specify format aliases.
|
||||||
|
|
@ -39,7 +40,7 @@ class BaseRenderer(ABC):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""
|
"""
|
||||||
Return priority for this renderer (higher number = higher priority).
|
Return priority for this renderer (higher number = higher priority).
|
||||||
Used when multiple renderers support the same format.
|
Used when multiple renderers support the same format.
|
||||||
|
|
@ -47,43 +48,43 @@ class BaseRenderer(ABC):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Render extracted JSON content to the target format.
|
Render extracted JSON content to the target format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
extracted_content: Structured JSON content with sections and metadata
|
extractedContent: Structured JSON content with sections and metadata
|
||||||
title: Report title
|
title: Report title
|
||||||
user_prompt: Original user prompt for context
|
userPrompt: Original user prompt for context
|
||||||
ai_service: AI service instance for additional processing
|
aiService: AI service instance for additional processing
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (rendered_content, mime_type)
|
tuple: (renderedContent, mimeType)
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
"""Extract sections from report data."""
|
"""Extract sections from report data."""
|
||||||
return report_data.get('sections', [])
|
return reportData.get('sections', [])
|
||||||
|
|
||||||
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
|
def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Extract metadata from report data."""
|
"""Extract metadata from report data."""
|
||||||
return report_data.get('metadata', {})
|
return reportData.get('metadata', {})
|
||||||
|
|
||||||
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
|
def _getTitle(self, reportData: Dict[str, Any], fallbackTitle: str) -> str:
|
||||||
"""Get title from report data or use fallback."""
|
"""Get title from report data or use fallback."""
|
||||||
metadata = report_data.get('metadata', {})
|
metadata = reportData.get('metadata', {})
|
||||||
return metadata.get('title', fallback_title)
|
return metadata.get('title', fallbackTitle)
|
||||||
|
|
||||||
def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
|
def _validateJsonStructure(self, jsonContent: Dict[str, Any]) -> bool:
|
||||||
"""Validate that JSON content has the expected structure."""
|
"""Validate that JSON content has the expected structure."""
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(jsonContent, dict):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if "sections" not in json_content:
|
if "sections" not in jsonContent:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
if not isinstance(sections, list):
|
if not isinstance(sections, list):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -96,14 +97,14 @@ class BaseRenderer(ABC):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _get_section_type(self, section: Dict[str, Any]) -> str:
|
def _getSectionType(self, section: Dict[str, Any]) -> str:
|
||||||
"""Get the type of a section; default to 'paragraph' for non-dict inputs."""
|
"""Get the type of a section; default to 'paragraph' for non-dict inputs."""
|
||||||
if isinstance(section, dict):
|
if isinstance(section, dict):
|
||||||
return section.get("content_type", "paragraph")
|
return section.get("content_type", "paragraph")
|
||||||
# If section is a list or any other type, treat as paragraph elements
|
# If section is a list or any other type, treat as paragraph elements
|
||||||
return "paragraph"
|
return "paragraph"
|
||||||
|
|
||||||
def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def _getSectionData(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
"""Get the elements of a section; if a list is provided directly, return it."""
|
"""Get the elements of a section; if a list is provided directly, return it."""
|
||||||
if isinstance(section, dict):
|
if isinstance(section, dict):
|
||||||
return section.get("elements", [])
|
return section.get("elements", [])
|
||||||
|
|
@ -111,21 +112,30 @@ class BaseRenderer(ABC):
|
||||||
return section
|
return section
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _get_section_id(self, section: Dict[str, Any]) -> str:
|
def _getSectionId(self, section: Dict[str, Any]) -> str:
|
||||||
"""Get the ID of a section (if available)."""
|
"""Get the ID of a section (if available)."""
|
||||||
if isinstance(section, dict):
|
if isinstance(section, dict):
|
||||||
return section.get("id", "unknown")
|
return section.get("id", "unknown")
|
||||||
return "unknown"
|
return "unknown"
|
||||||
|
|
||||||
def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
|
def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
|
||||||
"""Extract table headers and rows from section data."""
|
"""Extract table headers and rows from section data."""
|
||||||
headers = section_data.get("headers", [])
|
# Normalize when elements array was passed in
|
||||||
rows = section_data.get("rows", [])
|
if isinstance(sectionData, list) and sectionData:
|
||||||
|
candidate = sectionData[0]
|
||||||
|
sectionData = candidate if isinstance(candidate, dict) else {}
|
||||||
|
headers = sectionData.get("headers", [])
|
||||||
|
rows = sectionData.get("rows", [])
|
||||||
return headers, rows
|
return headers, rows
|
||||||
|
|
||||||
def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
|
def _extractBulletListItems(self, sectionData: Dict[str, Any]) -> List[str]:
|
||||||
"""Extract bullet list items from section data."""
|
"""Extract bullet list items from section data."""
|
||||||
items = section_data.get("items", [])
|
# Normalize when elements array or raw list was passed in
|
||||||
|
if isinstance(sectionData, list):
|
||||||
|
# Already a list of items (strings or dicts)
|
||||||
|
items = sectionData
|
||||||
|
else:
|
||||||
|
items = sectionData.get("items", [])
|
||||||
result = []
|
result = []
|
||||||
for item in items:
|
for item in items:
|
||||||
if isinstance(item, str):
|
if isinstance(item, str):
|
||||||
|
|
@ -134,29 +144,47 @@ class BaseRenderer(ABC):
|
||||||
result.append(item["text"])
|
result.append(item["text"])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
|
def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
|
||||||
"""Extract heading level and text from section data."""
|
"""Extract heading level and text from section data."""
|
||||||
level = section_data.get("level", 1)
|
# Normalize when elements array was passed in
|
||||||
text = section_data.get("text", "")
|
if isinstance(sectionData, list) and sectionData:
|
||||||
|
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||||
|
level = sectionData.get("level", 1)
|
||||||
|
text = sectionData.get("text", "")
|
||||||
return level, text
|
return level, text
|
||||||
|
|
||||||
def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
|
def _extractParagraphText(self, sectionData: Dict[str, Any]) -> str:
|
||||||
"""Extract paragraph text from section data."""
|
"""Extract paragraph text from section data."""
|
||||||
return section_data.get("text", "")
|
if isinstance(sectionData, list):
|
||||||
|
# Join multiple paragraph elements if provided as a list
|
||||||
|
texts = []
|
||||||
|
for el in sectionData:
|
||||||
|
if isinstance(el, dict) and "text" in el:
|
||||||
|
texts.append(el["text"])
|
||||||
|
elif isinstance(el, str):
|
||||||
|
texts.append(el)
|
||||||
|
return "\n".join(texts)
|
||||||
|
return sectionData.get("text", "")
|
||||||
|
|
||||||
def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
|
def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
|
||||||
"""Extract code and language from section data."""
|
"""Extract code and language from section data."""
|
||||||
code = section_data.get("code", "")
|
# Normalize when elements array was passed in
|
||||||
language = section_data.get("language", "")
|
if isinstance(sectionData, list) and sectionData:
|
||||||
|
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||||
|
code = sectionData.get("code", "")
|
||||||
|
language = sectionData.get("language", "")
|
||||||
return code, language
|
return code, language
|
||||||
|
|
||||||
def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
|
def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
|
||||||
"""Extract base64 data and alt text from section data."""
|
"""Extract base64 data and alt text from section data."""
|
||||||
base64_data = section_data.get("base64Data", "")
|
# Normalize when elements array was passed in
|
||||||
alt_text = section_data.get("altText", "Image")
|
if isinstance(sectionData, list) and sectionData:
|
||||||
return base64_data, alt_text
|
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||||
|
base64Data = sectionData.get("base64Data", "")
|
||||||
|
altText = sectionData.get("altText", "Image")
|
||||||
|
return base64Data, altText
|
||||||
|
|
||||||
def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
|
def _renderImageSection(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
|
||||||
"""
|
"""
|
||||||
Render an image section. This is a base implementation that should be overridden
|
Render an image section. This is a base implementation that should be overridden
|
||||||
by format-specific renderers.
|
by format-specific renderers.
|
||||||
|
|
@ -168,47 +196,47 @@ class BaseRenderer(ABC):
|
||||||
Returns:
|
Returns:
|
||||||
Format-specific image representation
|
Format-specific image representation
|
||||||
"""
|
"""
|
||||||
section_data = self._get_section_data(section)
|
sectionData = self._getSectionData(section)
|
||||||
base64_data, alt_text = self._extract_image_data(section_data)
|
base64Data, altText = self._extractImageData(sectionData)
|
||||||
|
|
||||||
# Base implementation returns a simple dict
|
# Base implementation returns a simple dict
|
||||||
# Format-specific renderers should override this method
|
# Format-specific renderers should override this method
|
||||||
return {
|
return {
|
||||||
"content_type": "image",
|
"content_type": "image",
|
||||||
"base64Data": base64_data,
|
"base64Data": base64Data,
|
||||||
"altText": alt_text,
|
"altText": altText,
|
||||||
"width": section_data.get("width", None),
|
"width": sectionData.get("width", None),
|
||||||
"height": section_data.get("height", None),
|
"height": sectionData.get("height", None),
|
||||||
"caption": section_data.get("caption", "")
|
"caption": sectionData.get("caption", "")
|
||||||
}
|
}
|
||||||
|
|
||||||
def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
|
def _validateImageData(self, base64Data: str, altText: str) -> bool:
|
||||||
"""Validate image data."""
|
"""Validate image data."""
|
||||||
if not base64_data:
|
if not base64Data:
|
||||||
self.logger.warning("Image section has no base64 data")
|
self.logger.warning("Image section has no base64 data")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not alt_text:
|
if not altText:
|
||||||
self.logger.warning("Image section has no alt text")
|
self.logger.warning("Image section has no alt text")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Basic base64 validation
|
# Basic base64 validation
|
||||||
try:
|
try:
|
||||||
base64.b64decode(base64_data, validate=True)
|
base64.b64decode(base64Data, validate=True)
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Invalid base64 image data: {str(e)}")
|
self.logger.warning(f"Invalid base64 image data: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
|
def _getImageDimensions(self, base64Data: str) -> Tuple[int, int]:
|
||||||
"""
|
"""
|
||||||
Get image dimensions from base64 data.
|
Get image dimensions from base64 data.
|
||||||
This is a helper method that format-specific renderers can use.
|
This is a helper method that format-specific renderers can use.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Decode base64 data
|
# Decode base64 data
|
||||||
image_data = base64.b64decode(base64_data)
|
imageData = base64.b64decode(base64Data)
|
||||||
image = Image.open(io.BytesIO(image_data))
|
image = Image.open(io.BytesIO(imageData))
|
||||||
|
|
||||||
return image.size # Returns (width, height)
|
return image.size # Returns (width, height)
|
||||||
|
|
||||||
|
|
@ -216,89 +244,89 @@ class BaseRenderer(ABC):
|
||||||
self.logger.warning(f"Could not determine image dimensions: {str(e)}")
|
self.logger.warning(f"Could not determine image dimensions: {str(e)}")
|
||||||
return (0, 0)
|
return (0, 0)
|
||||||
|
|
||||||
def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
|
def _resizeImageIfNeeded(self, base64Data: str, maxWidth: int = 800, maxHeight: int = 600) -> str:
|
||||||
"""
|
"""
|
||||||
Resize image if it exceeds maximum dimensions.
|
Resize image if it exceeds maximum dimensions.
|
||||||
Returns the resized image as base64 string.
|
Returns the resized image as base64 string.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Decode base64 data
|
# Decode base64 data
|
||||||
image_data = base64.b64decode(base64_data)
|
imageData = base64.b64decode(base64Data)
|
||||||
image = Image.open(io.BytesIO(image_data))
|
image = Image.open(io.BytesIO(imageData))
|
||||||
|
|
||||||
# Check if resizing is needed
|
# Check if resizing is needed
|
||||||
width, height = image.size
|
width, height = image.size
|
||||||
if width <= max_width and height <= max_height:
|
if width <= maxWidth and height <= maxHeight:
|
||||||
return base64_data # No resizing needed
|
return base64Data # No resizing needed
|
||||||
|
|
||||||
# Calculate new dimensions maintaining aspect ratio
|
# Calculate new dimensions maintaining aspect ratio
|
||||||
ratio = min(max_width / width, max_height / height)
|
ratio = min(maxWidth / width, maxHeight / height)
|
||||||
new_width = int(width * ratio)
|
newWidth = int(width * ratio)
|
||||||
new_height = int(height * ratio)
|
newHeight = int(height * ratio)
|
||||||
|
|
||||||
# Resize image
|
# Resize image
|
||||||
resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
resizedImage = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)
|
||||||
|
|
||||||
# Convert back to base64
|
# Convert back to base64
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
resized_image.save(buffer, format=image.format or 'PNG')
|
resizedImage.save(buffer, format=image.format or 'PNG')
|
||||||
resized_data = buffer.getvalue()
|
resizedData = buffer.getvalue()
|
||||||
|
|
||||||
return base64.b64encode(resized_data).decode('utf-8')
|
return base64.b64encode(resizedData).decode('utf-8')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not resize image: {str(e)}")
|
self.logger.warning(f"Could not resize image: {str(e)}")
|
||||||
return base64_data # Return original if resize fails
|
return base64Data # Return original if resize fails
|
||||||
|
|
||||||
def _get_supported_section_types(self) -> List[str]:
|
def _getSupportedSectionTypes(self) -> List[str]:
|
||||||
"""Return list of supported section types."""
|
"""Return list of supported section types (from unified schema)."""
|
||||||
return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
|
return supportedSectionTypes
|
||||||
|
|
||||||
def _is_valid_section_type(self, section_type: str) -> bool:
|
def _isValidSectionType(self, sectionType: str) -> bool:
|
||||||
"""Check if a section type is valid."""
|
"""Check if a section type is valid."""
|
||||||
return section_type in self._get_supported_section_types()
|
return sectionType in self._getSupportedSectionTypes()
|
||||||
|
|
||||||
def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
|
def _processSectionByType(self, section: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Process a section and return structured data based on its type."""
|
"""Process a section and return structured data based on its type."""
|
||||||
section_type = self._get_section_type(section)
|
sectionType = self._getSectionType(section)
|
||||||
section_data = self._get_section_data(section)
|
sectionData = self._getSectionData(section)
|
||||||
|
|
||||||
if section_type == "table":
|
if sectionType == "table":
|
||||||
headers, rows = self._extract_table_data(section_data)
|
headers, rows = self._extractTableData(sectionData)
|
||||||
return {"content_type": "table", "headers": headers, "rows": rows}
|
return {"content_type": "table", "headers": headers, "rows": rows}
|
||||||
elif section_type == "bullet_list":
|
elif sectionType == "bullet_list":
|
||||||
items = self._extract_bullet_list_items(section_data)
|
items = self._extractBulletListItems(sectionData)
|
||||||
return {"content_type": "bullet_list", "items": items}
|
return {"content_type": "bullet_list", "items": items}
|
||||||
elif section_type == "heading":
|
elif sectionType == "heading":
|
||||||
level, text = self._extract_heading_data(section_data)
|
level, text = self._extractHeadingData(sectionData)
|
||||||
return {"content_type": "heading", "level": level, "text": text}
|
return {"content_type": "heading", "level": level, "text": text}
|
||||||
elif section_type == "paragraph":
|
elif sectionType == "paragraph":
|
||||||
text = self._extract_paragraph_text(section_data)
|
text = self._extractParagraphText(sectionData)
|
||||||
return {"content_type": "paragraph", "text": text}
|
return {"content_type": "paragraph", "text": text}
|
||||||
elif section_type == "code_block":
|
elif sectionType == "code_block":
|
||||||
code, language = self._extract_code_block_data(section_data)
|
code, language = self._extractCodeBlockData(sectionData)
|
||||||
return {"content_type": "code_block", "code": code, "language": language}
|
return {"content_type": "code_block", "code": code, "language": language}
|
||||||
elif section_type == "image":
|
elif sectionType == "image":
|
||||||
base64_data, alt_text = self._extract_image_data(section_data)
|
base64Data, altText = self._extractImageData(sectionData)
|
||||||
# Validate image data
|
# Validate image data
|
||||||
if self._validate_image_data(base64_data, alt_text):
|
if self._validateImageData(base64Data, altText):
|
||||||
return {
|
return {
|
||||||
"content_type": "image",
|
"content_type": "image",
|
||||||
"base64Data": base64_data,
|
"base64Data": base64Data,
|
||||||
"altText": alt_text,
|
"altText": altText,
|
||||||
"width": section_data.get("width"),
|
"width": sectionData.get("width") if isinstance(sectionData, dict) else None,
|
||||||
"height": section_data.get("height"),
|
"height": sectionData.get("height") if isinstance(sectionData, dict) else None,
|
||||||
"caption": section_data.get("caption", "")
|
"caption": sectionData.get("caption", "") if isinstance(sectionData, dict) else ""
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# Return placeholder if image data is invalid
|
# Return placeholder if image data is invalid
|
||||||
return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
|
return {"content_type": "paragraph", "text": f"[Image: {altText}]"}
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph
|
# Fallback to paragraph
|
||||||
text = self._extract_paragraph_text(section_data)
|
text = self._extractParagraphText(sectionData)
|
||||||
return {"content_type": "paragraph", "text": text}
|
return {"content_type": "paragraph", "text": text}
|
||||||
|
|
||||||
def _format_timestamp(self, timestamp: str = None) -> str:
|
def _formatTimestamp(self, timestamp: str = None) -> str:
|
||||||
"""Format timestamp for display."""
|
"""Format timestamp for display."""
|
||||||
if timestamp:
|
if timestamp:
|
||||||
return timestamp
|
return timestamp
|
||||||
|
|
@ -306,38 +334,38 @@ class BaseRenderer(ABC):
|
||||||
|
|
||||||
# ===== GENERIC AI STYLING HELPERS =====
|
# ===== GENERIC AI STYLING HELPERS =====
|
||||||
|
|
||||||
async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
async def _getAiStyles(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Generic AI styling method that can be used by all renderers.
|
Generic AI styling method that can be used by all renderers.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ai_service: AI service instance
|
aiService: AI service instance
|
||||||
style_template: Format-specific style template
|
styleTemplate: Format-specific style template
|
||||||
default_styles: Default styles to fall back to
|
defaultStyles: Default styles to fall back to
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict with styling definitions
|
Dict with styling definitions
|
||||||
"""
|
"""
|
||||||
# DEBUG: Show which renderer is calling this method
|
# DEBUG: Show which renderer is calling this method
|
||||||
|
|
||||||
if not ai_service:
|
if not aiService:
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
request_options = AiCallOptions()
|
requestOptions = AiCallOptions()
|
||||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
|
||||||
|
|
||||||
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
|
||||||
|
|
||||||
# DEBUG: Show the actual prompt being sent to AI
|
# DEBUG: Show the actual prompt being sent to AI
|
||||||
self.logger.debug(f"AI Style Template Prompt:")
|
self.logger.debug(f"AI Style Template Prompt:")
|
||||||
self.logger.debug(f"{style_template}")
|
self.logger.debug(f"{styleTemplate}")
|
||||||
|
|
||||||
response = await ai_service.aiObjects.call(request)
|
response = await aiService.aiObjects.call(request)
|
||||||
|
|
||||||
# Save styling prompt and response to debug
|
# Save styling prompt and response to debug
|
||||||
self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt")
|
self.services.utils.writeDebugFile(styleTemplate, "renderer_styling_prompt")
|
||||||
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
||||||
|
|
||||||
# Clean and parse JSON
|
# Clean and parse JSON
|
||||||
|
|
@ -346,12 +374,12 @@ class BaseRenderer(ABC):
|
||||||
# Check if result is empty
|
# Check if result is empty
|
||||||
if not result:
|
if not result:
|
||||||
self.logger.warning("AI styling returned empty response, using defaults")
|
self.logger.warning("AI styling returned empty response, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
# Extract JSON from markdown if present
|
# Extract JSON from markdown if present
|
||||||
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
jsonMatch = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||||
if json_match:
|
if jsonMatch:
|
||||||
result = json_match.group(1).strip()
|
result = jsonMatch.group(1).strip()
|
||||||
elif result.startswith('```json'):
|
elif result.startswith('```json'):
|
||||||
result = re.sub(r'^```json\s*', '', result)
|
result = re.sub(r'^```json\s*', '', result)
|
||||||
result = re.sub(r'\s*```$', '', result)
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
|
@ -362,8 +390,8 @@ class BaseRenderer(ABC):
|
||||||
# Try to parse JSON
|
# Try to parse JSON
|
||||||
try:
|
try:
|
||||||
styles = json.loads(result)
|
styles = json.loads(result)
|
||||||
except json.JSONDecodeError as json_error:
|
except json.JSONDecodeError as jsonError:
|
||||||
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
|
self.logger.warning(f"AI styling returned invalid JSON: {jsonError}")
|
||||||
|
|
||||||
# Use print instead of logger to avoid truncation
|
# Use print instead of logger to avoid truncation
|
||||||
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
|
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
|
||||||
|
|
@ -372,88 +400,88 @@ class BaseRenderer(ABC):
|
||||||
self.logger.warning(f"Raw content that failed to parse: {result}")
|
self.logger.warning(f"Raw content that failed to parse: {result}")
|
||||||
|
|
||||||
# Try to fix incomplete JSON by adding missing closing braces
|
# Try to fix incomplete JSON by adding missing closing braces
|
||||||
open_braces = result.count('{')
|
openBraces = result.count('{')
|
||||||
close_braces = result.count('}')
|
closeBraces = result.count('}')
|
||||||
|
|
||||||
if open_braces > close_braces:
|
if openBraces > closeBraces:
|
||||||
# JSON is incomplete, add missing closing braces
|
# JSON is incomplete, add missing closing braces
|
||||||
missing_braces = open_braces - close_braces
|
missingBraces = openBraces - closeBraces
|
||||||
result = result + '}' * missing_braces
|
result = result + '}' * missingBraces
|
||||||
self.logger.info(f"Added {missing_braces} missing closing brace(s)")
|
self.logger.info(f"Added {missingBraces} missing closing brace(s)")
|
||||||
self.logger.debug(f"Fixed JSON: {result}")
|
self.logger.debug(f"Fixed JSON: {result}")
|
||||||
|
|
||||||
# Try parsing the fixed JSON
|
# Try parsing the fixed JSON
|
||||||
try:
|
try:
|
||||||
styles = json.loads(result)
|
styles = json.loads(result)
|
||||||
self.logger.info("Successfully fixed incomplete JSON")
|
self.logger.info("Successfully fixed incomplete JSON")
|
||||||
except json.JSONDecodeError as fix_error:
|
except json.JSONDecodeError as fixError:
|
||||||
self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
|
self.logger.warning(f"Fixed JSON still invalid: {fixError}")
|
||||||
self.logger.warning(f"Fixed JSON content: {result}")
|
self.logger.warning(f"Fixed JSON content: {result}")
|
||||||
# Try to extract just the JSON part if it's embedded in text
|
# Try to extract just the JSON part if it's embedded in text
|
||||||
json_start = result.find('{')
|
jsonStart = result.find('{')
|
||||||
json_end = result.rfind('}')
|
jsonEnd = result.rfind('}')
|
||||||
if json_start != -1 and json_end != -1 and json_end > json_start:
|
if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
|
||||||
json_part = result[json_start:json_end+1]
|
jsonPart = result[jsonStart:jsonEnd+1]
|
||||||
try:
|
try:
|
||||||
styles = json.loads(json_part)
|
styles = json.loads(jsonPart)
|
||||||
self.logger.info("Successfully extracted JSON from explanatory text")
|
self.logger.info("Successfully extracted JSON from explanatory text")
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
self.logger.warning("Could not extract valid JSON from response, using defaults")
|
self.logger.warning("Could not extract valid JSON from response, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
else:
|
else:
|
||||||
return default_styles
|
return defaultStyles
|
||||||
else:
|
else:
|
||||||
# Try to extract just the JSON part if it's embedded in text
|
# Try to extract just the JSON part if it's embedded in text
|
||||||
json_start = result.find('{')
|
jsonStart = result.find('{')
|
||||||
json_end = result.rfind('}')
|
jsonEnd = result.rfind('}')
|
||||||
if json_start != -1 and json_end != -1 and json_end > json_start:
|
if jsonStart != -1 and jsonEnd != -1 and jsonEnd > jsonStart:
|
||||||
json_part = result[json_start:json_end+1]
|
jsonPart = result[jsonStart:jsonEnd+1]
|
||||||
try:
|
try:
|
||||||
styles = json.loads(json_part)
|
styles = json.loads(jsonPart)
|
||||||
self.logger.info("Successfully extracted JSON from explanatory text")
|
self.logger.info("Successfully extracted JSON from explanatory text")
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
self.logger.warning("Could not extract valid JSON from response, using defaults")
|
self.logger.warning("Could not extract valid JSON from response, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
else:
|
else:
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
# Convert colors to appropriate format
|
# Convert colors to appropriate format
|
||||||
styles = self._convert_colors_format(styles)
|
styles = self._convertColorsFormat(styles)
|
||||||
|
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Convert colors to appropriate format based on renderer type.
|
Convert colors to appropriate format based on renderer type.
|
||||||
Override this method in subclasses for format-specific color handling.
|
Override this method in subclasses for format-specific color handling.
|
||||||
"""
|
"""
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
|
def _createAiStyleTemplate(self, formatName: str, userPrompt: str, styleSchema: Dict[str, Any]) -> str:
|
||||||
"""
|
"""
|
||||||
Create a standardized AI style template for any format.
|
Create a standardized AI style template for any format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
|
formatName: Name of the format (e.g., "docx", "xlsx", "pptx")
|
||||||
user_prompt: User's original prompt
|
userPrompt: User's original prompt
|
||||||
style_schema: Format-specific style schema
|
styleSchema: Format-specific style schema
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Formatted prompt string
|
Formatted prompt string
|
||||||
"""
|
"""
|
||||||
schema_json = json.dumps(style_schema, indent=4)
|
schemaJson = json.dumps(styleSchema, indent=4)
|
||||||
|
|
||||||
# DEBUG: Show the schema being sent
|
# DEBUG: Show the schema being sent
|
||||||
|
|
||||||
return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.
|
return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {formatName.upper()} documents.
|
||||||
|
|
||||||
Use this schema as a template and customize the values for professional document styling:
|
Use this schema as a template and customize the values for professional document styling:
|
||||||
|
|
||||||
{schema_json}
|
{schemaJson}
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
- Return ONLY the complete JSON object (no markdown, no explanations)
|
- Return ONLY the complete JSON object (no markdown, no explanations)
|
||||||
|
|
|
||||||
|
|
@ -9,163 +9,163 @@ class RendererCsv(BaseRenderer):
|
||||||
"""Renders content to CSV format with format-specific extraction."""
|
"""Renders content to CSV format with format-specific extraction."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported CSV formats."""
|
"""Return supported CSV formats."""
|
||||||
return ['csv']
|
return ['csv']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['spreadsheet', 'table']
|
return ['spreadsheet', 'table']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for CSV renderer."""
|
"""Return priority for CSV renderer."""
|
||||||
return 70
|
return 70
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to CSV format."""
|
"""Render extracted JSON content to CSV format."""
|
||||||
try:
|
try:
|
||||||
# Generate CSV directly from JSON (no styling needed for CSV)
|
# Generate CSV directly from JSON (no styling needed for CSV)
|
||||||
csv_content = await self._generate_csv_from_json(extracted_content, title)
|
csvContent = await self._generateCsvFromJson(extractedContent, title)
|
||||||
|
|
||||||
return csv_content, "text/csv"
|
return csvContent, "text/csv"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering CSV: {str(e)}")
|
self.logger.error(f"Error rendering CSV: {str(e)}")
|
||||||
# Return minimal CSV fallback
|
# Return minimal CSV fallback
|
||||||
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
|
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
|
||||||
|
|
||||||
async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
async def _generateCsvFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||||
"""Generate CSV content from structured JSON document."""
|
"""Generate CSV content from structured JSON document."""
|
||||||
try:
|
try:
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(jsonContent, dict):
|
||||||
raise ValueError("JSON content must be a dictionary")
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
if "sections" not in json_content:
|
if "sections" not in jsonContent:
|
||||||
raise ValueError("JSON content must contain 'sections' field")
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
# Use title from JSON metadata if available, otherwise use provided title
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
document_title = json_content.get("metadata", {}).get("title", title)
|
documentTitle = jsonContent.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
# Generate CSV content
|
# Generate CSV content
|
||||||
csv_rows = []
|
csvRows = []
|
||||||
|
|
||||||
# Add title row
|
# Add title row
|
||||||
if document_title:
|
if documentTitle:
|
||||||
csv_rows.append([document_title])
|
csvRows.append([documentTitle])
|
||||||
csv_rows.append([]) # Empty row
|
csvRows.append([]) # Empty row
|
||||||
|
|
||||||
# Process each section in order
|
# Process each section in order
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
for section in sections:
|
for section in sections:
|
||||||
section_csv = self._render_json_section_to_csv(section)
|
sectionCsv = self._renderJsonSectionToCsv(section)
|
||||||
if section_csv:
|
if sectionCsv:
|
||||||
csv_rows.extend(section_csv)
|
csvRows.extend(sectionCsv)
|
||||||
csv_rows.append([]) # Empty row between sections
|
csvRows.append([]) # Empty row between sections
|
||||||
|
|
||||||
# Convert to CSV string
|
# Convert to CSV string
|
||||||
csv_content = self._convert_rows_to_csv(csv_rows)
|
csvContent = self._convertRowsToCsv(csvRows)
|
||||||
|
|
||||||
return csv_content
|
return csvContent
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating CSV from JSON: {str(e)}")
|
self.logger.error(f"Error generating CSV from JSON: {str(e)}")
|
||||||
raise Exception(f"CSV generation failed: {str(e)}")
|
raise Exception(f"CSV generation failed: {str(e)}")
|
||||||
|
|
||||||
def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
|
def _renderJsonSectionToCsv(self, section: Dict[str, Any]) -> List[List[str]]:
|
||||||
"""Render a single JSON section to CSV rows."""
|
"""Render a single JSON section to CSV rows."""
|
||||||
try:
|
try:
|
||||||
section_type = section.get("content_type", "paragraph")
|
sectionType = section.get("content_type", "paragraph")
|
||||||
elements = section.get("elements", [])
|
elements = section.get("elements", [])
|
||||||
|
|
||||||
csv_rows = []
|
csvRows = []
|
||||||
|
|
||||||
# Add section title if available
|
# Add section title if available
|
||||||
section_title = section.get("title")
|
sectionTitle = section.get("title")
|
||||||
if section_title:
|
if sectionTitle:
|
||||||
csv_rows.append([f"# {section_title}"])
|
csvRows.append([f"# {sectionTitle}"])
|
||||||
|
|
||||||
# Process each element in the section
|
# Process each element in the section
|
||||||
for element in elements:
|
for element in elements:
|
||||||
if section_type == "table":
|
if sectionType == "table":
|
||||||
csv_rows.extend(self._render_json_table_to_csv(element))
|
csvRows.extend(self._renderJsonTableToCsv(element))
|
||||||
elif section_type == "list":
|
elif sectionType == "list":
|
||||||
csv_rows.extend(self._render_json_list_to_csv(element))
|
csvRows.extend(self._renderJsonListToCsv(element))
|
||||||
elif section_type == "heading":
|
elif sectionType == "heading":
|
||||||
csv_rows.extend(self._render_json_heading_to_csv(element))
|
csvRows.extend(self._renderJsonHeadingToCsv(element))
|
||||||
elif section_type == "paragraph":
|
elif sectionType == "paragraph":
|
||||||
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
csvRows.extend(self._renderJsonParagraphToCsv(element))
|
||||||
elif section_type == "code":
|
elif sectionType == "code":
|
||||||
csv_rows.extend(self._render_json_code_to_csv(element))
|
csvRows.extend(self._renderJsonCodeToCsv(element))
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph for unknown types
|
# Fallback to paragraph for unknown types
|
||||||
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
csvRows.extend(self._renderJsonParagraphToCsv(element))
|
||||||
|
|
||||||
return csv_rows
|
return csvRows
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
||||||
return [["[Error rendering section]"]]
|
return [["[Error rendering section]"]]
|
||||||
|
|
||||||
def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
|
def _renderJsonTableToCsv(self, tableData: Dict[str, Any]) -> List[List[str]]:
|
||||||
"""Render a JSON table to CSV rows."""
|
"""Render a JSON table to CSV rows."""
|
||||||
try:
|
try:
|
||||||
headers = table_data.get("headers", [])
|
headers = tableData.get("headers", [])
|
||||||
rows = table_data.get("rows", [])
|
rows = tableData.get("rows", [])
|
||||||
|
|
||||||
csv_rows = []
|
csvRows = []
|
||||||
|
|
||||||
if headers:
|
if headers:
|
||||||
csv_rows.append(headers)
|
csvRows.append(headers)
|
||||||
|
|
||||||
if rows:
|
if rows:
|
||||||
csv_rows.extend(rows)
|
csvRows.extend(rows)
|
||||||
|
|
||||||
return csv_rows
|
return csvRows
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
return [["[Error rendering table]"]]
|
return [["[Error rendering table]"]]
|
||||||
|
|
||||||
def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
|
def _renderJsonListToCsv(self, listData: Dict[str, Any]) -> List[List[str]]:
|
||||||
"""Render a JSON list to CSV rows."""
|
"""Render a JSON list to CSV rows."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = listData.get("items", [])
|
||||||
csv_rows = []
|
csvRows = []
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
if isinstance(item, dict):
|
if isinstance(item, dict):
|
||||||
text = item.get("text", "")
|
text = item.get("text", "")
|
||||||
subitems = item.get("subitems", [])
|
subitems = item.get("subitems", [])
|
||||||
csv_rows.append([text])
|
csvRows.append([text])
|
||||||
|
|
||||||
# Add subitems as indented rows
|
# Add subitems as indented rows
|
||||||
for subitem in subitems:
|
for subitem in subitems:
|
||||||
if isinstance(subitem, dict):
|
if isinstance(subitem, dict):
|
||||||
csv_rows.append([f" - {subitem.get('text', '')}"])
|
csvRows.append([f" - {subitem.get('text', '')}"])
|
||||||
else:
|
else:
|
||||||
csv_rows.append([f" - {subitem}"])
|
csvRows.append([f" - {subitem}"])
|
||||||
else:
|
else:
|
||||||
csv_rows.append([str(item)])
|
csvRows.append([str(item)])
|
||||||
|
|
||||||
return csv_rows
|
return csvRows
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering list: {str(e)}")
|
self.logger.warning(f"Error rendering list: {str(e)}")
|
||||||
return [["[Error rendering list]"]]
|
return [["[Error rendering list]"]]
|
||||||
|
|
||||||
def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
|
def _renderJsonHeadingToCsv(self, headingData: Dict[str, Any]) -> List[List[str]]:
|
||||||
"""Render a JSON heading to CSV rows."""
|
"""Render a JSON heading to CSV rows."""
|
||||||
try:
|
try:
|
||||||
text = heading_data.get("text", "")
|
text = headingData.get("text", "")
|
||||||
level = heading_data.get("level", 1)
|
level = headingData.get("level", 1)
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
# Use # symbols for heading levels
|
# Use # symbols for heading levels
|
||||||
heading_text = f"{'#' * level} {text}"
|
headingText = f"{'#' * level} {text}"
|
||||||
return [[heading_text]]
|
return [[headingText]]
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
@ -173,30 +173,30 @@ class RendererCsv(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
return [["[Error rendering heading]"]]
|
return [["[Error rendering heading]"]]
|
||||||
|
|
||||||
def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
|
def _renderJsonParagraphToCsv(self, paragraphData: Dict[str, Any]) -> List[List[str]]:
|
||||||
"""Render a JSON paragraph to CSV rows."""
|
"""Render a JSON paragraph to CSV rows."""
|
||||||
try:
|
try:
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraphData.get("text", "")
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
# Split long paragraphs into multiple rows if needed
|
# Split long paragraphs into multiple rows if needed
|
||||||
if len(text) > 100:
|
if len(text) > 100:
|
||||||
words = text.split()
|
words = text.split()
|
||||||
rows = []
|
rows = []
|
||||||
current_row = []
|
currentRow = []
|
||||||
current_length = 0
|
currentLength = 0
|
||||||
|
|
||||||
for word in words:
|
for word in words:
|
||||||
if current_length + len(word) > 100 and current_row:
|
if currentLength + len(word) > 100 and currentRow:
|
||||||
rows.append([" ".join(current_row)])
|
rows.append([" ".join(currentRow)])
|
||||||
current_row = [word]
|
currentRow = [word]
|
||||||
current_length = len(word)
|
currentLength = len(word)
|
||||||
else:
|
else:
|
||||||
current_row.append(word)
|
currentRow.append(word)
|
||||||
current_length += len(word) + 1
|
currentLength += len(word) + 1
|
||||||
|
|
||||||
if current_row:
|
if currentRow:
|
||||||
rows.append([" ".join(current_row)])
|
rows.append([" ".join(currentRow)])
|
||||||
|
|
||||||
return rows
|
return rows
|
||||||
else:
|
else:
|
||||||
|
|
@ -208,30 +208,30 @@ class RendererCsv(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
return [["[Error rendering paragraph]"]]
|
return [["[Error rendering paragraph]"]]
|
||||||
|
|
||||||
def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
|
def _renderJsonCodeToCsv(self, codeData: Dict[str, Any]) -> List[List[str]]:
|
||||||
"""Render a JSON code block to CSV rows."""
|
"""Render a JSON code block to CSV rows."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = codeData.get("code", "")
|
||||||
language = code_data.get("language", "")
|
language = codeData.get("language", "")
|
||||||
|
|
||||||
csv_rows = []
|
csvRows = []
|
||||||
|
|
||||||
if language:
|
if language:
|
||||||
csv_rows.append([f"Code ({language}):"])
|
csvRows.append([f"Code ({language}):"])
|
||||||
|
|
||||||
if code:
|
if code:
|
||||||
# Split code into lines
|
# Split code into lines
|
||||||
code_lines = code.split('\n')
|
codeLines = code.split('\n')
|
||||||
for line in code_lines:
|
for line in codeLines:
|
||||||
csv_rows.append([f" {line}"])
|
csvRows.append([f" {line}"])
|
||||||
|
|
||||||
return csv_rows
|
return csvRows
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
return [["[Error rendering code block]"]]
|
return [["[Error rendering code block]"]]
|
||||||
|
|
||||||
def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
|
def _convertRowsToCsv(self, rows: List[List[str]]) -> str:
|
||||||
"""Convert rows to CSV string."""
|
"""Convert rows to CSV string."""
|
||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
|
|
@ -245,7 +245,7 @@ class RendererCsv(BaseRenderer):
|
||||||
|
|
||||||
return output.getvalue()
|
return output.getvalue()
|
||||||
|
|
||||||
def _clean_csv_content(self, content: str, title: str) -> str:
|
def _cleanCsvContent(self, content: str, title: str) -> str:
|
||||||
"""Clean and validate CSV content from AI."""
|
"""Clean and validate CSV content from AI."""
|
||||||
content = content.strip()
|
content = content.strip()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,33 +21,33 @@ class RendererDocx(BaseRenderer):
|
||||||
"""Renders content to DOCX format using python-docx."""
|
"""Renders content to DOCX format using python-docx."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported DOCX formats."""
|
"""Return supported DOCX formats."""
|
||||||
return ['docx', 'doc']
|
return ['docx', 'doc']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['word', 'document']
|
return ['word', 'document']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for DOCX renderer."""
|
"""Return priority for DOCX renderer."""
|
||||||
return 115
|
return 115
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
||||||
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER")
|
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
|
||||||
try:
|
try:
|
||||||
if not DOCX_AVAILABLE:
|
if not DOCX_AVAILABLE:
|
||||||
# Fallback to HTML if python-docx not available
|
# Fallback to HTML if python-docx not available
|
||||||
from .rendererHtml import RendererHtml
|
from .rendererHtml import RendererHtml
|
||||||
html_renderer = RendererHtml()
|
htmlRenderer = RendererHtml()
|
||||||
html_content, _ = await html_renderer.render(extracted_content, title)
|
htmlContent, _ = await htmlRenderer.render(extractedContent, title)
|
||||||
return html_content, "text/html"
|
return htmlContent, "text/html"
|
||||||
|
|
||||||
# Generate DOCX using AI-analyzed styling
|
# Generate DOCX using AI-analyzed styling
|
||||||
docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service)
|
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
|
||||||
|
|
@ -56,18 +56,18 @@ class RendererDocx(BaseRenderer):
|
||||||
# Return minimal fallback
|
# Return minimal fallback
|
||||||
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
||||||
|
|
||||||
async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate DOCX content from structured JSON document using AI-generated styling."""
|
"""Generate DOCX content from structured JSON document using AI-generated styling."""
|
||||||
try:
|
try:
|
||||||
# Create new document
|
# Create new document
|
||||||
doc = Document()
|
doc = Document()
|
||||||
|
|
||||||
# Get AI-generated styling definitions
|
# Get AI-generated styling definitions
|
||||||
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
|
self.logger.info(f"About to call AI styling with user_prompt: {userPrompt[:100] if userPrompt else 'None'}...")
|
||||||
styles = await self._get_docx_styles(user_prompt, ai_service)
|
styles = await self._getDocxStyles(userPrompt, aiService)
|
||||||
|
|
||||||
# Apply basic document setup
|
# Apply basic document setup
|
||||||
self._setup_basic_document_styles(doc)
|
self._setupBasicDocumentStyles(doc)
|
||||||
|
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(json_content, dict):
|
||||||
|
|
@ -104,7 +104,7 @@ class RendererDocx(BaseRenderer):
|
||||||
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
|
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
|
||||||
raise Exception(f"DOCX generation failed: {str(e)}")
|
raise Exception(f"DOCX generation failed: {str(e)}")
|
||||||
|
|
||||||
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
async def _getDocxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
|
||||||
"""Get DOCX styling definitions using base template AI styling."""
|
"""Get DOCX styling definitions using base template AI styling."""
|
||||||
style_schema = {
|
style_schema = {
|
||||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||||
|
|
@ -118,13 +118,13 @@ class RendererDocx(BaseRenderer):
|
||||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
||||||
}
|
}
|
||||||
|
|
||||||
style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
|
style_template = self._createAiStyleTemplate("docx", userPrompt, style_schema)
|
||||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
|
styles = await self._getAiStyles(aiService, style_template, self._getDefaultStyles())
|
||||||
|
|
||||||
# Validate and fix contrast issues
|
# Validate and fix contrast issues
|
||||||
return self._validate_styles_contrast(styles)
|
return self._validateStylesContrast(styles)
|
||||||
|
|
||||||
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _validateStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Validate and fix contrast issues in AI-generated styles."""
|
"""Validate and fix contrast issues in AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Fix table header contrast
|
# Fix table header contrast
|
||||||
|
|
@ -159,9 +159,9 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Style validation failed: {str(e)}")
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||||
return self._get_default_styles()
|
return self._getDefaultStyles()
|
||||||
|
|
||||||
def _get_default_styles(self) -> Dict[str, Any]:
|
def _getDefaultStyles(self) -> Dict[str, Any]:
|
||||||
"""Default DOCX styles."""
|
"""Default DOCX styles."""
|
||||||
return {
|
return {
|
||||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||||
|
|
@ -175,7 +175,7 @@ class RendererDocx(BaseRenderer):
|
||||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _setup_basic_document_styles(self, doc: Document) -> None:
|
def _setupBasicDocumentStyles(self, doc: Document) -> None:
|
||||||
"""Set up basic document styles."""
|
"""Set up basic document styles."""
|
||||||
try:
|
try:
|
||||||
# Set default font
|
# Set default font
|
||||||
|
|
@ -189,7 +189,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _clear_template_content(self, doc: Document) -> None:
|
def _clearTemplateContent(self, doc: Document) -> None:
|
||||||
"""Clear template content while preserving styles."""
|
"""Clear template content while preserving styles."""
|
||||||
try:
|
try:
|
||||||
# Remove all paragraphs except keep the styles
|
# Remove all paragraphs except keep the styles
|
||||||
|
|
@ -204,7 +204,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not clear template content: {str(e)}")
|
self.logger.warning(f"Could not clear template content: {str(e)}")
|
||||||
|
|
||||||
def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a single JSON section to DOCX using AI-generated styles."""
|
"""Render a single JSON section to DOCX using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
section_type = section.get("content_type", "paragraph")
|
section_type = section.get("content_type", "paragraph")
|
||||||
|
|
@ -213,27 +213,27 @@ class RendererDocx(BaseRenderer):
|
||||||
# Process each element in the section
|
# Process each element in the section
|
||||||
for element in elements:
|
for element in elements:
|
||||||
if section_type == "table":
|
if section_type == "table":
|
||||||
self._render_json_table(doc, element, styles)
|
self._renderJsonTable(doc, element, styles)
|
||||||
elif section_type == "bullet_list":
|
elif section_type == "bullet_list":
|
||||||
self._render_json_bullet_list(doc, element, styles)
|
self._renderJsonBulletList(doc, element, styles)
|
||||||
elif section_type == "heading":
|
elif section_type == "heading":
|
||||||
self._render_json_heading(doc, element, styles)
|
self._renderJsonHeading(doc, element, styles)
|
||||||
elif section_type == "paragraph":
|
elif section_type == "paragraph":
|
||||||
self._render_json_paragraph(doc, element, styles)
|
self._renderJsonParagraph(doc, element, styles)
|
||||||
elif section_type == "code_block":
|
elif section_type == "code_block":
|
||||||
self._render_json_code_block(doc, element, styles)
|
self._renderJsonCodeBlock(doc, element, styles)
|
||||||
elif section_type == "image":
|
elif section_type == "image":
|
||||||
self._render_json_image(doc, element, styles)
|
self._renderJsonImage(doc, element, styles)
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph for unknown types
|
# Fallback to paragraph for unknown types
|
||||||
self._render_json_paragraph(doc, element, styles)
|
self._renderJsonParagraph(doc, element, styles)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
||||||
# Add error paragraph as fallback
|
# Add error paragraph as fallback
|
||||||
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
|
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
|
||||||
|
|
||||||
def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonTable(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON table to DOCX using AI-generated styles."""
|
"""Render a JSON table to DOCX using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
headers = table_data.get("headers", [])
|
headers = table_data.get("headers", [])
|
||||||
|
|
@ -249,7 +249,7 @@ class RendererDocx(BaseRenderer):
|
||||||
# Apply table borders based on AI style
|
# Apply table borders based on AI style
|
||||||
border_style = styles["table_border"]["style"]
|
border_style = styles["table_border"]["style"]
|
||||||
if border_style == "horizontal_only":
|
if border_style == "horizontal_only":
|
||||||
self._apply_horizontal_borders_only(table)
|
self._applyHorizontalBordersOnly(table)
|
||||||
elif border_style == "grid":
|
elif border_style == "grid":
|
||||||
table.style = 'Table Grid'
|
table.style = 'Table Grid'
|
||||||
# else: no borders
|
# else: no borders
|
||||||
|
|
@ -264,7 +264,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
# Apply background color
|
# Apply background color
|
||||||
bg_color = header_style["background"].lstrip('#')
|
bg_color = header_style["background"].lstrip('#')
|
||||||
self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
|
self._setCellBackground(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
|
||||||
|
|
||||||
# Apply text styling
|
# Apply text styling
|
||||||
for paragraph in cell.paragraphs:
|
for paragraph in cell.paragraphs:
|
||||||
|
|
@ -296,7 +296,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
|
|
||||||
def _apply_horizontal_borders_only(self, table) -> None:
|
def _applyHorizontalBordersOnly(self, table) -> None:
|
||||||
"""Apply only horizontal borders to the table (no vertical borders)."""
|
"""Apply only horizontal borders to the table (no vertical borders)."""
|
||||||
try:
|
try:
|
||||||
from docx.oxml.shared import OxmlElement, qn
|
from docx.oxml.shared import OxmlElement, qn
|
||||||
|
|
@ -359,7 +359,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
|
self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
|
||||||
|
|
||||||
def _set_cell_background(self, cell, color: RGBColor) -> None:
|
def _setCellBackground(self, cell, color: RGBColor) -> None:
|
||||||
"""Set the background color of a table cell."""
|
"""Set the background color of a table cell."""
|
||||||
try:
|
try:
|
||||||
from docx.oxml.shared import OxmlElement, qn
|
from docx.oxml.shared import OxmlElement, qn
|
||||||
|
|
@ -389,7 +389,7 @@ class RendererDocx(BaseRenderer):
|
||||||
self.logger.warning(f"Could not set cell background: {str(e)}")
|
self.logger.warning(f"Could not set cell background: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonBulletList(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON bullet list to DOCX using AI-generated styles."""
|
"""Render a JSON bullet list to DOCX using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = list_data.get("items", [])
|
||||||
|
|
@ -404,7 +404,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||||
|
|
||||||
def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonHeading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON heading to DOCX using AI-generated styles."""
|
"""Render a JSON heading to DOCX using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
level = heading_data.get("level", 1)
|
level = heading_data.get("level", 1)
|
||||||
|
|
@ -417,7 +417,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
|
|
||||||
def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonParagraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON paragraph to DOCX using AI-generated styles."""
|
"""Render a JSON paragraph to DOCX using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraph_data.get("text", "")
|
||||||
|
|
@ -428,7 +428,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
|
|
||||||
def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonCodeBlock(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON code block to DOCX using AI-generated styles."""
|
"""Render a JSON code block to DOCX using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = code_data.get("code", "")
|
||||||
|
|
@ -447,7 +447,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
|
|
||||||
def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _renderJsonImage(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Render a JSON image to DOCX."""
|
"""Render a JSON image to DOCX."""
|
||||||
try:
|
try:
|
||||||
base64_data = image_data.get("base64Data", "")
|
base64_data = image_data.get("base64Data", "")
|
||||||
|
|
@ -465,7 +465,7 @@ class RendererDocx(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering image: {str(e)}")
|
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||||
doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
|
doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
|
||||||
|
|
||||||
def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]:
|
def _extractStructureFromPrompt(self, userPrompt: str, title: str) -> Dict[str, Any]:
|
||||||
"""Extract document structure from user prompt."""
|
"""Extract document structure from user prompt."""
|
||||||
structure = {
|
structure = {
|
||||||
'title': title,
|
'title': title,
|
||||||
|
|
@ -473,21 +473,21 @@ class RendererDocx(BaseRenderer):
|
||||||
'format': 'standard'
|
'format': 'standard'
|
||||||
}
|
}
|
||||||
|
|
||||||
if not user_prompt:
|
if not userPrompt:
|
||||||
return structure
|
return structure
|
||||||
|
|
||||||
# Extract title from prompt if not provided
|
# Extract title from prompt if not provided
|
||||||
if not title or title == "Generated Document":
|
if not title or title == "Generated Document":
|
||||||
# Look for "create a ... document" or "generate a ... report"
|
# Look for "create a ... document" or "generate a ... report"
|
||||||
import re
|
import re
|
||||||
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower())
|
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
|
||||||
if title_match:
|
if title_match:
|
||||||
structure['title'] = title_match.group(1).strip().title()
|
structure['title'] = title_match.group(1).strip().title()
|
||||||
|
|
||||||
# Extract sections from numbered lists in prompt
|
# Extract sections from numbered lists in prompt
|
||||||
import re
|
import re
|
||||||
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
|
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
|
||||||
sections = re.findall(section_pattern, user_prompt)
|
sections = re.findall(section_pattern, userPrompt)
|
||||||
|
|
||||||
for num, section_text in sections:
|
for num, section_text in sections:
|
||||||
structure['sections'].append({
|
structure['sections'].append({
|
||||||
|
|
@ -498,7 +498,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
# If no numbered sections found, try to extract from "including:" patterns
|
# If no numbered sections found, try to extract from "including:" patterns
|
||||||
if not structure['sections']:
|
if not structure['sections']:
|
||||||
including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL)
|
including_match = re.search(r'including:\s*(.+?)(?:\.|$)', userPrompt, re.DOTALL)
|
||||||
if including_match:
|
if including_match:
|
||||||
including_text = including_match.group(1)
|
including_text = including_match.group(1)
|
||||||
# Split by common separators
|
# Split by common separators
|
||||||
|
|
@ -516,7 +516,7 @@ class RendererDocx(BaseRenderer):
|
||||||
if not structure['sections']:
|
if not structure['sections']:
|
||||||
# Look for bullet points or dashes
|
# Look for bullet points or dashes
|
||||||
bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
|
bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
|
||||||
bullets = re.findall(bullet_pattern, user_prompt)
|
bullets = re.findall(bullet_pattern, userPrompt)
|
||||||
for i, bullet in enumerate(bullets, 1):
|
for i, bullet in enumerate(bullets, 1):
|
||||||
bullet = bullet.strip()
|
bullet = bullet.strip()
|
||||||
if bullet and len(bullet) > 3:
|
if bullet and len(bullet) > 3:
|
||||||
|
|
@ -529,7 +529,7 @@ class RendererDocx(BaseRenderer):
|
||||||
# If still no sections, extract from sentence structure
|
# If still no sections, extract from sentence structure
|
||||||
if not structure['sections']:
|
if not structure['sections']:
|
||||||
# Split prompt into sentences and use as sections
|
# Split prompt into sentences and use as sections
|
||||||
sentences = re.split(r'[.!?]\s+', user_prompt)
|
sentences = re.split(r'[.!?]\s+', userPrompt)
|
||||||
for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
|
for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
|
||||||
sentence = sentence.strip()
|
sentence = sentence.strip()
|
||||||
if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
|
if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
|
||||||
|
|
@ -545,7 +545,7 @@ class RendererDocx(BaseRenderer):
|
||||||
action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
|
action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
|
||||||
found_actions = []
|
found_actions = []
|
||||||
for action in action_words:
|
for action in action_words:
|
||||||
if action in user_prompt.lower():
|
if action in userPrompt.lower():
|
||||||
found_actions.append(action.title())
|
found_actions.append(action.title())
|
||||||
|
|
||||||
if found_actions:
|
if found_actions:
|
||||||
|
|
@ -565,7 +565,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
return structure
|
return structure
|
||||||
|
|
||||||
def _generate_from_structure(self, doc, content: str, structure: Dict[str, Any]):
|
def _generateFromStructure(self, doc, content: str, structure: Dict[str, Any]):
|
||||||
"""Generate DOCX content based on extracted structure."""
|
"""Generate DOCX content based on extracted structure."""
|
||||||
# Add sections based on prompt structure
|
# Add sections based on prompt structure
|
||||||
for section in structure['sections']:
|
for section in structure['sections']:
|
||||||
|
|
@ -574,7 +574,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
# Add AI-generated content for this section
|
# Add AI-generated content for this section
|
||||||
# Try to extract relevant content for this section from the AI response
|
# Try to extract relevant content for this section from the AI response
|
||||||
section_content = self._extract_section_content(content, section['title'])
|
section_content = self._extractSectionContent(content, section['title'])
|
||||||
|
|
||||||
if section_content:
|
if section_content:
|
||||||
doc.add_paragraph(section_content)
|
doc.add_paragraph(section_content)
|
||||||
|
|
@ -590,7 +590,7 @@ class RendererDocx(BaseRenderer):
|
||||||
doc.add_heading("Complete Analysis", level=1)
|
doc.add_heading("Complete Analysis", level=1)
|
||||||
doc.add_paragraph(content)
|
doc.add_paragraph(content)
|
||||||
|
|
||||||
def _extract_section_content(self, content: str, section_title: str) -> str:
|
def _extractSectionContent(self, content: str, section_title: str) -> str:
|
||||||
"""Extract relevant content for a specific section from AI response."""
|
"""Extract relevant content for a specific section from AI response."""
|
||||||
if not content or not section_title:
|
if not content or not section_title:
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -613,7 +613,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _setup_document_styles(self, doc):
|
def _setupDocumentStyles(self, doc):
|
||||||
"""Set up document styles."""
|
"""Set up document styles."""
|
||||||
try:
|
try:
|
||||||
# Set default font
|
# Set default font
|
||||||
|
|
@ -632,7 +632,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not set up document styles: {str(e)}")
|
self.logger.warning(f"Could not set up document styles: {str(e)}")
|
||||||
|
|
||||||
def _process_section(self, doc, lines: list):
|
def _processSection(self, doc, lines: list):
|
||||||
"""Process a section of content into DOCX elements."""
|
"""Process a section of content into DOCX elements."""
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if not line.strip():
|
if not line.strip():
|
||||||
|
|
@ -641,9 +641,9 @@ class RendererDocx(BaseRenderer):
|
||||||
# Check for tables (lines with |)
|
# Check for tables (lines with |)
|
||||||
if '|' in line and not line.startswith('|'):
|
if '|' in line and not line.startswith('|'):
|
||||||
# This might be part of a table, process as table
|
# This might be part of a table, process as table
|
||||||
table_data = self._extract_table_data(lines)
|
table_data = self._extractTableData(lines)
|
||||||
if table_data:
|
if table_data:
|
||||||
self._add_table(doc, table_data)
|
self._addTable(doc, table_data)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check for lists
|
# Check for lists
|
||||||
|
|
@ -657,7 +657,7 @@ class RendererDocx(BaseRenderer):
|
||||||
# Regular paragraph
|
# Regular paragraph
|
||||||
doc.add_paragraph(line)
|
doc.add_paragraph(line)
|
||||||
|
|
||||||
def _extract_table_data(self, lines: list) -> list:
|
def _extractTableData(self, lines: list) -> list:
|
||||||
"""Extract table data from lines."""
|
"""Extract table data from lines."""
|
||||||
table_data = []
|
table_data = []
|
||||||
in_table = False
|
in_table = False
|
||||||
|
|
@ -676,7 +676,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
return table_data if len(table_data) > 1 else []
|
return table_data if len(table_data) > 1 else []
|
||||||
|
|
||||||
def _add_table(self, doc, table_data: list):
|
def _addTable(self, doc, table_data: list):
|
||||||
"""Add a table to the document."""
|
"""Add a table to the document."""
|
||||||
try:
|
try:
|
||||||
if not table_data:
|
if not table_data:
|
||||||
|
|
@ -693,12 +693,12 @@ class RendererDocx(BaseRenderer):
|
||||||
table.rows[row_idx].cells[col_idx].text = cell_data
|
table.rows[row_idx].cells[col_idx].text = cell_data
|
||||||
|
|
||||||
# Style the table
|
# Style the table
|
||||||
self._style_table(table)
|
self._styleTable(table)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not add table: {str(e)}")
|
self.logger.warning(f"Could not add table: {str(e)}")
|
||||||
|
|
||||||
def _style_table(self, table):
|
def _styleTable(self, table):
|
||||||
"""Apply styling to the table."""
|
"""Apply styling to the table."""
|
||||||
try:
|
try:
|
||||||
# Style header row
|
# Style header row
|
||||||
|
|
@ -711,7 +711,7 @@ class RendererDocx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not style table: {str(e)}")
|
self.logger.warning(f"Could not style table: {str(e)}")
|
||||||
|
|
||||||
def _process_table_row(self, doc, line: str):
|
def _processTableRow(self, doc, line: str):
|
||||||
"""Process a table row and add it to the document."""
|
"""Process a table row and add it to the document."""
|
||||||
if not line.strip():
|
if not line.strip():
|
||||||
return
|
return
|
||||||
|
|
@ -745,7 +745,7 @@ class RendererDocx(BaseRenderer):
|
||||||
# Not a table row, treat as regular text
|
# Not a table row, treat as regular text
|
||||||
doc.add_paragraph(line)
|
doc.add_paragraph(line)
|
||||||
|
|
||||||
def _clean_ai_content(self, content: str) -> str:
|
def _cleanAiContent(self, content: str) -> str:
|
||||||
"""Clean AI-generated content by removing debug information and duplicates."""
|
"""Clean AI-generated content by removing debug information and duplicates."""
|
||||||
if not content:
|
if not content:
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -781,7 +781,7 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
return '\n\n'.join(unique_sections)
|
return '\n\n'.join(unique_sections)
|
||||||
|
|
||||||
def _process_tables(self, doc, content: str) -> str:
|
def _processTables(self, doc, content: str) -> str:
|
||||||
"""
|
"""
|
||||||
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
|
Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
|
||||||
Returns the content with tables replaced by placeholders.
|
Returns the content with tables replaced by placeholders.
|
||||||
|
|
@ -864,13 +864,13 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
return '\n'.join(processed_lines)
|
return '\n'.join(processed_lines)
|
||||||
|
|
||||||
def _parse_and_format_content(self, doc, content: str, title: str):
|
def _parseAndFormatContent(self, doc, content: str, title: str):
|
||||||
"""Parse AI-generated content in standardized format and apply proper DOCX formatting."""
|
"""Parse AI-generated content in standardized format and apply proper DOCX formatting."""
|
||||||
if not content:
|
if not content:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Process tables and replace them with placeholders
|
# Process tables and replace them with placeholders
|
||||||
content = self._process_tables(doc, content)
|
content = self._processTables(doc, content)
|
||||||
|
|
||||||
# Parse content line by line in exact sequence
|
# Parse content line by line in exact sequence
|
||||||
lines = content.split('\n')
|
lines = content.split('\n')
|
||||||
|
|
@ -920,9 +920,9 @@ class RendererDocx(BaseRenderer):
|
||||||
|
|
||||||
# Regular paragraph
|
# Regular paragraph
|
||||||
else:
|
else:
|
||||||
self._add_paragraph_to_doc(doc, line)
|
self._addParagraphToDoc(doc, line)
|
||||||
|
|
||||||
def _add_paragraph_to_doc(self, doc, text: str):
|
def _addParagraphToDoc(self, doc, text: str):
|
||||||
"""Add a paragraph to the document with proper formatting."""
|
"""Add a paragraph to the document with proper formatting."""
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
return
|
return
|
||||||
|
|
|
||||||
|
|
@ -9,97 +9,97 @@ class RendererHtml(BaseRenderer):
|
||||||
"""Renders content to HTML format with format-specific extraction."""
|
"""Renders content to HTML format with format-specific extraction."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported HTML formats."""
|
"""Return supported HTML formats."""
|
||||||
return ['html', 'htm']
|
return ['html', 'htm']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['web', 'webpage']
|
return ['web', 'webpage']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for HTML renderer."""
|
"""Return priority for HTML renderer."""
|
||||||
return 100
|
return 100
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
|
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
|
||||||
try:
|
try:
|
||||||
# Generate HTML using AI-analyzed styling
|
# Generate HTML using AI-analyzed styling
|
||||||
html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
|
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return html_content, "text/html"
|
return htmlContent, "text/html"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering HTML: {str(e)}")
|
self.logger.error(f"Error rendering HTML: {str(e)}")
|
||||||
# Return minimal HTML fallback
|
# Return minimal HTML fallback
|
||||||
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
|
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
|
||||||
|
|
||||||
async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate HTML content from structured JSON document using AI-generated styling."""
|
"""Generate HTML content from structured JSON document using AI-generated styling."""
|
||||||
try:
|
try:
|
||||||
# Get AI-generated styling definitions
|
# Get AI-generated styling definitions
|
||||||
styles = await self._get_html_styles(user_prompt, ai_service)
|
styles = await self._getHtmlStyles(userPrompt, aiService)
|
||||||
|
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(jsonContent, dict):
|
||||||
raise ValueError("JSON content must be a dictionary")
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
if "sections" not in json_content:
|
if "sections" not in jsonContent:
|
||||||
raise ValueError("JSON content must contain 'sections' field")
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
# Use title from JSON metadata if available, otherwise use provided title
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
document_title = json_content.get("metadata", {}).get("title", title)
|
documentTitle = jsonContent.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
# Build HTML document
|
# Build HTML document
|
||||||
html_parts = []
|
htmlParts = []
|
||||||
|
|
||||||
# HTML document structure
|
# HTML document structure
|
||||||
html_parts.append('<!DOCTYPE html>')
|
htmlParts.append('<!DOCTYPE html>')
|
||||||
html_parts.append('<html lang="en">')
|
htmlParts.append('<html lang="en">')
|
||||||
html_parts.append('<head>')
|
htmlParts.append('<head>')
|
||||||
html_parts.append('<meta charset="UTF-8">')
|
htmlParts.append('<meta charset="UTF-8">')
|
||||||
html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
|
htmlParts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
|
||||||
html_parts.append(f'<title>{document_title}</title>')
|
htmlParts.append(f'<title>{documentTitle}</title>')
|
||||||
html_parts.append('<style>')
|
htmlParts.append('<style>')
|
||||||
html_parts.append(self._generate_css_styles(styles))
|
htmlParts.append(self._generateCssStyles(styles))
|
||||||
html_parts.append('</style>')
|
htmlParts.append('</style>')
|
||||||
html_parts.append('</head>')
|
htmlParts.append('</head>')
|
||||||
html_parts.append('<body>')
|
htmlParts.append('<body>')
|
||||||
|
|
||||||
# Document header
|
# Document header
|
||||||
html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>')
|
htmlParts.append(f'<header><h1 class="document-title">{documentTitle}</h1></header>')
|
||||||
|
|
||||||
# Main content
|
# Main content
|
||||||
html_parts.append('<main>')
|
htmlParts.append('<main>')
|
||||||
|
|
||||||
# Process each section
|
# Process each section
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
for section in sections:
|
for section in sections:
|
||||||
section_html = self._render_json_section(section, styles)
|
sectionHtml = self._renderJsonSection(section, styles)
|
||||||
if section_html:
|
if sectionHtml:
|
||||||
html_parts.append(section_html)
|
htmlParts.append(sectionHtml)
|
||||||
|
|
||||||
html_parts.append('</main>')
|
htmlParts.append('</main>')
|
||||||
|
|
||||||
# Footer
|
# Footer
|
||||||
html_parts.append('<footer>')
|
htmlParts.append('<footer>')
|
||||||
html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
|
htmlParts.append(f'<p class="generated-info">Generated: {self._formatTimestamp()}</p>')
|
||||||
html_parts.append('</footer>')
|
htmlParts.append('</footer>')
|
||||||
|
|
||||||
html_parts.append('</body>')
|
htmlParts.append('</body>')
|
||||||
html_parts.append('</html>')
|
htmlParts.append('</html>')
|
||||||
|
|
||||||
return '\n'.join(html_parts)
|
return '\n'.join(htmlParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
|
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
|
||||||
raise Exception(f"HTML generation failed: {str(e)}")
|
raise Exception(f"HTML generation failed: {str(e)}")
|
||||||
|
|
||||||
async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
async def _getHtmlStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
|
||||||
"""Get HTML styling definitions using base template AI styling."""
|
"""Get HTML styling definitions using base template AI styling."""
|
||||||
style_schema = {
|
styleSchema = {
|
||||||
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
|
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
|
||||||
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
|
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
|
||||||
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
|
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
|
||||||
|
|
@ -113,40 +113,40 @@ class RendererHtml(BaseRenderer):
|
||||||
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
|
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
|
||||||
}
|
}
|
||||||
|
|
||||||
style_template = self._create_ai_style_template("html", user_prompt, style_schema)
|
styleTemplate = self._createAiStyleTemplate("html", userPrompt, styleSchema)
|
||||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles())
|
styles = await self._getAiStyles(aiService, styleTemplate, self._getDefaultHtmlStyles())
|
||||||
|
|
||||||
# Validate and fix contrast issues
|
# Validate and fix contrast issues
|
||||||
return self._validate_html_styles_contrast(styles)
|
return self._validateHtmlStylesContrast(styles)
|
||||||
|
|
||||||
def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _validateHtmlStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Validate and fix contrast issues in AI-generated styles."""
|
"""Validate and fix contrast issues in AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Fix table header contrast
|
# Fix table header contrast
|
||||||
if "table_header" in styles:
|
if "table_header" in styles:
|
||||||
header = styles["table_header"]
|
header = styles["table_header"]
|
||||||
bg_color = header.get("background", "#FFFFFF")
|
bgColor = header.get("background", "#FFFFFF")
|
||||||
text_color = header.get("color", "#000000")
|
textColor = header.get("color", "#000000")
|
||||||
|
|
||||||
# If both are white or both are dark, fix it
|
# If both are white or both are dark, fix it
|
||||||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
||||||
header["background"] = "#4F4F4F"
|
header["background"] = "#4F4F4F"
|
||||||
header["color"] = "#FFFFFF"
|
header["color"] = "#FFFFFF"
|
||||||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
||||||
header["background"] = "#4F4F4F"
|
header["background"] = "#4F4F4F"
|
||||||
header["color"] = "#FFFFFF"
|
header["color"] = "#FFFFFF"
|
||||||
|
|
||||||
# Fix table cell contrast
|
# Fix table cell contrast
|
||||||
if "table_cell" in styles:
|
if "table_cell" in styles:
|
||||||
cell = styles["table_cell"]
|
cell = styles["table_cell"]
|
||||||
bg_color = cell.get("background", "#FFFFFF")
|
bgColor = cell.get("background", "#FFFFFF")
|
||||||
text_color = cell.get("color", "#000000")
|
textColor = cell.get("color", "#000000")
|
||||||
|
|
||||||
# If both are white or both are dark, fix it
|
# If both are white or both are dark, fix it
|
||||||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
||||||
cell["background"] = "#FFFFFF"
|
cell["background"] = "#FFFFFF"
|
||||||
cell["color"] = "#2F2F2F"
|
cell["color"] = "#2F2F2F"
|
||||||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
||||||
cell["background"] = "#FFFFFF"
|
cell["background"] = "#FFFFFF"
|
||||||
cell["color"] = "#2F2F2F"
|
cell["color"] = "#2F2F2F"
|
||||||
|
|
||||||
|
|
@ -154,10 +154,10 @@ class RendererHtml(BaseRenderer):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Style validation failed: {str(e)}")
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||||
return self._get_default_html_styles()
|
return self._getDefaultHtmlStyles()
|
||||||
|
|
||||||
|
|
||||||
def _get_default_html_styles(self) -> Dict[str, Any]:
|
def _getDefaultHtmlStyles(self) -> Dict[str, Any]:
|
||||||
"""Default HTML styles."""
|
"""Default HTML styles."""
|
||||||
return {
|
return {
|
||||||
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
|
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
|
||||||
|
|
@ -173,7 +173,7 @@ class RendererHtml(BaseRenderer):
|
||||||
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
|
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
|
def _generateCssStyles(self, styles: Dict[str, Any]) -> str:
|
||||||
"""Generate CSS from style definitions."""
|
"""Generate CSS from style definitions."""
|
||||||
css_parts = []
|
css_parts = []
|
||||||
|
|
||||||
|
|
@ -271,109 +271,109 @@ class RendererHtml(BaseRenderer):
|
||||||
|
|
||||||
return '\n'.join(css_parts)
|
return '\n'.join(css_parts)
|
||||||
|
|
||||||
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a single JSON section to HTML using AI-generated styles."""
|
"""Render a single JSON section to HTML using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
section_type = self._get_section_type(section)
|
sectionType = self._getSectionType(section)
|
||||||
section_data = self._get_section_data(section)
|
sectionData = self._getSectionData(section)
|
||||||
|
|
||||||
if section_type == "table":
|
if sectionType == "table":
|
||||||
# Process the section data to extract table structure
|
# Process the section data to extract table structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_table(processed_data, styles)
|
return self._renderJsonTable(processedData, styles)
|
||||||
elif section_type == "bullet_list":
|
elif sectionType == "bullet_list":
|
||||||
# Process the section data to extract bullet list structure
|
# Process the section data to extract bullet list structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_bullet_list(processed_data, styles)
|
return self._renderJsonBulletList(processedData, styles)
|
||||||
elif section_type == "heading":
|
elif sectionType == "heading":
|
||||||
return self._render_json_heading(section_data, styles)
|
return self._renderJsonHeading(sectionData, styles)
|
||||||
elif section_type == "paragraph":
|
elif sectionType == "paragraph":
|
||||||
return self._render_json_paragraph(section_data, styles)
|
return self._renderJsonParagraph(sectionData, styles)
|
||||||
elif section_type == "code_block":
|
elif sectionType == "code_block":
|
||||||
# Process the section data to extract code block structure
|
# Process the section data to extract code block structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_code_block(processed_data, styles)
|
return self._renderJsonCodeBlock(processedData, styles)
|
||||||
elif section_type == "image":
|
elif sectionType == "image":
|
||||||
# Process the section data to extract image structure
|
# Process the section data to extract image structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_image(processed_data, styles)
|
return self._renderJsonImage(processedData, styles)
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph for unknown types
|
# Fallback to paragraph for unknown types
|
||||||
return self._render_json_paragraph(section_data, styles)
|
return self._renderJsonParagraph(sectionData, styles)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
|
||||||
return f'<div class="error">[Error rendering section: {str(e)}]</div>'
|
return f'<div class="error">[Error rendering section: {str(e)}]</div>'
|
||||||
|
|
||||||
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonTable(self, tableData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON table to HTML using AI-generated styles."""
|
"""Render a JSON table to HTML using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
headers = table_data.get("headers", [])
|
headers = tableData.get("headers", [])
|
||||||
rows = table_data.get("rows", [])
|
rows = tableData.get("rows", [])
|
||||||
|
|
||||||
if not headers or not rows:
|
if not headers or not rows:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
html_parts = ['<table>']
|
htmlParts = ['<table>']
|
||||||
|
|
||||||
# Table header
|
# Table header
|
||||||
html_parts.append('<thead><tr>')
|
htmlParts.append('<thead><tr>')
|
||||||
for header in headers:
|
for header in headers:
|
||||||
html_parts.append(f'<th>{header}</th>')
|
htmlParts.append(f'<th>{header}</th>')
|
||||||
html_parts.append('</tr></thead>')
|
htmlParts.append('</tr></thead>')
|
||||||
|
|
||||||
# Table body
|
# Table body
|
||||||
html_parts.append('<tbody>')
|
htmlParts.append('<tbody>')
|
||||||
for row in rows:
|
for row in rows:
|
||||||
html_parts.append('<tr>')
|
htmlParts.append('<tr>')
|
||||||
for cell_data in row:
|
for cellData in row:
|
||||||
html_parts.append(f'<td>{cell_data}</td>')
|
htmlParts.append(f'<td>{cellData}</td>')
|
||||||
html_parts.append('</tr>')
|
htmlParts.append('</tr>')
|
||||||
html_parts.append('</tbody>')
|
htmlParts.append('</tbody>')
|
||||||
|
|
||||||
html_parts.append('</table>')
|
htmlParts.append('</table>')
|
||||||
return '\n'.join(html_parts)
|
return '\n'.join(htmlParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonBulletList(self, listData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON bullet list to HTML using AI-generated styles."""
|
"""Render a JSON bullet list to HTML using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = listData.get("items", [])
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
html_parts = ['<ul>']
|
htmlParts = ['<ul>']
|
||||||
for item in items:
|
for item in items:
|
||||||
if isinstance(item, str):
|
if isinstance(item, str):
|
||||||
html_parts.append(f'<li>{item}</li>')
|
htmlParts.append(f'<li>{item}</li>')
|
||||||
elif isinstance(item, dict) and "text" in item:
|
elif isinstance(item, dict) and "text" in item:
|
||||||
html_parts.append(f'<li>{item["text"]}</li>')
|
htmlParts.append(f'<li>{item["text"]}</li>')
|
||||||
html_parts.append('</ul>')
|
htmlParts.append('</ul>')
|
||||||
|
|
||||||
return '\n'.join(html_parts)
|
return '\n'.join(htmlParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonHeading(self, headingData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON heading to HTML using AI-generated styles."""
|
"""Render a JSON heading to HTML using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Normalize non-dict inputs
|
# Normalize non-dict inputs
|
||||||
if isinstance(heading_data, str):
|
if isinstance(headingData, str):
|
||||||
heading_data = {"text": heading_data, "level": 2}
|
headingData = {"text": headingData, "level": 2}
|
||||||
elif isinstance(heading_data, list):
|
elif isinstance(headingData, list):
|
||||||
# Render a list as bullet list under a default heading label
|
# Render a list as bullet list under a default heading label
|
||||||
return self._render_json_bullet_list({"items": heading_data}, styles)
|
return self._renderJsonBulletList({"items": headingData}, styles)
|
||||||
elif not isinstance(heading_data, dict):
|
elif not isinstance(headingData, dict):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
level = heading_data.get("level", 1)
|
level = headingData.get("level", 1)
|
||||||
text = heading_data.get("text", "")
|
text = headingData.get("text", "")
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
level = max(1, min(6, level))
|
level = max(1, min(6, level))
|
||||||
|
|
@ -385,19 +385,19 @@ class RendererHtml(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonParagraph(self, paragraphData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON paragraph to HTML using AI-generated styles."""
|
"""Render a JSON paragraph to HTML using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Normalize non-dict inputs
|
# Normalize non-dict inputs
|
||||||
if isinstance(paragraph_data, str):
|
if isinstance(paragraphData, str):
|
||||||
paragraph_data = {"text": paragraph_data}
|
paragraphData = {"text": paragraphData}
|
||||||
elif isinstance(paragraph_data, list):
|
elif isinstance(paragraphData, list):
|
||||||
# Treat list as bullet list paragraph
|
# Treat list as bullet list paragraph
|
||||||
return self._render_json_bullet_list({"items": paragraph_data}, styles)
|
return self._renderJsonBulletList({"items": paragraphData}, styles)
|
||||||
elif not isinstance(paragraph_data, dict):
|
elif not isinstance(paragraphData, dict):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraphData.get("text", "")
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
return f'<p>{text}</p>'
|
return f'<p>{text}</p>'
|
||||||
|
|
@ -408,11 +408,11 @@ class RendererHtml(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonCodeBlock(self, codeData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON code block to HTML using AI-generated styles."""
|
"""Render a JSON code block to HTML using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = codeData.get("code", "")
|
||||||
language = code_data.get("language", "")
|
language = codeData.get("language", "")
|
||||||
|
|
||||||
if code:
|
if code:
|
||||||
if language:
|
if language:
|
||||||
|
|
@ -426,17 +426,17 @@ class RendererHtml(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON image to HTML."""
|
"""Render a JSON image to HTML."""
|
||||||
try:
|
try:
|
||||||
base64_data = image_data.get("base64Data", "")
|
base64Data = imageData.get("base64Data", "")
|
||||||
alt_text = image_data.get("altText", "Image")
|
altText = imageData.get("altText", "Image")
|
||||||
|
|
||||||
if base64_data:
|
if base64Data:
|
||||||
return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">'
|
return f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering image: {str(e)}")
|
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||||
return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>'
|
return f'<div class="error">[Image: {imageData.get("altText", "Image")}]</div>'
|
||||||
|
|
|
||||||
|
|
@ -12,154 +12,156 @@ class RendererImage(BaseRenderer):
|
||||||
"""Renders content to image format using AI image generation."""
|
"""Renders content to image format using AI image generation."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported image formats."""
|
"""Return supported image formats."""
|
||||||
return ['png', 'jpg', 'jpeg', 'image']
|
return ['png', 'jpg', 'jpeg', 'image']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['img', 'picture', 'photo', 'graphic']
|
return ['img', 'picture', 'photo', 'graphic']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for image renderer."""
|
"""Return priority for image renderer."""
|
||||||
return 90
|
return 90
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to image format using AI image generation."""
|
"""Render extracted JSON content to image format using AI image generation."""
|
||||||
try:
|
try:
|
||||||
# Generate AI image from content
|
# Generate AI image from content
|
||||||
image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
|
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return image_content, "image/png"
|
return imageContent, "image/png"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering image: {str(e)}")
|
self.logger.error(f"Error rendering image: {str(e)}")
|
||||||
# Re-raise the exception instead of using fallback
|
# Re-raise the exception instead of using fallback
|
||||||
raise Exception(f"Image rendering failed: {str(e)}")
|
raise Exception(f"Image rendering failed: {str(e)}")
|
||||||
|
|
||||||
async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
async def _generateAiImage(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate AI image from extracted content."""
|
"""Generate AI image from extracted content."""
|
||||||
try:
|
try:
|
||||||
if not ai_service:
|
if not aiService:
|
||||||
raise ValueError("AI service is required for image generation")
|
raise ValueError("AI service is required for image generation")
|
||||||
|
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(extracted_content, dict):
|
if not isinstance(extractedContent, dict):
|
||||||
raise ValueError("Extracted content must be a dictionary")
|
raise ValueError("Extracted content must be a dictionary")
|
||||||
|
|
||||||
if "sections" not in extracted_content:
|
if "sections" not in extractedContent:
|
||||||
raise ValueError("Extracted content must contain 'sections' field")
|
raise ValueError("Extracted content must contain 'sections' field")
|
||||||
|
|
||||||
# Use title from JSON metadata if available, otherwise use provided title
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
document_title = extracted_content.get("metadata", {}).get("title", title)
|
documentTitle = extractedContent.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
# Create AI prompt for image generation
|
# Create AI prompt for image generation
|
||||||
image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service)
|
imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)
|
||||||
|
|
||||||
# Save image generation prompt to debug
|
# Save image generation prompt to debug
|
||||||
ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt")
|
aiService.services.utils.writeDebugFile(imagePrompt, "image_generation_prompt")
|
||||||
|
|
||||||
# Generate image using AI
|
# Generate image using AI
|
||||||
image_result = await ai_service.aiObjects.generateImage(
|
imageResult = await aiService.aiObjects.generateImage(
|
||||||
prompt=image_prompt,
|
prompt=imagePrompt,
|
||||||
size="1024x1024",
|
size="1024x1024",
|
||||||
quality="standard",
|
quality="standard",
|
||||||
style="vivid"
|
style="vivid"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save image generation response to debug
|
# Save image generation response to debug
|
||||||
ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response")
|
aiService.services.utils.writeDebugFile(str(imageResult), "image_generation_response")
|
||||||
|
|
||||||
# Extract base64 image data from result
|
# Extract base64 image data from result
|
||||||
if image_result and image_result.get("success", False):
|
if imageResult and imageResult.get("success", False):
|
||||||
image_data = image_result.get("image_data", "")
|
imageData = imageResult.get("image_data", "")
|
||||||
if image_data:
|
if imageData:
|
||||||
return image_data
|
return imageData
|
||||||
else:
|
else:
|
||||||
raise ValueError("No image data returned from AI")
|
raise ValueError("No image data returned from AI")
|
||||||
else:
|
else:
|
||||||
error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
|
errorMsg = imageResult.get("error", "Unknown error") if imageResult else "No result"
|
||||||
raise ValueError(f"AI image generation failed: {error_msg}")
|
raise ValueError(f"AI image generation failed: {errorMsg}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating AI image: {str(e)}")
|
self.logger.error(f"Error generating AI image: {str(e)}")
|
||||||
raise Exception(f"AI image generation failed: {str(e)}")
|
raise Exception(f"AI image generation failed: {str(e)}")
|
||||||
|
|
||||||
async def _create_imageGenerate_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
async def _createImageGeneratePrompt(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Create a detailed prompt for AI image generation based on the content."""
|
"""Create a detailed prompt for AI image generation based on the content."""
|
||||||
try:
|
try:
|
||||||
# Start with base prompt
|
# Start with base prompt
|
||||||
prompt_parts = []
|
promptParts = []
|
||||||
|
|
||||||
# Add user's original intent if available
|
# Add user's original intent if available
|
||||||
if user_prompt:
|
if userPrompt:
|
||||||
prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}")
|
sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
|
||||||
|
promptParts.append(f"User Request: {sanitized_prompt}")
|
||||||
|
|
||||||
# Add document title
|
# Add document title
|
||||||
prompt_parts.append(f"Document Title: {title}")
|
promptParts.append(f"Document Title: {title}")
|
||||||
|
|
||||||
# Analyze content and create visual description
|
# Analyze content and create visual description
|
||||||
sections = extracted_content.get("sections", [])
|
sections = extractedContent.get("sections", [])
|
||||||
content_description = self._analyze_content_for_visual_description(sections)
|
contentDescription = self._analyzeContentForVisualDescription(sections)
|
||||||
|
|
||||||
if content_description:
|
if contentDescription:
|
||||||
prompt_parts.append(f"Content to Visualize: {content_description}")
|
promptParts.append(f"Content to Visualize: {contentDescription}")
|
||||||
|
|
||||||
# Add style guidance
|
# Add style guidance
|
||||||
style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
|
styleGuidance = self._getStyleGuidanceFromContent(extractedContent, userPrompt)
|
||||||
if style_guidance:
|
if styleGuidance:
|
||||||
prompt_parts.append(f"Visual Style: {style_guidance}")
|
promptParts.append(f"Visual Style: {styleGuidance}")
|
||||||
|
|
||||||
# Combine all parts
|
# Combine all parts
|
||||||
full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)
|
fullPrompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(promptParts)
|
||||||
|
|
||||||
# Add technical requirements
|
# Add technical requirements
|
||||||
full_prompt += "\n\nTechnical Requirements:"
|
fullPrompt += "\n\nTechnical Requirements:"
|
||||||
full_prompt += "\n- High quality, professional appearance"
|
fullPrompt += "\n- High quality, professional appearance"
|
||||||
full_prompt += "\n- Clear, readable text if any text is included"
|
fullPrompt += "\n- Clear, readable text if any text is included"
|
||||||
full_prompt += "\n- Appropriate colors and layout"
|
fullPrompt += "\n- Appropriate colors and layout"
|
||||||
full_prompt += "\n- Suitable for business/professional use"
|
fullPrompt += "\n- Suitable for business/professional use"
|
||||||
|
|
||||||
# Truncate prompt if it exceeds DALL-E's 4000 character limit
|
# Truncate prompt if it exceeds DALL-E's 4000 character limit
|
||||||
if len(full_prompt) > 4000:
|
if len(fullPrompt) > 4000:
|
||||||
# Use AI to compress the prompt intelligently
|
# Use AI to compress the prompt intelligently
|
||||||
compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
|
compressedPrompt = await self._compressPromptWithAi(fullPrompt, aiService)
|
||||||
if compressed_prompt and len(compressed_prompt) <= 4000:
|
if compressedPrompt and len(compressedPrompt) <= 4000:
|
||||||
return compressed_prompt
|
return compressedPrompt
|
||||||
|
|
||||||
# Fallback to minimal prompt if AI compression fails or is still too long
|
# Fallback to minimal prompt if AI compression fails or is still too long
|
||||||
minimal_prompt = f"Create a professional image representing: {title}"
|
minimalPrompt = f"Create a professional image representing: {title}"
|
||||||
if user_prompt:
|
if userPrompt:
|
||||||
minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}"
|
sanitized_prompt = aiService.services.utils.sanitizePromptContent(userPrompt, 'userinput') if aiService else userPrompt
|
||||||
|
minimalPrompt += f" - {sanitized_prompt}"
|
||||||
|
|
||||||
# If even the minimal prompt is too long, truncate it
|
# If even the minimal prompt is too long, truncate it
|
||||||
if len(minimal_prompt) > 4000:
|
if len(minimalPrompt) > 4000:
|
||||||
minimal_prompt = minimal_prompt[:3997] + "..."
|
minimalPrompt = minimalPrompt[:3997] + "..."
|
||||||
|
|
||||||
return minimal_prompt
|
return minimalPrompt
|
||||||
|
|
||||||
return full_prompt
|
return fullPrompt
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error creating image prompt: {str(e)}")
|
self.logger.warning(f"Error creating image prompt: {str(e)}")
|
||||||
# Fallback to simple prompt
|
# Fallback to simple prompt
|
||||||
return f"Create a professional image representing: {title}"
|
return f"Create a professional image representing: {title}"
|
||||||
|
|
||||||
async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
|
async def _compressPromptWithAi(self, longPrompt: str, aiService=None) -> str:
|
||||||
"""Use AI to intelligently compress a long prompt while preserving key information."""
|
"""Use AI to intelligently compress a long prompt while preserving key information."""
|
||||||
try:
|
try:
|
||||||
if not ai_service:
|
if not aiService:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
compression_prompt = f"""
|
compressionPrompt = f"""
|
||||||
You are an expert at creating concise, effective prompts for AI image generation.
|
You are an expert at creating concise, effective prompts for AI image generation.
|
||||||
|
|
||||||
The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
|
The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
|
||||||
|
|
||||||
Original prompt ({len(long_prompt)} characters):
|
Original prompt ({len(longPrompt)} characters):
|
||||||
{long_prompt}
|
{longPrompt}
|
||||||
|
|
||||||
Please create a compressed version that:
|
Please create a compressed version that:
|
||||||
1. Keeps the most important visual elements and requirements
|
1. Keeps the most important visual elements and requirements
|
||||||
|
|
@ -176,7 +178,7 @@ Return only the compressed prompt, no explanations.
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||||
|
|
||||||
request = AiCallRequest(
|
request = AiCallRequest(
|
||||||
prompt=compression_prompt,
|
prompt=compressionPrompt,
|
||||||
options=AiCallOptions(
|
options=AiCallOptions(
|
||||||
operationType=OperationTypeEnum.DATA_GENERATE,
|
operationType=OperationTypeEnum.DATA_GENERATE,
|
||||||
maxTokens=None, # Let the model use its full context length
|
maxTokens=None, # Let the model use its full context length
|
||||||
|
|
@ -184,12 +186,12 @@ Return only the compressed prompt, no explanations.
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await ai_service.aiObjects.call(request)
|
response = await aiService.aiObjects.call(request)
|
||||||
compressed = response.content.strip()
|
compressed = response.content.strip()
|
||||||
|
|
||||||
# Validate the compressed prompt
|
# Validate the compressed prompt
|
||||||
if compressed and len(compressed) <= 4000 and len(compressed) > 50:
|
if compressed and len(compressed) <= 4000 and len(compressed) > 50:
|
||||||
self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
|
self.logger.info(f"Successfully compressed prompt from {len(longPrompt)} to {len(compressed)} characters")
|
||||||
return compressed
|
return compressed
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
|
self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
|
||||||
|
|
@ -199,42 +201,42 @@ Return only the compressed prompt, no explanations.
|
||||||
self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
|
self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
|
def _analyzeContentForVisualDescription(self, sections: List[Dict[str, Any]]) -> str:
|
||||||
"""Analyze content sections and create a visual description for AI."""
|
"""Analyze content sections and create a visual description for AI."""
|
||||||
try:
|
try:
|
||||||
descriptions = []
|
descriptions = []
|
||||||
|
|
||||||
for section in sections:
|
for section in sections:
|
||||||
section_type = self._get_section_type(section)
|
sectionType = self._getSectionType(section)
|
||||||
section_data = self._get_section_data(section)
|
sectionData = self._getSectionData(section)
|
||||||
|
|
||||||
if section_type == "table":
|
if sectionType == "table":
|
||||||
headers = section_data.get("headers", [])
|
headers = sectionData.get("headers", [])
|
||||||
rows = section_data.get("rows", [])
|
rows = sectionData.get("rows", [])
|
||||||
if headers and rows:
|
if headers and rows:
|
||||||
descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
|
descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
|
||||||
|
|
||||||
elif section_type == "bullet_list":
|
elif sectionType == "bullet_list":
|
||||||
items = section_data.get("items", [])
|
items = sectionData.get("items", [])
|
||||||
if items:
|
if items:
|
||||||
descriptions.append(f"List with {len(items)} items")
|
descriptions.append(f"List with {len(items)} items")
|
||||||
|
|
||||||
elif section_type == "heading":
|
elif sectionType == "heading":
|
||||||
text = section_data.get("text", "")
|
text = sectionData.get("text", "")
|
||||||
level = section_data.get("level", 1)
|
level = sectionData.get("level", 1)
|
||||||
if text:
|
if text:
|
||||||
descriptions.append(f"Heading {level}: {text}")
|
descriptions.append(f"Heading {level}: {text}")
|
||||||
|
|
||||||
elif section_type == "paragraph":
|
elif sectionType == "paragraph":
|
||||||
text = section_data.get("text", "")
|
text = sectionData.get("text", "")
|
||||||
if text and len(text) > 10: # Only include substantial paragraphs
|
if text and len(text) > 10: # Only include substantial paragraphs
|
||||||
# Truncate long text
|
# Truncate long text
|
||||||
truncated = text[:100] + "..." if len(text) > 100 else text
|
truncated = text[:100] + "..." if len(text) > 100 else text
|
||||||
descriptions.append(f"Text content: {truncated}")
|
descriptions.append(f"Text content: {truncated}")
|
||||||
|
|
||||||
elif section_type == "code_block":
|
elif sectionType == "code_block":
|
||||||
code = section_data.get("code", "")
|
code = sectionData.get("code", "")
|
||||||
language = section_data.get("language", "")
|
language = sectionData.get("language", "")
|
||||||
if code:
|
if code:
|
||||||
descriptions.append(f"Code block ({language}): {code[:50]}...")
|
descriptions.append(f"Code block ({language}): {code[:50]}...")
|
||||||
|
|
||||||
|
|
@ -244,42 +246,42 @@ Return only the compressed prompt, no explanations.
|
||||||
self.logger.warning(f"Error analyzing content: {str(e)}")
|
self.logger.warning(f"Error analyzing content: {str(e)}")
|
||||||
return "Document content"
|
return "Document content"
|
||||||
|
|
||||||
def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
|
def _getStyleGuidanceFromContent(self, extractedContent: Dict[str, Any], userPrompt: str = None) -> str:
|
||||||
"""Determine visual style guidance based on content and user prompt."""
|
"""Determine visual style guidance based on content and user prompt."""
|
||||||
try:
|
try:
|
||||||
style_elements = []
|
styleElements = []
|
||||||
|
|
||||||
# Analyze user prompt for style hints
|
# Analyze user prompt for style hints
|
||||||
if user_prompt:
|
if userPrompt:
|
||||||
prompt_lower = user_prompt.lower()
|
promptLower = userPrompt.lower()
|
||||||
|
|
||||||
if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
|
if any(word in promptLower for word in ["modern", "contemporary", "sleek"]):
|
||||||
style_elements.append("modern, clean design")
|
styleElements.append("modern, clean design")
|
||||||
elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
|
elif any(word in promptLower for word in ["classic", "traditional", "formal"]):
|
||||||
style_elements.append("classic, formal design")
|
styleElements.append("classic, formal design")
|
||||||
elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
|
elif any(word in promptLower for word in ["creative", "artistic", "colorful"]):
|
||||||
style_elements.append("creative, artistic design")
|
styleElements.append("creative, artistic design")
|
||||||
elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
|
elif any(word in promptLower for word in ["corporate", "business", "professional"]):
|
||||||
style_elements.append("corporate, professional design")
|
styleElements.append("corporate, professional design")
|
||||||
|
|
||||||
# Analyze content type for additional style hints
|
# Analyze content type for additional style hints
|
||||||
sections = extracted_content.get("sections", [])
|
sections = extractedContent.get("sections", [])
|
||||||
has_tables = any(self._get_section_type(s) == "table" for s in sections)
|
hasTables = any(self._getSectionType(s) == "table" for s in sections)
|
||||||
has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
|
hasLists = any(self._getSectionType(s) == "bullet_list" for s in sections)
|
||||||
has_code = any(self._get_section_type(s) == "code_block" for s in sections)
|
hasCode = any(self._getSectionType(s) == "code_block" for s in sections)
|
||||||
|
|
||||||
if has_tables:
|
if hasTables:
|
||||||
style_elements.append("data-focused layout")
|
styleElements.append("data-focused layout")
|
||||||
if has_lists:
|
if hasLists:
|
||||||
style_elements.append("organized, structured presentation")
|
styleElements.append("organized, structured presentation")
|
||||||
if has_code:
|
if hasCode:
|
||||||
style_elements.append("technical, developer-friendly")
|
styleElements.append("technical, developer-friendly")
|
||||||
|
|
||||||
# Default style if no specific guidance
|
# Default style if no specific guidance
|
||||||
if not style_elements:
|
if not styleElements:
|
||||||
style_elements.append("professional, clean design")
|
styleElements.append("professional, clean design")
|
||||||
|
|
||||||
return ", ".join(style_elements)
|
return ", ".join(styleElements)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error determining style guidance: {str(e)}")
|
self.logger.warning(f"Error determining style guidance: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -10,40 +10,40 @@ class RendererJson(BaseRenderer):
|
||||||
"""Renders content to JSON format with format-specific extraction."""
|
"""Renders content to JSON format with format-specific extraction."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported JSON formats."""
|
"""Return supported JSON formats."""
|
||||||
return ['json']
|
return ['json']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['data']
|
return ['data']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for JSON renderer."""
|
"""Return priority for JSON renderer."""
|
||||||
return 80
|
return 80
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to JSON format."""
|
"""Render extracted JSON content to JSON format."""
|
||||||
try:
|
try:
|
||||||
# The extracted content should already be JSON from the AI
|
# The extracted content should already be JSON from the AI
|
||||||
# Just validate and format it
|
# Just validate and format it
|
||||||
json_content = self._clean_json_content(extracted_content, title)
|
jsonContent = self._cleanJsonContent(extractedContent, title)
|
||||||
|
|
||||||
return json_content, "application/json"
|
return jsonContent, "application/json"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering JSON: {str(e)}")
|
self.logger.error(f"Error rendering JSON: {str(e)}")
|
||||||
# Return minimal JSON fallback
|
# Return minimal JSON fallback
|
||||||
fallback_data = {
|
fallbackData = {
|
||||||
"title": title,
|
"title": title,
|
||||||
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
|
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
|
||||||
"metadata": {"error": str(e)}
|
"metadata": {"error": str(e)}
|
||||||
}
|
}
|
||||||
return json.dumps(fallback_data, indent=2), "application/json"
|
return json.dumps(fallbackData, indent=2), "application/json"
|
||||||
|
|
||||||
def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
|
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
|
||||||
"""Clean and validate JSON content from AI."""
|
"""Clean and validate JSON content from AI."""
|
||||||
try:
|
try:
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
|
|
@ -72,8 +72,8 @@ class RendererJson(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error cleaning JSON content: {str(e)}")
|
self.logger.warning(f"Error cleaning JSON content: {str(e)}")
|
||||||
# Return minimal valid JSON
|
# Return minimal valid JSON
|
||||||
fallback_data = {
|
fallbackData = {
|
||||||
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
|
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
|
||||||
"metadata": {"title": title, "error": str(e)}
|
"metadata": {"title": title, "error": str(e)}
|
||||||
}
|
}
|
||||||
return json.dumps(fallback_data, indent=2, ensure_ascii=False)
|
return json.dumps(fallbackData, indent=2, ensure_ascii=False)
|
||||||
|
|
|
||||||
|
|
@ -9,161 +9,161 @@ class RendererMarkdown(BaseRenderer):
|
||||||
"""Renders content to Markdown format with format-specific extraction."""
|
"""Renders content to Markdown format with format-specific extraction."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported Markdown formats."""
|
"""Return supported Markdown formats."""
|
||||||
return ['md', 'markdown']
|
return ['md', 'markdown']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['mdown', 'mkd']
|
return ['mdown', 'mkd']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for markdown renderer."""
|
"""Return priority for markdown renderer."""
|
||||||
return 95
|
return 95
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to Markdown format."""
|
"""Render extracted JSON content to Markdown format."""
|
||||||
try:
|
try:
|
||||||
# Generate markdown from JSON structure
|
# Generate markdown from JSON structure
|
||||||
markdown_content = self._generate_markdown_from_json(extracted_content, title)
|
markdownContent = self._generateMarkdownFromJson(extractedContent, title)
|
||||||
|
|
||||||
return markdown_content, "text/markdown"
|
return markdownContent, "text/markdown"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering markdown: {str(e)}")
|
self.logger.error(f"Error rendering markdown: {str(e)}")
|
||||||
# Return minimal markdown fallback
|
# Return minimal markdown fallback
|
||||||
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
|
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
|
||||||
|
|
||||||
def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||||
"""Generate markdown content from structured JSON document."""
|
"""Generate markdown content from structured JSON document."""
|
||||||
try:
|
try:
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(jsonContent, dict):
|
||||||
raise ValueError("JSON content must be a dictionary")
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
if "sections" not in json_content:
|
if "sections" not in jsonContent:
|
||||||
raise ValueError("JSON content must contain 'sections' field")
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
# Use title from JSON metadata if available, otherwise use provided title
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
document_title = json_content.get("metadata", {}).get("title", title)
|
documentTitle = jsonContent.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
# Build markdown content
|
# Build markdown content
|
||||||
markdown_parts = []
|
markdownParts = []
|
||||||
|
|
||||||
# Document title
|
# Document title
|
||||||
markdown_parts.append(f"# {document_title}")
|
markdownParts.append(f"# {documentTitle}")
|
||||||
markdown_parts.append("")
|
markdownParts.append("")
|
||||||
|
|
||||||
# Process each section
|
# Process each section
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
for section in sections:
|
for section in sections:
|
||||||
section_markdown = self._render_json_section(section)
|
sectionMarkdown = self._renderJsonSection(section)
|
||||||
if section_markdown:
|
if sectionMarkdown:
|
||||||
markdown_parts.append(section_markdown)
|
markdownParts.append(sectionMarkdown)
|
||||||
markdown_parts.append("") # Add spacing between sections
|
markdownParts.append("") # Add spacing between sections
|
||||||
|
|
||||||
# Add generation info
|
# Add generation info
|
||||||
markdown_parts.append("---")
|
markdownParts.append("---")
|
||||||
markdown_parts.append(f"*Generated: {self._format_timestamp()}*")
|
markdownParts.append(f"*Generated: {self._formatTimestamp()}*")
|
||||||
|
|
||||||
return '\n'.join(markdown_parts)
|
return '\n'.join(markdownParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating markdown from JSON: {str(e)}")
|
self.logger.error(f"Error generating markdown from JSON: {str(e)}")
|
||||||
raise Exception(f"Markdown generation failed: {str(e)}")
|
raise Exception(f"Markdown generation failed: {str(e)}")
|
||||||
|
|
||||||
def _render_json_section(self, section: Dict[str, Any]) -> str:
|
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
|
||||||
"""Render a single JSON section to markdown."""
|
"""Render a single JSON section to markdown."""
|
||||||
try:
|
try:
|
||||||
section_type = self._get_section_type(section)
|
sectionType = self._getSectionType(section)
|
||||||
section_data = self._get_section_data(section)
|
sectionData = self._getSectionData(section)
|
||||||
|
|
||||||
if section_type == "table":
|
if sectionType == "table":
|
||||||
# Process the section data to extract table structure
|
# Process the section data to extract table structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_table(processed_data)
|
return self._renderJsonTable(processedData)
|
||||||
elif section_type == "bullet_list":
|
elif sectionType == "bullet_list":
|
||||||
# Process the section data to extract bullet list structure
|
# Process the section data to extract bullet list structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_bullet_list(processed_data)
|
return self._renderJsonBulletList(processedData)
|
||||||
elif section_type == "heading":
|
elif sectionType == "heading":
|
||||||
return self._render_json_heading(section_data)
|
return self._renderJsonHeading(sectionData)
|
||||||
elif section_type == "paragraph":
|
elif sectionType == "paragraph":
|
||||||
return self._render_json_paragraph(section_data)
|
return self._renderJsonParagraph(sectionData)
|
||||||
elif section_type == "code_block":
|
elif sectionType == "code_block":
|
||||||
# Process the section data to extract code block structure
|
# Process the section data to extract code block structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_code_block(processed_data)
|
return self._renderJsonCodeBlock(processedData)
|
||||||
elif section_type == "image":
|
elif sectionType == "image":
|
||||||
# Process the section data to extract image structure
|
# Process the section data to extract image structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_image(processed_data)
|
return self._renderJsonImage(processedData)
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph for unknown types
|
# Fallback to paragraph for unknown types
|
||||||
return self._render_json_paragraph(section_data)
|
return self._renderJsonParagraph(sectionData)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
|
||||||
return f"*[Error rendering section: {str(e)}]*"
|
return f"*[Error rendering section: {str(e)}]*"
|
||||||
|
|
||||||
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
|
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON table to markdown."""
|
"""Render a JSON table to markdown."""
|
||||||
try:
|
try:
|
||||||
headers = table_data.get("headers", [])
|
headers = tableData.get("headers", [])
|
||||||
rows = table_data.get("rows", [])
|
rows = tableData.get("rows", [])
|
||||||
|
|
||||||
if not headers or not rows:
|
if not headers or not rows:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
markdown_parts = []
|
markdownParts = []
|
||||||
|
|
||||||
# Create table header
|
# Create table header
|
||||||
header_line = " | ".join(str(header) for header in headers)
|
headerLine = " | ".join(str(header) for header in headers)
|
||||||
markdown_parts.append(header_line)
|
markdownParts.append(headerLine)
|
||||||
|
|
||||||
# Add separator line
|
# Add separator line
|
||||||
separator_line = " | ".join("---" for _ in headers)
|
separatorLine = " | ".join("---" for _ in headers)
|
||||||
markdown_parts.append(separator_line)
|
markdownParts.append(separatorLine)
|
||||||
|
|
||||||
# Add data rows
|
# Add data rows
|
||||||
for row in rows:
|
for row in rows:
|
||||||
row_line = " | ".join(str(cell_data) for cell_data in row)
|
rowLine = " | ".join(str(cellData) for cellData in row)
|
||||||
markdown_parts.append(row_line)
|
markdownParts.append(rowLine)
|
||||||
|
|
||||||
return '\n'.join(markdown_parts)
|
return '\n'.join(markdownParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
|
def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON bullet list to markdown."""
|
"""Render a JSON bullet list to markdown."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = listData.get("items", [])
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
markdown_parts = []
|
markdownParts = []
|
||||||
for item in items:
|
for item in items:
|
||||||
if isinstance(item, str):
|
if isinstance(item, str):
|
||||||
markdown_parts.append(f"- {item}")
|
markdownParts.append(f"- {item}")
|
||||||
elif isinstance(item, dict) and "text" in item:
|
elif isinstance(item, dict) and "text" in item:
|
||||||
markdown_parts.append(f"- {item['text']}")
|
markdownParts.append(f"- {item['text']}")
|
||||||
|
|
||||||
return '\n'.join(markdown_parts)
|
return '\n'.join(markdownParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
|
def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON heading to markdown."""
|
"""Render a JSON heading to markdown."""
|
||||||
try:
|
try:
|
||||||
level = heading_data.get("level", 1)
|
level = headingData.get("level", 1)
|
||||||
text = heading_data.get("text", "")
|
text = headingData.get("text", "")
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
level = max(1, min(6, level))
|
level = max(1, min(6, level))
|
||||||
|
|
@ -175,21 +175,21 @@ class RendererMarkdown(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
|
def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON paragraph to markdown."""
|
"""Render a JSON paragraph to markdown."""
|
||||||
try:
|
try:
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraphData.get("text", "")
|
||||||
return text if text else ""
|
return text if text else ""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
|
def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON code block to markdown."""
|
"""Render a JSON code block to markdown."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = codeData.get("code", "")
|
||||||
language = code_data.get("language", "")
|
language = codeData.get("language", "")
|
||||||
|
|
||||||
if code:
|
if code:
|
||||||
if language:
|
if language:
|
||||||
|
|
@ -203,19 +203,19 @@ class RendererMarkdown(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
|
def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON image to markdown."""
|
"""Render a JSON image to markdown."""
|
||||||
try:
|
try:
|
||||||
alt_text = image_data.get("altText", "Image")
|
altText = imageData.get("altText", "Image")
|
||||||
base64_data = image_data.get("base64Data", "")
|
base64Data = imageData.get("base64Data", "")
|
||||||
|
|
||||||
if base64_data:
|
if base64Data:
|
||||||
# For base64 images, we can't embed them directly in markdown
|
# For base64 images, we can't embed them directly in markdown
|
||||||
# So we'll use a placeholder with the alt text
|
# So we'll use a placeholder with the alt text
|
||||||
return f""
|
return f""
|
||||||
else:
|
else:
|
||||||
return f""
|
return f""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering image: {str(e)}")
|
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||||
return f""
|
return f""
|
||||||
|
|
|
||||||
|
|
@ -22,32 +22,32 @@ class RendererPdf(BaseRenderer):
|
||||||
"""Renders content to PDF format using reportlab."""
|
"""Renders content to PDF format using reportlab."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported PDF formats."""
|
"""Return supported PDF formats."""
|
||||||
return ['pdf']
|
return ['pdf']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['document', 'print']
|
return ['document', 'print']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for PDF renderer."""
|
"""Return priority for PDF renderer."""
|
||||||
return 120
|
return 120
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
||||||
try:
|
try:
|
||||||
if not REPORTLAB_AVAILABLE:
|
if not REPORTLAB_AVAILABLE:
|
||||||
# Fallback to HTML if reportlab not available
|
# Fallback to HTML if reportlab not available
|
||||||
from .rendererHtml import RendererHtml
|
from .rendererHtml import RendererHtml
|
||||||
html_renderer = RendererHtml()
|
html_renderer = RendererHtml()
|
||||||
html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
|
html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
||||||
return html_content, "text/html"
|
return html_content, "text/html"
|
||||||
|
|
||||||
# Generate PDF using AI-analyzed styling
|
# Generate PDF using AI-analyzed styling
|
||||||
pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
|
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return pdf_content, "application/pdf"
|
return pdf_content, "application/pdf"
|
||||||
|
|
||||||
|
|
@ -56,11 +56,11 @@ class RendererPdf(BaseRenderer):
|
||||||
# Return minimal fallback
|
# Return minimal fallback
|
||||||
return f"PDF Generation Error: {str(e)}", "text/plain"
|
return f"PDF Generation Error: {str(e)}", "text/plain"
|
||||||
|
|
||||||
async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
||||||
try:
|
try:
|
||||||
# Get AI-generated styling definitions
|
# Get AI-generated styling definitions
|
||||||
styles = await self._get_pdf_styles(user_prompt, ai_service)
|
styles = await self._getPdfStyles(userPrompt, aiService)
|
||||||
|
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(json_content, dict):
|
||||||
|
|
@ -93,10 +93,10 @@ class RendererPdf(BaseRenderer):
|
||||||
story = []
|
story = []
|
||||||
|
|
||||||
# Title page
|
# Title page
|
||||||
title_style = self._create_title_style(styles)
|
title_style = self._createTitleStyle(styles)
|
||||||
story.append(Paragraph(document_title, title_style))
|
story.append(Paragraph(document_title, title_style))
|
||||||
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
|
story.append(Spacer(1, 50)) # Increased spacing to prevent overlap
|
||||||
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
|
story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._createNormalStyle(styles)))
|
||||||
story.append(Spacer(1, 30)) # Add spacing before page break
|
story.append(Spacer(1, 30)) # Add spacing before page break
|
||||||
story.append(PageBreak())
|
story.append(PageBreak())
|
||||||
|
|
||||||
|
|
@ -105,7 +105,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
|
self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
|
||||||
for i, section in enumerate(sections):
|
for i, section in enumerate(sections):
|
||||||
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
|
self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
|
||||||
section_elements = self._render_json_section(section, styles)
|
section_elements = self._renderJsonSection(section, styles)
|
||||||
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
|
self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
|
||||||
story.extend(section_elements)
|
story.extend(section_elements)
|
||||||
|
|
||||||
|
|
@ -123,7 +123,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
|
self.logger.error(f"Error generating PDF from JSON: {str(e)}")
|
||||||
raise Exception(f"PDF generation failed: {str(e)}")
|
raise Exception(f"PDF generation failed: {str(e)}")
|
||||||
|
|
||||||
async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
async def _getPdfStyles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||||
"""Get PDF styling definitions using base template AI styling."""
|
"""Get PDF styling definitions using base template AI styling."""
|
||||||
style_schema = {
|
style_schema = {
|
||||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
||||||
|
|
@ -136,21 +136,21 @@ class RendererPdf(BaseRenderer):
|
||||||
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
||||||
}
|
}
|
||||||
|
|
||||||
style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)
|
style_template = self._createAiStyleTemplate("pdf", user_prompt, style_schema)
|
||||||
|
|
||||||
# Use base template method like DOCX does (this works!)
|
# Use base template method like DOCX does (this works!)
|
||||||
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())
|
styles = await self._getAiStyles(ai_service, style_template, self._getDefaultPdfStyles())
|
||||||
|
|
||||||
if styles is None:
|
if styles is None:
|
||||||
return self._get_default_pdf_styles()
|
return self._getDefaultPdfStyles()
|
||||||
|
|
||||||
# Convert colors to PDF format after getting styles
|
# Convert colors to PDF format after getting styles
|
||||||
styles = self._convert_colors_format(styles)
|
styles = self._convertColorsFormat(styles)
|
||||||
|
|
||||||
# Validate and fix contrast issues
|
# Validate and fix contrast issues
|
||||||
return self._validate_pdf_styles_contrast(styles)
|
return self._validatePdfStylesContrast(styles)
|
||||||
|
|
||||||
async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
async def _getAiStylesWithPdfColors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Get AI styles with proper PDF color conversion."""
|
"""Get AI styles with proper PDF color conversion."""
|
||||||
if not ai_service:
|
if not ai_service:
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
@ -279,7 +279,7 @@ class RendererPdf(BaseRenderer):
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
||||||
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
|
# Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
|
||||||
styles = self._convert_colors_format(styles)
|
styles = self._convertColorsFormat(styles)
|
||||||
|
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
|
|
@ -287,7 +287,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
||||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Convert colors to proper format for PDF compatibility."""
|
"""Convert colors to proper format for PDF compatibility."""
|
||||||
try:
|
try:
|
||||||
for style_name, style_config in styles.items():
|
for style_name, style_config in styles.items():
|
||||||
|
|
@ -304,7 +304,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Color conversion failed: {str(e)}")
|
self.logger.warning(f"Color conversion failed: {str(e)}")
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
|
def _getSafeColor(self, color_value: str, default: str = "#000000") -> str:
|
||||||
"""Get a safe hex color value for PDF."""
|
"""Get a safe hex color value for PDF."""
|
||||||
if isinstance(color_value, str) and color_value.startswith('#'):
|
if isinstance(color_value, str) and color_value.startswith('#'):
|
||||||
if len(color_value) == 7:
|
if len(color_value) == 7:
|
||||||
|
|
@ -313,7 +313,7 @@ class RendererPdf(BaseRenderer):
|
||||||
return color_value
|
return color_value
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _validatePdfStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Validate and fix contrast issues in AI-generated styles."""
|
"""Validate and fix contrast issues in AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Fix table header contrast
|
# Fix table header contrast
|
||||||
|
|
@ -348,9 +348,9 @@ class RendererPdf(BaseRenderer):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Style validation failed: {str(e)}")
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||||
return self._get_default_pdf_styles()
|
return self._getDefaultPdfStyles()
|
||||||
|
|
||||||
def _get_default_pdf_styles(self) -> Dict[str, Any]:
|
def _getDefaultPdfStyles(self) -> Dict[str, Any]:
|
||||||
"""Default PDF styles."""
|
"""Default PDF styles."""
|
||||||
return {
|
return {
|
||||||
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
|
||||||
|
|
@ -363,27 +363,27 @@ class RendererPdf(BaseRenderer):
|
||||||
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
"code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
|
def _createTitleStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
|
||||||
"""Create title style from style definitions."""
|
"""Create title style from style definitions."""
|
||||||
title_style_def = styles.get("title", {})
|
title_style_def = styles.get("title", {})
|
||||||
|
|
||||||
# DEBUG: Show what color and spacing is being used for title
|
# DEBUG: Show what color and spacing is being used for title
|
||||||
title_color = title_style_def.get("color", "#1F4E79")
|
title_color = title_style_def.get("color", "#1F4E79")
|
||||||
title_space_after = title_style_def.get("space_after", 30)
|
title_space_after = title_style_def.get("space_after", 30)
|
||||||
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER")
|
self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hexToColor(title_color)}", "PDF_RENDERER")
|
||||||
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
|
self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")
|
||||||
|
|
||||||
return ParagraphStyle(
|
return ParagraphStyle(
|
||||||
'CustomTitle',
|
'CustomTitle',
|
||||||
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
|
fontSize=title_style_def.get("font_size", 20), # Reduced from 24 to 20
|
||||||
spaceAfter=title_style_def.get("space_after", 30),
|
spaceAfter=title_style_def.get("space_after", 30),
|
||||||
alignment=self._get_alignment(title_style_def.get("align", "center")),
|
alignment=self._getAlignment(title_style_def.get("align", "center")),
|
||||||
textColor=self._hex_to_color(title_color),
|
textColor=self._hexToColor(title_color),
|
||||||
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
|
leading=title_style_def.get("font_size", 20) * 1.4, # Add line spacing for multi-line titles
|
||||||
spaceBefore=0 # Ensure no space before title
|
spaceBefore=0 # Ensure no space before title
|
||||||
)
|
)
|
||||||
|
|
||||||
def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
|
def _createHeadingStyle(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
|
||||||
"""Create heading style from style definitions."""
|
"""Create heading style from style definitions."""
|
||||||
heading_key = f"heading{level}"
|
heading_key = f"heading{level}"
|
||||||
heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
|
heading_style_def = styles.get(heading_key, styles.get("heading1", {}))
|
||||||
|
|
@ -393,11 +393,11 @@ class RendererPdf(BaseRenderer):
|
||||||
fontSize=heading_style_def.get("font_size", 18 - level * 2),
|
fontSize=heading_style_def.get("font_size", 18 - level * 2),
|
||||||
spaceAfter=heading_style_def.get("space_after", 12),
|
spaceAfter=heading_style_def.get("space_after", 12),
|
||||||
spaceBefore=heading_style_def.get("space_before", 12),
|
spaceBefore=heading_style_def.get("space_before", 12),
|
||||||
alignment=self._get_alignment(heading_style_def.get("align", "left")),
|
alignment=self._getAlignment(heading_style_def.get("align", "left")),
|
||||||
textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
|
textColor=self._hexToColor(heading_style_def.get("color", "#2F2F2F"))
|
||||||
)
|
)
|
||||||
|
|
||||||
def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
|
def _createNormalStyle(self, styles: Dict[str, Any]) -> ParagraphStyle:
|
||||||
"""Create normal paragraph style from style definitions."""
|
"""Create normal paragraph style from style definitions."""
|
||||||
paragraph_style_def = styles.get("paragraph", {})
|
paragraph_style_def = styles.get("paragraph", {})
|
||||||
|
|
||||||
|
|
@ -405,12 +405,12 @@ class RendererPdf(BaseRenderer):
|
||||||
'CustomNormal',
|
'CustomNormal',
|
||||||
fontSize=paragraph_style_def.get("font_size", 11),
|
fontSize=paragraph_style_def.get("font_size", 11),
|
||||||
spaceAfter=paragraph_style_def.get("space_after", 6),
|
spaceAfter=paragraph_style_def.get("space_after", 6),
|
||||||
alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
|
alignment=self._getAlignment(paragraph_style_def.get("align", "left")),
|
||||||
textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
|
textColor=self._hexToColor(paragraph_style_def.get("color", "#2F2F2F")),
|
||||||
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
|
leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_alignment(self, align: str) -> int:
|
def _getAlignment(self, align: str) -> int:
|
||||||
"""Convert alignment string to reportlab alignment constant."""
|
"""Convert alignment string to reportlab alignment constant."""
|
||||||
if not align or not isinstance(align, str):
|
if not align or not isinstance(align, str):
|
||||||
return TA_LEFT
|
return TA_LEFT
|
||||||
|
|
@ -426,7 +426,7 @@ class RendererPdf(BaseRenderer):
|
||||||
}
|
}
|
||||||
return align_map.get(align.lower().strip(), TA_LEFT)
|
return align_map.get(align.lower().strip(), TA_LEFT)
|
||||||
|
|
||||||
def _get_table_alignment(self, align: str) -> str:
|
def _getTableAlignment(self, align: str) -> str:
|
||||||
"""Convert alignment string to ReportLab table alignment string."""
|
"""Convert alignment string to ReportLab table alignment string."""
|
||||||
if not align or not isinstance(align, str):
|
if not align or not isinstance(align, str):
|
||||||
return 'LEFT'
|
return 'LEFT'
|
||||||
|
|
@ -442,7 +442,7 @@ class RendererPdf(BaseRenderer):
|
||||||
}
|
}
|
||||||
return align_map.get(align.lower().strip(), 'LEFT')
|
return align_map.get(align.lower().strip(), 'LEFT')
|
||||||
|
|
||||||
def _hex_to_color(self, hex_color: str) -> colors.Color:
|
def _hexToColor(self, hex_color: str) -> colors.Color:
|
||||||
"""Convert hex color to reportlab color."""
|
"""Convert hex color to reportlab color."""
|
||||||
try:
|
try:
|
||||||
hex_color = hex_color.lstrip('#')
|
hex_color = hex_color.lstrip('#')
|
||||||
|
|
@ -464,38 +464,38 @@ class RendererPdf(BaseRenderer):
|
||||||
except:
|
except:
|
||||||
return colors.black
|
return colors.black
|
||||||
|
|
||||||
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a single JSON section to PDF elements using AI-generated styles."""
|
"""Render a single JSON section to PDF elements using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
section_type = self._get_section_type(section)
|
section_type = self._getSectionType(section)
|
||||||
elements = self._get_section_data(section)
|
elements = self._getSectionData(section)
|
||||||
|
|
||||||
# Process each element in the section
|
# Process each element in the section
|
||||||
all_elements = []
|
all_elements = []
|
||||||
for element in elements:
|
for element in elements:
|
||||||
if section_type == "table":
|
if section_type == "table":
|
||||||
all_elements.extend(self._render_json_table(element, styles))
|
all_elements.extend(self._renderJsonTable(element, styles))
|
||||||
elif section_type == "bullet_list":
|
elif section_type == "bullet_list":
|
||||||
all_elements.extend(self._render_json_bullet_list(element, styles))
|
all_elements.extend(self._renderJsonBulletList(element, styles))
|
||||||
elif section_type == "heading":
|
elif section_type == "heading":
|
||||||
all_elements.extend(self._render_json_heading(element, styles))
|
all_elements.extend(self._renderJsonHeading(element, styles))
|
||||||
elif section_type == "paragraph":
|
elif section_type == "paragraph":
|
||||||
all_elements.extend(self._render_json_paragraph(element, styles))
|
all_elements.extend(self._renderJsonParagraph(element, styles))
|
||||||
elif section_type == "code_block":
|
elif section_type == "code_block":
|
||||||
all_elements.extend(self._render_json_code_block(element, styles))
|
all_elements.extend(self._renderJsonCodeBlock(element, styles))
|
||||||
elif section_type == "image":
|
elif section_type == "image":
|
||||||
all_elements.extend(self._render_json_image(element, styles))
|
all_elements.extend(self._renderJsonImage(element, styles))
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph for unknown types
|
# Fallback to paragraph for unknown types
|
||||||
all_elements.extend(self._render_json_paragraph(element, styles))
|
all_elements.extend(self._renderJsonParagraph(element, styles))
|
||||||
|
|
||||||
return all_elements
|
return all_elements
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
|
||||||
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
|
return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
|
||||||
|
|
||||||
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonTable(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON table to PDF elements using AI-generated styles."""
|
"""Render a JSON table to PDF elements using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
headers = table_data.get("headers", [])
|
headers = table_data.get("headers", [])
|
||||||
|
|
@ -517,7 +517,7 @@ class RendererPdf(BaseRenderer):
|
||||||
table_style = [
|
table_style = [
|
||||||
('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
|
('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
|
||||||
('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
|
('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
|
||||||
('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))),
|
('ALIGN', (0, 0), (-1, -1), self._getTableAlignment(table_cell_style.get("align", "left"))),
|
||||||
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
|
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
|
||||||
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
|
('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
|
||||||
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
||||||
|
|
@ -534,7 +534,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonBulletList(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
|
"""Render a JSON bullet list to PDF elements using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = list_data.get("items", [])
|
||||||
|
|
@ -556,7 +556,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonHeading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON heading to PDF elements using AI-generated styles."""
|
"""Render a JSON heading to PDF elements using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
level = heading_data.get("level", 1)
|
level = heading_data.get("level", 1)
|
||||||
|
|
@ -564,7 +564,7 @@ class RendererPdf(BaseRenderer):
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
level = max(1, min(6, level))
|
level = max(1, min(6, level))
|
||||||
heading_style = self._create_heading_style(styles, level)
|
heading_style = self._createHeadingStyle(styles, level)
|
||||||
return [Paragraph(text, heading_style)]
|
return [Paragraph(text, heading_style)]
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
@ -573,13 +573,13 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonParagraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
|
"""Render a JSON paragraph to PDF elements using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraph_data.get("text", "")
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
return [Paragraph(text, self._create_normal_style(styles))]
|
return [Paragraph(text, self._createNormalStyle(styles))]
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
@ -587,7 +587,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonCodeBlock(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON code block to PDF elements using AI-generated styles."""
|
"""Render a JSON code block to PDF elements using AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = code_data.get("code", "")
|
||||||
|
|
@ -601,7 +601,7 @@ class RendererPdf(BaseRenderer):
|
||||||
lang_style = ParagraphStyle(
|
lang_style = ParagraphStyle(
|
||||||
'CodeLanguage',
|
'CodeLanguage',
|
||||||
fontSize=code_style_def.get("font_size", 9),
|
fontSize=code_style_def.get("font_size", 9),
|
||||||
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
|
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||||||
fontName='Helvetica-Bold'
|
fontName='Helvetica-Bold'
|
||||||
)
|
)
|
||||||
elements.append(Paragraph(f"Code ({language}):", lang_style))
|
elements.append(Paragraph(f"Code ({language}):", lang_style))
|
||||||
|
|
@ -609,9 +609,9 @@ class RendererPdf(BaseRenderer):
|
||||||
code_style = ParagraphStyle(
|
code_style = ParagraphStyle(
|
||||||
'CodeBlock',
|
'CodeBlock',
|
||||||
fontSize=code_style_def.get("font_size", 9),
|
fontSize=code_style_def.get("font_size", 9),
|
||||||
textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
|
textColor=self._hexToColor(code_style_def.get("color", "#2F2F2F")),
|
||||||
fontName=code_style_def.get("font", "Courier"),
|
fontName=code_style_def.get("font", "Courier"),
|
||||||
backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
|
backColor=self._hexToColor(code_style_def.get("background", "#F5F5F5")),
|
||||||
spaceAfter=code_style_def.get("space_after", 6)
|
spaceAfter=code_style_def.get("space_after", 6)
|
||||||
)
|
)
|
||||||
elements.append(Paragraph(code, code_style))
|
elements.append(Paragraph(code, code_style))
|
||||||
|
|
@ -624,7 +624,7 @@ class RendererPdf(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
def _renderJsonImage(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
|
||||||
"""Render a JSON image to PDF elements."""
|
"""Render a JSON image to PDF elements."""
|
||||||
try:
|
try:
|
||||||
base64_data = image_data.get("base64Data", "")
|
base64_data = image_data.get("base64Data", "")
|
||||||
|
|
@ -632,10 +632,10 @@ class RendererPdf(BaseRenderer):
|
||||||
|
|
||||||
if base64_data:
|
if base64_data:
|
||||||
# For now, just add a placeholder since reportlab image handling is complex
|
# For now, just add a placeholder since reportlab image handling is complex
|
||||||
return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]
|
return [Paragraph(f"[Image: {alt_text}]", self._createNormalStyle(styles))]
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering image: {str(e)}")
|
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||||
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]
|
return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._createNormalStyle(styles))]
|
||||||
|
|
@ -12,23 +12,23 @@ class RendererPptx(BaseRenderer):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.supported_formats = ["pptx", "ppt"]
|
self.supportedFormats = ["pptx", "ppt"]
|
||||||
self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> list:
|
def getSupportedFormats(cls) -> list:
|
||||||
"""Get list of supported output formats."""
|
"""Get list of supported output formats."""
|
||||||
return ["pptx", "ppt"]
|
return ["pptx", "ppt"]
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Render content as PowerPoint presentation from JSON data.
|
Render content as PowerPoint presentation from JSON data.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
extracted_content: JSON content to render as presentation
|
extractedContent: JSON content to render as presentation
|
||||||
title: Title for the presentation
|
title: Title for the presentation
|
||||||
user_prompt: User prompt for AI styling
|
userPrompt: User prompt for AI styling
|
||||||
ai_service: AI service for styling
|
aiService: AI service for styling
|
||||||
**kwargs: Additional rendering options
|
**kwargs: Additional rendering options
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -43,7 +43,7 @@ class RendererPptx(BaseRenderer):
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Get AI-generated styling definitions first
|
# Get AI-generated styling definitions first
|
||||||
styles = await self._get_pptx_styles(user_prompt, ai_service)
|
styles = await self._getPptxStyles(userPrompt, aiService)
|
||||||
|
|
||||||
# Create new presentation
|
# Create new presentation
|
||||||
prs = Presentation()
|
prs = Presentation()
|
||||||
|
|
@ -58,13 +58,13 @@ class RendererPptx(BaseRenderer):
|
||||||
prs.slide_height = Inches(7.5)
|
prs.slide_height = Inches(7.5)
|
||||||
|
|
||||||
# Generate slides from JSON content
|
# Generate slides from JSON content
|
||||||
slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
|
slidesData = await self._parseJsonToSlides(extractedContent, title, styles)
|
||||||
logger.info(f"Parsed {len(slides_data)} slides from JSON content")
|
logger.info(f"Parsed {len(slidesData)} slides from JSON content")
|
||||||
|
|
||||||
# Debug: Show first 200 chars of content
|
# Debug: Show first 200 chars of content
|
||||||
logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")
|
logger.info(f"JSON content preview: {str(extractedContent)[:200]}...")
|
||||||
|
|
||||||
for i, slide_data in enumerate(slides_data):
|
for i, slide_data in enumerate(slidesData):
|
||||||
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
|
logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
|
||||||
# Debug: Show slide content preview
|
# Debug: Show slide content preview
|
||||||
slide_content = slide_data.get('content', '')
|
slide_content = slide_data.get('content', '')
|
||||||
|
|
@ -74,8 +74,8 @@ class RendererPptx(BaseRenderer):
|
||||||
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
|
logger.warning(f" ⚠️ Slide {i+1} has NO content!")
|
||||||
|
|
||||||
# Create slide with appropriate layout based on content
|
# Create slide with appropriate layout based on content
|
||||||
slide_layout_index = self._get_slide_layout_index(slide_data, styles)
|
slideLayoutIndex = self._getSlideLayoutIndex(slide_data, styles)
|
||||||
slide_layout = prs.slide_layouts[slide_layout_index]
|
slide_layout = prs.slide_layouts[slideLayoutIndex]
|
||||||
slide = prs.slides.add_slide(slide_layout)
|
slide = prs.slides.add_slide(slide_layout)
|
||||||
|
|
||||||
# Set title with AI-generated styling
|
# Set title with AI-generated styling
|
||||||
|
|
@ -153,7 +153,7 @@ class RendererPptx(BaseRenderer):
|
||||||
p.alignment = PP_ALIGN.LEFT
|
p.alignment = PP_ALIGN.LEFT
|
||||||
|
|
||||||
# If no slides were created, create a default slide
|
# If no slides were created, create a default slide
|
||||||
if not slides_data:
|
if not slidesData:
|
||||||
slide_layout = prs.slide_layouts[0] # Title slide layout
|
slide_layout = prs.slide_layouts[0] # Title slide layout
|
||||||
slide = prs.slides.add_slide(slide_layout)
|
slide = prs.slides.add_slide(slide_layout)
|
||||||
|
|
||||||
|
|
@ -198,7 +198,7 @@ class RendererPptx(BaseRenderer):
|
||||||
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
|
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
|
||||||
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
|
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
|
||||||
|
|
||||||
def _parse_content_to_slides(self, content: str, title: str) -> list:
|
def _parseContentToSlides(self, content: str, title: str) -> list:
|
||||||
"""
|
"""
|
||||||
Parse content into slide data structure.
|
Parse content into slide data structure.
|
||||||
|
|
||||||
|
|
@ -212,7 +212,7 @@ class RendererPptx(BaseRenderer):
|
||||||
slides = []
|
slides = []
|
||||||
|
|
||||||
# Split content by slide markers or headers
|
# Split content by slide markers or headers
|
||||||
slide_sections = self._split_content_into_slides(content)
|
slide_sections = self._splitContentIntoSlides(content)
|
||||||
|
|
||||||
for i, section in enumerate(slide_sections):
|
for i, section in enumerate(slide_sections):
|
||||||
if section.strip():
|
if section.strip():
|
||||||
|
|
@ -239,7 +239,7 @@ class RendererPptx(BaseRenderer):
|
||||||
|
|
||||||
return slides
|
return slides
|
||||||
|
|
||||||
def _split_content_into_slides(self, content: str) -> list:
|
def _splitContentIntoSlides(self, content: str) -> list:
|
||||||
"""
|
"""
|
||||||
Split content into individual slides based on headers and structure.
|
Split content into individual slides based on headers and structure.
|
||||||
|
|
||||||
|
|
@ -299,11 +299,11 @@ class RendererPptx(BaseRenderer):
|
||||||
return [content.strip()]
|
return [content.strip()]
|
||||||
|
|
||||||
|
|
||||||
def get_output_mime_type(self) -> str:
|
def getOutputMimeType(self) -> str:
|
||||||
"""Get MIME type for rendered output."""
|
"""Get MIME type for rendered output."""
|
||||||
return self.output_mime_type
|
return self.outputMimeType
|
||||||
|
|
||||||
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
async def _getPptxStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
|
||||||
"""Get PowerPoint styling definitions using base template AI styling."""
|
"""Get PowerPoint styling definitions using base template AI styling."""
|
||||||
style_schema = {
|
style_schema = {
|
||||||
"title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
|
"title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
|
||||||
|
|
@ -323,21 +323,21 @@ class RendererPptx(BaseRenderer):
|
||||||
"executive_ready": True
|
"executive_ready": True
|
||||||
}
|
}
|
||||||
|
|
||||||
style_template = self._create_professional_pptx_template(user_prompt, style_schema)
|
style_template = self._createProfessionalPptxTemplate(userPrompt, style_schema)
|
||||||
# Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion
|
# Use our own _getAiStylesWithPptxColors method to ensure proper color conversion
|
||||||
styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles())
|
styles = await self._getAiStylesWithPptxColors(aiService, style_template, self._getDefaultPptxStyles())
|
||||||
|
|
||||||
# Validate PowerPoint-specific requirements
|
# Validate PowerPoint-specific requirements
|
||||||
return self._validate_pptx_styles_readability(styles)
|
return self._validatePptxStylesReadability(styles)
|
||||||
|
|
||||||
def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
|
def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str:
|
||||||
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
|
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
|
||||||
import json
|
import json
|
||||||
schema_json = json.dumps(style_schema, indent=4)
|
schema_json = json.dumps(style_schema, indent=4)
|
||||||
|
|
||||||
return f"""Customize the JSON below for professional PowerPoint slides.
|
return f"""Customize the JSON below for professional PowerPoint slides.
|
||||||
|
|
||||||
User Request: {user_prompt or "Create professional corporate slides"}
|
User Request: {userPrompt or "Create professional corporate slides"}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Use professional colors (blues, grays, deep greens)
|
- Use professional colors (blues, grays, deep greens)
|
||||||
|
|
@ -351,9 +351,9 @@ Return ONLY this JSON with your changes:
|
||||||
|
|
||||||
JSON ONLY. NO OTHER TEXT."""
|
JSON ONLY. NO OTHER TEXT."""
|
||||||
|
|
||||||
async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
async def _getAiStylesWithPptxColors(self, aiService, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Get AI styles with proper PowerPoint color conversion."""
|
"""Get AI styles with proper PowerPoint color conversion."""
|
||||||
if not ai_service:
|
if not aiService:
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -365,11 +365,11 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
||||||
|
|
||||||
# Check if AI service is properly configured
|
# Check if AI service is properly configured
|
||||||
if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
|
if not hasattr(aiService, 'aiObjects') or not aiService.aiObjects:
|
||||||
self.logger.warning("AI service not properly configured, using defaults")
|
self.logger.warning("AI service not properly configured, using defaults")
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
||||||
response = await ai_service.aiObjects.call(request)
|
response = await aiService.aiObjects.call(request)
|
||||||
|
|
||||||
# Check if response is valid
|
# Check if response is valid
|
||||||
if not response:
|
if not response:
|
||||||
|
|
@ -445,7 +445,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
||||||
# Convert colors to PowerPoint RGB format
|
# Convert colors to PowerPoint RGB format
|
||||||
styles = self._convert_colors_format(styles)
|
styles = self._convertColorsFormat(styles)
|
||||||
|
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
|
|
@ -453,7 +453,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||||
return default_styles
|
return default_styles
|
||||||
|
|
||||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Convert hex colors to RGB format for PowerPoint compatibility."""
|
"""Convert hex colors to RGB format for PowerPoint compatibility."""
|
||||||
try:
|
try:
|
||||||
for style_name, style_config in styles.items():
|
for style_name, style_config in styles.items():
|
||||||
|
|
@ -477,7 +477,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
self.logger.warning(f"Color conversion failed: {str(e)}")
|
self.logger.warning(f"Color conversion failed: {str(e)}")
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
|
def _getSafeColor(self, color_value, default=(0, 0, 0)) -> tuple:
|
||||||
"""Get a safe RGB color tuple for PowerPoint."""
|
"""Get a safe RGB color tuple for PowerPoint."""
|
||||||
if isinstance(color_value, tuple) and len(color_value) == 3:
|
if isinstance(color_value, tuple) and len(color_value) == 3:
|
||||||
return color_value
|
return color_value
|
||||||
|
|
@ -495,7 +495,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
return (r, g, b)
|
return (r, g, b)
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _validatePptxStylesReadability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Validate and fix readability issues in AI-generated styles."""
|
"""Validate and fix readability issues in AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Ensure minimum font sizes for PowerPoint readability
|
# Ensure minimum font sizes for PowerPoint readability
|
||||||
|
|
@ -519,9 +519,9 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Style validation failed: {str(e)}")
|
logger.warning(f"Style validation failed: {str(e)}")
|
||||||
return self._get_default_pptx_styles()
|
return self._getDefaultPptxStyles()
|
||||||
|
|
||||||
def _get_default_pptx_styles(self) -> Dict[str, Any]:
|
def _getDefaultPptxStyles(self) -> Dict[str, Any]:
|
||||||
"""Default PowerPoint styles with corporate professional color scheme."""
|
"""Default PowerPoint styles with corporate professional color scheme."""
|
||||||
return {
|
return {
|
||||||
"title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
|
"title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
|
||||||
|
|
@ -541,7 +541,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
"executive_ready": True
|
"executive_ready": True
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
async def _parseJsonToSlides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Parse JSON content into slide data structure.
|
Parse JSON content into slide data structure.
|
||||||
|
|
||||||
|
|
@ -569,12 +569,12 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
# Create title slide
|
# Create title slide
|
||||||
slides.append({
|
slides.append({
|
||||||
"title": document_title,
|
"title": document_title,
|
||||||
"content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
|
"content": "Generated by PowerOn AI System\n\n" + self._formatTimestamp()
|
||||||
})
|
})
|
||||||
|
|
||||||
# Process sections into slides based on content and user intent
|
# Process sections into slides based on content and user intent
|
||||||
sections = json_content.get("sections", [])
|
sections = json_content.get("sections", [])
|
||||||
slides.extend(self._create_slides_from_sections(sections, styles))
|
slides.extend(self._createSlidesFromSections(sections, styles))
|
||||||
|
|
||||||
# If no content slides were created, create a default content slide
|
# If no content slides were created, create a default content slide
|
||||||
if len(slides) == 1: # Only title slide
|
if len(slides) == 1: # Only title slide
|
||||||
|
|
@ -595,7 +595,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _createSlideFromSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Create a slide from a JSON section."""
|
"""Create a slide from a JSON section."""
|
||||||
try:
|
try:
|
||||||
# Get section title from data or use default
|
# Get section title from data or use default
|
||||||
|
|
@ -616,15 +616,15 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
content_parts = []
|
content_parts = []
|
||||||
|
|
||||||
if content_type == "table":
|
if content_type == "table":
|
||||||
content_parts.append(self._format_table_for_slide(elements))
|
content_parts.append(self._formatTableForSlide(elements))
|
||||||
elif content_type == "list":
|
elif content_type == "list":
|
||||||
content_parts.append(self._format_list_for_slide(elements))
|
content_parts.append(self._formatListForSlide(elements))
|
||||||
elif content_type == "heading":
|
elif content_type == "heading":
|
||||||
content_parts.append(self._format_heading_for_slide(elements))
|
content_parts.append(self._formatHeadingForSlide(elements))
|
||||||
elif content_type == "paragraph":
|
elif content_type == "paragraph":
|
||||||
content_parts.append(self._format_paragraph_for_slide(elements))
|
content_parts.append(self._formatParagraphForSlide(elements))
|
||||||
elif content_type == "code":
|
elif content_type == "code":
|
||||||
content_parts.append(self._format_code_for_slide(elements))
|
content_parts.append(self._formatCodeForSlide(elements))
|
||||||
else:
|
else:
|
||||||
content_parts.append(self._format_paragraph_for_slide(elements))
|
content_parts.append(self._format_paragraph_for_slide(elements))
|
||||||
|
|
||||||
|
|
@ -640,7 +640,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error creating slide from section: {str(e)}")
|
logger.warning(f"Error creating slide from section: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str:
|
def _formatTableForSlide(self, elements: List[Dict[str, Any]]) -> str:
|
||||||
"""Format table data for slide presentation."""
|
"""Format table data for slide presentation."""
|
||||||
try:
|
try:
|
||||||
# Extract table data from elements array
|
# Extract table data from elements array
|
||||||
|
|
@ -681,7 +681,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error formatting table for slide: {str(e)}")
|
logger.warning(f"Error formatting table for slide: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
|
def _formatListForSlide(self, list_data: Dict[str, Any]) -> str:
|
||||||
"""Format list data for slide presentation."""
|
"""Format list data for slide presentation."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = list_data.get("items", [])
|
||||||
|
|
@ -713,7 +713,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error formatting list for slide: {str(e)}")
|
logger.warning(f"Error formatting list for slide: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
|
def _formatHeadingForSlide(self, heading_data: Dict[str, Any]) -> str:
|
||||||
"""Format heading data for slide presentation."""
|
"""Format heading data for slide presentation."""
|
||||||
try:
|
try:
|
||||||
text = heading_data.get("text", "")
|
text = heading_data.get("text", "")
|
||||||
|
|
@ -728,7 +728,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error formatting heading for slide: {str(e)}")
|
logger.warning(f"Error formatting heading for slide: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
|
def _formatParagraphForSlide(self, paragraph_data: Dict[str, Any]) -> str:
|
||||||
"""Format paragraph data for slide presentation."""
|
"""Format paragraph data for slide presentation."""
|
||||||
try:
|
try:
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraph_data.get("text", "")
|
||||||
|
|
@ -747,7 +747,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
|
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
|
def _formatCodeForSlide(self, code_data: Dict[str, Any]) -> str:
|
||||||
"""Format code data for slide presentation."""
|
"""Format code data for slide presentation."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = code_data.get("code", "")
|
||||||
|
|
@ -770,7 +770,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error formatting code for slide: {str(e)}")
|
logger.warning(f"Error formatting code for slide: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
|
def _getSlideLayoutIndex(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
|
||||||
"""Determine the best professional slide layout based on content."""
|
"""Determine the best professional slide layout based on content."""
|
||||||
try:
|
try:
|
||||||
content = slide_data.get("content", "")
|
content = slide_data.get("content", "")
|
||||||
|
|
@ -804,7 +804,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error determining slide layout: {str(e)}")
|
logger.warning(f"Error determining slide layout: {str(e)}")
|
||||||
return 1 # Default to title and content layout
|
return 1 # Default to title and content layout
|
||||||
|
|
||||||
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
"""Create slides from sections based on content density and user intent."""
|
"""Create slides from sections based on content density and user intent."""
|
||||||
try:
|
try:
|
||||||
slides = []
|
slides = []
|
||||||
|
|
@ -834,7 +834,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# Add content to current slide
|
# Add content to current slide
|
||||||
formatted_content = self._format_section_content(section)
|
formatted_content = self._formatSectionContent(section)
|
||||||
if formatted_content:
|
if formatted_content:
|
||||||
current_slide_content.append(formatted_content)
|
current_slide_content.append(formatted_content)
|
||||||
|
|
||||||
|
|
@ -851,7 +851,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error creating slides from sections: {str(e)}")
|
logger.warning(f"Error creating slides from sections: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _format_section_content(self, section: Dict[str, Any]) -> str:
|
def _formatSectionContent(self, section: Dict[str, Any]) -> str:
|
||||||
"""Format section content for slide presentation."""
|
"""Format section content for slide presentation."""
|
||||||
try:
|
try:
|
||||||
content_type = section.get("content_type", "paragraph")
|
content_type = section.get("content_type", "paragraph")
|
||||||
|
|
@ -861,15 +861,15 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
content_parts = []
|
content_parts = []
|
||||||
for element in elements:
|
for element in elements:
|
||||||
if content_type == "table":
|
if content_type == "table":
|
||||||
content_parts.append(self._format_table_for_slide([element]))
|
content_parts.append(self._formatTableForSlide([element]))
|
||||||
elif content_type == "list":
|
elif content_type == "list":
|
||||||
content_parts.append(self._format_list_for_slide([element]))
|
content_parts.append(self._formatListForSlide([element]))
|
||||||
elif content_type == "heading":
|
elif content_type == "heading":
|
||||||
content_parts.append(self._format_heading_for_slide([element]))
|
content_parts.append(self._formatHeadingForSlide([element]))
|
||||||
elif content_type == "paragraph":
|
elif content_type == "paragraph":
|
||||||
content_parts.append(self._format_paragraph_for_slide([element]))
|
content_parts.append(self._formatParagraphForSlide([element]))
|
||||||
elif content_type == "code":
|
elif content_type == "code":
|
||||||
content_parts.append(self._format_code_for_slide([element]))
|
content_parts.append(self._formatCodeForSlide([element]))
|
||||||
else:
|
else:
|
||||||
content_parts.append(self._format_paragraph_for_slide([element]))
|
content_parts.append(self._format_paragraph_for_slide([element]))
|
||||||
|
|
||||||
|
|
@ -879,7 +879,7 @@ JSON ONLY. NO OTHER TEXT."""
|
||||||
logger.warning(f"Error formatting section content: {str(e)}")
|
logger.warning(f"Error formatting section content: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _format_timestamp(self) -> str:
|
def _formatTimestamp(self) -> str:
|
||||||
"""Format current timestamp for presentation generation."""
|
"""Format current timestamp for presentation generation."""
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ class RendererText(BaseRenderer):
|
||||||
"""Renders content to plain text format with format-specific extraction."""
|
"""Renders content to plain text format with format-specific extraction."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported text formats (excluding formats with dedicated renderers)."""
|
"""Return supported text formats (excluding formats with dedicated renderers)."""
|
||||||
return [
|
return [
|
||||||
'txt', 'text', 'plain',
|
'txt', 'text', 'plain',
|
||||||
|
|
@ -32,7 +32,7 @@ class RendererText(BaseRenderer):
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return [
|
return [
|
||||||
'ascii', 'utf8', 'utf-8', 'code', 'source',
|
'ascii', 'utf8', 'utf-8', 'code', 'source',
|
||||||
|
|
@ -41,166 +41,166 @@ class RendererText(BaseRenderer):
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for text renderer."""
|
"""Return priority for text renderer."""
|
||||||
return 90
|
return 90
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to plain text format."""
|
"""Render extracted JSON content to plain text format."""
|
||||||
try:
|
try:
|
||||||
# Generate text from JSON structure
|
# Generate text from JSON structure
|
||||||
text_content = self._generate_text_from_json(extracted_content, title)
|
textContent = self._generateTextFromJson(extractedContent, title)
|
||||||
|
|
||||||
return text_content, "text/plain"
|
return textContent, "text/plain"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering text: {str(e)}")
|
self.logger.error(f"Error rendering text: {str(e)}")
|
||||||
# Return minimal text fallback
|
# Return minimal text fallback
|
||||||
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
||||||
|
|
||||||
def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||||
"""Generate text content from structured JSON document."""
|
"""Generate text content from structured JSON document."""
|
||||||
try:
|
try:
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(jsonContent, dict):
|
||||||
raise ValueError("JSON content must be a dictionary")
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
if "sections" not in json_content:
|
if "sections" not in jsonContent:
|
||||||
raise ValueError("JSON content must contain 'sections' field")
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
# Use title from JSON metadata if available, otherwise use provided title
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
document_title = json_content.get("metadata", {}).get("title", title)
|
documentTitle = jsonContent.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
# Build text content
|
# Build text content
|
||||||
text_parts = []
|
textParts = []
|
||||||
|
|
||||||
# Document title
|
# Document title
|
||||||
text_parts.append(document_title)
|
textParts.append(documentTitle)
|
||||||
text_parts.append("=" * len(document_title))
|
textParts.append("=" * len(documentTitle))
|
||||||
text_parts.append("")
|
textParts.append("")
|
||||||
|
|
||||||
# Process each section
|
# Process each section
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
for section in sections:
|
for section in sections:
|
||||||
section_text = self._render_json_section(section)
|
sectionText = self._renderJsonSection(section)
|
||||||
if section_text:
|
if sectionText:
|
||||||
text_parts.append(section_text)
|
textParts.append(sectionText)
|
||||||
text_parts.append("") # Add spacing between sections
|
textParts.append("") # Add spacing between sections
|
||||||
|
|
||||||
# Add generation info
|
# Add generation info
|
||||||
text_parts.append("")
|
textParts.append("")
|
||||||
text_parts.append(f"Generated: {self._format_timestamp()}")
|
textParts.append(f"Generated: {self._formatTimestamp()}")
|
||||||
|
|
||||||
return '\n'.join(text_parts)
|
return '\n'.join(textParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating text from JSON: {str(e)}")
|
self.logger.error(f"Error generating text from JSON: {str(e)}")
|
||||||
raise Exception(f"Text generation failed: {str(e)}")
|
raise Exception(f"Text generation failed: {str(e)}")
|
||||||
|
|
||||||
def _render_json_section(self, section: Dict[str, Any]) -> str:
|
def _renderJsonSection(self, section: Dict[str, Any]) -> str:
|
||||||
"""Render a single JSON section to text."""
|
"""Render a single JSON section to text."""
|
||||||
try:
|
try:
|
||||||
section_type = self._get_section_type(section)
|
sectionType = self._getSectionType(section)
|
||||||
section_data = self._get_section_data(section)
|
sectionData = self._getSectionData(section)
|
||||||
|
|
||||||
if section_type == "table":
|
if sectionType == "table":
|
||||||
# Process the section data to extract table structure
|
# Process the section data to extract table structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_table(processed_data)
|
return self._renderJsonTable(processedData)
|
||||||
elif section_type == "bullet_list":
|
elif sectionType == "bullet_list":
|
||||||
# Process the section data to extract bullet list structure
|
# Process the section data to extract bullet list structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_bullet_list(processed_data)
|
return self._renderJsonBulletList(processedData)
|
||||||
elif section_type == "heading":
|
elif sectionType == "heading":
|
||||||
# Render each heading element in the elements array
|
# Render each heading element in the elements array
|
||||||
# section_data is already the elements array from _get_section_data
|
# sectionData is already the elements array from _getSectionData
|
||||||
rendered_elements = []
|
renderedElements = []
|
||||||
for element in section_data:
|
for element in sectionData:
|
||||||
rendered_elements.append(self._render_json_heading(element))
|
renderedElements.append(self._renderJsonHeading(element))
|
||||||
return "\n".join(rendered_elements)
|
return "\n".join(renderedElements)
|
||||||
elif section_type == "paragraph":
|
elif sectionType == "paragraph":
|
||||||
# Render each paragraph element in the elements array
|
# Render each paragraph element in the elements array
|
||||||
# section_data is already the elements array from _get_section_data
|
# sectionData is already the elements array from _getSectionData
|
||||||
rendered_elements = []
|
renderedElements = []
|
||||||
for element in section_data:
|
for element in sectionData:
|
||||||
rendered_elements.append(self._render_json_paragraph(element))
|
renderedElements.append(self._renderJsonParagraph(element))
|
||||||
return "\n".join(rendered_elements)
|
return "\n".join(renderedElements)
|
||||||
elif section_type == "code_block":
|
elif sectionType == "code_block":
|
||||||
# Process the section data to extract code block structure
|
# Process the section data to extract code block structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_code_block(processed_data)
|
return self._renderJsonCodeBlock(processedData)
|
||||||
elif section_type == "image":
|
elif sectionType == "image":
|
||||||
# Process the section data to extract image structure
|
# Process the section data to extract image structure
|
||||||
processed_data = self._process_section_by_type(section)
|
processedData = self._processSectionByType(section)
|
||||||
return self._render_json_image(processed_data)
|
return self._renderJsonImage(processedData)
|
||||||
else:
|
else:
|
||||||
# Fallback to paragraph for unknown types - render each element
|
# Fallback to paragraph for unknown types - render each element
|
||||||
# section_data is already the elements array from _get_section_data
|
# sectionData is already the elements array from _getSectionData
|
||||||
rendered_elements = []
|
renderedElements = []
|
||||||
for element in section_data:
|
for element in sectionData:
|
||||||
rendered_elements.append(self._render_json_paragraph(element))
|
renderedElements.append(self._renderJsonParagraph(element))
|
||||||
return "\n".join(rendered_elements)
|
return "\n".join(renderedElements)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
self.logger.warning(f"Error rendering section {self._getSectionId(section)}: {str(e)}")
|
||||||
return f"[Error rendering section: {str(e)}]"
|
return f"[Error rendering section: {str(e)}]"
|
||||||
|
|
||||||
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
|
def _renderJsonTable(self, tableData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON table to text."""
|
"""Render a JSON table to text."""
|
||||||
try:
|
try:
|
||||||
headers = table_data.get("headers", [])
|
headers = tableData.get("headers", [])
|
||||||
rows = table_data.get("rows", [])
|
rows = tableData.get("rows", [])
|
||||||
|
|
||||||
if not headers or not rows:
|
if not headers or not rows:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
text_parts = []
|
textParts = []
|
||||||
|
|
||||||
# Create table header
|
# Create table header
|
||||||
header_line = " | ".join(str(header) for header in headers)
|
headerLine = " | ".join(str(header) for header in headers)
|
||||||
text_parts.append(header_line)
|
textParts.append(headerLine)
|
||||||
|
|
||||||
# Add separator line
|
# Add separator line
|
||||||
separator_line = " | ".join("-" * len(str(header)) for header in headers)
|
separatorLine = " | ".join("-" * len(str(header)) for header in headers)
|
||||||
text_parts.append(separator_line)
|
textParts.append(separatorLine)
|
||||||
|
|
||||||
# Add data rows
|
# Add data rows
|
||||||
for row in rows:
|
for row in rows:
|
||||||
row_line = " | ".join(str(cell_data) for cell_data in row)
|
rowLine = " | ".join(str(cellData) for cellData in row)
|
||||||
text_parts.append(row_line)
|
textParts.append(rowLine)
|
||||||
|
|
||||||
return '\n'.join(text_parts)
|
return '\n'.join(textParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering table: {str(e)}")
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
|
def _renderJsonBulletList(self, listData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON bullet list to text."""
|
"""Render a JSON bullet list to text."""
|
||||||
try:
|
try:
|
||||||
items = list_data.get("items", [])
|
items = listData.get("items", [])
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
text_parts = []
|
textParts = []
|
||||||
for item in items:
|
for item in items:
|
||||||
if isinstance(item, str):
|
if isinstance(item, str):
|
||||||
text_parts.append(f"- {item}")
|
textParts.append(f"- {item}")
|
||||||
elif isinstance(item, dict) and "text" in item:
|
elif isinstance(item, dict) and "text" in item:
|
||||||
text_parts.append(f"- {item['text']}")
|
textParts.append(f"- {item['text']}")
|
||||||
|
|
||||||
return '\n'.join(text_parts)
|
return '\n'.join(textParts)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
|
def _renderJsonHeading(self, headingData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON heading to text."""
|
"""Render a JSON heading to text."""
|
||||||
try:
|
try:
|
||||||
level = heading_data.get("level", 1)
|
level = headingData.get("level", 1)
|
||||||
text = heading_data.get("text", "")
|
text = headingData.get("text", "")
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
level = max(1, min(6, level))
|
level = max(1, min(6, level))
|
||||||
|
|
@ -217,21 +217,21 @@ class RendererText(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering heading: {str(e)}")
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
|
def _renderJsonParagraph(self, paragraphData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON paragraph to text."""
|
"""Render a JSON paragraph to text."""
|
||||||
try:
|
try:
|
||||||
text = paragraph_data.get("text", "")
|
text = paragraphData.get("text", "")
|
||||||
return text if text else ""
|
return text if text else ""
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
|
def _renderJsonCodeBlock(self, codeData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON code block to text."""
|
"""Render a JSON code block to text."""
|
||||||
try:
|
try:
|
||||||
code = code_data.get("code", "")
|
code = codeData.get("code", "")
|
||||||
language = code_data.get("language", "")
|
language = codeData.get("language", "")
|
||||||
|
|
||||||
if code:
|
if code:
|
||||||
if language:
|
if language:
|
||||||
|
|
@ -245,12 +245,12 @@ class RendererText(BaseRenderer):
|
||||||
self.logger.warning(f"Error rendering code block: {str(e)}")
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
|
def _renderJsonImage(self, imageData: Dict[str, Any]) -> str:
|
||||||
"""Render a JSON image to text."""
|
"""Render a JSON image to text."""
|
||||||
try:
|
try:
|
||||||
alt_text = image_data.get("altText", "Image")
|
altText = imageData.get("altText", "Image")
|
||||||
return f"[Image: {alt_text}]"
|
return f"[Image: {altText}]"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Error rendering image: {str(e)}")
|
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||||
return f"[Image: {image_data.get('altText', 'Image')}]"
|
return f"[Image: {imageData.get('altText', 'Image')}]"
|
||||||
|
|
|
||||||
|
|
@ -21,41 +21,41 @@ class RendererXlsx(BaseRenderer):
|
||||||
"""Renders content to Excel format using openpyxl."""
|
"""Renders content to Excel format using openpyxl."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_supported_formats(cls) -> List[str]:
|
def getSupportedFormats(cls) -> List[str]:
|
||||||
"""Return supported Excel formats."""
|
"""Return supported Excel formats."""
|
||||||
return ['xlsx', 'xls', 'excel']
|
return ['xlsx', 'xls', 'excel']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_format_aliases(cls) -> List[str]:
|
def getFormatAliases(cls) -> List[str]:
|
||||||
"""Return format aliases."""
|
"""Return format aliases."""
|
||||||
return ['spreadsheet', 'workbook']
|
return ['spreadsheet', 'workbook']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_priority(cls) -> int:
|
def getPriority(cls) -> int:
|
||||||
"""Return priority for Excel renderer."""
|
"""Return priority for Excel renderer."""
|
||||||
return 110
|
return 110
|
||||||
|
|
||||||
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||||
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
||||||
try:
|
try:
|
||||||
if not OPENPYXL_AVAILABLE:
|
if not OPENPYXL_AVAILABLE:
|
||||||
# Fallback to CSV if openpyxl not available
|
# Fallback to CSV if openpyxl not available
|
||||||
from .rendererCsv import RendererCsv
|
from .rendererCsv import RendererCsv
|
||||||
csv_renderer = RendererCsv()
|
csvRenderer = RendererCsv()
|
||||||
csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
|
csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||||
return csv_content, "text/csv"
|
return csvContent, "text/csv"
|
||||||
|
|
||||||
# Generate Excel using AI-analyzed styling
|
# Generate Excel using AI-analyzed styling
|
||||||
excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
|
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering Excel: {str(e)}")
|
self.logger.error(f"Error rendering Excel: {str(e)}")
|
||||||
# Return CSV fallback
|
# Return CSV fallback
|
||||||
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
|
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
|
||||||
|
|
||||||
def _generate_excel(self, content: str, title: str) -> str:
|
def _generateExcel(self, content: str, title: str) -> str:
|
||||||
"""Generate Excel content using openpyxl."""
|
"""Generate Excel content using openpyxl."""
|
||||||
try:
|
try:
|
||||||
# Create workbook
|
# Create workbook
|
||||||
|
|
@ -65,14 +65,14 @@ class RendererXlsx(BaseRenderer):
|
||||||
wb.remove(wb.active)
|
wb.remove(wb.active)
|
||||||
|
|
||||||
# Create sheets
|
# Create sheets
|
||||||
summary_sheet = wb.create_sheet("Summary", 0)
|
summarySheet = wb.create_sheet("Summary", 0)
|
||||||
data_sheet = wb.create_sheet("Data", 1)
|
dataSheet = wb.create_sheet("Data", 1)
|
||||||
analysis_sheet = wb.create_sheet("Analysis", 2)
|
analysisSheet = wb.create_sheet("Analysis", 2)
|
||||||
|
|
||||||
# Add content to sheets
|
# Add content to sheets
|
||||||
self._populate_summary_sheet(summary_sheet, title)
|
self._populateSummarySheet(summarySheet, title)
|
||||||
self._populate_data_sheet(data_sheet, content)
|
self._populateDataSheet(dataSheet, content)
|
||||||
self._populate_analysis_sheet(analysis_sheet, content)
|
self._populateAnalysisSheet(analysisSheet, content)
|
||||||
|
|
||||||
# Save to buffer
|
# Save to buffer
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
|
|
@ -80,16 +80,16 @@ class RendererXlsx(BaseRenderer):
|
||||||
buffer.seek(0)
|
buffer.seek(0)
|
||||||
|
|
||||||
# Convert to base64
|
# Convert to base64
|
||||||
excel_bytes = buffer.getvalue()
|
excelBytes = buffer.getvalue()
|
||||||
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
|
||||||
|
|
||||||
return excel_base64
|
return excelBase64
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating Excel: {str(e)}")
|
self.logger.error(f"Error generating Excel: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _populate_summary_sheet(self, sheet, title: str):
|
def _populateSummarySheet(self, sheet, title: str):
|
||||||
"""Populate the summary sheet."""
|
"""Populate the summary sheet."""
|
||||||
try:
|
try:
|
||||||
# Title
|
# Title
|
||||||
|
|
@ -99,7 +99,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
|
|
||||||
# Generation info
|
# Generation info
|
||||||
sheet['A3'] = "Generated:"
|
sheet['A3'] = "Generated:"
|
||||||
sheet['B3'] = self._format_timestamp()
|
sheet['B3'] = self._formatTimestamp()
|
||||||
sheet['A4'] = "Status:"
|
sheet['A4'] = "Status:"
|
||||||
sheet['B4'] = "Generated Successfully"
|
sheet['B4'] = "Generated Successfully"
|
||||||
|
|
||||||
|
|
@ -116,7 +116,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
|
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
|
||||||
|
|
||||||
def _populate_data_sheet(self, sheet, content: str):
|
def _populateDataSheet(self, sheet, content: str):
|
||||||
"""Populate the data sheet."""
|
"""Populate the data sheet."""
|
||||||
try:
|
try:
|
||||||
# Headers
|
# Headers
|
||||||
|
|
@ -138,8 +138,8 @@ class RendererXlsx(BaseRenderer):
|
||||||
# Check for table data (lines with |)
|
# Check for table data (lines with |)
|
||||||
if '|' in line:
|
if '|' in line:
|
||||||
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
||||||
for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
|
for col, cellData in enumerate(cells[:5], 1): # Limit to 5 columns
|
||||||
sheet.cell(row=row, column=col, value=cell_data)
|
sheet.cell(row=row, column=col, value=cellData)
|
||||||
row += 1
|
row += 1
|
||||||
else:
|
else:
|
||||||
# Regular content
|
# Regular content
|
||||||
|
|
@ -153,7 +153,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate data sheet: {str(e)}")
|
self.logger.warning(f"Could not populate data sheet: {str(e)}")
|
||||||
|
|
||||||
def _populate_analysis_sheet(self, sheet, content: str):
|
def _populateAnalysisSheet(self, sheet, content: str):
|
||||||
"""Populate the analysis sheet."""
|
"""Populate the analysis sheet."""
|
||||||
try:
|
try:
|
||||||
# Title
|
# Title
|
||||||
|
|
@ -169,17 +169,17 @@ class RendererXlsx(BaseRenderer):
|
||||||
row += 1
|
row += 1
|
||||||
|
|
||||||
# Count different types of content
|
# Count different types of content
|
||||||
table_lines = sum(1 for line in lines if '|' in line)
|
tableLines = sum(1 for line in lines if '|' in line)
|
||||||
list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
|
listLines = sum(1 for line in lines if line.startswith(('- ', '* ')))
|
||||||
text_lines = len(lines) - table_lines - list_lines
|
textLines = len(lines) - tableLines - listLines
|
||||||
|
|
||||||
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
|
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
|
||||||
row += 1
|
row += 1
|
||||||
sheet[f'A{row}'] = f"Table Rows: {table_lines}"
|
sheet[f'A{row}'] = f"Table Rows: {tableLines}"
|
||||||
row += 1
|
row += 1
|
||||||
sheet[f'A{row}'] = f"List Items: {list_lines}"
|
sheet[f'A{row}'] = f"List Items: {listLines}"
|
||||||
row += 1
|
row += 1
|
||||||
sheet[f'A{row}'] = f"Text Lines: {text_lines}"
|
sheet[f'A{row}'] = f"Text Lines: {textLines}"
|
||||||
row += 2
|
row += 2
|
||||||
|
|
||||||
# Recommendations
|
# Recommendations
|
||||||
|
|
@ -198,35 +198,35 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
||||||
|
|
||||||
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
async def _generateExcelFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate Excel content from structured JSON document using AI-generated styling."""
|
"""Generate Excel content from structured JSON document using AI-generated styling."""
|
||||||
try:
|
try:
|
||||||
# Debug output
|
# Debug output
|
||||||
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(jsonContent)}", "EXCEL_RENDERER")
|
||||||
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(jsonContent.keys()) if isinstance(jsonContent, dict) else 'Not a dict'}", "EXCEL_RENDERER")
|
||||||
|
|
||||||
# Get AI-generated styling definitions
|
# Get AI-generated styling definitions
|
||||||
styles = await self._get_excel_styles(user_prompt, ai_service)
|
styles = await self._getExcelStyles(userPrompt, aiService)
|
||||||
|
|
||||||
# Validate JSON structure
|
# Validate JSON structure
|
||||||
if not isinstance(json_content, dict):
|
if not isinstance(jsonContent, dict):
|
||||||
raise ValueError("JSON content must be a dictionary")
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
if "sections" not in json_content:
|
if "sections" not in jsonContent:
|
||||||
raise ValueError("JSON content must contain 'sections' field")
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
# Use title from JSON metadata if available, otherwise use provided title
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
document_title = json_content.get("metadata", {}).get("title", title)
|
document_title = jsonContent.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
# Create workbook
|
# Create workbook
|
||||||
wb = Workbook()
|
wb = Workbook()
|
||||||
|
|
||||||
# Create sheets based on content
|
# Create sheets based on content
|
||||||
sheets = self._create_excel_sheets(wb, json_content, styles)
|
sheets = self._createExcelSheets(wb, jsonContent, styles)
|
||||||
self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
|
||||||
|
|
||||||
# Populate sheets with content
|
# Populate sheets with content
|
||||||
self._populate_excel_sheets(sheets, json_content, styles)
|
self._populateExcelSheets(sheets, jsonContent, styles)
|
||||||
|
|
||||||
# Save to buffer
|
# Save to buffer
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
|
|
@ -234,24 +234,24 @@ class RendererXlsx(BaseRenderer):
|
||||||
buffer.seek(0)
|
buffer.seek(0)
|
||||||
|
|
||||||
# Convert to base64
|
# Convert to base64
|
||||||
excel_bytes = buffer.getvalue()
|
excelBytes = buffer.getvalue()
|
||||||
self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excelBytes)}", "EXCEL_RENDERER")
|
||||||
try:
|
try:
|
||||||
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
excelBase64 = base64.b64encode(excelBytes).decode('utf-8')
|
||||||
self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excelBase64)}", "EXCEL_RENDERER")
|
||||||
except Exception as b64_error:
|
except Exception as b64_error:
|
||||||
self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return excel_base64
|
return excelBase64
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
|
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
|
||||||
raise Exception(f"Excel generation failed: {str(e)}")
|
raise Exception(f"Excel generation failed: {str(e)}")
|
||||||
|
|
||||||
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
async def _getExcelStyles(self, userPrompt: str, aiService=None) -> Dict[str, Any]:
|
||||||
"""Get Excel styling definitions using base template AI styling."""
|
"""Get Excel styling definitions using base template AI styling."""
|
||||||
style_schema = {
|
styleSchema = {
|
||||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||||
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||||
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||||
|
|
@ -261,26 +261,26 @@ class RendererXlsx(BaseRenderer):
|
||||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
||||||
}
|
}
|
||||||
|
|
||||||
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
|
styleTemplate = self._createAiStyleTemplate("xlsx", userPrompt, styleSchema)
|
||||||
# Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion
|
# Use our own _getAiStylesWithExcelColors method to ensure proper color conversion
|
||||||
styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles())
|
styles = await self._getAiStylesWithExcelColors(aiService, styleTemplate, self._getDefaultExcelStyles())
|
||||||
|
|
||||||
# Validate and fix contrast issues
|
# Validate and fix contrast issues
|
||||||
return self._validate_excel_styles_contrast(styles)
|
return self._validateExcelStylesContrast(styles)
|
||||||
|
|
||||||
async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
async def _getAiStylesWithExcelColors(self, aiService, styleTemplate: str, defaultStyles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Get AI styles with proper Excel color conversion."""
|
"""Get AI styles with proper Excel color conversion."""
|
||||||
if not ai_service:
|
if not aiService:
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
|
||||||
|
|
||||||
request_options = AiCallOptions()
|
requestOptions = AiCallOptions()
|
||||||
request_options.operationType = OperationTypeEnum.DATA_GENERATE
|
requestOptions.operationType = OperationTypeEnum.DATA_GENERATE
|
||||||
|
|
||||||
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
request = AiCallRequest(prompt=styleTemplate, context="", options=requestOptions)
|
||||||
response = await ai_service.aiObjects.call(request)
|
response = await aiService.aiObjects.call(request)
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
@ -291,7 +291,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
# Check if result is empty
|
# Check if result is empty
|
||||||
if not result:
|
if not result:
|
||||||
self.logger.warning("AI styling returned empty response, using defaults")
|
self.logger.warning("AI styling returned empty response, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
# Extract JSON from markdown if present
|
# Extract JSON from markdown if present
|
||||||
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||||
|
|
@ -312,46 +312,46 @@ class RendererXlsx(BaseRenderer):
|
||||||
styles = json.loads(result)
|
styles = json.loads(result)
|
||||||
except json.JSONDecodeError as json_error:
|
except json.JSONDecodeError as json_error:
|
||||||
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
|
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
# Convert colors to Excel aRGB format
|
# Convert colors to Excel aRGB format
|
||||||
styles = self._convert_colors_format(styles)
|
styles = self._convertColorsFormat(styles)
|
||||||
|
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||||
return default_styles
|
return defaultStyles
|
||||||
|
|
||||||
def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str:
|
def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str:
|
||||||
"""Get a safe aRGB color value for Excel (without # prefix)."""
|
"""Get a safe aRGB color value for Excel (without # prefix)."""
|
||||||
if not isinstance(color_value, str):
|
if not isinstance(colorValue, str):
|
||||||
return default
|
return default
|
||||||
|
|
||||||
# Remove # prefix if present
|
# Remove # prefix if present
|
||||||
if color_value.startswith('#'):
|
if colorValue.startswith('#'):
|
||||||
color_value = color_value[1:]
|
colorValue = colorValue[1:]
|
||||||
|
|
||||||
if len(color_value) == 6:
|
if len(colorValue) == 6:
|
||||||
# Convert RRGGBB to AARRGGBB
|
# Convert RRGGBB to AARRGGBB
|
||||||
return f"FF{color_value}"
|
return f"FF{colorValue}"
|
||||||
elif len(color_value) == 8:
|
elif len(colorValue) == 8:
|
||||||
# Already aRGB format
|
# Already aRGB format
|
||||||
return color_value
|
return colorValue
|
||||||
else:
|
else:
|
||||||
# Unexpected format, return default
|
# Unexpected format, return default
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _convertColorsFormat(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Convert hex colors to aRGB format for Excel compatibility."""
|
"""Convert hex colors to aRGB format for Excel compatibility."""
|
||||||
try:
|
try:
|
||||||
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
|
||||||
for style_name, style_config in styles.items():
|
for styleName, styleConfig in styles.items():
|
||||||
if isinstance(style_config, dict):
|
if isinstance(styleConfig, dict):
|
||||||
for prop, value in style_config.items():
|
for prop, value in styleConfig.items():
|
||||||
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
||||||
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
|
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
|
||||||
styles[style_name][prop] = f"FF{value[1:]}"
|
styles[styleName][prop] = f"FF{value[1:]}"
|
||||||
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
||||||
pass # Already aRGB format
|
pass # Already aRGB format
|
||||||
elif isinstance(value, str) and value.startswith('#'):
|
elif isinstance(value, str) and value.startswith('#'):
|
||||||
|
|
@ -360,34 +360,34 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return styles
|
return styles
|
||||||
|
|
||||||
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _validateExcelStylesContrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Validate and fix contrast issues in AI-generated styles."""
|
"""Validate and fix contrast issues in AI-generated styles."""
|
||||||
try:
|
try:
|
||||||
# Fix table header contrast
|
# Fix table header contrast
|
||||||
if "table_header" in styles:
|
if "table_header" in styles:
|
||||||
header = styles["table_header"]
|
header = styles["table_header"]
|
||||||
bg_color = header.get("background", "#FFFFFF")
|
bgColor = header.get("background", "#FFFFFF")
|
||||||
text_color = header.get("text_color", "#000000")
|
textColor = header.get("text_color", "#000000")
|
||||||
|
|
||||||
# If both are white or both are dark, fix it
|
# If both are white or both are dark, fix it
|
||||||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
||||||
header["background"] = "#4F4F4F"
|
header["background"] = "#4F4F4F"
|
||||||
header["text_color"] = "#FFFFFF"
|
header["text_color"] = "#FFFFFF"
|
||||||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
||||||
header["background"] = "#4F4F4F"
|
header["background"] = "#4F4F4F"
|
||||||
header["text_color"] = "#FFFFFF"
|
header["text_color"] = "#FFFFFF"
|
||||||
|
|
||||||
# Fix table cell contrast
|
# Fix table cell contrast
|
||||||
if "table_cell" in styles:
|
if "table_cell" in styles:
|
||||||
cell = styles["table_cell"]
|
cell = styles["table_cell"]
|
||||||
bg_color = cell.get("background", "#FFFFFF")
|
bgColor = cell.get("background", "#FFFFFF")
|
||||||
text_color = cell.get("text_color", "#000000")
|
textColor = cell.get("text_color", "#000000")
|
||||||
|
|
||||||
# If both are white or both are dark, fix it
|
# If both are white or both are dark, fix it
|
||||||
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
if bgColor.upper() == "#FFFFFF" and textColor.upper() == "#FFFFFF":
|
||||||
cell["background"] = "#FFFFFF"
|
cell["background"] = "#FFFFFF"
|
||||||
cell["text_color"] = "#2F2F2F"
|
cell["text_color"] = "#2F2F2F"
|
||||||
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
elif bgColor.upper() == "#000000" and textColor.upper() == "#000000":
|
||||||
cell["background"] = "#FFFFFF"
|
cell["background"] = "#FFFFFF"
|
||||||
cell["text_color"] = "#2F2F2F"
|
cell["text_color"] = "#2F2F2F"
|
||||||
|
|
||||||
|
|
@ -395,9 +395,9 @@ class RendererXlsx(BaseRenderer):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Style validation failed: {str(e)}")
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||||
return self._get_default_excel_styles()
|
return self._getDefaultExcelStyles()
|
||||||
|
|
||||||
def _get_default_excel_styles(self) -> Dict[str, Any]:
|
def _getDefaultExcelStyles(self) -> Dict[str, Any]:
|
||||||
"""Default Excel styles with aRGB color format."""
|
"""Default Excel styles with aRGB color format."""
|
||||||
return {
|
return {
|
||||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||||
|
|
@ -409,104 +409,104 @@ class RendererXlsx(BaseRenderer):
|
||||||
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
def _createExcelSheets(self, wb: Workbook, jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Create Excel sheets based on content structure and user intent."""
|
"""Create Excel sheets based on content structure and user intent."""
|
||||||
sheets = {}
|
sheets = {}
|
||||||
|
|
||||||
# Get sheet names from AI styles or generate based on content
|
# Get sheet names from AI styles or generate based on content
|
||||||
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
|
sheetNames = styles.get("sheet_names", self._generateSheetNamesFromContent(jsonContent))
|
||||||
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER")
|
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheetNames}", "EXCEL_RENDERER")
|
||||||
|
|
||||||
# Create sheets
|
# Create sheets
|
||||||
for i, sheet_name in enumerate(sheet_names):
|
for i, sheetName in enumerate(sheetNames):
|
||||||
if i == 0:
|
if i == 0:
|
||||||
# Use the default sheet for the first sheet
|
# Use the default sheet for the first sheet
|
||||||
sheet = wb.active
|
sheet = wb.active
|
||||||
sheet.title = sheet_name
|
sheet.title = sheetName
|
||||||
else:
|
else:
|
||||||
# Create additional sheets
|
# Create additional sheets
|
||||||
sheet = wb.create_sheet(sheet_name, i)
|
sheet = wb.create_sheet(sheetName, i)
|
||||||
sheets[sheet_name.lower()] = sheet
|
sheets[sheetName.lower()] = sheet
|
||||||
|
|
||||||
return sheets
|
return sheets
|
||||||
|
|
||||||
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
|
def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]:
|
||||||
"""Generate sheet names based on actual content structure."""
|
"""Generate sheet names based on actual content structure."""
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
|
|
||||||
# If no sections, create a single sheet
|
# If no sections, create a single sheet
|
||||||
if not sections:
|
if not sections:
|
||||||
return ["Content"]
|
return ["Content"]
|
||||||
|
|
||||||
# Generate sheet names based on content structure
|
# Generate sheet names based on content structure
|
||||||
sheet_names = []
|
sheetNames = []
|
||||||
|
|
||||||
# Check if we have multiple table sections
|
# Check if we have multiple table sections
|
||||||
table_sections = [s for s in sections if s.get("content_type") == "table"]
|
tableSections = [s for s in sections if s.get("content_type") == "table"]
|
||||||
|
|
||||||
if len(table_sections) > 1:
|
if len(tableSections) > 1:
|
||||||
# Create separate sheets for each table
|
# Create separate sheets for each table
|
||||||
for i, section in enumerate(table_sections, 1):
|
for i, section in enumerate(tableSections, 1):
|
||||||
section_title = section.get("title", f"Table {i}")
|
sectionTitle = section.get("title", f"Table {i}")
|
||||||
sheet_names.append(section_title[:31]) # Excel sheet name limit
|
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
|
||||||
else:
|
else:
|
||||||
# Single table or mixed content - create main sheet
|
# Single table or mixed content - create main sheet
|
||||||
document_title = json_content.get("metadata", {}).get("title", "Document")
|
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
|
||||||
sheet_names.append(document_title[:31]) # Excel sheet name limit
|
sheetNames.append(documentTitle[:31]) # Excel sheet name limit
|
||||||
|
|
||||||
# Add additional sheets for other content types
|
# Add additional sheets for other content types
|
||||||
content_types = set()
|
contentTypes = set()
|
||||||
for section in sections:
|
for section in sections:
|
||||||
content_type = section.get("content_type", "paragraph")
|
contentType = section.get("content_type", "paragraph")
|
||||||
content_types.add(content_type)
|
contentTypes.add(contentType)
|
||||||
|
|
||||||
if "table" in content_types and len(table_sections) == 1:
|
if "table" in contentTypes and len(tableSections) == 1:
|
||||||
sheet_names.append("Table Data")
|
sheetNames.append("Table Data")
|
||||||
if "list" in content_types:
|
if "list" in contentTypes:
|
||||||
sheet_names.append("Lists")
|
sheetNames.append("Lists")
|
||||||
if "paragraph" in content_types or "heading" in content_types:
|
if "paragraph" in contentTypes or "heading" in contentTypes:
|
||||||
sheet_names.append("Text")
|
sheetNames.append("Text")
|
||||||
|
|
||||||
# Limit to 4 sheets maximum
|
# Limit to 4 sheets maximum
|
||||||
return sheet_names[:4]
|
return sheetNames[:4]
|
||||||
|
|
||||||
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
"""Populate Excel sheets with content from JSON based on actual sheet names."""
|
"""Populate Excel sheets with content from JSON based on actual sheet names."""
|
||||||
try:
|
try:
|
||||||
# Get the actual sheet names that were created
|
# Get the actual sheet names that were created
|
||||||
sheet_names = list(sheets.keys())
|
sheetNames = list(sheets.keys())
|
||||||
|
|
||||||
if not sheet_names:
|
if not sheetNames:
|
||||||
return
|
return
|
||||||
|
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
table_sections = [s for s in sections if s.get("content_type") == "table"]
|
tableSections = [s for s in sections if s.get("content_type") == "table"]
|
||||||
|
|
||||||
if len(table_sections) > 1:
|
if len(tableSections) > 1:
|
||||||
# Multiple tables - populate each sheet with its corresponding table
|
# Multiple tables - populate each sheet with its corresponding table
|
||||||
for i, section in enumerate(table_sections):
|
for i, section in enumerate(tableSections):
|
||||||
if i < len(sheet_names):
|
if i < len(sheetNames):
|
||||||
sheet_name = sheet_names[i]
|
sheetName = sheetNames[i]
|
||||||
sheet = sheets[sheet_name]
|
sheet = sheets[sheetName]
|
||||||
self._populate_table_sheet(sheet, section, styles, f"Table {i+1}")
|
self._populateTableSheet(sheet, section, styles, f"Table {i+1}")
|
||||||
else:
|
else:
|
||||||
# Single table or mixed content - use original logic
|
# Single table or mixed content - use original logic
|
||||||
first_sheet_name = sheet_names[0]
|
firstSheetName = sheetNames[0]
|
||||||
self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
|
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
|
||||||
|
|
||||||
# If we have multiple sheets, distribute content by type
|
# If we have multiple sheets, distribute content by type
|
||||||
if len(sheet_names) > 1:
|
if len(sheetNames) > 1:
|
||||||
self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
|
self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
|
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
|
||||||
|
|
||||||
def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str):
|
def _populateTableSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheetTitle: str):
|
||||||
"""Populate a sheet with a single table section."""
|
"""Populate a sheet with a single table section."""
|
||||||
try:
|
try:
|
||||||
# Sheet title
|
# Sheet title
|
||||||
sheet['A1'] = sheet_title
|
sheet['A1'] = sheetTitle
|
||||||
sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
|
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
|
||||||
sheet['A1'].alignment = Alignment(horizontal="center")
|
sheet['A1'].alignment = Alignment(horizontal="center")
|
||||||
|
|
||||||
# Get table data from elements (canonical JSON format)
|
# Get table data from elements (canonical JSON format)
|
||||||
|
|
@ -528,9 +528,9 @@ class RendererXlsx(BaseRenderer):
|
||||||
for col, header in enumerate(headers, 1):
|
for col, header in enumerate(headers, 1):
|
||||||
cell = sheet.cell(row=3, column=col, value=header)
|
cell = sheet.cell(row=3, column=col, value=header)
|
||||||
if header_style.get("bold"):
|
if header_style.get("bold"):
|
||||||
cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
|
cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
|
||||||
if header_style.get("background"):
|
if header_style.get("background"):
|
||||||
cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
|
cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
|
||||||
|
|
||||||
# Add rows
|
# Add rows
|
||||||
cell_style = styles.get("table_cell", {})
|
cell_style = styles.get("table_cell", {})
|
||||||
|
|
@ -538,7 +538,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
for col_idx, cell_value in enumerate(row_data, 1):
|
for col_idx, cell_value in enumerate(row_data, 1):
|
||||||
cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
|
cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
|
||||||
if cell_style.get("text_color"):
|
if cell_style.get("text_color"):
|
||||||
cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
|
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
|
||||||
|
|
||||||
# Auto-adjust column widths
|
# Auto-adjust column widths
|
||||||
for col in range(1, len(headers) + 1):
|
for col in range(1, len(headers) + 1):
|
||||||
|
|
@ -547,17 +547,17 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate table sheet: {str(e)}")
|
self.logger.warning(f"Could not populate table sheet: {str(e)}")
|
||||||
|
|
||||||
def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
|
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
|
||||||
"""Populate the main sheet with document overview and all content."""
|
"""Populate the main sheet with document overview and all content."""
|
||||||
try:
|
try:
|
||||||
# Document title
|
# Document title
|
||||||
document_title = json_content.get("metadata", {}).get("title", "Generated Report")
|
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
|
||||||
sheet['A1'] = document_title
|
sheet['A1'] = documentTitle
|
||||||
|
|
||||||
# Safety check for title style
|
# Safety check for title style
|
||||||
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
|
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
|
||||||
try:
|
try:
|
||||||
safe_color = self._get_safe_color(title_style["color"])
|
safe_color = self._getSafeColor(title_style["color"])
|
||||||
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
|
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
|
||||||
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
|
sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
|
||||||
except Exception as font_error:
|
except Exception as font_error:
|
||||||
|
|
@ -567,12 +567,12 @@ class RendererXlsx(BaseRenderer):
|
||||||
|
|
||||||
# Generation info
|
# Generation info
|
||||||
sheet['A3'] = "Generated:"
|
sheet['A3'] = "Generated:"
|
||||||
sheet['B3'] = self._format_timestamp()
|
sheet['B3'] = self._formatTimestamp()
|
||||||
sheet['A4'] = "Status:"
|
sheet['A4'] = "Status:"
|
||||||
sheet['B4'] = "Generated Successfully"
|
sheet['B4'] = "Generated Successfully"
|
||||||
|
|
||||||
# Document metadata
|
# Document metadata
|
||||||
metadata = json_content.get("metadata", {})
|
metadata = jsonContent.get("metadata", {})
|
||||||
if metadata:
|
if metadata:
|
||||||
sheet['A6'] = "Document Information:"
|
sheet['A6'] = "Document Information:"
|
||||||
sheet['A6'].font = Font(bold=True)
|
sheet['A6'].font = Font(bold=True)
|
||||||
|
|
@ -585,7 +585,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
row += 1
|
row += 1
|
||||||
|
|
||||||
# Content overview
|
# Content overview
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
sheet[f'A{row + 1}'] = "Content Overview:"
|
sheet[f'A{row + 1}'] = "Content Overview:"
|
||||||
sheet[f'A{row + 1}'].font = Font(bold=True)
|
sheet[f'A{row + 1}'].font = Font(bold=True)
|
||||||
|
|
||||||
|
|
@ -605,7 +605,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
# Add all content to this sheet
|
# Add all content to this sheet
|
||||||
row += 2
|
row += 2
|
||||||
for section in sections:
|
for section in sections:
|
||||||
row = self._add_section_to_sheet(sheet, section, styles, row)
|
row = self._addSectionToSheet(sheet, section, styles, row)
|
||||||
row += 1 # Empty row between sections
|
row += 1 # Empty row between sections
|
||||||
|
|
||||||
# Auto-adjust column widths
|
# Auto-adjust column widths
|
||||||
|
|
@ -615,34 +615,34 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate main sheet: {str(e)}")
|
self.logger.warning(f"Could not populate main sheet: {str(e)}")
|
||||||
|
|
||||||
def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
|
def _populateContentTypeSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any], sheetNames: List[str]):
|
||||||
"""Populate additional sheets based on content types."""
|
"""Populate additional sheets based on content types."""
|
||||||
try:
|
try:
|
||||||
sections = json_content.get("sections", [])
|
sections = jsonContent.get("sections", [])
|
||||||
|
|
||||||
for sheet_name in sheet_names:
|
for sheetName in sheetNames:
|
||||||
if sheet_name not in sheets:
|
if sheetName not in sheets:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
sheet = sheets[sheet_name]
|
sheet = sheets[sheetName]
|
||||||
sheet_title = sheet_name.title()
|
sheetTitle = sheetName.title()
|
||||||
sheet['A1'] = sheet_title
|
sheet['A1'] = sheetTitle
|
||||||
sheet['A1'].font = Font(size=16, bold=True)
|
sheet['A1'].font = Font(size=16, bold=True)
|
||||||
|
|
||||||
row = 3
|
row = 3
|
||||||
|
|
||||||
# Filter sections by content type
|
# Filter sections by content type
|
||||||
if sheet_name == "tables":
|
if sheetName == "tables":
|
||||||
filtered_sections = [s for s in sections if s.get("content_type") == "table"]
|
filtered_sections = [s for s in sections if s.get("content_type") == "table"]
|
||||||
elif sheet_name == "lists":
|
elif sheetName == "lists":
|
||||||
filtered_sections = [s for s in sections if s.get("content_type") == "list"]
|
filtered_sections = [s for s in sections if s.get("content_type") == "list"]
|
||||||
elif sheet_name == "text":
|
elif sheetName == "text":
|
||||||
filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
|
filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
|
||||||
else:
|
else:
|
||||||
filtered_sections = sections
|
filtered_sections = sections
|
||||||
|
|
||||||
for section in filtered_sections:
|
for section in filtered_sections:
|
||||||
row = self._add_section_to_sheet(sheet, section, styles, row)
|
row = self._addSectionToSheet(sheet, section, styles, row)
|
||||||
row += 1 # Empty row between sections
|
row += 1 # Empty row between sections
|
||||||
|
|
||||||
# Auto-adjust column widths
|
# Auto-adjust column widths
|
||||||
|
|
@ -652,15 +652,15 @@ class RendererXlsx(BaseRenderer):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not populate content type sheets: {str(e)}")
|
self.logger.warning(f"Could not populate content type sheets: {str(e)}")
|
||||||
|
|
||||||
def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
def _addSectionToSheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||||
"""Add a section to a sheet and return the next row."""
|
"""Add a section to a sheet and return the next row."""
|
||||||
try:
|
try:
|
||||||
# Add section title
|
# Add section title
|
||||||
section_title = section.get("title")
|
section_title = section.get("title")
|
||||||
if section_title:
|
if section_title:
|
||||||
sheet[f'A{start_row}'] = f"# {section_title}"
|
sheet[f'A{startRow}'] = f"# {section_title}"
|
||||||
sheet[f'A{start_row}'].font = Font(bold=True)
|
sheet[f'A{startRow}'].font = Font(bold=True)
|
||||||
start_row += 1
|
startRow += 1
|
||||||
|
|
||||||
# Process section based on type
|
# Process section based on type
|
||||||
section_type = section.get("content_type", "paragraph")
|
section_type = section.get("content_type", "paragraph")
|
||||||
|
|
@ -669,23 +669,23 @@ class RendererXlsx(BaseRenderer):
|
||||||
elements = section.get("elements", [])
|
elements = section.get("elements", [])
|
||||||
for element in elements:
|
for element in elements:
|
||||||
if section_type == "table":
|
if section_type == "table":
|
||||||
start_row = self._add_table_to_excel(sheet, element, styles, start_row)
|
startRow = self._addTableToExcel(sheet, element, styles, startRow)
|
||||||
elif section_type == "list":
|
elif section_type == "list":
|
||||||
start_row = self._add_list_to_excel(sheet, element, styles, start_row)
|
startRow = self._addListToExcel(sheet, element, styles, startRow)
|
||||||
elif section_type == "paragraph":
|
elif section_type == "paragraph":
|
||||||
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
|
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
|
||||||
elif section_type == "heading":
|
elif section_type == "heading":
|
||||||
start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
|
startRow = self._addHeadingToExcel(sheet, element, styles, startRow)
|
||||||
else:
|
else:
|
||||||
start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
|
startRow = self._addParagraphToExcel(sheet, element, styles, startRow)
|
||||||
|
|
||||||
return start_row
|
return startRow
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not add section to sheet: {str(e)}")
|
self.logger.warning(f"Could not add section to sheet: {str(e)}")
|
||||||
return start_row + 1
|
return startRow + 1
|
||||||
|
|
||||||
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
def _addTableToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||||
"""Add a table element to Excel sheet."""
|
"""Add a table element to Excel sheet."""
|
||||||
try:
|
try:
|
||||||
# In canonical JSON format, table elements have headers and rows directly
|
# In canonical JSON format, table elements have headers and rows directly
|
||||||
|
|
@ -693,99 +693,99 @@ class RendererXlsx(BaseRenderer):
|
||||||
rows = element.get("rows", [])
|
rows = element.get("rows", [])
|
||||||
|
|
||||||
if not headers and not rows:
|
if not headers and not rows:
|
||||||
return start_row
|
return startRow
|
||||||
|
|
||||||
# Add headers
|
# Add headers
|
||||||
header_style = styles.get("table_header", {})
|
header_style = styles.get("table_header", {})
|
||||||
for col, header in enumerate(headers, 1):
|
for col, header in enumerate(headers, 1):
|
||||||
cell = sheet.cell(row=start_row, column=col, value=header)
|
cell = sheet.cell(row=startRow, column=col, value=header)
|
||||||
if header_style.get("bold"):
|
if header_style.get("bold"):
|
||||||
cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
|
cell.font = Font(bold=True, color=self._getSafeColor(header_style.get("text_color", "FF000000")))
|
||||||
if header_style.get("background"):
|
if header_style.get("background"):
|
||||||
cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
|
cell.fill = PatternFill(start_color=self._getSafeColor(header_style["background"]), end_color=self._getSafeColor(header_style["background"]), fill_type="solid")
|
||||||
|
|
||||||
start_row += 1
|
startRow += 1
|
||||||
|
|
||||||
# Add rows
|
# Add rows
|
||||||
cell_style = styles.get("table_cell", {})
|
cell_style = styles.get("table_cell", {})
|
||||||
for row_data in rows:
|
for row_data in rows:
|
||||||
for col, cell_value in enumerate(row_data, 1):
|
for col, cell_value in enumerate(row_data, 1):
|
||||||
cell = sheet.cell(row=start_row, column=col, value=cell_value)
|
cell = sheet.cell(row=startRow, column=col, value=cell_value)
|
||||||
if cell_style.get("text_color"):
|
if cell_style.get("text_color"):
|
||||||
cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
|
cell.font = Font(color=self._getSafeColor(cell_style["text_color"]))
|
||||||
start_row += 1
|
startRow += 1
|
||||||
|
|
||||||
return start_row
|
return startRow
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not add table to Excel: {str(e)}")
|
self.logger.warning(f"Could not add table to Excel: {str(e)}")
|
||||||
return start_row + 1
|
return startRow + 1
|
||||||
|
|
||||||
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
def _addListToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||||
"""Add a list element to Excel sheet."""
|
"""Add a list element to Excel sheet."""
|
||||||
try:
|
try:
|
||||||
list_items = element.get("items", [])
|
list_items = element.get("items", [])
|
||||||
|
|
||||||
list_style = styles.get("bullet_list", {})
|
list_style = styles.get("bullet_list", {})
|
||||||
for item in list_items:
|
for item in list_items:
|
||||||
sheet.cell(row=start_row, column=1, value=f"• {item}")
|
sheet.cell(row=startRow, column=1, value=f"• {item}")
|
||||||
if list_style.get("color"):
|
if list_style.get("color"):
|
||||||
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"]))
|
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(list_style["color"]))
|
||||||
start_row += 1
|
startRow += 1
|
||||||
|
|
||||||
return start_row
|
return startRow
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not add list to Excel: {str(e)}")
|
self.logger.warning(f"Could not add list to Excel: {str(e)}")
|
||||||
return start_row + 1
|
return startRow + 1
|
||||||
|
|
||||||
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
def _addParagraphToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||||
"""Add a paragraph element to Excel sheet."""
|
"""Add a paragraph element to Excel sheet."""
|
||||||
try:
|
try:
|
||||||
text = element.get("text", "")
|
text = element.get("text", "")
|
||||||
if text:
|
if text:
|
||||||
sheet.cell(row=start_row, column=1, value=text)
|
sheet.cell(row=startRow, column=1, value=text)
|
||||||
|
|
||||||
paragraph_style = styles.get("paragraph", {})
|
paragraph_style = styles.get("paragraph", {})
|
||||||
if paragraph_style.get("color"):
|
if paragraph_style.get("color"):
|
||||||
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"]))
|
sheet.cell(row=startRow, column=1).font = Font(color=self._getSafeColor(paragraph_style["color"]))
|
||||||
|
|
||||||
start_row += 1
|
startRow += 1
|
||||||
|
|
||||||
return start_row
|
return startRow
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
|
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
|
||||||
return start_row + 1
|
return startRow + 1
|
||||||
|
|
||||||
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
|
def _addHeadingToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int:
|
||||||
"""Add a heading element to Excel sheet."""
|
"""Add a heading element to Excel sheet."""
|
||||||
try:
|
try:
|
||||||
text = element.get("text", "")
|
text = element.get("text", "")
|
||||||
level = element.get("level", 1)
|
level = element.get("level", 1)
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
sheet.cell(row=start_row, column=1, value=text)
|
sheet.cell(row=startRow, column=1, value=text)
|
||||||
|
|
||||||
heading_style = styles.get("heading", {})
|
heading_style = styles.get("heading", {})
|
||||||
font_size = heading_style.get("font_size", 14)
|
font_size = heading_style.get("font_size", 14)
|
||||||
if level > 1:
|
if level > 1:
|
||||||
font_size = max(10, font_size - (level - 1) * 2)
|
font_size = max(10, font_size - (level - 1) * 2)
|
||||||
|
|
||||||
sheet.cell(row=start_row, column=1).font = Font(
|
sheet.cell(row=startRow, column=1).font = Font(
|
||||||
size=font_size,
|
size=font_size,
|
||||||
bold=True,
|
bold=True,
|
||||||
color=self._get_safe_color(heading_style.get("color", "FF000000"))
|
color=self._getSafeColor(heading_style.get("color", "FF000000"))
|
||||||
)
|
)
|
||||||
|
|
||||||
start_row += 1
|
startRow += 1
|
||||||
|
|
||||||
return start_row
|
return startRow
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
|
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
|
||||||
return start_row + 1
|
return startRow + 1
|
||||||
|
|
||||||
def _format_timestamp(self) -> str:
|
def _formatTimestamp(self) -> str:
|
||||||
"""Format current timestamp for document generation."""
|
"""Format current timestamp for document generation."""
|
||||||
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||||
|
|
|
||||||
|
|
@ -1,25 +1,32 @@
|
||||||
"""
|
"""
|
||||||
JSON Schema definitions for AI-generated document structures.
|
JSON Schema definitions for AI-generated document structures (unified).
|
||||||
This module provides schemas that guide AI to generate structured JSON output.
|
This module provides schemas that guide AI to generate structured JSON output
|
||||||
|
that matches the master template in modules.datamodels.datamodelJson.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
|
||||||
def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
def getMultiDocumentSchema() -> Dict[str, Any]:
|
||||||
"""Get the JSON schema for multi-document generation."""
|
"""Get the JSON schema for multi-document generation (unified)."""
|
||||||
return {
|
return {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": ["metadata", "documents"],
|
"required": ["metadata", "documents"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": ["title", "split_strategy"],
|
"required": ["split_strategy"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"title": {"type": "string", "description": "Document title"},
|
|
||||||
"split_strategy": {
|
"split_strategy": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
|
"enum": [
|
||||||
|
"single_document",
|
||||||
|
"per_entity",
|
||||||
|
"by_section",
|
||||||
|
"by_criteria",
|
||||||
|
"by_data_type",
|
||||||
|
"custom"
|
||||||
|
],
|
||||||
"description": "Strategy for splitting content into multiple files"
|
"description": "Strategy for splitting content into multiple files"
|
||||||
},
|
},
|
||||||
"splitCriteria": {
|
"splitCriteria": {
|
||||||
|
|
@ -30,7 +37,6 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
|
"description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
|
||||||
},
|
},
|
||||||
"author": {"type": "string", "description": "Document author (optional)"},
|
|
||||||
"source_documents": {
|
"source_documents": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
|
|
@ -38,7 +44,7 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
},
|
},
|
||||||
"extraction_method": {
|
"extraction_method": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "ai_extraction",
|
"default": "ai_generation",
|
||||||
"description": "Method used for extraction"
|
"description": "Method used for extraction"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -64,7 +70,15 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"title": {"type": "string", "description": "Section title (optional)"},
|
"title": {"type": "string", "description": "Section title (optional)"},
|
||||||
"content_type": {
|
"content_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
|
"enum": [
|
||||||
|
"table",
|
||||||
|
"bullet_list",
|
||||||
|
"paragraph",
|
||||||
|
"heading",
|
||||||
|
"code_block",
|
||||||
|
"image",
|
||||||
|
"mixed"
|
||||||
|
],
|
||||||
"description": "Primary content type of this section"
|
"description": "Primary content type of this section"
|
||||||
},
|
},
|
||||||
"elements": {
|
"elements": {
|
||||||
|
|
@ -76,7 +90,8 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
{"$ref": "#/definitions/bullet_list"},
|
{"$ref": "#/definitions/bullet_list"},
|
||||||
{"$ref": "#/definitions/paragraph"},
|
{"$ref": "#/definitions/paragraph"},
|
||||||
{"$ref": "#/definitions/heading"},
|
{"$ref": "#/definitions/heading"},
|
||||||
{"$ref": "#/definitions/code_block"}
|
{"$ref": "#/definitions/code_block"},
|
||||||
|
{"$ref": "#/definitions/image"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -191,11 +206,20 @@ def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"code": {"type": "string", "description": "Code content"},
|
"code": {"type": "string", "description": "Code content"},
|
||||||
"language": {"type": "string", "description": "Programming language (optional)"}
|
"language": {"type": "string", "description": "Programming language (optional)"}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"image": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["url"],
|
||||||
|
"properties": {
|
||||||
|
"url": {"type": "string", "description": "Image URL or data URI"},
|
||||||
|
"caption": {"type": "string", "description": "Image caption (optional)"},
|
||||||
|
"alt": {"type": "string", "description": "Alt text (optional)"}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_document_subJsonSchema() -> Dict[str, Any]:
|
def getDocumentSchema() -> Dict[str, Any]:
|
||||||
"""Get the JSON schema for structured document generation (single document)."""
|
"""Get the JSON schema for structured document generation (single document)."""
|
||||||
return {
|
return {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
@ -206,7 +230,6 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"required": ["title"],
|
"required": ["title"],
|
||||||
"properties": {
|
"properties": {
|
||||||
"title": {"type": "string", "description": "Document title"},
|
"title": {"type": "string", "description": "Document title"},
|
||||||
"author": {"type": "string", "description": "Document author (optional)"},
|
|
||||||
"source_documents": {
|
"source_documents": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
|
|
@ -214,7 +237,7 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
},
|
},
|
||||||
"extraction_method": {
|
"extraction_method": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "ai_extraction",
|
"default": "ai_generation",
|
||||||
"description": "Method used for extraction"
|
"description": "Method used for extraction"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -230,7 +253,15 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"title": {"type": "string", "description": "Section title (optional)"},
|
"title": {"type": "string", "description": "Section title (optional)"},
|
||||||
"content_type": {
|
"content_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
|
"enum": [
|
||||||
|
"table",
|
||||||
|
"bullet_list",
|
||||||
|
"paragraph",
|
||||||
|
"heading",
|
||||||
|
"code_block",
|
||||||
|
"image",
|
||||||
|
"mixed"
|
||||||
|
],
|
||||||
"description": "Primary content type of this section"
|
"description": "Primary content type of this section"
|
||||||
},
|
},
|
||||||
"elements": {
|
"elements": {
|
||||||
|
|
@ -242,7 +273,8 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
{"$ref": "#/definitions/bullet_list"},
|
{"$ref": "#/definitions/bullet_list"},
|
||||||
{"$ref": "#/definitions/paragraph"},
|
{"$ref": "#/definitions/paragraph"},
|
||||||
{"$ref": "#/definitions/heading"},
|
{"$ref": "#/definitions/heading"},
|
||||||
{"$ref": "#/definitions/code_block"}
|
{"$ref": "#/definitions/code_block"},
|
||||||
|
{"$ref": "#/definitions/image"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -359,12 +391,21 @@ def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
"code": {"type": "string", "description": "Code content"},
|
"code": {"type": "string", "description": "Code content"},
|
||||||
"language": {"type": "string", "description": "Programming language (optional)"}
|
"language": {"type": "string", "description": "Programming language (optional)"}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"image": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["url"],
|
||||||
|
"properties": {
|
||||||
|
"url": {"type": "string", "description": "Image URL or data URI"},
|
||||||
|
"caption": {"type": "string", "description": "Image caption (optional)"},
|
||||||
|
"alt": {"type": "string", "description": "Alt text (optional)"}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_extraction_prompt_template() -> str:
|
def getExtractionPromptTemplate() -> str:
|
||||||
"""Get the template for AI extraction prompts that request JSON output."""
|
"""Get the template for AI extraction prompts that request JSON output."""
|
||||||
return """
|
return """
|
||||||
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.
|
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.
|
||||||
|
|
@ -390,7 +431,7 @@ Return only the JSON structure following the schema. Do not include any text bef
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def get_generation_prompt_template() -> str:
|
def getGenerationPromptTemplate() -> str:
|
||||||
"""Get the template for AI generation prompts that work with JSON input."""
|
"""Get the template for AI generation prompts that work with JSON input."""
|
||||||
return """
|
return """
|
||||||
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.
|
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.
|
||||||
|
|
@ -416,31 +457,31 @@ Return only the enhanced JSON structure following the schema. Do not include any
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
|
def getAdaptiveJsonSchema(promptAnalysis: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||||
"""Automatically select appropriate schema based on prompt analysis."""
|
"""Automatically select appropriate schema based on prompt analysis."""
|
||||||
if prompt_analysis and prompt_analysis.get("is_multi_file", False):
|
if promptAnalysis and promptAnalysis.get("is_multi_file", False):
|
||||||
return get_multi_document_subJsonSchema()
|
return getMultiDocumentSchema()
|
||||||
else:
|
else:
|
||||||
return get_document_subJsonSchema()
|
return getDocumentSchema()
|
||||||
|
|
||||||
def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
def validateJsonDocument(jsonData: Dict[str, Any]) -> bool:
|
||||||
"""Validate that the JSON data follows the document schema."""
|
"""Validate that the JSON data follows the unified document schema."""
|
||||||
try:
|
try:
|
||||||
# Basic validation - check required fields
|
# Basic validation - check required fields
|
||||||
if not isinstance(json_data, dict):
|
if not isinstance(jsonData, dict):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Check if it's multi-document or single-document structure
|
# Check if it's multi-document or single-document structure
|
||||||
if "documents" in json_data:
|
if "documents" in jsonData:
|
||||||
# Multi-document structure
|
# Multi-document structure
|
||||||
if "metadata" not in json_data:
|
if "metadata" not in jsonData:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
metadata = json_data["metadata"]
|
metadata = jsonData["metadata"]
|
||||||
if not isinstance(metadata, dict) or "title" not in metadata or "split_strategy" not in metadata:
|
if not isinstance(metadata, dict) or "split_strategy" not in metadata:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
documents = json_data["documents"]
|
documents = jsonData["documents"]
|
||||||
if not isinstance(documents, list):
|
if not isinstance(documents, list):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -469,7 +510,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Validate content_type
|
# Validate content_type
|
||||||
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
|
valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
|
||||||
if section["content_type"] not in valid_types:
|
if section["content_type"] not in valid_types:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -477,16 +518,16 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
||||||
if not isinstance(section["elements"], list):
|
if not isinstance(section["elements"], list):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
elif "sections" in json_data:
|
elif "sections" in jsonData:
|
||||||
# Single-document structure (existing validation)
|
# Single-document structure (existing validation)
|
||||||
if "metadata" not in json_data:
|
if "metadata" not in jsonData:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
metadata = json_data["metadata"]
|
metadata = jsonData["metadata"]
|
||||||
if not isinstance(metadata, dict) or "title" not in metadata:
|
if not isinstance(metadata, dict) or "title" not in metadata:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
sections = json_data["sections"]
|
sections = jsonData["sections"]
|
||||||
if not isinstance(sections, list):
|
if not isinstance(sections, list):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -501,7 +542,7 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Validate content_type
|
# Validate content_type
|
||||||
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
|
valid_types = ["table", "bullet_list", "paragraph", "heading", "code_block", "image", "mixed"]
|
||||||
if section["content_type"] not in valid_types:
|
if section["content_type"] not in valid_types:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,83 +5,10 @@ This module builds prompts for generating documents from extracted content.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
from modules.datamodels.datamodelJson import jsonTemplateDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Centralized JSON structure template for document generation
|
|
||||||
# Includes examples for all content types so AI knows the structure patterns
|
|
||||||
TEMPLATE_JSON_DOCUMENT_GENERATION = """{
|
|
||||||
"metadata": {
|
|
||||||
"split_strategy": "single_document",
|
|
||||||
"source_documents": [],
|
|
||||||
"extraction_method": "ai_generation"
|
|
||||||
},
|
|
||||||
"documents": [
|
|
||||||
{
|
|
||||||
"id": "doc_1",
|
|
||||||
"title": "{{DOCUMENT_TITLE}}",
|
|
||||||
"filename": "document.json",
|
|
||||||
"sections": [
|
|
||||||
{
|
|
||||||
"id": "section_heading_example",
|
|
||||||
"content_type": "heading",
|
|
||||||
"elements": [
|
|
||||||
{"level": 1, "text": "Heading Text"}
|
|
||||||
],
|
|
||||||
"order": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_paragraph_example",
|
|
||||||
"content_type": "paragraph",
|
|
||||||
"elements": [
|
|
||||||
{"text": "Paragraph text content"}
|
|
||||||
],
|
|
||||||
"order": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_list_example",
|
|
||||||
"content_type": "list",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"items": [
|
|
||||||
{"text": "Item 1"},
|
|
||||||
{"text": "Item 2"}
|
|
||||||
],
|
|
||||||
"list_type": "numbered"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_table_example",
|
|
||||||
"content_type": "table",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"headers": ["Column 1", "Column 2"],
|
|
||||||
"rows": [
|
|
||||||
["Row 1 Col 1", "Row 1 Col 2"],
|
|
||||||
["Row 2 Col 1", "Row 2 Col 2"]
|
|
||||||
],
|
|
||||||
"caption": "Table caption"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "section_code_example",
|
|
||||||
"content_type": "code",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"code": "function example() { return true; }",
|
|
||||||
"language": "javascript"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"order": 0
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}"""
|
|
||||||
|
|
||||||
|
|
||||||
async def buildGenerationPrompt(
|
async def buildGenerationPrompt(
|
||||||
|
|
@ -106,99 +33,101 @@ async def buildGenerationPrompt(
|
||||||
Complete generation prompt string
|
Complete generation prompt string
|
||||||
"""
|
"""
|
||||||
# Create a template - let AI generate title if not provided
|
# Create a template - let AI generate title if not provided
|
||||||
title_value = title if title else "Generated Document"
|
titleValue = title if title else "Generated Document"
|
||||||
json_template = TEMPLATE_JSON_DOCUMENT_GENERATION.replace("{{DOCUMENT_TITLE}}", title_value)
|
jsonTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", titleValue)
|
||||||
|
|
||||||
# Build prompt based on whether this is a continuation or first call
|
# Build prompt based on whether this is a continuation or first call
|
||||||
# Check if we have valid continuation context with actual JSON fragment
|
# Check if we have valid continuation context with actual JSON fragment
|
||||||
has_continuation = (
|
hasContinuation = (
|
||||||
continuationContext
|
continuationContext
|
||||||
and continuationContext.get("section_count", 0) > 0
|
and continuationContext.get("section_count", 0) > 0
|
||||||
and continuationContext.get("last_raw_json", "")
|
and continuationContext.get("last_raw_json", "")
|
||||||
and continuationContext.get("last_raw_json", "").strip() != "{}"
|
and continuationContext.get("last_raw_json", "").strip() != "{}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if has_continuation:
|
if hasContinuation:
|
||||||
# CONTINUATION PROMPT - user already received first part, continue from where it stopped
|
# CONTINUATION PROMPT - user already received first part, continue from where it stopped
|
||||||
last_raw_json = continuationContext.get("last_raw_json", "")
|
lastRawJson = continuationContext.get("last_raw_json", "")
|
||||||
last_item_object = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
|
lastItemObject = continuationContext.get("last_item_object", "") # Full object like {"text": "value"}
|
||||||
last_items_from_fragment = continuationContext.get("last_items_from_fragment", "")
|
lastItemsFromFragment = continuationContext.get("last_items_from_fragment", "")
|
||||||
total_items_count = continuationContext.get("total_items_count", 0)
|
totalItemsCount = continuationContext.get("total_items_count", 0)
|
||||||
|
|
||||||
# Show the last few items to indicate where to continue (limit fragment size)
|
# Show the last few items to indicate where to continue (limit fragment size)
|
||||||
# Extract just the ending portion of the JSON to show where it cut off
|
# Extract just the ending portion of the JSON to show where it cut off
|
||||||
fragment_snippet = ""
|
fragmentSnippet = ""
|
||||||
if last_raw_json:
|
if lastRawJson:
|
||||||
# Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
|
# Show last 1500 chars or the whole thing if shorter - just enough to show the cut point
|
||||||
fragment_snippet = last_raw_json[-1500:] if len(last_raw_json) > 1500 else last_raw_json
|
fragmentSnippet = lastRawJson[-1500:] if len(lastRawJson) > 1500 else lastRawJson
|
||||||
# Add ellipsis if truncated
|
# Add ellipsis if truncated
|
||||||
if len(last_raw_json) > 1500:
|
if len(lastRawJson) > 1500:
|
||||||
fragment_snippet = "..." + fragment_snippet
|
fragmentSnippet = "..." + fragmentSnippet
|
||||||
|
|
||||||
# Build clear continuation guidance
|
# Build clear continuation guidance
|
||||||
continuation_guidance = []
|
continuationGuidance = []
|
||||||
|
|
||||||
if total_items_count > 0:
|
if totalItemsCount > 0:
|
||||||
continuation_guidance.append(f"You have already generated {total_items_count} items.")
|
continuationGuidance.append(f"You have already generated {totalItemsCount} items.")
|
||||||
|
|
||||||
# Show the last complete item object (full object format)
|
# Show the last complete item object (full object format)
|
||||||
if last_item_object:
|
if lastItemObject:
|
||||||
continuation_guidance.append(f"Last item in previous response: {last_item_object}. Continue with the NEXT item after this.")
|
continuationGuidance.append(f"Last item in previous response: {lastItemObject}. Continue with the NEXT item after this.")
|
||||||
|
|
||||||
continuation_text = "\n".join(continuation_guidance) if continuation_guidance else "Continue from where it stopped."
|
continuationText = "\n".join(continuationGuidance) if continuationGuidance else "Continue from where it stopped."
|
||||||
|
|
||||||
generation_prompt = f"""User request: "{userPrompt}"
|
generationPrompt = f"""User request: "{userPrompt}"
|
||||||
|
|
||||||
The user already received part of the response. Continue generating the remaining content.
|
The user already received part of the response. Continue generating the remaining content.
|
||||||
|
|
||||||
{continuation_text}
|
{continuationText}
|
||||||
|
|
||||||
Previous response ended here (JSON was cut off at this point):
|
Previous response ended here (JSON was cut off at this point):
|
||||||
```json
|
```json
|
||||||
{fragment_snippet if fragment_snippet else "(No fragment available)"}
|
{fragmentSnippet if fragmentSnippet else "(No fragment available)"}
|
||||||
```
|
```
|
||||||
|
|
||||||
JSON structure template:
|
JSON structure template:
|
||||||
{json_template}
|
{jsonTemplate}
|
||||||
|
|
||||||
Instructions:
|
Instructions:
|
||||||
- Return full JSON structure (metadata + documents + sections)
|
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
|
||||||
- Continue from where it stopped - add NEW items only, do not repeat old items
|
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
|
||||||
- Use the element structures shown in the template
|
- Use ONLY the element structures shown in the template.
|
||||||
- Generate all remaining content needed to complete the user request
|
- Continue from where it stopped — add NEW items only; do not repeat existing items.
|
||||||
- Fill with actual content (no comments, no "Add more..." text, no placeholders)
|
- Generate all remaining content needed to complete the user request.
|
||||||
- When fully complete, add "complete_response": true at root level
|
- Fill with actual content (no placeholders or instructional text such as "Add more...").
|
||||||
- Return only valid JSON (no comments, no markdown blocks)
|
- When fully complete, add "complete_response": true at root level.
|
||||||
|
- Output JSON only; no markdown fences or extra text before/after.
|
||||||
|
|
||||||
Continue generating:
|
Continue generating:
|
||||||
"""
|
"""
|
||||||
else:
|
else:
|
||||||
# FIRST CALL - initial generation
|
# FIRST CALL - initial generation
|
||||||
generation_prompt = f"""User request: "{userPrompt}"
|
generationPrompt = f"""User request: "{userPrompt}"
|
||||||
|
|
||||||
Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
|
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
|
||||||
|
|
||||||
JSON structure template (reference only - shows the pattern):
|
JSON structure template:
|
||||||
{json_template}
|
{jsonTemplate}
|
||||||
|
|
||||||
Instructions:
|
Instructions:
|
||||||
- Start your response with {{"metadata": ...}} - return COMPLETE JSON from the beginning
|
- Start your response with {{"metadata": ...}} — return COMPLETE, STRICT JSON.
|
||||||
- Do NOT continue from the template examples above - create your own sections
|
- Return ONLY valid JSON (strict). No comments of any kind (no //, /* */, or #). No trailing commas. Strings must use double quotes.
|
||||||
- Generate complete content based on the user request
|
- Arrays must contain ONLY JSON values; do not include comments or ellipses.
|
||||||
- Use the element structures shown in the template (heading, paragraph, list, table, code)
|
- Do NOT reuse the example section IDs from the template; create your own.
|
||||||
- Create your own section IDs (do not use the example IDs like "section_heading_example")
|
- Use ONLY the element structures shown in the template.
|
||||||
- When fully complete, add "complete_response": true at root level
|
- Generate complete content based on the user request.
|
||||||
- Return only valid JSON (no comments, no markdown blocks, no text before/after)
|
- When fully complete, add "complete_response": true at root level.
|
||||||
|
- Output JSON only; no markdown fences or any additional text.
|
||||||
|
|
||||||
Generate your complete response starting from {{"metadata": ...}}:
|
Generate your complete response starting from {{"metadata": ...}}:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# If we have extracted content, prepend it to the prompt
|
# If we have extracted content, prepend it to the prompt
|
||||||
if extracted_content:
|
if extracted_content:
|
||||||
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
|
generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
|
||||||
{extracted_content}
|
{extracted_content}
|
||||||
|
|
||||||
{generation_prompt}"""
|
{generationPrompt}"""
|
||||||
|
|
||||||
return generation_prompt.strip()
|
return generationPrompt.strip()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -152,11 +152,11 @@ class NeutralizationService:
|
||||||
try:
|
try:
|
||||||
# Auto-detect content type if not provided
|
# Auto-detect content type if not provided
|
||||||
if textType is None:
|
if textType is None:
|
||||||
textType = self.commonUtils.detect_content_type(text)
|
textType = self.commonUtils.detectContentType(text)
|
||||||
|
|
||||||
# Check if content is binary data
|
# Check if content is binary data
|
||||||
if self.binaryProcessor.is_binary_content(text):
|
if self.binaryProcessor.isBinaryContent(text):
|
||||||
data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
|
data, mapping, replaced_fields, processed_info = self.binaryProcessor.processBinaryContent(text)
|
||||||
neutralized_text = text if isinstance(data, str) else str(data)
|
neutralized_text = text if isinstance(data, str) else str(data)
|
||||||
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
|
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
|
||||||
return NeutralizationResult(
|
return NeutralizationResult(
|
||||||
|
|
@ -169,13 +169,13 @@ class NeutralizationService:
|
||||||
# Inline former _processData routing
|
# Inline former _processData routing
|
||||||
if textType in ['csv', 'json', 'xml']:
|
if textType in ['csv', 'json', 'xml']:
|
||||||
if textType == 'csv':
|
if textType == 'csv':
|
||||||
data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
|
data, mapping, replaced_fields, processed_info = self.listProcessor.processCsvContent(text)
|
||||||
elif textType == 'json':
|
elif textType == 'json':
|
||||||
data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
|
data, mapping, replaced_fields, processed_info = self.listProcessor.processJsonContent(text)
|
||||||
else: # xml
|
else: # xml
|
||||||
data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
|
data, mapping, replaced_fields, processed_info = self.listProcessor.processXmlContent(text)
|
||||||
else:
|
else:
|
||||||
data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
|
data, mapping, replaced_fields, processed_info = self.textProcessor.processTextContent(text)
|
||||||
# Stringify data consistently
|
# Stringify data consistently
|
||||||
if textType == 'csv':
|
if textType == 'csv':
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ Handles pattern matching and replacement for emails, phones, addresses, IDs and
|
||||||
import re
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Dict, List, Tuple, Any
|
from typing import Dict, List, Tuple, Any
|
||||||
from modules.services.serviceNeutralization.subPatterns import DataPatterns, find_patterns_in_text
|
from modules.services.serviceNeutralization.subPatterns import DataPatterns, findPatternsInText
|
||||||
|
|
||||||
class StringParser:
|
class StringParser:
|
||||||
"""Handles string parsing and replacement operations"""
|
"""Handles string parsing and replacement operations"""
|
||||||
|
|
@ -22,7 +22,7 @@ class StringParser:
|
||||||
self.NamesToParse = NamesToParse or []
|
self.NamesToParse = NamesToParse or []
|
||||||
self.mapping = {}
|
self.mapping = {}
|
||||||
|
|
||||||
def is_placeholder(self, text: str) -> bool:
|
def _isPlaceholder(self, text: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if text is already a placeholder in format [tag.uuid]
|
Check if text is already a placeholder in format [tag.uuid]
|
||||||
|
|
||||||
|
|
@ -34,7 +34,7 @@ class StringParser:
|
||||||
"""
|
"""
|
||||||
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
|
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
|
||||||
|
|
||||||
def replace_pattern_matches(self, text: str) -> str:
|
def _replacePatternMatches(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Replace pattern-based matches (emails, phones, etc.) in text
|
Replace pattern-based matches (emails, phones, etc.) in text
|
||||||
|
|
||||||
|
|
@ -44,37 +44,37 @@ class StringParser:
|
||||||
Returns:
|
Returns:
|
||||||
str: Text with pattern matches replaced
|
str: Text with pattern matches replaced
|
||||||
"""
|
"""
|
||||||
pattern_matches = find_patterns_in_text(text, self.data_patterns)
|
patternMatches = findPatternsInText(text, self.data_patterns)
|
||||||
|
|
||||||
# Process pattern matches from right to left to avoid position shifts
|
# Process pattern matches from right to left to avoid position shifts
|
||||||
for pattern_name, matched_text, start, end in reversed(pattern_matches):
|
for patternName, matchedText, start, end in reversed(patternMatches):
|
||||||
# Skip if already a placeholder
|
# Skip if already a placeholder
|
||||||
if self.is_placeholder(matched_text):
|
if self._isPlaceholder(matchedText):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip if contains placeholder characters
|
# Skip if contains placeholder characters
|
||||||
if '[' in matched_text or ']' in matched_text:
|
if '[' in matchedText or ']' in matchedText:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if matched_text not in self.mapping:
|
if matchedText not in self.mapping:
|
||||||
# Generate a UUID for the placeholder
|
# Generate a UUID for the placeholder
|
||||||
placeholder_id = str(uuid.uuid4())
|
placeholderId = str(uuid.uuid4())
|
||||||
# Create placeholder in format [type.uuid]
|
# Create placeholder in format [type.uuid]
|
||||||
type_mapping = {
|
typeMapping = {
|
||||||
'email': 'email',
|
'email': 'email',
|
||||||
'phone': 'phone',
|
'phone': 'phone',
|
||||||
'address': 'address',
|
'address': 'address',
|
||||||
'id': 'id'
|
'id': 'id'
|
||||||
}
|
}
|
||||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
placeholderType = typeMapping.get(patternName, 'data')
|
||||||
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
|
self.mapping[matchedText] = f"[{placeholderType}.{placeholderId}]"
|
||||||
|
|
||||||
replacement = self.mapping[matched_text]
|
replacement = self.mapping[matchedText]
|
||||||
text = text[:start] + replacement + text[end:]
|
text = text[:start] + replacement + text[end:]
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def replace_custom_names(self, text: str) -> str:
|
def _replaceCustomNames(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Replace custom names from the user list in text
|
Replace custom names from the user list in text
|
||||||
|
|
||||||
|
|
@ -96,19 +96,19 @@ class StringParser:
|
||||||
|
|
||||||
# Replace each match with a placeholder
|
# Replace each match with a placeholder
|
||||||
for match in reversed(matches): # Process from right to left to avoid position shifts
|
for match in reversed(matches): # Process from right to left to avoid position shifts
|
||||||
matched_text = match.group()
|
matchedText = match.group()
|
||||||
if matched_text not in self.mapping:
|
if matchedText not in self.mapping:
|
||||||
# Generate a UUID for the placeholder
|
# Generate a UUID for the placeholder
|
||||||
placeholder_id = str(uuid.uuid4())
|
placeholderId = str(uuid.uuid4())
|
||||||
self.mapping[matched_text] = f"[name.{placeholder_id}]"
|
self.mapping[matchedText] = f"[name.{placeholderId}]"
|
||||||
|
|
||||||
replacement = self.mapping[matched_text]
|
replacement = self.mapping[matchedText]
|
||||||
start, end = match.span()
|
start, end = match.span()
|
||||||
text = text[:start] + replacement + text[end:]
|
text = text[:start] + replacement + text[end:]
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def process_string(self, text: str) -> str:
|
def processString(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Process a string by replacing patterns first, then custom names
|
Process a string by replacing patterns first, then custom names
|
||||||
|
|
||||||
|
|
@ -118,18 +118,18 @@ class StringParser:
|
||||||
Returns:
|
Returns:
|
||||||
str: Processed text with replacements
|
str: Processed text with replacements
|
||||||
"""
|
"""
|
||||||
if self.is_placeholder(text):
|
if self._isPlaceholder(text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# Step 1: Replace pattern-based matches FIRST
|
# Step 1: Replace pattern-based matches FIRST
|
||||||
text = self.replace_pattern_matches(text)
|
text = self._replacePatternMatches(text)
|
||||||
|
|
||||||
# Step 2: Replace custom names SECOND
|
# Step 2: Replace custom names SECOND
|
||||||
text = self.replace_custom_names(text)
|
text = self._replaceCustomNames(text)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def process_json_value(self, value: Any) -> Any:
|
def processJsonValue(self, value: Any) -> Any:
|
||||||
"""
|
"""
|
||||||
Process a JSON value for anonymization
|
Process a JSON value for anonymization
|
||||||
|
|
||||||
|
|
@ -140,15 +140,15 @@ class StringParser:
|
||||||
Any: Processed value
|
Any: Processed value
|
||||||
"""
|
"""
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
return self.process_string(value)
|
return self.processString(value)
|
||||||
elif isinstance(value, dict):
|
elif isinstance(value, dict):
|
||||||
return {k: self.process_json_value(v) for k, v in value.items()}
|
return {k: self.processJsonValue(v) for k, v in value.items()}
|
||||||
elif isinstance(value, list):
|
elif isinstance(value, list):
|
||||||
return [self.process_json_value(item) for item in value]
|
return [self.processJsonValue(item) for item in value]
|
||||||
else:
|
else:
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def get_mapping(self) -> Dict[str, str]:
|
def getMapping(self) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Get the current mapping of original values to placeholders
|
Get the current mapping of original values to placeholders
|
||||||
|
|
||||||
|
|
@ -157,6 +157,6 @@ class StringParser:
|
||||||
"""
|
"""
|
||||||
return self.mapping.copy()
|
return self.mapping.copy()
|
||||||
|
|
||||||
def clear_mapping(self):
|
def clearMapping(self):
|
||||||
"""Clear the current mapping"""
|
"""Clear the current mapping"""
|
||||||
self.mapping.clear()
|
self.mapping.clear()
|
||||||
|
|
|
||||||
|
|
@ -316,7 +316,7 @@ class TextTablePatterns:
|
||||||
"""Patterns for identifying table-like structures in text"""
|
"""Patterns for identifying table-like structures in text"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_patterns() -> List[Tuple[str, str]]:
|
def getPatterns() -> List[Tuple[str, str]]:
|
||||||
return [
|
return [
|
||||||
# key: value pattern (with optional whitespace)
|
# key: value pattern (with optional whitespace)
|
||||||
(r'^([^:]+):\s*(.+)$', ':'),
|
(r'^([^:]+):\s*(.+)$', ':'),
|
||||||
|
|
@ -329,15 +329,15 @@ class TextTablePatterns:
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_table_line(line: str) -> bool:
|
def _isTableLine(line: str) -> bool:
|
||||||
"""Check if a line matches any table pattern"""
|
"""Check if a line matches any table pattern"""
|
||||||
patterns = TextTablePatterns.get_patterns()
|
patterns = TextTablePatterns.getPatterns()
|
||||||
return any(re.match(pattern[0], line.strip()) for pattern in patterns)
|
return any(re.match(pattern[0], line.strip()) for pattern in patterns)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extract_key_value(line: str) -> Optional[Tuple[str, str]]:
|
def extractKeyValue(line: str) -> Optional[Tuple[str, str]]:
|
||||||
"""Extract key and value from a table line"""
|
"""Extract key and value from a table line"""
|
||||||
patterns = TextTablePatterns.get_patterns()
|
patterns = TextTablePatterns.getPatterns()
|
||||||
for pattern, separator in patterns:
|
for pattern, separator in patterns:
|
||||||
match = re.match(pattern, line.strip())
|
match = re.match(pattern, line.strip())
|
||||||
if match:
|
if match:
|
||||||
|
|
@ -346,7 +346,7 @@ class TextTablePatterns:
|
||||||
return key, value
|
return key, value
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
|
def getPatternForHeader(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
|
||||||
"""
|
"""
|
||||||
Find matching pattern for a header
|
Find matching pattern for a header
|
||||||
|
|
||||||
|
|
@ -368,7 +368,7 @@ def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pat
|
||||||
return pattern
|
return pattern
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]:
|
def findPatternsInText(text: str, patterns: List[Pattern]) -> List[tuple]:
|
||||||
"""
|
"""
|
||||||
Find all pattern matches in text
|
Find all pattern matches in text
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ class BinaryProcessor:
|
||||||
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
|
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
|
||||||
}
|
}
|
||||||
|
|
||||||
def detect_binary_type(self, content: str) -> str:
|
def _detectBinaryType(self, content: str) -> str:
|
||||||
"""
|
"""
|
||||||
Detect if content is binary data and determine type
|
Detect if content is binary data and determine type
|
||||||
|
|
||||||
|
|
@ -54,7 +54,7 @@ class BinaryProcessor:
|
||||||
|
|
||||||
return 'text'
|
return 'text'
|
||||||
|
|
||||||
def is_binary_content(self, content: str) -> bool:
|
def isBinaryContent(self, content: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if content is binary data
|
Check if content is binary data
|
||||||
|
|
||||||
|
|
@ -64,9 +64,9 @@ class BinaryProcessor:
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if content is binary
|
bool: True if content is binary
|
||||||
"""
|
"""
|
||||||
return self.detect_binary_type(content) == 'binary'
|
return self._detectBinaryType(content) == 'binary'
|
||||||
|
|
||||||
def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
|
def processBinaryContent(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Process binary content for anonymization
|
Process binary content for anonymization
|
||||||
|
|
||||||
|
|
@ -83,15 +83,15 @@ class BinaryProcessor:
|
||||||
# 3. Handling metadata and embedded content
|
# 3. Handling metadata and embedded content
|
||||||
# 4. Preserving binary integrity while removing sensitive data
|
# 4. Preserving binary integrity while removing sensitive data
|
||||||
|
|
||||||
processed_info = {
|
processedInfo = {
|
||||||
'type': 'binary',
|
'type': 'binary',
|
||||||
'status': 'not_implemented',
|
'status': 'not_implemented',
|
||||||
'message': 'Binary data neutralization not yet implemented'
|
'message': 'Binary data neutralization not yet implemented'
|
||||||
}
|
}
|
||||||
|
|
||||||
return content, {}, [], processed_info
|
return content, {}, [], processedInfo
|
||||||
|
|
||||||
def get_supported_types(self) -> Dict[str, list]:
|
def getSupportedTypes(self) -> Dict[str, list]:
|
||||||
"""
|
"""
|
||||||
Get list of supported binary file types
|
Get list of supported binary file types
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class CommonUtils:
|
||||||
"""Common utility functions for data processing"""
|
"""Common utility functions for data processing"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def normalize_whitespace(text: str) -> str:
|
def normalizeWhitespace(text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Normalize whitespace in text
|
Normalize whitespace in text
|
||||||
|
|
||||||
|
|
@ -48,7 +48,7 @@ class CommonUtils:
|
||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_table_line(line: str) -> bool:
|
def _isTableLine(line: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a line represents a table row
|
Check if a line represents a table row
|
||||||
|
|
||||||
|
|
@ -62,7 +62,7 @@ class CommonUtils:
|
||||||
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
|
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def detect_content_type(content: str) -> str:
|
def detectContentType(content: str) -> str:
|
||||||
"""
|
"""
|
||||||
Detect the type of content based on its structure
|
Detect the type of content based on its structure
|
||||||
|
|
||||||
|
|
@ -98,7 +98,7 @@ class CommonUtils:
|
||||||
return 'text'
|
return 'text'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
|
def mergeMappings(*mappings: Dict[str, str]) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Merge multiple mapping dictionaries
|
Merge multiple mapping dictionaries
|
||||||
|
|
||||||
|
|
@ -114,21 +114,21 @@ class CommonUtils:
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
|
def createPlaceholder(placeholderType: str, placeholderId: str) -> str:
|
||||||
"""
|
"""
|
||||||
Create a placeholder string in the format [type.uuid]
|
Create a placeholder string in the format [type.uuid]
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
placeholder_type: Type of placeholder (email, phone, name, etc.)
|
placeholderType: Type of placeholder (email, phone, name, etc.)
|
||||||
placeholder_id: Unique identifier for the placeholder
|
placeholderId: Unique identifier for the placeholder
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Formatted placeholder string
|
str: Formatted placeholder string
|
||||||
"""
|
"""
|
||||||
return f"[{placeholder_type}.{placeholder_id}]"
|
return f"[{placeholderType}.{placeholderId}]"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def validate_placeholder(placeholder: str) -> bool:
|
def validatePlaceholder(placeholder: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Validate if a string is a valid placeholder
|
Validate if a string is a valid placeholder
|
||||||
|
|
||||||
|
|
@ -141,7 +141,7 @@ class CommonUtils:
|
||||||
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
|
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
|
def extractPlaceholderInfo(placeholder: str) -> Optional[tuple]:
|
||||||
"""
|
"""
|
||||||
Extract type and ID from a placeholder
|
Extract type and ID from a placeholder
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from typing import Dict, List, Any, Union
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from modules.services.serviceNeutralization.subParseString import StringParser
|
from modules.services.serviceNeutralization.subParseString import StringParser
|
||||||
from modules.services.serviceNeutralization.subPatterns import get_pattern_for_header, HeaderPatterns
|
from modules.services.serviceNeutralization.subPatterns import getPatternForHeader, HeaderPatterns
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TableData:
|
class TableData:
|
||||||
|
|
@ -32,7 +32,7 @@ class ListProcessor:
|
||||||
self.string_parser = StringParser(NamesToParse)
|
self.string_parser = StringParser(NamesToParse)
|
||||||
self.header_patterns = HeaderPatterns.patterns
|
self.header_patterns = HeaderPatterns.patterns
|
||||||
|
|
||||||
def anonymize_table(self, table: TableData) -> TableData:
|
def _anonymizeTable(self, table: TableData) -> TableData:
|
||||||
"""
|
"""
|
||||||
Anonymize table data based on headers
|
Anonymize table data based on headers
|
||||||
|
|
||||||
|
|
@ -42,28 +42,28 @@ class ListProcessor:
|
||||||
Returns:
|
Returns:
|
||||||
TableData: Anonymized table
|
TableData: Anonymized table
|
||||||
"""
|
"""
|
||||||
anonymized_table = TableData(
|
anonymizedTable = TableData(
|
||||||
headers=table.headers.copy(),
|
headers=table.headers.copy(),
|
||||||
rows=[row.copy() for row in table.rows],
|
rows=[row.copy() for row in table.rows],
|
||||||
source_type=table.source_type
|
source_type=table.source_type
|
||||||
)
|
)
|
||||||
|
|
||||||
for i, header in enumerate(anonymized_table.headers):
|
for i, header in enumerate(anonymizedTable.headers):
|
||||||
pattern = get_pattern_for_header(header, self.header_patterns)
|
pattern = getPatternForHeader(header, self.header_patterns)
|
||||||
if pattern:
|
if pattern:
|
||||||
for row in anonymized_table.rows:
|
for row in anonymizedTable.rows:
|
||||||
if row[i] is not None:
|
if row[i] is not None:
|
||||||
original = str(row[i])
|
original = str(row[i])
|
||||||
if original not in self.string_parser.mapping:
|
if original not in self.string_parser.mapping:
|
||||||
# Generate a UUID for the placeholder
|
# Generate a UUID for the placeholder
|
||||||
import uuid
|
import uuid
|
||||||
placeholder_id = str(uuid.uuid4())
|
placeholderId = str(uuid.uuid4())
|
||||||
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
|
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
|
||||||
row[i] = self.string_parser.mapping[original]
|
row[i] = self.string_parser.mapping[original]
|
||||||
|
|
||||||
return anonymized_table
|
return anonymizedTable
|
||||||
|
|
||||||
def process_csv_content(self, content: str) -> tuple:
|
def processCsvContent(self, content: str) -> tuple:
|
||||||
"""
|
"""
|
||||||
Process CSV content
|
Process CSV content
|
||||||
|
|
||||||
|
|
@ -81,29 +81,29 @@ class ListProcessor:
|
||||||
)
|
)
|
||||||
|
|
||||||
if not table.rows:
|
if not table.rows:
|
||||||
return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
|
return None, self.string_parser.getMapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
|
||||||
|
|
||||||
anonymized_table = self.anonymize_table(table)
|
anonymizedTable = self._anonymizeTable(table)
|
||||||
|
|
||||||
# Track replaced fields
|
# Track replaced fields
|
||||||
replaced_fields = []
|
replacedFields = []
|
||||||
for i, header in enumerate(anonymized_table.headers):
|
for i, header in enumerate(anonymizedTable.headers):
|
||||||
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
|
for origRow, anonRow in zip(table.rows, anonymizedTable.rows):
|
||||||
if anon_row[i] != orig_row[i]:
|
if anonRow[i] != origRow[i]:
|
||||||
replaced_fields.append(header)
|
replacedFields.append(header)
|
||||||
|
|
||||||
# Convert back to DataFrame
|
# Convert back to DataFrame
|
||||||
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
|
result = pd.DataFrame(anonymizedTable.rows, columns=anonymizedTable.headers)
|
||||||
|
|
||||||
processed_info = {
|
processedInfo = {
|
||||||
'type': 'table',
|
'type': 'table',
|
||||||
'headers': table.headers,
|
'headers': table.headers,
|
||||||
'row_count': len(table.rows)
|
'row_count': len(table.rows)
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, self.string_parser.get_mapping(), replaced_fields, processed_info
|
return result, self.string_parser.getMapping(), replacedFields, processedInfo
|
||||||
|
|
||||||
def process_json_content(self, content: str) -> tuple:
|
def processJsonContent(self, content: str) -> tuple:
|
||||||
"""
|
"""
|
||||||
Process JSON content
|
Process JSON content
|
||||||
|
|
||||||
|
|
@ -116,13 +116,13 @@ class ListProcessor:
|
||||||
data = json.loads(content)
|
data = json.loads(content)
|
||||||
|
|
||||||
# Process JSON recursively using string parser
|
# Process JSON recursively using string parser
|
||||||
result = self.string_parser.process_json_value(data)
|
result = self.string_parser.processJsonValue(data)
|
||||||
|
|
||||||
processed_info = {'type': 'json'}
|
processedInfo = {'type': 'json'}
|
||||||
|
|
||||||
return result, self.string_parser.get_mapping(), [], processed_info
|
return result, self.string_parser.getMapping(), [], processedInfo
|
||||||
|
|
||||||
def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
|
def _anonymizeXmlElement(self, element: ET.Element, indent: str = '') -> str:
|
||||||
"""
|
"""
|
||||||
Recursively process XML element and return formatted string
|
Recursively process XML element and return formatted string
|
||||||
|
|
||||||
|
|
@ -134,69 +134,69 @@ class ListProcessor:
|
||||||
Formatted XML string
|
Formatted XML string
|
||||||
"""
|
"""
|
||||||
# Process attributes
|
# Process attributes
|
||||||
processed_attrs = {}
|
processedAttrs = {}
|
||||||
for attr_name, attr_value in element.attrib.items():
|
for attrName, attrValue in element.attrib.items():
|
||||||
# Check if attribute name matches any header patterns
|
# Check if attribute name matches any header patterns
|
||||||
pattern = get_pattern_for_header(attr_name, self.header_patterns)
|
pattern = getPatternForHeader(attrName, self.header_patterns)
|
||||||
if pattern:
|
if pattern:
|
||||||
if attr_value not in self.string_parser.mapping:
|
if attrValue not in self.string_parser.mapping:
|
||||||
# Generate a UUID for the placeholder
|
# Generate a UUID for the placeholder
|
||||||
import uuid
|
import uuid
|
||||||
placeholder_id = str(uuid.uuid4())
|
placeholderId = str(uuid.uuid4())
|
||||||
# Create placeholder in format [type.uuid]
|
# Create placeholder in format [type.uuid]
|
||||||
type_mapping = {
|
typeMapping = {
|
||||||
'email': 'email',
|
'email': 'email',
|
||||||
'phone': 'phone',
|
'phone': 'phone',
|
||||||
'name': 'name',
|
'name': 'name',
|
||||||
'address': 'address',
|
'address': 'address',
|
||||||
'id': 'id'
|
'id': 'id'
|
||||||
}
|
}
|
||||||
placeholder_type = type_mapping.get(pattern.name, 'data')
|
placeholderType = typeMapping.get(pattern.name, 'data')
|
||||||
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
|
self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
|
||||||
processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
|
processedAttrs[attrName] = self.string_parser.mapping[attrValue]
|
||||||
else:
|
else:
|
||||||
# Check if attribute value matches any data patterns
|
# Check if attribute value matches any data patterns
|
||||||
from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
|
from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
|
||||||
matches = find_patterns_in_text(attr_value, DataPatterns.patterns)
|
matches = findPatternsInText(attrValue, DataPatterns.patterns)
|
||||||
if matches:
|
if matches:
|
||||||
pattern_name = matches[0][0]
|
patternName = matches[0][0]
|
||||||
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
|
pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
|
||||||
if pattern:
|
if pattern:
|
||||||
if attr_value not in self.string_parser.mapping:
|
if attrValue not in self.string_parser.mapping:
|
||||||
# Generate a UUID for the placeholder
|
# Generate a UUID for the placeholder
|
||||||
import uuid
|
import uuid
|
||||||
placeholder_id = str(uuid.uuid4())
|
placeholderId = str(uuid.uuid4())
|
||||||
# Create placeholder in format [type.uuid]
|
# Create placeholder in format [type.uuid]
|
||||||
type_mapping = {
|
typeMapping = {
|
||||||
'email': 'email',
|
'email': 'email',
|
||||||
'phone': 'phone',
|
'phone': 'phone',
|
||||||
'name': 'name',
|
'name': 'name',
|
||||||
'address': 'address',
|
'address': 'address',
|
||||||
'id': 'id'
|
'id': 'id'
|
||||||
}
|
}
|
||||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
placeholderType = typeMapping.get(patternName, 'data')
|
||||||
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
|
self.string_parser.mapping[attrValue] = f"[{placeholderType}.{placeholderId}]"
|
||||||
processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
|
processedAttrs[attrName] = self.string_parser.mapping[attrValue]
|
||||||
else:
|
else:
|
||||||
processed_attrs[attr_name] = attr_value
|
processedAttrs[attrName] = attrValue
|
||||||
else:
|
else:
|
||||||
processed_attrs[attr_name] = attr_value
|
processedAttrs[attrName] = attrValue
|
||||||
|
|
||||||
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
|
attrs = ' '.join(f'{k}="{v}"' for k, v in processedAttrs.items())
|
||||||
attrs = f' {attrs}' if attrs else ''
|
attrs = f' {attrs}' if attrs else ''
|
||||||
|
|
||||||
# Process text content
|
# Process text content
|
||||||
text = element.text.strip() if element.text and element.text.strip() else ''
|
text = element.text.strip() if element.text and element.text.strip() else ''
|
||||||
if text:
|
if text:
|
||||||
# Skip if already a placeholder
|
# Skip if already a placeholder
|
||||||
if not self.string_parser.is_placeholder(text):
|
if not self.string_parser._isPlaceholder(text):
|
||||||
# Check if text matches any patterns
|
# Check if text matches any patterns
|
||||||
from modules.services.serviceNeutralization.subPatterns import find_patterns_in_text, DataPatterns
|
from modules.services.serviceNeutralization.subPatterns import findPatternsInText, DataPatterns
|
||||||
pattern_matches = find_patterns_in_text(text, DataPatterns.patterns)
|
patternMatches = findPatternsInText(text, DataPatterns.patterns)
|
||||||
|
|
||||||
if pattern_matches:
|
if patternMatches:
|
||||||
pattern_name = pattern_matches[0][0]
|
patternName = patternMatches[0][0]
|
||||||
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
|
pattern = next((p for p in DataPatterns.patterns if p.name == patternName), None)
|
||||||
if pattern:
|
if pattern:
|
||||||
if text not in self.string_parser.mapping:
|
if text not in self.string_parser.mapping:
|
||||||
# Generate a UUID for the placeholder
|
# Generate a UUID for the placeholder
|
||||||
|
|
@ -210,8 +210,8 @@ class ListProcessor:
|
||||||
'address': 'address',
|
'address': 'address',
|
||||||
'id': 'id'
|
'id': 'id'
|
||||||
}
|
}
|
||||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
placeholderType = typeMapping.get(patternName, 'data')
|
||||||
self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
|
self.string_parser.mapping[text] = f"[{placeholderType}.{placeholderId}]"
|
||||||
text = self.string_parser.mapping[text]
|
text = self.string_parser.mapping[text]
|
||||||
else:
|
else:
|
||||||
# Check if text matches any custom names from the user list
|
# Check if text matches any custom names from the user list
|
||||||
|
|
@ -230,8 +230,8 @@ class ListProcessor:
|
||||||
# Process child elements
|
# Process child elements
|
||||||
children = []
|
children = []
|
||||||
for child in element:
|
for child in element:
|
||||||
child_str = self.anonymize_xml_element(child, indent + ' ')
|
childStr = self._anonymizeXmlElement(child, indent + ' ')
|
||||||
children.append(child_str)
|
children.append(childStr)
|
||||||
|
|
||||||
# Build element string
|
# Build element string
|
||||||
if not children and not text:
|
if not children and not text:
|
||||||
|
|
@ -246,7 +246,7 @@ class ListProcessor:
|
||||||
result.append(f"{indent}</{element.tag}>")
|
result.append(f"{indent}</{element.tag}>")
|
||||||
return '\n'.join(result)
|
return '\n'.join(result)
|
||||||
|
|
||||||
def process_xml_content(self, content: str) -> tuple:
|
def processXmlContent(self, content: str) -> tuple:
|
||||||
"""
|
"""
|
||||||
Process XML content
|
Process XML content
|
||||||
|
|
||||||
|
|
@ -259,21 +259,21 @@ class ListProcessor:
|
||||||
root = ET.fromstring(content)
|
root = ET.fromstring(content)
|
||||||
|
|
||||||
# Process XML recursively with proper formatting
|
# Process XML recursively with proper formatting
|
||||||
result = self.anonymize_xml_element(root)
|
result = self._anonymizeXmlElement(root)
|
||||||
|
|
||||||
processed_info = {'type': 'xml'}
|
processedInfo = {'type': 'xml'}
|
||||||
|
|
||||||
return result, self.string_parser.get_mapping(), [], processed_info
|
return result, self.string_parser.getMapping(), [], processedInfo
|
||||||
|
|
||||||
def get_mapping(self) -> Dict[str, str]:
|
def getMapping(self) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Get the current mapping of original values to placeholders
|
Get the current mapping of original values to placeholders
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict[str, str]: Mapping dictionary
|
Dict[str, str]: Mapping dictionary
|
||||||
"""
|
"""
|
||||||
return self.string_parser.get_mapping()
|
return self.string_parser.getMapping()
|
||||||
|
|
||||||
def clear_mapping(self):
|
def clearMapping(self):
|
||||||
"""Clear the current mapping"""
|
"""Clear the current mapping"""
|
||||||
self.string_parser.clear_mapping()
|
self.string_parser.clearMapping()
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class TextProcessor:
|
||||||
"""
|
"""
|
||||||
self.string_parser = StringParser(NamesToParse)
|
self.string_parser = StringParser(NamesToParse)
|
||||||
|
|
||||||
def extract_tables_from_text(self, content: str) -> tuple:
|
def _extractTablesFromText(self, content: str) -> tuple:
|
||||||
"""
|
"""
|
||||||
Extract tables and plain text from content
|
Extract tables and plain text from content
|
||||||
|
|
||||||
|
|
@ -38,11 +38,11 @@ class TextProcessor:
|
||||||
# For now, process the entire content as plain text
|
# For now, process the entire content as plain text
|
||||||
# This can be extended later to detect table-like structures
|
# This can be extended later to detect table-like structures
|
||||||
tables = []
|
tables = []
|
||||||
plain_texts = [PlainText(content=content, source_type='text_plain')]
|
plainTexts = [PlainText(content=content, source_type='text_plain')]
|
||||||
|
|
||||||
return tables, plain_texts
|
return tables, plainTexts
|
||||||
|
|
||||||
def anonymize_plain_text(self, text: PlainText) -> PlainText:
|
def _anonymizePlainText(self, text: PlainText) -> PlainText:
|
||||||
"""
|
"""
|
||||||
Anonymize plain text content
|
Anonymize plain text content
|
||||||
|
|
||||||
|
|
@ -53,11 +53,11 @@ class TextProcessor:
|
||||||
PlainText: Anonymized text
|
PlainText: Anonymized text
|
||||||
"""
|
"""
|
||||||
# Use the string parser to process the content
|
# Use the string parser to process the content
|
||||||
anonymized_content = self.string_parser.process_string(text.content)
|
anonymizedContent = self.string_parser.processString(text.content)
|
||||||
|
|
||||||
return PlainText(content=anonymized_content, source_type=text.source_type)
|
return PlainText(content=anonymizedContent, source_type=text.source_type)
|
||||||
|
|
||||||
def process_text_content(self, content: str) -> tuple:
|
def processTextContent(self, content: str) -> tuple:
|
||||||
"""
|
"""
|
||||||
Process text content and return anonymized data
|
Process text content and return anonymized data
|
||||||
|
|
||||||
|
|
@ -68,35 +68,35 @@ class TextProcessor:
|
||||||
Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
|
Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
|
||||||
"""
|
"""
|
||||||
# Extract tables and plain text sections
|
# Extract tables and plain text sections
|
||||||
tables, plain_texts = self.extract_tables_from_text(content)
|
tables, plainTexts = self._extractTablesFromText(content)
|
||||||
|
|
||||||
# Process plain text sections
|
# Process plain text sections
|
||||||
anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]
|
anonymizedTexts = [self._anonymizePlainText(text) for text in plainTexts]
|
||||||
|
|
||||||
# Combine all processed content
|
# Combine all processed content
|
||||||
result = content
|
result = content
|
||||||
for text, anonymized_text in zip(plain_texts, anonymized_texts):
|
for text, anonymizedText in zip(plainTexts, anonymizedTexts):
|
||||||
if text.content != anonymized_text.content:
|
if text.content != anonymizedText.content:
|
||||||
result = result.replace(text.content, anonymized_text.content)
|
result = result.replace(text.content, anonymizedText.content)
|
||||||
|
|
||||||
# Get processing information
|
# Get processing information
|
||||||
processed_info = {
|
processedInfo = {
|
||||||
'type': 'text',
|
'type': 'text',
|
||||||
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
|
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
|
||||||
if tables else [])
|
if tables else [])
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, self.string_parser.get_mapping(), [], processed_info
|
return result, self.string_parser.getMapping(), [], processedInfo
|
||||||
|
|
||||||
def get_mapping(self) -> Dict[str, str]:
|
def getMapping(self) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Get the current mapping of original values to placeholders
|
Get the current mapping of original values to placeholders
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict[str, str]: Mapping dictionary
|
Dict[str, str]: Mapping dictionary
|
||||||
"""
|
"""
|
||||||
return self.string_parser.get_mapping()
|
return self.string_parser.getMapping()
|
||||||
|
|
||||||
def clear_mapping(self):
|
def clearMapping(self):
|
||||||
"""Clear the current mapping"""
|
"""Clear the current mapping"""
|
||||||
self.string_parser.clear_mapping()
|
self.string_parser.clearMapping()
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ class SharepointService:
|
||||||
Use setAccessTokenFromConnection() method to configure the access token before making API calls.
|
Use setAccessTokenFromConnection() method to configure the access token before making API calls.
|
||||||
"""
|
"""
|
||||||
self.services = serviceCenter
|
self.services = serviceCenter
|
||||||
self.access_token = None
|
self.accessToken = None
|
||||||
self.base_url = "https://graph.microsoft.com/v1.0"
|
self.baseUrl = "https://graph.microsoft.com/v1.0"
|
||||||
|
|
||||||
def setAccessTokenFromConnection(self, userConnection) -> bool:
|
def setAccessTokenFromConnection(self, userConnection) -> bool:
|
||||||
"""Set access token from UserConnection.
|
"""Set access token from UserConnection.
|
||||||
|
|
@ -52,21 +52,21 @@ class SharepointService:
|
||||||
logger.error(f"Error setting access token: {str(e)}")
|
logger.error(f"Error setting access token: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
||||||
"""Make a Microsoft Graph API call with proper error handling."""
|
"""Make a Microsoft Graph API call with proper error handling."""
|
||||||
try:
|
try:
|
||||||
if self.access_token is None:
|
if self.accessToken is None:
|
||||||
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
|
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
|
||||||
return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."}
|
return {"error": "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."}
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {self.access_token}",
|
"Authorization": f"Bearer {self.accessToken}",
|
||||||
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Remove leading slash from endpoint to avoid double slash
|
# Remove leading slash from endpoint to avoid double slash
|
||||||
clean_endpoint = endpoint.lstrip('/')
|
cleanEndpoint = endpoint.lstrip('/')
|
||||||
url = f"{self.base_url}/{clean_endpoint}"
|
url = f"{self.baseUrl}/{cleanEndpoint}"
|
||||||
logger.debug(f"Making Graph API call: {method} {url}")
|
logger.debug(f"Making Graph API call: {method} {url}")
|
||||||
|
|
||||||
timeout = aiohttp.ClientTimeout(total=30)
|
timeout = aiohttp.ClientTimeout(total=30)
|
||||||
|
|
@ -106,10 +106,10 @@ class SharepointService:
|
||||||
logger.error(f"Error making Graph API call: {str(e)}")
|
logger.error(f"Error making Graph API call: {str(e)}")
|
||||||
return {"error": f"Error making Graph API call: {str(e)}"}
|
return {"error": f"Error making Graph API call: {str(e)}"}
|
||||||
|
|
||||||
async def discover_sites(self) -> List[Dict[str, Any]]:
|
async def discoverSites(self) -> List[Dict[str, Any]]:
|
||||||
"""Discover all SharePoint sites accessible to the user."""
|
"""Discover all SharePoint sites accessible to the user."""
|
||||||
try:
|
try:
|
||||||
result = await self._make_graph_api_call("sites?search=*")
|
result = await self._makeGraphApiCall("sites?search=*")
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.error(f"Error discovering SharePoint sites: {result['error']}")
|
logger.error(f"Error discovering SharePoint sites: {result['error']}")
|
||||||
|
|
@ -118,9 +118,9 @@ class SharepointService:
|
||||||
sites = result.get("value", [])
|
sites = result.get("value", [])
|
||||||
logger.info(f"Discovered {len(sites)} SharePoint sites")
|
logger.info(f"Discovered {len(sites)} SharePoint sites")
|
||||||
|
|
||||||
processed_sites = []
|
processedSites = []
|
||||||
for site in sites:
|
for site in sites:
|
||||||
site_info = {
|
siteInfo = {
|
||||||
"id": site.get("id"),
|
"id": site.get("id"),
|
||||||
"displayName": site.get("displayName"),
|
"displayName": site.get("displayName"),
|
||||||
"name": site.get("name"),
|
"name": site.get("name"),
|
||||||
|
|
@ -129,24 +129,24 @@ class SharepointService:
|
||||||
"createdDateTime": site.get("createdDateTime"),
|
"createdDateTime": site.get("createdDateTime"),
|
||||||
"lastModifiedDateTime": site.get("lastModifiedDateTime")
|
"lastModifiedDateTime": site.get("lastModifiedDateTime")
|
||||||
}
|
}
|
||||||
processed_sites.append(site_info)
|
processedSites.append(siteInfo)
|
||||||
logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
|
logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
|
||||||
|
|
||||||
return processed_sites
|
return processedSites
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error discovering SharePoint sites: {str(e)}")
|
logger.error(f"Error discovering SharePoint sites: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
|
async def findSiteByName(self, siteName: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Find a specific SharePoint site by name using direct Graph API call."""
|
"""Find a specific SharePoint site by name using direct Graph API call."""
|
||||||
try:
|
try:
|
||||||
# Try to get the site directly by name using Graph API
|
# Try to get the site directly by name using Graph API
|
||||||
endpoint = f"sites/{site_name}"
|
endpoint = f"sites/{siteName}"
|
||||||
result = await self._make_graph_api_call(endpoint)
|
result = await self._makeGraphApiCall(endpoint)
|
||||||
|
|
||||||
if result and "error" not in result:
|
if result and "error" not in result:
|
||||||
site_info = {
|
siteInfo = {
|
||||||
"id": result.get("id"),
|
"id": result.get("id"),
|
||||||
"displayName": result.get("displayName"),
|
"displayName": result.get("displayName"),
|
||||||
"name": result.get("name"),
|
"name": result.get("name"),
|
||||||
|
|
@ -155,15 +155,15 @@ class SharepointService:
|
||||||
"createdDateTime": result.get("createdDateTime"),
|
"createdDateTime": result.get("createdDateTime"),
|
||||||
"lastModifiedDateTime": result.get("lastModifiedDateTime")
|
"lastModifiedDateTime": result.get("lastModifiedDateTime")
|
||||||
}
|
}
|
||||||
logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
|
logger.info(f"Found site directly: {siteInfo['displayName']} - {siteInfo['webUrl']}")
|
||||||
return site_info
|
return siteInfo
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")
|
logger.debug(f"Direct site lookup failed for '{siteName}': {str(e)}")
|
||||||
|
|
||||||
# Fallback to discovery if direct lookup fails
|
# Fallback to discovery if direct lookup fails
|
||||||
logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
|
logger.info(f"Direct lookup failed, trying discovery for site: {siteName}")
|
||||||
sites = await self.discover_sites()
|
sites = await self.discoverSites()
|
||||||
if not sites:
|
if not sites:
|
||||||
logger.warning("No sites discovered")
|
logger.warning("No sites discovered")
|
||||||
return None
|
return None
|
||||||
|
|
@ -174,46 +174,46 @@ class SharepointService:
|
||||||
|
|
||||||
# Try exact match first
|
# Try exact match first
|
||||||
for site in sites:
|
for site in sites:
|
||||||
if site.get("displayName", "").strip().lower() == site_name.strip().lower():
|
if site.get("displayName", "").strip().lower() == siteName.strip().lower():
|
||||||
logger.info(f"Found exact match: {site.get('displayName')}")
|
logger.info(f"Found exact match: {site.get('displayName')}")
|
||||||
return site
|
return site
|
||||||
|
|
||||||
# Try partial match
|
# Try partial match
|
||||||
for site in sites:
|
for site in sites:
|
||||||
if site_name.lower() in site.get("displayName", "").lower():
|
if siteName.lower() in site.get("displayName", "").lower():
|
||||||
logger.info(f"Found partial match: {site.get('displayName')}")
|
logger.info(f"Found partial match: {site.get('displayName')}")
|
||||||
return site
|
return site
|
||||||
|
|
||||||
logger.warning(f"No site found matching: {site_name}")
|
logger.warning(f"No site found matching: {siteName}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
|
async def findSiteByWebUrl(self, webUrl: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Find a SharePoint site using its web URL (useful for guest sites)."""
|
"""Find a SharePoint site using its web URL (useful for guest sites)."""
|
||||||
try:
|
try:
|
||||||
# Use the web URL format: sites/{hostname}:/sites/{site-path}
|
# Use the web URL format: sites/{hostname}:/sites/{site-path}
|
||||||
# Extract hostname and site path from the web URL
|
# Extract hostname and site path from the web URL
|
||||||
if not web_url.startswith("https://"):
|
if not webUrl.startswith("https://"):
|
||||||
web_url = f"https://{web_url}"
|
webUrl = f"https://{webUrl}"
|
||||||
|
|
||||||
# Parse the URL to extract hostname and site path
|
# Parse the URL to extract hostname and site path
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
parsed = urlparse(web_url)
|
parsed = urlparse(webUrl)
|
||||||
hostname = parsed.hostname
|
hostname = parsed.hostname
|
||||||
path_parts = parsed.path.strip('/').split('/')
|
pathParts = parsed.path.strip('/').split('/')
|
||||||
|
|
||||||
if len(path_parts) >= 2 and path_parts[0] == 'sites':
|
if len(pathParts) >= 2 and pathParts[0] == 'sites':
|
||||||
site_path = '/'.join(path_parts[1:]) # Everything after 'sites/'
|
sitePath = '/'.join(pathParts[1:]) # Everything after 'sites/'
|
||||||
else:
|
else:
|
||||||
logger.error(f"Invalid SharePoint URL format: {web_url}")
|
logger.error(f"Invalid SharePoint URL format: {webUrl}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
endpoint = f"sites/{hostname}:/sites/{site_path}"
|
endpoint = f"sites/{hostname}:/sites/{sitePath}"
|
||||||
logger.debug(f"Trying web URL format: {endpoint}")
|
logger.debug(f"Trying web URL format: {endpoint}")
|
||||||
|
|
||||||
result = await self._make_graph_api_call(endpoint)
|
result = await self._makeGraphApiCall(endpoint)
|
||||||
|
|
||||||
if result and "error" not in result:
|
if result and "error" not in result:
|
||||||
site_info = {
|
siteInfo = {
|
||||||
"id": result.get("id"),
|
"id": result.get("id"),
|
||||||
"displayName": result.get("displayName"),
|
"displayName": result.get("displayName"),
|
||||||
"name": result.get("name"),
|
"name": result.get("name"),
|
||||||
|
|
@ -222,33 +222,33 @@ class SharepointService:
|
||||||
"createdDateTime": result.get("createdDateTime"),
|
"createdDateTime": result.get("createdDateTime"),
|
||||||
"lastModifiedDateTime": result.get("lastModifiedDateTime")
|
"lastModifiedDateTime": result.get("lastModifiedDateTime")
|
||||||
}
|
}
|
||||||
logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
|
logger.info(f"Found site by web URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
|
||||||
return site_info
|
return siteInfo
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Site not found using web URL: {web_url}")
|
logger.warning(f"Site not found using web URL: {webUrl}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error finding site by web URL: {str(e)}")
|
logger.error(f"Error finding site by web URL: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
|
async def findSiteByUrl(self, hostname: str, sitePath: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Find a SharePoint site using the site URL format."""
|
"""Find a SharePoint site using the site URL format."""
|
||||||
try:
|
try:
|
||||||
# For guest sites, try different URL formats
|
# For guest sites, try different URL formats
|
||||||
url_formats = [
|
urlFormats = [
|
||||||
f"sites/{hostname}:/sites/{site_path}", # Standard format
|
f"sites/{hostname}:/sites/{sitePath}", # Standard format
|
||||||
f"sites/{hostname}:/sites/{site_path}/", # With trailing slash
|
f"sites/{hostname}:/sites/{sitePath}/", # With trailing slash
|
||||||
f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase
|
f"sites/{hostname}:/sites/{sitePath.lower()}", # Lowercase
|
||||||
f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash
|
f"sites/{hostname}:/sites/{sitePath.lower()}/", # Lowercase with slash
|
||||||
]
|
]
|
||||||
|
|
||||||
for endpoint in url_formats:
|
for endpoint in urlFormats:
|
||||||
logger.debug(f"Trying URL format: {endpoint}")
|
logger.debug(f"Trying URL format: {endpoint}")
|
||||||
result = await self._make_graph_api_call(endpoint)
|
result = await self._makeGraphApiCall(endpoint)
|
||||||
|
|
||||||
if result and "error" not in result:
|
if result and "error" not in result:
|
||||||
site_info = {
|
siteInfo = {
|
||||||
"id": result.get("id"),
|
"id": result.get("id"),
|
||||||
"displayName": result.get("displayName"),
|
"displayName": result.get("displayName"),
|
||||||
"name": result.get("name"),
|
"name": result.get("name"),
|
||||||
|
|
@ -257,29 +257,29 @@ class SharepointService:
|
||||||
"createdDateTime": result.get("createdDateTime"),
|
"createdDateTime": result.get("createdDateTime"),
|
||||||
"lastModifiedDateTime": result.get("lastModifiedDateTime")
|
"lastModifiedDateTime": result.get("lastModifiedDateTime")
|
||||||
}
|
}
|
||||||
logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
|
logger.info(f"Found site by URL: {siteInfo['displayName']} - {siteInfo['webUrl']} (ID: {siteInfo['id']})")
|
||||||
return site_info
|
return siteInfo
|
||||||
else:
|
else:
|
||||||
logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")
|
logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")
|
||||||
|
|
||||||
logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
|
logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{sitePath}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error finding site by URL: {str(e)}")
|
logger.error(f"Error finding site by URL: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
|
async def getFolderByPath(self, siteId: str, folderPath: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Get folder information by path within a site."""
|
"""Get folder information by path within a site."""
|
||||||
try:
|
try:
|
||||||
# Clean the path
|
# Clean the path
|
||||||
clean_path = folder_path.lstrip('/')
|
cleanPath = folderPath.lstrip('/')
|
||||||
endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
|
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
|
||||||
|
|
||||||
result = await self._make_graph_api_call(endpoint)
|
result = await self._makeGraphApiCall(endpoint)
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
|
logger.warning(f"Folder not found at path {folderPath}: {result['error']}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
@ -288,43 +288,43 @@ class SharepointService:
|
||||||
logger.error(f"Error getting folder by path: {str(e)}")
|
logger.error(f"Error getting folder by path: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
|
async def uploadFile(self, siteId: str, folderPath: str, fileName: str, content: bytes) -> Dict[str, Any]:
|
||||||
"""Upload a file to SharePoint."""
|
"""Upload a file to SharePoint."""
|
||||||
try:
|
try:
|
||||||
# Clean the path
|
# Clean the path
|
||||||
clean_path = folder_path.lstrip('/')
|
cleanPath = folderPath.lstrip('/')
|
||||||
upload_path = f"{clean_path.rstrip('/')}/{file_name}"
|
uploadPath = f"{cleanPath.rstrip('/')}/{fileName}"
|
||||||
endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
|
endpoint = f"sites/{siteId}/drive/root:/{uploadPath}:/content"
|
||||||
|
|
||||||
logger.info(f"Uploading file to: {endpoint}")
|
logger.info(f"Uploading file to: {endpoint}")
|
||||||
|
|
||||||
result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
|
result = await self._makeGraphApiCall(endpoint, method="PUT", data=content)
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.error(f"Upload failed: {result['error']}")
|
logger.error(f"Upload failed: {result['error']}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
logger.info(f"File uploaded successfully: {file_name}")
|
logger.info(f"File uploaded successfully: {fileName}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error uploading file: {str(e)}")
|
logger.error(f"Error uploading file: {str(e)}")
|
||||||
return {"error": f"Error uploading file: {str(e)}"}
|
return {"error": f"Error uploading file: {str(e)}"}
|
||||||
|
|
||||||
async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
|
async def downloadFile(self, siteId: str, fileId: str) -> Optional[bytes]:
|
||||||
"""Download a file from SharePoint."""
|
"""Download a file from SharePoint."""
|
||||||
try:
|
try:
|
||||||
if self.access_token is None:
|
if self.accessToken is None:
|
||||||
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
|
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
|
endpoint = f"sites/{siteId}/drive/items/{fileId}/content"
|
||||||
|
|
||||||
headers = {"Authorization": f"Bearer {self.access_token}"}
|
headers = {"Authorization": f"Bearer {self.accessToken}"}
|
||||||
timeout = aiohttp.ClientTimeout(total=30)
|
timeout = aiohttp.ClientTimeout(total=30)
|
||||||
|
|
||||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response:
|
async with session.get(f"{self.baseUrl}/{endpoint}", headers=headers) as response:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
return await response.read()
|
return await response.read()
|
||||||
else:
|
else:
|
||||||
|
|
@ -335,32 +335,32 @@ class SharepointService:
|
||||||
logger.error(f"Error downloading file: {str(e)}")
|
logger.error(f"Error downloading file: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
|
async def listFolderContents(self, siteId: str, folderPath: str = "") -> List[Dict[str, Any]]:
|
||||||
"""List contents of a folder."""
|
"""List contents of a folder."""
|
||||||
try:
|
try:
|
||||||
if not folder_path or folder_path == "/":
|
if not folderPath or folderPath == "/":
|
||||||
endpoint = f"sites/{site_id}/drive/root/children"
|
endpoint = f"sites/{siteId}/drive/root/children"
|
||||||
else:
|
else:
|
||||||
clean_path = folder_path.lstrip('/')
|
cleanPath = folderPath.lstrip('/')
|
||||||
endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
|
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/children"
|
||||||
|
|
||||||
result = await self._make_graph_api_call(endpoint)
|
result = await self._makeGraphApiCall(endpoint)
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Failed to list folder contents: {result['error']}")
|
logger.warning(f"Failed to list folder contents: {result['error']}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
items = result.get("value", [])
|
items = result.get("value", [])
|
||||||
processed_items = []
|
processedItems = []
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
# Determine if it's a folder or file
|
# Determine if it's a folder or file
|
||||||
is_folder = 'folder' in item
|
isFolder = 'folder' in item
|
||||||
|
|
||||||
item_info = {
|
itemInfo = {
|
||||||
"id": item.get("id"),
|
"id": item.get("id"),
|
||||||
"name": item.get("name"),
|
"name": item.get("name"),
|
||||||
"type": "folder" if is_folder else "file",
|
"type": "folder" if isFolder else "file",
|
||||||
"size": item.get("size", 0),
|
"size": item.get("size", 0),
|
||||||
"createdDateTime": item.get("createdDateTime"),
|
"createdDateTime": item.get("createdDateTime"),
|
||||||
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
|
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
|
||||||
|
|
@ -368,42 +368,42 @@ class SharepointService:
|
||||||
}
|
}
|
||||||
|
|
||||||
if "file" in item:
|
if "file" in item:
|
||||||
item_info["mimeType"] = item["file"].get("mimeType")
|
itemInfo["mimeType"] = item["file"].get("mimeType")
|
||||||
item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
|
itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
|
||||||
|
|
||||||
if "folder" in item:
|
if "folder" in item:
|
||||||
item_info["childCount"] = item["folder"].get("childCount", 0)
|
itemInfo["childCount"] = item["folder"].get("childCount", 0)
|
||||||
|
|
||||||
processed_items.append(item_info)
|
processedItems.append(itemInfo)
|
||||||
|
|
||||||
return processed_items
|
return processedItems
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error listing folder contents: {str(e)}")
|
logger.error(f"Error listing folder contents: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
|
async def searchFiles(self, siteId: str, query: str) -> List[Dict[str, Any]]:
|
||||||
"""Search for files in a site."""
|
"""Search for files in a site."""
|
||||||
try:
|
try:
|
||||||
search_query = query.replace("'", "''") # Escape single quotes for OData
|
searchQuery = query.replace("'", "''") # Escape single quotes for OData
|
||||||
endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
|
endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"
|
||||||
|
|
||||||
result = await self._make_graph_api_call(endpoint)
|
result = await self._makeGraphApiCall(endpoint)
|
||||||
|
|
||||||
if "error" in result:
|
if "error" in result:
|
||||||
logger.warning(f"Search failed: {result['error']}")
|
logger.warning(f"Search failed: {result['error']}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
items = result.get("value", [])
|
items = result.get("value", [])
|
||||||
processed_items = []
|
processedItems = []
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
is_folder = 'folder' in item
|
isFolder = 'folder' in item
|
||||||
|
|
||||||
item_info = {
|
itemInfo = {
|
||||||
"id": item.get("id"),
|
"id": item.get("id"),
|
||||||
"name": item.get("name"),
|
"name": item.get("name"),
|
||||||
"type": "folder" if is_folder else "file",
|
"type": "folder" if isFolder else "file",
|
||||||
"size": item.get("size", 0),
|
"size": item.get("size", 0),
|
||||||
"createdDateTime": item.get("createdDateTime"),
|
"createdDateTime": item.get("createdDateTime"),
|
||||||
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
|
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
|
||||||
|
|
@ -412,64 +412,64 @@ class SharepointService:
|
||||||
}
|
}
|
||||||
|
|
||||||
if "file" in item:
|
if "file" in item:
|
||||||
item_info["mimeType"] = item["file"].get("mimeType")
|
itemInfo["mimeType"] = item["file"].get("mimeType")
|
||||||
item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
|
itemInfo["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
|
||||||
|
|
||||||
processed_items.append(item_info)
|
processedItems.append(itemInfo)
|
||||||
|
|
||||||
return processed_items
|
return processedItems
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error searching files: {str(e)}")
|
logger.error(f"Error searching files: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
|
async def copyFileAsync(self, siteId: str, sourceFolder: str, sourceFile: str, destFolder: str, destFile: str) -> None:
|
||||||
"""Copy a file from source to destination folder (like original synchronizer)."""
|
"""Copy a file from source to destination folder (like original synchronizer)."""
|
||||||
try:
|
try:
|
||||||
# First, download the source file
|
# First, download the source file
|
||||||
source_path = f"{source_folder}/{source_file}"
|
sourcePath = f"{sourceFolder}/{sourceFile}"
|
||||||
file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
|
fileContent = await self.downloadFileByPath(siteId=siteId, filePath=sourcePath)
|
||||||
|
|
||||||
if not file_content:
|
if not fileContent:
|
||||||
raise Exception(f"Failed to download source file: {source_path}")
|
raise Exception(f"Failed to download source file: {sourcePath}")
|
||||||
|
|
||||||
# Upload to destination
|
# Upload to destination
|
||||||
await self.upload_file(
|
await self.uploadFile(
|
||||||
site_id=site_id,
|
siteId=siteId,
|
||||||
folder_path=dest_folder,
|
folderPath=destFolder,
|
||||||
file_name=dest_file,
|
fileName=destFile,
|
||||||
content=file_content
|
content=fileContent
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"File copied: {source_file} -> {dest_file}")
|
logger.info(f"File copied: {sourceFile} -> {destFile}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Provide more specific error information
|
# Provide more specific error information
|
||||||
error_msg = str(e)
|
errorMsg = str(e)
|
||||||
if "itemNotFound" in error_msg or "404" in error_msg:
|
if "itemNotFound" in errorMsg or "404" in errorMsg:
|
||||||
raise Exception(f"Source file not found (404): {source_path} - {error_msg}")
|
raise Exception(f"Source file not found (404): {sourcePath} - {errorMsg}")
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Error copying file: {error_msg}")
|
raise Exception(f"Error copying file: {errorMsg}")
|
||||||
|
|
||||||
async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
|
async def downloadFileByPath(self, siteId: str, filePath: str) -> Optional[bytes]:
|
||||||
"""Download a file by its path within a site."""
|
"""Download a file by its path within a site."""
|
||||||
try:
|
try:
|
||||||
if self.access_token is None:
|
if self.accessToken is None:
|
||||||
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
|
logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Clean the path
|
# Clean the path
|
||||||
clean_path = file_path.strip('/')
|
cleanPath = filePath.strip('/')
|
||||||
endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content"
|
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}:/content"
|
||||||
|
|
||||||
# Use direct HTTP call for file downloads (binary content)
|
# Use direct HTTP call for file downloads (binary content)
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {self.access_token}",
|
"Authorization": f"Bearer {self.accessToken}",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Remove leading slash from endpoint to avoid double slash
|
# Remove leading slash from endpoint to avoid double slash
|
||||||
clean_endpoint = endpoint.lstrip('/')
|
cleanEndpoint = endpoint.lstrip('/')
|
||||||
url = f"{self.base_url}/{clean_endpoint}"
|
url = f"{self.baseUrl}/{cleanEndpoint}"
|
||||||
logger.debug(f"Downloading file: GET {url}")
|
logger.debug(f"Downloading file: GET {url}")
|
||||||
|
|
||||||
timeout = aiohttp.ClientTimeout(total=30)
|
timeout = aiohttp.ClientTimeout(total=30)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import logging
|
||||||
from typing import Any, Optional, Dict, Callable, List
|
from typing import Any, Optional, Dict, Callable, List
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.eventManagement import eventManager
|
from modules.shared.eventManagement import eventManager
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
from modules.shared import jsonUtils
|
from modules.shared import jsonUtils
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -122,7 +122,7 @@ class UtilsService:
|
||||||
float: Current UTC timestamp in seconds
|
float: Current UTC timestamp in seconds
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return get_utc_timestamp()
|
return getUtcTimestamp()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error getting UTC timestamp: {str(e)}")
|
logger.error(f"Error getting UTC timestamp: {str(e)}")
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
@ -185,6 +185,75 @@ class UtilsService:
|
||||||
# Silent fail to never break main flow
|
# Silent fail to never break main flow
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# ===== Prompt sanitization =====
|
||||||
|
|
||||||
|
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
|
||||||
|
"""
|
||||||
|
Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
|
||||||
|
|
||||||
|
This is the single source of truth for all prompt sanitization across the system.
|
||||||
|
Replaces all scattered sanitization functions with a unified approach.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: The content to sanitize
|
||||||
|
contentType: Type of content ("text", "userinput", "json", "document")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Safely sanitized content ready for AI prompt insertion
|
||||||
|
"""
|
||||||
|
if not content:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
# Convert to string if not already
|
||||||
|
content_str = str(content)
|
||||||
|
|
||||||
|
# Remove null bytes and control characters (except newlines and tabs)
|
||||||
|
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
|
||||||
|
|
||||||
|
# Handle different content types with appropriate sanitization
|
||||||
|
if contentType == "userinput":
|
||||||
|
# Extra security for user-controlled content
|
||||||
|
# Escape curly braces to prevent placeholder injection
|
||||||
|
sanitized = sanitized.replace('{', '{{').replace('}', '}}')
|
||||||
|
# Escape quotes and wrap in single quotes
|
||||||
|
sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
|
||||||
|
return f"'{sanitized}'"
|
||||||
|
|
||||||
|
elif contentType == "json":
|
||||||
|
# For JSON content, escape quotes and backslashes
|
||||||
|
sanitized = sanitized.replace('\\', '\\\\')
|
||||||
|
sanitized = sanitized.replace('"', '\\"')
|
||||||
|
sanitized = sanitized.replace('\n', '\\n')
|
||||||
|
sanitized = sanitized.replace('\r', '\\r')
|
||||||
|
sanitized = sanitized.replace('\t', '\\t')
|
||||||
|
|
||||||
|
elif contentType == "document":
|
||||||
|
# For document content, escape special characters
|
||||||
|
sanitized = sanitized.replace('\\', '\\\\')
|
||||||
|
sanitized = sanitized.replace('"', '\\"')
|
||||||
|
sanitized = sanitized.replace("'", "\\'")
|
||||||
|
sanitized = sanitized.replace('\n', '\\n')
|
||||||
|
sanitized = sanitized.replace('\r', '\\r')
|
||||||
|
sanitized = sanitized.replace('\t', '\\t')
|
||||||
|
|
||||||
|
else: # contentType == "text" or default
|
||||||
|
# Basic text sanitization
|
||||||
|
sanitized = sanitized.replace('\\', '\\\\')
|
||||||
|
sanitized = sanitized.replace('"', '\\"')
|
||||||
|
sanitized = sanitized.replace("'", "\\'")
|
||||||
|
sanitized = sanitized.replace('\n', '\\n')
|
||||||
|
sanitized = sanitized.replace('\r', '\\r')
|
||||||
|
sanitized = sanitized.replace('\t', '\\t')
|
||||||
|
|
||||||
|
return sanitized
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error sanitizing prompt content: {str(e)}")
|
||||||
|
# Return a safe fallback
|
||||||
|
return "[ERROR: Content could not be safely sanitized]"
|
||||||
|
|
||||||
# ===== JSON utility wrappers =====
|
# ===== JSON utility wrappers =====
|
||||||
|
|
||||||
def jsonStripCodeFences(self, text: str) -> str:
|
def jsonStripCodeFences(self, text: str) -> str:
|
||||||
|
|
|
||||||
|
|
@ -34,54 +34,54 @@ class AttributeDefinition(BaseModel):
|
||||||
MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {}
|
MODEL_LABELS: Dict[str, Dict[str, Dict[str, str]]] = {}
|
||||||
|
|
||||||
|
|
||||||
def register_model_labels(model_name: str, model_label: Dict[str, str], labels: Dict[str, Dict[str, str]]):
|
def registerModelLabels(modelName: str, modelLabel: Dict[str, str], labels: Dict[str, Dict[str, str]]):
|
||||||
"""
|
"""
|
||||||
Register labels for a model's attributes and the model itself.
|
Register labels for a model's attributes and the model itself.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model_name: Name of the model class
|
modelName: Name of the model class
|
||||||
model_label: Dictionary mapping language codes to model labels
|
modelLabel: Dictionary mapping language codes to model labels
|
||||||
e.g. {"en": "Prompt", "fr": "Invite"}
|
e.g. {"en": "Prompt", "fr": "Invite"}
|
||||||
labels: Dictionary mapping attribute names to their translations
|
labels: Dictionary mapping attribute names to their translations
|
||||||
e.g. {"name": {"en": "Name", "fr": "Nom"}}
|
e.g. {"name": {"en": "Name", "fr": "Nom"}}
|
||||||
"""
|
"""
|
||||||
MODEL_LABELS[model_name] = {"model": model_label, "attributes": labels}
|
MODEL_LABELS[modelName] = {"model": modelLabel, "attributes": labels}
|
||||||
|
|
||||||
|
|
||||||
def get_model_labels(model_name: str, language: str = "en") -> Dict[str, str]:
|
def getModelLabels(modelName: str, language: str = "en") -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Get labels for a model's attributes in the specified language.
|
Get labels for a model's attributes in the specified language.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model_name: Name of the model class
|
modelName: Name of the model class
|
||||||
language: Language code (default: "en")
|
language: Language code (default: "en")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping attribute names to their labels in the specified language
|
Dictionary mapping attribute names to their labels in the specified language
|
||||||
"""
|
"""
|
||||||
model_data = MODEL_LABELS.get(model_name, {})
|
modelData = MODEL_LABELS.get(modelName, {})
|
||||||
attribute_labels = model_data.get("attributes", {})
|
attributeLabels = modelData.get("attributes", {})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
attr: translations.get(language, translations.get("en", attr))
|
attr: translations.get(language, translations.get("en", attr))
|
||||||
for attr, translations in attribute_labels.items()
|
for attr, translations in attributeLabels.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_model_label(model_name: str, language: str = "en") -> str:
|
def getModelLabel(modelName: str, language: str = "en") -> str:
|
||||||
"""
|
"""
|
||||||
Get the label for a model in the specified language.
|
Get the label for a model in the specified language.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model_name: Name of the model class
|
modelName: Name of the model class
|
||||||
language: Language code (default: "en")
|
language: Language code (default: "en")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Model label in the specified language, or model name if no label exists
|
Model label in the specified language, or model name if no label exists
|
||||||
"""
|
"""
|
||||||
model_data = MODEL_LABELS.get(model_name, {})
|
modelData = MODEL_LABELS.get(modelName, {})
|
||||||
model_label = model_data.get("model", {})
|
modelLabel = modelData.get("model", {})
|
||||||
return model_label.get(language, model_label.get("en", model_name))
|
return modelLabel.get(language, modelLabel.get("en", modelName))
|
||||||
|
|
||||||
|
|
||||||
def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]:
|
def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguage: str = "en") -> Dict[str, Any]:
|
||||||
|
|
@ -100,8 +100,8 @@ def getModelAttributeDefinitions(modelClass: Type[BaseModel] = None, userLanguag
|
||||||
|
|
||||||
attributes = []
|
attributes = []
|
||||||
model_name = modelClass.__name__
|
model_name = modelClass.__name__
|
||||||
labels = get_model_labels(model_name, userLanguage)
|
labels = getModelLabels(model_name, userLanguage)
|
||||||
model_label = get_model_label(model_name, userLanguage)
|
model_label = getModelLabel(model_name, userLanguage)
|
||||||
|
|
||||||
# Pydantic v2 only
|
# Pydantic v2 only
|
||||||
fields = modelClass.model_fields
|
fields = modelClass.model_fields
|
||||||
|
|
|
||||||
|
|
@ -19,42 +19,42 @@ class DailyRotatingFileHandler(RotatingFileHandler):
|
||||||
The log file name includes the current date and switches at midnight.
|
The log file name includes the current date and switches at midnight.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
|
def __init__(self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs):
|
||||||
self.log_dir = log_dir
|
self.logDir = logDir
|
||||||
self.filename_prefix = filename_prefix
|
self.filenamePrefix = filenamePrefix
|
||||||
self.current_date = None
|
self.currentDate = None
|
||||||
self.current_file = None
|
self.currentFile = None
|
||||||
|
|
||||||
# Initialize with today's file
|
# Initialize with today's file
|
||||||
self._update_file_if_needed()
|
self._updateFileIfNeeded()
|
||||||
|
|
||||||
# Call parent constructor with current file
|
# Call parent constructor with current file
|
||||||
super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
|
super().__init__(self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs)
|
||||||
|
|
||||||
def _update_file_if_needed(self):
|
def _updateFileIfNeeded(self):
|
||||||
"""Update the log file if the date has changed"""
|
"""Update the log file if the date has changed"""
|
||||||
today = datetime.now().strftime("%Y%m%d")
|
today = datetime.now().strftime("%Y%m%d")
|
||||||
|
|
||||||
if self.current_date != today:
|
if self.currentDate != today:
|
||||||
self.current_date = today
|
self.currentDate = today
|
||||||
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
|
newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log")
|
||||||
|
|
||||||
if self.current_file != new_file:
|
if self.currentFile != newFile:
|
||||||
self.current_file = new_file
|
self.currentFile = newFile
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def emit(self, record):
|
def emit(self, record):
|
||||||
"""Emit a log record, switching files if date has changed"""
|
"""Emit a log record, switching files if date has changed"""
|
||||||
# Check if we need to switch to a new file
|
# Check if we need to switch to a new file
|
||||||
if self._update_file_if_needed():
|
if self._updateFileIfNeeded():
|
||||||
# Close current file and open new one
|
# Close current file and open new one
|
||||||
if self.stream:
|
if self.stream:
|
||||||
self.stream.close()
|
self.stream.close()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
# Update the baseFilename for the parent class
|
# Update the baseFilename for the parent class
|
||||||
self.baseFilename = self.current_file
|
self.baseFilename = self.currentFile
|
||||||
# Reopen the stream
|
# Reopen the stream
|
||||||
if not self.delay:
|
if not self.delay:
|
||||||
self.stream = self._open()
|
self.stream = self._open()
|
||||||
|
|
@ -68,9 +68,9 @@ class AuditLogger:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.logger = None
|
self.logger = None
|
||||||
self._setup_audit_logger()
|
self._setupAuditLogger()
|
||||||
|
|
||||||
def _setup_audit_logger(self):
|
def _setupAuditLogger(self):
|
||||||
"""Setup the audit logger with daily file rotation"""
|
"""Setup the audit logger with daily file rotation"""
|
||||||
try:
|
try:
|
||||||
# Get log directory from config
|
# Get log directory from config
|
||||||
|
|
@ -96,10 +96,10 @@ class AuditLogger:
|
||||||
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
|
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
|
||||||
|
|
||||||
fileHandler = DailyRotatingFileHandler(
|
fileHandler = DailyRotatingFileHandler(
|
||||||
log_dir=logDir,
|
logDir=logDir,
|
||||||
filename_prefix="log_audit",
|
filenamePrefix="log_audit",
|
||||||
max_bytes=rotationSize,
|
maxBytes=rotationSize,
|
||||||
backup_count=backupCount
|
backupCount=backupCount
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create formatter for audit log
|
# Create formatter for audit log
|
||||||
|
|
@ -120,9 +120,9 @@ class AuditLogger:
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.logger.error(f"Failed to setup audit logger: {str(e)}")
|
self.logger.error(f"Failed to setup audit logger: {str(e)}")
|
||||||
|
|
||||||
def log_event(self,
|
def logEvent(self,
|
||||||
user_id: str,
|
userId: str,
|
||||||
mandate_id: str,
|
mandateId: str,
|
||||||
category: str,
|
category: str,
|
||||||
action: str,
|
action: str,
|
||||||
details: str = "",
|
details: str = "",
|
||||||
|
|
@ -131,8 +131,8 @@ class AuditLogger:
|
||||||
Log an audit event
|
Log an audit event
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
user_id: User identifier
|
userId: User identifier
|
||||||
mandate_id: Mandate identifier (can be empty if not applicable)
|
mandateId: Mandate identifier (can be empty if not applicable)
|
||||||
category: Event category (e.g., 'key', 'access', 'data')
|
category: Event category (e.g., 'key', 'access', 'data')
|
||||||
action: Specific action (e.g., 'decode', 'login', 'logout')
|
action: Specific action (e.g., 'decode', 'login', 'logout')
|
||||||
details: Additional details about the event
|
details: Additional details about the event
|
||||||
|
|
@ -148,50 +148,50 @@ class AuditLogger:
|
||||||
|
|
||||||
# Format the audit log entry
|
# Format the audit log entry
|
||||||
# Format: timestamp | userid | mandateid | category | action | details
|
# Format: timestamp | userid | mandateid | category | action | details
|
||||||
audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"
|
auditEntry = f"{userId} | {mandateId} | {category} | {action} | {details}"
|
||||||
|
|
||||||
# Log the event
|
# Log the event
|
||||||
self.logger.info(audit_entry)
|
self.logger.info(auditEntry)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Use standard logger as fallback
|
# Use standard logger as fallback
|
||||||
logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
|
logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
|
||||||
|
|
||||||
def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
|
def logKeyAccess(self, userId: str, mandateId: str, keyName: str, action: str) -> None:
|
||||||
"""Log key access events (decode/encode)"""
|
"""Log key access events (decode/encode)"""
|
||||||
self.log_event(
|
self.logEvent(
|
||||||
user_id=user_id,
|
userId=userId,
|
||||||
mandate_id=mandate_id,
|
mandateId=mandateId,
|
||||||
category="key",
|
category="key",
|
||||||
action=action,
|
action=action,
|
||||||
details=key_name
|
details=keyName
|
||||||
)
|
)
|
||||||
|
|
||||||
def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
|
def logUserAccess(self, userId: str, mandateId: str, action: str, successInfo: str = "") -> None:
|
||||||
"""Log user access events (login/logout)"""
|
"""Log user access events (login/logout)"""
|
||||||
self.log_event(
|
self.logEvent(
|
||||||
user_id=user_id,
|
userId=userId,
|
||||||
mandate_id=mandate_id,
|
mandateId=mandateId,
|
||||||
category="access",
|
category="access",
|
||||||
action=action,
|
action=action,
|
||||||
details=success_info
|
details=successInfo
|
||||||
)
|
)
|
||||||
|
|
||||||
def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
|
def logDataAccess(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
|
||||||
"""Log data access events"""
|
"""Log data access events"""
|
||||||
self.log_event(
|
self.logEvent(
|
||||||
user_id=user_id,
|
userId=userId,
|
||||||
mandate_id=mandate_id,
|
mandateId=mandateId,
|
||||||
category="data",
|
category="data",
|
||||||
action=action,
|
action=action,
|
||||||
details=details
|
details=details
|
||||||
)
|
)
|
||||||
|
|
||||||
def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
|
def logSecurityEvent(self, userId: str, mandateId: str, action: str, details: str = "") -> None:
|
||||||
"""Log security-related events"""
|
"""Log security-related events"""
|
||||||
self.log_event(
|
self.logEvent(
|
||||||
user_id=user_id,
|
userId=userId,
|
||||||
mandate_id=mandate_id,
|
mandateId=mandateId,
|
||||||
category="security",
|
category="security",
|
||||||
action=action,
|
action=action,
|
||||||
details=details
|
details=details
|
||||||
|
|
|
||||||
|
|
@ -199,10 +199,10 @@ class Configuration:
|
||||||
# Log audit event for secret key access
|
# Log audit event for secret key access
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_key_access(
|
audit_logger.logKeyAccess(
|
||||||
user_id=user_id,
|
userId=user_id,
|
||||||
mandate_id="system",
|
mandateId="system",
|
||||||
key_name=key,
|
keyName=key,
|
||||||
action="decode"
|
action="decode"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -211,9 +211,9 @@ class Configuration:
|
||||||
|
|
||||||
if value.startswith("{") and value.endswith("}"):
|
if value.startswith("{") and value.endswith("}"):
|
||||||
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
|
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
|
||||||
return handleSecretJson(value, user_id, key)
|
return handleSecretJson(value, userId=user_id, keyName=key)
|
||||||
else:
|
else:
|
||||||
return handleSecretText(value, user_id, key)
|
return handleSecretText(value, userId=user_id, keyName=key)
|
||||||
return value
|
return value
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
@ -235,31 +235,31 @@ class Configuration:
|
||||||
"""Set a configuration value (for testing/overrides)"""
|
"""Set a configuration value (for testing/overrides)"""
|
||||||
self._data[key] = value
|
self._data[key] = value
|
||||||
|
|
||||||
def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
|
def handleSecretText(value: str, userId: str = "system", keyName: str = "unknown") -> str:
|
||||||
"""
|
"""
|
||||||
Handle secret values with encryption/decryption support.
|
Handle secret values with encryption/decryption support.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
value: The secret value to handle (may be encrypted)
|
value: The secret value to handle (may be encrypted)
|
||||||
user_id: The user ID making the request (default: "system")
|
userId: The user ID making the request (default: "system")
|
||||||
key_name: The name of the key being decrypted (default: "unknown")
|
keyName: The name of the key being decrypted (default: "unknown")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Processed secret value (decrypted if encrypted)
|
str: Processed secret value (decrypted if encrypted)
|
||||||
"""
|
"""
|
||||||
if _is_encrypted_value(value):
|
if _isEncryptedValue(value):
|
||||||
return decrypt_value(value, user_id, key_name)
|
return decryptValue(value, userId, keyName)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
|
def handleSecretJson(value: str, userId: str = "system", keyName: str = "unknown") -> str:
|
||||||
"""
|
"""
|
||||||
Handle JSON secret values (like Google service account keys) with encryption/decryption support.
|
Handle JSON secret values (like Google service account keys) with encryption/decryption support.
|
||||||
Validates that the value is valid JSON after decryption.
|
Validates that the value is valid JSON after decryption.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
value: The JSON secret value to handle (may be encrypted)
|
value: The JSON secret value to handle (may be encrypted)
|
||||||
user_id: The user ID making the request (default: "system")
|
userId: The user ID making the request (default: "system")
|
||||||
key_name: The name of the key being decrypted (default: "unknown")
|
keyName: The name of the key being decrypted (default: "unknown")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Processed JSON secret value (decrypted if encrypted)
|
str: Processed JSON secret value (decrypted if encrypted)
|
||||||
|
|
@ -268,15 +268,15 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
|
||||||
ValueError: If the value is not valid JSON after decryption
|
ValueError: If the value is not valid JSON after decryption
|
||||||
"""
|
"""
|
||||||
# Decrypt if encrypted
|
# Decrypt if encrypted
|
||||||
if _is_encrypted_value(value):
|
if _isEncryptedValue(value):
|
||||||
decrypted_value = decrypt_value(value, user_id, key_name)
|
decryptedValue = decryptValue(value, userId, keyName)
|
||||||
else:
|
else:
|
||||||
decrypted_value = value
|
decryptedValue = value
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Validate that it's valid JSON
|
# Validate that it's valid JSON
|
||||||
json.loads(decrypted_value)
|
json.loads(decryptedValue)
|
||||||
return decrypted_value
|
return decryptedValue
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
raise ValueError(f"Invalid JSON in secret value: {e}")
|
raise ValueError(f"Invalid JSON in secret value: {e}")
|
||||||
|
|
||||||
|
|
@ -284,12 +284,12 @@ def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unkno
|
||||||
# Structure: {user_id: {key_name: [timestamps]}}
|
# Structure: {user_id: {key_name: [timestamps]}}
|
||||||
_decryption_attempts = {}
|
_decryption_attempts = {}
|
||||||
|
|
||||||
def _get_master_key(env_type: str = None) -> bytes:
|
def _getMasterKey(envType: str = None) -> bytes:
|
||||||
"""
|
"""
|
||||||
Get the master key for the specified environment.
|
Get the master key for the specified environment.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env_type: The environment type (dev, int, prod, etc.). If None, uses current config.
|
envType: The environment type (dev, int, prod, etc.). If None, uses current config.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bytes: The master key for encryption/decryption
|
bytes: The master key for encryption/decryption
|
||||||
|
|
@ -298,24 +298,24 @@ def _get_master_key(env_type: str = None) -> bytes:
|
||||||
ValueError: If no master key is found
|
ValueError: If no master key is found
|
||||||
"""
|
"""
|
||||||
# Get the key location from config
|
# Get the key location from config
|
||||||
key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
|
keyLocation = APP_CONFIG.get('APP_KEY_SYSVAR')
|
||||||
if env_type is None:
|
if envType is None:
|
||||||
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
|
envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
|
||||||
|
|
||||||
if not key_location:
|
if not keyLocation:
|
||||||
raise ValueError("APP_KEY_SYSVAR not configured")
|
raise ValueError("APP_KEY_SYSVAR not configured")
|
||||||
|
|
||||||
# First try to get from environment variable
|
# First try to get from environment variable
|
||||||
master_key = os.environ.get(key_location)
|
masterKey = os.environ.get(keyLocation)
|
||||||
|
|
||||||
if master_key:
|
if masterKey:
|
||||||
# If found in environment, use it directly
|
# If found in environment, use it directly
|
||||||
return master_key.encode('utf-8')
|
return masterKey.encode('utf-8')
|
||||||
|
|
||||||
# If not in environment, try to read from file
|
# If not in environment, try to read from file
|
||||||
if os.path.exists(key_location):
|
if os.path.exists(keyLocation):
|
||||||
try:
|
try:
|
||||||
with open(key_location, 'r') as f:
|
with open(keyLocation, 'r') as f:
|
||||||
content = f.read().strip()
|
content = f.read().strip()
|
||||||
|
|
||||||
# Parse the key file format: env = key
|
# Parse the key file format: env = key
|
||||||
|
|
@ -326,26 +326,26 @@ def _get_master_key(env_type: str = None) -> bytes:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if '=' in line:
|
if '=' in line:
|
||||||
key_env, key_value = line.split('=', 1)
|
keyEnv, keyValue = line.split('=', 1)
|
||||||
key_env = key_env.strip()
|
keyEnv = keyEnv.strip()
|
||||||
key_value = key_value.strip()
|
keyValue = keyValue.strip()
|
||||||
|
|
||||||
if key_env == env_type:
|
if keyEnv == envType:
|
||||||
return key_value.encode('utf-8')
|
return keyValue.encode('utf-8')
|
||||||
|
|
||||||
raise ValueError(f"No key found for environment '{env_type}' in {key_location}")
|
raise ValueError(f"No key found for environment '{envType}' in {keyLocation}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Error reading key file {key_location}: {e}")
|
raise ValueError(f"Error reading key file {keyLocation}: {e}")
|
||||||
|
|
||||||
raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")
|
raise ValueError(f"Master key not found. Checked environment variable '{keyLocation}' and file path")
|
||||||
|
|
||||||
def _derive_encryption_key(master_key: bytes) -> bytes:
|
def _deriveEncryptionKey(masterKey: bytes) -> bytes:
|
||||||
"""
|
"""
|
||||||
Derive a 32-byte encryption key from the master key using PBKDF2.
|
Derive a 32-byte encryption key from the master key using PBKDF2.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
master_key: The master key bytes
|
masterKey: The master key bytes
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bytes: 32-byte derived key suitable for Fernet
|
bytes: 32-byte derived key suitable for Fernet
|
||||||
|
|
@ -360,9 +360,9 @@ def _derive_encryption_key(master_key: bytes) -> bytes:
|
||||||
iterations=100000,
|
iterations=100000,
|
||||||
)
|
)
|
||||||
|
|
||||||
return base64.urlsafe_b64encode(kdf.derive(master_key))
|
return base64.urlsafe_b64encode(kdf.derive(masterKey))
|
||||||
|
|
||||||
def _is_encrypted_value(value: str) -> bool:
|
def _isEncryptedValue(value: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a value is encrypted (starts with environment-specific prefix).
|
Check if a value is encrypted (starts with environment-specific prefix).
|
||||||
|
|
||||||
|
|
@ -382,64 +382,64 @@ def _is_encrypted_value(value: str) -> bool:
|
||||||
value.startswith('TEST_ENC:') or
|
value.startswith('TEST_ENC:') or
|
||||||
value.startswith('STAGING_ENC:'))
|
value.startswith('STAGING_ENC:'))
|
||||||
|
|
||||||
def _get_encryption_prefix(env_type: str) -> str:
|
def _getEncryptionPrefix(envType: str) -> str:
|
||||||
"""
|
"""
|
||||||
Get the encryption prefix for the given environment type.
|
Get the encryption prefix for the given environment type.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env_type: The environment type (dev, int, prod, etc.)
|
envType: The environment type (dev, int, prod, etc.)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The encryption prefix
|
str: The encryption prefix
|
||||||
"""
|
"""
|
||||||
return f"{env_type.upper()}_ENC:"
|
return f"{envType.upper()}_ENC:"
|
||||||
|
|
||||||
def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
|
def _checkDecryptionRateLimit(userId: str, keyName: str, maxPerSecond: int = 10) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if decryption is allowed based on rate limiting (max 10 per second per user per key).
|
Check if decryption is allowed based on rate limiting (max 10 per second per user per key).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
user_id: The user ID making the request
|
userId: The user ID making the request
|
||||||
key_name: The name of the key being decrypted
|
keyName: The name of the key being decrypted
|
||||||
max_per_second: Maximum decryptions per second (default: 10)
|
maxPerSecond: Maximum decryptions per second (default: 10)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if allowed, False if rate limited
|
bool: True if allowed, False if rate limited
|
||||||
"""
|
"""
|
||||||
current_time = time.time()
|
currentTime = time.time()
|
||||||
|
|
||||||
# Initialize tracking for this user if not exists
|
# Initialize tracking for this user if not exists
|
||||||
if user_id not in _decryption_attempts:
|
if userId not in _decryption_attempts:
|
||||||
_decryption_attempts[user_id] = {}
|
_decryption_attempts[userId] = {}
|
||||||
|
|
||||||
# Initialize tracking for this key if not exists
|
# Initialize tracking for this key if not exists
|
||||||
if key_name not in _decryption_attempts[user_id]:
|
if keyName not in _decryption_attempts[userId]:
|
||||||
_decryption_attempts[user_id][key_name] = []
|
_decryption_attempts[userId][keyName] = []
|
||||||
|
|
||||||
# Clean old attempts (older than 1 second)
|
# Clean old attempts (older than 1 second)
|
||||||
_decryption_attempts[user_id][key_name] = [
|
_decryption_attempts[userId][keyName] = [
|
||||||
timestamp for timestamp in _decryption_attempts[user_id][key_name]
|
timestamp for timestamp in _decryption_attempts[userId][keyName]
|
||||||
if current_time - timestamp < 1.0
|
if currentTime - timestamp < 1.0
|
||||||
]
|
]
|
||||||
|
|
||||||
# Check if we're within rate limit
|
# Check if we're within rate limit
|
||||||
if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
|
if len(_decryption_attempts[userId][keyName]) >= maxPerSecond:
|
||||||
logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
|
logger.warning(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' ({maxPerSecond}/sec)")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Record this attempt
|
# Record this attempt
|
||||||
_decryption_attempts[user_id][key_name].append(current_time)
|
_decryption_attempts[userId][keyName].append(currentTime)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
|
def encryptValue(value: str, envType: str = None, userId: str = "system", keyName: str = "unknown") -> str:
|
||||||
"""
|
"""
|
||||||
Encrypt a value using the master key for the specified environment.
|
Encrypt a value using the master key for the specified environment.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
value: The plain text value to encrypt
|
value: The plain text value to encrypt
|
||||||
env_type: The environment type (dev, int, prod). If None, uses current environment.
|
envType: The environment type (dev, int, prod). If None, uses current environment.
|
||||||
user_id: The user ID making the request (default: "system")
|
userId: The user ID making the request (default: "system")
|
||||||
key_name: The name of the key being encrypted (default: "unknown")
|
keyName: The name of the key being encrypted (default: "unknown")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The encrypted value with prefix
|
str: The encrypted value with prefix
|
||||||
|
|
@ -447,48 +447,48 @@ def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If encryption fails
|
ValueError: If encryption fails
|
||||||
"""
|
"""
|
||||||
if env_type is None:
|
if envType is None:
|
||||||
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
|
envType = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
master_key = _get_master_key(env_type)
|
masterKey = _getMasterKey(envType)
|
||||||
derived_key = _derive_encryption_key(master_key)
|
derivedKey = _deriveEncryptionKey(masterKey)
|
||||||
fernet = Fernet(derived_key)
|
fernet = Fernet(derivedKey)
|
||||||
|
|
||||||
# Encrypt the value
|
# Encrypt the value
|
||||||
encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
|
encryptedBytes = fernet.encrypt(value.encode('utf-8'))
|
||||||
encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')
|
encryptedB64 = base64.urlsafe_b64encode(encryptedBytes).decode('utf-8')
|
||||||
|
|
||||||
# Add environment prefix
|
# Add environment prefix
|
||||||
prefix = _get_encryption_prefix(env_type)
|
prefix = _getEncryptionPrefix(envType)
|
||||||
encrypted_value = f"{prefix}{encrypted_b64}"
|
encryptedValue = f"{prefix}{encryptedB64}"
|
||||||
|
|
||||||
# Log audit event for encryption
|
# Log audit event for encryption
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_key_access(
|
audit_logger.logKeyAccess(
|
||||||
user_id=user_id,
|
userId=userId,
|
||||||
mandate_id="system",
|
mandateId="system",
|
||||||
key_name=key_name,
|
keyName=keyName,
|
||||||
action="encrypt"
|
action="encrypt"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return encrypted_value
|
return encryptedValue
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Encryption failed: {e}")
|
raise ValueError(f"Encryption failed: {e}")
|
||||||
|
|
||||||
def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
|
def decryptValue(encryptedValue: str, userId: str = "system", keyName: str = "unknown") -> str:
|
||||||
"""
|
"""
|
||||||
Decrypt a value using the master key for the current environment.
|
Decrypt a value using the master key for the current environment.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
encrypted_value: The encrypted value with prefix
|
encryptedValue: The encrypted value with prefix
|
||||||
user_id: The user ID making the request (default: "system")
|
userId: The user ID making the request (default: "system")
|
||||||
key_name: The name of the key being decrypted (default: "unknown")
|
keyName: The name of the key being decrypted (default: "unknown")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The decrypted plain text value
|
str: The decrypted plain text value
|
||||||
|
|
@ -496,59 +496,59 @@ def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str =
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If decryption fails
|
ValueError: If decryption fails
|
||||||
"""
|
"""
|
||||||
if not _is_encrypted_value(encrypted_value):
|
if not _isEncryptedValue(encryptedValue):
|
||||||
return encrypted_value # Return as-is if not encrypted
|
return encryptedValue # Return as-is if not encrypted
|
||||||
|
|
||||||
# Check rate limiting (10 per second per user per key)
|
# Check rate limiting (10 per second per user per key)
|
||||||
if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
|
if not _checkDecryptionRateLimit(userId, keyName, maxPerSecond=10):
|
||||||
raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")
|
raise ValueError(f"Decryption rate limit exceeded for user '{userId}' key '{keyName}' (10/sec)")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Extract environment type from prefix
|
# Extract environment type from prefix
|
||||||
if encrypted_value.startswith('DEV_ENC:'):
|
if encryptedValue.startswith('DEV_ENC:'):
|
||||||
env_type = 'dev'
|
envType = 'dev'
|
||||||
prefix = 'DEV_ENC:'
|
prefix = 'DEV_ENC:'
|
||||||
elif encrypted_value.startswith('INT_ENC:'):
|
elif encryptedValue.startswith('INT_ENC:'):
|
||||||
env_type = 'int'
|
envType = 'int'
|
||||||
prefix = 'INT_ENC:'
|
prefix = 'INT_ENC:'
|
||||||
elif encrypted_value.startswith('PROD_ENC:'):
|
elif encryptedValue.startswith('PROD_ENC:'):
|
||||||
env_type = 'prod'
|
envType = 'prod'
|
||||||
prefix = 'PROD_ENC:'
|
prefix = 'PROD_ENC:'
|
||||||
elif encrypted_value.startswith('TEST_ENC:'):
|
elif encryptedValue.startswith('TEST_ENC:'):
|
||||||
env_type = 'test'
|
envType = 'test'
|
||||||
prefix = 'TEST_ENC:'
|
prefix = 'TEST_ENC:'
|
||||||
elif encrypted_value.startswith('STAGING_ENC:'):
|
elif encryptedValue.startswith('STAGING_ENC:'):
|
||||||
env_type = 'staging'
|
envType = 'staging'
|
||||||
prefix = 'STAGING_ENC:'
|
prefix = 'STAGING_ENC:'
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:")
|
raise ValueError(f"Invalid encryption prefix. Expected DEV_ENC:, INT_ENC:, PROD_ENC:, TEST_ENC:, or STAGING_ENC:")
|
||||||
|
|
||||||
encrypted_part = encrypted_value[len(prefix):]
|
encryptedPart = encryptedValue[len(prefix):]
|
||||||
|
|
||||||
# Get master key for the specific environment and derive encryption key
|
# Get master key for the specific environment and derive encryption key
|
||||||
master_key = _get_master_key(env_type)
|
masterKey = _getMasterKey(envType)
|
||||||
derived_key = _derive_encryption_key(master_key)
|
derivedKey = _deriveEncryptionKey(masterKey)
|
||||||
fernet = Fernet(derived_key)
|
fernet = Fernet(derivedKey)
|
||||||
|
|
||||||
# Decode and decrypt
|
# Decode and decrypt
|
||||||
encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
|
encryptedBytes = base64.urlsafe_b64decode(encryptedPart.encode('utf-8'))
|
||||||
decrypted_bytes = fernet.decrypt(encrypted_bytes)
|
decryptedBytes = fernet.decrypt(encryptedBytes)
|
||||||
decrypted_value = decrypted_bytes.decode('utf-8')
|
decryptedValue = decryptedBytes.decode('utf-8')
|
||||||
|
|
||||||
# Log audit event for decryption
|
# Log audit event for decryption
|
||||||
try:
|
try:
|
||||||
from modules.shared.auditLogger import audit_logger
|
from modules.shared.auditLogger import audit_logger
|
||||||
audit_logger.log_key_access(
|
audit_logger.logKeyAccess(
|
||||||
user_id=user_id,
|
userId=userId,
|
||||||
mandate_id="system",
|
mandateId="system",
|
||||||
key_name=key_name,
|
keyName=keyName,
|
||||||
action="decrypt"
|
action="decrypt"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't fail if audit logging fails
|
# Don't fail if audit logging fails
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return decrypted_value
|
return decryptedValue
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Decryption failed: {e}")
|
raise ValueError(f"Decryption failed: {e}")
|
||||||
|
|
|
||||||
|
|
@ -120,8 +120,8 @@ def debugLogToFile(message: str, context: str = "DEBUG") -> None:
|
||||||
debug_file = os.path.join(debug_dir, "debug_workflow.log")
|
debug_file = os.path.join(debug_dir, "debug_workflow.log")
|
||||||
|
|
||||||
# Format the debug entry
|
# Format the debug entry
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import getUtcTimestamp
|
||||||
timestamp = get_utc_timestamp()
|
timestamp = getUtcTimestamp()
|
||||||
debug_entry = f"[{timestamp}] [{context}] {message}\n"
|
debug_entry = f"[{timestamp}] [{context}] {message}\n"
|
||||||
|
|
||||||
# Write to debug file
|
# Write to debug file
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,7 @@ def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
|
def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Generic merger for root-level lists: take first dict as base; for each subsequent part:
|
Generic merger for root-level lists: take first dict as base; for each subsequent part:
|
||||||
- if value is list and same key exists as list, extend it
|
- if value is list and same key exists as list, extend it
|
||||||
|
|
@ -112,7 +112,7 @@ def mergeRootLists(json_parts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
base: Optional[Dict[str, Any]] = None
|
base: Optional[Dict[str, Any]] = None
|
||||||
parsed: List[Dict[str, Any]] = []
|
parsed: List[Dict[str, Any]] = []
|
||||||
for part in json_parts:
|
for part in jsonParts:
|
||||||
if isinstance(part, (dict, list)):
|
if isinstance(part, (dict, list)):
|
||||||
obj = part
|
obj = part
|
||||||
else:
|
else:
|
||||||
|
|
@ -146,61 +146,61 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
||||||
|
|
||||||
# Strategy 1: Try to extract sections from the entire text first
|
# Strategy 1: Try to extract sections from the entire text first
|
||||||
# This handles cases where the JSON structure is broken but content is intact
|
# This handles cases where the JSON structure is broken but content is intact
|
||||||
extracted_sections = _extractSectionsRegex(text)
|
extractedSections = _extractSectionsRegex(text)
|
||||||
if extracted_sections:
|
if extractedSections:
|
||||||
logger.info(f"Extracted {len(extracted_sections)} sections using regex")
|
logger.info(f"Extracted {len(extractedSections)} sections using regex")
|
||||||
return {
|
return {
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"split_strategy": "single_document",
|
"split_strategy": "single_document",
|
||||||
"source_documents": [],
|
"source_documents": [],
|
||||||
"extraction_method": "ai_generation"
|
"extraction_method": "ai_generation"
|
||||||
},
|
},
|
||||||
"documents": [{"sections": extracted_sections}]
|
"documents": [{"sections": extractedSections}]
|
||||||
}
|
}
|
||||||
|
|
||||||
# Strategy 2: Progressive parsing - try to find longest valid prefix
|
# Strategy 2: Progressive parsing - try to find longest valid prefix
|
||||||
best_result = None
|
bestResult = None
|
||||||
best_valid_length = 0
|
bestValidLength = 0
|
||||||
|
|
||||||
# Try different step sizes to find the best valid JSON
|
# Try different step sizes to find the best valid JSON
|
||||||
for step_size in [100, 50, 10, 1]:
|
for stepSize in [100, 50, 10, 1]:
|
||||||
for i in range(len(text), 0, -step_size):
|
for i in range(len(text), 0, -stepSize):
|
||||||
test_str = text[:i]
|
testStr = text[:i]
|
||||||
closed_str = _closeJsonStructures(test_str)
|
closedStr = _closeJsonStructures(testStr)
|
||||||
obj, err, _ = tryParseJson(closed_str)
|
obj, err, _ = tryParseJson(closedStr)
|
||||||
if err is None and isinstance(obj, dict):
|
if err is None and isinstance(obj, dict):
|
||||||
best_result = obj
|
bestResult = obj
|
||||||
best_valid_length = i
|
bestValidLength = i
|
||||||
logger.debug(f"Progressive parsing success at length {i} (step: {step_size})")
|
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
|
||||||
break
|
break
|
||||||
if best_result:
|
if bestResult:
|
||||||
break
|
break
|
||||||
|
|
||||||
if best_result:
|
if bestResult:
|
||||||
logger.info(f"Repaired JSON using progressive parsing (valid length: {best_valid_length})")
|
logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
|
||||||
|
|
||||||
# Check if we have sections in the result
|
# Check if we have sections in the result
|
||||||
sections = extractSectionsFromDocument(best_result)
|
sections = extractSectionsFromDocument(bestResult)
|
||||||
if sections:
|
if sections:
|
||||||
logger.info(f"Progressive parsing found {len(sections)} sections")
|
logger.info(f"Progressive parsing found {len(sections)} sections")
|
||||||
return best_result
|
return bestResult
|
||||||
else:
|
else:
|
||||||
# No sections found in progressive parsing, try to extract from broken part
|
# No sections found in progressive parsing, try to extract from broken part
|
||||||
logger.info("Progressive parsing found no sections, trying to extract from broken part")
|
logger.info("Progressive parsing found no sections, trying to extract from broken part")
|
||||||
extracted_sections = _extractSectionsRegex(text[best_valid_length:])
|
extractedSections = _extractSectionsRegex(text[bestValidLength:])
|
||||||
if extracted_sections:
|
if extractedSections:
|
||||||
logger.info(f"Extracted {len(extracted_sections)} sections from broken part")
|
logger.info(f"Extracted {len(extractedSections)} sections from broken part")
|
||||||
# Merge with the valid part
|
# Merge with the valid part
|
||||||
if "documents" not in best_result:
|
if "documents" not in bestResult:
|
||||||
best_result["documents"] = []
|
bestResult["documents"] = []
|
||||||
if not best_result["documents"]:
|
if not bestResult["documents"]:
|
||||||
best_result["documents"] = [{"sections": []}]
|
bestResult["documents"] = [{"sections": []}]
|
||||||
best_result["documents"][0]["sections"].extend(extracted_sections)
|
bestResult["documents"][0]["sections"].extend(extractedSections)
|
||||||
return best_result
|
return bestResult
|
||||||
|
|
||||||
# Strategy 3: Structure closing - close incomplete structures
|
# Strategy 3: Structure closing - close incomplete structures
|
||||||
closed_str = _closeJsonStructures(text)
|
closedStr = _closeJsonStructures(text)
|
||||||
obj, err, _ = tryParseJson(closed_str)
|
obj, err, _ = tryParseJson(closedStr)
|
||||||
if err is None and isinstance(obj, dict):
|
if err is None and isinstance(obj, dict):
|
||||||
logger.info("Repaired JSON using structure closing")
|
logger.info("Repaired JSON using structure closing")
|
||||||
return obj
|
return obj
|
||||||
|
|
@ -217,16 +217,16 @@ def _closeJsonStructures(text: str) -> str:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# Count open/close brackets and braces
|
# Count open/close brackets and braces
|
||||||
open_braces = text.count('{')
|
openBraces = text.count('{')
|
||||||
close_braces = text.count('}')
|
closeBraces = text.count('}')
|
||||||
open_brackets = text.count('[')
|
openBrackets = text.count('[')
|
||||||
close_brackets = text.count(']')
|
closeBrackets = text.count(']')
|
||||||
|
|
||||||
# Close incomplete structures
|
# Close incomplete structures
|
||||||
result = text
|
result = text
|
||||||
for _ in range(open_braces - close_braces):
|
for _ in range(openBraces - closeBraces):
|
||||||
result += '}'
|
result += '}'
|
||||||
for _ in range(open_brackets - close_brackets):
|
for _ in range(openBrackets - closeBrackets):
|
||||||
result += ']'
|
result += ']'
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
@ -242,32 +242,32 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
|
||||||
sections = []
|
sections = []
|
||||||
|
|
||||||
# Pattern to find section objects
|
# Pattern to find section objects
|
||||||
section_pattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
|
sectionPattern = r'"id"\s*:\s*"(section_\d+)"\s*,?\s*"content_type"\s*:\s*"(\w+)"\s*,?\s*"order"\s*:\s*(\d+)'
|
||||||
|
|
||||||
for match in re.finditer(section_pattern, text, re.IGNORECASE):
|
for match in re.finditer(sectionPattern, text, re.IGNORECASE):
|
||||||
section_id = match.group(1)
|
sectionId = match.group(1)
|
||||||
content_type = match.group(2)
|
contentType = match.group(2)
|
||||||
order = int(match.group(3))
|
order = int(match.group(3))
|
||||||
|
|
||||||
# Try to extract elements array - look for the elements array after this section
|
# Try to extract elements array - look for the elements array after this section
|
||||||
elements_match = re.search(
|
elementsMatch = re.search(
|
||||||
r'"elements"\s*:\s*\[(.*?)\]',
|
r'"elements"\s*:\s*\[(.*?)\]',
|
||||||
text[match.end():match.end()+5000] # Look ahead for elements (large range)
|
text[match.end():match.end()+5000] # Look ahead for elements (large range)
|
||||||
)
|
)
|
||||||
|
|
||||||
elements = []
|
elements = []
|
||||||
if elements_match:
|
if elementsMatch:
|
||||||
try:
|
try:
|
||||||
elements_str = '[' + elements_match.group(1) + ']'
|
elementsStr = '[' + elementsMatch.group(1) + ']'
|
||||||
elements = json.loads(elements_str)
|
elements = json.loads(elementsStr)
|
||||||
except:
|
except:
|
||||||
# If JSON parsing fails, try to extract individual items manually
|
# If JSON parsing fails, try to extract individual items manually
|
||||||
elements_text = elements_match.group(1)
|
elementsText = elementsMatch.group(1)
|
||||||
elements = _extractElementsFromText(elements_text, content_type)
|
elements = _extractElementsFromText(elementsText, contentType)
|
||||||
|
|
||||||
sections.append({
|
sections.append({
|
||||||
"id": section_id,
|
"id": sectionId,
|
||||||
"content_type": content_type,
|
"content_type": contentType,
|
||||||
"elements": elements,
|
"elements": elements,
|
||||||
"order": order
|
"order": order
|
||||||
})
|
})
|
||||||
|
|
@ -279,7 +279,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
|
||||||
return sections
|
return sections
|
||||||
|
|
||||||
|
|
||||||
def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict[str, Any]]:
|
def _extractElementsFromText(elementsText: str, contentType: str) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Extract elements from text when JSON parsing fails.
|
Extract elements from text when JSON parsing fails.
|
||||||
Generic approach that works for any content type.
|
Generic approach that works for any content type.
|
||||||
|
|
@ -290,11 +290,11 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
|
||||||
|
|
||||||
elements = []
|
elements = []
|
||||||
|
|
||||||
if content_type == "list":
|
if contentType == "list":
|
||||||
# Look for {"text": "..."} patterns, including incomplete ones
|
# Look for {"text": "..."} patterns, including incomplete ones
|
||||||
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
|
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
|
||||||
# Also look for incomplete patterns like {"text": "36
|
# Also look for incomplete patterns like {"text": "36
|
||||||
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
|
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
|
||||||
|
|
||||||
# Combine both complete and incomplete items
|
# Combine both complete and incomplete items
|
||||||
all_items = text_items + incomplete_items
|
all_items = text_items + incomplete_items
|
||||||
|
|
@ -303,41 +303,41 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
|
||||||
|
|
||||||
# Remove the last item if it appears to be incomplete/corrupted
|
# Remove the last item if it appears to be incomplete/corrupted
|
||||||
if unique_items:
|
if unique_items:
|
||||||
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
|
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
|
||||||
|
|
||||||
elements = [{"text": item} for item in unique_items]
|
elements = [{"text": item} for item in unique_items]
|
||||||
|
|
||||||
elif content_type == "paragraph":
|
elif contentType == "paragraph":
|
||||||
# Look for {"text": "..."} patterns, including incomplete ones
|
# Look for {"text": "..."} patterns, including incomplete ones
|
||||||
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elements_text)
|
text_items = re.findall(r'\{"text"\s*:\s*"([^"]*)"\}', elementsText)
|
||||||
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
|
incomplete_items = re.findall(r'\{"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
|
||||||
|
|
||||||
all_items = text_items + incomplete_items
|
all_items = text_items + incomplete_items
|
||||||
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
|
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
|
||||||
|
|
||||||
# Remove the last item if it appears to be incomplete/corrupted
|
# Remove the last item if it appears to be incomplete/corrupted
|
||||||
if unique_items:
|
if unique_items:
|
||||||
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
|
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
|
||||||
|
|
||||||
elements = [{"text": item} for item in unique_items]
|
elements = [{"text": item} for item in unique_items]
|
||||||
|
|
||||||
elif content_type == "heading":
|
elif contentType == "heading":
|
||||||
# Look for {"level": X, "text": "..."} patterns, including incomplete ones
|
# Look for {"level": X, "text": "..."} patterns, including incomplete ones
|
||||||
heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elements_text)
|
heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*)"\}', elementsText)
|
||||||
incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
|
incomplete_heading_items = re.findall(r'\{"level"\s*:\s*(\d+)\s*,\s*"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
|
||||||
|
|
||||||
all_items = heading_items + incomplete_heading_items
|
all_items = heading_items + incomplete_heading_items
|
||||||
unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))
|
unique_items = list(dict.fromkeys([(int(level), text) for level, text in all_items if text.strip()]))
|
||||||
|
|
||||||
# Remove the last item if it appears to be incomplete/corrupted
|
# Remove the last item if it appears to be incomplete/corrupted
|
||||||
if unique_items:
|
if unique_items:
|
||||||
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
|
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
|
||||||
|
|
||||||
elements = [{"level": level, "text": text} for level, text in unique_items]
|
elements = [{"level": level, "text": text} for level, text in unique_items]
|
||||||
|
|
||||||
elif content_type == "table":
|
elif contentType == "table":
|
||||||
# Look for table patterns
|
# Look for table patterns
|
||||||
table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elements_text)
|
table_items = re.findall(r'\{"headers"\s*:\s*\[(.*?)\]\s*,\s*"rows"\s*:\s*\[(.*?)\]\s*,\s*"caption"\s*:\s*"([^"]*)"\}', elementsText)
|
||||||
for headers_str, rows_str, caption in table_items:
|
for headers_str, rows_str, caption in table_items:
|
||||||
# Extract headers
|
# Extract headers
|
||||||
headers = re.findall(r'"([^"]+)"', headers_str)
|
headers = re.findall(r'"([^"]+)"', headers_str)
|
||||||
|
|
@ -354,31 +354,31 @@ def _extractElementsFromText(elements_text: str, content_type: str) -> List[Dict
|
||||||
"caption": caption
|
"caption": caption
|
||||||
})
|
})
|
||||||
|
|
||||||
elif content_type == "code":
|
elif contentType == "code":
|
||||||
# Look for {"code": "...", "language": "..."} patterns, including incomplete ones
|
# Look for {"code": "...", "language": "..."} patterns, including incomplete ones
|
||||||
code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elements_text)
|
code_items = re.findall(r'\{"code"\s*:\s*"([^"]*)"\s*,\s*"language"\s*:\s*"([^"]*)"\}', elementsText)
|
||||||
incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
|
incomplete_code_items = re.findall(r'\{"code"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
|
||||||
|
|
||||||
all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
|
all_items = code_items + [(code, "unknown") for code in incomplete_code_items]
|
||||||
unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))
|
unique_items = list(dict.fromkeys([(code, lang) for code, lang in all_items if code.strip()]))
|
||||||
|
|
||||||
# Remove the last item if it appears to be incomplete/corrupted
|
# Remove the last item if it appears to be incomplete/corrupted
|
||||||
if unique_items:
|
if unique_items:
|
||||||
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
|
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
|
||||||
|
|
||||||
elements = [{"code": code, "language": lang} for code, lang in unique_items]
|
elements = [{"code": code, "language": lang} for code, lang in unique_items]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Generic fallback - look for any text content, including incomplete
|
# Generic fallback - look for any text content, including incomplete
|
||||||
text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elements_text)
|
text_items = re.findall(r'"text"\s*:\s*"([^"]*)"', elementsText)
|
||||||
incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elements_text)
|
incomplete_text_items = re.findall(r'"text"\s*:\s*"([^"]*?)(?:\n|$)', elementsText)
|
||||||
|
|
||||||
all_items = text_items + incomplete_text_items
|
all_items = text_items + incomplete_text_items
|
||||||
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
|
unique_items = list(dict.fromkeys([item for item in all_items if item.strip()]))
|
||||||
|
|
||||||
# Remove the last item if it appears to be incomplete/corrupted
|
# Remove the last item if it appears to be incomplete/corrupted
|
||||||
if unique_items:
|
if unique_items:
|
||||||
unique_items = _removeLastIncompleteItem(unique_items, elements_text)
|
unique_items = _removeLastIncompleteItem(unique_items, elementsText)
|
||||||
|
|
||||||
elements = [{"text": item} for item in unique_items]
|
elements = [{"text": item} for item in unique_items]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ Ensures all timestamps are properly handled as UTC.
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
import time
|
import time
|
||||||
|
|
||||||
def get_utc_now() -> datetime:
|
def getUtcNow() -> datetime:
|
||||||
"""
|
"""
|
||||||
Get current time in UTC with timezone info.
|
Get current time in UTC with timezone info.
|
||||||
|
|
||||||
|
|
@ -15,7 +15,7 @@ def get_utc_now() -> datetime:
|
||||||
"""
|
"""
|
||||||
return datetime.now(timezone.utc)
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
def get_utc_timestamp() -> float:
|
def getUtcTimestamp() -> float:
|
||||||
"""
|
"""
|
||||||
Get current UTC timestamp (seconds since epoch with millisecond precision).
|
Get current UTC timestamp (seconds since epoch with millisecond precision).
|
||||||
|
|
||||||
|
|
@ -24,14 +24,14 @@ def get_utc_timestamp() -> float:
|
||||||
"""
|
"""
|
||||||
return time.time()
|
return time.time()
|
||||||
|
|
||||||
def create_expiration_timestamp(expires_in_seconds: int) -> float:
|
def createExpirationTimestamp(expiresInSeconds: int) -> float:
|
||||||
"""
|
"""
|
||||||
Create a new expiration timestamp from seconds until expiration.
|
Create a new expiration timestamp from seconds until expiration.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
expires_in_seconds (int): Seconds until expiration
|
expiresInSeconds (int): Seconds until expiration
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: UTC timestamp in seconds
|
float: UTC timestamp in seconds
|
||||||
"""
|
"""
|
||||||
return get_utc_timestamp() + expires_in_seconds
|
return getUtcTimestamp() + expiresInSeconds
|
||||||
|
|
@ -22,13 +22,11 @@ class AdaptiveLearningEngine:
|
||||||
workflowId: str, attemptNumber: int):
|
workflowId: str, attemptNumber: int):
|
||||||
"""Record validation result and learn from it"""
|
"""Record validation result and learn from it"""
|
||||||
try:
|
try:
|
||||||
actionType = actionContext.get('actionType', 'unknown')
|
|
||||||
actionName = actionContext.get('actionName', 'unknown')
|
actionName = actionContext.get('actionName', 'unknown')
|
||||||
|
|
||||||
# Store validation history
|
# Store validation history
|
||||||
validationEntry = {
|
validationEntry = {
|
||||||
'workflowId': workflowId,
|
'workflowId': workflowId,
|
||||||
'actionType': actionType,
|
|
||||||
'actionName': actionName,
|
'actionName': actionName,
|
||||||
'attemptNumber': attemptNumber,
|
'attemptNumber': attemptNumber,
|
||||||
'validationResult': validationResult,
|
'validationResult': validationResult,
|
||||||
|
|
@ -42,17 +40,17 @@ class AdaptiveLearningEngine:
|
||||||
|
|
||||||
# Track patterns
|
# Track patterns
|
||||||
if validationResult.get('overallSuccess', False):
|
if validationResult.get('overallSuccess', False):
|
||||||
self.successPatterns[actionType].append(validationEntry)
|
self.successPatterns[actionName].append(validationEntry)
|
||||||
else:
|
else:
|
||||||
self.failurePatterns[actionType].append(validationEntry)
|
self.failurePatterns[actionName].append(validationEntry)
|
||||||
|
|
||||||
# Update attempt count
|
# Update attempt count
|
||||||
self.actionAttempts[f"{workflowId}:{actionType}"] += 1
|
self.actionAttempts[f"{workflowId}:{actionName}"] += 1
|
||||||
|
|
||||||
# Generate learning insights
|
# Generate learning insights
|
||||||
self._generateLearningInsights(workflowId, actionType)
|
self._generateLearningInsights(workflowId, actionName)
|
||||||
|
|
||||||
logger.info(f"Recorded validation for {actionType} (attempt {attemptNumber}): "
|
logger.info(f"Recorded validation for {actionName} (attempt {attemptNumber}): "
|
||||||
f"Success={validationResult.get('overallSuccess', False)}, "
|
f"Success={validationResult.get('overallSuccess', False)}, "
|
||||||
f"Quality={validationResult.get('qualityScore', 0.0)}")
|
f"Quality={validationResult.get('qualityScore', 0.0)}")
|
||||||
|
|
||||||
|
|
@ -86,21 +84,21 @@ class AdaptiveLearningEngine:
|
||||||
logger.error(f"Error generating adaptive context: {str(e)}")
|
logger.error(f"Error generating adaptive context: {str(e)}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def getAdaptiveContextForParameters(self, workflowId: str, actionType: str,
|
def getAdaptiveContextForParameters(self, workflowId: str, actionName: str,
|
||||||
parametersContext: str) -> Dict[str, Any]:
|
parametersContext: str) -> Dict[str, Any]:
|
||||||
"""Generate adaptive context for parameter selection prompt"""
|
"""Generate adaptive context for parameter selection prompt"""
|
||||||
try:
|
try:
|
||||||
# Get validation history for this specific action type
|
# Get validation history for this specific action name
|
||||||
actionValidations = [
|
actionValidations = [
|
||||||
v for v in self.validationHistory
|
v for v in self.validationHistory
|
||||||
if v['workflowId'] == workflowId and v['actionType'] == actionType
|
if v['workflowId'] == workflowId and v['actionName'] == actionName
|
||||||
][-3:] # Last 3 attempts for this action
|
][-3:] # Last 3 attempts for this action
|
||||||
|
|
||||||
# Analyze what went wrong in previous attempts
|
# Analyze what went wrong in previous attempts
|
||||||
failureAnalysis = self._analyzeParameterFailures(actionValidations)
|
failureAnalysis = self._analyzeParameterFailures(actionValidations)
|
||||||
|
|
||||||
# Generate specific parameter guidance
|
# Generate specific parameter guidance
|
||||||
parameterGuidance = self._generateParameterGuidance(actionType, parametersContext, failureAnalysis)
|
parameterGuidance = self._generateParameterGuidance(actionName, parametersContext, failureAnalysis)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'actionValidations': actionValidations,
|
'actionValidations': actionValidations,
|
||||||
|
|
@ -206,36 +204,28 @@ class AdaptiveLearningEngine:
|
||||||
|
|
||||||
return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available."
|
return "\n".join(guidance_parts) if guidance_parts else "No specific guidance available."
|
||||||
|
|
||||||
def _generateParameterGuidance(self, actionType: str, parametersContext: str,
|
def _generateParameterGuidance(self, actionName: str, parametersContext: str,
|
||||||
failureAnalysis: Dict[str, Any]) -> str:
|
failureAnalysis: Dict[str, Any]) -> str:
|
||||||
"""Generate specific parameter guidance based on previous failures"""
|
"""Generate generic parameter guidance based on previous failures (no app-specific logic)."""
|
||||||
if not failureAnalysis.get('hasFailures', False):
|
if not failureAnalysis.get('hasFailures', False):
|
||||||
return "No previous parameter failures. Use standard parameter values."
|
return "No previous parameter failures. Use standard parameter values."
|
||||||
|
|
||||||
guidance_parts = []
|
guidanceParts = []
|
||||||
|
|
||||||
# Add attempt awareness
|
# Attempt awareness
|
||||||
attemptNumber = failureAnalysis.get('attemptNumber', 1)
|
attemptNumber = failureAnalysis.get('attemptNumber', 1)
|
||||||
if attemptNumber >= 3:
|
if attemptNumber and attemptNumber >= 3:
|
||||||
guidance_parts.append(f"ATTEMPT #{attemptNumber}: Previous attempts failed. Adjust parameters based on validation feedback.")
|
guidanceParts.append(f"Attempt #{attemptNumber}: Adjust parameters based on validation feedback.")
|
||||||
|
|
||||||
# Add specific parameter guidance based on action type
|
# Generic issues summary
|
||||||
if actionType == "outlook.composeAndSendEmailWithContext":
|
commonIssues = failureAnalysis.get('commonIssues', {}) or {}
|
||||||
guidance_parts.append("EMAIL PARAMETER GUIDANCE:")
|
if commonIssues:
|
||||||
guidance_parts.append("- context: Be very specific about account (valueon), appointment time (Friday), and requirements")
|
guidanceParts.append("Address the following parameter issues:")
|
||||||
guidance_parts.append("- emailStyle: Use 'formal' for business emails")
|
for issueKey, issueDesc in commonIssues.items():
|
||||||
guidance_parts.append("- maxLength: Set to 2000+ for detailed emails with summaries")
|
guidanceParts.append(f"- {issueKey}: {issueDesc}")
|
||||||
|
|
||||||
# Add specific guidance based on common failures
|
# Keep guidance format stable
|
||||||
commonIssues = failureAnalysis.get('commonIssues', {})
|
return "\n".join(guidanceParts) if guidanceParts else "Use standard parameter values."
|
||||||
if any("account" in str(issue).lower() for issue in commonIssues.keys()):
|
|
||||||
guidance_parts.append("- context: MUST specify 'from valueon account' explicitly")
|
|
||||||
if any("attachment" in str(issue).lower() for issue in commonIssues.keys()):
|
|
||||||
guidance_parts.append("- documentList: Ensure PDF is properly referenced")
|
|
||||||
if any("summary" in str(issue).lower() for issue in commonIssues.keys()):
|
|
||||||
guidance_parts.append("- context: MUST request '10-12 sentence German summary' explicitly")
|
|
||||||
|
|
||||||
return "\n".join(guidance_parts) if guidance_parts else "Use standard parameter values."
|
|
||||||
|
|
||||||
def _getEscalationLevel(self, workflowId: str) -> str:
|
def _getEscalationLevel(self, workflowId: str) -> str:
|
||||||
"""Determine escalation level based on failure patterns"""
|
"""Determine escalation level based on failure patterns"""
|
||||||
|
|
@ -251,7 +241,7 @@ class AdaptiveLearningEngine:
|
||||||
else:
|
else:
|
||||||
return "low"
|
return "low"
|
||||||
|
|
||||||
def _generateLearningInsights(self, workflowId: str, actionType: str):
|
def _generateLearningInsights(self, workflowId: str, actionName: str):
|
||||||
"""Generate learning insights for a workflow"""
|
"""Generate learning insights for a workflow"""
|
||||||
if workflowId not in self.learningInsights:
|
if workflowId not in self.learningInsights:
|
||||||
self.learningInsights[workflowId] = {}
|
self.learningInsights[workflowId] = {}
|
||||||
|
|
@ -263,7 +253,7 @@ class AdaptiveLearningEngine:
|
||||||
'totalAttempts': len(workflowValidations),
|
'totalAttempts': len(workflowValidations),
|
||||||
'successfulAttempts': len([v for v in workflowValidations if v['success']]),
|
'successfulAttempts': len([v for v in workflowValidations if v['success']]),
|
||||||
'failedAttempts': len([v for v in workflowValidations if not v['success']]),
|
'failedAttempts': len([v for v in workflowValidations if not v['success']]),
|
||||||
'lastActionType': actionType,
|
'lastActionName': actionName,
|
||||||
'escalationLevel': self._getEscalationLevel(workflowId)
|
'escalationLevel': self._getEscalationLevel(workflowId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,14 +26,14 @@ class ContentValidator:
|
||||||
if isinstance(data, dict) and 'content' in data:
|
if isinstance(data, dict) and 'content' in data:
|
||||||
content = data['content']
|
content = data['content']
|
||||||
# For large content, check size before converting to string
|
# For large content, check size before converting to string
|
||||||
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
|
if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
|
||||||
# For very large content, return a size indicator instead
|
# For very large content, return a size indicator instead
|
||||||
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
|
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
|
||||||
return str(content)
|
return str(content)
|
||||||
else:
|
else:
|
||||||
content = data
|
content = data
|
||||||
# For large content, check size before converting to string
|
# For large content, check size before converting to string
|
||||||
if hasattr(content, '__len__') and len(str(content)) > 100000: # 100KB threshold
|
if hasattr(content, '__len__') and len(str(content)) > 10000: # 10KB threshold
|
||||||
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
|
return f"[Large document content - {len(str(content))} characters - truncated for validation]"
|
||||||
return str(content)
|
return str(content)
|
||||||
return ""
|
return ""
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ class IntentAnalyzer:
|
||||||
analysisPrompt = f"""
|
analysisPrompt = f"""
|
||||||
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
|
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
|
||||||
|
|
||||||
USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')}
|
USER REQUEST: {self.services.utils.sanitizePromptContent(userPrompt, 'userinput')}
|
||||||
|
|
||||||
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
|
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -571,7 +571,7 @@ class ActionplanMode(BaseMode):
|
||||||
actionData["execParameters"] = {}
|
actionData["execParameters"] = {}
|
||||||
|
|
||||||
# Use generic field separation based on ActionItem model
|
# Use generic field separation based on ActionItem model
|
||||||
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
|
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
|
||||||
|
|
||||||
# Create action in database
|
# Create action in database
|
||||||
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
||||||
|
|
@ -715,7 +715,7 @@ class ActionplanMode(BaseMode):
|
||||||
actionData["execParameters"] = {}
|
actionData["execParameters"] = {}
|
||||||
|
|
||||||
# Use generic field separation based on ActionItem model
|
# Use generic field separation based on ActionItem model
|
||||||
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
|
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
|
||||||
|
|
||||||
# Create action in database
|
# Create action in database
|
||||||
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,12 @@ class ReactMode(BaseMode):
|
||||||
|
|
||||||
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
|
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
|
||||||
if getattr(self, 'workflowIntent', None) and result.documents:
|
if getattr(self, 'workflowIntent', None) and result.documents:
|
||||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
|
# Validate ONLY the produced JSON (structured content), not rendered files
|
||||||
|
from types import SimpleNamespace
|
||||||
|
validationDocs = []
|
||||||
|
if hasattr(result, 'content') and result.content:
|
||||||
|
validationDocs.append(SimpleNamespace(documentName='generated.json', documentData={'content': result.content}))
|
||||||
|
validationResult = await self.contentValidator.validateContent(validationDocs, self.workflowIntent)
|
||||||
observation['contentValidation'] = validationResult
|
observation['contentValidation'] = validationResult
|
||||||
quality_score = validationResult.get('qualityScore', 0.0)
|
quality_score = validationResult.get('qualityScore', 0.0)
|
||||||
if quality_score is None:
|
if quality_score is None:
|
||||||
|
|
@ -106,9 +111,9 @@ class ReactMode(BaseMode):
|
||||||
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
|
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
|
||||||
|
|
||||||
# NEW: Record validation result for adaptive learning
|
# NEW: Record validation result for adaptive learning
|
||||||
|
actionValue = selection.get('action', 'unknown')
|
||||||
actionContext = {
|
actionContext = {
|
||||||
'actionType': selection.get('action', {}).get('action', 'unknown'),
|
'actionName': actionValue,
|
||||||
'actionName': selection.get('action', {}).get('action', 'unknown'),
|
|
||||||
'workflowId': context.workflow_id
|
'workflowId': context.workflow_id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -747,7 +752,7 @@ Return only the user-friendly message, no technical details."""
|
||||||
actionData["execParameters"] = {}
|
actionData["execParameters"] = {}
|
||||||
|
|
||||||
# Use generic field separation based on ActionItem model
|
# Use generic field separation based on ActionItem model
|
||||||
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
|
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
|
||||||
|
|
||||||
# Create action in database
|
# Create action in database
|
||||||
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
||||||
|
|
@ -838,7 +843,7 @@ Return only the user-friendly message, no technical details."""
|
||||||
actionData["execParameters"] = {}
|
actionData["execParameters"] = {}
|
||||||
|
|
||||||
# Use generic field separation based on ActionItem model
|
# Use generic field separation based on ActionItem model
|
||||||
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
|
simpleFields, objectFields = self.services.interfaceDbChat._separateObjectFields(ActionItem, actionData)
|
||||||
|
|
||||||
# Create action in database
|
# Create action in database
|
||||||
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
|
||||||
|
|
|
||||||
|
|
@ -215,7 +215,7 @@ class WorkflowManager:
|
||||||
" }\n"
|
" }\n"
|
||||||
" ]\n"
|
" ]\n"
|
||||||
"}\n\n"
|
"}\n\n"
|
||||||
f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}"
|
f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Call AI analyzer (planning call - will use static parameters)
|
# Call AI analyzer (planning call - will use static parameters)
|
||||||
|
|
|
||||||
107
naming_violations_report.csv
Normal file
107
naming_violations_report.csv
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
Module,Function Names,Parameter Names,Variable Names,Total
|
||||||
|
modules/workflows/methods/methodSharepoint.py,0,2,211,213
|
||||||
|
modules/workflows/methods/methodOutlook.py,0,3,131,134
|
||||||
|
modules/services/serviceAi/subDocumentProcessing.py,0,0,104,104
|
||||||
|
modules/features/syncDelta/mainSyncDelta.py,1,10,88,99
|
||||||
|
modules/shared/jsonUtils.py,0,3,88,91
|
||||||
|
modules/services/serviceGeneration/renderers/rendererDocx.py,3,8,79,90
|
||||||
|
modules/services/serviceWorkflow/mainServiceWorkflow.py,0,3,85,88
|
||||||
|
modules/services/serviceGeneration/renderers/rendererPptx.py,2,7,73,82
|
||||||
|
modules/services/serviceGeneration/renderers/rendererPdf.py,3,8,50,61
|
||||||
|
modules/connectors/connectorVoiceGoogle.py,1,2,52,55
|
||||||
|
modules/services/serviceGeneration/renderers/rendererHtml.py,3,6,46,55
|
||||||
|
modules/services/serviceGeneration/renderers/rendererBaseTemplate.py,3,21,27,51
|
||||||
|
modules/shared/configuration.py,2,17,30,49
|
||||||
|
modules/services/serviceExtraction/subMerger.py,2,5,31,38
|
||||||
|
modules/connectors/connectorDbPostgre.py,0,14,20,34
|
||||||
|
modules/interfaces/interfaceDbAppObjects.py,0,8,26,34
|
||||||
|
modules/routes/routeSecurityGoogle.py,0,0,32,32
|
||||||
|
modules/shared/attributeUtils.py,3,4,25,32
|
||||||
|
modules/interfaces/interfaceDbChatObjects.py,0,4,27,31
|
||||||
|
modules/routes/routeSecurityAdmin.py,0,2,28,30
|
||||||
|
modules/services/serviceNeutralization/subProcessList.py,7,0,22,29
|
||||||
|
modules/services/serviceGeneration/renderers/rendererText.py,3,7,19,29
|
||||||
|
modules/routes/routeSecurityMsft.py,0,0,27,27
|
||||||
|
modules/services/serviceGeneration/renderers/rendererMarkdown.py,3,7,17,27
|
||||||
|
modules/services/serviceGeneration/renderers/rendererXlsx.py,3,0,24,27
|
||||||
|
modules/services/serviceGeneration/renderers/rendererImage.py,3,2,21,26
|
||||||
|
modules/security/tokenManager.py,4,7,14,25
|
||||||
|
modules/workflows/workflowManager.py,0,0,25,25
|
||||||
|
modules/services/serviceGeneration/renderers/rendererCsv.py,3,5,17,25
|
||||||
|
modules/shared/auditLogger.py,5,16,3,24
|
||||||
|
modules/shared/debugLogger.py,0,0,24,24
|
||||||
|
modules/workflows/processing/shared/placeholderFactory.py,0,0,24,24
|
||||||
|
modules/interfaces/interfaceDbAppAccess.py,0,2,21,23
|
||||||
|
modules/connectors/connectorTicketsJira.py,0,0,22,22
|
||||||
|
modules/services/serviceGeneration/renderers/registry.py,7,3,12,22
|
||||||
|
modules/routes/routeDataConnections.py,1,1,19,21
|
||||||
|
modules/security/tokenRefreshService.py,0,2,19,21
|
||||||
|
modules/services/serviceExtraction/extractors/extractorPptx.py,0,1,16,17
|
||||||
|
modules/routes/routeSecurityLocal.py,0,0,16,16
|
||||||
|
modules/workflows/methods/methodBase.py,0,4,12,16
|
||||||
|
modules/services/serviceGeneration/mainServiceGeneration.py,0,4,11,15
|
||||||
|
modules/services/serviceUtils/mainServiceUtils.py,0,14,1,15
|
||||||
|
modules/features/neutralizePlayground/mainNeutralizePlayground.py,8,5,2,15
|
||||||
|
modules/interfaces/interfaceTicketObjects.py,0,5,9,14
|
||||||
|
modules/services/serviceNeutralization/subParseString.py,7,0,6,13
|
||||||
|
modules/workflows/processing/modes/modeReact.py,0,1,11,12
|
||||||
|
modules/interfaces/interfaceDbComponentAccess.py,0,2,9,11
|
||||||
|
modules/services/serviceAi/subCoreAi.py,0,0,11,11
|
||||||
|
modules/services/serviceExtraction/subRegistry.py,0,0,11,11
|
||||||
|
modules/services/serviceNeutralization/mainServiceNeutralization.py,0,2,9,11
|
||||||
|
modules/interfaces/interfaceAiObjects.py,0,0,10,10
|
||||||
|
modules/services/serviceAi/subSharedAiUtils.py,0,3,7,10
|
||||||
|
modules/connectors/connectorDbJson.py,0,3,6,9
|
||||||
|
modules/workflows/methods/methodAi.py,0,0,9,9
|
||||||
|
modules/services/serviceExtraction/subPromptBuilderExtraction.py,0,0,9,9
|
||||||
|
modules/services/serviceGeneration/subDocumentUtility.py,0,3,6,9
|
||||||
|
modules/services/serviceNeutralization/subProcessCommon.py,7,2,0,9
|
||||||
|
modules/services/serviceNeutralization/subProcessText.py,5,0,4,9
|
||||||
|
modules/interfaces/interfaceDbChatAccess.py,0,2,6,8
|
||||||
|
modules/security/auth.py,0,1,7,8
|
||||||
|
modules/aicore/aicorePluginAnthropic.py,0,0,7,7
|
||||||
|
modules/security/tokenRefreshMiddleware.py,0,2,4,6
|
||||||
|
modules/services/serviceGeneration/renderers/rendererJson.py,3,0,3,6
|
||||||
|
analyze_naming_violations.py,5,0,0,5
|
||||||
|
modules/aicore/aicorePluginOpenai.py,0,0,5,5
|
||||||
|
modules/routes/routeVoiceGoogle.py,0,0,5,5
|
||||||
|
modules/shared/eventManagement.py,2,3,0,5
|
||||||
|
modules/workflows/processing/adaptive/intentAnalyzer.py,0,0,5,5
|
||||||
|
modules/workflows/processing/shared/executionState.py,0,5,0,5
|
||||||
|
modules/services/serviceGeneration/subJsonSchema.py,0,0,5,5
|
||||||
|
modules/services/serviceNeutralization/subPatterns.py,5,0,0,5
|
||||||
|
modules/services/serviceNeutralization/subProcessBinary.py,4,0,1,5
|
||||||
|
modules/services/serviceExtraction/extractors/extractorXlsx.py,0,0,5,5
|
||||||
|
modules/interfaces/interfaceDbComponentObjects.py,0,3,1,4
|
||||||
|
modules/routes/routeDataNeutralization.py,0,0,4,4
|
||||||
|
modules/routes/routeWorkflows.py,0,0,4,4
|
||||||
|
modules/shared/timezoneUtils.py,3,1,0,4
|
||||||
|
modules/workflows/processing/adaptive/contentValidator.py,0,0,4,4
|
||||||
|
modules/workflows/processing/core/messageCreator.py,0,0,4,4
|
||||||
|
modules/services/serviceSharepoint/mainServiceSharepoint.py,0,0,4,4
|
||||||
|
modules/routes/routeDataUsers.py,0,0,3,3
|
||||||
|
modules/services/serviceExtraction/subPipeline.py,0,0,3,3
|
||||||
|
app.py,0,0,2,2
|
||||||
|
modules/datamodels/datamodelChat.py,0,1,1,2
|
||||||
|
modules/routes/routeAttributes.py,0,0,2,2
|
||||||
|
modules/routes/routeDataPrompts.py,0,0,2,2
|
||||||
|
modules/security/csrf.py,0,1,1,2
|
||||||
|
modules/security/jwtService.py,0,0,2,2
|
||||||
|
modules/workflows/processing/adaptive/learningEngine.py,0,0,2,2
|
||||||
|
modules/workflows/processing/modes/modeActionplan.py,0,0,2,2
|
||||||
|
modules/workflows/processing/shared/methodDiscovery.py,0,0,2,2
|
||||||
|
modules/services/serviceNormalization/mainServiceNormalization.py,0,0,2,2
|
||||||
|
modules/services/serviceExtraction/extractors/extractorImage.py,0,0,2,2
|
||||||
|
modules/aicore/aicoreBase.py,0,0,1,1
|
||||||
|
modules/aicore/aicoreModelSelector.py,0,0,1,1
|
||||||
|
modules/connectors/connectorTicketsClickup.py,0,0,1,1
|
||||||
|
modules/datamodels/datamodelDocument.py,0,1,0,1
|
||||||
|
modules/datamodels/datamodelSecurity.py,0,0,1,1
|
||||||
|
modules/routes/routeAdmin.py,0,0,1,1
|
||||||
|
modules/routes/routeDataFiles.py,0,0,1,1
|
||||||
|
modules/workflows/processing/workflowProcessor.py,0,0,1,1
|
||||||
|
modules/workflows/processing/adaptive/adaptiveLearningEngine.py,0,0,1,1
|
||||||
|
modules/workflows/processing/core/actionExecutor.py,0,0,1,1
|
||||||
|
modules/workflows/processing/core/taskPlanner.py,0,0,1,1
|
||||||
|
modules/workflows/processing/modes/modeBase.py,0,0,1,1
|
||||||
|
modules/services/serviceAi/subDocumentGeneration.py,0,0,1,1
|
||||||
|
184
processDocumentsWithContinuation_usage_analysis.md
Normal file
184
processDocumentsWithContinuation_usage_analysis.md
Normal file
|
|
@ -0,0 +1,184 @@
|
||||||
|
# Analysis: `processDocumentsWithContinuation` and Subfunctions Usage
|
||||||
|
|
||||||
|
## Executive Summary
|
||||||
|
|
||||||
|
**FINDING**: The function `processDocumentsWithContinuation` in `subDocumentProcessing.py` is **NOT USED** anywhere in the active codebase. The continuation chain was only referenced by the deleted `subDocumentGeneration.py` module.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Main Function: `processDocumentsWithContinuation`
|
||||||
|
|
||||||
|
**Location**: `gateway/modules/services/serviceAi/subDocumentProcessing.py:303`
|
||||||
|
|
||||||
|
**Status**: ❌ **NOT USED**
|
||||||
|
|
||||||
|
### Usage Search Results
|
||||||
|
|
||||||
|
- ❌ No actual code calls to `.processDocumentsWithContinuation(`
|
||||||
|
- ⚠️ Only mentioned in documentation files:
|
||||||
|
- `wiki/poweron/appdoc/doc_system_function_relationship_ai.md` (documentation)
|
||||||
|
- `gateway/callAiWithDocumentGeneration_usage_analysis.md` (previous analysis - noted it was called by deleted code)
|
||||||
|
|
||||||
|
### Why It's Not Used
|
||||||
|
|
||||||
|
The only caller was `subDocumentGeneration._processDocumentsUnified()`, which has since been deleted along with the rest of `subDocumentGeneration.py`. The current active codebase uses `subCoreAi.callAiDocuments()`, which has its own continuation logic via `_callAiWithLooping()`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Function Call Chain Analysis
|
||||||
|
|
||||||
|
```
|
||||||
|
processDocumentsWithContinuation (line 303) - ❌ NOT USED
|
||||||
|
├─> _buildContinuationPrompt (line 319, 324) - ❌ ONLY USED HERE
|
||||||
|
└─> _processWithContinuationLoop (line 322, 373) - ❌ ONLY USED HERE
|
||||||
|
├─> _buildContinuationIterationPrompt (line 393, 459) - ❌ ONLY USED HERE
|
||||||
|
└─> processDocumentsPerChunkJsonWithPrompt (line 402) - ⚠️ ONLY USED HERE (general-purpose)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Subfunction Analysis
|
||||||
|
|
||||||
|
### 1. `_buildContinuationPrompt`
|
||||||
|
**Location**: Line 324-371
|
||||||
|
**Status**: ✅ **USED** (but only internally)
|
||||||
|
**Called by**: `processDocumentsWithContinuation` (line 319)
|
||||||
|
**Effectively**: ❌ **UNUSED** (because parent function is unused)
|
||||||
|
|
||||||
|
**Internal Usage**:
|
||||||
|
- Called from `processDocumentsWithContinuation` at line 319
|
||||||
|
|
||||||
|
**Functionality**:
|
||||||
|
- Builds a prompt with continuation instructions
|
||||||
|
- Adds JSON structure requirements with `"continue": true/false` flag
|
||||||
|
- Adds `continuation_context` field specification
|
||||||
|
|
||||||
|
**Note**: This uses a different continuation pattern than `SubCoreAi._callAiWithLooping()`:
|
||||||
|
- This uses `"continue": true/false + "continuation_context"` for document sections
|
||||||
|
- SubCoreAi uses `buildContinuationContext()` with `last_raw_json`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. `_processWithContinuationLoop`
|
||||||
|
**Location**: Line 373-457
|
||||||
|
**Status**: ✅ **USED** (but only internally)
|
||||||
|
**Called by**: `processDocumentsWithContinuation` (line 322)
|
||||||
|
**Effectively**: ❌ **UNUSED** (because parent function is unused)
|
||||||
|
|
||||||
|
**Internal Usage**:
|
||||||
|
- Called from `processDocumentsWithContinuation` at line 322
|
||||||
|
|
||||||
|
**External Dependencies**:
|
||||||
|
- Calls `self._buildContinuationIterationPrompt()` (line 393)
|
||||||
|
- Calls `self.processDocumentsPerChunkJsonWithPrompt()` (line 402)
|
||||||
|
|
||||||
|
**Functionality**:
|
||||||
|
- Implements continuation loop (max 10 iterations)
|
||||||
|
- Accumulates sections across iterations
|
||||||
|
- Checks `continue` flag and `continuation_context` to determine if more iterations needed
|
||||||
|
- Builds final result with accumulated sections
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. `_buildContinuationIterationPrompt`
|
||||||
|
**Location**: Line 459-498
|
||||||
|
**Status**: ✅ **USED** (but only internally)
|
||||||
|
**Called by**: `_processWithContinuationLoop` (line 393)
|
||||||
|
**Effectively**: ❌ **UNUSED** (because parent chain is unused)
|
||||||
|
|
||||||
|
**Internal Usage**:
|
||||||
|
- Called from `_processWithContinuationLoop` at line 393 (in loop, conditionally)
|
||||||
|
|
||||||
|
**Functionality**:
|
||||||
|
- Builds a prompt for continuation iteration with context
|
||||||
|
- Includes summary of previously generated content (last 3 sections)
|
||||||
|
- Includes continuation instructions with last section ID, element index, remaining requirements
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. `processDocumentsPerChunkJsonWithPrompt`
|
||||||
|
**Location**: Line 219-301
|
||||||
|
**Status**: ⚠️ **ONLY USED BY UNUSED CODE**
|
||||||
|
**Called by**:
|
||||||
|
- `_processWithContinuationLoop` (line 402)
|
||||||
|
- Also referenced in backup files (not active code)
|
||||||
|
|
||||||
|
**Internal Usage**:
|
||||||
|
- Called from `_processWithContinuationLoop` at line 402
|
||||||
|
|
||||||
|
**External Usage Search**:
|
||||||
|
- ✅ Used internally by continuation loop
|
||||||
|
- ⚠️ Referenced in `local/backup/backup_mainServiceAi.py.txt` (backup file, not active)
|
||||||
|
- ❌ Not used by any other active code
|
||||||
|
|
||||||
|
**Functionality**:
|
||||||
|
- Processes documents with per-chunk AI calls
|
||||||
|
- Uses a custom prompt instead of default extraction prompt
|
||||||
|
- Returns merged JSON document
|
||||||
|
|
||||||
|
**Note**: The only active caller of this function is the continuation loop, which is itself unused, so the function is currently reachable only through dead code. It is, however, a general-purpose function that could be useful on its own, so it is listed separately from the dead continuation chain.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Summary Table
|
||||||
|
|
||||||
|
| Function | Line | Status | Called By | Effectively Used? |
|
||||||
|
|----------|------|--------|-----------|-------------------|
|
||||||
|
| `processDocumentsWithContinuation` | 303 | ❌ Not used | (external) | ❌ No |
|
||||||
|
| `_buildContinuationPrompt` | 324 | ✅ Used internally | `processDocumentsWithContinuation:319` | ❌ No |
|
||||||
|
| `_processWithContinuationLoop` | 373 | ✅ Used internally | `processDocumentsWithContinuation:322` | ❌ No |
|
||||||
|
| `_buildContinuationIterationPrompt` | 459 | ✅ Used internally | `_processWithContinuationLoop:393` | ❌ No |
|
||||||
|
| `processDocumentsPerChunkJsonWithPrompt` | 219 | ✅ Used internally | `_processWithContinuationLoop:402` | ⚠️ **ONLY USED BY UNUSED CODE** |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Current Active Implementation
|
||||||
|
|
||||||
|
The active continuation logic is in `subCoreAi.callAiDocuments()` → `_callAiWithLooping()`:
|
||||||
|
- Uses `buildGenerationPrompt()` with `continuationContext` parameter
|
||||||
|
- Uses `buildContinuationContext()` to build context from sections
|
||||||
|
- Different continuation pattern (uses `last_raw_json` instead of `continuation_context`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dead Code Identification
|
||||||
|
|
||||||
|
**Completely Unused Chain** (can be safely removed):
|
||||||
|
1. ✅ `processDocumentsWithContinuation` - entry point, not called
|
||||||
|
2. ✅ `_buildContinuationPrompt` - only used by #1
|
||||||
|
3. ✅ `_processWithContinuationLoop` - only used by #1
|
||||||
|
4. ✅ `_buildContinuationIterationPrompt` - only used by #3
|
||||||
|
|
||||||
|
**Potentially Unused** (only used by dead code):
|
||||||
|
- ⚠️ `processDocumentsPerChunkJsonWithPrompt` - only caller is dead code, but function is general-purpose
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommendations
|
||||||
|
|
||||||
|
1. **Remove Dead Code Chain**: All four functions (`processDocumentsWithContinuation`, `_buildContinuationPrompt`, `_processWithContinuationLoop`, `_buildContinuationIterationPrompt`) can be safely removed.
|
||||||
|
|
||||||
|
2. **For `processDocumentsPerChunkJsonWithPrompt`**:
|
||||||
|
- **Option A**: Remove if not needed (it's only used by the dead continuation chain)
|
||||||
|
- **Option B**: Keep if it might be useful for future custom prompt processing
|
||||||
|
- **Recommendation**: Since it's a general-purpose function that could be useful, keep it but note that it's currently unused.
|
||||||
|
|
||||||
|
3. **If Keeping**: Document why this continuation logic exists but is unused, or mark as deprecated/legacy alternative to `_callAiWithLooping()`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Verification Commands
|
||||||
|
|
||||||
|
To verify these findings:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Search for actual function calls (should return no results for the main function)
|
||||||
|
grep -r "\.processDocumentsWithContinuation(" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup
|
||||||
|
|
||||||
|
# Search for _buildContinuationPrompt usage (should only find the definition and its single call inside processDocumentsWithContinuation)
|
||||||
|
grep -r "_buildContinuationPrompt" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
|
||||||
|
|
||||||
|
# Search for _processWithContinuationLoop usage (should only find the definition and its single call inside processDocumentsWithContinuation)
|
||||||
|
grep -r "_processWithContinuationLoop" gateway/ --exclude-dir=wiki --exclude-dir=local --exclude-dir=backup --exclude="*.md"
|
||||||
|
```
|
||||||
|
|
||||||
|
|
@ -39,7 +39,7 @@ else:
|
||||||
|
|
||||||
# Import encryption functions
|
# Import encryption functions
|
||||||
try:
|
try:
|
||||||
from modules.shared.configuration import encrypt_value
|
from modules.shared.configuration import encryptValue
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
print(f"Error: Could not import encryption functions from shared.configuration: {e}")
|
print(f"Error: Could not import encryption functions from shared.configuration: {e}")
|
||||||
print(f"Make sure you're running this script from the gateway directory")
|
print(f"Make sure you're running this script from the gateway directory")
|
||||||
|
|
@ -254,7 +254,7 @@ def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_b
|
||||||
print(f" 🔐 Encrypting {key}...")
|
print(f" 🔐 Encrypting {key}...")
|
||||||
|
|
||||||
# Encrypt the value using the environment type from the file
|
# Encrypt the value using the environment type from the file
|
||||||
encrypted_value = encrypt_value(value, file_env_type)
|
encrypted_value = encryptValue(value, file_env_type)
|
||||||
|
|
||||||
# Replace the line in the file content
|
# Replace the line in the file content
|
||||||
new_line = f"{key} = {encrypted_value}\n"
|
new_line = f"{key} = {encrypted_value}\n"
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ from datetime import datetime
|
||||||
# Add the modules directory to the Python path
|
# Add the modules directory to the Python path
|
||||||
sys.path.insert(0, str(Path(__file__).parent / 'modules'))
|
sys.path.insert(0, str(Path(__file__).parent / 'modules'))
|
||||||
|
|
||||||
from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
|
from modules.shared.configuration import encryptValue, decryptValue, isEncryptedValue
|
||||||
|
|
||||||
def get_env_type_from_file(file_path: Path) -> str:
|
def get_env_type_from_file(file_path: Path) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -247,7 +247,7 @@ def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool =
|
||||||
print(f" 🔐 Encrypting {key}...")
|
print(f" 🔐 Encrypting {key}...")
|
||||||
|
|
||||||
# Encrypt the value using the environment type from the file
|
# Encrypt the value using the environment type from the file
|
||||||
encrypted_value = encrypt_value(value, file_env_type)
|
encrypted_value = encryptValue(value, file_env_type)
|
||||||
|
|
||||||
# Replace the line in the file content
|
# Replace the line in the file content
|
||||||
new_line = f"{key} = {encrypted_value}\n"
|
new_line = f"{key} = {encrypted_value}\n"
|
||||||
|
|
@ -360,8 +360,8 @@ def main():
|
||||||
|
|
||||||
# Handle decryption
|
# Handle decryption
|
||||||
if args.decrypt:
|
if args.decrypt:
|
||||||
if _is_encrypted_value(args.decrypt):
|
if isEncryptedValue(args.decrypt):
|
||||||
decrypted = decrypt_value(args.decrypt)
|
decrypted = decryptValue(args.decrypt)
|
||||||
print(f"Decrypted value: {decrypted}")
|
print(f"Decrypted value: {decrypted}")
|
||||||
else:
|
else:
|
||||||
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
|
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
|
||||||
|
|
@ -411,7 +411,7 @@ def main():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Encrypt the value
|
# Encrypt the value
|
||||||
encrypted_value = encrypt_value(value_to_encrypt, args.env)
|
encrypted_value = encryptValue(value_to_encrypt, args.env)
|
||||||
|
|
||||||
print(f"\n✓ Encryption successful!")
|
print(f"\n✓ Encryption successful!")
|
||||||
print(f"Environment: {args.env or 'current'}")
|
print(f"Environment: {args.env or 'current'}")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue