From 64d1c083e02272c61d809ceae56c5aeb076e3b7e Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sat, 26 Apr 2025 02:13:22 +0200
Subject: [PATCH] mvp 1.2 ready for single test
---
_SAVE_app copy.py | 194 +
app.py | 164 +-
connectors/_SAVE_connectorDbJson copy.py | 546 ++
...t_anthropic.py => connectorAiAnthropic.py} | 132 +-
..._aichat_openai.py => connectorAiOpenai.py} | 82 +-
connectors/connectorDbJson.py | 561 ++
connectors/connector_db_json.py | 557 --
env_dev.env | 2 +-
env_prod.env | 2 +-
modules/_SAVE_gatewayInterface copy.py | 261 +
...{chat_agent_analyst.py => agentAnalyst.py} | 440 +-
.../{chat_agent_coder.py => agentCoder.py} | 347 +-
...documentation.py => agentDocumentation.py} | 370 +-
...agent_webcrawler.py => agentWebcrawler.py} | 484 +-
modules/auth.py | 99 +-
modules/chat.py | 1235 ----
modules/chat_content_extraction.py | 778 ---
modules/configuration.py | 86 +-
.../{def_attributes.py => defAttributes.py} | 60 +-
modules/documentProcessor.py | 887 +++
modules/gatewayInterface.py | 471 ++
modules/{gateway_model.py => gatewayModel.py} | 44 +-
modules/gateway_interface.py | 458 --
...cydom_interface.py => lucydomInterface.py} | 922 +--
modules/{lucydom_model.py => lucydomModel.py} | 100 +-
..._registry.py => workflowAgentsRegistry.py} | 132 +-
modules/workflowManager.py | 1236 ++++
notes/changelog.txt | 191 +-
notes/{frontend.md => doc_frontend.md} | 0
notes/doc_statemachine_backend.md | 366 ++
notes/doc_statemachine_frontend.md | 453 ++
routes/attributes.py | 74 -
routes/mandates.py | 216 -
routes/prompts.py | 188 -
routes/routeAttributes.py | 74 +
routes/{files.py => routeFiles.py} | 167 +-
routes/routeMandates.py | 199 +
routes/routePrompts.py | 160 +
routes/routeUsers.py | 249 +
routes/routeWorkflows.py | 667 ++
routes/users.py | 265 -
routes/workflows.py | 371 --
static/1_test_document.txt | 10 +
static/2_test_image.png | Bin 0 -> 287 bytes
static/3_q1_sales_chart.svg | 31 +
static/44_LF-Details.png | Bin 253009 -> 0 bytes
static/45_LF-Details_summary.txt | 27 -
static/46_LF-Details_Summary.txt | 30 -
static/47_prompt_a1.txt | 1 -
static/48_short_story_french.txt | 138 -
static/49_bedtime_story_french.txt | 147 -
static/4_q1_sales_data.md | 19 +
static/50_bedtime_story_french.txt | 101 -
static/51_short_story_french.txt | 177 -
static/52_french_childrens_story.txt | 244 -
static/53_animal_story_fr.txt | 166 -
static/54_animal_story_french.txt | 163 -
static/55_children_story.txt | 204 -
static/56_children_story.txt | 198 -
static/57_children_story.txt | 227 -
static/58_kinder_geschichte_ueber_fische.txt | 120 -
static/59_bean_recipe.txt | 213 -
static/5_sales_trends_analysis.txt | 75 +
static/60_triage_definition.txt | 37 -
static/61_goal_definition.txt | 37 -
static/62_handling_definition.txt | 44 -
static/63_triangle_definition.txt | 45 -
static/64_translation_result.txt | 37 -
static/65_definition_greedy.txt | 39 -
static/66_workflow_logs.json | 5822 -----------------
static/67_grid_definition.txt | 44 -
static/68_summary.txt | 152 -
static/69_nacht_gedicht.txt | 228 -
static/6_sales_trends_analysis.txt | 71 +
static/70_generated_code.py | 33 -
static/71_execution_history.json | 13 -
static/72_generated_code.py | 39 -
static/73_execution_history.json | 13 -
static/74_generated_code.py | 33 -
static/75_execution_history.json | 13 -
static/76_generated_code.py | 42 -
static/77_execution_history.json | 13 -
static/78_generated_code.py | 39 -
static/79_execution_history.json | 13 -
static/7_q2_forecast.svg | 1 +
static/8_forecast_apr_jun.svg | 1 +
test_workflow1.py | 230 -
testcode.py | 66 -
stats.py => tool_getStats.py | 0
tool_testBackendSingle.py | 432 ++
tool_testData.py | 1064 +++
tool_testUser.py | 244 +
92 files changed, 10279 insertions(+), 15147 deletions(-)
create mode 100644 _SAVE_app copy.py
create mode 100644 connectors/_SAVE_connectorDbJson copy.py
rename connectors/{connector_aichat_anthropic.py => connectorAiAnthropic.py} (59%)
rename connectors/{connector_aichat_openai.py => connectorAiOpenai.py} (59%)
create mode 100644 connectors/connectorDbJson.py
delete mode 100644 connectors/connector_db_json.py
create mode 100644 modules/_SAVE_gatewayInterface copy.py
rename modules/{chat_agent_analyst.py => agentAnalyst.py} (56%)
rename modules/{chat_agent_coder.py => agentCoder.py} (61%)
rename modules/{chat_agent_documentation.py => agentDocumentation.py} (53%)
rename modules/{chat_agent_webcrawler.py => agentWebcrawler.py} (56%)
delete mode 100644 modules/chat.py
delete mode 100644 modules/chat_content_extraction.py
rename modules/{def_attributes.py => defAttributes.py} (64%)
create mode 100644 modules/documentProcessor.py
create mode 100644 modules/gatewayInterface.py
rename modules/{gateway_model.py => gatewayModel.py} (68%)
delete mode 100644 modules/gateway_interface.py
rename modules/{lucydom_interface.py => lucydomInterface.py} (50%)
rename modules/{lucydom_model.py => lucydomModel.py} (54%)
rename modules/{chat_registry.py => workflowAgentsRegistry.py} (58%)
create mode 100644 modules/workflowManager.py
rename notes/{frontend.md => doc_frontend.md} (100%)
create mode 100644 notes/doc_statemachine_backend.md
create mode 100644 notes/doc_statemachine_frontend.md
delete mode 100644 routes/attributes.py
delete mode 100644 routes/mandates.py
delete mode 100644 routes/prompts.py
create mode 100644 routes/routeAttributes.py
rename routes/{files.py => routeFiles.py} (56%)
create mode 100644 routes/routeMandates.py
create mode 100644 routes/routePrompts.py
create mode 100644 routes/routeUsers.py
create mode 100644 routes/routeWorkflows.py
delete mode 100644 routes/users.py
delete mode 100644 routes/workflows.py
create mode 100644 static/1_test_document.txt
create mode 100644 static/2_test_image.png
create mode 100644 static/3_q1_sales_chart.svg
delete mode 100644 static/44_LF-Details.png
delete mode 100644 static/45_LF-Details_summary.txt
delete mode 100644 static/46_LF-Details_Summary.txt
delete mode 100644 static/47_prompt_a1.txt
delete mode 100644 static/48_short_story_french.txt
delete mode 100644 static/49_bedtime_story_french.txt
create mode 100644 static/4_q1_sales_data.md
delete mode 100644 static/50_bedtime_story_french.txt
delete mode 100644 static/51_short_story_french.txt
delete mode 100644 static/52_french_childrens_story.txt
delete mode 100644 static/53_animal_story_fr.txt
delete mode 100644 static/54_animal_story_french.txt
delete mode 100644 static/55_children_story.txt
delete mode 100644 static/56_children_story.txt
delete mode 100644 static/57_children_story.txt
delete mode 100644 static/58_kinder_geschichte_ueber_fische.txt
delete mode 100644 static/59_bean_recipe.txt
create mode 100644 static/5_sales_trends_analysis.txt
delete mode 100644 static/60_triage_definition.txt
delete mode 100644 static/61_goal_definition.txt
delete mode 100644 static/62_handling_definition.txt
delete mode 100644 static/63_triangle_definition.txt
delete mode 100644 static/64_translation_result.txt
delete mode 100644 static/65_definition_greedy.txt
delete mode 100644 static/66_workflow_logs.json
delete mode 100644 static/67_grid_definition.txt
delete mode 100644 static/68_summary.txt
delete mode 100644 static/69_nacht_gedicht.txt
create mode 100644 static/6_sales_trends_analysis.txt
delete mode 100644 static/70_generated_code.py
delete mode 100644 static/71_execution_history.json
delete mode 100644 static/72_generated_code.py
delete mode 100644 static/73_execution_history.json
delete mode 100644 static/74_generated_code.py
delete mode 100644 static/75_execution_history.json
delete mode 100644 static/76_generated_code.py
delete mode 100644 static/77_execution_history.json
delete mode 100644 static/78_generated_code.py
delete mode 100644 static/79_execution_history.json
create mode 100644 static/7_q2_forecast.svg
create mode 100644 static/8_forecast_apr_jun.svg
delete mode 100644 test_workflow1.py
delete mode 100644 testcode.py
rename stats.py => tool_getStats.py (100%)
create mode 100644 tool_testBackendSingle.py
create mode 100644 tool_testData.py
create mode 100644 tool_testUser.py
diff --git a/_SAVE_app copy.py b/_SAVE_app copy.py
new file mode 100644
index 00000000..9b8b6fed
--- /dev/null
+++ b/_SAVE_app copy.py
@@ -0,0 +1,194 @@
+import os
+os.environ["NUMEXPR_MAX_THREADS"] = "12"
+
+from fastapi import FastAPI, HTTPException, Depends, Body, status, Response
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.security import OAuth2PasswordRequestForm
+from contextlib import asynccontextmanager
+
+from typing import Dict, Any
+import logging
+from logging.handlers import RotatingFileHandler
+from datetime import timedelta
+import pathlib
+
+from modules.configuration import APP_CONFIG
+
+# Import auth module
+from modules.auth import (
+ createAccessToken,
+ getCurrentActiveUser,
+ getUserContext,
+ ACCESS_TOKEN_EXPIRE_MINUTES
+)
+
+# Import models - import generically for INITIALIZATION
+import modules.gatewayModel as gatewayModel
+from modules.gatewayInterface import getGatewayInterface
+
+def initLogging():
+ # Get log level from config (default to INFO if not found)
+ logLevelName = APP_CONFIG.get("Logging_LOG_LEVEL", "WARNING")
+ logLevel = getattr(logging, logLevelName)
+
+ # Configure handlers based on config
+ handlers = []
+
+ # Add console handler if enabled
+ if APP_CONFIG.get("Logging_CONSOLE_ENABLED", True):
+ consoleHandler = logging.StreamHandler()
+ handlers.append(consoleHandler)
+
+ # Add file handler if enabled
+ if APP_CONFIG.get("Logging_FILE_ENABLED", True):
+ logFile = APP_CONFIG.get("Logging_LOG_FILE", "app.log")
+ rotationSize = int(APP_CONFIG.get("Logging_ROTATION_SIZE", 10485760)) # Default: 10MB
+ backupCount = int(APP_CONFIG.get("Logging_BACKUP_COUNT", 5))
+
+ fileHandler = RotatingFileHandler(
+ logFile,
+ maxBytes=rotationSize,
+ backupCount=backupCount
+ )
+ handlers.append(fileHandler)
+
+ # Configure the logger
+ logging.basicConfig(
+ level=logLevel,
+ format=APP_CONFIG.get("Logging_FORMAT", "%(asctime)s - %(levelname)s - %(name)s - %(message)s"),
+ datefmt=APP_CONFIG.get("Logging_DATE_FORMAT", "%Y-%m-%d %H:%M:%S"),
+ handlers=handlers
+ )
+
+ # Silence noisy third-party libraries - use the same level as the root logger
+ noisyLoggers = ["httpx", "urllib3", "asyncio", "fastapi.security.oauth2"]
+ for loggerName in noisyLoggers:
+ logging.getLogger(loggerName).setLevel(logLevel)
+
+
+# Initialize logging
+initLogging()
+logger = logging.getLogger(__name__)
+instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
+
+# Define lifespan context manager for application startup/shutdown events
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ # Startup logic (if any)
+ logger.info("Application is starting up")
+ yield
+ # Shutdown logic
+ logger.info("Application has been shut down")
+
+# Parse CORS origins from environment variable
+def get_allowed_origins():
+ origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
+ # Split by comma and strip whitespace
+ origins = [origin.strip() for origin in origins_str.split(",")]
+ logger.info(f"CORS allowed origins: {origins}")
+ return origins
+
+# START APP
+app = FastAPI(
+ title="PowerOn | Data Platform API",
+ description=f"Backend API for the Multi-Agent Platform by ValueOn AG ({instanceLabel})",
+ lifespan=lifespan
+)
+
+# CORS configuration using environment variables
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=get_allowed_origins(),
+ allow_credentials=True,
+ allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
+ allow_headers=["*"],
+ expose_headers=["*"],
+ max_age=86400 # Increased caching for preflight requests
+)
+
+# Static folder for frontend - work with absolute path
+baseDir = pathlib.Path(__file__).parent
+staticFolder = baseDir / "static"
+os.makedirs(staticFolder, exist_ok=True)
+app.mount("/static", StaticFiles(directory=str(staticFolder)), name="static")
+
+# General Elements
+@app.get("/", tags=["General"])
+async def root():
+ """API status endpoint"""
+ return {"status": "online", "message": "Data Platform API is active"}
+
+@app.get("/api/test", tags=["General"])
+async def getTest():
+ return "OK 1.5"
+
+@app.options("/{fullPath:path}", tags=["General"])
+async def optionsRoute(fullPath: str):
+ return Response(status_code=200)
+
+@app.get("/api/environment", tags=["General"])
+async def get_environment():
+ """Get environment configuration for frontend"""
+ return {
+ "apiBaseUrl": APP_CONFIG.get("APP_API_URL", ""),
+ "environment": APP_CONFIG.get("APP_ENV", "development"),
+ "instanceLabel": APP_CONFIG.get("APP_ENV_LABEL", "Development"),
+ # Add other environment variables the frontend might need
+ }
+
+# Token endpoint for login
+@app.post("/api/token", response_model=gatewayModel.Token, tags=["General"])
+async def loginForAccessToken(formData: OAuth2PasswordRequestForm = Depends()):
+ # Initialize Gateway interface without context
+ gateway = getGatewayInterface()
+
+ # Authenticate user
+ user = gateway.authenticateUser(formData.username, formData.password)
+
+ if not user:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid username or password",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ # Create token with tenant ID
+ accessTokenExpires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+ accessToken = createAccessToken(
+ data={
+ "sub": user["username"],
+ "mandateId": user["mandateId"]
+ },
+ expiresDelta=accessTokenExpires
+ )
+
+ return {"accessToken": accessToken, "tokenType": "bearer"}
+
+# Get user info
+@app.get("/api/user/me", response_model=Dict[str, Any], tags=["General"])
+async def readUserMe(currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)):
+ return currentUser
+
+# Include all routers
+from routes.routeAttributes import router as attributesRouter
+app.include_router(attributesRouter)
+
+from routes.routeMandates import router as mandateRouter
+app.include_router(mandateRouter)
+
+from routes.routeUsers import router as userRouter
+app.include_router(userRouter)
+
+from routes.routeFiles import router as fileRouter
+app.include_router(fileRouter)
+
+from routes.routePrompts import router as promptRouter
+app.include_router(promptRouter)
+
+from routes.routeWorkflows import router as workflowRouter
+app.include_router(workflowRouter)
+
+#if __name__ == "__main__":
+# uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
diff --git a/app.py b/app.py
index dbed6902..d42012fe 100644
--- a/app.py
+++ b/app.py
@@ -8,7 +8,6 @@ from fastapi.staticfiles import StaticFiles
from fastapi.security import OAuth2PasswordRequestForm
from contextlib import asynccontextmanager
-import uvicorn
from typing import Dict, Any
import logging
from logging.handlers import RotatingFileHandler
@@ -16,66 +15,65 @@ from datetime import timedelta
import pathlib
from modules.configuration import APP_CONFIG
-from modules.gateway_interface import get_gateway_interface
-# Import auth module
-from modules.auth import (
- create_access_token,
- get_current_active_user,
- get_user_context,
- ACCESS_TOKEN_EXPIRE_MINUTES
-)
-
-# Import models - import generically for INITIALIZATION, even if dummy!
-import modules.gateway_model as gateway_model
-
-#from modules.lucydom_interface import get_lucydom_interface as dom_interface
-
-
-def init_logging():
+def initLogging():
# Get log level from config (default to INFO if not found)
- log_level_name = APP_CONFIG.get("Logging_LOG_LEVEL", "WARNING")
- log_level = getattr(logging, log_level_name)
+ logLevelName = APP_CONFIG.get("Logging_LOG_LEVEL", "WARNING")
+ logLevel = getattr(logging, logLevelName)
# Configure handlers based on config
handlers = []
# Add console handler if enabled
if APP_CONFIG.get("Logging_CONSOLE_ENABLED", True):
- console_handler = logging.StreamHandler()
- handlers.append(console_handler)
+ consoleHandler = logging.StreamHandler()
+ handlers.append(consoleHandler)
# Add file handler if enabled
if APP_CONFIG.get("Logging_FILE_ENABLED", True):
- log_file = APP_CONFIG.get("Logging_LOG_FILE", "app.log")
- rotation_size = int(APP_CONFIG.get("Logging_ROTATION_SIZE", 10485760)) # Default: 10MB
- backup_count = int(APP_CONFIG.get("Logging_BACKUP_COUNT", 5))
+ logFile = APP_CONFIG.get("Logging_LOG_FILE", "app.log")
+ rotationSize = int(APP_CONFIG.get("Logging_ROTATION_SIZE", 10485760)) # Default: 10MB
+ backupCount = int(APP_CONFIG.get("Logging_BACKUP_COUNT", 5))
- file_handler = RotatingFileHandler(
- log_file,
- maxBytes=rotation_size,
- backupCount=backup_count
+ fileHandler = RotatingFileHandler(
+ logFile,
+ maxBytes=rotationSize,
+ backupCount=backupCount
)
- handlers.append(file_handler)
+ handlers.append(fileHandler)
# Configure the logger
logging.basicConfig(
- level=log_level,
+ level=logLevel,
format=APP_CONFIG.get("Logging_FORMAT", "%(asctime)s - %(levelname)s - %(name)s - %(message)s"),
datefmt=APP_CONFIG.get("Logging_DATE_FORMAT", "%Y-%m-%d %H:%M:%S"),
handlers=handlers
)
# Silence noisy third-party libraries - use the same level as the root logger
- noisy_loggers = ["httpx", "urllib3", "asyncio", "fastapi.security.oauth2"]
- for logger_name in noisy_loggers:
- logging.getLogger(logger_name).setLevel(log_level)
+ noisyLoggers = ["httpx", "urllib3", "asyncio", "fastapi.security.oauth2"]
+ for loggerName in noisyLoggers:
+ logging.getLogger(loggerName).setLevel(logLevel)
# Initialize logging
-init_logging()
+initLogging()
logger = logging.getLogger(__name__)
-instance_label = APP_CONFIG.get("APP_ENV_LABEL")
+instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
+
+# Import auth module
+from modules.auth import (
+ createAccessToken,
+ getCurrentActiveUser,
+ getUserContext,
+ ACCESS_TOKEN_EXPIRE_MINUTES
+)
+
+# Import models - import generically for INITIALIZATION
+import modules.gatewayModel as gatewayModel
+from modules.gatewayInterface import getGatewayInterface
+gateway = getGatewayInterface()
+
# Define lifespan context manager for application startup/shutdown events
@asynccontextmanager
@@ -86,17 +84,25 @@ async def lifespan(app: FastAPI):
# Shutdown logic
logger.info("Application has been shut down")
+# Parse CORS origins from environment variable
+def get_allowed_origins():
+ origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
+ # Split by comma and strip whitespace
+ origins = [origin.strip() for origin in origins_str.split(",")]
+ logger.info(f"CORS allowed origins: {origins}")
+ return origins
+
# START APP
app = FastAPI(
title="PowerOn | Data Platform API",
- description=f"Backend API for the Multi-Agent Platform by ValueOn AG ({instance_label})",
+ description=f"Backend API for the Multi-Agent Platform by ValueOn AG ({instanceLabel})",
lifespan=lifespan
)
-# CORS configuration for frontend requests
+# CORS configuration using environment variables
app.add_middleware(
CORSMiddleware,
- allow_origins=["http://localhost:8080","https://poweron-lucyagents-xxx.germanywestcentral-01.azurewebsites.net"],
+ allow_origins=get_allowed_origins(), # ["http://localhost:8080","http://localhost:8081"], #get_allowed_origins(),
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
allow_headers=["*"],
@@ -105,16 +111,10 @@ app.add_middleware(
)
# Static folder for frontend - work with absolute path
-base_dir = pathlib.Path(__file__).parent
-static_folder = base_dir / "static"
-os.makedirs(static_folder, exist_ok=True)
-app.mount("/static", StaticFiles(directory=str(static_folder)), name="static")
-
-# Add a specific route for favicon.ico
-@app.get("/favicon.ico", include_in_schema=False)
-async def favicon():
- favicon_path = static_folder / "favicon.ico"
- return FileResponse(str(favicon_path))
+baseDir = pathlib.Path(__file__).parent
+staticFolder = baseDir / "static"
+os.makedirs(staticFolder, exist_ok=True)
+app.mount("/static", StaticFiles(directory=str(staticFolder)), name="static")
# General Elements
@app.get("/", tags=["General"])
@@ -123,22 +123,31 @@ async def root():
return {"status": "online", "message": "Data Platform API is active"}
@app.get("/api/test", tags=["General"])
-async def get_test():
+async def getTest():
return "OK 1.5"
-@app.options("/{full_path:path}", tags=["General"])
-async def options_route(full_path: str):
+@app.options("/{fullPath:path}", tags=["General"])
+async def optionsRoute(fullPath: str):
return Response(status_code=200)
+@app.get("/api/environment", tags=["General"])
+async def get_environment():
+ """Get environment configuration for frontend"""
+ return {
+ "apiBaseUrl": APP_CONFIG.get("APP_API_URL", ""),
+ "environment": APP_CONFIG.get("APP_ENV", "development"),
+ "instanceLabel": APP_CONFIG.get("APP_ENV_LABEL", "Development"),
+ # Add other environment variables the frontend might need
+ }
# Token endpoint for login
-@app.post("/api/token", response_model=gateway_model.Token, tags=["General"])
-async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
+@app.post("/api/token", response_model=gatewayModel.Token, tags=["General"])
+async def loginForAccessToken(formData: OAuth2PasswordRequestForm = Depends()):
# Initialize Gateway interface without context
- gateway = get_gateway_interface()
-
+ gateway = getGatewayInterface()
+
# Authenticate user
- user = gateway.authenticate_user(form_data.username, form_data.password)
+ user = gateway.authenticateUser(formData.username, formData.password)
if not user:
raise HTTPException(
@@ -148,40 +157,45 @@ async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends(
)
# Create token with tenant ID
- access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
- access_token = create_access_token(
+ accessTokenExpires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+ accessToken = createAccessToken(
data={
"sub": user["username"],
- "mandate_id": user["mandate_id"]
+ "mandateId": user["mandateId"]
},
- expires_delta=access_token_expires
+ expiresDelta=accessTokenExpires
)
- return {"access_token": access_token, "token_type": "bearer"}
+ return {"accessToken": accessToken, "tokenType": "bearer"}
# Get user info
@app.get("/api/user/me", response_model=Dict[str, Any], tags=["General"])
-async def read_user_me(current_user: Dict[str, Any] = Depends(get_current_active_user)):
- return current_user
+async def readUserMe(currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)):
+ return currentUser
# Include all routers
-from routes.attributes import router as attributes_router
-app.include_router(attributes_router)
+from routes.routeAttributes import router as attributesRouter
+app.include_router(attributesRouter)
-from routes.mandates import router as mandate_router
-app.include_router(mandate_router)
+gateway = getGatewayInterface()
-from routes.users import router as user_router
-app.include_router(user_router)
+from routes.routeMandates import router as mandateRouter
+app.include_router(mandateRouter)
-from routes.files import router as file_router
-app.include_router(file_router)
+gateway = getGatewayInterface()
-from routes.prompts import router as prompt_router
-app.include_router(prompt_router)
-from routes.workflows import router as workflow_router
-app.include_router(workflow_router)
+from routes.routeUsers import router as userRouter
+app.include_router(userRouter)
+
+from routes.routeFiles import router as fileRouter
+app.include_router(fileRouter)
+
+from routes.routePrompts import router as promptRouter
+app.include_router(promptRouter)
+
+from routes.routeWorkflows import router as workflowRouter
+app.include_router(workflowRouter)
#if __name__ == "__main__":
# uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
diff --git a/connectors/_SAVE_connectorDbJson copy.py b/connectors/_SAVE_connectorDbJson copy.py
new file mode 100644
index 00000000..c92c4074
--- /dev/null
+++ b/connectors/_SAVE_connectorDbJson copy.py
@@ -0,0 +1,546 @@
+import json
+import os
+from typing import List, Dict, Any, Optional, Union
+import logging
+
+logger = logging.getLogger(__name__)
+
+class DatabaseConnector:
+ """
+ A connector for JSON-based data storage.
+ Provides generic database operations with tenant and user context support.
+ """
+ def __init__(self, dbHost: str, dbDatabase: str, dbUser: str = None, dbPassword: str = None, mandateId: int = None, userId: int = None):
+ """
+ Initializes the JSON database connector.
+
+ Args:
+ dbHost: Directory for the JSON files
+ dbDatabase: Database name
+ dbUser: Username for authentication (optional)
+ dbPassword: API key for authentication (optional)
+ mandateId: Context parameter for the tenant
+ userId: Context parameter for the user
+ """
+ # Store the input parameters
+ self.dbHost = dbHost
+ self.dbDatabase = dbDatabase
+ self.dbUser = dbUser
+ self.dbPassword = dbPassword
+
+ # Check if context parameters are set
+ if mandateId is None or userId is None:
+ raise ValueError("mandateId and userId must be set")
+
+ # Ensure the database directory exists
+ self.dbFolder = os.path.join(self.dbHost, self.dbDatabase)
+ os.makedirs(self.dbFolder, exist_ok=True)
+
+ # Cache for loaded data
+ self._tablesCache = {}
+
+ # Initialize system table
+ self._systemTableName = "_system"
+ self._initializeSystemTable()
+
+ # Temporarily store mandateId and userId
+ self._mandateId = mandateId
+ self._userId = userId
+
+ # If mandateId or userId are 0, try to use the initial IDs
+ if mandateId == 0:
+ initialMandateId = self.getInitialId("mandates")
+ if initialMandateId is not None:
+ self._mandateId = initialMandateId
+ logger.info(f"Using initial mandateId: {initialMandateId} instead of 0")
+
+ if userId == 0:
+ initialUserId = self.getInitialId("users")
+ if initialUserId is not None:
+ self._userId = initialUserId
+ logger.info(f"Using initial userId: {initialUserId} instead of 0")
+
+ # Set the effective IDs as properties
+ self.mandateId = self._mandateId
+ self.userId = self._userId
+
+ logger.info(f"DatabaseConnector initialized for directory: {self.dbFolder}")
+ logger.debug(f"Context: mandateId={self.mandateId}, userId={self.userId}")
+
+ def _initializeSystemTable(self):
+ """Initializes the system table if it doesn't exist yet."""
+ systemTablePath = self._getTablePath(self._systemTableName)
+ if not os.path.exists(systemTablePath):
+ emptySystemTable = {}
+ self._saveSystemTable(emptySystemTable)
+ logger.info(f"System table initialized in {systemTablePath}")
+ else:
+ # Load existing system table to ensure it's available
+ self._loadSystemTable()
+ logger.debug(f"Existing system table loaded from {systemTablePath}")
+
+ def _loadSystemTable(self) -> Dict[str, int]:
+ """Loads the system table with the initial IDs."""
+ systemTablePath = self._getTablePath(self._systemTableName)
+ try:
+ if os.path.exists(systemTablePath):
+ with open(systemTablePath, 'r', encoding='utf-8') as f:
+ return json.load(f)
+ else:
+ return {}
+ except Exception as e:
+ logger.error(f"Error loading the system table: {e}")
+ return {}
+
+ def _saveSystemTable(self, data: Dict[str, int]) -> bool:
+ """Saves the system table with the initial IDs."""
+ systemTablePath = self._getTablePath(self._systemTableName)
+ try:
+ with open(systemTablePath, 'w', encoding='utf-8') as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
+ return True
+ except Exception as e:
+ logger.error(f"Error saving the system table: {e}")
+ return False
+
+ def _getTablePath(self, table: str) -> str:
+ """Returns the full path to a table file"""
+ return os.path.join(self.dbFolder, f"{table}.json")
+
+ def _loadTable(self, table: str) -> List[Dict[str, Any]]:
+ """Loads a table from the corresponding JSON file"""
+ path = self._getTablePath(table)
+
+ # If the table is the system table, load it directly
+ if table == self._systemTableName:
+ return [] # The system table is not treated like normal tables
+
+ # If the table is already in the cache, use the cache
+ if table in self._tablesCache:
+ return self._tablesCache[table]
+
+ # Otherwise load the file
+ try:
+ if os.path.exists(path):
+ with open(path, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+ self._tablesCache[table] = data
+
+ # If data was loaded and no initial ID is registered yet,
+ # register the ID of the first record (if available)
+ if data and not self.hasInitialId(table):
+ if "id" in data[0]:
+ self._registerInitialId(table, data[0]["id"])
+ logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered")
+
+ return data
+ else:
+ # If the file doesn't exist, create an empty table
+ logger.info(f"New table {table}")
+ self._tablesCache[table] = []
+ self._saveTable(table, [])
+ return []
+ except Exception as e:
+ logger.error(f"Error loading table {table}: {e}")
+ return []
+
+ def _saveTable(self, table: str, data: List[Dict[str, Any]]) -> bool:
+ """Saves a table to the corresponding JSON file"""
+ # The system table is handled specially
+ if table == self._systemTableName:
+ return False
+
+ path = self._getTablePath(table)
+ try:
+ with open(path, 'w', encoding='utf-8') as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
+
+ # Update the cache
+ self._tablesCache[table] = data
+ return True
+ except Exception as e:
+ logger.error(f"Error saving table {table}: {e}")
+ return False
+
+ def _filterByContext(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """
+ Filters records by tenant and user context,
+ if these fields exist in the record.
+ """
+ filteredRecords = []
+
+ for record in records:
+ # Check if mandateId exists in the record and is not null
+ hasMandate = "mandateId" in record and record["mandateId"] is not None and record["mandateId"] != ""
+
+ # Check if userId exists in the record and is not null
+ hasUser = "userId" in record and record["userId"] is not None and record["userId"] != ""
+
+ # If both exist, filter accordingly
+ if hasMandate and hasUser:
+ if record["mandateId"] == self.mandateId:
+ filteredRecords.append(record)
+ # If only mandateId exists
+ elif hasMandate and not hasUser:
+ if record["mandateId"] == self.mandateId:
+ filteredRecords.append(record)
+ # If neither mandateId nor userId exist, add the record
+ elif not hasMandate and not hasUser:
+ filteredRecords.append(record)
+
+ return filteredRecords
+
+ def _applyRecordFilter(self, records: List[Dict[str, Any]], recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+ """Applies a record filter to the records"""
+ if not recordFilter:
+ return records
+
+ filteredRecords = []
+
+ for record in records:
+ match = True
+
+ for field, value in recordFilter.items():
+ # Check if the field exists
+ if field not in record:
+ match = False
+ break
+
+ # If the filter value is an integer string and the record field is an integer
+ if isinstance(value, str) and value.isdigit() and isinstance(record[field], int):
+ if record[field] != int(value):
+ match = False
+ break
+ # Otherwise direct comparison
+ elif record[field] != value:
+ match = False
+ break
+
+ if match:
+ filteredRecords.append(record)
+
+ return filteredRecords
+
+ def _registerInitialId(self, table: str, initialId: int) -> bool:
+ """
+ Registers the initial ID for a table.
+
+ Args:
+ table: Name of the table
+ initialId: The initial ID
+
+ Returns:
+ True on success, False on error
+ """
+ try:
+ # Load the current system table
+ systemData = self._loadSystemTable()
+
+ # Only register if not already present
+ if table not in systemData:
+ systemData[table] = initialId
+ success = self._saveSystemTable(systemData)
+ if success:
+ logger.info(f"Initial ID {initialId} for table {table} registered")
+ return success
+ return True # If already present, this is not an error
+ except Exception as e:
+ logger.error(f"Error registering the initial ID for table {table}: {e}")
+ return False
+
+ def _removeInitialId(self, table: str) -> bool:
+ """
+ Removes the initial ID for a table from the system table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ True on success, False on error
+ """
+ try:
+ # Load the current system table
+ systemData = self._loadSystemTable()
+
+ # Remove the entry if it exists
+ if table in systemData:
+ del systemData[table]
+ success = self._saveSystemTable(systemData)
+ if success:
+ logger.info(f"Initial ID for table {table} removed from system table")
+ return success
+ return True # If not present, this is not an error
+ except Exception as e:
+ logger.error(f"Error removing initial ID for table {table}: {e}")
+ return False
+
+ # Public API
+
+ def getTables(self) -> List[str]:
+ """
+ Returns a list of all available tables.
+
+ Returns:
+ List of table names
+ """
+ tables = []
+
+ try:
+ for filename in os.listdir(self.dbFolder):
+ if filename.endswith('.json') and not filename.startswith('_'):
+ tableName = filename[:-5] # Remove the .json extension
+ tables.append(tableName)
+ except Exception as e:
+ logger.error(f"Error reading the database directory: {e}")
+
+ return tables
+
+ def getFields(self, table: str) -> List[str]:
+ """
+ Returns a list of all fields in a table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ List of field names
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ if not data:
+ return []
+
+ # Take the first record as a reference for the fields
+ fields = list(data[0].keys()) if data else []
+
+ return fields
+
+ def getSchema(self, table: str, language: str = None) -> Dict[str, Dict[str, Any]]:
+ """
+ Returns a schema object for a table with data types and labels.
+
+ Args:
+ table: Name of the table
+ language: Language for the labels (optional)
+
+ Returns:
+ Schema object with fields, data types and labels
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ schema = {}
+
+ if not data:
+ return schema
+
+ # Take the first record as a reference for the fields and data types
+ firstRecord = data[0]
+
+ for field, value in firstRecord.items():
+ # Determine the data type
+ dataType = type(value).__name__
+
+ # Create label (default is the field name)
+ label = field
+
+ schema[field] = {
+ "type": dataType,
+ "label": label
+ }
+
+ return schema
+
+ def getRecordset(self, table: str, fieldFilter: List[str] = None, recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+ """
+ Returns a list of records from a table, filtered by criteria.
+
+ Args:
+ table: Name of the table
+ fieldFilter: Filter for fields (which fields should be returned)
+ recordFilter: Filter for records (which records should be returned)
+
+ Returns:
+ List of filtered records
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ # Filter by tenant and user context
+ filteredData = self._filterByContext(data)
+
+ # Apply recordFilter if available
+ if recordFilter:
+ filteredData = self._applyRecordFilter(filteredData, recordFilter)
+
+ # If fieldFilter is available, reduce the fields
+ if fieldFilter and isinstance(fieldFilter, list):
+ result = []
+ for record in filteredData:
+ filteredRecord = {}
+ for field in fieldFilter:
+ if field in record:
+ filteredRecord[field] = record[field]
+ result.append(filteredRecord)
+ return result
+
+ return filteredData
+
+ def recordCreate(self, table: str, recordData: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Creates a new record in the table.
+
+ Args:
+ table: Name of the table
+ recordData: Data for the new record
+
+ Returns:
+ The created record
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ # Add mandateId and userId if not present or 0
+ if "mandateId" not in recordData or recordData["mandateId"] == 0:
+ recordData["mandateId"] = self.mandateId
+
+ if "userId" not in recordData or recordData["userId"] == 0:
+ recordData["userId"] = self.userId
+
+ # Determine the next ID if not present
+ if "id" not in recordData:
+ nextId = 1
+ if data:
+ nextId = max(record["id"] for record in data if "id" in record) + 1
+ recordData["id"] = nextId
+
+ # If the table is empty and a system ID should be registered
+ if not data:
+ self._registerInitialId(table, recordData["id"])
+ logger.info(f"Initial ID {recordData['id']} for table {table} has been registered")
+
+ # Add the new record
+ data.append(recordData)
+
+ # Save the updated table
+ if self._saveTable(table, data):
+ return recordData
+ else:
+ raise ValueError(f"Error creating the record in table {table}")
+
+ def recordDelete(self, table: str, recordId: Union[str, int]) -> bool:
+ """
+ Deletes a record from the table.
+
+ Args:
+ table: Name of the table
+ recordId: ID of the record to delete
+
+ Returns:
+ True on success, False on error
+ """
+ # Load table data
+ data = self._loadTable(table)
+
+ # Search for the record
+ for i, record in enumerate(data):
+ if "id" in record and record["id"] == recordId:
+ # Check if the record belongs to the current mandate
+ if "mandateId" in record and record["mandateId"] != self.mandateId:
+ raise ValueError("Not your mandate")
+
+ # Check if it's an initial record
+ initialId = self.getInitialId(table)
+ if initialId is not None and initialId == recordId:
+ # Remove this entry from the system table
+ self._removeInitialId(table)
+ logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table")
+
+ # Delete the record
+ del data[i]
+
+ # Save the updated table
+ return self._saveTable(table, data)
+
+ # Record not found
+ return False
+
+ def recordModify(self, table: str, recordId: Union[str, int], recordData: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Modifies a record in the table.
+
+ Args:
+ table: Name of the table
+ recordId: ID of the record to modify
+ recordData: New data for the record
+
+ Returns:
+ The updated record
+ """
+ # Load table data
+ data = self._loadTable(table)
+
+ # Search for the record
+ for i, record in enumerate(data):
+ if "id" in record and record["id"] == recordId:
+ # Check if the record belongs to the current mandate
+ if "mandateId" in record and record["mandateId"] != self.mandateId:
+ raise ValueError("Not your mandate")
+
+ # Prevent changing the ID
+ if "id" in recordData and recordData["id"] != recordId:
+ raise ValueError(f"The ID of a record in table {table} cannot be changed")
+
+ # Update the record
+ for key, value in recordData.items():
+ data[i][key] = value
+
+ # Save the updated table
+ if self._saveTable(table, data):
+ return data[i]
+ else:
+ raise ValueError(f"Error updating record in table {table}")
+
+ # Record not found
+ raise ValueError(f"Record with ID {recordId} not found in table {table}")
+
+ def hasInitialId(self, table: str) -> bool:
+ """
+ Checks if an initial ID is registered for a table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ True if an initial ID is registered, otherwise False
+ """
+ systemData = self._loadSystemTable()
+ return table in systemData
+
+ def getInitialId(self, table: str) -> Optional[int]:
+ """
+ Returns the initial ID for a table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ The initial ID or None if not present
+ """
+ systemData = self._loadSystemTable()
+ initialId = systemData.get(table)
+ print("SysTable table",table,"Value",initialId)
+ if initialId is None:
+ logger.debug(f"No initial ID found for table {table}")
+ return initialId
+
+ def getAllInitialIds(self) -> Dict[str, int]:
+ """
+ Returns all registered initial IDs.
+
+ Returns:
+ Dictionary with table names as keys and initial IDs as values
+ """
+ systemData = self._loadSystemTable()
+ return systemData.copy() # Return a copy to protect the original
\ No newline at end of file
diff --git a/connectors/connector_aichat_anthropic.py b/connectors/connectorAiAnthropic.py
similarity index 59%
rename from connectors/connector_aichat_anthropic.py
rename to connectors/connectorAiAnthropic.py
index 15728755..4b0b2412 100644
--- a/connectors/connector_aichat_anthropic.py
+++ b/connectors/connectorAiAnthropic.py
@@ -1,54 +1,52 @@
import logging
import httpx
-from typing import Dict, Any, List, Optional, Union
+from typing import Dict, Any, List, Union
from fastapi import HTTPException
from modules.configuration import APP_CONFIG
# Configure logger
logger = logging.getLogger(__name__)
-# Load configuration data
-def load_config_data():
+def loadConfigData():
+ """Load configuration data for Anthropic connector"""
return {
- "api_key": APP_CONFIG.get('Connector_AiAnthropic_API_SECRET'),
- "api_url": APP_CONFIG.get('Connector_AiAnthropic_API_URL'),
- "model_name": APP_CONFIG.get('Connector_AiAnthropic_MODEL_NAME'),
+ "apiKey": APP_CONFIG.get('Connector_AiAnthropic_API_SECRET'),
+ "apiUrl": APP_CONFIG.get('Connector_AiAnthropic_API_URL'),
+ "modelName": APP_CONFIG.get('Connector_AiAnthropic_MODEL_NAME'),
"temperature": float(APP_CONFIG.get('Connector_AiAnthropic_TEMPERATURE')),
- "max_tokens": int(APP_CONFIG.get('Connector_AiAnthropic_MAX_TOKENS'))
+ "maxTokens": int(APP_CONFIG.get('Connector_AiAnthropic_MAX_TOKENS'))
}
class ChatService:
- """
- Connector for communication with the Anthropic API.
- """
+ """Connector for communication with the Anthropic API."""
def __init__(self):
# Load configuration
- self.config = load_config_data()
- self.api_key = self.config["api_key"]
- self.api_url = self.config["api_url"]
- self.model_name = self.config["model_name"]
+ self.config = loadConfigData()
+ self.apiKey = self.config["apiKey"]
+ self.apiUrl = self.config["apiUrl"]
+ self.modelName = self.config["modelName"]
# HttpClient for API calls
- self.http_client = httpx.AsyncClient(
+ self.httpClient = httpx.AsyncClient(
timeout=120.0, # Longer timeout for complex requests
headers={
- "x-api-key": self.api_key,
+ "x-api-key": self.apiKey,
"anthropic-version": "2023-06-01", # Anthropic API Version
"Content-Type": "application/json"
}
)
- logger.info(f"Anthropic Connector initialized with model: {self.model_name}")
+ logger.info(f"Anthropic Connector initialized with model: {self.modelName}")
- async def call_api(self, messages: List[Dict[str, Any]], temperature: float = None, max_tokens: int = None) -> Dict[str, Any]:
+ async def callApi(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]:
"""
Calls the Anthropic API with the given messages.
Args:
messages: List of messages in OpenAI format (role, content)
temperature: Temperature for response generation (0.0-1.0)
- max_tokens: Maximum number of tokens in the response
+ maxTokens: Maximum number of tokens in the response
Returns:
The response converted to OpenAI format
@@ -58,25 +56,25 @@ class ChatService:
"""
try:
# Convert OpenAI format to Anthropic format
- formatted_messages = self._convert_to_anthropic_format(messages)
+ formattedMessages = self._convertToAnthropicFormat(messages)
# Use parameters from configuration if none were overridden
if temperature is None:
temperature = self.config.get("temperature", 0.2)
- if max_tokens is None:
- max_tokens = self.config.get("max_tokens", 2000)
+ if maxTokens is None:
+ maxTokens = self.config.get("maxTokens", 2000)
# Create Anthropic API payload
payload = {
- "model": self.model_name,
- "messages": formatted_messages,
+ "model": self.modelName,
+ "messages": formattedMessages,
"temperature": temperature,
- "max_tokens": max_tokens
+ "max_tokens": maxTokens
}
- response = await self.http_client.post(
- self.api_url,
+ response = await self.httpClient.post(
+ self.apiUrl,
json=payload
)
@@ -85,16 +83,16 @@ class ChatService:
raise HTTPException(status_code=500, detail="Error communicating with Anthropic API")
# Convert response from Anthropic format to OpenAI format
- anthropic_response = response.json()
- openai_formatted_response = self._convert_to_openai_format(anthropic_response)
+ anthropicResponse = response.json()
+ openaiFormattedResponse = self._convertToOpenaiFormat(anthropicResponse)
- return openai_formatted_response
+ return openaiFormattedResponse
except Exception as e:
logger.error(f"Error calling Anthropic API: {str(e)}")
raise HTTPException(status_code=500, detail=f"Error calling Anthropic API: {str(e)}")
- def _convert_to_anthropic_format(self, openai_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ def _convertToAnthropicFormat(self, openaiMessages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Converts messages from OpenAI format to Anthropic format.
@@ -110,16 +108,16 @@ class ChatService:
Note: Anthropic has no direct system message equivalent,
so we add system messages to the first user message.
"""
- anthropic_messages = []
- system_content = ""
+ anthropicMessages = []
+ systemContent = ""
# First extract all system messages
- for msg in openai_messages:
+ for msg in openaiMessages:
if msg.get("role") == "system":
- system_content += msg.get("content", "") + "\n\n"
+ systemContent += msg.get("content", "") + "\n\n"
# Convert the remaining messages
- for i, msg in enumerate(openai_messages):
+ for msg in openaiMessages:
role = msg.get("role")
content = msg.get("content", "")
@@ -128,49 +126,49 @@ class ChatService:
continue
# For the first user message: prepend system content if available
- if role == "user" and system_content and not any(m.get("role") == "user" for m in anthropic_messages):
+ if role == "user" and systemContent and not any(m.get("role") == "user" for m in anthropicMessages):
if isinstance(content, str):
- content = system_content + content
+ content = systemContent + content
elif isinstance(content, list):
# If content is an array (for multimodal messages)
- text_parts = []
+ textParts = []
for part in content:
if part.get("type") == "text":
- text_parts.append(part)
+ textParts.append(part)
- if text_parts:
- text_parts[0]["text"] = system_content + text_parts[0].get("text", "")
+ if textParts:
+ textParts[0]["text"] = systemContent + textParts[0].get("text", "")
# Anthropic only supports "user" and "assistant" roles
if role not in ["user", "assistant"]:
role = "user"
- anthropic_messages.append({"role": role, "content": content})
+ anthropicMessages.append({"role": role, "content": content})
- return anthropic_messages
+ return anthropicMessages
- def _convert_to_openai_format(self, anthropic_response: Dict[str, Any]) -> Dict[str, Any]:
+ def _convertToOpenaiFormat(self, anthropicResponse: Dict[str, Any]) -> Dict[str, Any]:
"""
Converts a response from Anthropic format to OpenAI format.
"""
# Extract content from Anthropic response
content = ""
- if "content" in anthropic_response:
- if isinstance(anthropic_response["content"], list):
+ if "content" in anthropicResponse:
+ if isinstance(anthropicResponse["content"], list):
# Content is a list of parts (in newer API versions)
- for part in anthropic_response["content"]:
+ for part in anthropicResponse["content"]:
if part.get("type") == "text":
content += part.get("text", "")
else:
# Direct content as string (in older API versions)
- content = anthropic_response["content"]
+ content = anthropicResponse["content"]
# Create OpenAI-formatted response
return {
- "id": anthropic_response.get("id", ""),
+ "id": anthropicResponse.get("id", ""),
"object": "chat.completion",
- "created": anthropic_response.get("created", 0),
- "model": anthropic_response.get("model", self.model_name),
+ "created": anthropicResponse.get("created", 0),
+ "model": anthropicResponse.get("model", self.modelName),
"choices": [
{
"message": {
@@ -183,33 +181,33 @@ class ChatService:
]
}
- async def analyze_image(self, image_data: Union[str, bytes], mime_type: str = None, prompt: str = "Describe this image") -> str:
+ async def analyzeImage(self, imageData: Union[str, bytes], mimeType: str = None, prompt: str = "Describe this image") -> str:
"""
- Analyzes an image with the OpenAI Vision API.
+ Analyzes an image using Anthropic's vision capabilities.
Args:
- image_data: Either a file path (str) or image data (bytes)
- mime_type: The MIME type of the image (optional, only for binary data)
+ imageData: Either a file path (str) or image data (bytes)
+ mimeType: The MIME type of the image (optional, only for binary data)
prompt: The prompt for analysis
Returns:
- The response from the OpenAI Vision API as text
+ The analysis response as text
"""
try:
# Distinguish between file path and binary data
- if isinstance(image_data, str):
+ if isinstance(imageData, str):
# It's a file path - import filehandling only when needed
- from modules import agentservice_filemanager as file_handler
- base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
- mime_type = mime_type or auto_mime_type
+ from modules import agentserviceFilemanager as fileHandler
+ base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
+ mimeType = mimeType or autoMimeType
else:
# It's binary data
import base64
- base64_data = base64.b64encode(image_data).decode('utf-8')
+ base64Data = base64.b64encode(imageData).decode('utf-8')
# MIME type must be specified for binary data
- if not mime_type:
+ if not mimeType:
# Fallback to generic image type
- mime_type = "image/png"
+ mimeType = "image/png"
# Prepare the payload for the Vision API
messages = [
@@ -220,15 +218,15 @@ class ChatService:
{
"type": "image_url",
"image_url": {
- "url": f"data:{mime_type};base64,{base64_data}"
+ "url": f"data:{mimeType};base64,{base64Data}"
}
}
]
}
]
- # Use the existing call_api function with the Vision model
- response = await self.call_api(messages)
+ # Use the existing callApi function with the Vision model
+ response = await self.callApi(messages)
# Extract and return content
return response["choices"][0]["message"]["content"]
diff --git a/connectors/connector_aichat_openai.py b/connectors/connectorAiOpenai.py
similarity index 59%
rename from connectors/connector_aichat_openai.py
rename to connectors/connectorAiOpenai.py
index 7de51e67..e394030d 100644
--- a/connectors/connector_aichat_openai.py
+++ b/connectors/connectorAiOpenai.py
@@ -1,52 +1,50 @@
import logging
import httpx
-from typing import Dict, Any, List, Optional, Union
+from typing import Dict, Any, List, Union
from fastapi import HTTPException
from modules.configuration import APP_CONFIG
# Configure logger
logger = logging.getLogger(__name__)
-# Load configuration data
-def load_config_data():
+def loadConfigData():
+ """Load configuration data for OpenAI connector"""
return {
- "api_key": APP_CONFIG.get('Connector_AiOpenai_API_SECRET'),
- "api_url": APP_CONFIG.get('Connector_AiOpenai_API_URL'),
- "model_name": APP_CONFIG.get('Connector_AiOpenai_MODEL_NAME'),
+ "apiKey": APP_CONFIG.get('Connector_AiOpenai_API_SECRET'),
+ "apiUrl": APP_CONFIG.get('Connector_AiOpenai_API_URL'),
+ "modelName": APP_CONFIG.get('Connector_AiOpenai_MODEL_NAME'),
"temperature": float(APP_CONFIG.get('Connector_AiOpenai_TEMPERATURE')),
- "max_tokens": int(APP_CONFIG.get('Connector_AiOpenai_MAX_TOKENS'))
+ "maxTokens": int(APP_CONFIG.get('Connector_AiOpenai_MAX_TOKENS'))
}
class ChatService:
- """
- Connector for communication with the OpenAI API.
- """
+ """Connector for communication with the OpenAI API."""
def __init__(self):
# Load configuration
- self.config = load_config_data()
- self.api_key = self.config["api_key"]
- self.api_url = self.config["api_url"]
- self.model_name = self.config["model_name"]
+ self.config = loadConfigData()
+ self.apiKey = self.config["apiKey"]
+ self.apiUrl = self.config["apiUrl"]
+ self.modelName = self.config["modelName"]
# HttpClient for API calls
- self.http_client = httpx.AsyncClient(
+ self.httpClient = httpx.AsyncClient(
timeout=120.0, # Longer timeout for complex requests
headers={
- "Authorization": f"Bearer {self.api_key}",
+ "Authorization": f"Bearer {self.apiKey}",
"Content-Type": "application/json"
}
)
- logger.info(f"OpenAI Connector initialized with model: {self.model_name}")
+ logger.info(f"OpenAI Connector initialized with model: {self.modelName}")
- async def call_api(self, messages: List[Dict[str, Any]], temperature: float = None, max_tokens: int = None) -> str:
+ async def callApi(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
"""
Calls the OpenAI API with the given messages.
Args:
messages: List of messages in OpenAI format (role, content)
temperature: Temperature for response generation (0.0-1.0)
- max_tokens: Maximum number of tokens in the response
+ maxTokens: Maximum number of tokens in the response
Returns:
The response from the OpenAI API
@@ -59,18 +57,18 @@ class ChatService:
if temperature is None:
temperature = self.config.get("temperature", 0.2)
- if max_tokens is None:
- max_tokens = self.config.get("max_tokens", 2000)
+ if maxTokens is None:
+ maxTokens = self.config.get("maxTokens", 2000)
payload = {
- "model": self.model_name,
+ "model": self.modelName,
"messages": messages,
"temperature": temperature,
- "max_tokens": max_tokens
+ "max_tokens": maxTokens
}
- response = await self.http_client.post(
- self.api_url,
+ response = await self.httpClient.post(
+ self.apiUrl,
json=payload
)
@@ -78,8 +76,8 @@ class ChatService:
logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
- response_json = response.json()
- content = response_json["choices"][0]["message"]["content"]
+ responseJson = response.json()
+ content = responseJson["choices"][0]["message"]["content"]
return content
except Exception as e:
@@ -88,15 +86,15 @@ class ChatService:
async def close(self):
"""Closes the HTTP client when the application exits"""
- await self.http_client.aclose()
+ await self.httpClient.aclose()
- async def analyze_image(self, image_data: Union[str, bytes], mime_type: str = None, prompt: str = "Describe this image") -> str:
+ async def analyzeImage(self, imageData: Union[str, bytes], mimeType: str = None, prompt: str = "Describe this image") -> str:
"""
Analyzes an image with the OpenAI Vision API.
Args:
- image_data: Either a file path (str) or image data (bytes)
- mime_type: The MIME type of the image (optional, only for binary data)
+ imageData: Either a file path (str) or image data (bytes)
+ mimeType: The MIME type of the image (optional, only for binary data)
prompt: The prompt for analysis
Returns:
@@ -105,19 +103,19 @@ class ChatService:
try:
logger.debug("Starting image analysis...")
# Distinguish between file path and binary data
- if isinstance(image_data, str):
+ if isinstance(imageData, str):
# It's a file path - import filehandling only when needed
- from modules import agentservice_filemanager as file_handler
- base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
- mime_type = mime_type or auto_mime_type
+ from modules import agentserviceFilemanager as fileHandler
+ base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
+ mimeType = mimeType or autoMimeType
else:
# It's binary data
import base64
- base64_data = base64.b64encode(image_data).decode('utf-8')
+ base64Data = base64.b64encode(imageData).decode('utf-8')
# MIME type must be specified for binary data
- if not mime_type:
+ if not mimeType:
# Fallback to generic image type
- mime_type = "image/png"
+ mimeType = "image/png"
# Prepare the payload for the Vision API
messages = [
@@ -128,17 +126,17 @@ class ChatService:
{
"type": "image_url",
"image_url": {
- "url": f"data:{mime_type};base64,{base64_data}"
+ "url": f"data:{mimeType};base64,{base64Data}"
}
}
]
}
]
- # Use the existing call_api function with the Vision model
- response = await self.call_api(messages)
+ # Use the existing callApi function with the Vision model
+ response = await self.callApi(messages)
- # Extract and return content
+ # Return content
return response
except Exception as e:
diff --git a/connectors/connectorDbJson.py b/connectors/connectorDbJson.py
new file mode 100644
index 00000000..1a7a96cd
--- /dev/null
+++ b/connectors/connectorDbJson.py
@@ -0,0 +1,561 @@
+import json
+import os
+from typing import List, Dict, Any, Optional, Union
+import logging
+
+logger = logging.getLogger(__name__)
+
+class DatabaseConnector:
+ """
+ A connector for JSON-based data storage.
+ Provides generic database operations with tenant and user context support.
+ """
+ def __init__(self, dbHost: str, dbDatabase: str, dbUser: str = None, dbPassword: str = None,
+ mandateId: int = None, userId: int = None, skipInitialIdLookup: bool = False):
+ """
+ Initializes the JSON database connector.
+
+ Args:
+ dbHost: Directory for the JSON files
+ dbDatabase: Database name
+ dbUser: Username for authentication (optional)
+ dbPassword: API key for authentication (optional)
+ mandateId: Context parameter for the tenant
+ userId: Context parameter for the user
+ skipInitialIdLookup: When True, skips looking up initial IDs for mandateId and userId
+ """
+ # Store the input parameters
+ self.dbHost = dbHost
+ self.dbDatabase = dbDatabase
+ self.dbUser = dbUser
+ self.dbPassword = dbPassword
+ self.skipInitialIdLookup = skipInitialIdLookup
+
+ # Check if context parameters are set
+ if mandateId is None or userId is None:
+ raise ValueError("mandateId and userId must be set")
+
+ # Ensure the database directory exists
+ self.dbFolder = os.path.join(self.dbHost, self.dbDatabase)
+ os.makedirs(self.dbFolder, exist_ok=True)
+
+ # Cache for loaded data
+ self._tablesCache = {}
+
+ # Initialize system table
+ self._systemTableName = "_system"
+ self._initializeSystemTable()
+
+ # Temporarily store mandateId and userId
+ self._mandateId = mandateId
+ self._userId = userId
+
+ # If mandateId or userId are 0 and we're not skipping ID lookup, try to use the initial IDs
+ if not skipInitialIdLookup:
+ if mandateId == 0:
+ initialMandateId = self.getInitialId("mandates")
+ if initialMandateId is not None:
+ self._mandateId = initialMandateId
+ logger.info(f"Using initial mandateId: {initialMandateId} instead of 0")
+
+ if userId == 0:
+ initialUserId = self.getInitialId("users")
+ if initialUserId is not None:
+ self._userId = initialUserId
+ logger.info(f"Using initial userId: {initialUserId} instead of 0")
+
+ # Set the effective IDs as properties
+ self.mandateId = self._mandateId
+ self.userId = self._userId
+
+ logger.info(f"DatabaseConnector initialized for directory: {self.dbFolder}")
+ logger.debug(f"Context: mandateId={self.mandateId}, userId={self.userId}")
+
+ def _initializeSystemTable(self):
+ """Initializes the system table if it doesn't exist yet."""
+ systemTablePath = self._getTablePath(self._systemTableName)
+ if not os.path.exists(systemTablePath):
+ emptySystemTable = {}
+ self._saveSystemTable(emptySystemTable)
+ logger.info(f"System table initialized in {systemTablePath}")
+ else:
+ # Load existing system table to ensure it's available
+ self._loadSystemTable()
+ logger.debug(f"Existing system table loaded from {systemTablePath}")
+
+ def _loadSystemTable(self) -> Dict[str, int]:
+ """Loads the system table with the initial IDs."""
+ # Check if system table is in cache
+ if f"_{self._systemTableName}" in self._tablesCache:
+ return self._tablesCache[f"_{self._systemTableName}"]
+
+ systemTablePath = self._getTablePath(self._systemTableName)
+ try:
+ if os.path.exists(systemTablePath):
+ with open(systemTablePath, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+ # Store in cache with special prefix to avoid collision with regular tables
+ self._tablesCache[f"_{self._systemTableName}"] = data
+ return data
+ else:
+ self._tablesCache[f"_{self._systemTableName}"] = {}
+ return {}
+ except Exception as e:
+ logger.error(f"Error loading the system table: {e}")
+ self._tablesCache[f"_{self._systemTableName}"] = {}
+ return {}
+
+ def _saveSystemTable(self, data: Dict[str, int]) -> bool:
+ """Saves the system table with the initial IDs."""
+ systemTablePath = self._getTablePath(self._systemTableName)
+ try:
+ with open(systemTablePath, 'w', encoding='utf-8') as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
+ # Update cache
+ self._tablesCache[f"_{self._systemTableName}"] = data
+ return True
+ except Exception as e:
+ logger.error(f"Error saving the system table: {e}")
+ return False
+
+ def _getTablePath(self, table: str) -> str:
+ """Returns the full path to a table file"""
+ return os.path.join(self.dbFolder, f"{table}.json")
+
+ def _loadTable(self, table: str) -> List[Dict[str, Any]]:
+ """Loads a table from the corresponding JSON file"""
+ path = self._getTablePath(table)
+
+ # If the table is the system table, load it directly
+ if table == self._systemTableName:
+ return [] # The system table is not treated like normal tables
+
+ # If the table is already in the cache, use the cache
+ if table in self._tablesCache:
+ return self._tablesCache[table]
+
+ # Otherwise load the file
+ try:
+ if os.path.exists(path):
+ with open(path, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+ self._tablesCache[table] = data
+
+ # If data was loaded and no initial ID is registered yet,
+ # register the ID of the first record (if available)
+ if data and not self.hasInitialId(table):
+ if "id" in data[0]:
+ self._registerInitialId(table, data[0]["id"])
+ logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered")
+
+ return data
+ else:
+ # If the file doesn't exist, create an empty table
+ logger.info(f"New table {table}")
+ self._tablesCache[table] = []
+ self._saveTable(table, [])
+ return []
+ except Exception as e:
+ logger.error(f"Error loading table {table}: {e}")
+ return []
+
+ def _saveTable(self, table: str, data: List[Dict[str, Any]]) -> bool:
+ """Saves a table to the corresponding JSON file"""
+ # The system table is handled specially
+ if table == self._systemTableName:
+ return False
+
+ path = self._getTablePath(table)
+ try:
+ with open(path, 'w', encoding='utf-8') as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
+
+ # Update the cache
+ self._tablesCache[table] = data
+ return True
+ except Exception as e:
+ logger.error(f"Error saving table {table}: {e}")
+ return False
+
+ def _filterByContext(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """
+ Filters records by tenant and user context,
+ if these fields exist in the record.
+ """
+ filteredRecords = []
+
+ for record in records:
+ # Check if mandateId exists in the record and is not null
+ hasMandate = "mandateId" in record and record["mandateId"] is not None and record["mandateId"] != ""
+
+ # Check if userId exists in the record and is not null
+ hasUser = "userId" in record and record["userId"] is not None and record["userId"] != ""
+
+ # If both exist, filter accordingly
+ if hasMandate and hasUser:
+ if record["mandateId"] == self.mandateId:
+ filteredRecords.append(record)
+ # If only mandateId exists
+ elif hasMandate and not hasUser:
+ if record["mandateId"] == self.mandateId:
+ filteredRecords.append(record)
+ # If neither mandateId nor userId exist, add the record
+ elif not hasMandate and not hasUser:
+ filteredRecords.append(record)
+
+ return filteredRecords
+
+ def _applyRecordFilter(self, records: List[Dict[str, Any]], recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+ """Applies a record filter to the records"""
+ if not recordFilter:
+ return records
+
+ filteredRecords = []
+
+ for record in records:
+ match = True
+
+ for field, value in recordFilter.items():
+ # Check if the field exists
+ if field not in record:
+ match = False
+ break
+
+ # If the filter value is an integer string and the record field is an integer
+ if isinstance(value, str) and value.isdigit() and isinstance(record[field], int):
+ if record[field] != int(value):
+ match = False
+ break
+ # Otherwise direct comparison
+ elif record[field] != value:
+ match = False
+ break
+
+ if match:
+ filteredRecords.append(record)
+
+ return filteredRecords
+
+ def _registerInitialId(self, table: str, initialId: int) -> bool:
+ """
+ Registers the initial ID for a table.
+
+ Args:
+ table: Name of the table
+ initialId: The initial ID
+
+ Returns:
+ True on success, False on error
+ """
+ try:
+ # Load the current system table
+ systemData = self._loadSystemTable()
+
+ # Only register if not already present
+ if table not in systemData:
+ systemData[table] = initialId
+ success = self._saveSystemTable(systemData)
+ if success:
+ logger.info(f"Initial ID {initialId} for table {table} registered")
+ return success
+ return True # If already present, this is not an error
+ except Exception as e:
+ logger.error(f"Error registering the initial ID for table {table}: {e}")
+ return False
+
+ def _removeInitialId(self, table: str) -> bool:
+ """
+ Removes the initial ID for a table from the system table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ True on success, False on error
+ """
+ try:
+ # Load the current system table
+ systemData = self._loadSystemTable()
+
+ # Remove the entry if it exists
+ if table in systemData:
+ del systemData[table]
+ success = self._saveSystemTable(systemData)
+ if success:
+ logger.info(f"Initial ID for table {table} removed from system table")
+ return success
+ return True # If not present, this is not an error
+ except Exception as e:
+ logger.error(f"Error removing initial ID for table {table}: {e}")
+ return False
+
+ # Public API
+
+ def getTables(self) -> List[str]:
+ """
+ Returns a list of all available tables.
+
+ Returns:
+ List of table names
+ """
+ tables = []
+
+ try:
+ for filename in os.listdir(self.dbFolder):
+ if filename.endswith('.json') and not filename.startswith('_'):
+ tableName = filename[:-5] # Remove the .json extension
+ tables.append(tableName)
+ except Exception as e:
+ logger.error(f"Error reading the database directory: {e}")
+
+ return tables
+
+ def getFields(self, table: str) -> List[str]:
+ """
+ Returns a list of all fields in a table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ List of field names
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ if not data:
+ return []
+
+ # Take the first record as a reference for the fields
+ fields = list(data[0].keys()) if data else []
+
+ return fields
+
+ def getSchema(self, table: str, language: str = None) -> Dict[str, Dict[str, Any]]:
+ """
+ Returns a schema object for a table with data types and labels.
+
+ Args:
+ table: Name of the table
+ language: Language for the labels (optional)
+
+ Returns:
+ Schema object with fields, data types and labels
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ schema = {}
+
+ if not data:
+ return schema
+
+ # Take the first record as a reference for the fields and data types
+ firstRecord = data[0]
+
+ for field, value in firstRecord.items():
+ # Determine the data type
+ dataType = type(value).__name__
+
+ # Create label (default is the field name)
+ label = field
+
+ schema[field] = {
+ "type": dataType,
+ "label": label
+ }
+
+ return schema
+
+ def getRecordset(self, table: str, fieldFilter: List[str] = None, recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+ """
+ Returns a list of records from a table, filtered by criteria.
+
+ Args:
+ table: Name of the table
+ fieldFilter: Filter for fields (which fields should be returned)
+ recordFilter: Filter for records (which records should be returned)
+
+ Returns:
+ List of filtered records
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ # Filter by tenant and user context
+ filteredData = self._filterByContext(data)
+
+ # Apply recordFilter if available
+ if recordFilter:
+ filteredData = self._applyRecordFilter(filteredData, recordFilter)
+
+ # If fieldFilter is available, reduce the fields
+ if fieldFilter and isinstance(fieldFilter, list):
+ result = []
+ for record in filteredData:
+ filteredRecord = {}
+ for field in fieldFilter:
+ if field in record:
+ filteredRecord[field] = record[field]
+ result.append(filteredRecord)
+ return result
+
+ return filteredData
+
+ def recordCreate(self, table: str, recordData: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Creates a new record in the table.
+
+ Args:
+ table: Name of the table
+ recordData: Data for the new record
+
+ Returns:
+ The created record
+ """
+ # Load the table data
+ data = self._loadTable(table)
+
+ # Add mandateId and userId if not present or 0
+ if "mandateId" not in recordData or recordData["mandateId"] == 0:
+ recordData["mandateId"] = self.mandateId
+
+ if "userId" not in recordData or recordData["userId"] == 0:
+ recordData["userId"] = self.userId
+
+ # Determine the next ID if not present
+ if "id" not in recordData:
+ nextId = 1
+ if data:
+ nextId = max(record["id"] for record in data if "id" in record) + 1
+ recordData["id"] = nextId
+
+ # If the table is empty and a system ID should be registered
+ if not data:
+ self._registerInitialId(table, recordData["id"])
+ logger.info(f"Initial ID {recordData['id']} for table {table} has been registered")
+
+ # Add the new record
+ data.append(recordData)
+
+ # Save the updated table
+ if self._saveTable(table, data):
+ return recordData
+ else:
+ raise ValueError(f"Error creating the record in table {table}")
+
+ def recordDelete(self, table: str, recordId: Union[str, int]) -> bool:
+ """
+ Deletes a record from the table.
+
+ Args:
+ table: Name of the table
+ recordId: ID of the record to delete
+
+ Returns:
+ True on success, False on error
+ """
+ # Load table data
+ data = self._loadTable(table)
+
+ # Search for the record
+ for i, record in enumerate(data):
+ if "id" in record and record["id"] == recordId:
+ # Check if the record belongs to the current mandate
+ if "mandateId" in record and record["mandateId"] != self.mandateId:
+ raise ValueError("Not your mandate")
+
+ # Check if it's an initial record
+ initialId = self.getInitialId(table)
+ if initialId is not None and initialId == recordId:
+ # Remove this entry from the system table
+ self._removeInitialId(table)
+ logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table")
+
+ # Delete the record
+ del data[i]
+
+ # Save the updated table
+ return self._saveTable(table, data)
+
+ # Record not found
+ return False
+
+ def recordModify(self, table: str, recordId: Union[str, int], recordData: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Modifies a record in the table.
+
+ Args:
+ table: Name of the table
+ recordId: ID of the record to modify
+ recordData: New data for the record
+
+ Returns:
+ The updated record
+ """
+ # Load table data
+ data = self._loadTable(table)
+
+ # Search for the record
+ for i, record in enumerate(data):
+ if "id" in record and record["id"] == recordId:
+ # Check if the record belongs to the current mandate
+ if "mandateId" in record and record["mandateId"] != self.mandateId:
+ raise ValueError("Not your mandate")
+
+ # Prevent changing the ID
+ if "id" in recordData and recordData["id"] != recordId:
+ raise ValueError(f"The ID of a record in table {table} cannot be changed")
+
+ # Update the record
+ for key, value in recordData.items():
+ data[i][key] = value
+
+ # Save the updated table
+ if self._saveTable(table, data):
+ return data[i]
+ else:
+ raise ValueError(f"Error updating record in table {table}")
+
+ # Record not found
+ raise ValueError(f"Record with ID {recordId} not found in table {table}")
+
+ def hasInitialId(self, table: str) -> bool:
+ """
+ Checks if an initial ID is registered for a table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ True if an initial ID is registered, otherwise False
+ """
+ systemData = self._loadSystemTable()
+ return table in systemData
+
+ def getInitialId(self, table: str) -> Optional[int]:
+ """
+ Returns the initial ID for a table.
+
+ Args:
+ table: Name of the table
+
+ Returns:
+ The initial ID or None if not present
+ """
+ systemData = self._loadSystemTable()
+ initialId = systemData.get(table)
+ logger.debug(f"Database '{self.dbDatabase}': Initial ID for table '{table}' is {initialId}")
+ if initialId is None:
+ logger.debug(f"No initial ID found for table {table}")
+ return initialId
+
+ def getAllInitialIds(self) -> Dict[str, int]:
+ """
+ Returns all registered initial IDs.
+
+ Returns:
+ Dictionary with table names as keys and initial IDs as values
+ """
+ systemData = self._loadSystemTable()
+ return systemData.copy() # Return a copy to protect the original
\ No newline at end of file
diff --git a/connectors/connector_db_json.py b/connectors/connector_db_json.py
deleted file mode 100644
index 919c2bfa..00000000
--- a/connectors/connector_db_json.py
+++ /dev/null
@@ -1,557 +0,0 @@
-import json
-import os
-from typing import List, Dict, Any, Optional, Union
-import logging
-from datetime import datetime
-
-
-logger = logging.getLogger(__name__)
-
-
-class DatabaseConnector:
- """
- A connector for JSON-based data storage.
- Provides generic database operations.
- """
- def __init__(self, db_host: str, db_database: str, db_user: str = None, db_password: str = None, mandate_id: int = None, user_id: int = None):
- """
- Initializes the JSON database connector.
-
- Args:
- db_host: Directory for the JSON files
- db_database = Database name
- db_user: Username for authentication (optional)
- db_password: API key for authentication (optional)
- mandate_id: Context parameter for the tenant
- user_id: Context parameter for the user
- """
- # Store the input parameters
- self.db_host = db_host
- self.db_database = db_database
- self.db_user = db_user
- self.db_password = db_password
-
- # Check if context parameters are set
- if mandate_id is None or user_id is None:
- raise ValueError("mandate_id and user_id must be set")
-
- # Ensure the database directory exists
- self.db_folder=os.path.join(self.db_host,self.db_database)
- os.makedirs(self.db_folder, exist_ok=True)
-
- # Cache for loaded data
- self._tables_cache = {}
-
- # Initialize system table
- self._system_table_name = "_system"
- self._initialize_system_table()
-
- # Temporarily store mandate_id and user_id
- self._mandate_id = mandate_id
- self._user_id = user_id
-
- # If mandate_id or user_id are 0, try to use the initial IDs
- if mandate_id == 0:
- initial_mandate_id = self.get_initial_id("mandates")
- if initial_mandate_id is not None:
- self._mandate_id = initial_mandate_id
- logger.info(f"Using initial mandate_id: {initial_mandate_id} instead of 0")
-
- if user_id == 0:
- initial_user_id = self.get_initial_id("users")
- if initial_user_id is not None:
- self._user_id = initial_user_id
- logger.info(f"Using initial user_id: {initial_user_id} instead of 0")
-
- # Set the effective IDs as properties
- self.mandate_id = self._mandate_id
- self.user_id = self._user_id
-
- logger.info(f"DatabaseConnector initialized for directory: {self.db_folder}")
- logger.debug(f"Context: mandate_id={self.mandate_id}, user_id={self.user_id}")
-
- def _initialize_system_table(self):
- """Initializes the system table if it doesn't exist yet."""
- system_table_path = self._get_table_path(self._system_table_name)
- if not os.path.exists(system_table_path):
- empty_system_table = {}
- self._save_system_table(empty_system_table)
- logger.info(f"System table initialized in {system_table_path}")
-
- def _load_system_table(self) -> Dict[str, int]:
- """Loads the system table with the initial IDs."""
- system_table_path = self._get_table_path(self._system_table_name)
- try:
- if os.path.exists(system_table_path):
- with open(system_table_path, 'r', encoding='utf-8') as f:
- return json.load(f)
- else:
- return {}
- except Exception as e:
- logger.error(f"Error loading the system table: {e}")
- return {}
-
- def _save_system_table(self, data: Dict[str, int]) -> bool:
- """Saves the system table with the initial IDs."""
- system_table_path = self._get_table_path(self._system_table_name)
- try:
- with open(system_table_path, 'w', encoding='utf-8') as f:
- json.dump(data, f, indent=2, ensure_ascii=False)
- return True
- except Exception as e:
- logger.error(f"Error saving the system table: {e}")
- return False
-
- def _get_table_path(self, table: str) -> str:
- """Returns the full path to a table file"""
- return os.path.join(self.db_folder, f"{table}.json")
-
- def _load_table(self, table: str) -> List[Dict[str, Any]]:
- """Loads a table from the corresponding JSON file"""
- path = self._get_table_path(table)
-
- # If the table is the system table, load it directly
- if table == self._system_table_name:
- return [] # The system table is not treated like normal tables
-
- # If the table is already in the cache, use the cache
- if table in self._tables_cache:
- # logger.info(f"Loading table {table} from cache")
- return self._tables_cache[table]
-
- # Otherwise load the file
- try:
- if os.path.exists(path):
- # logger.info(f"Loading table {table} from JSON {path}")
- with open(path, 'r', encoding='utf-8') as f:
- data = json.load(f)
- self._tables_cache[table] = data
-
- # If data was loaded and no initial ID is registered yet,
- # register the ID of the first record (if available)
- if data and not self.has_initial_id(table):
- if "id" in data[0]:
- self._register_initial_id(table, data[0]["id"])
- logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered")
-
- return data
- else:
- # If the file doesn't exist, create an empty table
- logger.info(f"New table {table}")
- self._tables_cache[table] = []
- self._save_table(table, [])
- return []
- except Exception as e:
- logger.error(f"Error loading table {table}: {e}")
- return []
-
- def _save_table(self, table: str, data: List[Dict[str, Any]]) -> bool:
- """Saves a table to the corresponding JSON file"""
- # The system table is handled specially
- if table == self._system_table_name:
- return False
-
- path = self._get_table_path(table)
- try:
- with open(path, 'w', encoding='utf-8') as f:
- json.dump(data, f, indent=2, ensure_ascii=False)
-
- # Update the cache
- self._tables_cache[table] = data
- return True
- except Exception as e:
- logger.error(f"Error saving table {table}: {e}")
- return False
-
- def _filter_by_context(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
- """
- Filters records by tenant and user context,
- if these fields exist in the record.
- """
- filtered_records = []
-
- for record in records:
- # Check if mandate_id exists in the record and is not null
- has_mandate = "mandate_id" in record and record["mandate_id"] is not None and record["mandate_id"] != ""
-
- # Check if user_id exists in the record and is not null
- has_user = "user_id" in record and record["user_id"] is not None and record["user_id"] != ""
-
- # If both exist, filter accordingly
- if has_mandate and has_user:
- if record["mandate_id"] == self.mandate_id:
- filtered_records.append(record)
- # If only mandate_id exists
- elif has_mandate and not has_user:
- if record["mandate_id"] == self.mandate_id:
- filtered_records.append(record)
- # If neither mandate_id nor user_id exist, add the record
- elif not has_mandate and not has_user:
- filtered_records.append(record)
-
- return filtered_records
-
- def _apply_record_filter(self, records: List[Dict[str, Any]], record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
- """Applies a record filter to the records"""
-
- if not record_filter:
- return records
-
- filtered_records = []
-
- for record in records:
- match = True
-
- for field, value in record_filter.items():
- # Check if the field exists
- if field not in record:
- match = False
- break
-
- # If the filter value is an integer string and the record field is an integer
- if isinstance(value, str) and value.isdigit() and isinstance(record[field], int):
- if record[field] != int(value):
- match = False
- break
- # Otherwise direct comparison
- elif record[field] != value:
- match = False
- break
-
- if match:
- filtered_records.append(record)
-
- return filtered_records
-
- def _register_initial_id(self, table: str, initial_id: int) -> bool:
- """
- Registers the initial ID for a table.
-
- Args:
- table: Name of the table
- initial_id: The initial ID
-
- Returns:
- True on success, False on error
- """
- try:
- # Load the current system table
- system_data = self._load_system_table()
-
- # Only register if not already present
- if table not in system_data:
- system_data[table] = initial_id
- success = self._save_system_table(system_data)
- if success:
- logger.info(f"Initial ID {initial_id} for table {table} registered")
- return success
- return True # If already present, this is not an error
- except Exception as e:
- logger.error(f"Error registering the initial ID for table {table}: {e}")
- return False
-
- def _remove_initial_id(self, table: str) -> bool:
- """
- Removes the initial ID for a table from the system table.
-
- Args:
- table: Name of the table
-
- Returns:
- True on success, False on error
- """
- try:
- # Load the current system table
- system_data = self._load_system_table()
-
- # Remove the entry if it exists
- if table in system_data:
- del system_data[table]
- success = self._save_system_table(system_data)
- if success:
- logger.info(f"Initial ID for table {table} removed from system table")
- return success
- return True # If not present, this is not an error
- except Exception as e:
- logger.error(f"Error removing initial ID for table {table}: {e}")
- return False
-
-
- # Public API
-
- def get_tables(self, filter_criteria: Dict[str, Any] = None) -> List[str]:
- """
- Returns a list of all available tables.
-
- Args:
- filter_criteria: Optional filter criteria (not implemented)
-
- Returns:
- List of table names
- """
-
- tables = []
-
- try:
- for filename in os.listdir(self.db_folder):
- if filename.endswith('.json') and not filename.startswith('_'):
- table_name = filename[:-5] # Remove the .json extension
- tables.append(table_name)
- except Exception as e:
- logger.error(f"Error reading the database directory: {e}")
-
- return tables
-
- def get_fields(self, table: str, filter_criteria: Dict[str, Any] = None) -> List[str]:
- """
- Returns a list of all fields in a table.
-
- Args:
- table: Name of the table
- filter_criteria: Optional filter criteria (not implemented)
-
- Returns:
- List of field names
- """
- # Load the table data
- data = self._load_table(table)
-
- if not data:
- return []
-
- # Take the first record as a reference for the fields
- fields = list(data[0].keys()) if data else []
-
- return fields
-
- def get_schema(self, table: str, language: str = None, filter_criteria: Dict[str, Any] = None) -> Dict[str, Dict[str, Any]]:
- """
- Returns a schema object for a table with data types and labels.
-
- Args:
- table: Name of the table
- language: Language for the labels (optional)
- filter_criteria: Optional filter criteria (not implemented)
-
- Returns:
- Schema object with fields, data types and labels
- """
- # Load the table data
- data = self._load_table(table)
-
- schema = {}
-
- if not data:
- return schema
-
- # Take the first record as a reference for the fields and data types
- first_record = data[0]
-
- for field, value in first_record.items():
- # Determine the data type
- data_type = type(value).__name__
-
- # Create label (default is the field name)
- label = field
-
- # If model_info is available, try to get the label from the model
- # Implementation depends on the actual model
-
- schema[field] = {
- "type": data_type,
- "label": label
- }
-
- return schema
-
- def get_recordset(self, table: str, field_filter: Dict[str, Any] = None, record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
- """
- Returns a list of records from a table, filtered by criteria.
-
- Args:
- table: Name of the table
- field_filter: Filter for fields (which fields should be returned)
- record_filter: Filter for records (which records should be returned)
-
- Returns:
- List of filtered records
- """
- # Load the table data
- data = self._load_table(table)
-
- # Filter by tenant and user context
- filtered_data = self._filter_by_context(data)
-
- # Apply record_filter if available
- if record_filter:
- filtered_data = self._apply_record_filter(filtered_data, record_filter)
-
- # If field_filter is available, reduce the fields
- if field_filter and isinstance(field_filter, list):
- result = []
- for record in filtered_data:
- filtered_record = {}
- for field in field_filter:
- if field in record:
- filtered_record[field] = record[field]
- result.append(filtered_record)
- return result
-
- return filtered_data
-
- def record_create(self, table: str, record_data: Dict[str, Any]) -> Dict[str, Any]:
- """
- Creates a new record in the table.
-
- Args:
- table: Name of the table
- record_data: Data for the new record
-
- Returns:
- The created record
- """
- # Load the table data
- data = self._load_table(table)
-
- # Add mandate_id and user_id if not present or 0
- if "mandate_id" not in record_data or record_data["mandate_id"] == 0:
- record_data["mandate_id"] = self.mandate_id
-
- if "user_id" not in record_data or record_data["user_id"] == 0:
- record_data["user_id"] = self.user_id
-
- # Determine the next ID if not present
- if "id" not in record_data:
- next_id = 1
- if data:
- next_id = max(record["id"] for record in data if "id" in record) + 1
- record_data["id"] = next_id
-
- # If the table is empty and a system ID should be registered
- if not data:
- self._register_initial_id(table, record_data["id"])
- logger.info(f"Initial ID {record_data['id']} for table {table} has been registered")
-
- # Add the new record
- data.append(record_data)
-
- # Save the updated table
- if self._save_table(table, data):
- return record_data
- else:
- raise ValueError(f"Error creating the record in table {table}")
-
- def record_delete(self, table: str, record_id: Union[str, int]) -> bool:
- """
- Deletes a record from the table.
-
- Args:
- table: Name of the table
- record_id: ID of the record to delete
-
- Returns:
- True on success, False on error
- """
- # Load table data
- data = self._load_table(table)
-
- # Search for the record
- for i, record in enumerate(data):
- if "id" in record and record["id"] == record_id:
- # Check if the record belongs to the current mandate
- if "mandate_id" in record and record["mandate_id"] != self.mandate_id:
- raise ValueError("Not your mandate")
-
- # Check if it's an initial record
- initial_id = self.get_initial_id(table)
- if initial_id is not None and initial_id == record_id:
- # Remove this entry from the system table
- self._remove_initial_id(table)
- logger.info(f"Initial ID {record_id} for table {table} has been removed from the system table")
-
- # Delete the record
- del data[i]
-
- # Save the updated table
- return self._save_table(table, data)
-
- # Record not found
- return False
-
- def record_modify(self, table: str, record_id: Union[str, int], record_data: Dict[str, Any]) -> Dict[str, Any]:
- """
- Modifies a record in the table.
-
- Args:
- table: Name of the table
- record_id: ID of the record to modify
- record_data: New data for the record
-
- Returns:
- The updated record
- """
- # Load table data
- data = self._load_table(table)
-
- # Search for the record
- for i, record in enumerate(data):
- if "id" in record and record["id"] == record_id:
- # Check if the record belongs to the current mandate
- if "mandate_id" in record and record["mandate_id"] != self.mandate_id:
- raise ValueError("Not your mandate")
-
- # Prevent changing the ID
- if "id" in record_data and record_data["id"] != record_id:
- raise ValueError(f"The ID of a record in table {table} cannot be changed")
-
- # Update the record
- for key, value in record_data.items():
- data[i][key] = value
-
- # Save the updated table
- if self._save_table(table, data):
- return data[i]
- else:
- raise ValueError(f"Error updating record in table {table}")
-
- # Record not found
- raise ValueError(f"Record with ID {record_id} not found in table {table}")
-
- def has_initial_id(self, table: str) -> bool:
- """
- Checks if an initial ID is registered for a table.
-
- Args:
- table: Name of the table
-
- Returns:
- True if an initial ID is registered, otherwise False
- """
- system_data = self._load_system_table()
- return table in system_data
-
- def get_initial_id(self, table: str) -> Optional[int]:
- """
- Returns the initial ID for a table.
-
- Args:
- table: Name of the table
-
- Returns:
- The initial ID or None if not present
- """
- system_data = self._load_system_table()
- initial_id = system_data.get(table)
- if initial_id is None:
- logger.debug(f"No initial ID found for table {table}")
- return initial_id
-
- def get_all_initial_ids(self) -> Dict[str, int]:
- """
- Returns all registered initial IDs.
-
- Returns:
- Dictionary with table names as keys and initial IDs as values
- """
- system_data = self._load_system_table()
- return system_data.copy() # Return a copy to protect the original
\ No newline at end of file
diff --git a/env_dev.env b/env_dev.env
index ceb7d14f..98f8d545 100644
--- a/env_dev.env
+++ b/env_dev.env
@@ -22,4 +22,4 @@ APP_JWT_SECRET_SECRET=dev_jwt_secret_token
APP_TOKEN_EXPIRY=300
# CORS Configuration
-APP_ALLOWED_ORIGINS=["http://localhost:8080","http://localhost:3000"]
+APP_ALLOWED_ORIGINS="http://localhost:8080","http://localhost:3000"
diff --git a/env_prod.env b/env_prod.env
index e9b7e129..e9c5efd6 100644
--- a/env_prod.env
+++ b/env_prod.env
@@ -22,4 +22,4 @@ APP_JWT_SECRET_SECRET=dev_jwt_secret_token
APP_TOKEN_EXPIRY=300
# CORS Configuration
-APP_ALLOWED_ORIGINS=["http://localhost:8080","http://localhost:3000"]
+APP_ALLOWED_ORIGINS="http://localhost:8080","http://localhost:3000"
diff --git a/modules/_SAVE_gatewayInterface copy.py b/modules/_SAVE_gatewayInterface copy.py
new file mode 100644
index 00000000..ae497abc
--- /dev/null
+++ b/modules/_SAVE_gatewayInterface copy.py
@@ -0,0 +1,261 @@
+"""
+Interface to the Gateway system.
+Manages users and mandates for authentication.
+"""
+
+import os
+import logging
+from typing import Dict, Any, List, Optional, Union
+import importlib
+from passlib.context import CryptContext
+
+from connectors.connectorDbJson import DatabaseConnector
+from modules.configuration import APP_CONFIG
+
+logger = logging.getLogger(__name__)
+
+# Password-Hashing
+pwdContext = CryptContext(schemes=["argon2"], deprecated="auto")
+
+
+class GatewayInterface:
+ """
+ Interface to the Gateway system.
+ Manages users and mandates.
+ """
+
+ def __init__(self, mandateId: int = None, userId: int = None):
+ """
+ Initializes the Gateway Interface with optional mandate and user context.
+
+ Args:
+ mandateId: ID of the current mandate (optional)
+ userId: ID of the current user (optional)
+ """
+ # Context can be empty during initialization
+ self.mandateId = mandateId
+ self.userId = userId
+
+ # Import data model module
+ try:
+ self.modelModule = importlib.import_module("modules.gatewayModel")
+ logger.info("gatewayModel successfully imported")
+ except ImportError as e:
+ logger.error(f"Error importing gatewayModel: {e}")
+ raise
+
+ # Initialize database
+ self._initializeDatabase()
+
+ def _initializeDatabase(self):
+ """
+ Initializes the database with minimal objects
+ """
+
+ self.db = DatabaseConnector(
+ dbHost=APP_CONFIG.get("DB_SYSTEM_HOST"),
+ dbDatabase=APP_CONFIG.get("DB_SYSTEM_DATABASE"),
+ dbUser=APP_CONFIG.get("DB_SYSTEM_USER"),
+ dbPassword=APP_CONFIG.get("DB_SYSTEM_PASSWORD_SECRET"),
+ mandateId=self.mandateId if self.mandateId else 0,
+ userId=self.userId if self.userId else 0
+ )
+
+ # Create Root mandate if needed
+ existingMandateId = self.getInitialId("mandates")
+ mandates = self.db.getRecordset("mandates")
+ if existingMandateId is None or not mandates:
+ logger.info("Creating Root mandate")
+ rootMandate = {
+ "name": "Root",
+ "language": "de"
+ }
+ createdMandate = self.db.recordCreate("mandates", rootMandate)
+ logger.info(f"Root mandate created with ID {createdMandate['id']}")
+
+ # Update mandate context
+ self.mandateId = createdMandate['id']
+ self.userId = createdMandate['userId']
+
+ # Recreate connector with correct context
+ self.db = DatabaseConnector(
+ dbHost=APP_CONFIG.get("DB_SYSTEM_HOST"),
+ dbDatabase=APP_CONFIG.get("DB_SYSTEM_DATABASE"),
+ dbUser=APP_CONFIG.get("DB_SYSTEM_USER"),
+ dbPassword=APP_CONFIG.get("DB_SYSTEM_PASSWORD_SECRET"),
+ mandateId=self.mandateId,
+ userId=self.userId
+ )
+
+ # Create Admin user if needed
+ existingUserId = self.getInitialId("users")
+ users = self.db.getRecordset("users")
+ if existingUserId is None or not users:
+ logger.info("Creating Admin user")
+ adminUser = {
+ "mandateId": self.mandateId,
+ "username": "admin",
+ "email": "admin@example.com",
+ "fullName": "Administrator",
+ "disabled": False,
+ "language": "de",
+ "privilege": "sysadmin", # SysAdmin privilege
+ "hashedPassword": self._getPasswordHash("admin") # Use a secure password in production!
+ }
+ createdUser = self.db.recordCreate("users", adminUser)
+ logger.info(f"Admin user created with ID {createdUser['id']}")
+
+ # Update user context
+ self.userId = createdUser['id']
+
+ # Recreate connector with correct context
+ self.db = DatabaseConnector(
+ dbHost=APP_CONFIG.get("DB_SYSTEM_HOST"),
+ dbDatabase=APP_CONFIG.get("DB_SYSTEM_DATABASE"),
+ dbUser=APP_CONFIG.get("DB_SYSTEM_USER"),
+ dbPassword=APP_CONFIG.get("DB_SYSTEM_PASSWORD_SECRET"),
+ mandateId=self.mandateId,
+ userId=self.userId
+ )
+
+ def getInitialId(self, table: str) -> Optional[int]:
+ """Returns the initial ID for a table"""
+ return self.db.getInitialId(table)
+
+ def _getPasswordHash(self, password: str) -> str:
+ """Creates a hash for a password"""
+ return pwdContext.hash(password)
+
+ def _verifyPassword(self, plainPassword: str, hashedPassword: str) -> bool:
+ """Checks if the password matches the hash"""
+ return pwdContext.verify(plainPassword, hashedPassword)
+
+ def _getCurrentTimestamp(self) -> str:
+ """Returns the current timestamp in ISO format"""
+ from datetime import datetime
+ return datetime.now().isoformat()
+
+ # Mandate methods
+
+ def getAllMandates(self) -> List[Dict[str, Any]]:
+ """Returns all mandates"""
+ return self.db.getRecordset("mandates")
+
+ def getMandate(self, mandateId: int) -> Optional[Dict[str, Any]]:
+ """Returns a mandate by its ID"""
+ mandates = self.db.getRecordset("mandates", recordFilter={"id": mandateId})
+ if mandates:
+ return mandates[0]
+ return None
+
+ def createMandate(self, name: str, language: str = "de") -> Dict[str, Any]:
+ """Creates a new mandate"""
+ mandateData = {
+ "name": name,
+ "language": language
+ }
+
+ return self.db.recordCreate("mandates", mandateData)
+
+ # User methods
+
+ def getAllUsers(self) -> List[Dict[str, Any]]:
+ """Returns all users"""
+ users = self.db.getRecordset("users")
+ # Remove password hashes from the response
+ for user in users:
+ if "hashedPassword" in user:
+ del user["hashedPassword"]
+ return users
+
+ def getUsersByMandate(self, mandateId: int) -> List[Dict[str, Any]]:
+ """
+ Returns all users of a specific mandate
+
+ Args:
+ mandateId: The ID of the mandate
+
+ Returns:
+ List[Dict[str, Any]]: List of users in the mandate
+ """
+ users = self.db.getRecordset("users", recordFilter={"mandateId": mandateId})
+ # Remove password hashes from the response
+ for user in users:
+ if "hashedPassword" in user:
+ del user["hashedPassword"]
+ return users
+
+ def getUserByUsername(self, username: str) -> Optional[Dict[str, Any]]:
+ """Returns a user by username"""
+ users = self.db.getRecordset("users")
+ for user in users:
+ if user.get("username") == username:
+ return user
+ return None
+
+ def getUser(self, userId: int) -> Optional[Dict[str, Any]]:
+ """Returns a user by ID"""
+ users = self.db.getRecordset("users", recordFilter={"id": userId})
+ if users:
+ user = users[0]
+ # Remove password hash from the API response
+ if "hashedPassword" in user:
+ userCopy = user.copy()
+ del userCopy["hashedPassword"]
+ return userCopy
+ return user
+ return None
+
+ def authenticateUser(self, username: str, password: str) -> Optional[Dict[str, Any]]:
+ """
+ Authenticates a user by username and password
+
+ Args:
+ username: The username
+ password: The password
+
+ Returns:
+ Optional[Dict[str, Any]]: The user data or None if authentication fails
+ """
+ user = self.getUserByUsername(username)
+
+ if not user:
+ return None
+
+ if not self._verifyPassword(password, user.get("hashedPassword", "")):
+ return None
+
+ # Check if the user is disabled
+ if user.get("disabled", False):
+ return None
+
+ # Create a copy without password hash
+ authenticatedUser = {**user}
+ if "hashedPassword" in authenticatedUser:
+ del authenticatedUser["hashedPassword"]
+
+ return authenticatedUser
+
+
+# Singleton factory for GatewayInterface instances per context
+_gatewayInterfaces = {}
+
+def getGatewayInterface(mandateId: int = None, userId: int = None) -> GatewayInterface:
+ """
+ Returns a GatewayInterface instance for the specified context.
+ Reuses existing instances.
+
+ Args:
+ mandateId: ID of the mandate
+ userId: ID of the user
+
+ Returns:
+ GatewayInterface instance
+ """
+ contextKey = f"{mandateId}_{userId}"
+ if contextKey not in _gatewayInterfaces:
+ _gatewayInterfaces[contextKey] = GatewayInterface(mandateId, userId)
+ return _gatewayInterfaces[contextKey]
+
+# Initialize the interface
+getGatewayInterface()
\ No newline at end of file
diff --git a/modules/chat_agent_analyst.py b/modules/agentAnalyst.py
similarity index 56%
rename from modules/chat_agent_analyst.py
rename to modules/agentAnalyst.py
index e2c9cb98..181dba42 100644
--- a/modules/chat_agent_analyst.py
+++ b/modules/agentAnalyst.py
@@ -12,7 +12,7 @@ import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
-from modules.chat_registry import AgentBase
+from modules.workflowAgentsRegistry import AgentBase
logger = logging.getLogger(__name__)
@@ -25,26 +25,26 @@ class AgentAnalyst(AgentBase):
self.name = "analyst"
self.description = "Analyzes data using AI-powered insights and visualizations, produce diagrams and visualizations"
self.capabilities = [
- "data_analysis",
+ "dataAnalysis",
"statistics",
"visualization",
- "data_interpretation",
- "report_generation"
+ "dataInterpretation",
+ "reportGeneration"
]
# Set default visualization settings
plt.style.use('seaborn-v0_8-whitegrid')
- def set_dependencies(self, mydom=None):
+ def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
- async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
+ async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a task by focusing on required outputs and using AI to generate them.
Args:
- task: Task dictionary with prompt, input_documents, output_specifications
+ task: Task dictionary with prompt, inputDocuments, outputSpecifications
Returns:
Dictionary with feedback and documents
@@ -52,8 +52,8 @@ class AgentAnalyst(AgentBase):
try:
# Extract task information
prompt = task.get("prompt", "")
- input_documents = task.get("input_documents", [])
- output_specs = task.get("output_specifications", [])
+ inputDocuments = task.get("inputDocuments", [])
+ outputSpecs = task.get("outputSpecifications", [])
# Check AI service
if not self.mydom:
@@ -62,52 +62,52 @@ class AgentAnalyst(AgentBase):
"documents": []
}
- # Extract data from documents - focusing only on data_extracted
- datasets, document_context = self._extract_data(input_documents)
+ # Extract data from documents - focusing only on dataExtracted
+ datasets, documentContext = self._extractData(inputDocuments)
# Generate task analysis to understand what's needed
- analysis_plan = await self._analyze_task(prompt, document_context, datasets, output_specs)
+ analysisPlan = await self._analyzeTask(prompt, documentContext, datasets, outputSpecs)
# Generate all required output documents
documents = []
# If no output specs provided, create default analysis outputs
- if not output_specs:
- output_specs = []
+ if not outputSpecs:
+ outputSpecs = []
# Process each output specification
- for spec in output_specs:
- output_label = spec.get("label", "")
- output_description = spec.get("description", "")
+ for spec in outputSpecs:
+ outputLabel = spec.get("label", "")
+ outputDescription = spec.get("description", "")
# Determine type based on file extension
- output_type = output_label.split('.')[-1].lower() if '.' in output_label else "txt"
+ outputType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"
# Generate appropriate content based on output type
- if output_type in ['png', 'jpg', 'jpeg', 'svg']:
+ if outputType in ['png', 'jpg', 'jpeg', 'svg']:
# Create visualization
- document = await self._create_visualization(
- datasets, prompt, output_label, analysis_plan, output_description
+ document = await self._createVisualization(
+ datasets, prompt, outputLabel, analysisPlan, outputDescription
)
documents.append(document)
- elif output_type in ['csv', 'json', 'xlsx']:
+ elif outputType in ['csv', 'json', 'xlsx']:
# Create data document
- document = await self._create_data_document(
- datasets, prompt, output_label, analysis_plan, output_description
+ document = await self._createDataDocument(
+ datasets, prompt, outputLabel, analysisPlan, outputDescription
)
documents.append(document)
else:
# Create text document (report, analysis, etc.)
- document = await self._create_text_document(
- datasets, document_context, prompt, output_label,
- output_type, analysis_plan, output_description
+ document = await self._createTextDocument(
+ datasets, documentContext, prompt, outputLabel,
+ outputType, analysisPlan, outputDescription
)
documents.append(document)
# Generate feedback
- feedback = f"{analysis_plan.get('analysis_approach')}"
- if analysis_plan.get("key_insights"):
- feedback += f"\n\n{analysis_plan.get('key_insights')}"
+ feedback = f"{analysisPlan.get('analysisApproach')}"
+ if analysisPlan.get("keyInsights"):
+ feedback += f"\n\n{analysisPlan.get('keyInsights')}"
return {
"feedback": feedback,
@@ -121,9 +121,9 @@ class AgentAnalyst(AgentBase):
"documents": []
}
- def _extract_data(self, documents: List[Dict[str, Any]]) -> tuple:
+ def _extractData(self, documents: List[Dict[str, Any]]) -> tuple:
"""
- Extract data from documents, focusing on data_extracted fields.
+ Extract data from documents, focusing on dataExtracted fields.
Args:
documents: List of input documents
@@ -132,70 +132,70 @@ class AgentAnalyst(AgentBase):
Tuple of (datasets dictionary, document context text)
"""
datasets = {}
- document_context = ""
+ documentContext = ""
# Process each document
for doc in documents:
- doc_name = doc.get("name", "unnamed")
+ docName = doc.get("name", "unnamed")
if doc.get("ext"):
- doc_name = f"{doc_name}.{doc.get('ext')}"
+ docName = f"{docName}.{doc.get('ext')}"
- document_context += f"\n\n--- {doc_name} ---\n"
+ documentContext += f"\n\n--- {docName} ---\n"
# Process contents
for content in doc.get("contents", []):
- # Focus only on data_extracted
- if content.get("data_extracted"):
- extracted_text = content.get("data_extracted", "")
- document_context += extracted_text
+ # Focus only on dataExtracted
+ if content.get("dataExtracted"):
+ extractedText = content.get("dataExtracted", "")
+ documentContext += extractedText
# Try to parse as structured data if appropriate
- if doc_name.lower().endswith(('.csv', '.tsv')):
+ if docName.lower().endswith(('.csv', '.tsv')):
try:
- df = pd.read_csv(io.StringIO(extracted_text))
- datasets[doc_name] = df
+ df = pd.read_csv(io.StringIO(extractedText))
+ datasets[docName] = df
except:
pass
- elif doc_name.lower().endswith('.json'):
+ elif docName.lower().endswith('.json'):
try:
- json_data = json.loads(extracted_text)
- if isinstance(json_data, list):
- df = pd.DataFrame(json_data)
- datasets[doc_name] = df
- elif isinstance(json_data, dict):
+ jsonData = json.loads(extractedText)
+ if isinstance(jsonData, list):
+ df = pd.DataFrame(jsonData)
+ datasets[docName] = df
+ elif isinstance(jsonData, dict):
# Handle nested JSON structures
- if any(isinstance(v, list) for v in json_data.values()):
- for key, value in json_data.items():
+ if any(isinstance(v, list) for v in jsonData.values()):
+ for key, value in jsonData.items():
if isinstance(value, list) and len(value) > 0:
df = pd.DataFrame(value)
- datasets[f"{doc_name}:{key}"] = df
+ datasets[f"{docName}:{key}"] = df
else:
- df = pd.DataFrame([json_data])
- datasets[doc_name] = df
+ df = pd.DataFrame([jsonData])
+ datasets[docName] = df
except:
pass
# Try to detect tabular data in text content
- if doc_name not in datasets and len(extracted_text.splitlines()) > 2:
- lines = extracted_text.splitlines()
+ if docName not in datasets and len(extractedText.splitlines()) > 2:
+ lines = extractedText.splitlines()
if any(',' in line for line in lines[:5]):
try:
- df = pd.read_csv(io.StringIO(extracted_text))
+ df = pd.read_csv(io.StringIO(extractedText))
if len(df.columns) > 1:
- datasets[doc_name] = df
+ datasets[docName] = df
except:
pass
elif any('\t' in line for line in lines[:5]):
try:
- df = pd.read_csv(io.StringIO(extracted_text), sep='\t')
+ df = pd.read_csv(io.StringIO(extractedText), sep='\t')
if len(df.columns) > 1:
- datasets[doc_name] = df
+ datasets[docName] = df
except:
pass
- return datasets, document_context
+ return datasets, documentContext
- async def _analyze_task(self, prompt: str, context: str, datasets: Dict, output_specs: List) -> Dict:
+ async def _analyzeTask(self, prompt: str, context: str, datasets: Dict, outputSpecs: List) -> Dict:
"""
Use AI to analyze the task and create a plan for analysis.
@@ -203,106 +203,106 @@ class AgentAnalyst(AgentBase):
prompt: The task prompt
context: Document context text
datasets: Dictionary of extracted datasets
- output_specs: Output specifications
+ outputSpecs: Output specifications
Returns:
Analysis plan dictionary
"""
# Prepare dataset information
- dataset_info = {}
+ datasetInfo = {}
for name, df in datasets.items():
try:
- dataset_info[name] = {
+ datasetInfo[name] = {
"shape": df.shape,
"columns": df.columns.tolist(),
"dtypes": {col: str(df[col].dtype) for col in df.columns},
"sample": df.head(3).to_dict(orient='records')
}
except:
- dataset_info[name] = {"error": "Could not process dataset"}
+ datasetInfo[name] = {"error": "Could not process dataset"}
- analysis_prompt = f"""
+ analysisPrompt = f"""
Analyze this data analysis task and create a plan.
TASK: {prompt}
AVAILABLE DATA:
- {json.dumps(dataset_info, indent=2)}
+ {json.dumps(datasetInfo, indent=2)}
DOCUMENT CONTEXT:
{context[:1000]}... (truncated)
OUTPUT REQUIREMENTS:
- {json.dumps(output_specs, indent=2)}
+ {json.dumps(outputSpecs, indent=2)}
Create a detailed analysis plan in JSON format with the following structure:
{{
- "analysis_type": "statistical|trend|comparative|predictive|cluster|general",
- "key_questions": ["question1", "question2"],
- "recommended_visualizations": [{{
+ "analysisType": "statistical|trend|comparative|predictive|cluster|general",
+ "keyQuestions": ["question1", "question2"],
+ "recommendedVisualizations": [{{
"type": "chart_type",
- "data_source": "dataset_name",
+ "dataSource": "dataset_name",
"variables": ["col1", "col2"],
"purpose": "explanation"
}}],
- "key_insights": "brief summary of initial insights",
- "analysis_approach": "brief description of recommended approach"
+ "keyInsights": "brief summary of initial insights",
+ "analysisApproach": "brief description of recommended approach"
}}
Only return valid JSON. No preamble or explanations.
"""
try:
- response = await self.mydom.call_ai([
+ response = await self.mydom.callAi([
{"role": "system", "content": "You are a data analysis expert. Respond with valid JSON only."},
- {"role": "user", "content": analysis_prompt}
- ], produce_user_answer = True)
+ {"role": "user", "content": analysisPrompt}
+ ], produceUserAnswer = True)
# Extract JSON from response
- json_start = response.find('{')
- json_end = response.rfind('}') + 1
+ jsonStart = response.find('{')
+ jsonEnd = response.rfind('}') + 1
- if json_start >= 0 and json_end > json_start:
- plan = json.loads(response[json_start:json_end])
+ if jsonStart >= 0 and jsonEnd > jsonStart:
+ plan = json.loads(response[jsonStart:jsonEnd])
return plan
else:
# Fallback if JSON not found
return {
- "analysis_type": "general",
- "key_questions": ["What insights can be extracted from this data?"],
- "recommended_visualizations": [],
- "key_insights": "Analysis plan could not be created",
- "analysis_approach": "General exploratory analysis"
+ "analysisType": "general",
+ "keyQuestions": ["What insights can be extracted from this data?"],
+ "recommendedVisualizations": [],
+ "keyInsights": "Analysis plan could not be created",
+ "analysisApproach": "General exploratory analysis"
}
except Exception as e:
logger.warning(f"Error creating analysis plan: {str(e)}")
return {
- "analysis_type": "general",
- "key_questions": ["What insights can be extracted from this data?"],
- "recommended_visualizations": [],
- "key_insights": "Analysis plan could not be created",
- "analysis_approach": "General exploratory analysis"
+ "analysisType": "general",
+ "keyQuestions": ["What insights can be extracted from this data?"],
+ "recommendedVisualizations": [],
+ "keyInsights": "Analysis plan could not be created",
+ "analysisApproach": "General exploratory analysis"
}
- async def _create_visualization(self, datasets: Dict, prompt: str, output_label: str,
- analysis_plan: Dict, description: str) -> Dict:
+ async def _createVisualization(self, datasets: Dict, prompt: str, outputLabel: str,
+ analysisPlan: Dict, description: str) -> Dict:
"""
Create visualization document using AI guidance.
Args:
datasets: Dictionary of datasets
prompt: Original task prompt
- output_label: Output filename
- analysis_plan: Analysis plan from AI
+ outputLabel: Output filename
+ analysisPlan: Analysis plan from AI
description: Output description
Returns:
Visualization document
"""
# Determine format from filename
- format_type = output_label.split('.')[-1].lower()
- if format_type not in ['png', 'jpg', 'jpeg', 'svg']:
- format_type = 'png'
+ formatType = outputLabel.split('.')[-1].lower()
+ if formatType not in ['png', 'jpg', 'jpeg', 'svg']:
+ formatType = 'png'
# If no datasets available, create error message image
if not datasets:
@@ -310,58 +310,58 @@ class AgentAnalyst(AgentBase):
plt.text(0.5, 0.5, "No data available for visualization",
ha='center', va='center', fontsize=14)
plt.tight_layout()
- img_data = self._get_image_base64(format_type)
+ imgData = self._getImageBase64(formatType)
plt.close()
return {
- "label": output_label,
- "content": img_data,
+ "label": outputLabel,
+ "content": imgData,
"metadata": {
- "content_type": f"image/{format_type}"
+ "contentType": f"image/{formatType}"
}
}
# Get recommended visualization from plan
- recommended_viz = analysis_plan.get("recommended_visualizations", [])
+ recommendedViz = analysisPlan.get("recommendedVisualizations", [])
# Prepare dataset info for the first dataset if none specified
- if not recommended_viz and datasets:
+ if not recommendedViz and datasets:
name, df = next(iter(datasets.items()))
- recommended_viz = [{
+ recommendedViz = [{
"type": "auto",
- "data_source": name,
+ "dataSource": name,
"variables": df.columns.tolist()[:5],
"purpose": "general analysis"
}]
# Create visualization code prompt
- viz_prompt = f"""
+ vizPrompt = f"""
Generate Python matplotlib/seaborn code to create a visualization for:
TASK: {prompt}
VISUALIZATION REQUIREMENTS:
- - Output format: {format_type}
- - Filename: {output_label}
+ - Output format: {formatType}
+ - Filename: {outputLabel}
- Description: {description}
RECOMMENDED VISUALIZATION:
- {json.dumps(recommended_viz, indent=2)}
+ {json.dumps(recommendedViz, indent=2)}
AVAILABLE DATASETS:
"""
# Add dataset info for recommended sources
- for viz in recommended_viz:
- data_source = viz.get("data_source")
- if data_source in datasets:
- df = datasets[data_source]
- viz_prompt += f"\nDataset '{data_source}':\n"
- viz_prompt += f"- Shape: {df.shape}\n"
- viz_prompt += f"- Columns: {df.columns.tolist()}\n"
- viz_prompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
+ for viz in recommendedViz:
+ dataSource = viz.get("dataSource")
+ if dataSource in datasets:
+ df = datasets[dataSource]
+ vizPrompt += f"\nDataset '{dataSource}':\n"
+ vizPrompt += f"- Shape: {df.shape}\n"
+ vizPrompt += f"- Columns: {df.columns.tolist()}\n"
+ vizPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
- viz_prompt += """
+ vizPrompt += """
Generate ONLY Python code that:
1. Uses matplotlib and/or seaborn to create a clear visualization
2. Sets figure size to (10, 6)
@@ -374,19 +374,19 @@ class AgentAnalyst(AgentBase):
try:
# Get visualization code from AI
- viz_code = await self.mydom.call_ai([
+ vizCode = await self.mydom.callAi([
{"role": "system", "content": "You are a data visualization expert. Provide only executable Python code."},
- {"role": "user", "content": viz_prompt}
- ], produce_user_answer = True)
+ {"role": "user", "content": vizPrompt}
+ ], produceUserAnswer = True)
# Clean code
- viz_code = viz_code.replace("```python", "").replace("```", "").strip()
+ vizCode = vizCode.replace("```python", "").replace("```", "").strip()
# Execute visualization code
plt.figure(figsize=(10, 6))
# Make local variables available to the code
- local_vars = {
+ localVars = {
"plt": plt,
"sns": sns,
"pd": pd,
@@ -396,27 +396,27 @@ class AgentAnalyst(AgentBase):
# Add datasets to local variables
for name, df in datasets.items():
# Create a sanitized variable name
- var_name = ''.join(c if c.isalnum() else '_' for c in name)
- local_vars[var_name] = df
+ varName = ''.join(c if c.isalnum() else '_' for c in name)
+ localVars[varName] = df
# Also add with standard names for simpler code
- if "df" not in local_vars:
- local_vars["df"] = df
- elif "df2" not in local_vars:
- local_vars["df2"] = df
+ if "df" not in localVars:
+ localVars["df"] = df
+ elif "df2" not in localVars:
+ localVars["df2"] = df
# Execute the visualization code
- exec(viz_code, globals(), local_vars)
+ exec(vizCode, globals(), localVars)
# Capture the image
- img_data = self._get_image_base64(format_type)
+ imgData = self._getImageBase64(formatType)
plt.close()
return {
- "label": output_label,
- "content": img_data,
+ "label": outputLabel,
+ "content": imgData,
"metadata": {
- "content_type": f"image/{format_type}"
+ "contentType": f"image/{formatType}"
}
}
@@ -428,70 +428,70 @@ class AgentAnalyst(AgentBase):
plt.text(0.5, 0.5, f"Visualization error: {str(e)}",
ha='center', va='center', fontsize=12)
plt.tight_layout()
- img_data = self._get_image_base64(format_type)
+ imgData = self._getImageBase64(formatType)
plt.close()
return {
- "label": output_label,
- "content": img_data,
+ "label": outputLabel,
+ "content": imgData,
"metadata": {
- "content_type": f"image/{format_type}"
+ "contentType": f"image/{formatType}"
}
}
- async def _create_data_document(self, datasets: Dict, prompt: str, output_label: str,
- analysis_plan: Dict, description: str) -> Dict:
+ async def _createDataDocument(self, datasets: Dict, prompt: str, outputLabel: str,
+ analysisPlan: Dict, description: str) -> Dict:
"""
Create a data document (e.g., CSV, JSON) based on analysis.
Args:
datasets: Dictionary of datasets
prompt: Original task prompt
- output_label: Output filename
- analysis_plan: Analysis plan from AI
+ outputLabel: Output filename
+ analysisPlan: Analysis plan from AI
description: Output description
Returns:
Data document
"""
# Determine format from filename
- format_type = output_label.split('.')[-1].lower()
+ formatType = outputLabel.split('.')[-1].lower()
# If no datasets available, return error message
if not datasets:
return {
- "label": output_label,
- "content": f"No data available for processing into {format_type} format.",
+ "label": outputLabel,
+ "content": f"No data available for processing into {formatType} format.",
"metadata": {
- "content_type": "text/plain"
+ "contentType": "text/plain"
}
}
# Generate data processing instructions
- data_prompt = f"""
- Create Python code to process datasets and generate a {format_type} file for:
+ dataPrompt = f"""
+ Create Python code to process datasets and generate a {formatType} file for:
TASK: {prompt}
OUTPUT REQUIREMENTS:
- - Format: {format_type}
- - Filename: {output_label}
+ - Format: {formatType}
+ - Filename: {outputLabel}
- Description: {description}
ANALYSIS CONTEXT:
- {json.dumps(analysis_plan, indent=2)}
+ {json.dumps(analysisPlan, indent=2)}
AVAILABLE DATASETS:
"""
# Add dataset info
for name, df in datasets.items():
- data_prompt += f"\nDataset '{name}':\n"
- data_prompt += f"- Shape: {df.shape}\n"
- data_prompt += f"- Columns: {df.columns.tolist()}\n"
- data_prompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
+ dataPrompt += f"\nDataset '{name}':\n"
+ dataPrompt += f"- Shape: {df.shape}\n"
+ dataPrompt += f"- Columns: {df.columns.tolist()}\n"
+ dataPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
- data_prompt += """
+ dataPrompt += """
Generate Python code that:
1. Processes the available dataset(s)
2. Performs necessary transformations, aggregations, or calculations
@@ -503,46 +503,46 @@ class AgentAnalyst(AgentBase):
try:
# Get data processing code from AI
- data_code = await self.mydom.call_ai([
+ dataCode = await self.mydom.callAi([
{"role": "system", "content": "You are a data processing expert. Provide only executable Python code."},
- {"role": "user", "content": data_prompt}
- ], produce_user_answer = True)
+ {"role": "user", "content": dataPrompt}
+ ], produceUserAnswer = True)
# Clean code
- data_code = data_code.replace("```python", "").replace("```", "").strip()
+ dataCode = dataCode.replace("```python", "").replace("```", "").strip()
# Setup execution environment
- local_vars = {"pd": pd, "np": __import__('numpy'), "io": io}
+ localVars = {"pd": pd, "np": __import__('numpy'), "io": io}
# Add datasets to local variables
for name, df in datasets.items():
# Create a sanitized variable name
- var_name = ''.join(c if c.isalnum() else '_' for c in name)
- local_vars[var_name] = df
+ varName = ''.join(c if c.isalnum() else '_' for c in name)
+ localVars[varName] = df
# Also add with standard names for simpler code
- if "df" not in local_vars:
- local_vars["df"] = df
- elif "df2" not in local_vars:
- local_vars["df2"] = df
+ if "df" not in localVars:
+ localVars["df"] = df
+ elif "df2" not in localVars:
+ localVars["df2"] = df
# Execute the code
- exec(data_code, globals(), local_vars)
+ exec(dataCode, globals(), localVars)
# Get the result
- result = local_vars.get("result", "No output was generated.")
+ result = localVars.get("result", "No output was generated.")
# Determine content type
- content_type = "text/csv" if format_type == "csv" else \
- "application/json" if format_type == "json" else \
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if format_type == "xlsx" else \
+ contentType = "text/csv" if formatType == "csv" else \
+ "application/json" if formatType == "json" else \
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if formatType == "xlsx" else \
"text/plain"
return {
- "label": output_label,
+ "label": outputLabel,
"content": result,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
@@ -550,16 +550,16 @@ class AgentAnalyst(AgentBase):
logger.error(f"Error creating data document: {str(e)}", exc_info=True)
return {
- "label": output_label,
- "content": f"Error generating {format_type} document: {str(e)}",
+ "label": outputLabel,
+ "content": f"Error generating {formatType} document: {str(e)}",
"metadata": {
- "content_type": "text/plain"
+ "contentType": "text/plain"
}
}
- async def _create_text_document(self, datasets: Dict, context: str, prompt: str,
- output_label: str, format_type: str,
- analysis_plan: Dict, description: str) -> Dict:
+ async def _createTextDocument(self, datasets: Dict, context: str, prompt: str,
+ outputLabel: str, formatType: str,
+ analysisPlan: Dict, description: str) -> Dict:
"""
Create a text document (report, analysis, etc.) based on analysis.
@@ -567,52 +567,52 @@ class AgentAnalyst(AgentBase):
datasets: Dictionary of datasets
context: Document context text
prompt: Original task prompt
- output_label: Output filename
- format_type: Output format type
- analysis_plan: Analysis plan from AI
+ outputLabel: Output filename
+ formatType: Output format type
+ analysisPlan: Analysis plan from AI
description: Output description
Returns:
Text document
"""
# Create dataset summaries
- dataset_summaries = []
+ datasetSummaries = []
for name, df in datasets.items():
summary = f"Dataset: {name}\n"
summary += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
summary += f"- Columns: {', '.join(df.columns.tolist())}\n"
# Basic statistics for numeric columns
- numeric_cols = df.select_dtypes(include=['number']).columns
- if len(numeric_cols) > 0:
+ numericCols = df.select_dtypes(include=['number']).columns
+ if len(numericCols) > 0:
summary += "- Numeric Columns Stats:\n"
- for col in numeric_cols[:3]: # Limit to first 3
+ for col in numericCols[:3]: # Limit to first 3
stats = df[col].describe()
summary += f" - {col}: min={stats['min']:.2f}, max={stats['max']:.2f}, mean={stats['mean']:.2f}\n"
- dataset_summaries.append(summary)
+ datasetSummaries.append(summary)
# Determine content type based on format
- content_type = "text/markdown" if format_type in ["md", "markdown"] else \
- "text/html" if format_type == "html" else \
+ contentType = "text/markdown" if formatType in ["md", "markdown"] else \
+ "text/html" if formatType == "html" else \
"text/plain"
# Generate analysis prompt
- analysis_prompt = f"""
- Create a detailed {format_type} document for:
+ analysisPrompt = f"""
+ Create a detailed {formatType} document for:
TASK: {prompt}
OUTPUT REQUIREMENTS:
- - Format: {format_type}
- - Filename: {output_label}
+ - Format: {formatType}
+ - Filename: {outputLabel}
- Description: {description}
ANALYSIS CONTEXT:
- {json.dumps(analysis_plan, indent=2)}
+ {json.dumps(analysisPlan, indent=2)}
DATASET SUMMARIES:
- {"".join(dataset_summaries)}
+ {"".join(datasetSummaries)}
DOCUMENT CONTEXT:
{context[:2000]}... (truncated)
@@ -629,22 +629,22 @@ class AgentAnalyst(AgentBase):
try:
# Get document content from AI
- document_content = await self.mydom.call_ai([
- {"role": "system", "content": f"You are a data analysis expert creating a {format_type} document."},
- {"role": "user", "content": analysis_prompt}
- ], produce_user_answer = True)
+ documentContent = await self.mydom.callAi([
+ {"role": "system", "content": f"You are a data analysis expert creating a {formatType} document."},
+ {"role": "user", "content": analysisPrompt}
+ ], produceUserAnswer = True)
# Clean HTML or Markdown if needed
- if format_type in ["md", "markdown"] and not document_content.strip().startswith("#"):
- document_content = f"# Analysis Report\n\n{document_content}"
- elif format_type == "html" and not "{document_content}"
+ if formatType in ["md", "markdown"] and not documentContent.strip().startswith("#"):
+ documentContent = f"# Analysis Report\n\n{documentContent}"
+ elif formatType == "html" and not "{documentContent}"
return {
- "label": output_label,
- "content": document_content,
+ "label": outputLabel,
+ "content": documentContent,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
@@ -652,43 +652,43 @@ class AgentAnalyst(AgentBase):
logger.error(f"Error creating text document: {str(e)}", exc_info=True)
# Create a simple error document
- if format_type in ["md", "markdown"]:
+ if formatType in ["md", "markdown"]:
content = f"# Error in Analysis\n\nThere was an error generating the analysis: {str(e)}"
- elif format_type == "html":
+ elif formatType == "html":
content = f"
Error in Analysis
There was an error generating the analysis: {str(e)}
"
else:
content = f"Error in Analysis\n\nThere was an error generating the analysis: {str(e)}"
return {
- "label": output_label,
+ "label": outputLabel,
"content": content,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
- def _get_image_base64(self, format_type: str = 'png') -> str:
+ def _getImageBase64(self, formatType: str = 'png') -> str:
"""
Convert current matplotlib figure to base64 string.
Args:
- format_type: Image format
+ formatType: Image format
Returns:
Base64 encoded string of the image
"""
buffer = io.BytesIO()
- plt.savefig(buffer, format=format_type, dpi=100)
+ plt.savefig(buffer, format=formatType, dpi=100)
buffer.seek(0)
- image_data = buffer.getvalue()
+ imageData = buffer.getvalue()
buffer.close()
# Convert to base64
- image_base64 = base64.b64encode(image_data).decode('utf-8')
- return image_base64
+ imageBase64 = base64.b64encode(imageData).decode('utf-8')
+ return imageBase64
# Factory function for the Analyst agent
-def get_analyst_agent():
+def getAgentAnalyst():
"""Returns an instance of the Analyst agent."""
return AgentAnalyst()
\ No newline at end of file
diff --git a/modules/chat_agent_coder.py b/modules/agentCoder.py
similarity index 61%
rename from modules/chat_agent_coder.py
rename to modules/agentCoder.py
index 8070fc8f..215c37cf 100644
--- a/modules/chat_agent_coder.py
+++ b/modules/agentCoder.py
@@ -11,7 +11,7 @@ import shutil
import sys
from typing import Dict, Any, List, Tuple
-from modules.chat_registry import AgentBase
+from modules.workflowAgentsRegistry import AgentBase
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@@ -33,30 +33,30 @@ class AgentCoder(AgentBase):
]
# Executor settings
- self.executor_timeout = int(APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT")) # seconds
- self.execution_retry_limit = int(APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY")) # max retries
- self.temp_dir = None
+ self.executorTimeout = int(APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT")) # seconds
+ self.executionRetryLimit = int(APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY")) # max retries
+ self.tempDir = None
- def set_dependencies(self, mydom=None):
+ def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
- async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
+ async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a task and perform code development/execution.
First checks if the task can be completed without code execution,
then falls back to code generation if needed.
Args:
- task: Task dictionary with prompt, input_documents, output_specifications
+ task: Task dictionary with prompt, inputDocuments, outputSpecifications
Returns:
Dictionary with feedback and documents
"""
# 1. Extract task information
prompt = task.get("prompt", "")
- input_documents = task.get("input_documents", [])
- output_specs = task.get("output_specifications", [])
+ inputDocuments = task.get("inputDocuments", [])
+ outputSpecs = task.get("outputSpecifications", [])
# Check if AI service is available
if not self.mydom:
@@ -67,59 +67,59 @@ class AgentCoder(AgentBase):
}
# 2. Extract data from documents in separate categories
- document_data = [] # For raw file data (for code execution)
- content_data = [] # For content data (later use)
- content_extraction = [] # For AI-extracted data (for quick completion)
+ documentData = [] # For raw file data (for code execution)
+ contentData = [] # For content data (later use)
+ contentExtraction = [] # For AI-extracted data (for quick completion)
- for doc in input_documents:
+ for doc in inputDocuments:
# Create proper filename from name and ext
filename = f"{doc.get('name')}.{doc.get('ext')}" if doc.get('ext') else doc.get('name')
- # Add main document data to document_data if it exists
- doc_data = doc.get('data', '')
- if doc_data:
- is_base64 = True # Assume base64 encoded for document data
- document_data.append([filename, doc_data, is_base64])
+ # Add main document data to documentData if it exists
+ docData = doc.get('data', '')
+ if docData:
+ isBase64 = True # Assume base64 encoded for document data
+ documentData.append([filename, docData, isBase64])
# Process contents for different uses
if doc.get('contents'):
for content in doc.get('contents', []):
- content_name = content.get('name', 'unnamed')
+ contentName = content.get('name', 'unnamed')
# For AI-extracted data (quick completion)
- if content.get('data_extracted'):
- content_extraction.append({
+ if content.get('dataExtracted'):
+ contentExtraction.append({
"filename": filename,
- "content_name": content_name,
- "content_data": content.get('data_extracted', ''),
- "content_type": content.get('content_type', ''),
+ "contentName": contentName,
+ "contentData": content.get('dataExtracted', ''),
+ "contentType": content.get('contentType', ''),
"summary": content.get('summary', '')
})
# For raw content data
if content.get('data'):
- raw_data = content.get('data', '')
- is_base64 = content.get('metadata', {}).get('base64_encoded', False)
- content_data.append({
+ rawData = content.get('data', '')
+ isBase64 = content.get('metadata', {}).get('base64Encoded', False)
+ contentData.append({
"filename": filename,
- "content_name": content_name,
- "data": raw_data,
- "is_base64": is_base64,
- "content_type": content.get('content_type', '')
+ "contentName": contentName,
+ "data": rawData,
+ "isBase64": isBase64,
+ "contentType": content.get('contentType', '')
})
- # Also add to document_data for code execution if not already added
- if not doc_data or doc_data != raw_data:
- document_data.append([filename, raw_data, is_base64])
+ # Also add to documentData for code execution if not already added
+ if not docData or docData != rawData:
+ documentData.append([filename, rawData, isBase64])
# 3. Check if task can be completed without code execution
- quick_completion = await self._check_quick_completion(prompt, content_extraction, output_specs)
+ quickCompletion = await self._checkQuickCompletion(prompt, contentExtraction, outputSpecs)
- if quick_completion and quick_completion.get("complete") == 1:
+ if quickCompletion and quickCompletion.get("complete") == 1:
logger.info("Task completed without code execution")
return {
- "feedback": quick_completion.get("prompt", "Task completed successfully."),
- "documents": quick_completion.get("documents", [])
+ "feedback": quickCompletion.get("prompt", "Task completed successfully."),
+ "documents": quickCompletion.get("documents", [])
}
else:
logger.debug(f"Code to generate, no quick check")
@@ -128,7 +128,7 @@ class AgentCoder(AgentBase):
logger.info("Generating code to solve the task")
# 4. Generate code using AI
- code, requirements = await self._generate_code(prompt)
+ code, requirements = await self._generateCode(prompt)
if not code:
return {
@@ -136,53 +136,53 @@ class AgentCoder(AgentBase):
"documents": []
}
- # 5. Replace the placeholder with actual input_files data
- document_data_json = repr(document_data)
- code_with_data = code.replace("input_files = \"=== JSONLOAD ===\"", f"input_files = {document_data_json}")
+ # 5. Replace the placeholder with actual inputFiles data
+ documentDataJson = repr(documentData)
+ codeWithData = code.replace("inputFiles = \"=== JSONLOAD ===\"", f"inputFiles = {documentDataJson}")
# 6. Execute code with retry logic
- retry_count = 0
- max_retries = self.execution_retry_limit
- execution_history = []
+ retryCount = 0
+ maxRetries = self.executionRetryLimit
+ executionHistory = []
- while retry_count <= max_retries:
- execution_result = self._execute_code(code_with_data, requirements)
- execution_history.append({
- "attempt": retry_count + 1,
- "code": code_with_data,
- "result": execution_result
+ while retryCount <= maxRetries:
+ executionResult = self._executeCode(codeWithData, requirements)
+ executionHistory.append({
+ "attempt": retryCount + 1,
+ "code": codeWithData,
+ "result": executionResult
})
# Check if execution was successful
- if execution_result.get("success", False):
- logger.info(f"Code execution succeeded on attempt {retry_count + 1}")
+ if executionResult.get("success", False):
+ logger.info(f"Code execution succeeded on attempt {retryCount + 1}")
break
# If we've reached max retries, exit the loop
- if retry_count >= max_retries:
- logger.info(f"Reached maximum retry limit ({max_retries}). Giving up.")
+ if retryCount >= maxRetries:
+ logger.info(f"Reached maximum retry limit ({maxRetries}). Giving up.")
break
# Log the error and attempt to improve the code
- error = execution_result.get("error", "Unknown error")
- logger.info(f"Execution attempt {retry_count + 1} failed: {error}. Attempting to improve code.")
+ error = executionResult.get("error", "Unknown error")
+ logger.info(f"Execution attempt {retryCount + 1} failed: {error}. Attempting to improve code.")
# Generate improved code based on error
- improved_code, improved_requirements = await self._improve_code(
- original_code=code_with_data,
+ improvedCode, improvedRequirements = await self._improveCode(
+ originalCode=codeWithData,
error=error,
- execution_result=execution_result,
- attempt=retry_count + 1
+ executionResult=executionResult,
+ attempt=retryCount + 1
)
- if improved_code:
- code_with_data = improved_code
- requirements = improved_requirements
- logger.info(f"Code improved for retry {retry_count + 2}")
+ if improvedCode:
+ codeWithData = improvedCode
+ requirements = improvedRequirements
+ logger.info(f"Code improved for retry {retryCount + 2}")
else:
logger.warning("Failed to improve code, using original code for retry")
- retry_count += 1
+ retryCount += 1
# 7. Process results and create output documents
documents = []
@@ -190,32 +190,32 @@ class AgentCoder(AgentBase):
# Always add the final code document
documents.append({
"label": "generated_code.py",
- "content": code_with_data
+ "content": codeWithData
})
# Add execution history document
- execution_history_str = json.dumps(execution_history, indent=2)
+ executionHistoryStr = json.dumps(executionHistory, indent=2)
documents.append({
"label": "execution_history.json",
- "content": execution_history_str
+ "content": executionHistoryStr
})
# Create documents based on execution results
- if execution_result.get("success", False):
- result_data = execution_result.get("result")
+ if executionResult.get("success", False):
+ resultData = executionResult.get("result")
# Create documents based on output specifications
- if output_specs:
- for spec in output_specs:
+ if outputSpecs:
+ for spec in outputSpecs:
label = spec.get("label", "output.txt")
# Extract content from result if available
content = ""
- if isinstance(result_data, dict) and label in result_data:
- content = result_data[label]
+ if isinstance(resultData, dict) and label in resultData:
+ content = resultData[label]
else:
# Default to execution output
- content = execution_result.get("output", "")
+ content = executionResult.get("output", "")
documents.append({
"label": label,
@@ -225,23 +225,23 @@ class AgentCoder(AgentBase):
# No output specs, create default output document
documents.append({
"label": "execution_output.txt",
- "content": execution_result.get("output", "")
+ "content": executionResult.get("output", "")
})
- if retry_count > 0:
- feedback = f"Code executed successfully after {retry_count + 1} attempts. Generated output files based on specifications."
+ if retryCount > 0:
+ feedback = f"Code executed successfully after {retryCount + 1} attempts. Generated output files based on specifications."
else:
feedback = "Code executed successfully. Generated output files based on specifications."
else:
# Execution failed
- error = execution_result.get("error", "Unknown error")
+ error = executionResult.get("error", "Unknown error")
documents.append({
"label": "execution_error.txt",
"content": f"Error executing code:\n\n{error}"
})
- if retry_count > 0:
- feedback = f"Error during code execution after {retry_count + 1} attempts: {error}"
+ if retryCount > 0:
+ feedback = f"Error during code execution after {retryCount + 1} attempts: {error}"
else:
feedback = f"Error during code execution: {error}"
@@ -250,31 +250,31 @@ class AgentCoder(AgentBase):
"documents": documents
}
- async def _improve_code(self, original_code: str, error: str, execution_result: Dict[str, Any], attempt: int) -> Tuple[str, List[str]]:
+ async def _improveCode(self, originalCode: str, error: str, executionResult: Dict[str, Any], attempt: int) -> Tuple[str, List[str]]:
"""
Improve code based on execution error.
Args:
- original_code: The code that failed to execute
+ originalCode: The code that failed to execute
error: The error message
- execution_result: Complete execution result dictionary
+ executionResult: Complete execution result dictionary
attempt: Current attempt number
Returns:
- Tuple of (improved_code, requirements)
+ Tuple of (improvedCode, requirements)
"""
# Create prompt for code improvement
- improvement_prompt = f"""
+ improvementPrompt = f"""
Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code.
ORIGINAL CODE:
-{original_code}
+{originalCode}
ERROR MESSAGE:
{error}
STDOUT:
-{execution_result.get('output', '')}
+{executionResult.get('output', '')}
INSTRUCTIONS:
1. Fix all errors identified in the error message
@@ -284,13 +284,13 @@ INSTRUCTIONS:
- Error handling and edge cases
- Resource management (file handles, etc.)
- Syntax errors and typos
-4. Keep the input_files handling logic intact
+4. Keep the inputFiles handling logic intact
5. Maintain the same overall structure and purpose
OUTPUT:
- Your improved code MUST still define a 'result' variable as a dictionary
- Each output file should be a key in the result dictionary
-- DO NOT remove the input_files assignment line structure
+- DO NOT remove the inputFiles assignment line structure
REQUIREMENTS:
Required packages should be specified as:
@@ -303,66 +303,66 @@ Return ONLY Python code without explanations or markdown.
# Call AI service
messages = [
{"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting."},
- {"role": "user", "content": improvement_prompt}
+ {"role": "user", "content": improvementPrompt}
]
try:
- improved_content = await self.mydom.call_ai(messages, temperature=0.2)
+ improvedContent = await self.mydom.callAi(messages, temperature=0.2)
# Extract code and requirements
- improved_code = self._clean_code(improved_content)
+ improvedCode = self._cleanCode(improvedContent)
# Extract requirements
requirements = []
- for line in improved_code.split('\n'):
+ for line in improvedCode.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
- req_str = line.replace("# REQUIREMENTS:", "").strip()
- requirements = [r.strip() for r in req_str.split(',') if r.strip()]
+ reqStr = line.replace("# REQUIREMENTS:", "").strip()
+ requirements = [r.strip() for r in reqStr.split(',') if r.strip()]
break
- return improved_code, requirements
+ return improvedCode, requirements
except Exception as e:
logger.error(f"Error improving code: {str(e)}")
return None, []
- async def _check_quick_completion(self, prompt: str, content_extraction: List[Dict], output_specs: List[Dict]) -> Dict:
+ async def _checkQuickCompletion(self, prompt: str, contentExtraction: List[Dict], outputSpecs: List[Dict]) -> Dict:
"""
Check if the task can be completed without writing and executing code.
Args:
prompt: The task prompt
- content_extraction: List of extracted content data with content_name and data_extracted
- output_specs: List of output specifications
+ contentExtraction: List of extracted content data with contentName and dataExtracted
+ outputSpecs: List of output specifications
Returns:
Dictionary with completion status and results, or None if no quick completion
"""
# If no data or no output specs, can't do a quick completion
- if not content_extraction or not output_specs:
+ if not contentExtraction or not outputSpecs:
return None
# Create a prompt for the AI to check if this can be completed directly
- specs_json = json.dumps(output_specs)
- data_json = json.dumps(content_extraction)
+ specsJson = json.dumps(outputSpecs)
+ dataJson = json.dumps(contentExtraction)
- check_prompt = f"""
+ checkPrompt = f"""
Analyze this task and determine if it can be completed directly without writing code.
TASK:
{prompt}
EXTRACTED DATA AVAILABLE:
-{data_json}
+{dataJson}
Each entry in the extracted data contains:
- filename: The source file name
-- content_name: The specific content section name
-- content_data: The AI-extracted text from the content
-- content_type: The type of content (text, csv, etc.)
+- contentName: The specific content section name
+- contentData: The AI-extracted text from the content
+- contentType: The type of content (text, csv, etc.)
- summary: A brief summary of the content
REQUIRED OUTPUT:
-{specs_json}
+{specsJson}
If the task can be completed directly with the available extracted data, respond with:
{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [
@@ -376,26 +376,26 @@ Only return valid JSON. Your entire response must be parseable as JSON.
"""
# Call AI service
- logger.debug(f"Checking if task can be completed without code execution: {check_prompt}")
+ logger.debug(f"Checking if task can be completed without code execution: {checkPrompt}")
messages = [
{"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."},
- {"role": "user", "content": check_prompt}
+ {"role": "user", "content": checkPrompt}
]
try:
# Use a lower temperature for more deterministic response
- response = await self.mydom.call_ai(messages, produce_user_answer = True, temperature=0.1)
+ response = await self.mydom.callAi(messages, produceUserAnswer = True, temperature=0.1)
# Parse response as JSON
if response:
try:
# Find JSON in response if there's any text around it
- json_start = response.find('{')
- json_end = response.rfind('}') + 1
+ jsonStart = response.find('{')
+ jsonEnd = response.rfind('}') + 1
- if json_start >= 0 and json_end > json_start:
- json_str = response[json_start:json_end]
- result = json.loads(json_str)
+ if jsonStart >= 0 and jsonEnd > jsonStart:
+ jsonStr = response[jsonStart:jsonEnd]
+ result = json.loads(jsonStr)
# Check if this is a proper response
if "complete" in result:
@@ -410,28 +410,27 @@ Only return valid JSON. Your entire response must be parseable as JSON.
# Default to requiring code execution
return None
- async def _generate_code(self, prompt: str) -> Tuple[str, List[str]]:
+ async def _generateCode(self, prompt: str) -> Tuple[str, List[str]]:
"""
- Generate Python code from a prompt with the input_files placeholder.
+ Generate Python code from a prompt with the inputFiles placeholder.
Args:
prompt: The task prompt
- input_files: List of [filename, data, is_base64] items
Returns:
Tuple of (code, requirements)
"""
# Create prompt for code generation
- ai_prompt = f"""
+ aiPrompt = f"""
Generate Python code to solve the following task:
TASK:
{prompt}
INPUT FILES:
-- 'input_files' variable is provided as [[filename, data, is_base64], ...]
-- For text files (is_base64=False): use data directly as string
-- For binary files (is_base64=True): use base64.b64decode(data)
+- 'inputFiles' variable is provided as [[filename, data, isBase64], ...]
+- For text files (isBase64=False): use data directly as string
+- For binary files (isBase64=True): use base64.b64decode(data)
CODE QUALITY:
- Use explicit type conversions where needed (int/float/str)
@@ -446,7 +445,7 @@ OUTPUT:
- For example: result = {{"output.txt": "output text", "results.json": json_string}}
Your code must start with:
-input_files = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE
+inputFiles = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE
REQUIREMENTS:
Required packages should be specified as:
@@ -460,25 +459,25 @@ Return ONLY Python code without explanations or markdown.
# Call AI service
messages = [
{"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting."},
- {"role": "user", "content": ai_prompt}
+ {"role": "user", "content": aiPrompt}
]
- generated_content = await self.mydom.call_ai(messages, temperature=0.1)
+ generatedContent = await self.mydom.callAi(messages, temperature=0.1)
# Extract code and requirements
- code = self._clean_code(generated_content)
+ code = self._cleanCode(generatedContent)
# Extract requirements
requirements = []
for line in code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
- req_str = line.replace("# REQUIREMENTS:", "").strip()
- requirements = [r.strip() for r in req_str.split(',') if r.strip()]
+ reqStr = line.replace("# REQUIREMENTS:", "").strip()
+ requirements = [r.strip() for r in reqStr.split(',') if r.strip()]
break
return code, requirements
- def _execute_code(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
+ def _executeCode(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
"""
Execute Python code in a virtual environment.
Integrated executor functionality.
@@ -492,24 +491,24 @@ Return ONLY Python code without explanations or markdown.
"""
try:
# 1. Create temp directory and virtual environment
- self.temp_dir = tempfile.mkdtemp(prefix="code_exec_")
- venv_path = os.path.join(self.temp_dir, "venv")
+ self.tempDir = tempfile.mkdtemp(prefix="code_exec_")
+ venvPath = os.path.join(self.tempDir, "venv")
# Create venv
- logger.debug(f"Creating virtual environment at {venv_path}")
- subprocess.run([sys.executable, "-m", "venv", venv_path],
+ logger.debug(f"Creating virtual environment at {venvPath}")
+ subprocess.run([sys.executable, "-m", "venv", venvPath],
check=True, capture_output=True)
# Get Python executable path
- python_exe = os.path.join(venv_path, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venv_path, "bin", "python")
+ pythonExe = os.path.join(venvPath, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venvPath, "bin", "python")
# 2. Install requirements if provided
if requirements:
logger.info(f"Installing requirements: {requirements}")
# Create requirements.txt
- req_file = os.path.join(self.temp_dir, "requirements.txt")
- with open(req_file, "w") as f:
+ reqFile = os.path.join(self.tempDir, "requirements.txt")
+ with open(reqFile, "w") as f:
f.write("\n".join(requirements))
x="\n".join(requirements)
@@ -517,38 +516,38 @@ Return ONLY Python code without explanations or markdown.
# Install requirements
try:
- pip_result = subprocess.run(
- [python_exe, "-m", "pip", "install", "-r", req_file],
+ pipResult = subprocess.run(
+ [pythonExe, "-m", "pip", "install", "-r", reqFile],
capture_output=True,
text=True,
timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT"))
)
- if pip_result.returncode != 0:
- logger.debug(f"Error installing requirements: {pip_result.stderr}")
+ if pipResult.returncode != 0:
+ logger.debug(f"Error installing requirements: {pipResult.stderr}")
else:
logger.debug(f"Requirements installed successfully")
# Log installed packages if in debug mode
if logger.isEnabledFor(logging.DEBUG):
- pip_list = subprocess.run(
- [python_exe, "-m", "pip", "list"],
+ pipList = subprocess.run(
+ [pythonExe, "-m", "pip", "list"],
capture_output=True,
text=True
)
- logger.debug(f"Installed packages:\n{pip_list.stdout}")
+ logger.debug(f"Installed packages:\n{pipList.stdout}")
except Exception as e:
logger.debug(f"Exception during requirements installation: {str(e)}")
# 3. Write code to file
- code_file = os.path.join(self.temp_dir, "code.py")
- with open(code_file, "w", encoding="utf-8") as f:
+ codeFile = os.path.join(self.tempDir, "code.py")
+ with open(codeFile, "w", encoding="utf-8") as f:
f.write(code)
# 4. Execute code
- logger.debug(f"Executing code with timeout of {self.executor_timeout} seconds. Code: {code}")
+ logger.debug(f"Executing code with timeout of {self.executorTimeout} seconds. Code: {code}")
process = subprocess.run(
- [python_exe, code_file],
- timeout=self.executor_timeout,
+ [pythonExe, codeFile],
+ timeout=self.executorTimeout,
capture_output=True,
text=True
)
@@ -558,7 +557,7 @@ Return ONLY Python code without explanations or markdown.
stderr = process.stderr
# Try to extract result from stdout
- result_data = None
+ resultData = None
if process.returncode == 0:
try:
# Find the last line that might be JSON
@@ -566,8 +565,8 @@ Return ONLY Python code without explanations or markdown.
line = line.strip()
if line and line[0] in '{[' and line[-1] in '}]':
try:
- result_data = json.loads(line)
- logger.debug(f"Extracted result data from stdout: {type(result_data)}")
+ resultData = json.loads(line)
+ logger.debug(f"Extracted result data from stdout: {type(resultData)}")
break
except json.JSONDecodeError:
continue
@@ -579,18 +578,18 @@ Return ONLY Python code without explanations or markdown.
"success": process.returncode == 0,
"output": stdout,
"error": stderr if process.returncode != 0 else "",
- "result": result_data,
- "exit_code": process.returncode
+ "result": resultData,
+ "exitCode": process.returncode
}
except subprocess.TimeoutExpired:
- logger.error(f"Execution timed out after {self.executor_timeout} seconds")
+ logger.error(f"Execution timed out after {self.executorTimeout} seconds")
return {
"success": False,
"output": "",
- "error": f"Execution timed out after {self.executor_timeout} seconds",
+ "error": f"Execution timed out after {self.executorTimeout} seconds",
"result": None,
- "exit_code": -1
+ "exitCode": -1
}
except Exception as e:
logger.error(f"Execution error: {str(e)}")
@@ -599,44 +598,44 @@ Return ONLY Python code without explanations or markdown.
"output": "",
"error": f"Execution error: {str(e)}",
"result": None,
- "exit_code": -1
+ "exitCode": -1
}
finally:
# Clean up resources
- self._cleanup_execution()
+ self._cleanupExecution()
- def _cleanup_execution(self):
+ def _cleanupExecution(self):
"""Clean up temporary resources from code execution."""
- if self.temp_dir and os.path.exists(self.temp_dir):
+ if self.tempDir and os.path.exists(self.tempDir):
try:
- logger.debug(f"Cleaning up temporary directory: {self.temp_dir}")
- shutil.rmtree(self.temp_dir)
- self.temp_dir = None
+ logger.debug(f"Cleaning up temporary directory: {self.tempDir}")
+ shutil.rmtree(self.tempDir)
+ self.tempDir = None
except Exception as e:
logger.warning(f"Error cleaning up temp directory: {str(e)}")
- def _clean_code(self, code: str) -> str:
+ def _cleanCode(self, code: str) -> str:
"""Remove any markdown formatting or explanations."""
# Remove code block markers
code = code.replace("```python", "").replace("```", "")
# Remove explanations before or after code
lines = code.strip().split('\n')
- start_index = 0
- end_index = len(lines)
+ startIndex = 0
+ endIndex = len(lines)
# Find start of actual code
for i, line in enumerate(lines):
- if line.strip().startswith("input_files =") or line.strip().startswith("# REQUIREMENTS:"):
- start_index = i
+ if line.strip().startswith("inputFiles =") or line.strip().startswith("# REQUIREMENTS:"):
+ startIndex = i
break
# Clean code
- cleaned_code = '\n'.join(lines[start_index:end_index])
- return cleaned_code.strip()
+ cleanedCode = '\n'.join(lines[startIndex:endIndex])
+ return cleanedCode.strip()
# Factory function for the Coder agent
-def get_coder_agent():
+def getAgentCoder():
"""Returns an instance of the Coder agent."""
return AgentCoder()
\ No newline at end of file
diff --git a/modules/chat_agent_documentation.py b/modules/agentDocumentation.py
similarity index 53%
rename from modules/chat_agent_documentation.py
rename to modules/agentDocumentation.py
index 3ae98b58..831b8ddd 100644
--- a/modules/chat_agent_documentation.py
+++ b/modules/agentDocumentation.py
@@ -7,7 +7,7 @@ import logging
import json
from typing import Dict, Any, List
-from modules.chat_registry import AgentBase
+from modules.workflowAgentsRegistry import AgentBase
logger = logging.getLogger(__name__)
@@ -27,16 +27,16 @@ class AgentDocumentation(AgentBase):
"knowledge_organization"
]
- def set_dependencies(self, mydom=None):
+ def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
- async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
+ async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a task by focusing on required outputs and using AI to generate them.
Args:
- task: Task dictionary with prompt, input_documents, output_specifications
+ task: Task dictionary with prompt, inputDocuments, outputSpecifications
Returns:
Dictionary with feedback and documents
@@ -44,8 +44,8 @@ class AgentDocumentation(AgentBase):
try:
# Extract task information
prompt = task.get("prompt", "")
- input_documents = task.get("input_documents", [])
- output_specs = task.get("output_specifications", [])
+ inputDocuments = task.get("inputDocuments", [])
+ outputSpecs = task.get("outputSpecifications", [])
# Check AI service
if not self.mydom:
@@ -54,43 +54,43 @@ class AgentDocumentation(AgentBase):
"documents": []
}
- # Extract context from input documents - focusing only on data_extracted
- document_context = self._extract_document_context(input_documents)
+ # Extract context from input documents - focusing only on dataExtracted
+ documentContext = self._extractDocumentContext(inputDocuments)
# Create task analysis to understand the requirements
- documentation_plan = await self._analyze_task(prompt, document_context, output_specs)
+ documentationPlan = await self._analyzeTask(prompt, documentContext, outputSpecs)
# Generate all required output documents
documents = []
# If no output specs provided, create default document
- if not output_specs:
- default_format = documentation_plan.get("recommended_format", "markdown")
- default_title = documentation_plan.get("title", "Documentation")
- safe_title = self._sanitize_filename(default_title)
+ if not outputSpecs:
+ defaultFormat = documentationPlan.get("recommendedFormat", "markdown")
+ defaultTitle = documentationPlan.get("title", "Documentation")
+ safeTitle = self._sanitizeFilename(defaultTitle)
- output_specs = [
- {"label": f"{safe_title}.{default_format}", "description": "Comprehensive documentation"}
+ outputSpecs = [
+ {"label": f"{safeTitle}.{defaultFormat}", "description": "Comprehensive documentation"}
]
# Process each output specification
- for spec in output_specs:
- output_label = spec.get("label", "")
- output_description = spec.get("description", "")
+ for spec in outputSpecs:
+ outputLabel = spec.get("label", "")
+ outputDescription = spec.get("description", "")
# Generate the document using multi-step approach
- document = await self._create_document_multi_step(
+ document = await self._createDocumentMultiStep(
prompt,
- document_context,
- output_label,
- output_description,
- documentation_plan
+ documentContext,
+ outputLabel,
+ outputDescription,
+ documentationPlan
)
documents.append(document)
# Generate feedback
- feedback = documentation_plan.get("feedback", f"Created {len(documents)} documents based on your requirements.")
+ feedback = documentationPlan.get("feedback", f"Created {len(documents)} documents based on your requirements.")
return {
"feedback": feedback,
@@ -104,9 +104,9 @@ class AgentDocumentation(AgentBase):
"documents": []
}
- def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
+ def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str:
"""
- Extract context from input documents, focusing on data_extracted.
+ Extract context from input documents, focusing on dataExtracted.
Args:
documents: List of document objects
@@ -114,23 +114,23 @@ class AgentDocumentation(AgentBase):
Returns:
Extracted context as text
"""
- context_parts = []
+ contextParts = []
for doc in documents:
- doc_name = doc.get("name", "unnamed")
+ docName = doc.get("name", "unnamed")
if doc.get("ext"):
- doc_name = f"{doc_name}.{doc.get('ext')}"
+ docName = f"{docName}.{doc.get('ext')}"
- context_parts.append(f"\n\n--- {doc_name} ---\n")
+ contextParts.append(f"\n\n--- {docName} ---\n")
- # Process contents for data_extracted
+ # Process contents for dataExtracted
for content in doc.get("contents", []):
- if content.get("data_extracted"):
- context_parts.append(content.get("data_extracted", ""))
+ if content.get("dataExtracted"):
+ contextParts.append(content.get("dataExtracted", ""))
- return "\n".join(context_parts)
+ return "\n".join(contextParts)
- def _sanitize_filename(self, filename: str) -> str:
+ def _sanitizeFilename(self, filename: str) -> str:
"""
Sanitize a filename by removing invalid characters.
@@ -141,8 +141,8 @@ class AgentDocumentation(AgentBase):
Sanitized filename
"""
# Replace invalid characters with underscores
- invalid_chars = r'<>:"/\|?*'
- for char in invalid_chars:
+ invalidChars = r'<>:"/\|?*'
+ for char in invalidChars:
filename = filename.replace(char, '_')
# Trim filename if too long
@@ -151,19 +151,19 @@ class AgentDocumentation(AgentBase):
return filename
- async def _analyze_task(self, prompt: str, context: str, output_specs: List) -> Dict:
+ async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict:
"""
Use AI to analyze the task and create a documentation plan.
Args:
prompt: The task prompt
context: Document context
- output_specs: Output specifications
+ outputSpecs: Output specifications
Returns:
Documentation plan dictionary
"""
- analysis_prompt = f"""
+ analysisPrompt = f"""
Analyze this documentation task and create a detailed plan.
TASK: {prompt}
@@ -172,28 +172,28 @@ class AgentDocumentation(AgentBase):
{context[:1000]}... (truncated)
OUTPUT REQUIREMENTS:
- {json.dumps(output_specs, indent=2)}
+ {json.dumps(outputSpecs, indent=2)}
Create a detailed documentation plan in JSON format with the following structure:
{{
"title": "Document Title",
- "document_type": "report|manual|guide|whitepaper|etc",
+ "documentType": "report|manual|guide|whitepaper|etc",
"audience": "technical|general|executive|etc",
- "detailed_structure": [
+ "detailedStructure": [
{{
"title": "Chapter/Section Title",
- "key_points": ["point1", "point2", ...],
+ "keyPoints": ["point1", "point2", ...],
"subsections": ["subsection1", "subsection2", ...],
"importance": "high|medium|low",
- "estimated_length": "short|medium|long"
+ "estimatedLength": "short|medium|long"
}},
... more sections ...
],
- "key_topics": ["topic1", "topic2", ...],
+ "keyTopics": ["topic1", "topic2", ...],
"tone": "formal|conversational|instructional|etc",
- "recommended_format": "markdown|html|text|etc",
- "formatting_requirements": ["requirement1", "requirement2", ...],
- "executive_summary": "Brief description of what the document will cover",
+ "recommendedFormat": "markdown|html|text|etc",
+ "formattingRequirements": ["requirement1", "requirement2", ...],
+ "executiveSummary": "Brief description of what the document will cover",
"feedback": "Brief message explaining the documentation approach"
}}
@@ -201,52 +201,52 @@ class AgentDocumentation(AgentBase):
"""
try:
- response = await self.mydom.call_ai([
+ response = await self.mydom.callAi([
{"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
- {"role": "user", "content": analysis_prompt}
+ {"role": "user", "content": analysisPrompt}
])
# Extract JSON from response
- json_start = response.find('{')
- json_end = response.rfind('}') + 1
+ jsonStart = response.find('{')
+ jsonEnd = response.rfind('}') + 1
- if json_start >= 0 and json_end > json_start:
- plan = json.loads(response[json_start:json_end])
+ if jsonStart >= 0 and jsonEnd > jsonStart:
+ plan = json.loads(response[jsonStart:jsonEnd])
return plan
else:
# Fallback if JSON not found
return {
"title": "Documentation",
- "document_type": "report",
+ "documentType": "report",
"audience": "general",
- "detailed_structure": [
+ "detailedStructure": [
{
"title": "Introduction",
- "key_points": ["Purpose", "Scope"],
+ "keyPoints": ["Purpose", "Scope"],
"subsections": [],
"importance": "high",
- "estimated_length": "short"
+ "estimatedLength": "short"
},
{
"title": "Main Content",
- "key_points": ["Core Information"],
+ "keyPoints": ["Core Information"],
"subsections": ["Key Findings", "Analysis"],
"importance": "high",
- "estimated_length": "long"
+ "estimatedLength": "long"
},
{
"title": "Conclusion",
- "key_points": ["Summary", "Next Steps"],
+ "keyPoints": ["Summary", "Next Steps"],
"subsections": [],
"importance": "medium",
- "estimated_length": "short"
+ "estimatedLength": "short"
}
],
- "key_topics": ["General Information"],
+ "keyTopics": ["General Information"],
"tone": "formal",
- "recommended_format": "markdown",
- "formatting_requirements": ["Clear headings", "Professional formatting"],
- "executive_summary": "A comprehensive documentation covering the requested topics.",
+ "recommendedFormat": "markdown",
+ "formattingRequirements": ["Clear headings", "Professional formatting"],
+ "executiveSummary": "A comprehensive documentation covering the requested topics.",
"feedback": "Created documentation based on your requirements."
}
@@ -254,59 +254,59 @@ class AgentDocumentation(AgentBase):
logger.warning(f"Error creating documentation plan: {str(e)}")
return {
"title": "Documentation",
- "document_type": "report",
+ "documentType": "report",
"audience": "general",
- "detailed_structure": [
+ "detailedStructure": [
{
"title": "Introduction",
- "key_points": ["Purpose", "Scope"],
+ "keyPoints": ["Purpose", "Scope"],
"subsections": [],
"importance": "high",
- "estimated_length": "short"
+ "estimatedLength": "short"
},
{
"title": "Main Content",
- "key_points": ["Core Information"],
+ "keyPoints": ["Core Information"],
"subsections": ["Key Findings", "Analysis"],
"importance": "high",
- "estimated_length": "long"
+ "estimatedLength": "long"
},
{
"title": "Conclusion",
- "key_points": ["Summary", "Next Steps"],
+ "keyPoints": ["Summary", "Next Steps"],
"subsections": [],
"importance": "medium",
- "estimated_length": "short"
+ "estimatedLength": "short"
}
],
- "key_topics": ["General Information"],
+ "keyTopics": ["General Information"],
"tone": "formal",
- "recommended_format": "markdown",
- "formatting_requirements": ["Clear headings", "Professional formatting"],
- "executive_summary": "A comprehensive documentation covering the requested topics.",
+ "recommendedFormat": "markdown",
+ "formattingRequirements": ["Clear headings", "Professional formatting"],
+ "executiveSummary": "A comprehensive documentation covering the requested topics.",
"feedback": "Created documentation based on your requirements."
}
- async def _create_document_multi_step(self, prompt: str, context: str, output_label: str,
- output_description: str, documentation_plan: Dict) -> Dict:
+ async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str,
+ outputDescription: str, documentationPlan: Dict) -> Dict:
"""
Create a document using a multi-step approach with separate AI calls for each section.
Args:
prompt: Original task prompt
context: Document context
- output_label: Output filename
- output_description: Description of desired output
- documentation_plan: Documentation plan from AI
+ outputLabel: Output filename
+ outputDescription: Description of desired output
+ documentationPlan: Documentation plan from AI
Returns:
Document object
"""
# Determine format from filename
- format_type = output_label.split('.')[-1].lower() if '.' in output_label else "md"
+ formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md"
- # Map format to content_type
- content_type_map = {
+ # Map format to contentType
+ contentTypeMap = {
"md": "text/markdown",
"markdown": "text/markdown",
"html": "text/html",
@@ -316,49 +316,49 @@ class AgentDocumentation(AgentBase):
"csv": "text/csv"
}
- content_type = content_type_map.get(format_type, "text/plain")
+ contentType = contentTypeMap.get(formatType, "text/plain")
# Get document information
- title = documentation_plan.get("title", "Documentation")
- document_type = documentation_plan.get("document_type", "document")
- audience = documentation_plan.get("audience", "general")
- tone = documentation_plan.get("tone", "formal")
- key_topics = documentation_plan.get("key_topics", [])
- formatting_requirements = documentation_plan.get("formatting_requirements", [])
+ title = documentationPlan.get("title", "Documentation")
+ documentType = documentationPlan.get("documentType", "document")
+ audience = documentationPlan.get("audience", "general")
+ tone = documentationPlan.get("tone", "formal")
+ keyTopics = documentationPlan.get("keyTopics", [])
+ formattingRequirements = documentationPlan.get("formattingRequirements", [])
# Get the detailed structure
- detailed_structure = documentation_plan.get("detailed_structure", [])
- if not detailed_structure:
+ detailedStructure = documentationPlan.get("detailedStructure", [])
+ if not detailedStructure:
# Fallback structure if none provided
- detailed_structure = [
+ detailedStructure = [
{
"title": "Introduction",
- "key_points": ["Purpose", "Scope"],
+ "keyPoints": ["Purpose", "Scope"],
"importance": "high"
},
{
"title": "Main Content",
- "key_points": ["Core Information"],
+ "keyPoints": ["Core Information"],
"importance": "high"
},
{
"title": "Conclusion",
- "key_points": ["Summary", "Next Steps"],
+ "keyPoints": ["Summary", "Next Steps"],
"importance": "medium"
}
]
try:
# Step 1: Generate document introduction
- intro_prompt = f"""
- Create the introduction for a {document_type} titled "{title}".
+ introPrompt = f"""
+ Create the introduction for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- - Type: {document_type}
+ - Type: {documentType}
- Audience: {audience}
- Tone: {tone}
- - Key Topics: {', '.join(key_topics)}
- - Format: {format_type}
+ - Key Topics: {', '.join(keyTopics)}
+ - Format: {formatType}
TASK CONTEXT: {prompt}
@@ -368,23 +368,23 @@ class AgentDocumentation(AgentBase):
3. Outline what the reader will find in the document
4. Set the appropriate tone for the {audience} audience
- The introduction should be professional and engaging, formatted according to {format_type} standards.
+ The introduction should be professional and engaging, formatted according to {formatType} standards.
"""
- introduction = await self.mydom.call_ai([
- {"role": "system", "content": f"You are a documentation expert creating an introduction in {format_type} format."},
- {"role": "user", "content": intro_prompt}
- ], produce_user_answer = True)
+ introduction = await self.mydom.callAi([
+ {"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."},
+ {"role": "user", "content": introPrompt}
+ ], produceUserAnswer = True)
# Step 2: Generate executive summary (if applicable)
- if document_type in ["report", "whitepaper", "case study"]:
- summary_prompt = f"""
- Create an executive summary for a {document_type} titled "{title}".
+ if documentType in ["report", "whitepaper", "case study"]:
+ summaryPrompt = f"""
+ Create an executive summary for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- - Type: {document_type}
+ - Type: {documentType}
- Audience: {audience}
- - Key Topics: {', '.join(key_topics)}
+ - Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
@@ -392,44 +392,44 @@ class AgentDocumentation(AgentBase):
1. Provide a concise overview of the entire document
2. Highlight key findings, recommendations, or conclusions
3. Be suitable for executives or busy readers who may only read this section
- 4. Be professionally formatted according to {format_type} standards
+ 4. Be professionally formatted according to {formatType} standards
Keep the summary focused and impactful, approximately 200-300 words.
"""
- executive_summary = await self.mydom.call_ai([
- {"role": "system", "content": f"You are a documentation expert creating an executive summary in {format_type} format."},
- {"role": "user", "content": summary_prompt}
- ], produce_user_answer = True)
+ executiveSummary = await self.mydom.callAi([
+ {"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."},
+ {"role": "user", "content": summaryPrompt}
+ ], produceUserAnswer = True)
else:
- executive_summary = ""
+ executiveSummary = ""
# Step 3: Generate each section
sections = []
- for section in detailed_structure:
- section_title = section.get("title", "Section")
- key_points = section.get("key_points", [])
+ for section in detailedStructure:
+ sectionTitle = section.get("title", "Section")
+ keyPoints = section.get("keyPoints", [])
subsections = section.get("subsections", [])
importance = section.get("importance", "medium")
# Adjust depth based on importance
- detail_level = "high" if importance == "high" else "medium"
+ detailLevel = "high" if importance == "high" else "medium"
- section_prompt = f"""
- Create the "{section_title}" section for a {document_type} titled "{title}".
+ sectionPrompt = f"""
+ Create the "{sectionTitle}" section for a {documentType} titled "{title}".
SECTION DETAILS:
- - Title: {section_title}
- - Key Points to Cover: {', '.join(key_points)}
+ - Title: {sectionTitle}
+ - Key Points to Cover: {', '.join(keyPoints)}
- Subsections: {', '.join(subsections)}
- - Detail Level: {detail_level}
+ - Detail Level: {detailLevel}
DOCUMENT CONTEXT:
- - Type: {document_type}
+ - Type: {documentType}
- Audience: {audience}
- Tone: {tone}
- - Format: {format_type}
+ - Format: {formatType}
TASK CONTEXT: {prompt}
@@ -441,27 +441,27 @@ class AgentDocumentation(AgentBase):
2. Cover all the key points listed
3. Include the specified subsections with appropriate headings
4. Maintain a {tone} tone suitable for the {audience} audience
- 5. Be properly formatted according to {format_type} standards
+ 5. Be properly formatted according to {formatType} standards
6. Include specific examples, data, or evidence where appropriate
Be thorough in your coverage of this section, providing substantive content.
"""
- section_content = await self.mydom.call_ai([
- {"role": "system", "content": f"You are a documentation expert creating detailed content for the {section_title} section."},
- {"role": "user", "content": section_prompt}
- ], produce_user_answer = True)
+ sectionContent = await self.mydom.callAi([
+ {"role": "system", "content": f"You are a documentation expert creating detailed content for the {sectionTitle} section."},
+ {"role": "user", "content": sectionPrompt}
+ ], produceUserAnswer = True)
- sections.append(section_content)
+ sections.append(sectionContent)
# Step 4: Generate conclusion
- conclusion_prompt = f"""
- Create the conclusion for a {document_type} titled "{title}".
+ conclusionPrompt = f"""
+ Create the conclusion for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- - Type: {document_type}
+ - Type: {documentType}
- Audience: {audience}
- - Key Topics: {', '.join(key_topics)}
+ - Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
@@ -471,71 +471,71 @@ class AgentDocumentation(AgentBase):
3. Include any relevant recommendations or next steps
4. Leave the reader with a clear understanding of the document's significance
- The conclusion should be professional and impactful, formatted according to {format_type} standards.
+ The conclusion should be professional and impactful, formatted according to {formatType} standards.
"""
- conclusion = await self.mydom.call_ai([
- {"role": "system", "content": f"You are a documentation expert creating a conclusion in {format_type} format."},
- {"role": "user", "content": conclusion_prompt}
- ], produce_user_answer = True)
+ conclusion = await self.mydom.callAi([
+ {"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."},
+ {"role": "user", "content": conclusionPrompt}
+ ], produceUserAnswer = True)
# Step 5: Assemble the complete document
- if format_type in ["md", "markdown"]:
+ if formatType in ["md", "markdown"]:
# Markdown format
- document_content = f"# {title}\n\n"
+ documentContent = f"# {title}\n\n"
- if executive_summary:
- document_content += f"## Executive Summary\n\n{executive_summary}\n\n"
+ if executiveSummary:
+ documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n"
- document_content += f"{introduction}\n\n"
+ documentContent += f"{introduction}\n\n"
- for i, section_content in enumerate(sections):
+ for i, sectionContent in enumerate(sections):
# Ensure section starts with heading if not already
- section_title = detailed_structure[i].get("title", f"Section {i+1}")
- if not section_content.strip().startswith("#"):
- document_content += f"## {section_title}\n\n"
- document_content += f"{section_content}\n\n"
+ sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
+ if not sectionContent.strip().startswith("#"):
+ documentContent += f"## {sectionTitle}\n\n"
+ documentContent += f"{sectionContent}\n\n"
- document_content += f"## Conclusion\n\n{conclusion}\n"
+ documentContent += f"## Conclusion\n\n{conclusion}\n"
- elif format_type == "html":
+ elif formatType == "html":
# HTML format
- document_content = f"\n\n{title}\n\n\n"
- document_content += f"
\n"
+ documentContent += "\n"
else:
# Plain text format
- document_content = f"{title}\n{'=' * len(title)}\n\n"
+ documentContent = f"{title}\n{'=' * len(title)}\n\n"
- if executive_summary:
- document_content += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executive_summary}\n\n"
+ if executiveSummary:
+ documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n"
- document_content += f"{introduction}\n\n"
+ documentContent += f"{introduction}\n\n"
- for i, section_content in enumerate(sections):
- section_title = detailed_structure[i].get("title", f"Section {i+1}")
- document_content += f"{section_title}\n{'-' * len(section_title)}\n\n{section_content}\n\n"
+ for i, sectionContent in enumerate(sections):
+ sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
+ documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n"
- document_content += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"
+ documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"
# Create document object
return {
- "label": output_label,
- "content": document_content,
+ "label": outputLabel,
+ "content": documentContent,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
@@ -543,23 +543,23 @@ class AgentDocumentation(AgentBase):
logger.error(f"Error creating document: {str(e)}", exc_info=True)
# Create a simple error document
- if format_type in ["md", "markdown"]:
+ if formatType in ["md", "markdown"]:
content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
- elif format_type == "html":
+ elif formatType == "html":
content = f"
Error in Documentation
There was an error generating the documentation: {str(e)}
"
else:
content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
return {
- "label": output_label,
+ "label": outputLabel,
"content": content,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
# Factory function for the Documentation agent
-def get_documentation_agent():
+def getAgentDocumentation():
"""Returns an instance of the Documentation agent."""
return AgentDocumentation()
\ No newline at end of file
diff --git a/modules/chat_agent_webcrawler.py b/modules/agentWebcrawler.py
similarity index 56%
rename from modules/chat_agent_webcrawler.py
rename to modules/agentWebcrawler.py
index f78857cc..05cbe0b5 100644
--- a/modules/chat_agent_webcrawler.py
+++ b/modules/agentWebcrawler.py
@@ -14,7 +14,7 @@ from bs4 import BeautifulSoup
import requests
import markdown
-from modules.chat_registry import AgentBase
+from modules.workflowAgentsRegistry import AgentBase
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@@ -28,31 +28,31 @@ class AgentWebcrawler(AgentBase):
self.name = "webcrawler"
self.description = "Conducts web research and collects information from online sources"
self.capabilities = [
- "web_search",
- "information_retrieval",
- "data_collection",
- "search_results_analysis",
- "webpage_content_extraction"
+ "webSearch",
+ "informationRetrieval",
+ "dataCollection",
+ "searchResultsAnalysis",
+ "webpageContentExtraction"
]
# Web crawling configuration
- self.max_url = int(APP_CONFIG.get("Agent_Webcrawler_MAX_URLS", "5"))
- self.max_search_terms = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_KEYWORDS", "3"))
- self.max_results = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_RESULTS", "5"))
+ self.maxUrl = int(APP_CONFIG.get("Agent_Webcrawler_MAX_URLS", "5"))
+ self.maxSearchTerms = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_KEYWORDS", "3"))
+ self.maxResults = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_RESULTS", "5"))
self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_TIMEOUT", "30"))
- self.search_engine = APP_CONFIG.get("Agent_Webcrawler_SEARCH_ENGINE", "https://html.duckduckgo.com/html/?q=")
- self.user_agent = APP_CONFIG.get("Agent_Webcrawler_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
+ self.searchEngine = APP_CONFIG.get("Agent_Webcrawler_SEARCH_ENGINE", "https://html.duckduckgo.com/html/?q=")
+ self.userAgent = APP_CONFIG.get("Agent_Webcrawler_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
- def set_dependencies(self, mydom=None):
+ def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
- async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
+ async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a task by focusing on required outputs and using AI to guide the research process.
Args:
- task: Task dictionary with prompt, input_documents, output_specifications
+ task: Task dictionary with prompt, inputDocuments, outputSpecifications
Returns:
Dictionary with feedback and documents
@@ -60,7 +60,7 @@ class AgentWebcrawler(AgentBase):
try:
# Extract task information
prompt = task.get("prompt", "")
- output_specs = task.get("output_specifications", [])
+ outputSpecs = task.get("outputSpecifications", [])
# Check AI service
if not self.mydom:
@@ -70,28 +70,28 @@ class AgentWebcrawler(AgentBase):
}
# Create research plan
- research_plan = await self._create_research_plan(prompt)
+ researchPlan = await self._createResearchPlan(prompt)
# Check if this is truly a web research task
- if not research_plan.get("requires_web_research", True):
+ if not researchPlan.get("requiresWebResearch", True):
return {
"feedback": "This task doesn't appear to require web research. Please try a different agent.",
"documents": []
}
# Gather raw material through web research
- raw_results = await self._gather_research_material(research_plan)
+ rawResults = await self._gatherResearchMaterial(researchPlan)
# Format results into requested output documents
- documents = await self._create_output_documents(
+ documents = await self._createOutputDocuments(
prompt,
- raw_results,
- output_specs,
- research_plan
+ rawResults,
+ outputSpecs,
+ researchPlan
)
# Generate feedback
- feedback = research_plan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(raw_results)} relevant sources.")
+ feedback = researchPlan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources.")
return {
"feedback": feedback,
@@ -105,7 +105,7 @@ class AgentWebcrawler(AgentBase):
"documents": []
}
- async def _create_research_plan(self, prompt: str) -> Dict[str, Any]:
+ async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]:
"""
Use AI to create a detailed research plan.
@@ -115,17 +115,17 @@ class AgentWebcrawler(AgentBase):
Returns:
Research plan dictionary
"""
- research_prompt = f"""
+ researchPrompt = f"""
Create a detailed web research plan for this task: "{prompt}"
Analyze the request carefully and create a structured plan in JSON format with the following elements:
{{
- "requires_web_research": true/false, # Whether this genuinely requires web research
- "research_questions": ["question1", "question2", ...], # 2-4 specific questions to answer
- "search_terms": ["term1", "term2", ...], # Up to {self.max_search_terms} effective search terms
- "direct_urls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.max_url})
- "expected_sources": ["type1", "type2", ...], # Types of sources that would be most valuable
- "content_focus": "what specific content to extract or focus on",
+ "requiresWebResearch": true/false, # Whether this genuinely requires web research
+ "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer
+ "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms
+ "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl})
+ "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable
+ "contentFocus": "what specific content to extract or focus on",
"feedback": "explanation of how the research will be conducted"
}}
@@ -134,37 +134,37 @@ class AgentWebcrawler(AgentBase):
try:
# Get research plan from AI
- response = await self.mydom.call_ai([
+ response = await self.mydom.callAi([
{"role": "system", "content": "You are a web research planning expert. Create precise research plans in JSON format only."},
- {"role": "user", "content": research_prompt}
+ {"role": "user", "content": researchPrompt}
])
# Extract JSON
- json_start = response.find('{')
- json_end = response.rfind('}') + 1
+ jsonStart = response.find('{')
+ jsonEnd = response.rfind('}') + 1
- if json_start >= 0 and json_end > json_start:
- plan = json.loads(response[json_start:json_end])
+ if jsonStart >= 0 and jsonEnd > jsonStart:
+ plan = json.loads(response[jsonStart:jsonEnd])
# Ensure we have the expected fields with defaults if missing
- if "search_terms" not in plan:
- plan["search_terms"] = [prompt]
- if "direct_urls" not in plan:
- plan["direct_urls"] = []
- if "research_questions" not in plan:
- plan["research_questions"] = ["What information can be found about this topic?"]
+ if "searchTerms" not in plan:
+ plan["searchTerms"] = [prompt]
+ if "directUrls" not in plan:
+ plan["directUrls"] = []
+ if "researchQuestions" not in plan:
+ plan["researchQuestions"] = ["What information can be found about this topic?"]
return plan
else:
# Fallback plan
            logger.warning(f"Unable to create research plan, generating fallback plan")
return {
- "requires_web_research": True,
- "research_questions": ["What information can be found about this topic?"],
- "search_terms": [prompt],
- "direct_urls": [],
- "expected_sources": ["Web pages", "Articles"],
- "content_focus": "Relevant information about the topic",
+ "requiresWebResearch": True,
+ "researchQuestions": ["What information can be found about this topic?"],
+ "searchTerms": [prompt],
+ "directUrls": [],
+ "expectedSources": ["Web pages", "Articles"],
+ "contentFocus": "Relevant information about the topic",
"feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
}
@@ -172,45 +172,45 @@ class AgentWebcrawler(AgentBase):
logger.warning(f"Error creating research plan: {str(e)}")
# Simple fallback plan
return {
- "requires_web_research": True,
- "research_questions": ["What information can be found about this topic?"],
- "search_terms": [prompt],
- "direct_urls": [],
- "expected_sources": ["Web pages", "Articles"],
- "content_focus": "Relevant information about the topic",
+ "requiresWebResearch": True,
+ "researchQuestions": ["What information can be found about this topic?"],
+ "searchTerms": [prompt],
+ "directUrls": [],
+ "expectedSources": ["Web pages", "Articles"],
+ "contentFocus": "Relevant information about the topic",
"feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
}
- async def _gather_research_material(self, research_plan: Dict[str, Any]) -> List[Dict[str, Any]]:
+ async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Gather research material based on the research plan.
Args:
- research_plan: Research plan dictionary
+ researchPlan: Research plan dictionary
Returns:
List of research results
"""
- all_results = []
+ allResults = []
# Process direct URLs
- direct_urls = research_plan.get("direct_urls", [])[:self.max_url]
- for url in direct_urls:
+ directUrls = researchPlan.get("directUrls", [])[:self.maxUrl]
+ for url in directUrls:
logger.info(f"Processing direct URL: {url}")
try:
# Fetch and extract content
- soup = self._read_url(url)
+ soup = self._readUrl(url)
if soup:
# Extract title and content
- title = self._extract_title(soup, url)
- content = self._extract_main_content(soup)
+ title = self._extractTitle(soup, url)
+ content = self._extractMainContent(soup)
# Add to results
- all_results.append({
+ allResults.append({
"title": title,
"url": url,
- "source_type": "direct_url",
+ "sourceType": "directUrl",
"content": content,
"summary": "" # Will be filled later
})
@@ -218,48 +218,48 @@ class AgentWebcrawler(AgentBase):
logger.warning(f"Error processing URL {url}: {str(e)}")
# Process search terms
- search_terms = research_plan.get("search_terms", [])[:self.max_search_terms]
- for term in search_terms:
+ searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms]
+ for term in searchTerms:
logger.info(f"Searching for: {term}")
try:
# Perform search
- search_results = self._search_web(term)
+ searchResults = self._searchWeb(term)
# Process each search result
- for result in search_results:
+ for result in searchResults:
# Check if URL is already in results
- if not any(r["url"] == result["url"] for r in all_results):
- all_results.append({
+ if not any(r["url"] == result["url"] for r in allResults):
+ allResults.append({
"title": result["title"],
"url": result["url"],
- "source_type": "search_result",
+ "sourceType": "searchResult",
"content": result["data"],
"snippet": result["snippet"],
"summary": "" # Will be filled later
})
# Stop if we've reached the maximum results
- if len(all_results) >= self.max_results:
+ if len(allResults) >= self.maxResults:
break
except Exception as e:
logger.warning(f"Error searching for {term}: {str(e)}")
# Stop if we've reached the maximum results
- if len(all_results) >= self.max_results:
+ if len(allResults) >= self.maxResults:
break
# Create summaries in parallel for all results
- all_results = await self._summarize_all_results(all_results, research_plan)
+ allResults = await self._summarizeAllResults(allResults, researchPlan)
- return all_results
+ return allResults
- async def _summarize_all_results(self, results: List[Dict[str, Any]], research_plan: Dict[str, Any]) -> List[Dict[str, Any]]:
+ async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create summaries for all research results.
Args:
results: List of research results
- research_plan: Research plan with questions and focus
+ researchPlan: Research plan with questions and focus
Returns:
Results with added summaries
@@ -269,16 +269,16 @@ class AgentWebcrawler(AgentBase):
try:
# Limit content length to avoid token issues
- content = self._limit_text(result.get("content", ""), max_chars=8000)
- research_questions = research_plan.get("research_questions", ["What relevant information does this page contain?"])
- content_focus = research_plan.get("content_focus", "Relevant information")
+ content = self._limitText(result.get("content", ""), maxChars=8000)
+ researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"])
+ contentFocus = researchPlan.get("contentFocus", "Relevant information")
# Create summary using AI
- summary_prompt = f"""
+ summaryPrompt = f"""
Summarize this web page content based on these research questions:
- {', '.join(research_questions)}
+ {', '.join(researchQuestions)}
- Focus on: {content_focus}
+ Focus on: {contentFocus}
Web page: {result['url']}
Title: {result['title']}
@@ -296,9 +296,9 @@ class AgentWebcrawler(AgentBase):
"""
if self.mydom:
- summary = await self.mydom.call_ai([
+ summary = await self.mydom.callAi([
{"role": "system", "content": "You summarize web content accurately and concisely, focusing only on what is actually in the content."},
- {"role": "user", "content": summary_prompt}
+ {"role": "user", "content": summaryPrompt}
])
# Store the summary
@@ -314,24 +314,24 @@ class AgentWebcrawler(AgentBase):
return results
- async def _create_output_documents(self, prompt: str, results: List[Dict[str, Any]],
- output_specs: List[Dict[str, Any]], research_plan: Dict[str, Any]) -> List[Dict[str, Any]]:
+ async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]],
+ outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create output documents based on research results and specifications.
Args:
prompt: Original research prompt
results: List of research results
- output_specs: Output specifications
- research_plan: Research plan
+ outputSpecs: Output specifications
+ researchPlan: Research plan
Returns:
List of output documents
"""
# If no output specs provided, create default output
- if not output_specs:
- output_specs = [{
- "label": "web_research_results.md",
+ if not outputSpecs:
+ outputSpecs = [{
+ "label": "webResearchResults.md",
"description": "Comprehensive web research results"
}]
@@ -339,66 +339,66 @@ class AgentWebcrawler(AgentBase):
documents = []
# Process each output specification
- for spec in output_specs:
- output_label = spec.get("label", "")
- output_description = spec.get("description", "")
+ for spec in outputSpecs:
+ outputLabel = spec.get("label", "")
+ outputDescription = spec.get("description", "")
# Determine format based on file extension
- format_type = self._determine_format_type(output_label)
+ formatType = self._determineFormatType(outputLabel)
# Create appropriate document based on format
- if format_type == "json":
+ if formatType == "json":
# JSON output - structured data
- document = await self._create_json_document(prompt, results, research_plan, output_label)
- elif format_type == "csv":
+ document = await self._createJsonDocument(prompt, results, researchPlan, outputLabel)
+ elif formatType == "csv":
# CSV output - tabular data
- document = await self._create_csv_document(results, output_label)
+ document = await self._createCsvDocument(results, outputLabel)
else:
# Text-based output (markdown, html, text) - narrative report
- document = await self._create_narrative_document(
- prompt, results, research_plan, format_type, output_label, output_description
+ document = await self._createNarrativeDocument(
+ prompt, results, researchPlan, formatType, outputLabel, outputDescription
)
documents.append(document)
return documents
- async def _create_narrative_document(self, prompt: str, results: List[Dict[str, Any]],
- research_plan: Dict[str, Any], format_type: str,
- output_label: str, output_description: str) -> Dict[str, Any]:
+ async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]],
+ researchPlan: Dict[str, Any], formatType: str,
+ outputLabel: str, outputDescription: str) -> Dict[str, Any]:
"""
Create a narrative document (markdown, html, text) from research results.
Args:
prompt: Original research prompt
results: Research results
- research_plan: Research plan
- format_type: Output format (markdown, html, text)
- output_label: Output filename
- output_description: Output description
+ researchPlan: Research plan
+ formatType: Output format (markdown, html, text)
+ outputLabel: Output filename
+ outputDescription: Output description
Returns:
Document object
"""
# Create content based on format
- if format_type == "markdown":
- content_type = "text/markdown"
- template_format = "markdown"
- elif format_type == "html":
- content_type = "text/html"
- template_format = "html"
+ if formatType == "markdown":
+ contentType = "text/markdown"
+ templateFormat = "markdown"
+ elif formatType == "html":
+ contentType = "text/html"
+ templateFormat = "html"
else:
- content_type = "text/plain"
- template_format = "text"
+ contentType = "text/plain"
+ templateFormat = "text"
# Prepare research context
- research_questions = research_plan.get("research_questions", [])
- search_terms = research_plan.get("search_terms", [])
+ researchQuestions = researchPlan.get("researchQuestions", [])
+ searchTerms = researchPlan.get("searchTerms", [])
# Create document structure based on results
- sources_summary = []
+ sourcesSummary = []
for result in results:
- sources_summary.append({
+ sourcesSummary.append({
"title": result.get("title", "Untitled"),
"url": result.get("url", ""),
"summary": result.get("summary", ""),
@@ -406,35 +406,35 @@ class AgentWebcrawler(AgentBase):
})
# Truncate content for prompt
- sources_json = json.dumps(sources_summary, indent=2)
- if len(sources_json) > 10000:
+ sourcesJson = json.dumps(sourcesSummary, indent=2)
+ if len(sourcesJson) > 10000:
# Logic to truncate each summary while preserving structure
- for i in range(len(sources_summary)):
- if len(sources_json) <= 10000:
+ for i in range(len(sourcesSummary)):
+ if len(sourcesJson) <= 10000:
break
# Gradually truncate summaries
- sources_summary[i]["summary"] = sources_summary[i]["summary"][:500] + "..."
- sources_json = json.dumps(sources_summary, indent=2)
+ sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..."
+ sourcesJson = json.dumps(sourcesSummary, indent=2)
# Create report prompt
- report_prompt = f"""
- Create a comprehensive {format_type} research report based on the following web research:
+ reportPrompt = f"""
+ Create a comprehensive {formatType} research report based on the following web research:
TASK: {prompt}
RESEARCH QUESTIONS:
- {', '.join(research_questions)}
+ {', '.join(researchQuestions)}
SEARCH TERMS USED:
- {', '.join(search_terms)}
+ {', '.join(searchTerms)}
SOURCES AND FINDINGS:
- {sources_json}
+ {sourcesJson}
REPORT DETAILS:
- - Format: {template_format}
- - Filename: {output_label}
- - Description: {output_description}
+ - Format: {templateFormat}
+ - Filename: {outputLabel}
+ - Description: {outputDescription}
Create a well-structured report that:
1. Includes an executive summary of key findings
@@ -442,188 +442,188 @@ class AgentWebcrawler(AgentBase):
3. Integrates information from all relevant sources
4. Cites sources appropriately for each piece of information
5. Provides a comprehensive synthesis of the research
- 6. Is formatted professionally and appropriately for {template_format}
+ 6. Is formatted professionally and appropriately for {templateFormat}
The report should be scholarly, accurate, and focused on the original research task.
"""
try:
# Generate report with AI
- report_content = await self.mydom.call_ai([
- {"role": "system", "content": f"You create professional research reports in {template_format} format."},
- {"role": "user", "content": report_prompt}
+ reportContent = await self.mydom.callAi([
+ {"role": "system", "content": f"You create professional research reports in {templateFormat} format."},
+ {"role": "user", "content": reportPrompt}
])
# Convert to HTML if needed
- if format_type == "html" and not report_content.lower().startswith("Web Research Results{report_content}"
+ if not reportContent.lower().startswith("Web Research Results{reportContent}"
return {
- "label": output_label,
- "content": report_content,
+ "label": outputLabel,
+ "content": reportContent,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
except Exception as e:
logger.error(f"Error creating narrative document: {str(e)}")
# Create error document
- if format_type == "markdown":
+ if formatType == "markdown":
content = f"# Web Research Error\n\nAn error occurred: {str(e)}"
- elif format_type == "html":
+ elif formatType == "html":
content = f"
Web Research Error
An error occurred: {str(e)}
"
else:
content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}"
return {
- "label": output_label,
+ "label": outputLabel,
"content": content,
"metadata": {
- "content_type": content_type
+ "contentType": contentType
}
}
- async def _create_json_document(self, prompt: str, results: List[Dict[str, Any]],
- research_plan: Dict[str, Any], output_label: str) -> Dict[str, Any]:
+ async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]],
+ researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]:
"""
Create a JSON document from research results.
Args:
prompt: Original research prompt
results: Research results
- research_plan: Research plan
- output_label: Output filename
+ researchPlan: Research plan
+ outputLabel: Output filename
Returns:
Document object
"""
try:
# Create structured data
- sources_data = []
+ sourcesData = []
for result in results:
- sources_data.append({
+ sourcesData.append({
"title": result.get("title", "Untitled"),
"url": result.get("url", ""),
"summary": result.get("summary", ""),
"snippet": result.get("snippet", ""),
- "source_type": result.get("source_type", "")
+ "sourceType": result.get("sourceType", "")
})
# Create metadata
metadata = {
"query": prompt,
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
- "research_questions": research_plan.get("research_questions", []),
- "search_terms": research_plan.get("search_terms", [])
+ "researchQuestions": researchPlan.get("researchQuestions", []),
+ "searchTerms": researchPlan.get("searchTerms", [])
}
# Compile complete report object
- json_content = {
+ jsonContent = {
"metadata": metadata,
- "summary": research_plan.get("feedback", "Web research results"),
- "sources": sources_data
+ "summary": researchPlan.get("feedback", "Web research results"),
+ "sources": sourcesData
}
# Convert to JSON string
- content = json.dumps(json_content, indent=2)
+ content = json.dumps(jsonContent, indent=2)
return {
- "label": output_label,
+ "label": outputLabel,
"content": content,
"metadata": {
- "content_type": "application/json"
+ "contentType": "application/json"
}
}
except Exception as e:
logger.error(f"Error creating JSON document: {str(e)}")
return {
- "label": output_label,
+ "label": outputLabel,
"content": json.dumps({"error": str(e)}),
"metadata": {
- "content_type": "application/json"
+ "contentType": "application/json"
}
}
- async def _create_csv_document(self, results: List[Dict[str, Any]], output_label: str) -> Dict[str, Any]:
+ async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]:
"""
Create a CSV document from research results.
Args:
results: Research results
- output_label: Output filename
+ outputLabel: Output filename
Returns:
Document object
"""
try:
# Create CSV header
- csv_lines = ["Title,URL,Source Type,Snippet"]
+ csvLines = ["Title,URL,Source Type,Snippet"]
# Add results
for result in results:
# Escape CSV fields
title = result.get("title", "").replace('"', '""')
url = result.get("url", "").replace('"', '""')
- source_type = result.get("source_type", "").replace('"', '""')
+ sourceType = result.get("sourceType", "").replace('"', '""')
snippet = result.get("snippet", "").replace('"', '""')
- csv_lines.append(f'"{title}","{url}","{source_type}","{snippet}"')
+ csvLines.append(f'"{title}","{url}","{sourceType}","{snippet}"')
# Combine into CSV content
- content = "\n".join(csv_lines)
+ content = "\n".join(csvLines)
return {
- "label": output_label,
+ "label": outputLabel,
"content": content,
"metadata": {
- "content_type": "text/csv"
+ "contentType": "text/csv"
}
}
except Exception as e:
logger.error(f"Error creating CSV document: {str(e)}")
return {
- "label": output_label,
+ "label": outputLabel,
"content": "Error,Error\nFailed to create CSV,{0}".format(str(e)),
"metadata": {
- "content_type": "text/csv"
+ "contentType": "text/csv"
}
}
- def _determine_format_type(self, output_label: str) -> str:
+ def _determineFormatType(self, outputLabel: str) -> str:
"""
Determine the format type based on the filename.
Args:
- output_label: Output filename
+ outputLabel: Output filename
Returns:
Format type (markdown, html, text, json, csv)
"""
- output_label_lower = output_label.lower()
+ outputLabelLower = outputLabel.lower()
- if output_label_lower.endswith(".md"):
+ if outputLabelLower.endswith(".md"):
return "markdown"
- elif output_label_lower.endswith(".html"):
+ elif outputLabelLower.endswith(".html"):
return "html"
- elif output_label_lower.endswith(".txt"):
+ elif outputLabelLower.endswith(".txt"):
return "text"
- elif output_label_lower.endswith(".json"):
+ elif outputLabelLower.endswith(".json"):
return "json"
- elif output_label_lower.endswith(".csv"):
+ elif outputLabelLower.endswith(".csv"):
return "csv"
else:
# Default to markdown
return "markdown"
- def _search_web(self, query: str) -> List[Dict[str, str]]:
+ def _searchWeb(self, query: str) -> List[Dict[str, str]]:
"""
Conduct a web search and return the results.
@@ -633,11 +633,11 @@ class AgentWebcrawler(AgentBase):
Returns:
List of search results
"""
- formatted_query = quote_plus(query)
- url = f"{self.search_engine}{formatted_query}"
+ formattedQuery = quote_plus(query)
+ url = f"{self.searchEngine}{formattedQuery}"
- search_results_soup = self._read_url(url)
- if not search_results_soup or not search_results_soup.select('.result'):
+ searchResultsSoup = self._readUrl(url)
+ if not searchResultsSoup or not searchResultsSoup.select('.result'):
logger.warning(f"No search results found for: {query}")
return []
@@ -645,59 +645,59 @@ class AgentWebcrawler(AgentBase):
results = []
# Find all result containers
- result_elements = search_results_soup.select('.result')
+ resultElements = searchResultsSoup.select('.result')
- for result in result_elements:
+ for result in resultElements:
# Extract title
- title_element = result.select_one('.result__a')
- title = title_element.text.strip() if title_element else 'No title'
+ titleElement = result.select_one('.result__a')
+ title = titleElement.text.strip() if titleElement else 'No title'
# Extract URL (DuckDuckGo uses redirects)
- url_element = title_element.get('href') if title_element else ''
- extracted_url = 'No URL'
+ urlElement = titleElement.get('href') if titleElement else ''
+ extractedUrl = 'No URL'
- if url_element:
+ if urlElement:
# Extract actual URL from DuckDuckGo's redirect
- if url_element.startswith('/d.js?q='):
- start = url_element.find('?q=') + 3
- end = url_element.find('&', start) if '&' in url_element[start:] else None
- extracted_url = unquote(url_element[start:end])
+ if urlElement.startswith('/d.js?q='):
+ start = urlElement.find('?q=') + 3
+ end = urlElement.find('&', start) if '&' in urlElement[start:] else None
+ extractedUrl = unquote(urlElement[start:end])
# Ensure URL has correct protocol prefix
- if not extracted_url.startswith(('http://', 'https://')):
- if not extracted_url.startswith('//'):
- extracted_url = 'https://' + extracted_url
+ if not extractedUrl.startswith(('http://', 'https://')):
+ if not extractedUrl.startswith('//'):
+ extractedUrl = 'https://' + extractedUrl
else:
- extracted_url = 'https:' + extracted_url
+ extractedUrl = 'https:' + extractedUrl
else:
- extracted_url = url_element
+ extractedUrl = urlElement
# Extract snippet directly from search results page
- snippet_element = result.select_one('.result__snippet')
- snippet = snippet_element.text.strip() if snippet_element else 'No description'
+ snippetElement = result.select_one('.result__snippet')
+ snippet = snippetElement.text.strip() if snippetElement else 'No description'
# Get actual page content
try:
- target_page_soup = self._read_url(extracted_url)
- content = self._extract_main_content(target_page_soup)
+ targetPageSoup = self._readUrl(extractedUrl)
+ content = self._extractMainContent(targetPageSoup)
except Exception as e:
- logger.warning(f"Error extracting content from {extracted_url}: {str(e)}")
+ logger.warning(f"Error extracting content from {extractedUrl}: {str(e)}")
content = f"Error extracting content: {str(e)}"
results.append({
'title': title,
- 'url': extracted_url,
+ 'url': extractedUrl,
'snippet': snippet,
'data': content
})
# Limit number of results
- if len(results) >= self.max_results:
+ if len(results) >= self.maxResults:
break
return results
- def _read_url(self, url: str) -> BeautifulSoup:
+ def _readUrl(self, url: str) -> BeautifulSoup:
"""
Read a URL and return a BeautifulSoup parser for the content.
@@ -711,7 +711,7 @@ class AgentWebcrawler(AgentBase):
return None
headers = {
- 'User-Agent': self.user_agent,
+ 'User-Agent': self.userAgent,
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
@@ -723,10 +723,10 @@ class AgentWebcrawler(AgentBase):
# Handling for status 202
if response.status_code == 202:
# Retry with backoff
- backoff_times = [0.5, 1.0, 2.0, 5.0]
+ backoffTimes = [0.5, 1.0, 2.0, 5.0]
- for wait_time in backoff_times:
- time.sleep(wait_time)
+ for waitTime in backoffTimes:
+ time.sleep(waitTime)
response = requests.get(url, headers=headers, timeout=self.timeout)
if response.status_code != 202:
@@ -742,7 +742,7 @@ class AgentWebcrawler(AgentBase):
logger.error(f"Error reading URL {url}: {str(e)}")
return None
- def _extract_title(self, soup: BeautifulSoup, url: str) -> str:
+ def _extractTitle(self, soup: BeautifulSoup, url: str) -> str:
"""
Extract the title from a webpage.
@@ -757,24 +757,24 @@ class AgentWebcrawler(AgentBase):
return f"Error with {url}"
# Extract title from title tag
- title_tag = soup.find('title')
- title = title_tag.text.strip() if title_tag else "No title"
+ titleTag = soup.find('title')
+ title = titleTag.text.strip() if titleTag else "No title"
# Alternative: Also look for h1 tags if title tag is missing
if title == "No title":
- h1_tag = soup.find('h1')
- if h1_tag:
- title = h1_tag.text.strip()
+ h1Tag = soup.find('h1')
+ if h1Tag:
+ title = h1Tag.text.strip()
return title
- def _extract_main_content(self, soup: BeautifulSoup, max_chars: int = 10000) -> str:
+ def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str:
"""
Extract the main content from an HTML page.
Args:
soup: BeautifulSoup object of the webpage
- max_chars: Maximum number of characters
+ maxChars: Maximum number of characters
Returns:
Extracted main content as a string
@@ -783,34 +783,34 @@ class AgentWebcrawler(AgentBase):
return ""
# Try to find main content elements in priority order
- main_content = None
+ mainContent = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
if content:
- main_content = content
+ mainContent = content
break
# If no main content found, use the body
- if not main_content:
- main_content = soup.find('body') or soup
+ if not mainContent:
+ mainContent = soup.find('body') or soup
# Remove script, style, nav, footer elements that don't contribute to main content
- for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
+ for element in mainContent.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
# Extract text content
- text_content = main_content.get_text(separator=' ', strip=True)
+ textContent = mainContent.get_text(separator=' ', strip=True)
- # Limit to max_chars
- return text_content[:max_chars]
+ # Limit to maxChars
+ return textContent[:maxChars]
- def _limit_text(self, text: str, max_chars: int = 10000) -> str:
+ def _limitText(self, text: str, maxChars: int = 10000) -> str:
"""
Limit text to a maximum number of characters.
Args:
text: Input text
- max_chars: Maximum number of characters
+ maxChars: Maximum number of characters
Returns:
Limited text
@@ -819,14 +819,14 @@ class AgentWebcrawler(AgentBase):
return ""
# If text is already under the limit, return unchanged
- if len(text) <= max_chars:
+ if len(text) <= maxChars:
return text
- # Otherwise limit text to max_chars
- return text[:max_chars] + "... [Content truncated due to length]"
+ # Otherwise limit text to maxChars
+ return text[:maxChars] + "... [Content truncated due to length]"
# Factory function for the Webcrawler agent
-def get_webcrawler_agent():
+def getAgentWebcrawler():
"""Returns an instance of the Webcrawler agent."""
return AgentWebcrawler()
\ No newline at end of file
diff --git a/modules/auth.py b/modules/auth.py
index 26366d40..6fdf7e2f 100644
--- a/modules/auth.py
+++ b/modules/auth.py
@@ -1,3 +1,8 @@
+"""
+Authentication module for backend API.
+Handles JWT-based authentication, token generation, and user context.
+"""
+
from datetime import datetime, timedelta, timezone
from typing import Optional, Dict, Any, Tuple
from fastapi import Depends, HTTPException, status
@@ -5,7 +10,7 @@ from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
import logging
-from modules.gateway_interface import get_gateway_interface
+from modules.gatewayInterface import getGatewayInterface
from modules.configuration import APP_CONFIG
# Get Config Data
@@ -13,39 +18,36 @@ SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
-
# OAuth2 Setup
-oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
+oauth2Scheme = OAuth2PasswordBearer(tokenUrl="token")
# Logger
logger = logging.getLogger(__name__)
-
-def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
+def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> str:
"""
Creates a JWT Access Token.
Args:
data: Data to encode (usually user ID or username)
- expires_delta: Validity duration of the token (optional)
+ expiresDelta: Validity duration of the token (optional)
Returns:
JWT Token as string
"""
- to_encode = data.copy()
+ toEncode = data.copy()
- if expires_delta:
- expire = datetime.now(timezone.utc) + expires_delta
+ if expiresDelta:
+ expire = datetime.now(timezone.utc) + expiresDelta
else:
expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
- to_encode.update({"exp": expire})
- encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+ toEncode.update({"exp": expire})
+ encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
- return encoded_jwt
+ return encodedJwt
-
-async def get_current_user(token: str = Depends(oauth2_scheme)) -> Dict[str, Any]:
+async def getCurrentUser(token: str = Depends(oauth2Scheme)) -> Dict[str, Any]:
"""
Extracts and validates the current user from the JWT token.
@@ -58,7 +60,7 @@ async def get_current_user(token: str = Depends(oauth2_scheme)) -> Dict[str, Any
Raises:
HTTPException: For invalid token or user
"""
- credentials_exception = HTTPException(
+ credentialsException = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid authentication credentials",
headers={"WWW-Authenticate": "Bearer"},
@@ -71,24 +73,24 @@ async def get_current_user(token: str = Depends(oauth2_scheme)) -> Dict[str, Any
# Extract username from token
username: str = payload.get("sub")
if username is None:
- raise credentials_exception
+ raise credentialsException
# Extract mandate ID from token (if present)
- mandate_id: int = payload.get("mandate_id", 1) # Default: Root mandate
+ mandateId: int = payload.get("mandateId", 1) # Default: Root mandate
except JWTError:
logger.warning("Invalid JWT Token")
- raise credentials_exception
+ raise credentialsException
# Initialize Gateway Interface without context
- gateway = get_gateway_interface()
+ gateway = getGatewayInterface()
# Retrieve user from database
- user = gateway.get_user_by_username(username)
+ user = gateway.getUserByUsername(username)
if user is None:
logger.warning(f"User {username} not found")
- raise credentials_exception
+ raise credentialsException
if user.get("disabled", False):
logger.warning(f"User {username} is disabled")
@@ -96,13 +98,12 @@ async def get_current_user(token: str = Depends(oauth2_scheme)) -> Dict[str, Any
return user
-
-async def get_current_active_user(current_user: Dict[str, Any] = Depends(get_current_user)) -> Dict[str, Any]:
+async def getCurrentActiveUser(currentUser: Dict[str, Any] = Depends(getCurrentUser)) -> Dict[str, Any]:
"""
Ensures that the user is active.
Args:
- current_user: Current user data
+ currentUser: Current user data
Returns:
User data
@@ -110,50 +111,48 @@ async def get_current_active_user(current_user: Dict[str, Any] = Depends(get_cur
Raises:
HTTPException: If the user is disabled
"""
- if current_user.get("disabled", False):
+ if currentUser.get("disabled", False):
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="User is disabled")
- return current_user
+ return currentUser
-
-async def get_user_context(current_user: Dict[str, Any]) -> Tuple[int, int]:
+async def getUserContext(currentUser: Dict[str, Any]) -> Tuple[int, int]:
"""
Extracts the mandate ID and user ID from the current user.
Enhanced with better logging.
Args:
- current_user: The current user
+ currentUser: The current user
Returns:
- Tuple of (mandate_id, user_id)
+ Tuple of (mandateId, userId)
"""
# Default values
- default_mandate_id = 0
- default_user_id = 0
+ defaultMandateId = 0
+ defaultUserId = 0
- # Extract mandate_id
- mandate_id = current_user.get("mandate_id", None)
- if mandate_id is None:
- logger.warning(f"No mandate_id found in current_user, using default: {default_mandate_id}")
- mandate_id = default_mandate_id
+ # Extract mandateId
+ mandateId = currentUser.get("mandateId", None)
+ if mandateId is None:
+ logger.warning(f"No mandateId found in currentUser, using default: {defaultMandateId}")
+ mandateId = defaultMandateId
else:
try:
- mandate_id = int(mandate_id)
+ mandateId = int(mandateId)
except (ValueError, TypeError):
- logger.error(f"Invalid mandate_id value: {mandate_id}, using default: {default_mandate_id}")
- mandate_id = default_mandate_id
+ logger.error(f"Invalid mandateId value: {mandateId}, using default: {defaultMandateId}")
+ mandateId = defaultMandateId
- # Extract user_id
- user_id = current_user.get("id", None)
- if user_id is None:
- logger.warning(f"No user_id found in current_user, using default: {default_user_id}")
- user_id = default_user_id
+ # Extract userId
+ userId = currentUser.get("id", None)
+ if userId is None:
+ logger.warning(f"No userId found in currentUser, using default: {defaultUserId}")
+ userId = defaultUserId
else:
try:
- user_id = int(user_id)
+ userId = int(userId)
except (ValueError, TypeError):
- logger.error(f"Invalid user_id value: {user_id}, using default: {default_user_id}")
- user_id = default_user_id
+ logger.error(f"Invalid userId value: {userId}, using default: {defaultUserId}")
+ userId = defaultUserId
- # logger.info(f"User context: mandate_id={mandate_id}, user_id={user_id}")
- return mandate_id, user_id
\ No newline at end of file
+ return mandateId, userId
\ No newline at end of file
diff --git a/modules/chat.py b/modules/chat.py
deleted file mode 100644
index fb577818..00000000
--- a/modules/chat.py
+++ /dev/null
@@ -1,1235 +0,0 @@
-"""
-ChatManager Module for managing AI-Chat workflows.
-Implements a compact and modular architecture for processing
-user requests, agent execution, and result formatting.
-"""
-
-import asyncio
-import os
-import logging
-import json
-import re
-import uuid
-import base64
-from datetime import datetime
-from typing import Dict, Any, List, Optional, Union
-
-# Required imports
-from modules.chat_registry import get_agent_registry
-from modules.lucydom_interface import get_lucydom_interface as dom_interface
-from modules.chat_content_extraction import get_document_contents
-
-# Configure logger
-logger = logging.getLogger(__name__)
-
-# Global settings for the workflow management
-GLOBAL_WorkflowLabels = {
- "system_name": "AI Assistant", # Default system name for logs
- "workflow_status_messages": {
- "init": "Workflow initialized",
- "running": "Running workflow",
- "waiting": "Waiting for input",
- "completed": "Workflow completed",
- "error": "Error in workflow"
- }
-}
-
-class ChatManager:
- """
- Manages the processing of chat requests, agent execution, and
- the integration of results into the workflow.
- """
-
- def __init__(self, mandate_id: int, user_id: int):
- """
- Initializes the ChatManager with mandate and user context.
-
- Args:
- mandate_id: ID of the current mandate
- user_id: ID of the current user
- """
- self.mandate_id = mandate_id
- self.user_id = user_id
- self.mydom = dom_interface(mandate_id, user_id)
- self.agent_registry = get_agent_registry()
- self.agent_registry.set_mydom(self.mydom)
-
- ### Chat Management
-
- async def chat_run(self, user_input: Dict[str, Any], workflow_id: Optional[str] = None) -> Dict[str, Any]:
- """
- Main function for integrating user requests into the workflow. Only initial part, then handover to session as coroutine to finish for frontend
- """
- # 1. Initialize workflow or load existing one
- workflow = self.workflow_init(workflow_id)
- self.log_add(workflow, "Starting workflow processing", level="info", progress=0)
- asyncio.create_task(self.chat_session(user_input, workflow))
- return workflow
-
-
- async def chat_session(self, user_input: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
- """
- Session in the background as event
- """
-
- # 2. Transform user input into a message object and save in workflow
- message_user = await self.chat_message_to_workflow("user", "", user_input, workflow)
-
- # 3. Create project manager prompt and analyze response
- self.log_add(workflow, "Analyzing request and planning work", level="info", progress=10)
- project_manager_response = await self.chat_prompt(message_user, workflow)
- obj_final_documents = project_manager_response.get("obj_final_documents", [])
- obj_workplan = project_manager_response.get("obj_workplan", [])
- obj_user_response = project_manager_response.get("obj_user_response", "")
-
- # Get detected language and set it in the mydom interface
- user_language = project_manager_response.get("user_language", "en")
- self.mydom.set_user_language(user_language)
-
- # 4. Save the response as a message in the workflow and add log entries
- response_message = {
- "role": "assistant",
- "agent_name": "project_manager",
- "content": obj_user_response
- }
- self.message_add(workflow, response_message)
-
- self.log_add(workflow, f"Planned outputs: {len(obj_final_documents)} documents", level="info", progress=20)
- self.log_add(workflow, f"Work plan created with {len(obj_workplan)} steps", level="info", progress=25)
-
- # 5. Execute agents according to work plan
- obj_results = []
- if obj_workplan:
- total_tasks = len(obj_workplan)
- for task_index, task in enumerate(obj_workplan):
- agent_name = task.get("agent", "unknown")
- progress_value = 30 + int((task_index / total_tasks) * 60) # Progress from 30% to 90%
-
- progress_msg = f"Running task {task_index+1}/{total_tasks}: {agent_name}"
- self.log_add(workflow, progress_msg, level="info", progress=progress_value)
-
- task_results = await self.agent_processing(task, workflow)
- obj_results.extend(task_results)
-
- # Log completion of this task
- self.log_add(
- workflow,
- f"Completed task {task_index+1}/{total_tasks}: {agent_name}",
- level="info",
- progress=progress_value + (60/total_tasks)/2
- )
-
- # 6. Create the final response with relevant documents from obj_final_documents
- self.log_add(workflow, "Creating final response", level="info", progress=90)
- final_message = await self.chat_final_message(obj_user_response, obj_final_documents, obj_results)
- self.message_add(workflow, final_message)
-
- # 7. Finalize the workflow
- self.workflow_finish(workflow)
- self.log_add(workflow, "Workflow completed successfully", level="info", progress=100)
-
-
- async def chat_prompt(self, message_user: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
- """
- Creates the prompt for the project manager and processes the response.
-
- Args:
- message_user: Message object with user request
- workflow: Current workflow object
-
- Returns:
- Project manager's response with obj_final_documents, obj_workplan and obj_user_response
- """
- # Get available agents with their capabilities
- available_agents = self.agent_profiles()
-
- # Create a workflow summary
- workflow_summary = await self.workflow_summarize(workflow, message_user)
-
- # Create a list of currently available documents from user input or previously generated documents
- available_documents = self.available_documents_get(workflow, message_user)
- available_docs_str = json.dumps(available_documents, indent=2)
-
- # Create the prompt for the project manager with language detection requirement
- prompt = f"""
-Based on the user request and the provided documents, please analyze the requirements and create a processing plan.
-Also, identify the language of the user's request and include it in your response.
-
-
-{message_user.get('content')}
-
-
-# Previous conversation history:
-
-{workflow_summary}
-
-
-# Available documents (currently in workflow):
-
-{available_docs_str}
-
-
-# Available agents and their capabilities:
-
-{self.parse_json2text(available_agents)}
-
-
-Please analyze the request and create:
-
-1. A list of required result documents (obj_final_documents)
-2. A plan for executing agents (obj_workplan)
-3. A clear response to the user explaining what you're doing (obj_user_response)
-4. Identified language of the user's request (user_language)
-
-## IMPORTANT RULES FOR THE WORKPLAN:
-1. Each input document must either already exist (provided by the user or previously created by an agent) or be created by an agent before it's used.
-2. If necessary, convert input documents to a suitable format using agents when the type doesn't match.
-3. Do not define document inputs that don't exist or haven't been generated beforehand.
-4. Create a logical sequence - earlier agents can create documents that are later used as inputs.
-5. If the user has provided documents but hasn't clearly stated what they want, try to act according to the context.
-
-Your answer must be strictly in the JSON_OUTPUT format, with no additions before or after the JSON object.
-
-JSON_OUTPUT = {{
- "obj_final_documents": ["label",...], # document label in the format 'filename.ext'
- "obj_workplan": [
- {{
- "agent": "agent_name", # Name of an available agent
- "prompt": "Specific instructions to the agent, that he knows what to do with which documents and which output to provide."
- "output_documents": [
- "label":"document label in the format 'filename.ext'",
- "prompt":"AI prompt to describe the content of the file"
- ],
- "input_documents": [
- "label":"document label in the format 'filename.ext'",
- "file_id":id, # if refering to an existing document, provide file_id to select the correct file
- "content_part":"", # provide empty string, if all document contents to consider, otherwise the content_part of the document to focus on
- "prompt":"AI prompt to describe what data to extract from the file."
- ], # If no input documents are needed, include "input_documents" as an empty list
- }}
- # Multiple agent tasks can be added here and should build logically on each other
- ],
- "obj_user_response": "Information to the user about how his request will be solved, in the language of the user's request.",
- "user_language": "en" # Language code (e.g., en, de, fr, es) based on the user's request
-}}
-
-## RULES for input_documents:
-1. The user request refers to documents where "file_source" in available documents is "user". Those documents are in the focus for input
-2. In case of redundant label in available documents, use document with highest sequence_nr if not specified differently
-
-## STRICT RULES FOR document "label":
-1. Every document label MUST include a proper file extension that matches the content type.
-2. Use standard extensions like:
- - ".txt" for text files
- - ".md" for markdown files
- - ".csv" for comma-separated values
- - ".json" for JSON data
- - ".html" for HTML content
- - ".jpg" or ".png" for images
- - ".docx" for Word documents
- - ".xlsx" for Excel files
- - ".pdf" for PDF documents
-3. Use descriptive filenames that indicate the document's purpose (e.g., "analysis_report.txt" rather than just "report.txt")
-4. If you use label for an existing file
-"""
-
- # Call the AI service through mydom for language support
- logger.debug(f"PROJECT MANAGER Planning prompt: {prompt}")
- project_manager_output = await self.mydom.call_ai([
- {
- "role": "system",
- "content": "You are an experienced project manager who analyzes user requests and creates work plans. You pay very careful attention to ensure that all document dependencies are correct and that no non-existent documents are defined as inputs. The output follows strictly the specified format."
- },
- {
- "role": "user",
- "content": prompt
- }
- ])
-
- # Parse the JSON response
- logger.debug(f"PROJECT MANAGER Planning answer: parse_json_response{project_manager_output}")
- return self.parse_json_response(project_manager_output)
-
- async def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
- """
- Integrates user inputs into a Message object including files with complete contents.
-
- Args:
- role: Role of the message sender ('user' or 'assistant')
- agent_name: Name of the agent, if message is from an agent
- chat_message: Input data with "prompt"=str, "list_file_id"=[]
- workflow: Current workflow object
-
- Returns:
- Message object with content and documents including contents
- """
- logger.info(f"Message from {role} {agent_name} sent with {len(chat_message.get('list_file_id', []))} documents")
- logger.debug(f"message = {self.parse_json2text(chat_message)}.")
-
- # Check message content
- message_content = chat_message.get("prompt", "")
- if isinstance(message_content, dict) and "content" in message_content:
- message_content = message_content["content"]
-
- # If message content is empty, no chat
- if role=="user" and (message_content is None or message_content.strip() == ""):
- logger.warning(f"Empty message, no chat")
- message_content = "(No user input received)"
-
- # Process additional files with complete contents
- additional_fileids = chat_message.get("list_file_id", [])
- additional_files = await self.process_file_ids(additional_fileids)
-
- # Create message object
- message_object = {
- "role": role,
- "agent_name": agent_name,
- "content": message_content,
- "documents": additional_files
- }
-
- message_object = self.message_add(workflow, message_object)
- logger.debug(f"message_user = {self.parse_json2text(message_object)}.")
- return message_object
-
- async def chat_final_message(self, obj_user_response: str, obj_final_documents: List[Dict[str, Any]], obj_results: List[Dict[str, Any]]) -> Dict[str, Any]:
- """
- Creates the final response message with review of proposed and delivered.
-
- Args:
- obj_user_response: Initial text response to the user
- obj_final_documents: List of expected response documents
- obj_results: List of generated result documents
-
- Returns:
- Complete message object with content and relevant documents
- """
- # Find documents that match the obj_final_documents requirements
- matching_documents = []
-
- if len(obj_final_documents)>0:
- for answer_label in obj_final_documents:
- # Find matching document in results
- for doc in obj_results:
- doc_name = self.get_filename(doc)
- # Check if this document matches the answer specification
- if doc_name == answer_label:
- content_ref = []
- for c in doc.get("contents"):
- content_ref.append(c.get("summary"))
- doc_ref = {
- "label": doc_name,
- "content_summary": content_ref
- }
- matching_documents.append(doc_ref)
- break
-
- # Use the mydom for language-aware AI calls
- final_prompt = await self.mydom.call_ai([
- {"role": "system", "content": "You are a project manager, who delivers results to a user."},
- {"role": "user", "content": f"""
- Give the final short feedback to the user with reference to the initial statement (obj_user_response). Inform him about the list of files_delivered. You do not need to send the files, this is handled separately. If in the list of files_delivered there might miss some files_promised, just give a comment on this, otherwise task is now completed successful.
-
- Here the data:
- obj_user_response = {self.parse_json2text(obj_user_response)}
- files_promised = {self.parse_json2text(matching_documents)}
- files_delivered = {self.parse_json2text(obj_user_response)}
- """
- }
- ], produce_user_answer=True)
-
- # Create basic message structure with proper fields
- logger.debug(f"FINAL PROMPT = {self.parse_json2text(final_prompt)}.")
- final_message = {
- "role": "assistant",
- "agent_name": "project_manager",
- "content": final_prompt,
- "documents": [] # DO NOT include the results documents, already with agents
- }
-
- logger.debug(f"FINAL MESSAGE = {self.parse_json2text(final_message)}.")
- return final_message
-
-
- ### Workflow
-
- def workflow_init(self, workflow_id: Optional[str] = None) -> Dict[str, Any]:
- """
- Initializes a workflow or loads an existing one with round counting.
-
- Args:
- workflow_id: Optional - ID of the workflow to load
-
- Returns:
- Initialized workflow object
- """
- current_time = datetime.now().isoformat()
-
- if workflow_id is None or not self.mydom.get_workflow(workflow_id):
- # Create new workflow
- new_workflow_id = str(uuid.uuid4()) if workflow_id is None else workflow_id
- workflow = {
- "id": new_workflow_id,
- "mandate_id": self.mandate_id,
- "user_id": self.user_id,
- "name": f"Workflow {new_workflow_id[:8]}",
- "started_at": current_time,
- "messages": [], # Empty list - will be filled with references
- "message_ids": [], # Initialize empty message_ids list
- "logs": [],
- "data_stats": {},
- "current_round": 1,
- "status": "running",
- "last_activity": current_time,
- }
-
- # Save to database - only the workflow metadata
- workflow_db = {
- "id": workflow["id"],
- "mandate_id": workflow["mandate_id"],
- "user_id": workflow["user_id"],
- "name": workflow["name"],
- "started_at": workflow["started_at"],
- "status": workflow["status"],
- "data_stats": workflow["data_stats"],
- "current_round": workflow["current_round"],
- "last_activity": workflow["last_activity"],
- "message_ids": workflow["message_ids"] # Include message_ids
- }
- self.mydom.create_workflow(workflow_db)
-
- self.log_add(workflow, GLOBAL_WorkflowLabels["workflow_status_messages"]["init"], level="info", progress=0)
- return workflow
- else:
- # Load existing workflow
- workflow = self.mydom.load_workflow_state(workflow_id)
-
- # Ensure message_ids exists
- if "message_ids" not in workflow:
- # Initialize from existing messages
- workflow["message_ids"] = [msg["id"] for msg in workflow.get("messages", [])]
-
- # Update in database
- self.mydom.update_workflow(workflow_id, {"message_ids": workflow["message_ids"]})
-
- # Update status and increment round counter
- workflow["status"] = "running"
- workflow["last_activity"] = current_time
-
- # Increment current_round if it exists, otherwise set it to 1
- if "current_round" in workflow:
- workflow["current_round"] += 1
- else:
- workflow["current_round"] = 1
-
- # Update in database - only the relevant workflow fields
- workflow_update = {
- "status": workflow["status"],
- "last_activity": workflow["last_activity"],
- "current_round": workflow["current_round"]
- }
- self.mydom.update_workflow(workflow_id, workflow_update)
-
- self.log_add(workflow, GLOBAL_WorkflowLabels["workflow_status_messages"]["running"], level="info", progress=0)
- return workflow
-
- def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]:
- """
- Finalizes a workflow and sets the status to 'completed'.
-
- Args:
- workflow: Workflow object
-
- Returns:
- Updated workflow object
- """
- # Prepare workflow update data
- workflow_update = {
- "status": "completed",
- "last_activity": datetime.now().isoformat(),
- }
-
- # Update the workflow object in memory
- workflow["status"] = workflow_update["status"]
- workflow["last_activity"] = workflow_update["last_activity"]
-
- # Save workflow state to database - only relevant fields, not the messages list
- self.mydom.update_workflow(workflow["id"], workflow_update)
-
- self.log_add(workflow, GLOBAL_WorkflowLabels["workflow_status_messages"]["completed"], level="info", progress=100)
- return workflow
-
- async def workflow_summarize(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> str:
- """
- Creates a summary of the workflow without the current user message.
-
- Args:
- workflow: Workflow object
- message_user: Current user message
-
- Returns:
- Summary of the workflow
- """
- if not workflow or "messages" not in workflow or not workflow["messages"]:
- return "" # first message
-
- # Go through messages in reverse order (newest first)
- messages = sorted(workflow["messages"], key=lambda m: m.get("sequence_no", 0), reverse=False)
-
- summary_parts = []
- for message in messages:
- if message["id"] != message_user["id"]:
- message_summary = await self.message_summarize(message)
- summary_parts.append(message_summary)
-
- return "\n\n".join(summary_parts)
-
-
-
- ### Agents
-
- def agent_profiles(self) -> List[Dict[str, Any]]:
- """
- Gets information about all available agents.
-
- Returns:
- List with information about all available agents
- """
- return self.agent_registry.get_agent_infos()
-
- async def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
- """
- Prepares input documents for an agent, sorted with newest first.
-
- Args:
- doc_input_list: List of required input documents as specified by the project manager
- workflow: Workflow object
-
- Returns:
- Prepared input documents for the agent, sorted with newest first
- """
- prepared_inputs = []
-
- # Sort workflow messages by sequence number (descending)
- sorted_messages = sorted(
- workflow.get("messages", []),
- key=lambda m: m.get("sequence_no", 0),
- reverse=True
- )
-
- for doc_spec in doc_input_list:
- doc_filename = doc_spec.get("label","")
- doc_file_id = doc_spec.get("file_id","")
-
- found_doc = None
- # Search for the document in sorted workflow messages (newest first)
- for message in sorted_messages:
- for doc in message.get("documents", []):
- if (doc_file_id!="" and doc_file_id==doc.get("file_id")) or (doc_filename!="" and self.get_filename(doc) == doc_filename):
- found_doc = doc
- break
- if found_doc:
- break
- if found_doc:
- # Process document for agent based on the specification
- processed_doc = await self.process_document_for_agent(found_doc, doc_spec)
-
- prepared_inputs.append(processed_doc)
- else:
- logger.warning(f"Document with label '{doc_filename}', file_id '{doc_file_id}' not found in workflow")
-
- return prepared_inputs
-
- async def process_document_for_agent(self, document: Dict[str, Any], doc_spec: Dict[str, Any]) -> Dict[str, Any]:
- """
- Processes a document for an agent based on the document specification.
- Uses AI to extract relevant content from the document based on the specification.
-
- Args:
- document: The document to process
- doc_spec: The document specification from the project manager
-
- Returns:
- Processed document with AI-extracted content
- """
- processed_doc = document.copy()
- part_spec = doc_spec.get("content_part","")
-
- # Process each content item in the document
- if "contents" in processed_doc:
- processed_contents = []
-
- for content in processed_doc["contents"]:
-
- # Check if part required
- if part_spec != "" and part_spec != content.get("name"):
- continue
-
- # Get the data from the content
- data = content.get("data", "")
- processed_content = content.copy()
-
- # Check if content data is base64 encoded
- is_base64 = content.get("metadata", {}).get("base64_encoded", False)
-
- try:
- # Use the AI service to process the document content according to the prompt from the project manager for the document specification
- summary = doc_spec.get("prompt", "Extract the relevant information from this document")
- ai_prompt = f"""
-# Please process the following document content according to this instruction:
-
-{summary}
-
-
-# Document content:
-
-{data}
-
-
-# Extract and provide only the relevant information as requested.
-"""
-
- # Call the AI service through mydom for language support
- processed_data = await self.mydom.call_ai([
- {"role": "system", "content": "You are a document processing assistant. Extract only the relevant information as requested."},
- {"role": "user", "content": ai_prompt}
- ])
-
- # DO NOT change the original data field
- # processed_content["data"] unchanged
- processed_content["data_extracted"] = processed_data
- processed_content["metadata"]["ai_processed"] = True
-
- except Exception as e:
- logger.error(f"Error processing document content with AI: {str(e)}")
- # Fall back to original content if AI processing fails
- processed_content["data_extracted"] = "(no information)"
-
- processed_contents.append(processed_content)
-
- processed_doc["contents"] = processed_contents
-
- return processed_doc
-
- async def agent_processing(self, task: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
- """
- Process a single agent task from the workflow.
- Optimized for the task-based approach where all agents implement process_task.
-
- Args:
- task: The task definition containing agent name, prompt, and document specifications
- workflow: The current workflow object
-
- Returns:
- List of document objects created by the agent
- """
- # 1. Extract task information
- agent_name = task.get("agent")
- agent_prompt = task.get("prompt", "")
-
- # Log the current step
- output_labels = [d.get("label", "unknown") for d in task.get("output_documents", [])]
- step_info = f"Agent '{agent_name}' to create {', '.join(output_labels)}."
- self.log_add(workflow, step_info, level="info")
-
- # Check if prompt is empty
- if agent_prompt == "":
- logger.warning("Empty prompt, no task to do")
- return []
-
- # Get agent from registry
- agent = self.agent_registry.get_agent(agent_name)
- if not agent:
- logger.error(f"Agent '{agent_name}' not found")
- return []
-
- # Prepare output document specifications
- output_specs = []
- for doc in task.get("output_documents", []):
- output_spec = {
- "label": doc.get("label"),
- "description": doc.get("prompt", "")
- }
- output_specs.append(output_spec)
-
- # Prepare input documents for the agent
- input_documents = await self.agent_input_documents(task.get('input_documents', []), workflow)
-
- # Create a standardized task object for the agent
- agent_task = {
- "task_id": str(uuid.uuid4()),
- "workflow_id": workflow.get("id"),
- "prompt": agent_prompt,
- "input_documents": input_documents,
- "output_specifications": output_specs,
- "context": {
- "workflow_round": workflow.get("current_round", 1),
- "agent_type": agent_name,
- "timestamp": datetime.now().isoformat(),
- "language": self.mydom.user_language # Pass language to agent
- }
- }
-
- # Execute the agent with the standardized task
- try:
- # Process the task using the agent's standardized interface
- logger.debug("TASK: "+self.parse_json2text(agent_task))
- logger.debug(f"Agent '{agent_name}' AI service available: {agent.mydom is not None}")
-
- agent_results = await agent.process_task(agent_task)
-
- logger.debug(f"Agent '{agent_name}' completed task. RESULT: {self.parse_json2text(agent_results)}")
-
- # Log the agent response
- self.log_add(
- workflow,
- f"Agent '{agent_name}' completed task. Feedback: {agent_results.get('feedback', 'No feedback provided')}",
- level="info"
- )
-
- # Store produced files and prepare input object for message
- agent_inputs = {
- "prompt": agent_results.get("feedback", ""),
- "list_file_id": self.agent_save_documents(agent_results)
- }
-
- # Create a message in the workflow with the agent's response
- agent_message = await self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
- logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")
-
- return agent_message.get("documents", [])
-
- except Exception as e:
- error_msg = f"Error executing agent '{agent_name}': {str(e)}"
- logger.error(error_msg, exc_info=True) # Add exc_info=True to get full traceback
- self.log_add(workflow, error_msg, level="error")
- return []
-
- def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
- """
- Saves all documents from agent results as files and returns a list of file IDs.
- Enhanced to handle the standardized document format from agents.
-
- Args:
- agent_results: Dictionary containing agent feedback and documents
-
- Returns:
- List of file IDs for the saved documents
- """
- file_ids = []
-
- # Extract documents from agent results
- documents = agent_results.get("documents", [])
-
- for doc in documents:
- try:
- # Extract label (filename) and content
- label = doc.get("label", "unnamed_file.txt")
- content = doc.get("content", "")
-
- # Split label into name and extension
- name, ext = os.path.splitext(label)
- if ext.startswith('.'):
- ext = ext[1:] # Remove leading dot
- elif not ext:
- # If no extension is provided, default to .txt for text content
- ext = "txt"
- label = f"{label}.{ext}"
-
- # Determine if content is base64 encoded
- is_base64 = False
- if isinstance(content, dict) and content.get("metadata", {}).get("base64_encoded", False):
- is_base64 = True
- content = content.get("data", "")
-
- # Convert content to bytes
- if isinstance(content, str):
- if is_base64:
- # Decode base64 to bytes
- try:
- file_content = base64.b64decode(content)
- except Exception as e:
- logger.warning(f"Failed to decode base64 content: {str(e)}")
- file_content = content.encode('utf-8')
- else:
- # Convert text to bytes
- file_content = content.encode('utf-8')
- else:
- # Already bytes
- file_content = content
-
- # Save file to database
- file_meta = self.mydom.save_uploaded_file(file_content, label)
-
- if file_meta and "id" in file_meta:
- file_id = file_meta["id"]
- file_ids.append(file_id)
- logger.info(f"Saved document '{label}' with file ID: {file_id}")
- else:
- logger.warning(f"Failed to save document '{label}'")
-
- except Exception as e:
- logger.error(f"Error saving document from agent results: {str(e)}")
- # Continue with other documents instead of failing
- continue
-
- return file_ids
-
-
- ### Messages
-
- def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
- """
- Adds a message to the workflow and updates last_activity.
- Saves the message in the database and updates the workflow with references.
-
- Args:
- workflow: Workflow object
- message: Message to be saved
-
- Returns:
- Added message
- """
- current_time = datetime.now().isoformat()
-
- # Ensure messages list exists
- if "messages" not in workflow:
- workflow["messages"] = []
-
- # Generate new message ID if not present
- if "id" not in message:
- message["id"] = f"msg_{str(uuid.uuid4())}"
-
- # Add workflow ID and timestamps
- message["workflow_id"] = workflow["id"]
- message["started_at"] = current_time
- message["finished_at"] = current_time
-
- # Set sequence number
- message["sequence_no"] = len(workflow["messages"]) + 1
-
- # Ensure required fields are present
- if "role" not in message:
- # Set a default role based on agent_name
- message["role"] = "assistant" if message.get("agent_name") else "user"
-
- if "agent_name" not in message:
- message["agent_name"] = ""
-
- # Set status
- message["status"] = "completed"
-
- # Add message to workflow
- workflow["messages"].append(message)
-
- # Ensure message_ids list exists
- if "message_ids" not in workflow:
- workflow["message_ids"] = []
-
- # Add message ID to the message_ids list
- workflow["message_ids"].append(message["id"])
-
- # Update workflow status
- workflow["last_activity"] = current_time
-
- # Save to database - first the message itself
- self.mydom.create_workflow_message(message)
-
- # Then save the workflow with updated references
- workflow_update = {
- "last_activity": current_time,
- "message_ids": workflow["message_ids"] # Update the message_ids field
- }
- self.mydom.update_workflow(workflow["id"], workflow_update)
-
- return message
-
- async def message_summarize(self, message: Dict[str, Any]) -> str:
- """
- Creates a summary of a message including its documents.
-
- Args:
- message: Message to summarize
-
- Returns:
- Summary of the message
- """
- role = message.get("role", "undefined")
- agent_name = message.get("agent_name", "")
- content = message.get("content", "")
-
- try:
- # Use the mydom for language-aware AI calls
- content_summary = await self.mydom.call_ai([
- {"role": "system", "content": f"You are a chat message summarizer. Create a very concise summary (2-3 sentences, maximum 300 characters)"},
- {"role": "user", "content": content}
- ])
- except Exception as e:
- logger.error(f"Error creating summary: {str(e)}")
- content_summary = content[:200] + "..."
-
- # Summarize documents
- docs_summary = ""
- if "documents" in message and message["documents"]:
- docs_list = []
- for i, doc in enumerate(message["documents"]):
- doc_name = self.get_filename(doc)
- docs_list.append(doc_name)
- if docs_list:
- docs_summary = "\nDocuments:" + "\n- ".join(docs_list)
-
- return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
-
- async def message_summarize_content(self, content: Dict[str, Any]) -> str:
- """
- Generates a summary for a content item using AI.
-
- Args:
- content: Content item to summarize (already processed by get_document_contents)
-
- Returns:
- Brief summary of the content
- """
- # Extract relevant information
- data = content.get("data", "")
- content_type = content.get("content_type", "text/plain")
- is_text = content.get("metadata", {}).get("is_text", False)
-
- try:
- # Use the mydom for language-aware AI calls
- summary = await self.mydom.call_ai([
- {"role": "system", "content": "You are a content summarizer. Create very concise summary (1-2 sentences, maximum 200 characters) about this file."},
- {"role": "user", "content": f"Summarize this {content_type} content briefly:\n\n{data}"}
- ])
- return summary
-
- except Exception as e:
- logger.error(f"Error generating content summary: {str(e)}")
- return f"Text content ({content_type})"
-
-
- ### Documents
-
- async def process_file_ids(self, file_ids: List[int]) -> List[Dict[str, Any]]:
- """
- Processes a list of File-IDs and returns the corresponding file objects as a list of Document objects.
- Loads all contents directly and adds summaries to each content item.
-
- Args:
- file_ids: List of file IDs
-
- Returns:
- List of Document objects with contents and summaries
- """
- documents = []
- logger.info(f"Processing {len(file_ids)} files")
-
- for file_id in file_ids:
- try:
- # Check if the file exists
- file = self.mydom.get_file(file_id)
- if not file:
- logger.warning(f"File with ID {file_id} not found")
- continue
-
- # Check if file belongs to the current mandate
- if file.get("mandate_id") != self.mandate_id:
- logger.warning(f"File {file_id} does not belong to mandate {self.mandate_id}")
- continue
-
- # Load file content
- file_content = self.mydom.get_file_data(file_id)
- if file_content is None:
- logger.warning(f"No content found for file with ID {file_id}")
- continue
-
- # Create document
- file_name_ext = file.get("name")
- document = {
- "id": f"doc_{str(uuid.uuid4())}",
- "file_id": file_id,
- "name": os.path.splitext(file_name_ext)[0] if os.path.splitext(file_name_ext)[0] else "noname",
- "ext": os.path.splitext(file_name_ext)[1][1:] if os.path.splitext(file_name_ext)[1] else "bin",
- "data": base64.b64encode(file_content).decode('utf-8'), # Add file data as base64
- "contents": []
- }
-
- # Extract contents
- contents = get_document_contents(file, file_content)
-
- # Add summaries to each content item
- for content in contents:
- content["summary"] = await self.message_summarize_content(content)
-
- document["contents"] = contents
-
- logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries")
- documents.append(document)
-
- except Exception as e:
- logger.error(f"Error processing file {file_id}: {str(e)}")
- # Continue with remaining files instead of failing
- continue
-
- return documents
-
- def available_documents_get(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> List[Dict[str, Any]]:
- """
- Determines all currently available documents from user input and already generated documents.
-
- Args:
- message_user: Current message from the user
- workflow: Current workflow object
-
- Returns:
- List with information about all available documents, sorted by message sequence_nr in descending order
- """
- available_docs = []
-
- if "messages" in workflow and workflow["messages"]:
- for message in workflow["messages"]:
- message_id = message.get("id", "unknown")
- sequence_nr = message.get("sequence_no", 0)
-
- # Determine source
- source = "user" if message_id == message_user.get("id") else "workflow"
-
- # Process documents in this message
- if "documents" in message and message["documents"]:
- for doc in message["documents"]:
- # Get filename using our helper method
- filename = self.get_filename(doc)
- file_id = doc.get("file_id")
-
- # Extract summaries from all contents
- content_summaries = []
- for content in doc.get("contents", []):
- content_summaries.append({
- "content_part": content.get("name","noname"),
- "metadata": content.get("metadata",""),
- "summary": content.get("summary","No summary"),
- })
-
- # Create document info
- doc_info = {
- "sequence_nr": sequence_nr,
- "file_source": source,
- "file_id": file_id,
- "message_id": message_id,
- "label": filename,
- "content_summary_list": content_summaries,
- }
- available_docs.append(doc_info)
-
- # Sort by message sequence_nr in descending order (newest first)
- available_docs.sort(key=lambda x: x["sequence_nr"], reverse=True)
-
- logger.info(f"Available documents: {len(available_docs)}")
- return available_docs
-
- def save_document_to_file(self, document: Dict[str, Any]) -> Optional[int]:
- """
- Saves a Document as a file in the database and returns the File-ID.
-
- Args:
- document: Document object with contents
-
- Returns:
- File-ID or None on error
- """
- try:
- if not document or "contents" not in document or not document["contents"]:
- logger.warning("Document has no contents to save")
- return None
-
- # Take the first content as main content
- main_content = document["contents"][0]
- name = main_content.get("name", "document")
- content_type = main_content.get("content_type", "text/plain")
- data = main_content.get("data", b"")
-
- # Ensure binary data
- if isinstance(data, str):
- data = data.encode('utf-8')
-
- # Save file in the database
- file_meta = self.mydom.save_uploaded_file(data, name)
- if file_meta and "id" in file_meta:
- # Update the Document with the File-ID
- document["file_id"] = file_meta["id"]
- return file_meta["id"]
-
- return None
- except Exception as e:
- logger.error(f"Error saving document as file: {str(e)}")
- return None
-
- def add_document_to_message(self, message: Dict[str, Any], document: Dict[str, Any]) -> Dict[str, Any]:
- """
- Adds a Document to a message.
-
- Args:
- message: Message to which the document should be added
- document: Document to add
-
- Returns:
- Updated message
- """
- # Ensure the documents list exists
- if "documents" not in message:
- message["documents"] = []
-
- # Add Document
- message["documents"].append(document)
-
- return message
-
-
- ### Tools
-
- def get_filename(self, document: Dict[str, Any]) -> str:
- """
- Gets the filename from a document by combining name and extension.
-
- Args:
- document: Document object
-
- Returns:
- Filename with extension
- """
- name = document.get("name", "unnamed")
- ext = document.get("ext", "")
- if ext:
- return f"{name}.{ext}"
- return name
-
- def log_add(self, workflow: Dict[str, Any], message: str, level: str = "info",
- progress: Optional[int] = None) -> str:
- """
- Adds a log entry to the workflow and also logs it in the logger.
- Enhanced with standardized formatting and workflow status tracking.
-
- Args:
- workflow: Workflow object
- message: Log message
- level: Log level (info, warning, error)
- progress: Optional - Progress value (0-100)
-
- Returns:
- ID of the created log entry
- """
- # Ensure logs list exists
- if "logs" not in workflow:
- workflow["logs"] = []
-
- # Generate log ID
- log_id = f"log_{str(uuid.uuid4())}"
-
- # Get workflow status
- workflow_status = workflow.get("status", "running")
-
- # Set agent_name from global settings
- agent_name = GLOBAL_WorkflowLabels.get("system_name", "AI Assistant")
-
- # Create log entry
- log_entry = {
- "id": log_id,
- "workflow_id": workflow["id"],
- "message": message,
- "type": level,
- "timestamp": datetime.now().isoformat(),
- "agent_name": agent_name,
- "status": workflow_status
- }
-
- # Add progress if provided
- if progress is not None:
- log_entry["progress"] = progress
-
- # Add log to workflow
- workflow["logs"].append(log_entry)
-
- # Save in database
- self.mydom.create_workflow_log(log_entry)
-
- # Also log in logger
- if level == "info":
- logger.info(f"Workflow {workflow['id']}: {message}")
- elif level == "warning":
- logger.warning(f"Workflow {workflow['id']}: {message}")
- elif level == "error":
- logger.error(f"Workflow {workflow['id']}: {message}")
-
- return log_id
-
- def parse_json2text(self, json_obj: Any) -> str:
- """
- Converts a JSON object to a readable text representation.
-
- Args:
- json_obj: JSON object to convert
-
- Returns:
- Formatted text representation
- """
- if not json_obj:
- return "No data available"
-
- try:
- # Format with indentation for better readability
- return json.dumps(json_obj, indent=2, ensure_ascii=False)
- except Exception as e:
- logger.error(f"Error in JSON conversion: {str(e)}")
- return str(json_obj)
-
- def parse_json_response(self, response_text: str) -> Dict[str, Any]:
- """
- Parses the JSON response from a text.
-
- Args:
- response_text: Text with JSON content
-
- Returns:
- Parsed JSON data
- """
- try:
- # Extract JSON from the text (if mixed with other content)
- json_start = response_text.find('{')
- json_end = response_text.rfind('}') + 1
-
- if json_start >= 0 and json_end > json_start:
- json_str = response_text[json_start:json_end]
- return json.loads(json_str)
- else:
- # Try to parse the entire text
- return json.loads(response_text)
- except json.JSONDecodeError as e:
- logger.error(f"JSON parsing error: {str(e)}")
- # Fallback: Return empty structure
- return {
- "obj_final_documents": [],
- "obj_workplan": [],
- "obj_user_response": "Sorry, I could not parse your data.",
- "user_language": "en"
- }
-
-
-# Singleton factory for the ChatManager
-_chat_managers = {}
-
-def get_chat_manager(mandate_id: int = 0, user_id: int = 0) -> ChatManager:
- """
- Returns a ChatManager for the specified context.
- Reuses existing instances.
-
- Args:
- mandate_id: ID of the mandate
- user_id: ID of the user
-
- Returns:
- ChatManager instance
- """
- context_key = f"{mandate_id}_{user_id}"
- if context_key not in _chat_managers:
- _chat_managers[context_key] = ChatManager(mandate_id, user_id)
- return _chat_managers[context_key]
\ No newline at end of file
diff --git a/modules/chat_content_extraction.py b/modules/chat_content_extraction.py
deleted file mode 100644
index 27d0829e..00000000
--- a/modules/chat_content_extraction.py
+++ /dev/null
@@ -1,778 +0,0 @@
-"""
-Module for extracting content from various file formats.
-Provides specialized functions for processing text, PDF, Office documents, images, etc.
-"""
-
-import logging
-import os
-import io
-from typing import Dict, Any, List, Optional, Union, Tuple
-import base64
-
-# Configure logger
-logger = logging.getLogger(__name__)
-
-# Optional imports - only loaded when needed
-pdf_extractor_loaded = False
-office_extractor_loaded = False
-image_processor_loaded = False
-
-def get_document_contents(file_metadata: Dict[str, Any], file_content: bytes) -> List[Dict[str, Any]]:
- """
- Main function for extracting content from a file based on its MIME type.
- Delegates to specialized extraction functions.
-
- Args:
- file_metadata: File metadata (Name, MIME type, etc.)
- file_content: Binary data of the file
-
- Returns:
- List of Document-Content objects with metadata and is_text flag
- """
- try:
- mime_type = file_metadata.get("mime_type", "application/octet-stream")
- file_name = file_metadata.get("name", "unknown")
-
- logger.info(f"Extracting content from file '{file_name}' (MIME type: {mime_type})")
-
- # Extract content based on MIME type
- contents = []
-
- # Text-based formats
- if mime_type.startswith("text/") or mime_type in [
- "application/json",
- "application/xml",
- "application/javascript",
- "application/x-python"
- ]:
- contents.extend(extract_text_content(file_name, file_content, mime_type))
-
- # CSV Format
- elif mime_type == "text/csv":
- contents.extend(extract_csv_content(file_name, file_content))
-
- # Images
- elif mime_type.startswith("image/"):
- contents.extend(extract_image_content(file_name, file_content, mime_type))
-
- # PDF Documents
- elif mime_type == "application/pdf":
- contents.extend(extract_pdf_content(file_name, file_content))
-
- # Word Documents
- elif mime_type in [
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
- "application/msword"
- ]:
- contents.extend(extract_word_content(file_name, file_content, mime_type))
-
- # Excel Documents
- elif mime_type in [
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- "application/vnd.ms-excel"
- ]:
- contents.extend(extract_excel_content(file_name, file_content, mime_type))
-
- # PowerPoint Documents
- elif mime_type in [
- "application/vnd.openxmlformats-officedocument.presentationml.presentation",
- "application/vnd.ms-powerpoint"
- ]:
- contents.extend(extract_powerpoint_content(file_name, file_content, mime_type))
-
- # Binary data as fallback for unknown formats
- else:
- contents.extend(extract_binary_content(file_name, file_content, mime_type))
-
- # Fallback when no content could be extracted
- if not contents:
- logger.warning(f"No content extracted from file '{file_name}', using binary fallback")
- contents.append({
- "sequence_nr": 1,
- "name": '1_undefined',
- "ext": os.path.splitext(file_name)[1][1:] if os.path.splitext(file_name)[1] else "bin",
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False
- }
- })
-
- # Add generic attributes for all documents
- for content in contents:
- if isinstance(content.get("data"), bytes):
- content["data"] = base64.b64encode(content["data"]).decode('utf-8')
- # Add base64 flag
- if "metadata" not in content:
- content["metadata"] = {}
- content["metadata"]["base64_encoded"] = True
-
- logger.info(f"Successfully extracted {len(contents)} content items from file '{file_name}'")
- return contents
-
- except Exception as e:
- logger.error(f"Error during content extraction: {str(e)}")
- # Fallback on error - return original data
- return [{
- "sequence_nr": 1,
- "name": file_metadata.get("name", "unknown"),
- "ext": os.path.splitext(file_metadata.get("name", ""))[1][1:] if os.path.splitext(file_metadata.get("name", ""))[1] else "bin",
- "content_type": file_metadata.get("mime_type", "application/octet-stream"),
- "data": file_content,
- "metadata": {
- "is_text": False
- }
- }]
-
-
-def _load_pdf_extractor():
- """Loads PDF extraction libraries when needed"""
- global pdf_extractor_loaded
- if not pdf_extractor_loaded:
- try:
- global PyPDF2, fitz
- import PyPDF2
- import fitz # PyMuPDF for more extensive PDF processing
- pdf_extractor_loaded = True
- logger.info("PDF extraction libraries successfully loaded")
- except ImportError as e:
- logger.warning(f"PDF extraction libraries could not be loaded: {e}")
-
-def _load_office_extractor():
- """Loads Office document extraction libraries when needed"""
- global office_extractor_loaded
- if not office_extractor_loaded:
- try:
- global docx, openpyxl
- import docx # python-docx for Word documents
- import openpyxl # for Excel files
- office_extractor_loaded = True
- logger.info("Office extraction libraries successfully loaded")
- except ImportError as e:
- logger.warning(f"Office extraction libraries could not be loaded: {e}")
-
-def _load_image_processor():
- """Loads image processing libraries when needed"""
- global image_processor_loaded
- if not image_processor_loaded:
- try:
- global PIL, Image
- from PIL import Image
- image_processor_loaded = True
- logger.info("Image processing libraries successfully loaded")
- except ImportError as e:
- logger.warning(f"Image processing libraries could not be loaded: {e}")
-
-def extract_text_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
- """
- Extracts text from text files.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
- mime_type: MIME type of the file
-
- Returns:
- List of Text-Content objects with metadata.is_text = True
- """
- try:
- # Keep original file extension
- file_extension = os.path.splitext(file_name)[1][1:] if os.path.splitext(file_name)[1] else "txt"
-
- # Extract text content
- text_content = file_content.decode('utf-8')
- return [{
- "sequence_nr": 1,
- "name": "1_text", # Simplified naming
- "ext": file_extension,
- "content_type": "text",
- "data": text_content,
- "metadata": {
- "is_text": True
- }
- }]
- except UnicodeDecodeError:
- logger.warning(f"Could not decode text from file '{file_name}' as UTF-8, trying alternative encodings")
- try:
- # Try alternative encodings
- for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
- try:
- text_content = file_content.decode(encoding)
- logger.info(f"Text successfully decoded with encoding {encoding}")
- return [{
- "sequence_nr": 1,
- "name": "1_text", # Simplified naming
- "ext": file_extension,
- "content_type": "text",
- "data": text_content,
- "metadata": {
- "is_text": True,
- "encoding": encoding
- }
- }]
- except UnicodeDecodeError:
- continue
-
- # Fallback to binary data if no encoding works
- logger.warning(f"Could not decode text, using binary data")
- return [{
- "sequence_nr": 1,
- "name": "1_binary", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False
- }
- }]
- except Exception as e:
- logger.error(f"Error in alternative text decoding: {str(e)}")
- # Return binary data as fallback
- return [{
- "sequence_nr": 1,
- "name": "1_binary", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False
- }
- }]
-
-def extract_csv_content(file_name: str, file_content: bytes) -> List[Dict[str, Any]]:
- """
- Extracts content from CSV files.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
-
- Returns:
- List of CSV-Content objects with metadata.is_text = True
- """
- try:
- # Extract text content
- csv_content = file_content.decode('utf-8')
- return [{
- "sequence_nr": 1,
- "name": "1_csv", # Simplified naming
- "ext": "csv",
- "content_type": "csv",
- "data": csv_content,
- "metadata": {
- "is_text": True,
- "format": "csv"
- }
- }]
- except UnicodeDecodeError:
- logger.warning(f"Could not decode CSV from file '{file_name}' as UTF-8, trying alternative encodings")
- try:
- # Try alternative encodings for CSV
- for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
- try:
- csv_content = file_content.decode(encoding)
- logger.info(f"CSV successfully decoded with encoding {encoding}")
- return [{
- "sequence_nr": 1,
- "name": "1_csv", # Simplified naming
- "ext": "csv",
- "content_type": "csv",
- "data": csv_content,
- "metadata": {
- "is_text": True,
- "encoding": encoding,
- "format": "csv"
- }
- }]
- except UnicodeDecodeError:
- continue
-
- # Fallback to binary data
- return [{
- "sequence_nr": 1,
- "name": "1_binary", # Simplified naming
- "ext": "csv",
- "content_type": "text/csv",
- "data": file_content,
- "metadata": {
- "is_text": False
- }
- }]
- except Exception as e:
- logger.error(f"Error in alternative CSV decoding: {str(e)}")
- return [{
- "sequence_nr": 1,
- "name": "1_binary", # Simplified naming
- "ext": "csv",
- "content_type": "text/csv",
- "data": file_content,
- "metadata": {
- "is_text": False
- }
- }]
-
-def extract_image_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
- """
- Extracts content from image files and optionally generates metadata descriptions.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
- mime_type: MIME type of the file
-
- Returns:
- List of Image-Content objects with metadata.is_text = False
- """
-
- # Extract file extension from MIME type or filename
- file_extension = mime_type.split('/')[-1]
- if file_extension == "jpeg":
- file_extension = "jpg"
-
- # If possible, analyze image and extract metadata
- image_metadata = {
- "is_text": False,
- "format": "image"
- }
- image_description = None
-
- try:
- _load_image_processor()
- if image_processor_loaded and file_content and len(file_content) > 0:
- with io.BytesIO(file_content) as img_stream:
- try:
- img = Image.open(img_stream)
- # Check if the image was actually loaded
- img.verify()
- # To safely continue working, reload
- img_stream.seek(0)
- img = Image.open(img_stream)
- image_metadata.update({
- "format": img.format,
- "mode": img.mode,
- "width": img.width,
- "height": img.height
- })
- # Extract EXIF data if available
- if hasattr(img, '_getexif') and callable(img._getexif):
- exif = img._getexif()
- if exif:
- exif_data = {}
- for tag_id, value in exif.items():
- exif_data[f"tag_{tag_id}"] = str(value)
- image_metadata["exif"] = exif_data
-
- # Generate image description
- image_description = f"Image ({img.width}x{img.height}, {img.format}, {img.mode})"
- except Exception as inner_e:
- logger.warning(f"Error processing image: {str(inner_e)}")
- image_metadata["error"] = str(inner_e)
- image_description = f"Image (unable to process: {str(inner_e)})"
- except Exception as e:
- logger.warning(f"Could not extract image metadata: {str(e)}")
- image_metadata["error"] = str(e)
-
-
- # Return image content
- contents = [{
- "sequence_nr": 1,
- "name": "1_image", # Simplified naming
- "ext": file_extension,
- "content_type": "image",
- "data": file_content,
- "metadata": image_metadata
- }]
-
- # If image description available, add as additional text content
- if image_description:
- contents.append({
- "sequence_nr": 2,
- "name": "2_text_image_info", # Simplified naming with label
- "ext": "txt",
- "content_type": "text",
- "data": image_description,
- "metadata": {
- "is_text": True,
- "image_description": True
- }
- })
-
- return contents
-
-def extract_pdf_content(file_name: str, file_content: bytes) -> List[Dict[str, Any]]:
- """
- Extracts text and images from PDF files.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
-
- Returns:
- List of PDF-Content objects (text and images) with metadata.is_text flag
- """
- contents = []
- extracted_content_found = False
-
- try:
- # Load PDF extraction libraries
- _load_pdf_extractor()
- if not pdf_extractor_loaded:
- logger.warning("PDF extraction not possible: Libraries not available")
- # Add original file as binary content
- contents.append({
- "sequence_nr": 1,
- "name": "1_pdf", # Simplified naming
- "ext": "pdf",
- "content_type": "application/pdf",
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "pdf"
- }
- })
- return contents
-
- # Extract text with PyPDF2
- extracted_text = ""
- pdf_metadata = {}
- with io.BytesIO(file_content) as pdf_stream:
- pdf_reader = PyPDF2.PdfReader(pdf_stream)
-
- # Extract metadata
- pdf_info = pdf_reader.metadata or {}
- for key, value in pdf_info.items():
- if key.startswith('/'):
- pdf_metadata[key[1:]] = value
- else:
- pdf_metadata[key] = value
-
- # Extract text from all pages
- for page_num in range(len(pdf_reader.pages)):
- page = pdf_reader.pages[page_num]
- page_text = page.extract_text()
- if page_text:
- extracted_text += f"--- Page {page_num + 1} ---\n{page_text}\n\n"
-
- # If text was found, add as separate content
- if extracted_text.strip():
- extracted_content_found = True
- contents.append({
- "sequence_nr": len(contents) + 1,
- "name": f"{len(contents) + 1}_text", # Simplified naming
- "ext": "txt",
- "content_type": "text",
- "data": extracted_text,
- "metadata": {
- "is_text": True,
- "source": "pdf",
- "pages": len(pdf_reader.pages),
- "pdf_metadata": pdf_metadata
- }
- })
-
- # Extract images with PyMuPDF (fitz)
- try:
- with io.BytesIO(file_content) as pdf_stream:
- doc = fitz.open(stream=pdf_stream, filetype="pdf")
- image_count = 0
-
- for page_num in range(len(doc)):
- page = doc[page_num]
- image_list = page.get_images(full=True)
-
- for img_index, img_info in enumerate(image_list):
- try:
- image_count += 1
- xref = img_info[0]
- base_image = doc.extract_image(xref)
- image_bytes = base_image["image"]
- image_ext = base_image["ext"]
-
- # Add image as content
- extracted_content_found = True
- contents.append({
- "sequence_nr": len(contents) + 1,
- "name": f"{len(contents) + 1}_image_page{page_num+1}_{img_index+1}", # Simplified naming with label
- "ext": image_ext,
- "content_type": f"image/{image_ext}",
- "data": image_bytes,
- "metadata": {
- "is_text": False,
- "source": "pdf",
- "page": page_num + 1,
- "index": img_index
- }
- })
- except Exception as img_e:
- logger.warning(f"Error extracting image {img_index} on page {page_num + 1}: {str(img_e)}")
-
- # Close document
- doc.close()
-
- except Exception as img_extract_e:
- logger.warning(f"Error extracting images from PDF: {str(img_extract_e)}")
-
- except Exception as e:
- logger.error(f"Error in PDF extraction: {str(e)}")
-
- # If no content was extracted, add the original PDF
- if not extracted_content_found:
- contents.append({
- "sequence_nr": 1,
- "name": "1_pdf", # Simplified naming
- "ext": "pdf",
- "content_type": "application/pdf",
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "pdf"
- }
- })
-
- return contents
-
-def extract_word_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
- """
- Extracts text and images from Word documents.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
- mime_type: MIME type of the file
-
- Returns:
- List of Word-Content objects (text and possibly images) with metadata.is_text flag
- """
- contents = []
- extracted_content_found = False
-
- # Determine file extension
- file_extension = "docx" if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" else "doc"
-
- try:
- # Load Office extraction libraries
- _load_office_extractor()
- if not office_extractor_loaded:
- logger.warning("Word extraction not possible: Libraries not available")
- # Add original file as binary content
- contents.append({
- "sequence_nr": 1,
- "name": "1_word", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "word"
- }
- })
- return contents
-
- # Only supports DOCX (newer format)
- if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
- with io.BytesIO(file_content) as docx_stream:
- doc = docx.Document(docx_stream)
-
- # Extract text
- full_text = []
- for para in doc.paragraphs:
- full_text.append(para.text)
-
- # Extract tables
- for table in doc.tables:
- for row in table.rows:
- row_text = []
- for cell in row.cells:
- row_text.append(cell.text)
- full_text.append(" | ".join(row_text))
-
- extracted_text = "\n\n".join(full_text)
-
- # Add extracted text as content
- if extracted_text.strip():
- extracted_content_found = True
- contents.append({
- "sequence_nr": 1,
- "name": "1_text", # Simplified naming
- "ext": "txt",
- "content_type": "text",
- "data": extracted_text,
- "metadata": {
- "is_text": True,
- "source": "docx",
- "paragraph_count": len(doc.paragraphs),
- "table_count": len(doc.tables)
- }
- })
- else:
- logger.warning(f"Extraction from old Word format (DOC) not supported")
-
- except Exception as e:
- logger.error(f"Error in Word extraction: {str(e)}")
-
- # If no content was extracted, add the original document
- if not extracted_content_found:
- contents.append({
- "sequence_nr": 1,
- "name": "1_word", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "word"
- }
- })
-
- return contents
-
-def extract_excel_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
- """
- Extracts table data from Excel files.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
- mime_type: MIME type of the file
-
- Returns:
- List of Excel-Content objects with metadata.is_text flag
- """
- contents = []
- extracted_content_found = False
-
- # Determine file extension
- file_extension = "xlsx" if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else "xls"
-
- try:
- # Load Office extraction libraries
- _load_office_extractor()
- if not office_extractor_loaded:
- logger.warning("Excel extraction not possible: Libraries not available")
- # Add original file as binary content
- contents.append({
- "sequence_nr": 1,
- "name": "1_excel", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "excel"
- }
- })
- return contents
-
- # Only supports XLSX (newer format)
- if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
- with io.BytesIO(file_content) as xlsx_stream:
- workbook = openpyxl.load_workbook(xlsx_stream, data_only=True)
-
- # Extract each worksheet as separate CSV content
- for sheet_index, sheet_name in enumerate(workbook.sheetnames):
- sheet = workbook[sheet_name]
-
- # Format data as CSV
- csv_rows = []
- for row in sheet.iter_rows():
- csv_row = []
- for cell in row:
- value = cell.value
- if value is None:
- csv_row.append("")
- else:
- csv_row.append(str(value).replace('"', '""'))
- csv_rows.append(','.join(f'"{cell}"' for cell in csv_row))
-
- csv_content = "\n".join(csv_rows)
-
- # Add as CSV content
- if csv_content.strip():
- extracted_content_found = True
- sheet_safe_name = sheet_name.replace(" ", "_").replace("/", "_").replace("\\", "_")
- contents.append({
- "sequence_nr": len(contents) + 1,
- "name": f"{len(contents) + 1}_csv_{sheet_safe_name}", # Simplified naming with sheet label
- "ext": "csv",
- "content_type": "csv",
- "data": csv_content,
- "metadata": {
- "is_text": True,
- "source": "xlsx",
- "sheet": sheet_name,
- "format": "csv"
- }
- })
- else:
- logger.warning(f"Extraction from old Excel format (XLS) not supported")
-
- except Exception as e:
- logger.error(f"Error in Excel extraction: {str(e)}")
-
- # If no content was extracted, add the original document
- if not extracted_content_found:
- contents.append({
- "sequence_nr": 1,
- "name": "1_excel", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "excel"
- }
- })
-
- return contents
-
-def extract_powerpoint_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
- """
- Extracts content from PowerPoint presentations.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
- mime_type: MIME type of the file
-
- Returns:
- List of PowerPoint-Content objects with metadata.is_text = False
- """
- # For PowerPoint, we currently only return the original binary file
- # A complete extraction would require more specialized libraries
- file_extension = "pptx" if mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation" else "ppt"
- return [{
- "sequence_nr": 1,
- "name": "1_powerpoint", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "powerpoint"
- }
- }]
-
-def extract_binary_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
- """
- Fallback for binary files where no specific extraction is possible.
-
- Args:
- file_name: Name of the file
- file_content: Binary data of the file
- mime_type: MIME type of the file
-
- Returns:
- List with a binary Content object with metadata.is_text = False
- """
- file_extension = os.path.splitext(file_name)[1][1:] if os.path.splitext(file_name)[1] else "bin"
- return [{
- "sequence_nr": 1,
- "name": "1_binary", # Simplified naming
- "ext": file_extension,
- "content_type": mime_type,
- "data": file_content,
- "metadata": {
- "is_text": False,
- "format": "binary"
- }
- }]
\ No newline at end of file
diff --git a/modules/configuration.py b/modules/configuration.py
index 7021ddd3..f7e9b81f 100644
--- a/modules/configuration.py
+++ b/modules/configuration.py
@@ -20,40 +20,40 @@ class Configuration:
def __init__(self):
"""Initialize the configuration object"""
self._data = {}
- self._config_file_path = None
- self._env_file_path = None
- self._config_mtime = 0
- self._env_mtime = 0
+ self._configFilePath = None
+ self._envFilePath = None
+ self._configMtime = 0
+ self._envMtime = 0
self.refresh()
def refresh(self):
"""Reload configuration from files"""
- self._load_config()
- self._load_env()
+ self._loadConfig()
+ self._loadEnv()
logger.info("Configuration refreshed")
- def _load_config(self):
+ def _loadConfig(self):
"""Load configuration from config.ini file in flattened format"""
# Find config.ini file (look in current directory and parent directory)
- config_path = Path('config.ini')
- if not config_path.exists():
+ configPath = Path('config.ini')
+ if not configPath.exists():
# Try in parent directory
- config_path = Path('../config.ini')
- if not config_path.exists():
- logger.warning(f"Configuration file not found at {config_path.absolute()}")
+ configPath = Path('../config.ini')
+ if not configPath.exists():
+ logger.warning(f"Configuration file not found at {configPath.absolute()}")
return
- self._config_file_path = config_path
- current_mtime = os.path.getmtime(config_path)
+ self._configFilePath = configPath
+ currentMtime = os.path.getmtime(configPath)
# Skip if file hasn't changed
- if current_mtime <= self._config_mtime:
+ if currentMtime <= self._configMtime:
return
- self._config_mtime = current_mtime
+ self._configMtime = currentMtime
try:
- with open(config_path, 'r') as f:
+ with open(configPath, 'r') as f:
for line in f:
line = line.strip()
# Skip empty lines and comments
@@ -73,28 +73,28 @@ class Configuration:
except Exception as e:
logger.error(f"Error loading configuration: {e}")
- def _load_env(self):
+ def _loadEnv(self):
"""Load environment variables from .env file"""
# Find .env file (look in current directory and parent directory)
- env_path = Path('.env')
- if not env_path.exists():
+ envPath = Path('.env')
+ if not envPath.exists():
# Try in parent directory
- env_path = Path('../.env')
- if not env_path.exists():
- logger.warning(f"Environment file not found at {env_path.absolute()}")
+ envPath = Path('../.env')
+ if not envPath.exists():
+ logger.warning(f"Environment file not found at {envPath.absolute()}")
return
- self._env_file_path = env_path
- current_mtime = os.path.getmtime(env_path)
+ self._envFilePath = envPath
+ currentMtime = os.path.getmtime(envPath)
# Skip if file hasn't changed
- if current_mtime <= self._env_mtime:
+ if currentMtime <= self._envMtime:
return
- self._env_mtime = current_mtime
+ self._envMtime = currentMtime
try:
- with open(env_path, 'r') as f:
+ with open(envPath, 'r') as f:
for line in f:
line = line.strip()
# Skip empty lines and comments
@@ -110,7 +110,7 @@ class Configuration:
# Add directly to data dictionary
self._data[key] = value
- logger.info(f"Loaded environment variables from {env_path.absolute()}")
+ logger.info(f"Loaded environment variables from {envPath.absolute()}")
# Also load system environment variables (don't override existing)
for key, value in os.environ.items():
@@ -120,35 +120,35 @@ class Configuration:
except Exception as e:
logger.error(f"Error loading environment variables: {e}")
- def check_for_updates(self):
+ def checkForUpdates(self):
"""Check if configuration files have changed and reload if necessary"""
- if self._config_file_path and os.path.exists(self._config_file_path):
- current_mtime = os.path.getmtime(self._config_file_path)
- if current_mtime > self._config_mtime:
+ if self._configFilePath and os.path.exists(self._configFilePath):
+ currentMtime = os.path.getmtime(self._configFilePath)
+ if currentMtime > self._configMtime:
logger.info("Config file has changed, reloading...")
- self._load_config()
+ self._loadConfig()
- if self._env_file_path and os.path.exists(self._env_file_path):
- current_mtime = os.path.getmtime(self._env_file_path)
- if current_mtime > self._env_mtime:
+ if self._envFilePath and os.path.exists(self._envFilePath):
+ currentMtime = os.path.getmtime(self._envFilePath)
+ if currentMtime > self._envMtime:
logger.info("Environment file has changed, reloading...")
- self._load_env()
+ self._loadEnv()
def get(self, key: str, default: Any = None) -> Any:
"""Get configuration value with optional default"""
- self.check_for_updates() # Check for file changes
+ self.checkForUpdates() # Check for file changes
if key in self._data:
value = self._data[key]
# Handle secrets (keys ending with _SECRET)
if key.endswith("_SECRET"):
- return handle_secret(value)
+ return handleSecret(value)
return value
return default
def __getattr__(self, name: str) -> Any:
"""Enable attribute-style access to configuration"""
- self.check_for_updates() # Check for file changes
+ self.checkForUpdates() # Check for file changes
value = self.get(name)
if value is None:
@@ -157,14 +157,14 @@ class Configuration:
def __dir__(self) -> list:
"""Support auto-completion of attributes"""
- self.check_for_updates() # Check for file changes
+ self.checkForUpdates() # Check for file changes
return list(self._data.keys()) + super().__dir__()
def set(self, key: str, value: Any) -> None:
"""Set a configuration value (for testing/overrides)"""
self._data[key] = value
-def handle_secret(value: str) -> str:
+def handleSecret(value: str) -> str:
"""
Handle secret values. Currently just returns the plain text value,
but can be enhanced to provide actual decryption in the future.
diff --git a/modules/def_attributes.py b/modules/defAttributes.py
similarity index 64%
rename from modules/def_attributes.py
rename to modules/defAttributes.py
index 6a60019c..731ecfd9 100644
--- a/modules/def_attributes.py
+++ b/modules/defAttributes.py
@@ -8,16 +8,16 @@ class AttributeDefinition(BaseModel):
type: str
required: bool = False
placeholder: Optional[str] = None
- default_value: Optional[Any] = None
+ defaultValue: Optional[Any] = None
options: Optional[List[Dict[str, Any]]] = None
editable: bool = True
visible: bool = True
order: int = 0
validation: Optional[Dict[str, Any]] = None
- help_text: Optional[str] = None
+ helpText: Optional[str] = None
# Helper classes for type mapping
-type_mappings = {
+typeMappings = {
"int": "number",
"str": "string",
"float": "number",
@@ -31,59 +31,59 @@ type_mappings = {
}
# Special field types based on naming conventions
-special_field_types = {
+specialFieldTypes = {
"content": "textarea",
"description": "textarea",
"instructions": "textarea",
"password": "password",
"email": "email",
- "workspace_id": "select",
- "agent_id": "select",
+ "workspaceId": "select",
+ "agentId": "select",
"type": "select"
}
# Function to convert a Pydantic model into attribute definitions
-def get_model_attributes(model_class, user_language="de"):
+def getModelAttributes(modelClass, userLanguage="de"):
"""
Converts a Pydantic model into a list of AttributeDefinition objects
"""
attributes = []
# Go through all fields in the model
- for i, (field_name, field) in enumerate(model_class.__fields__.items()):
+ for i, (fieldName, field) in enumerate(modelClass.__fields__.items()):
# Skip internal fields
- if field_name.startswith('_') or field_name in ["label", "field_labels"]:
+ if fieldName.startswith('_') or fieldName in ["label", "fieldLabels"]:
continue
# Determine the field type
- field_type = type_mappings.get(str(field.type_), "string")
+ fieldType = typeMappings.get(str(field.type_), "string")
# Check for special field types
- if field_name in special_field_types:
- field_type = special_field_types[field_name]
+ if fieldName in specialFieldTypes:
+ fieldType = specialFieldTypes[fieldName]
# Get the label (if available)
- field_label = field_name.replace('_', ' ').capitalize()
- if hasattr(model_class, 'field_labels') and field_name in model_class.field_labels:
- label_obj = model_class.field_labels[field_name]
- field_label = label_obj.get_label(user_language)
+ fieldLabel = fieldName.replace('_', ' ').capitalize()
+ if hasattr(modelClass, 'fieldLabels') and fieldName in modelClass.fieldLabels:
+ labelObj = modelClass.fieldLabels[fieldName]
+ fieldLabel = labelObj.getLabel(userLanguage)
# Determine default values and required status
required = field.required
- default_value = field.default if not field.required else None
+ defaultValue = field.default if not field.required else None
# Check for validation rules
validation = None
if field.validators:
- validation = {"has_validators": True}
+ validation = {"hasValidators": True}
# Placeholder text
- placeholder = f"Please enter {field_label}"
+ placeholder = f"Please enter {fieldLabel}"
# Special options for Select fields
options = None
- if field_type == "select":
- if field_name == "type" and model_class.__name__ == "Agent":
+ if fieldType == "select":
+ if fieldName == "type" and modelClass.__name__ == "Agent":
options = [
{"value": "Analysis", "label": "Analysis"},
{"value": "Transformation", "label": "Transformation"},
@@ -103,21 +103,21 @@ def get_model_attributes(model_class, user_language="de"):
description = field.schema.description
# Create attribute definition
- attr_def = AttributeDefinition(
- name=field_name,
- label=field_label,
- type=field_type,
+ attrDef = AttributeDefinition(
+ name=fieldName,
+ label=fieldLabel,
+ type=fieldType,
required=required,
placeholder=placeholder,
- default_value=default_value,
+ defaultValue=defaultValue,
options=options,
- editable=field_name not in ["id", "mandate_id", "user_id", "created_at", "upload_date"],
- visible=field_name not in ["hashed_password", "mandate_id", "user_id"],
+ editable=fieldName not in ["id", "mandateId", "userId", "createdAt", "uploadDate"],
+ visible=fieldName not in ["hashedPassword", "mandateId", "userId"],
order=i,
validation=validation,
- help_text=description or "" # Set empty string as default value if no description found
+ helpText=description or "" # Set empty string as default value if no description found
)
- attributes.append(attr_def)
+ attributes.append(attrDef)
return attributes
\ No newline at end of file
diff --git a/modules/documentProcessor.py b/modules/documentProcessor.py
new file mode 100644
index 00000000..de81a64c
--- /dev/null
+++ b/modules/documentProcessor.py
@@ -0,0 +1,887 @@
+"""
+Module for extracting content from various file formats.
+Provides specialized functions for processing text, PDF, Office documents, images, etc.
+"""
+
+import logging
+import os
+import io
+from typing import Dict, Any, List, Optional, Union, Tuple
+import base64
+
+# Configure logger
+logger = logging.getLogger(__name__)
+
+# Optional imports - only loaded when needed
+pdfExtractorLoaded = False
+officeExtractorLoaded = False
+imageProcessorLoaded = False
+
def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> List[Dict[str, Any]]:
    """
    Main function for extracting content from a file based on its MIME type.
    Delegates to specialized extraction functions.

    Args:
        fileMetadata: File metadata (name, MIME type, etc.)
        fileContent: Binary data of the file

    Returns:
        List of document-content dicts with metadata. Binary payloads are
        base64-encoded strings (metadata["base64Encoded"] = True); text
        payloads carry metadata["isText"] = True.
    """
    try:
        mimeType = fileMetadata.get("mimeType", "application/octet-stream")
        fileName = fileMetadata.get("name", "unknown")

        logger.info(f"Extracting content from file '{fileName}' (MIME type: {mimeType})")

        # Extract content based on MIME type
        contents = []

        # CSV must be matched BEFORE the generic "text/" prefix branch below,
        # otherwise "text/csv" is swallowed by the plain-text handler and the
        # CSV extractor is never reached.
        if mimeType == "text/csv":
            contents.extend(extractCsvContent(fileName, fileContent))

        # Text-based formats
        elif mimeType.startswith("text/") or mimeType in [
            "application/json",
            "application/xml",
            "application/javascript",
            "application/x-python"
        ]:
            contents.extend(extractTextContent(fileName, fileContent, mimeType))

        # SVG files
        elif mimeType == "image/svg+xml":
            contents.extend(extractSvgContent(fileName, fileContent))

        # Images
        elif mimeType.startswith("image/"):
            contents.extend(extractImageContent(fileName, fileContent, mimeType))

        # PDF documents
        elif mimeType == "application/pdf":
            contents.extend(extractPdfContent(fileName, fileContent))

        # Word documents
        elif mimeType in [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/msword"
        ]:
            contents.extend(extractWordContent(fileName, fileContent, mimeType))

        # Excel documents
        elif mimeType in [
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-excel"
        ]:
            contents.extend(extractExcelContent(fileName, fileContent, mimeType))

        # PowerPoint documents
        elif mimeType in [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]:
            contents.extend(extractPowerpointContent(fileName, fileContent, mimeType))

        # Binary data as fallback for unknown formats
        else:
            contents.extend(extractBinaryContent(fileName, fileContent, mimeType))

        # Fallback when no content could be extracted
        if not contents:
            logger.warning(f"No content extracted from file '{fileName}', using binary fallback")
            contents.append({
                "sequenceNr": 1,
                "name": '1_undefined',
                "ext": os.path.splitext(fileName)[1][1:] or "bin",
                "contentType": mimeType,
                "data": fileContent,
                "metadata": {
                    "isText": False
                }
            })

        # Base64-encode any remaining binary payloads so every entry is
        # JSON-safe, and flag the encoding in metadata.
        for content in contents:
            if isinstance(content.get("data"), bytes):
                content["data"] = base64.b64encode(content["data"]).decode('utf-8')
                content.setdefault("metadata", {})["base64Encoded"] = True

        logger.info(f"Successfully extracted {len(contents)} content items from file '{fileName}'")
        return contents

    except Exception as e:
        logger.error(f"Error during content extraction: {str(e)}")
        # Fallback on error. Mirror the success-path contract: callers always
        # receive base64-encoded data for binary payloads (the previous
        # version leaked raw bytes here, unlike every other return path).
        fallbackName = fileMetadata.get("name", "unknown")
        return [{
            "sequenceNr": 1,
            "name": fallbackName,
            "ext": os.path.splitext(fallbackName)[1][1:] or "bin",
            "contentType": fileMetadata.get("mimeType", "application/octet-stream"),
            "data": base64.b64encode(fileContent).decode('utf-8'),
            "metadata": {
                "isText": False,
                "base64Encoded": True
            }
        }]
+
+
def _loadPdfExtractor():
    """Lazily imports the PDF extraction libraries; safe to call repeatedly."""
    global pdfExtractorLoaded
    if pdfExtractorLoaded:
        # Already loaded on an earlier call - nothing to do.
        return
    try:
        global PyPDF2, fitz
        import PyPDF2
        import fitz  # PyMuPDF for more extensive PDF processing
    except ImportError as importError:
        logger.warning(f"PDF extraction libraries could not be loaded: {importError}")
    else:
        pdfExtractorLoaded = True
        logger.info("PDF extraction libraries successfully loaded")
+
def _loadOfficeExtractor():
    """Lazily imports the Office document extraction libraries; safe to call repeatedly."""
    global officeExtractorLoaded
    if officeExtractorLoaded:
        # Already loaded on an earlier call - nothing to do.
        return
    try:
        global docx, openpyxl
        import docx  # python-docx for Word documents
        import openpyxl  # for Excel files
    except ImportError as importError:
        logger.warning(f"Office extraction libraries could not be loaded: {importError}")
    else:
        officeExtractorLoaded = True
        logger.info("Office extraction libraries successfully loaded")
+
def _loadImageProcessor():
    """Lazily imports the image processing libraries; safe to call repeatedly."""
    global imageProcessorLoaded
    if not imageProcessorLoaded:
        try:
            global PIL, Image
            # Bind the PIL package itself: the "from PIL import Image" below
            # only binds Image, leaving the declared global PIL unset.
            import PIL
            from PIL import Image
            imageProcessorLoaded = True
            logger.info("Image processing libraries successfully loaded")
        except ImportError as e:
            logger.warning(f"Image processing libraries could not be loaded: {e}")
+
def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
    """
    Extracts text from text files.

    Tries UTF-8 first, then a list of common fallback encodings; the raw
    bytes are returned as a binary entry only if every decode attempt fails.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file
        mimeType: MIME type of the file

    Returns:
        List with a single content dict: metadata.isText = True on successful
        decoding, otherwise a binary entry with metadata.isText = False.
    """
    # Keep original file extension; default to "txt" when the name has none.
    fileExtension = os.path.splitext(fileName)[1][1:] or "txt"

    def _textEntry(text: str, encoding: Optional[str] = None) -> List[Dict[str, Any]]:
        # Successful decode; "encoding" is recorded only for non-UTF-8 results,
        # matching the original contract.
        metadata: Dict[str, Any] = {"isText": True}
        if encoding is not None:
            metadata["encoding"] = encoding
        return [{
            "sequenceNr": 1,
            "name": "1_text",  # Simplified naming
            "ext": fileExtension,
            "contentType": "text",
            "data": text,
            "metadata": metadata
        }]

    def _binaryEntry() -> List[Dict[str, Any]]:
        # Raw-bytes fallback when no encoding works.
        return [{
            "sequenceNr": 1,
            "name": "1_binary",  # Simplified naming
            "ext": fileExtension,
            "contentType": mimeType,
            "data": fileContent,
            "metadata": {
                "isText": False
            }
        }]

    try:
        return _textEntry(fileContent.decode('utf-8'))
    except UnicodeDecodeError:
        logger.warning(f"Could not decode text from file '{fileName}' as UTF-8, trying alternative encodings")

    try:
        # Try alternative encodings
        for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
            try:
                text = fileContent.decode(encoding)
            except UnicodeDecodeError:
                continue
            logger.info(f"Text successfully decoded with encoding {encoding}")
            return _textEntry(text, encoding)

        # Fallback to binary data if no encoding works
        logger.warning(f"Could not decode text, using binary data")
        return _binaryEntry()
    except Exception as e:
        logger.error(f"Error in alternative text decoding: {str(e)}")
        return _binaryEntry()
+
def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
    """
    Extracts content from CSV files.

    Args:
        fileName: Name of the file
        fileContent: Binary data of the file

    Returns:
        List of CSV content objects with metadata.isText = True, or a binary
        fallback entry when no supported encoding can decode the bytes.
    """
    def _wrap(entryName: str, entryContentType: str, payload, meta: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Shared shape for every returned content item.
        return [{
            "sequenceNr": 1,
            "name": entryName,  # Simplified naming
            "ext": "csv",
            "contentType": entryContentType,
            "data": payload,
            "metadata": meta
        }]

    try:
        decoded = fileContent.decode('utf-8')
        return _wrap("1_csv", "csv", decoded, {"isText": True, "format": "csv"})
    except UnicodeDecodeError:
        logger.warning(f"Could not decode CSV from file '{fileName}' as UTF-8, trying alternative encodings")

    try:
        # Try alternative encodings for CSV
        for candidate in ['latin-1', 'cp1252', 'iso-8859-1']:
            try:
                decoded = fileContent.decode(candidate)
            except UnicodeDecodeError:
                continue
            logger.info(f"CSV successfully decoded with encoding {candidate}")
            return _wrap("1_csv", "csv", decoded,
                         {"isText": True, "encoding": candidate, "format": "csv"})

        # Fallback to binary data
        return _wrap("1_binary", "text/csv", fileContent, {"isText": False})
    except Exception as e:
        logger.error(f"Error in alternative CSV decoding: {str(e)}")
        return _wrap("1_binary", "text/csv", fileContent, {"isText": False})
+
+def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
+ """
+ Extracts content from SVG files.
+
+ Args:
+ fileName: Name of the file
+ fileContent: Binary data of the file
+
+ Returns:
+ List of SVG-Content objects with dual text/image metadata
+ """
+ contents = []
+
+ try:
+ # Extract SVG as text content (XML)
+ svgText = fileContent.decode('utf-8')
+
+ # Check if it's actually SVG by looking for the SVG tag
+ if "
diff --git a/static/44_LF-Details.png b/static/44_LF-Details.png
deleted file mode 100644
index 3a2be57d619cb66e70f76081929e723e874ffcad..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 253009
zcmeFZcT`hb_bwb11rbC=R8T=sEFeXS^di{kk=_yMAVqo&C;|#9sB|eoDIpLLAs}7h
zC`t)P??pb8H+bH2e1G@5|9#&WcZ|CSB7~i_*P3gVXFhYTJVL8nr=ww}L7`A|
z@;9!ip-?ndC={g+^=^3OMaiQQ_>aO_?Yb-~n{<2(exbCytb7@T$_?AQX0i)@-{W{g
z*BOP{SB3nesCK}a!5f0DHFR8bl$FFx9qf2a%p7i;^LW@f!rds8gtUjFiK(r*%gNj3
zme%%?XJ<+(&YrY3lRT>}q|B%6C}(bEeZ$MiT-{4W!_>>xRMhOOv=kkUgohZ6U}x@P
za?-=@j=i&(ha~&fz+&(-@-i>Gl!TL+g_zovYrpS+-z3?sTwEN*czNC3-Fe&vcpRK8
zc`u5Jit_UD^YZg^!yVktp7t ^CfMr+yD`#oXD{$=cDy+QI%LGN8$A2UizKc2`$x
zGcgMj3!&R)W
z00QAf{=$2ahYy)D^0Sz#leIa_0(qp=MTxDy|M8jxFEWvTO-yS06<90$^dHF}V)~DC
z&Fw*??jXAh5ixsFsEvd2S1xOK7)|zi>O?GTZ2mklom${A+wuh`sJ!c<^ZsalWxL!3
z9QyXCQwUS-h>l*YqRogCW>=(6jdGM?Y-;CuRqND9ZWVUZv>mZQsodo5F_#OS<|3CL5|j+VQL|M_kEhxd=zuzUV|
zZNCMjAjO|ARX+AV^XDrvF$Wqc|9pvBU`O1aub@n>$uRu+(#bk%tN)tT111XPKX3Bq
zJN17j_5Zu1@*b|Ot@)7UQ8G8rpFiJ~Z~bCUaC=|STpkiE3*)Vdbj*T|`?huCBnoyL
zeLo`NP-;rbx%210i2hcG7Mkuthv)`!^XF%$giIC(?ut$w@wD;`U
zqxa_4z1yay3E;bsONNl^yXw~KyVP4emiCg`dRs{z_=DmdI>p9zxhj}Gx^~3r_tQ~C
z(p-ZyZx!GC#_G&ORvyV|-_EUJ-Uq=OC{45)>OOx)U%B$Y`RLZmC|u-p$&3`ywq0fD
z)04~=&bT0x?`2DU>PAM1&eSM$?c?L3YQ8J;U**px3?Ih}h92Q<3!*#bGW_`fc|QtE
zQv`EzS!mUsh)`F#yN^OfXMZE5|H=QT@>R`1|Fr6%ulf#UsSh4dx=yqyBcf8FK(!py
z)zzI{_6d3P=vE
zvtk+p=%sMqzgv;{Q1N^q>iv{|TL~6dv(L&)f%o&*0Rx$7om(2+?29W@UaXg_*_`7)k*OY}^eMl{Q%1>B4Tp=g&8gQNzVZ{}`v{vj*~u_6bcN
zABjOFlz&pJfTc}uvFpmuYSsifMvI@{K8k^uhn*{0qw)`N?e|^EFf4In
z7PgO%6SV$zoJX4_?0Wr6;r53IPPM$dYYOUtje05NTg-myl#tiF73gH0sKK(lntn
z&oa}zHj2cCl3^^gsAoPQ;wbaz(IZeimwh{BVsBn%KvmZD)rfbP91ViifQ3g!MIDa9
zvf#rA`h|y=#c#F+REYn3{hAE0{BzeA6=FYdTBN0?+qU3tQcmIT&Ck1dO0bwihz|bu
z?I>x)iNwbcN}HRS)}GeYDW#^Sp1*XdEpB3Nnc7-wr$uWGLu5om3b%g2IROEIfx(|1
zSy3aYu@H#i@_*ZzYZ`3a{=BqQf*iqMQE|<4k{*SNef#!ph)es*Lc4K7Gdi$zZMK%5
zR*u!@$Gv^5Qj^C4kP(%xNLimmQGcFT-~@lf#Kgo#H86QuQ2?vk(*<*73NqOx@Z$ON
zVYiY{Lv=&fu_lgTawcygt#D>=K8OpFlK(UhduW0u`~VFv%_CjX8blMBn;2M
zPK|$keO-rD9Ho>bW%%sbGiFJz{5T=o(c?VsW!_6|lbw03Z_O3S=!`6bA~jabFOyu8
zI&ee1UK51k`6k6y+|5&DK2CWRm8WM^6SLM=mlU5T`fYd;?Yi4U>0}u5|JN$Pd0?<)
zyox+`bMVmCE?{@-l=~FXR)w1!y(;^{P26p=@!QAH))IFI(9}cX!r{-eO?RIsHzct-
zmS#+6pPYiqrAe}cK%@Xutr_#JafyG5~X^)3+Wj9K1{G|7t|9bs$RV-mZ){p
zY0w*jaYcDY5l*DDvs0rULK6lDkm9n&m0ilatQ^WggV}9;0_})~JkkBjuzMq9^6Hbb
z92|_g4wl)^?2MhCIB%kN71Pi01*%sR7
ztV-R>&J%5fS;ip6Li=8uDW(Gll1T~T(O$-@{YjwmA?}AHfUlhC$iIDh+?QFWkEFd+7$WE*-(MOC#BbMC+$UitB!qP)XA20D|@
zx5|x1iAk81vD-6Wi!9}HjLD>g5x0^(Hd|f}{St=5yf2};q|+#LSORb#o3A9;J~6Gk&G4QQ
zos74YX=WnPa@>iHj7$X$8mpq0&c7d*oFz9#$Hadn$#0`5O(j9^woG%xix;owNaDOH
zMJW7k2;Mb{KPY#hu(BOoGC_F7XdVSc#l6gG&NPQ?1TyK0?`X2thntDgfylnH$Yk-h1rUC6{BBh4>tK)
zp{HSZP26nSmR>84trLJ;I5~QhWlO_krhcr=HB3YsW1i7)iI=Z0b{DTsmnhO+*$0!(
z$jH!Y{K(Wn8(1cro={eqQZ_sMh;Iq}#IxFww~Ky*fkzZv8&LisFJR57iqTP=@fnrl
zan58?YyakkyX|yMBv+_L$frkNzI^%m?9}!6bj6l(U+>CTKy>V2ahjo$7nmNGo$yjq
zdtPv-^A)&j@aUY}+=LsgGM2b{gBMXzqwXD<7F{_99_I0zVkN<4kATOvN%}aY-M6DR
zr+aGAd%BL?up#lzfXDXf`dVe+-u1TF>vtnhafL`Wp6j+XJ$mg~yEr@htIumPb>vzf
zQoJ{W1l=&ni1kBQ1)Og`L;@W@_qrmMu=I}QAd_E`o2t_5E{~SB#DlU4J|XV*i^x?m
z5~Z)8=__#N%(YY3pXSR(f#)r(Ow@d|em5wS{VWQ*)+rYWh=lpl9i2ymN+kgF44uAk
z*tFoDUAZAJX@preUkCejO@v@Utvhm79%ATTjyeneZmpLeE_*-Z-fziwcd#;$qWv9B
zcR+V_eCx%jc#8M8?B0eR%e--KTDZ|Swm}ZZ-Iw`FCaiT?q`qKq5tULVoAx5}jixWQ
zOyixgnwr}C+~*f`-kcmLCr#M(7HdM5cbP0clbqrUnn;vznTgW&_Y`WvZ}0UU6-*r$}8f>cTUVqnHL$tZ!(yK%){=a;;nfj@8im^pF`_?No_gC#%bfvMqT4q%RndD
zrjATX?VMWui$V2!HLKsez5_p*%Il6p*@D)irP3~9p@sAw`Q6j4Vo^V;Hg;knX4C8EDN419_-LBs2=U99{OnoKo%f)Y8O4qzw18a
z2O6)y<5wK+;t1@ib4-&o&^~UdlaoS|ATKZ9mXr`FtSpt(Fq615aAb1QTWj~~da*(F
z!I?raft{rw?yEaO?{!pzs~%xrF`j}Nig1EcQx4$06N&8`*ElVY-F7-Hv0VKox8l^;3ZHyOnUc*~6$dN47)rPB^Z_IX&d_z!TEB(<}hg1wgC*!`ZMI3>#CW#W
zn&Pk!bXTJdPAnYWqY)J;$!Als7|tfwl5gG2?u^Gncv%hPHNAaX@b+KSt}}hT
zGot)e&prlnYKk<(Hz&(c(XmPy*ca+cdd-K|j;Mj)2%gE4XRmk60t6awOb&p+-H&-Y
zRb<_KMmb*O=1kdAB4%|;xMF!xNZft81&|a0b
zss*G%hyFfTAN^{46Cam;S3oq$HpZJO?wArFNXGYqRnZ?7h2-Kt$VMW&DdviY-MqU_
zmfn0uGjsQQV=_!+yfcryM-tFd%kC}Y|I0_}39}IX#-q)S4jpjxjC(ADm~XoT9u3J4
z7LYSMb=@la9X^ESB40W`*Yt8hPMLvK2N-lWcd58rN*lgOeVx2uRIx_Ey1>hhw+BBueKeKtI
zS-+RJbl`}dFSwkA$-D-ex|oX!%F3F0dT~unP4nr}>tEuG4RHJyFWyvsc}es3?N@!i
zONQJ=r8!FMQy%>Z*?y
zRqw~F5)K|bIKSBKU^~{7ikVThwzd{qoy<$EJUP^qsu)VAk*!Cp;f+~t(|53jMS--v
z)_zt)#+FvGxjxfImRMc;Hat9R)%n&Op#liL5S@uGV`{#j7R8Xehf)gF-@?ICUOJR#
z06Z9nJfoz)%)oObvaJ{pM<;Kis8!=h$;A%SHVFYAD`^Loq3WLdE;4|j7MJJ{m(`2c&cbirO&0zu(xa6&;#d{_jQ0O0+Q?t
zyJ@WZ{5IFYm6jXp&$YC)FzmBLblEXYU4F?xx9;#aDtZ~SbFQ%a#4P4a9%d`OO_xl=z;o2!TeOtH5j>|F(IF8A|
zI>&Q%yObHVrdTDY)FPUA<}zaYpNg0Gd&;8Lyc+KN!q94tA_1!rw?1ChMGItEXc1
zLHGO2Jz?<}ps4V-yt|u@*=&7{yOGS0r-v&GNy)4C^yHUIw`g>#i8#+JjVUFG4AGJ`
zhz|Wmc^37vd#!^GHforBZ=Q0a>U9UR(~6mm)HXVQw;wcdH=S`8my4P*B&~jk0Uqzv
z-xU+QOqaa;(kwYhK=q1e;2@`f$D?n%XwbXGWa%^@htC9%3(7R>A)&yPwL?FuY#VBE
zD!?wJ5-LT_KJN#|SN56N(qryQN2X<0rd!;k9SQIWiyA8TRcF96i;>z}knpZ2HlyVt
z2Q```_pwNHIFzO=yMc?No#@v^waV#MMh>FY^j-6WJnlMc=;SsVN#?!VpXw_S;h%O|
z^2hs~#ot1>Dp!sJ7x$UQ0daCa2CGQtM?SGo3k5v+7|2}Dm&%k+E0hA7_}+g|>BU?Z
zc%iF_!7^lkuVV5BlJOlqS%g$0kL3FTBtn0
z!wL!tZ#Q^eUiYMW@>`c)Kn%!qpXu|0uo+4@t$=sYWwxLe0-wSY%{5SN@zR-bH*>v=
zrKSlzhuil?wgJ;Op^qNj1b?59UYnBd?fSj(GEa17htQ&=TTX;H&(GS`vw;z$;aEia)|vG1GT-apbLo{N%mH=duxLi&NdJ#8bTI
zvDQh(@orOHjN%QRy)t%<5EFkQyd8R{NM_1z8mefQmW-iI^Lx61&9oM0I5@1qE`bAK
z2@C#B{>$wD9o-_ir!-k-DmF28KvCRqY@P-ZRy&ME^3btknSPtA{Uno&LyWRjOw@G3
z-ikBms^^RQ&oZ8+z}}H1bm^*nZ(@0KA;T*zLy)3A?z2%Ogb{p)f6Dom_o?Sw1}x>l
z`g1-#Ii?tB*9i28J$Q(xcqlAH>;Rg`pEaq0EHP4TWSyH*G*VBV`>IuPJT*OCzhUP7
zuD#SI+XR=M!|LJ`M-au%K!INSLN#^u)=cf3aM?JZ8NwULd7P%Z_i9JA^!1q0OKG)U
zBBwz>t{wV$$h$5{xb`0i%GCPzq6?s1LyUaLJR5jLpr>>z`y9v!;mWTl~$;cZ()8$eml#
zH!L#X;gVq*w`F)-W?tOMli;{rlfl_f4t|0Z<~#7qK#&|hCsJfuy$`}n2aPf*R^Ynn
zr^mVXUy#BA0tMyv7-VTSj|~D4Kngl8=+f7s3(T4_>?I+T)|i
zEiD>c@p<5)D$43;5=ZXIob^YWnVTm#qWO+{$Kxg^C)sD(?>%kmvDDoW$6K-X7SN)m
zwqdGCRnQoS`(xx
zR|}ftn9BQCc#7m-F%c0Orz5nZutw)Te17|2uVTr}BkFdkkuNOQh*TH2WUm_2Z#k2b
zhb~^chzFKu&LuXoMXiS=D_+3zsqsgp7rHm|Qo_^DqxWk(wT;x$%znkRMVx8ktCrU$
zfoah;bd7xe;sv3)^B=4QaBFU~Uj27~O;dikLZI>w*?;2138!7PO`Hv7p$v?Sn!}P}
z5wG(p>b}JcDnac5GGOiJf)L*(*88X2CLlm+!vNW(vo&vwO3v8!mzS{sfd&v|d6$r8
z$4aT+=El}eBRNRRt;ML8J1;CQbr8z!aKyx
z9)2!Z6}xzA6hQ*3GZpXDG%B_RHxVx#4)Ac^De=+|K{s!=Ye+<{irI>
zM#39#H*M(5yc>E7W_>SF(Nc)?O*Vt^#P01v6Ll+hVbATrg9#M1O>YZt@MiE^-un>gqDMBzfJ%hw+ty?OdE(?@U2>@vn4R;_Ngs4a?n`dvn>N_j
z*PhIl7&{xU_L1Hsuj^UFw0b&_gd*9_pO??g&Bd@OLyn%GFF1jzf0Ml~#5s1c|8DgO
zgb2Kt4$gCexBS<)@ZWajA0i0RS;)ReN9;qv0Hl*Xk3M{9atabm(7agUKEuNI-P-SX
zYEd+sT#$~*M|{($#F;an@V5}318KYi>3r(9tVDSc1{uw$@NjIiinubT+%Bb(tgH*m
zK2qyYFbaA4bf2y3=bC+nK{ZV5bYy3A;@iAD*Iz$O$YbnIPENyf%UMN>SWEL`+n&-`
z2+G4hsE~4F6}UidgLi0H({pZv?>{}D%9VvpEOSbeY+-urtGcUGSX#Qjt3gUQ2Fb&3
zSz#roR*QS5{p~t--t7zUb?^oTPc47%p(YopKK!i@_jh3$hS~`Fg!h?DZL|wNyQ&fw
zCF^|a+S$3em0ibwm~X2St_z{KeE}J#W;eY96