From bd0d964e93991dd3cda0c24d091f8b8aba195cb7 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 24 Aug 2025 17:03:39 +0200
Subject: [PATCH] cleaned up code, removed sessions as jwt used, functions
alignment
---
app.py | 4 +-
modules/chat/documents/documentExtraction.py | 86 +--
modules/chat/documents/documentGeneration.py | 204 ++---
modules/chat/documents/documentUtility.py | 26 +-
modules/chat/handling/executionState.py | 23 -
modules/chat/handling/handlingTasks.py | 614 +++++++++++++--
modules/chat/handling/promptFactory.py | 461 ++++++------
modules/chat/managerChat.py | 18 +-
modules/chat/serviceCenter.py | 708 ++++++++++++------
modules/connectors/connectorDbJson.py | 23 +-
modules/interfaces/interfaceAppAccess.py | 68 +-
modules/interfaces/interfaceAppModel.py | 48 --
modules/interfaces/interfaceAppObjects.py | 57 +-
modules/interfaces/interfaceChatModel.py | 127 +++-
modules/interfaces/interfaceChatObjects.py | 458 +----------
modules/interfaces/interfaceComponentModel.py | 8 +-
.../interfaces/interfaceComponentObjects.py | 252 +++----
modules/methods/methodAi.py | 20 +-
modules/methods/methodDocument.py | 54 +-
modules/methods/methodOutlook.py | 24 +-
modules/methods/methodSharepoint.py | 2 +-
modules/routes/routeDataFiles.py | 22 +-
modules/routes/routeSecurityLocal.py | 4 +-
modules/security/auth.py | 116 +--
modules/security/tokenManager.py | 28 +-
modules/shared/attributeUtils.py | 8 +-
modules/shared/timezoneUtils.py | 112 ---
notes/changelog.txt | 136 ++--
test_documentExtraction.py | 22 +-
test_excel_processing.py | 4 +-
test_pydantic_compat.py | 100 +++
tests/run_timestamp_tests.py | 218 ------
tests/test_api_timestamps.py | 155 ----
tests/test_timestamp_models.py | 385 ----------
tool_showUnusedFunctions.py | 210 ++++++
35 files changed, 2067 insertions(+), 2738 deletions(-)
create mode 100644 test_pydantic_compat.py
delete mode 100644 tests/run_timestamp_tests.py
delete mode 100644 tests/test_api_timestamps.py
delete mode 100644 tests/test_timestamp_models.py
create mode 100644 tool_showUnusedFunctions.py
diff --git a/app.py b/app.py
index bfe82f8f..469653bc 100644
--- a/app.py
+++ b/app.py
@@ -109,9 +109,9 @@ def initLogging():
)
# Silence noisy third-party libraries - use the same level as the root logger
- noisyLoggers = ["httpx", "httpcore", "urllib3", "asyncio", "fastapi.security.oauth2"]
+ noisyLoggers = ["httpx", "httpcore", "urllib3", "asyncio", "fastapi.security.oauth2", "msal"]
for loggerName in noisyLoggers:
- logging.getLogger(loggerName).setLevel(logLevel)
+ logging.getLogger(loggerName).setLevel(logging.WARNING)
# Log the current logging configuration
logger = logging.getLogger(__name__)
diff --git a/modules/chat/documents/documentExtraction.py b/modules/chat/documents/documentExtraction.py
index fcebf79e..ea96289d 100644
--- a/modules/chat/documents/documentExtraction.py
+++ b/modules/chat/documents/documentExtraction.py
@@ -159,13 +159,13 @@ class DocumentExtraction:
"svg": 40000 # SVG content
}
- def _robustTextDecode(self, fileData: bytes, filename: str = "unknown") -> str:
+ def _robustTextDecode(self, fileData: bytes, fileName: str = "unknown") -> str:
"""
Robustly decode text data with multiple encoding fallbacks.
Args:
fileData: Raw bytes to decode
- filename: Filename for logging purposes
+ fileName: Name of the file, used for logging purposes
Returns:
Decoded text string
@@ -207,16 +207,16 @@ class DocumentExtraction:
else:
# Last resort: decode with replacement characters
content = fileData.decode('utf-8', errors='replace')
- logger.warning(f"{filename}: decoded with UTF-8 and replacement characters due to low encoding confidence")
+ logger.warning(f"{fileName}: decoded with UTF-8 and replacement characters due to low encoding confidence")
return content
except ImportError:
# chardet not available, use replacement characters
content = fileData.decode('utf-8', errors='replace')
- logger.warning(f"{filename}: decoded with UTF-8 and replacement characters (chardet not available)")
+ logger.warning(f"{fileName}: decoded with UTF-8 and replacement characters (chardet not available)")
return content
# This should never be reached, but just in case
- raise FileProcessingError(f"Failed to decode {filename} with any encoding")
+ raise FileProcessingError(f"Failed to decode {fileName} with any encoding")
def initialize(self) -> None:
"""Initialize the document processor."""
@@ -262,13 +262,13 @@ class DocumentExtraction:
- async def processFileData(self, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, prompt: str = None, documentId: str = None, enableAI: bool = True) -> ExtractedContent:
+ async def processFileData(self, fileData: bytes, fileName: str, mimeType: str, base64Encoded: bool = False, prompt: str = None, documentId: str = None, enableAI: bool = True) -> ExtractedContent:
"""
Process file data directly and extract its contents with optional AI processing.
Args:
fileData: Raw file data as bytes
- filename: Name of the file
+ fileName: Name of the file
mimeType: MIME type of the file
base64Encoded: Whether the data is base64 encoded
prompt: Prompt for AI content extraction
@@ -287,13 +287,13 @@ class DocumentExtraction:
fileData = base64.b64decode(fileData)
# Use documentUtility for mime type detection
if mimeType == "application/octet-stream":
- mimeType = detectMimeTypeFromData(fileData, filename, self._serviceCenter)
+ mimeType = detectMimeTypeFromData(fileData, fileName, self._serviceCenter)
# Process document based on type
if mimeType not in self.supportedTypes:
- contentItems = await self._processBinary(fileData, filename, mimeType)
+ contentItems = await self._processBinary(fileData, fileName, mimeType)
else:
processor = self.supportedTypes[mimeType]
- contentItems = await processor(fileData, filename, mimeType)
+ contentItems = await processor(fileData, fileName, mimeType)
# Process with AI if prompt provided and AI is enabled
if enableAI and prompt and contentItems:
@@ -304,7 +304,7 @@ class DocumentExtraction:
except Exception as e:
logger.error(f"Error processing content with AI: {str(e)}")
elif not enableAI:
- logger.debug(f"AI processing disabled for {filename}, returning raw extracted content")
+ logger.debug(f"AI processing disabled for {fileName}, returning raw extracted content")
return ExtractedContent(
id=documentId if documentId else str(uuid.uuid4()),
@@ -317,14 +317,14 @@ class DocumentExtraction:
- async def _processText(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
+ async def _processText(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process text document with robust encoding detection and complete content extraction"""
try:
- content = self._robustTextDecode(fileData, filename)
+ content = self._robustTextDecode(fileData, fileName)
# Validate that we got the complete content
if not content or len(content.strip()) == 0:
- logger.warning(f"Empty content extracted from {filename}")
+ logger.warning(f"Empty content extracted from {fileName}")
return [ContentItem(
label="empty",
data="[Empty file or no readable content]",
@@ -341,7 +341,7 @@ class DocumentExtraction:
# Use documentUtility for mime type
- mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
+ mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
@@ -356,11 +356,11 @@ class DocumentExtraction:
logger.error(f"Error processing text document: {str(e)}")
raise FileProcessingError(f"Failed to process text document: {str(e)}")
- async def _processCsv(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
+ async def _processCsv(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process CSV document with robust encoding detection"""
try:
- content = self._robustTextDecode(fileData, filename)
- mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
+ content = self._robustTextDecode(fileData, fileName)
+ mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
@@ -375,12 +375,12 @@ class DocumentExtraction:
logger.error(f"Error processing CSV document: {str(e)}")
raise FileProcessingError(f"Failed to process CSV document: {str(e)}")
- async def _processJson(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
+ async def _processJson(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process JSON document with robust encoding detection"""
try:
- content = self._robustTextDecode(fileData, filename)
+ content = self._robustTextDecode(fileData, fileName)
jsonData = json.loads(content)
- mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
+ mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
@@ -395,11 +395,11 @@ class DocumentExtraction:
logger.error(f"Error processing JSON document: {str(e)}")
raise FileProcessingError(f"Failed to process JSON document: {str(e)}")
- async def _processXml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
+ async def _processXml(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process XML document with robust encoding detection"""
try:
- content = self._robustTextDecode(fileData, filename)
- mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
+ content = self._robustTextDecode(fileData, fileName)
+ mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
@@ -414,11 +414,11 @@ class DocumentExtraction:
logger.error(f"Error processing XML document: {str(e)}")
raise FileProcessingError(f"Failed to process XML document: {str(e)}")
- async def _processHtml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
+ async def _processHtml(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process HTML document with robust encoding detection"""
try:
- content = self._robustTextDecode(fileData, filename)
- mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
+ content = self._robustTextDecode(fileData, fileName)
+ mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
@@ -433,10 +433,10 @@ class DocumentExtraction:
logger.error(f"Error processing HTML document: {str(e)}")
raise FileProcessingError(f"Failed to process HTML document: {str(e)}")
- async def _processSvg(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
+ async def _processSvg(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process SVG document with robust encoding detection and meaningful content extraction"""
try:
- content = self._robustTextDecode(fileData, filename)
+ content = self._robustTextDecode(fileData, fileName)
# Check if it's actually SVG content
if "