neutralizer activated

This commit is contained in:
ValueOn AG 2025-11-02 00:50:21 +01:00
parent 4d3ca3342a
commit 24b09ea7ce
5 changed files with 157 additions and 20 deletions

View file

@ -271,8 +271,17 @@ class AiPerplexity(BaseConnectorAi):
temperature = getattr(options, "temperature", None) or model.temperature
maxTokens = model.maxTokens
# Parse prompt JSON
promptContent = messages[0]["content"] if messages else ""
# Parse prompt JSON - find user message (not system message)
promptContent = ""
if messages:
for msg in messages:
if msg.get("role") == "user":
promptContent = msg.get("content", "")
break
# Fallback to first message if no user message found
if not promptContent and len(messages) > 0:
promptContent = messages[0].get("content", "")
import json
promptData = json.loads(promptContent)
@ -309,7 +318,20 @@ Return ONLY a JSON array of URLs, no additional text:
if response.status_code != 200:
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API error: {response.text}")
apiResponse = response.json()
# Check if response body is empty or invalid
responseText = response.text
if not responseText or not responseText.strip():
raise HTTPException(status_code=500, detail="Perplexity Web Search API returned empty response")
try:
apiResponse = response.json()
except Exception as jsonError:
logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API returned invalid JSON: {str(jsonError)}")
if "choices" not in apiResponse or not apiResponse["choices"]:
raise HTTPException(status_code=500, detail="Perplexity Web Search API response missing 'choices' field")
content = apiResponse["choices"][0]["message"]["content"]
return AiModelResponse(
@ -350,8 +372,17 @@ Return ONLY a JSON array of URLs, no additional text:
temperature = getattr(options, "temperature", None) or model.temperature
maxTokens = model.maxTokens
# Parse prompt JSON
promptContent = messages[0]["content"] if messages else ""
# Parse prompt JSON - find user message (not system message)
promptContent = ""
if messages:
for msg in messages:
if msg.get("role") == "user":
promptContent = msg.get("content", "")
break
# Fallback to first message if no user message found
if not promptContent and len(messages) > 0:
promptContent = messages[0].get("content", "")
import json
promptData = json.loads(promptContent)
@ -384,7 +415,19 @@ Return ONLY a JSON array of URLs, no additional text:
if response.status_code != 200:
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API error: {response.text}")
apiResponse = response.json()
# Check if response body is empty or invalid
responseText = response.text
if not responseText or not responseText.strip():
raise HTTPException(status_code=500, detail="Perplexity Web Crawl API returned empty response")
try:
apiResponse = response.json()
except Exception as jsonError:
logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API returned invalid JSON: {str(jsonError)}")
if "choices" not in apiResponse or not apiResponse["choices"]:
raise HTTPException(status_code=500, detail="Perplexity Web Crawl API response missing 'choices' field")
# Extract the main content
content = apiResponse["choices"][0]["message"]["content"]

View file

@ -466,10 +466,26 @@ class AiTavily(BaseConnectorAi):
AiModelResponse with JSON list of URLs
"""
try:
# Extract parameters
promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
# Extract parameters - find user message (not system message)
promptContent = ""
if modelCall.messages:
for msg in modelCall.messages:
if msg.get("role") == "user":
promptContent = msg.get("content", "")
break
# Fallback to first message if no user message found
if not promptContent and len(modelCall.messages) > 0:
promptContent = modelCall.messages[0].get("content", "")
if not promptContent or not promptContent.strip():
raise ValueError("Empty prompt content received for web search")
import json
promptData = json.loads(promptContent)
try:
promptData = json.loads(promptContent)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
# Create Pydantic model
webSearchPrompt = AiCallPromptWebSearch(**promptData)
@ -520,10 +536,26 @@ class AiTavily(BaseConnectorAi):
AiModelResponse with crawl results as JSON (may include multiple pages)
"""
try:
# Extract parameters
promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
# Extract parameters - find user message (not system message)
promptContent = ""
if modelCall.messages:
for msg in modelCall.messages:
if msg.get("role") == "user":
promptContent = msg.get("content", "")
break
# Fallback to first message if no user message found
if not promptContent and len(modelCall.messages) > 0:
promptContent = modelCall.messages[0].get("content", "")
if not promptContent or not promptContent.strip():
raise ValueError("Empty prompt content received for web crawl")
import json
promptData = json.loads(promptContent)
try:
promptData = json.loads(promptContent)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
# Create Pydantic model
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)

View file

@ -54,9 +54,27 @@ class NeutralizationPlayground:
allAttributes = self.services.neutralization.getAttributes()
patternCounts: Dict[str, int] = {}
for attr in allAttributes:
patternType = attr.patternType
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
# Handle both dict and object access patterns
if isinstance(attr, dict):
patternType = attr.get('patternType', 'unknown')
fileId = attr.get('fileId')
else:
patternType = getattr(attr, 'patternType', 'unknown')
fileId = getattr(attr, 'fileId', None)
if patternType:
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
# Get unique files - handle both dict and object
uniqueFiles = set()
for attr in allAttributes:
if isinstance(attr, dict):
fileId = attr.get('fileId')
else:
fileId = getattr(attr, 'fileId', None)
if fileId:
uniqueFiles.add(fileId)
return {
'total_attributes': len(allAttributes),
'unique_files': len(uniqueFiles),

View file

@ -130,10 +130,8 @@ class NeutralizationService:
if not self.interfaceDbApp:
return []
try:
return self.interfaceDbApp.db.getRecordset(
DataNeutralizerAttributes,
recordFilter={"mandateId": self.interfaceDbApp.mandateId}
)
# Use the interface method which properly converts dicts to objects
return self.interfaceDbApp.getNeutralizationAttributes()
except Exception as e:
logger.error(f"Error getting neutralization attributes: {str(e)}")
return []

View file

@ -269,8 +269,11 @@ class WorkflowManager:
fileName = (title or f"user_context_{idx+1}.txt").strip()
mimeType = (mime or "text/plain").strip()
# Create file in component storage
# Neutralize content before storing if neutralization is enabled
content_bytes = content.encode('utf-8')
content_bytes = await self._neutralizeContentIfEnabled(content_bytes, mimeType)
# Create file in component storage
file_item = self.services.interfaceDbComponent.createFile(
name=fileName,
mimeType=mimeType,
@ -719,3 +722,46 @@ class WorkflowManager:
def _setUserLanguage(self, language: str) -> None:
    """Store ``language`` on the ``language`` attribute of ``self.services.user``.

    Args:
        language: Value to assign; it is stored as given, without validation.
    """
    userContext = self.services.user
    userContext.language = language
async def _neutralizeContentIfEnabled(self, content_bytes: bytes, mimeType: str) -> bytes:
    """Neutralize content if neutralization is enabled in user settings.

    The bytes are decoded to text, passed through
    ``self.services.neutralization.processText`` and re-encoded with the
    same codec that decoded them, so non-UTF-8 input is no longer silently
    transcoded to UTF-8 on the way through.

    Args:
        content_bytes: Raw content to neutralize.
        mimeType: MIME type of the content. Not used to select any
            processing here; kept for interface compatibility.

    Returns:
        The neutralized content as bytes. ``content_bytes`` is returned
        unchanged when neutralization is disabled, when the service result
        has no ``neutralized_text`` entry, or when any step fails.
    """
    try:
        # Check if neutralization is enabled
        config = self.services.neutralization.getConfig()
        if not config or not config.enabled:
            return content_bytes

        # Decode content to text, remembering which codec succeeded so the
        # result can be written back in the same encoding.
        try:
            text_content = content_bytes.decode('utf-8')
            source_encoding = 'utf-8'
        except UnicodeDecodeError:
            # latin-1 maps every byte value to a code point, so this decode
            # cannot fail and round-trips byte-for-byte. It was already the
            # only fallback that could ever be selected, because it came
            # first in the previous candidate list.
            text_content = content_bytes.decode('latin-1')
            source_encoding = 'latin-1'

        # Neutralize the text content
        # Note: The neutralization service should use names from config when processing
        result = self.services.neutralization.processText(text_content)
        if not result or 'neutralized_text' not in result:
            logger.warning("Neutralization did not return neutralized_text")
            return content_bytes

        neutralized_text = result['neutralized_text']
        try:
            try:
                return neutralized_text.encode(source_encoding)
            except UnicodeEncodeError:
                # The neutralizer introduced characters the source codec
                # cannot represent; UTF-8 can encode them, and dropping the
                # neutralized text would be worse than changing the codec.
                return neutralized_text.encode('utf-8')
        except Exception as e:
            logger.warning(f"Error encoding neutralized text: {str(e)}")
            return content_bytes
    except Exception as e:
        logger.error(f"Error during content neutralization: {str(e)}")
        # Return original content on error
        return content_bytes