neutralizer activated
This commit is contained in:
parent
4d3ca3342a
commit
24b09ea7ce
5 changed files with 157 additions and 20 deletions
|
|
@ -271,8 +271,17 @@ class AiPerplexity(BaseConnectorAi):
|
|||
temperature = getattr(options, "temperature", None) or model.temperature
|
||||
maxTokens = model.maxTokens
|
||||
|
||||
# Parse prompt JSON
|
||||
promptContent = messages[0]["content"] if messages else ""
|
||||
# Parse prompt JSON - find user message (not system message)
|
||||
promptContent = ""
|
||||
if messages:
|
||||
for msg in messages:
|
||||
if msg.get("role") == "user":
|
||||
promptContent = msg.get("content", "")
|
||||
break
|
||||
# Fallback to first message if no user message found
|
||||
if not promptContent and len(messages) > 0:
|
||||
promptContent = messages[0].get("content", "")
|
||||
|
||||
import json
|
||||
promptData = json.loads(promptContent)
|
||||
|
||||
|
|
@ -309,7 +318,20 @@ Return ONLY a JSON array of URLs, no additional text:
|
|||
if response.status_code != 200:
|
||||
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API error: {response.text}")
|
||||
|
||||
apiResponse = response.json()
|
||||
# Check if response body is empty or invalid
|
||||
responseText = response.text
|
||||
if not responseText or not responseText.strip():
|
||||
raise HTTPException(status_code=500, detail="Perplexity Web Search API returned empty response")
|
||||
|
||||
try:
|
||||
apiResponse = response.json()
|
||||
except Exception as jsonError:
|
||||
logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
|
||||
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API returned invalid JSON: {str(jsonError)}")
|
||||
|
||||
if "choices" not in apiResponse or not apiResponse["choices"]:
|
||||
raise HTTPException(status_code=500, detail="Perplexity Web Search API response missing 'choices' field")
|
||||
|
||||
content = apiResponse["choices"][0]["message"]["content"]
|
||||
|
||||
return AiModelResponse(
|
||||
|
|
@ -350,8 +372,17 @@ Return ONLY a JSON array of URLs, no additional text:
|
|||
temperature = getattr(options, "temperature", None) or model.temperature
|
||||
maxTokens = model.maxTokens
|
||||
|
||||
# Parse prompt JSON
|
||||
promptContent = messages[0]["content"] if messages else ""
|
||||
# Parse prompt JSON - find user message (not system message)
|
||||
promptContent = ""
|
||||
if messages:
|
||||
for msg in messages:
|
||||
if msg.get("role") == "user":
|
||||
promptContent = msg.get("content", "")
|
||||
break
|
||||
# Fallback to first message if no user message found
|
||||
if not promptContent and len(messages) > 0:
|
||||
promptContent = messages[0].get("content", "")
|
||||
|
||||
import json
|
||||
promptData = json.loads(promptContent)
|
||||
|
||||
|
|
@ -384,7 +415,19 @@ Return ONLY a JSON array of URLs, no additional text:
|
|||
if response.status_code != 200:
|
||||
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API error: {response.text}")
|
||||
|
||||
apiResponse = response.json()
|
||||
# Check if response body is empty or invalid
|
||||
responseText = response.text
|
||||
if not responseText or not responseText.strip():
|
||||
raise HTTPException(status_code=500, detail="Perplexity Web Crawl API returned empty response")
|
||||
|
||||
try:
|
||||
apiResponse = response.json()
|
||||
except Exception as jsonError:
|
||||
logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
|
||||
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API returned invalid JSON: {str(jsonError)}")
|
||||
|
||||
if "choices" not in apiResponse or not apiResponse["choices"]:
|
||||
raise HTTPException(status_code=500, detail="Perplexity Web Crawl API response missing 'choices' field")
|
||||
|
||||
# Extract the main content
|
||||
content = apiResponse["choices"][0]["message"]["content"]
|
||||
|
|
|
|||
|
|
@ -466,10 +466,26 @@ class AiTavily(BaseConnectorAi):
|
|||
AiModelResponse with JSON list of URLs
|
||||
"""
|
||||
try:
|
||||
# Extract parameters
|
||||
promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
|
||||
# Extract parameters - find user message (not system message)
|
||||
promptContent = ""
|
||||
if modelCall.messages:
|
||||
for msg in modelCall.messages:
|
||||
if msg.get("role") == "user":
|
||||
promptContent = msg.get("content", "")
|
||||
break
|
||||
# Fallback to first message if no user message found
|
||||
if not promptContent and len(modelCall.messages) > 0:
|
||||
promptContent = modelCall.messages[0].get("content", "")
|
||||
|
||||
if not promptContent or not promptContent.strip():
|
||||
raise ValueError("Empty prompt content received for web search")
|
||||
|
||||
import json
|
||||
promptData = json.loads(promptContent)
|
||||
try:
|
||||
promptData = json.loads(promptContent)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
|
||||
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
|
||||
|
||||
# Create Pydantic model
|
||||
webSearchPrompt = AiCallPromptWebSearch(**promptData)
|
||||
|
|
@ -520,10 +536,26 @@ class AiTavily(BaseConnectorAi):
|
|||
AiModelResponse with crawl results as JSON (may include multiple pages)
|
||||
"""
|
||||
try:
|
||||
# Extract parameters
|
||||
promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
|
||||
# Extract parameters - find user message (not system message)
|
||||
promptContent = ""
|
||||
if modelCall.messages:
|
||||
for msg in modelCall.messages:
|
||||
if msg.get("role") == "user":
|
||||
promptContent = msg.get("content", "")
|
||||
break
|
||||
# Fallback to first message if no user message found
|
||||
if not promptContent and len(modelCall.messages) > 0:
|
||||
promptContent = modelCall.messages[0].get("content", "")
|
||||
|
||||
if not promptContent or not promptContent.strip():
|
||||
raise ValueError("Empty prompt content received for web crawl")
|
||||
|
||||
import json
|
||||
promptData = json.loads(promptContent)
|
||||
try:
|
||||
promptData = json.loads(promptContent)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
|
||||
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
|
||||
|
||||
# Create Pydantic model
|
||||
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
|
||||
|
|
|
|||
|
|
@ -54,9 +54,27 @@ class NeutralizationPlayground:
|
|||
allAttributes = self.services.neutralization.getAttributes()
|
||||
patternCounts: Dict[str, int] = {}
|
||||
for attr in allAttributes:
|
||||
patternType = attr.patternType
|
||||
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
|
||||
uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
|
||||
# Handle both dict and object access patterns
|
||||
if isinstance(attr, dict):
|
||||
patternType = attr.get('patternType', 'unknown')
|
||||
fileId = attr.get('fileId')
|
||||
else:
|
||||
patternType = getattr(attr, 'patternType', 'unknown')
|
||||
fileId = getattr(attr, 'fileId', None)
|
||||
|
||||
if patternType:
|
||||
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
|
||||
|
||||
# Get unique files - handle both dict and object
|
||||
uniqueFiles = set()
|
||||
for attr in allAttributes:
|
||||
if isinstance(attr, dict):
|
||||
fileId = attr.get('fileId')
|
||||
else:
|
||||
fileId = getattr(attr, 'fileId', None)
|
||||
if fileId:
|
||||
uniqueFiles.add(fileId)
|
||||
|
||||
return {
|
||||
'total_attributes': len(allAttributes),
|
||||
'unique_files': len(uniqueFiles),
|
||||
|
|
|
|||
|
|
@ -130,10 +130,8 @@ class NeutralizationService:
|
|||
if not self.interfaceDbApp:
|
||||
return []
|
||||
try:
|
||||
return self.interfaceDbApp.db.getRecordset(
|
||||
DataNeutralizerAttributes,
|
||||
recordFilter={"mandateId": self.interfaceDbApp.mandateId}
|
||||
)
|
||||
# Use the interface method which properly converts dicts to objects
|
||||
return self.interfaceDbApp.getNeutralizationAttributes()
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -269,8 +269,11 @@ class WorkflowManager:
|
|||
fileName = (title or f"user_context_{idx+1}.txt").strip()
|
||||
mimeType = (mime or "text/plain").strip()
|
||||
|
||||
# Create file in component storage
|
||||
# Neutralize content before storing if neutralization is enabled
|
||||
content_bytes = content.encode('utf-8')
|
||||
content_bytes = await self._neutralizeContentIfEnabled(content_bytes, mimeType)
|
||||
|
||||
# Create file in component storage
|
||||
file_item = self.services.interfaceDbComponent.createFile(
|
||||
name=fileName,
|
||||
mimeType=mimeType,
|
||||
|
|
@ -719,3 +722,46 @@ class WorkflowManager:
|
|||
def _setUserLanguage(self, language: str) -> None:
|
||||
"""Set user language for the service center"""
|
||||
self.services.user.language = language
|
||||
|
||||
async def _neutralizeContentIfEnabled(self, content_bytes: bytes, mimeType: str) -> bytes:
|
||||
"""Neutralize content if neutralization is enabled in user settings"""
|
||||
try:
|
||||
# Check if neutralization is enabled
|
||||
config = self.services.neutralization.getConfig()
|
||||
if not config or not config.enabled:
|
||||
return content_bytes
|
||||
|
||||
# Decode content to text for neutralization
|
||||
try:
|
||||
text_content = content_bytes.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
# Try alternative encodings
|
||||
for enc in ['latin-1', 'cp1252', 'iso-8859-1']:
|
||||
try:
|
||||
text_content = content_bytes.decode(enc)
|
||||
break
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
else:
|
||||
# If unable to decode, return original bytes (binary content)
|
||||
logger.debug(f"Unable to decode content for neutralization, skipping: {mimeType}")
|
||||
return content_bytes
|
||||
|
||||
# Neutralize the text content
|
||||
# Note: The neutralization service should use names from config when processing
|
||||
result = self.services.neutralization.processText(text_content)
|
||||
if result and 'neutralized_text' in result:
|
||||
neutralized_text = result['neutralized_text']
|
||||
# Encode back to bytes using the same encoding
|
||||
try:
|
||||
return neutralized_text.encode('utf-8')
|
||||
except Exception as e:
|
||||
logger.warning(f"Error encoding neutralized text: {str(e)}")
|
||||
return content_bytes
|
||||
else:
|
||||
logger.warning("Neutralization did not return neutralized_text")
|
||||
return content_bytes
|
||||
except Exception as e:
|
||||
logger.error(f"Error during content neutralization: {str(e)}")
|
||||
# Return original content on error
|
||||
return content_bytes
|
||||
|
|
|
|||
Loading…
Reference in a new issue