neutralizer activated
This commit is contained in:
parent
4d3ca3342a
commit
24b09ea7ce
5 changed files with 157 additions and 20 deletions
|
|
@ -271,8 +271,17 @@ class AiPerplexity(BaseConnectorAi):
|
||||||
temperature = getattr(options, "temperature", None) or model.temperature
|
temperature = getattr(options, "temperature", None) or model.temperature
|
||||||
maxTokens = model.maxTokens
|
maxTokens = model.maxTokens
|
||||||
|
|
||||||
# Parse prompt JSON
|
# Parse prompt JSON - find user message (not system message)
|
||||||
promptContent = messages[0]["content"] if messages else ""
|
promptContent = ""
|
||||||
|
if messages:
|
||||||
|
for msg in messages:
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
promptContent = msg.get("content", "")
|
||||||
|
break
|
||||||
|
# Fallback to first message if no user message found
|
||||||
|
if not promptContent and len(messages) > 0:
|
||||||
|
promptContent = messages[0].get("content", "")
|
||||||
|
|
||||||
import json
|
import json
|
||||||
promptData = json.loads(promptContent)
|
promptData = json.loads(promptContent)
|
||||||
|
|
||||||
|
|
@ -309,7 +318,20 @@ Return ONLY a JSON array of URLs, no additional text:
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API error: {response.text}")
|
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API error: {response.text}")
|
||||||
|
|
||||||
apiResponse = response.json()
|
# Check if response body is empty or invalid
|
||||||
|
responseText = response.text
|
||||||
|
if not responseText or not responseText.strip():
|
||||||
|
raise HTTPException(status_code=500, detail="Perplexity Web Search API returned empty response")
|
||||||
|
|
||||||
|
try:
|
||||||
|
apiResponse = response.json()
|
||||||
|
except Exception as jsonError:
|
||||||
|
logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Perplexity Web Search API returned invalid JSON: {str(jsonError)}")
|
||||||
|
|
||||||
|
if "choices" not in apiResponse or not apiResponse["choices"]:
|
||||||
|
raise HTTPException(status_code=500, detail="Perplexity Web Search API response missing 'choices' field")
|
||||||
|
|
||||||
content = apiResponse["choices"][0]["message"]["content"]
|
content = apiResponse["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
return AiModelResponse(
|
return AiModelResponse(
|
||||||
|
|
@ -350,8 +372,17 @@ Return ONLY a JSON array of URLs, no additional text:
|
||||||
temperature = getattr(options, "temperature", None) or model.temperature
|
temperature = getattr(options, "temperature", None) or model.temperature
|
||||||
maxTokens = model.maxTokens
|
maxTokens = model.maxTokens
|
||||||
|
|
||||||
# Parse prompt JSON
|
# Parse prompt JSON - find user message (not system message)
|
||||||
promptContent = messages[0]["content"] if messages else ""
|
promptContent = ""
|
||||||
|
if messages:
|
||||||
|
for msg in messages:
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
promptContent = msg.get("content", "")
|
||||||
|
break
|
||||||
|
# Fallback to first message if no user message found
|
||||||
|
if not promptContent and len(messages) > 0:
|
||||||
|
promptContent = messages[0].get("content", "")
|
||||||
|
|
||||||
import json
|
import json
|
||||||
promptData = json.loads(promptContent)
|
promptData = json.loads(promptContent)
|
||||||
|
|
||||||
|
|
@ -384,7 +415,19 @@ Return ONLY a JSON array of URLs, no additional text:
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API error: {response.text}")
|
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API error: {response.text}")
|
||||||
|
|
||||||
apiResponse = response.json()
|
# Check if response body is empty or invalid
|
||||||
|
responseText = response.text
|
||||||
|
if not responseText or not responseText.strip():
|
||||||
|
raise HTTPException(status_code=500, detail="Perplexity Web Crawl API returned empty response")
|
||||||
|
|
||||||
|
try:
|
||||||
|
apiResponse = response.json()
|
||||||
|
except Exception as jsonError:
|
||||||
|
logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API returned invalid JSON: {str(jsonError)}")
|
||||||
|
|
||||||
|
if "choices" not in apiResponse or not apiResponse["choices"]:
|
||||||
|
raise HTTPException(status_code=500, detail="Perplexity Web Crawl API response missing 'choices' field")
|
||||||
|
|
||||||
# Extract the main content
|
# Extract the main content
|
||||||
content = apiResponse["choices"][0]["message"]["content"]
|
content = apiResponse["choices"][0]["message"]["content"]
|
||||||
|
|
|
||||||
|
|
@ -466,10 +466,26 @@ class AiTavily(BaseConnectorAi):
|
||||||
AiModelResponse with JSON list of URLs
|
AiModelResponse with JSON list of URLs
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Extract parameters
|
# Extract parameters - find user message (not system message)
|
||||||
promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
|
promptContent = ""
|
||||||
|
if modelCall.messages:
|
||||||
|
for msg in modelCall.messages:
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
promptContent = msg.get("content", "")
|
||||||
|
break
|
||||||
|
# Fallback to first message if no user message found
|
||||||
|
if not promptContent and len(modelCall.messages) > 0:
|
||||||
|
promptContent = modelCall.messages[0].get("content", "")
|
||||||
|
|
||||||
|
if not promptContent or not promptContent.strip():
|
||||||
|
raise ValueError("Empty prompt content received for web search")
|
||||||
|
|
||||||
import json
|
import json
|
||||||
promptData = json.loads(promptContent)
|
try:
|
||||||
|
promptData = json.loads(promptContent)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
|
||||||
|
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
|
||||||
|
|
||||||
# Create Pydantic model
|
# Create Pydantic model
|
||||||
webSearchPrompt = AiCallPromptWebSearch(**promptData)
|
webSearchPrompt = AiCallPromptWebSearch(**promptData)
|
||||||
|
|
@ -520,10 +536,26 @@ class AiTavily(BaseConnectorAi):
|
||||||
AiModelResponse with crawl results as JSON (may include multiple pages)
|
AiModelResponse with crawl results as JSON (may include multiple pages)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Extract parameters
|
# Extract parameters - find user message (not system message)
|
||||||
promptContent = modelCall.messages[0]["content"] if modelCall.messages else ""
|
promptContent = ""
|
||||||
|
if modelCall.messages:
|
||||||
|
for msg in modelCall.messages:
|
||||||
|
if msg.get("role") == "user":
|
||||||
|
promptContent = msg.get("content", "")
|
||||||
|
break
|
||||||
|
# Fallback to first message if no user message found
|
||||||
|
if not promptContent and len(modelCall.messages) > 0:
|
||||||
|
promptContent = modelCall.messages[0].get("content", "")
|
||||||
|
|
||||||
|
if not promptContent or not promptContent.strip():
|
||||||
|
raise ValueError("Empty prompt content received for web crawl")
|
||||||
|
|
||||||
import json
|
import json
|
||||||
promptData = json.loads(promptContent)
|
try:
|
||||||
|
promptData = json.loads(promptContent)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
|
||||||
|
raise ValueError(f"Invalid JSON in prompt content: {str(e)}")
|
||||||
|
|
||||||
# Create Pydantic model
|
# Create Pydantic model
|
||||||
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
|
webCrawlPrompt = AiCallPromptWebCrawl(**promptData)
|
||||||
|
|
|
||||||
|
|
@ -54,9 +54,27 @@ class NeutralizationPlayground:
|
||||||
allAttributes = self.services.neutralization.getAttributes()
|
allAttributes = self.services.neutralization.getAttributes()
|
||||||
patternCounts: Dict[str, int] = {}
|
patternCounts: Dict[str, int] = {}
|
||||||
for attr in allAttributes:
|
for attr in allAttributes:
|
||||||
patternType = attr.patternType
|
# Handle both dict and object access patterns
|
||||||
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
|
if isinstance(attr, dict):
|
||||||
uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
|
patternType = attr.get('patternType', 'unknown')
|
||||||
|
fileId = attr.get('fileId')
|
||||||
|
else:
|
||||||
|
patternType = getattr(attr, 'patternType', 'unknown')
|
||||||
|
fileId = getattr(attr, 'fileId', None)
|
||||||
|
|
||||||
|
if patternType:
|
||||||
|
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
|
||||||
|
|
||||||
|
# Get unique files - handle both dict and object
|
||||||
|
uniqueFiles = set()
|
||||||
|
for attr in allAttributes:
|
||||||
|
if isinstance(attr, dict):
|
||||||
|
fileId = attr.get('fileId')
|
||||||
|
else:
|
||||||
|
fileId = getattr(attr, 'fileId', None)
|
||||||
|
if fileId:
|
||||||
|
uniqueFiles.add(fileId)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'total_attributes': len(allAttributes),
|
'total_attributes': len(allAttributes),
|
||||||
'unique_files': len(uniqueFiles),
|
'unique_files': len(uniqueFiles),
|
||||||
|
|
|
||||||
|
|
@ -130,10 +130,8 @@ class NeutralizationService:
|
||||||
if not self.interfaceDbApp:
|
if not self.interfaceDbApp:
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
return self.interfaceDbApp.db.getRecordset(
|
# Use the interface method which properly converts dicts to objects
|
||||||
DataNeutralizerAttributes,
|
return self.interfaceDbApp.getNeutralizationAttributes()
|
||||||
recordFilter={"mandateId": self.interfaceDbApp.mandateId}
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
|
||||||
|
|
@ -269,8 +269,11 @@ class WorkflowManager:
|
||||||
fileName = (title or f"user_context_{idx+1}.txt").strip()
|
fileName = (title or f"user_context_{idx+1}.txt").strip()
|
||||||
mimeType = (mime or "text/plain").strip()
|
mimeType = (mime or "text/plain").strip()
|
||||||
|
|
||||||
# Create file in component storage
|
# Neutralize content before storing if neutralization is enabled
|
||||||
content_bytes = content.encode('utf-8')
|
content_bytes = content.encode('utf-8')
|
||||||
|
content_bytes = await self._neutralizeContentIfEnabled(content_bytes, mimeType)
|
||||||
|
|
||||||
|
# Create file in component storage
|
||||||
file_item = self.services.interfaceDbComponent.createFile(
|
file_item = self.services.interfaceDbComponent.createFile(
|
||||||
name=fileName,
|
name=fileName,
|
||||||
mimeType=mimeType,
|
mimeType=mimeType,
|
||||||
|
|
@ -719,3 +722,46 @@ class WorkflowManager:
|
||||||
def _setUserLanguage(self, language: str) -> None:
|
def _setUserLanguage(self, language: str) -> None:
|
||||||
"""Set user language for the service center"""
|
"""Set user language for the service center"""
|
||||||
self.services.user.language = language
|
self.services.user.language = language
|
||||||
|
|
||||||
|
async def _neutralizeContentIfEnabled(self, content_bytes: bytes, mimeType: str) -> bytes:
    """Neutralize content if neutralization is enabled in user settings.

    The bytes are decoded to text, passed through
    ``self.services.neutralization.processText`` and re-encoded with the
    codec that was used to decode them. The operation is best-effort: on
    any failure the original bytes are returned unchanged.

    Args:
        content_bytes: Raw content that is about to be stored.
        mimeType: MIME type of the content; only used for diagnostics.

    Returns:
        The neutralized content as bytes, or ``content_bytes`` unchanged when
        neutralization is disabled, yields no ``neutralized_text`` or fails.
    """
    try:
        # Check if neutralization is enabled
        config = self.services.neutralization.getConfig()
        if not config or not config.enabled:
            return content_bytes

        # Decode content to text for neutralization.
        # cp1252 must be tried before latin-1: latin-1 maps every possible
        # byte, so it never raises and any codec listed after it would be
        # unreachable. For the same reason the loop always finds a codec.
        # NOTE(review): because latin-1 accepts any bytes, binary payloads
        # are not detected here - confirm callers only pass text content.
        text_content = None
        source_encoding = None
        for enc in ('utf-8', 'cp1252', 'latin-1'):
            try:
                text_content = content_bytes.decode(enc)
            except UnicodeDecodeError:
                continue
            source_encoding = enc
            break

        if source_encoding != 'utf-8':
            logger.debug(
                f"Content is not valid UTF-8, decoded as {source_encoding} for neutralization: {mimeType}"
            )

        # Neutralize the text content
        # Note: The neutralization service should use names from config when processing
        result = self.services.neutralization.processText(text_content)
        if not result or 'neutralized_text' not in result:
            logger.warning("Neutralization did not return neutralized_text")
            return content_bytes

        neutralized_text = result['neutralized_text']

        # Encode back with the codec the content was decoded with, so the
        # stored bytes keep their original encoding. If the neutralized text
        # contains characters that codec cannot represent, fall back to UTF-8
        # rather than giving up on the neutralized result.
        for enc in dict.fromkeys((source_encoding, 'utf-8')):
            try:
                return neutralized_text.encode(enc)
            except UnicodeEncodeError as e:
                logger.warning(f"Error encoding neutralized text: {str(e)}")
        return content_bytes
    except Exception as e:
        logger.error(f"Error during content neutralization: {str(e)}")
        # Return original content on error
        return content_bytes
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue