1109 lines
No EOL
44 KiB
Python
1109 lines
No EOL
44 KiB
Python
"""
|
|
ChatManager Modul zur Verwaltung von AI-Chat-Workflows.
|
|
Implementiert eine kompakte und modulare Architektur für die Verarbeitung
|
|
von Benutzeranfragen, Agentenausführung und Ergebnisformatierung.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
import json
|
|
import re
|
|
import uuid
|
|
import base64
|
|
from datetime import datetime
|
|
from typing import Dict, Any, List, Optional, Union
|
|
|
|
# Notwendige Importe
|
|
from connectors.connector_aichat_openai import ChatService
|
|
from modules.chat_registry import get_agent_registry
|
|
from modules.lucydom_interface import get_lucydom_interface
|
|
from modules.chat_content_extraction import get_document_contents
|
|
|
|
# Logger konfigurieren
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class ChatManager:
|
|
"""
|
|
Verwaltet die Verarbeitung von Chat-Anfragen, Agentenausführung und
|
|
die Integration von Ergebnissen in den Workflow.
|
|
"""
|
|
|
|
    def __init__(self, mandate_id: int, user_id: int):
        """
        Initializes the ChatManager with mandate (tenant) and user context.

        Args:
            mandate_id: ID of the current mandate
            user_id: ID of the current user
        """
        self.mandate_id = mandate_id
        self.user_id = user_id
        # Project-level collaborators: AI backend, persistence interface
        # (scoped to this mandate/user), and the agent registry.
        self.ai_service = ChatService()
        self.lucy_interface = get_lucydom_interface(mandate_id, user_id)
        self.agent_registry = get_agent_registry()
|
|
|
|
|
|
### Chat Management
|
|
|
|
async def chat_run(self, user_input: Dict[str, Any], workflow_id: Optional[str] = None) -> Dict[str, Any]:
|
|
"""
|
|
Hauptfunktion zur Integration von Benutzeranfragen in den Workflow.
|
|
|
|
Args:
|
|
user_input: Dictionary mit Benutzeranfrage und Datei-IDs
|
|
workflow_id: Optional - ID des Workflows (None für neue Workflows)
|
|
|
|
Returns:
|
|
Workflow-Objekt mit aktualisiertem Zustand
|
|
"""
|
|
# 1. Workflow initialisieren oder bestehenden laden
|
|
workflow = self.workflow_init(workflow_id)
|
|
|
|
# 2. User-Input in Message-Objekt transformieren und im Workflow speichern
|
|
message_user = self.chat_message_to_workflow("user", "", user_input, workflow)
|
|
|
|
# 3. Projektleiter-Prompt erstellen und Antwort analysieren
|
|
project_manager_response = await self.chat_prompt(message_user, workflow)
|
|
obj_final_documents = project_manager_response.get("obj_final_documents", [])
|
|
obj_workplan = project_manager_response.get("obj_workplan", [])
|
|
obj_user_response = project_manager_response.get("obj_user_response", "")
|
|
|
|
# 4. Speichere die Antwort als Message im Workflow und füge Log-Einträge hinzu
|
|
response_message = {
|
|
"role": "assistant",
|
|
"agent_name": "project_manager",
|
|
"content": obj_user_response
|
|
}
|
|
self.message_add(workflow, response_message)
|
|
|
|
self.log_add(workflow, f"Geplante Ergebnisse: {self.parse_json2text(obj_final_documents)}")
|
|
self.log_add(workflow, f"Arbeitsplan: {self.parse_json2text(obj_workplan)}")
|
|
self.log_add(workflow, f"Info an den User: {obj_user_response}")
|
|
|
|
# 5. Agenten gemäss Workplan ausführen
|
|
obj_results = []
|
|
if obj_workplan:
|
|
for task in obj_workplan:
|
|
task_results = await self.agent_processing(task, workflow)
|
|
obj_results.extend(task_results)
|
|
|
|
# 6. Erstelle die finale Antwort mit den relevanten Dokumenten aus obj_final_documents
|
|
final_message = self.chat_final_message(obj_user_response, obj_results, obj_final_documents)
|
|
self.message_add(workflow, final_message)
|
|
|
|
# 7. Finalisiere den Workflow
|
|
self.workflow_finish(workflow)
|
|
|
|
return workflow
|
|
|
|
    async def chat_prompt(self, message_user: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
        """
        Builds the project-manager prompt and processes its response.

        Args:
            message_user: Message object with the user request
            workflow: Current workflow object

        Returns:
            Project-manager response with obj_final_documents, obj_workplan
            and obj_user_response
        """
        # Fetch the available agents together with their capabilities
        available_agents = self.agent_profiles()

        # Create a summary of the workflow (conversation history)
        workflow_summary = await self.workflow_summarize(workflow, message_user)

        # Build the list of currently available documents from user input or
        # documents generated earlier in this workflow
        available_documents = self.available_documents_get(workflow, message_user)
        available_docs_str = json.dumps(available_documents, indent=2)

        # Build the prompt for the project manager.
        # NOTE(review): the JSON_OUTPUT template below is not valid JSON (the
        # "output_documents"/"input_documents" entries lack braces, a comma is
        # missing after "prompt", and rule 4 ends mid-sentence) — presumably
        # tolerated by the model; confirm intent before tightening.
        prompt = f"""
Based on the user request and the provided documents, please analyze the requirements and create a processing plan.

<userrequest>
{message_user.get('content')}
</userrequest>

# Previous conversation history:

{workflow_summary}


# Available documents (currently in workflow):

{available_docs_str}


# Available agents and their capabilities:

{self.parse_json2text(available_agents)}


Please analyze the request and create:

1. A list of required result documents (obj_final_documents)
2. A plan for executing agents (obj_workplan)
3. A clear response to the user explaining what you're doing (obj_user_response)

## IMPORTANT RULES FOR THE WORKPLAN:
1. Each input document must either already exist (provided by the user or previously created by an agent) or be created by an agent before it's used.
2. If necessary, convert input documents to a suitable format using agents when the type doesn't match.
3. Do not define document inputs that don't exist or haven't been generated beforehand.
4. Create a logical sequence - earlier agents can create documents that are later used as inputs.
5. If the user has provided documents but hasn't clearly stated what they want, try to act according to the context.

Your answer must be strictly in the JSON_OUTPUT format, with no additions before or after the JSON object.

JSON_OUTPUT = {{
    "obj_final_documents": [
        FILEREF
    ],
    "obj_workplan": [
        {{
            "agent": "agent_name", # Name of an available agent
            "prompt": "Specific instructions to the agent, that he knows what to do with which documents and which output to provide."
            "output_documents": [
                "label":"document label in the format 'filename.ext'",
                "prompt":"AI prompt to describe the content of the file"
            ],
            "input_documents": [
                "label":"document label in the format 'filename.ext'",
                "file_id":id, # if refering to an existing document, provide file_id to select the correct file
                "content_part":"", # provide empty string, if all document contents to consider, otherwise the content_part of the document to focus on
                "prompt":"AI prompt to describe what data to extract from the file."
            ], # If no input documents are needed, include "input_documents" as an empty list
        }}
        # Multiple agent tasks can be added here and should build logically on each other
    ],
    "obj_user_response": "Information to the user about how his request will be solved."
}}

## RULES for input_documents:
1. The user request refers to documents where "file_source" in available documents is "user". Those documents are in the focus for input
2. In case of redundant label in available documents, use document with highest sequence_nr if not specified differently

## STRICT RULES FOR document "label":
1. Every document label MUST include a proper file extension that matches the content type.
2. Use standard extensions like:
   - ".txt" for text files
   - ".md" for markdown files
   - ".csv" for comma-separated values
   - ".json" for JSON data
   - ".html" for HTML content
   - ".jpg" or ".png" for images
   - ".docx" for Word documents
   - ".xlsx" for Excel files
   - ".pdf" for PDF documents
3. Use descriptive filenames that indicate the document's purpose (e.g., "analysis_report.txt" rather than just "report.txt")
4. If you use label for an existing file
"""

        # Call the AI service to obtain the project manager's answer
        logger.debug(f"Planning prompt: {prompt}")
        project_manager_output = await self.ai_service.call_api([
            {
                "role": "system",
                "content": "You are an experienced project manager who analyzes user requests and creates work plans. You pay very careful attention to ensure that all document dependencies are correct and that no non-existent documents are defined as inputs. The output follows strictly the specified format."
            },
            {
                "role": "user",
                "content": prompt
            }
        ])

        # Parse the JSON answer (parse_json_response is defined elsewhere in this class)
        return self.parse_json_response(project_manager_output)
|
|
|
|
    def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
        """
        Turns an input payload into a message object (including files with
        their full contents) and appends it to the workflow.

        Args:
            role: Message role, e.g. "user" or "assistant"
            agent_name: Name of the emitting agent ("" for plain user input)
            chat_message: Input data: "prompt"=str, "list_file_id"=[]
            workflow: Workflow object the message is added to

        Returns:
            Message object with content and documents (including contents)
        """
        logger.info(f"Message from {role} {agent_name} sent with {len(chat_message.get('list_file_id', []))} documents")
        logger.debug(f"message = {self.parse_json2text(chat_message)}.")

        # Validate the message content; unwrap a nested {"content": ...} payload
        message_content = chat_message.get("prompt", "")
        if isinstance(message_content, dict) and "content" in message_content:
            message_content = message_content["content"]

        # An empty user message means there is nothing to chat about
        if role=="user" and (message_content is None or message_content.strip() == ""):
            logger.warning(f"Empty message, no chat")
            message_content = "(No user input received)"

        # Attach additional files with their full contents.
        # NOTE(review): process_file_ids is declared `async def` but is called
        # here WITHOUT await from a sync method — "documents" receives a
        # coroutine object instead of a list of documents. This looks like a
        # real bug; fixing it requires making this method (and its callers'
        # call sites) async. Confirm against the intended async call chain.
        additional_fileids = chat_message.get("list_file_id", [])
        additional_files = self.process_file_ids(additional_fileids)

        # Build the message object
        message_object = {
            "role": role,
            "agent_name": agent_name,
            "content": message_content,
            "documents": additional_files
        }

        message_object=self.message_add(workflow, message_object)
        logger.debug(f"message_user = {self.parse_json2text(message_object)}.")
        return message_object
|
|
|
|
def chat_final_message(self, obj_user_response: str, obj_results: List[Dict[str, Any]],
|
|
obj_final_documents: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
"""
|
|
Creates the final response message with documents corresponding to obj_final_documents.
|
|
|
|
Args:
|
|
obj_user_response: Text response to the user
|
|
obj_results: List of generated result documents
|
|
obj_final_documents: List of expected response documents
|
|
|
|
Returns:
|
|
Complete message object with content and relevant documents
|
|
"""
|
|
# Create basic message structure
|
|
final_message = {
|
|
"role": "assistant",
|
|
"agent_name": "project_manager",
|
|
"content": obj_user_response,
|
|
"documents": []
|
|
}
|
|
|
|
# Find documents that match the obj_final_documents requirements
|
|
matching_documents = []
|
|
doc_references = []
|
|
|
|
for answer_spec in obj_final_documents:
|
|
answer_label = answer_spec.get("label")
|
|
|
|
# Find matching document in results
|
|
for doc in obj_results:
|
|
doc_name=self.get_filename(doc)
|
|
# Check if this document matches the answer specification
|
|
if doc_name == answer_label:
|
|
matching_documents.append(doc)
|
|
doc_type = answer_spec.get("doc_type", "Document")
|
|
doc_references.append(f"- {doc_name} ({doc_type})")
|
|
break
|
|
|
|
# Add matching documents to the final message
|
|
final_message["documents"] = matching_documents
|
|
|
|
# Add document references to the content if there are any
|
|
if doc_references:
|
|
doc_list = "\n".join(doc_references)
|
|
final_message["content"] += f"\n\nCreated documents:\n{doc_list}"
|
|
|
|
return final_message
|
|
|
|
|
|
### Workflow
|
|
|
|
def workflow_init(self, workflow_id: Optional[str] = None) -> Dict[str, Any]:
|
|
"""
|
|
Initialisiert einen Workflow oder lädt einen bestehenden mit Rundenzählung.
|
|
|
|
Args:
|
|
workflow_id: Optional - ID des zu ladenden Workflows
|
|
|
|
Returns:
|
|
Initialisiertes Workflow-Objekt
|
|
"""
|
|
current_time = datetime.now().isoformat()
|
|
|
|
if workflow_id is None or not self.lucy_interface.get_workflow(workflow_id):
|
|
# Neuen Workflow erstellen
|
|
new_workflow_id = str(uuid.uuid4()) if workflow_id is None else workflow_id
|
|
workflow = {
|
|
"id": new_workflow_id,
|
|
"mandate_id": self.mandate_id,
|
|
"user_id": self.user_id,
|
|
"name": f"Workflow {new_workflow_id[:8]}",
|
|
"started_at": current_time,
|
|
"messages": [],
|
|
"logs": [],
|
|
"data_stats": {},
|
|
"current_round": 1,
|
|
"status": "running",
|
|
"last_activity": current_time,
|
|
"waiting_for_user": False
|
|
}
|
|
|
|
# In Datenbank speichern
|
|
self.lucy_interface.create_workflow(workflow)
|
|
return workflow
|
|
else:
|
|
# Bestehenden Workflow laden
|
|
workflow = self.lucy_interface.load_workflow_state(workflow_id)
|
|
|
|
# Status aktualisieren und Rundenzähler inkrementieren
|
|
workflow["status"] = "running"
|
|
workflow["last_activity"] = current_time
|
|
workflow["waiting_for_user"] = False
|
|
|
|
# Inkrementiere current_round, wenn sie existiert, sonst setze sie auf 1
|
|
if "current_round" in workflow:
|
|
workflow["current_round"] += 1
|
|
else:
|
|
workflow["current_round"] = 1
|
|
|
|
# In Datenbank aktualisieren
|
|
self.lucy_interface.save_workflow_state(workflow)
|
|
return workflow
|
|
|
|
async def workflow_summarize(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> str:
|
|
"""
|
|
Erstellt eine Zusammenfassung des Workflows ohne die aktuelle User-Message.
|
|
|
|
Args:
|
|
workflow: Workflow-Objekt
|
|
prompt: Anweisungen zur Erstellung der Zusammenfassung
|
|
|
|
Returns:
|
|
Zusammenfassung des Workflows
|
|
"""
|
|
if not workflow or "messages" not in workflow or not workflow["messages"]:
|
|
return "" # die erste Message
|
|
|
|
# Nachrichten in umgekehrter Reihenfolge durchgehen (neueste zuerst)
|
|
messages = sorted(workflow["messages"], key=lambda m: m.get("sequence_no", 0), reverse=False)
|
|
|
|
summary_parts = []
|
|
for message in messages:
|
|
if True: # including user message, excluding would be: message["id"] != message_user["id"]:
|
|
message_summary = await self.message_summarize(message)
|
|
summary_parts.append(message_summary)
|
|
|
|
return "\n\n".join(summary_parts)
|
|
|
|
def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Finalisiert einen Workflow und setzt den Status auf 'stopped'.
|
|
|
|
Args:
|
|
workflow: Workflow-Objekt
|
|
|
|
Returns:
|
|
Aktualisiertes Workflow-Objekt
|
|
"""
|
|
workflow["status"] = "completed"
|
|
workflow["last_activity"] = datetime.now().isoformat()
|
|
workflow["waiting_for_user"] = True
|
|
|
|
# In Datenbank speichern
|
|
self.lucy_interface.save_workflow_state(workflow)
|
|
return workflow
|
|
|
|
|
|
### Agents
|
|
|
|
    def agent_profiles(self) -> List[Dict[str, Any]]:
        """
        Retrieves information about all available agents.

        Returns:
            List with information (as provided by the registry) about every
            registered agent.
        """
        # Thin delegation to the registry created in __init__.
        return self.agent_registry.get_agent_infos()
|
|
|
|
def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
"""
|
|
Prepares input documents for an agent, sorted with newest first.
|
|
|
|
Args:
|
|
doc_input_list: List of required input documents as specified by the project manager
|
|
workflow: Workflow object
|
|
|
|
Returns:
|
|
Prepared input documents for the agent, sorted with newest first
|
|
"""
|
|
prepared_inputs = []
|
|
|
|
# Sortiere die Workflow-Nachrichten nach Sequenznummer (absteigend)
|
|
sorted_messages = sorted(
|
|
workflow.get("messages", []),
|
|
key=lambda m: m.get("sequence_no", 0),
|
|
reverse=True
|
|
)
|
|
|
|
for doc_spec in doc_input_list:
|
|
doc_filename = doc_spec.get("label","")
|
|
doc_file_id = doc_spec.get("file_id","")
|
|
|
|
found_doc = None
|
|
# Search for the document in sorted workflow messages (newest first)
|
|
for message in sorted_messages:
|
|
for doc in message.get("documents", []):
|
|
if (doc_file_id!="" and doc_file_id==doc.file_id) or (doc_filename!="" and self.get_filename(doc) == doc_filename):
|
|
found_doc = doc
|
|
break
|
|
if found_doc:
|
|
break
|
|
if found_doc:
|
|
# Process document for agent based on the specification
|
|
processed_doc = self.process_document_for_agent(found_doc, doc_spec)
|
|
prepared_inputs.append(processed_doc)
|
|
else:
|
|
logger.warning(f"Document with label '{doc_filename}', file_id '{doc_file_id}' not found in workflow")
|
|
|
|
return prepared_inputs
|
|
|
|
async def process_document_for_agent(self, document: Dict[str, Any], doc_spec: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Processes a document for an agent based on the document specification.
|
|
Uses AI to extract relevant content from the document based on the specification.
|
|
|
|
Args:
|
|
document: The document to process
|
|
doc_spec: The document specification from the project manager
|
|
|
|
Returns:
|
|
Processed document with AI-extracted content
|
|
"""
|
|
processed_doc = document.copy()
|
|
part_spec = doc_spec.get("content_part","")
|
|
|
|
# Process each content item in the document
|
|
if "contents" in processed_doc:
|
|
processed_contents = []
|
|
|
|
for content in processed_doc["contents"]:
|
|
|
|
# Check if part required
|
|
if part_spec != "" and part_spec != content.get("name"):
|
|
continue
|
|
|
|
# Get the data from the content
|
|
data = content.get("data", "")
|
|
processed_content = content.copy()
|
|
|
|
try:
|
|
# Use the AI service to process the document content according to the prompt from the project manager for the document specification
|
|
summary = doc_spec.get("prompt", "Extract the relevant information from this document")
|
|
ai_prompt = f"""
|
|
# Please process the following document content according to this instruction:
|
|
<instruction>
|
|
{summary}
|
|
</instruction>
|
|
|
|
# Document content:
|
|
<data>
|
|
{data}
|
|
</data>
|
|
|
|
# Extract and provide only the relevant information as requested.
|
|
"""
|
|
|
|
# Call the AI service to process the content
|
|
processed_data = await self.ai_service.call_api([
|
|
{"role": "system", "content": "You are a document processing assistant. Extract only the relevant information as requested."},
|
|
{"role": "user", "content": ai_prompt}
|
|
])
|
|
|
|
# Update the processed content with the AI-processed data
|
|
processed_content["data"] = processed_data
|
|
processed_content["metadata"]["ai_processed"] = True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error processing document content with AI: {str(e)}")
|
|
# Fall back to original content if AI processing fails
|
|
|
|
processed_contents.append(processed_content)
|
|
|
|
processed_doc["contents"] = processed_contents
|
|
|
|
return processed_doc
|
|
|
|
    async def agent_processing(self, task: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process a single agent task from the workflow.
        Optimized for the task-based approach where all agents implement process_task.

        Args:
            task: The task definition containing agent name, prompt, and document specifications
            workflow: The current workflow object

        Returns:
            List of document objects created by the agent ([] on empty prompt,
            unknown agent, or agent failure)
        """
        # Extract task information
        agent_name = task.get("agent")
        agent_prompt = task.get("prompt", "")

        # Log the current step
        output_labels = [d.get("label", "unknown") for d in task.get("output_documents", [])]
        step_info = f"Agent '{agent_name}' to create {', '.join(output_labels)}."
        self.log_add(workflow, step_info)

        # Check if prompt is empty — nothing to do then
        if agent_prompt == "":
            logger.warning("Empty prompt, no task to do")
            return []

        # Get agent from registry
        agent = self.agent_registry.get_agent(agent_name)
        if not agent:
            logger.error(f"Agent '{agent_name}' not found")
            return []

        # Prepare input documents for the agent.
        # NOTE(review): agent_input_documents appends the un-awaited results of
        # the async process_document_for_agent, so the agent may receive
        # coroutine objects here — confirm the intended async design.
        input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)

        # Prepare output document specifications
        output_specs = []
        for doc in task.get("output_documents", []):
            output_spec = {
                "label": doc.get("label"),
                "description": doc.get("prompt", "")
            }
            output_specs.append(output_spec)

        # Create a standardized task object for the agent
        agent_task = {
            "task_id": str(uuid.uuid4()),
            "workflow_id": workflow.get("id"),
            "prompt": agent_prompt,
            "input_documents": input_documents,
            "output_specifications": output_specs,
            "context": {
                "workflow_round": workflow.get("current_round", 1),
                "agent_type": agent_name,
                "timestamp": datetime.now().isoformat()
            }
        }

        # Execute the agent with the standardized task
        try:
            # Process the task using the agent's standardized interface
            agent_results = await agent.process_task(agent_task)

            # Log the agent response
            self.log_add(
                workflow,
                f"Agent '{agent_name}' completed task. Feedback: {agent_results.get('feedback', 'No feedback provided')}"
            )

            # Store produced files and prepare input object for message
            agent_inputs = {
                "prompt": agent_results.get("feedback", ""),
                "list_file_id": self.agent_save_documents(agent_results)
            }

            # Create a message in the workflow with the agent's response
            agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
            logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")

            return agent_message.get("documents", [])

        except Exception as e:
            # Any agent failure is logged into the workflow but does not abort
            # the whole chat run — the caller simply gets no documents.
            error_msg = f"Error executing agent '{agent_name}': {str(e)}"
            logger.error(error_msg)
            self.log_add(workflow, error_msg, level="error")
            return []
|
|
|
|
|
|
    def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
        """
        Saves all documents from agent results as files and returns a list of file IDs.
        Enhanced to handle the standardized document format from agents.

        Args:
            agent_results: Dictionary containing agent feedback and documents

        Returns:
            List of file IDs for the saved documents
        """
        file_ids = []

        # Extract documents from agent results
        documents = agent_results.get("documents", [])

        for doc in documents:
            try:
                # Extract label (filename) and content
                label = doc.get("label", "unnamed_file.txt")
                content = doc.get("content", "")

                # Split label into name and extension
                name, ext = os.path.splitext(label)
                if ext.startswith('.'):
                    ext = ext[1:]  # Remove leading dot
                elif not ext:
                    # If no extension is provided, default to .txt for text content
                    ext = "txt"
                    label = f"{label}.{ext}"

                # Determine if content is base64 encoded.
                # NOTE(review): this heuristic has false positives — any plain
                # alphanumeric string whose length is a multiple of 4 (e.g.
                # "test" or "data1234") matches the pattern and will later be
                # base64-DECODED, corrupting ordinary text output. Consider
                # relying only on the explicit "_is_base64" flag below;
                # confirm intent before changing behavior.
                is_base64 = False
                if not isinstance(content, bytes):
                    # Check if content might be base64 encoded
                    try:
                        if content and isinstance(content, str):
                            # Check for base64 pattern (simplified)
                            if (len(content) % 4 == 0 and
                                re.match(r'^[A-Za-z0-9+/]+={0,2}$', content)):
                                # Try to decode a small sample
                                sample = content[:100] if len(content) > 100 else content
                                base64.b64decode(sample)
                                is_base64 = True
                    except Exception:
                        # Not base64, treat as regular text
                        # (also swallows the TypeError raised above when
                        # content is a dict — the dict case is handled next)
                        is_base64 = False

                # If content has metadata flag indicating it's base64
                if isinstance(content, dict) and content.get("_is_base64", False):
                    is_base64 = True
                    content = content.get("data", "")

                # Convert content to bytes
                if isinstance(content, str):
                    if is_base64:
                        # Decode base64 to bytes
                        try:
                            file_content = base64.b64decode(content)
                        except Exception as e:
                            logger.warning(f"Failed to decode base64 content: {str(e)}")
                            file_content = content.encode('utf-8')
                    else:
                        # Convert text to bytes
                        file_content = content.encode('utf-8')
                else:
                    # Already bytes
                    file_content = content

                # Save file to database
                file_meta = self.lucy_interface.save_uploaded_file(file_content, label)

                if file_meta and "id" in file_meta:
                    file_id = file_meta["id"]
                    file_ids.append(file_id)
                    logger.info(f"Saved document '{label}' with file ID: {file_id}")
                else:
                    logger.warning(f"Failed to save document '{label}'")

            except Exception as e:
                logger.error(f"Error saving document from agent results: {str(e)}")
                # Continue with other documents instead of failing
                continue

        return file_ids
|
|
|
|
|
|
### Messages
|
|
|
|
    def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Adds a message to the workflow and updates last_activity.

        Args:
            workflow: Workflow object
            message: Message to store

        Returns:
            The enriched message object (with id, workflow_id, timestamps,
            sequence_no and status filled in) — not just its ID.
        """
        current_time = datetime.now().isoformat()

        # Make sure the messages list exists
        if "messages" not in workflow:
            workflow["messages"] = []

        # Generate a new message ID if there is none yet
        if "id" not in message:
            message["id"] = f"msg_{str(uuid.uuid4())}"

        # Attach workflow ID and timestamps
        message["workflow_id"] = workflow["id"]
        message["started_at"] = current_time
        message["finished_at"] = current_time

        # Set the sequence number (1-based position in the message list)
        message["sequence_no"] = len(workflow["messages"]) + 1

        # Set the status
        message["status"] = "completed"

        # Append the message to the workflow
        workflow["messages"].append(message)

        # Update the workflow bookkeeping
        workflow["last_activity"] = current_time
        workflow["last_message_id"] = message["id"]

        # Persist to the database
        self.lucy_interface.create_workflow_message(message)

        return message
|
|
|
|
async def message_summarize(self, message: Dict[str, Any]) -> str:
|
|
"""
|
|
Erstellt eine Zusammenfassung einer Nachricht einschließlich ihrer Dokumente.
|
|
|
|
Args:
|
|
message: Zu summarisierende Nachricht
|
|
prompt: Anweisungen zur Erstellung der Zusammenfassung
|
|
|
|
Returns:
|
|
Zusammenfassung der Nachricht
|
|
"""
|
|
role = message.get("role", "undefined")
|
|
agent_name = message.get("agent_name", "")
|
|
content = message.get("content", "")
|
|
|
|
try:
|
|
content_summary = await self.ai_service.call_api([
|
|
{"role": "system", "content": f"You are a chat message summarizer. Create a very concise summary (2-3 sentences, maximum 300 characters)"},
|
|
{"role": "user", "content": content}
|
|
])
|
|
except Exception as e:
|
|
logger.error(f"Fehler bei der Zusammenfassung: {str(e)}")
|
|
content_summary = content[:200] + "..."
|
|
|
|
# Dokumente zusammenfassen
|
|
docs_summary = ""
|
|
if "documents" in message and message["documents"]:
|
|
docs_list = []
|
|
for i, doc in enumerate(message["documents"]):
|
|
doc_name = self.get_filename(doc)
|
|
docs_list.append(doc_name)
|
|
if docs_list:
|
|
docs_summary = f"\nDocuments:\n{'- '.join(docs_list)}"
|
|
|
|
return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
|
|
|
|
async def message_summarize_content(self, content: Dict[str, Any]) -> str:
|
|
"""
|
|
Generates a summary for a content item using AI.
|
|
|
|
Args:
|
|
content: Content item to summarize (already processed by get_document_contents)
|
|
|
|
Returns:
|
|
Brief summary of the content
|
|
"""
|
|
# Extract relevant information
|
|
data = content.get("data", "")
|
|
content_type = content.get("content_type", "text/plain")
|
|
is_text = content.get("metadata", {}).get("is_text", False)
|
|
|
|
try:
|
|
summary = await self.ai_service.call_api([
|
|
{"role": "system", "content": "You are a content summarizer. Create very concise summary (1-2 sentences, maximum 200 characters) about this file."},
|
|
{"role": "user", "content": f"Summarize this {content_type} content briefly:\n\n{data}"}
|
|
])
|
|
return summary
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating content summary: {str(e)}")
|
|
return f"Text content ({content_type})"
|
|
|
|
|
|
### Documents
|
|
|
|
async def process_file_ids(self, file_ids: List[int]) -> List[Dict[str, Any]]:
|
|
"""
|
|
Processes a list of File-IDs and returns the corresponding file objects as a list of Document objects.
|
|
Loads all contents directly and adds summaries to each content item.
|
|
|
|
Args:
|
|
file_ids: List of file IDs
|
|
|
|
Returns:
|
|
List of Document objects with contents and summaries
|
|
"""
|
|
documents = []
|
|
logger.info(f"Processing {len(file_ids)} files")
|
|
|
|
for file_id in file_ids:
|
|
try:
|
|
# Check if the file exists
|
|
file = self.lucy_interface.get_file(file_id)
|
|
if not file:
|
|
logger.warning(f"File with ID {file_id} not found")
|
|
continue
|
|
|
|
# Check if file belongs to the current mandate
|
|
if file.get("mandate_id") != self.mandate_id:
|
|
logger.warning(f"File {file_id} does not belong to mandate {self.mandate_id}")
|
|
continue
|
|
|
|
# Create document
|
|
file_name_ext = file.get("name")
|
|
document = {
|
|
"id": f"doc_{str(uuid.uuid4())}",
|
|
"file_id": file_id,
|
|
"name": os.path.splitext(file_name_ext)[0] if os.path.splitext(file_name_ext)[0] else "noname",
|
|
"ext": os.path.splitext(file_name_ext)[1][1:] if os.path.splitext(file_name_ext)[1] else "bin",
|
|
"contents": []
|
|
}
|
|
|
|
# Load contents immediately
|
|
file_content = self.lucy_interface.get_file_data(file_id)
|
|
if file_content is not None:
|
|
# Extract contents with the external function
|
|
contents = get_document_contents(file, file_content)
|
|
|
|
# Add summaries to each content item
|
|
for content in contents:
|
|
content["summary"] = await self.message_summarize_content(content)
|
|
|
|
document["contents"] = contents
|
|
|
|
logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries")
|
|
else:
|
|
logger.warning(f"No content found for file with ID {file_id}")
|
|
|
|
documents.append(document)
|
|
except Exception as e:
|
|
logger.error(f"Error processing file {file_id}: {str(e)}")
|
|
# Continue with remaining files instead of failing
|
|
continue
|
|
|
|
return documents
|
|
|
|
def available_documents_get(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
"""
|
|
Determines all currently available documents from user input and already generated documents.
|
|
|
|
Args:
|
|
message_user: Current message from the user
|
|
workflow: Current workflow object
|
|
|
|
Returns:
|
|
List with information about all available documents, sorted by message sequence_nr in descending order
|
|
"""
|
|
available_docs = []
|
|
|
|
if "messages" in workflow and workflow["messages"]:
|
|
for message in workflow["messages"]:
|
|
message_id = message.get("id", "unknown")
|
|
sequence_nr = message.get("sequence_no", 0)
|
|
|
|
# Determine source
|
|
source = "user" if message_id == message_user.get("id") else "workflow"
|
|
|
|
# Process documents in this message
|
|
if "documents" in message and message["documents"]:
|
|
for doc in message["documents"]:
|
|
# Get filename using our helper method
|
|
filename = self.get_filename(doc)
|
|
file_id = doc.get("file_id")
|
|
|
|
# Extract summaries from all contents
|
|
content_summaries = []
|
|
for content in doc.get("contents", []):
|
|
content_summaries.append({
|
|
"content_part": content.get("name","noname"),
|
|
"metadata": content.get("metadata",""),
|
|
"summary": content.get("summary","No summary"),
|
|
})
|
|
|
|
# Create document info
|
|
doc_info = {
|
|
"sequence_nr": sequence_nr,
|
|
"file_source": source,
|
|
"file_id": file_id,
|
|
"message_id": message_id,
|
|
"label": filename,
|
|
"content_summary_list": content_summaries,
|
|
}
|
|
available_docs.append(doc_info)
|
|
|
|
# Sort by message sequence_nr in descending order (newest first)
|
|
available_docs.sort(key=lambda x: x["sequence_nr"], reverse=True)
|
|
|
|
logger.info(f"Available documents: {len(available_docs)}")
|
|
return available_docs
|
|
|
|
def save_document_to_file(self, document: Dict[str, Any]) -> Optional[int]:
|
|
"""
|
|
Speichert ein Document als Datei in der Datenbank und gibt die File-ID zurück.
|
|
|
|
Args:
|
|
document: Document-Objekt mit Inhalten
|
|
|
|
Returns:
|
|
File-ID oder None bei Fehler
|
|
"""
|
|
try:
|
|
if not document or "contents" not in document or not document["contents"]:
|
|
logger.warning("Dokument hat keine Inhalte zum Speichern")
|
|
return None
|
|
|
|
# Nimm den ersten Inhalt als Hauptinhalt
|
|
main_content = document["contents"][0]
|
|
name = main_content.get("name", "document")
|
|
content_type = main_content.get("content_type", "text/plain")
|
|
data = main_content.get("data", b"")
|
|
|
|
# Binäre Daten sicherstellen
|
|
if isinstance(data, str):
|
|
data = data.encode('utf-8')
|
|
|
|
# Datei in der Datenbank speichern
|
|
file_meta = self.lucy_interface.save_uploaded_file(data, name)
|
|
if file_meta and "id" in file_meta:
|
|
# Aktualisiere das Document mit der File-ID
|
|
document["file_id"] = file_meta["id"]
|
|
return file_meta["id"]
|
|
|
|
return None
|
|
except Exception as e:
|
|
logger.error(f"Fehler beim Speichern des Dokuments als Datei: {str(e)}")
|
|
return None
|
|
|
|
def add_document_to_message(self, message: Dict[str, Any], document: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Fügt ein Document zu einer Nachricht hinzu.
|
|
|
|
Args:
|
|
message: Nachricht, zu der das Dokument hinzugefügt werden soll
|
|
document: Hinzuzufügendes Document
|
|
|
|
Returns:
|
|
Aktualisierte Nachricht
|
|
"""
|
|
# Sicherstellen, dass die Dokumente-Liste existiert
|
|
if "documents" not in message:
|
|
message["documents"] = []
|
|
|
|
# Document hinzufügen
|
|
message["documents"].append(document)
|
|
|
|
return message
|
|
|
|
|
|
### Tools
|
|
|
|
def get_filename(self, document: Dict[str, Any]) -> str:
|
|
name = document.get("name", "unnamed")
|
|
ext = document.get("ext", "")
|
|
if ext:
|
|
return f"{name}.{ext}"
|
|
return name
|
|
|
|
def log_add(self, workflow: Dict[str, Any], message: str, level: str = "info",
|
|
agent_id: Optional[str] = None, agent_name: Optional[str] = None) -> str:
|
|
"""
|
|
Fügt einen Log-Eintrag zum Workflow hinzu und loggt diesen auch im Logger.
|
|
|
|
Args:
|
|
workflow: Workflow-Objekt
|
|
message: Log-Nachricht
|
|
level: Log-Level (info, warning, error)
|
|
agent_id: Optional - ID des Agenten
|
|
agent_name: Optional - Name des Agenten
|
|
|
|
Returns:
|
|
ID des erstellten Log-Eintrags
|
|
"""
|
|
# Sicherstellen, dass Logs-Liste existiert
|
|
if "logs" not in workflow:
|
|
workflow["logs"] = []
|
|
|
|
# Log-ID generieren
|
|
log_id = f"log_{str(uuid.uuid4())}"
|
|
|
|
# Log-Eintrag erstellen
|
|
log_entry = {
|
|
"id": log_id,
|
|
"workflow_id": workflow["id"],
|
|
"message": message,
|
|
"type": level,
|
|
"timestamp": datetime.now().isoformat(),
|
|
"agent_id": agent_id,
|
|
"agent_name": agent_name
|
|
}
|
|
|
|
# Log zum Workflow hinzufügen
|
|
workflow["logs"].append(log_entry)
|
|
|
|
# In Datenbank speichern
|
|
self.lucy_interface.create_workflow_log(log_entry)
|
|
|
|
# Auch im Logger loggen
|
|
if level == "info":
|
|
logger.info(f"Workflow {workflow['id']}: {message}")
|
|
elif level == "warning":
|
|
logger.warning(f"Workflow {workflow['id']}: {message}")
|
|
elif level == "error":
|
|
logger.error(f"Workflow {workflow['id']}: {message}")
|
|
|
|
return log_id
|
|
|
|
def parse_json2text(self, json_obj: Any) -> str:
|
|
"""
|
|
Konvertiert ein JSON-Objekt in eine lesbare Textdarstellung.
|
|
|
|
Args:
|
|
json_obj: Zu konvertierendes JSON-Objekt
|
|
|
|
Returns:
|
|
Formatierte Textdarstellung
|
|
"""
|
|
if not json_obj:
|
|
return "Keine Daten vorhanden"
|
|
|
|
try:
|
|
# Formatieren mit Einrückung für bessere Lesbarkeit
|
|
return json.dumps(json_obj, indent=2, ensure_ascii=False)
|
|
except Exception as e:
|
|
logger.error(f"Fehler bei JSON-Konvertierung: {str(e)}")
|
|
return str(json_obj)
|
|
|
|
def parse_json_response(self, response_text: str) -> Dict[str, Any]:
|
|
"""
|
|
Parst die JSON-Antwort aus einem Text.
|
|
|
|
Args:
|
|
response_text: Text mit JSON-Inhalt
|
|
|
|
Returns:
|
|
Geparste JSON-Daten
|
|
"""
|
|
try:
|
|
# Extrahiere JSON aus dem Text (falls mit anderen Inhalten vermischt)
|
|
json_start = response_text.find('{')
|
|
json_end = response_text.rfind('}') + 1
|
|
|
|
if json_start >= 0 and json_end > json_start:
|
|
json_str = response_text[json_start:json_end]
|
|
return json.loads(json_str)
|
|
else:
|
|
# Versuche den gesamten Text zu parsen
|
|
return json.loads(response_text)
|
|
except json.JSONDecodeError as e:
|
|
logger.error(f"JSON-Parse-Fehler: {str(e)}")
|
|
# Fallback: Leere Struktur zurückgeben
|
|
return {
|
|
"obj_final_documents": [],
|
|
"obj_workplan": [],
|
|
"obj_user_response": "Sorry, I could not parse your data."
|
|
}
|
|
|
|
|
|
# Singleton factory support for ChatManager:
# cache of instances keyed by the "mandate_id_user_id" context string,
# populated and reused by get_chat_manager().
_chat_managers: Dict[str, ChatManager] = {}
|
|
|
|
def get_chat_manager(mandate_id: int = 0, user_id: int = 0) -> ChatManager:
    """
    Returns a ChatManager for the given context, reusing cached instances.

    Args:
        mandate_id: Mandate ID.
        user_id: User ID.

    Returns:
        A (possibly shared) ChatManager instance for this context.
    """
    key = f"{mandate_id}_{user_id}"
    manager = _chat_managers.get(key)
    if manager is None:
        # First request for this context: create and cache a new instance
        manager = ChatManager(mandate_id, user_id)
        _chat_managers[key] = manager
    return manager