gateway/results/workflows/workflow_5ccfa607-22ce-4d17-8a09-b0f384021758.json
2025-04-16 21:42:26 +02:00

1218 lines
No EOL
264 KiB
JSON

{
"id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"mandate_id": 1,
"user_id": 1,
"name": "Workflow 16.4.2025, 01:46:24",
"status": "completed",
"started_at": "2025-04-16T01:46:28.695423",
"last_activity": "2025-04-16T01:49:18.989640",
"prompt": "describe the file content",
"messages": [
{
"id": "msg_845095fe-0dd7-43ae-b3fb-6596bd5b4b6d",
"workflow_id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"parent_message_id": null,
"started_at": "2025-04-16T01:46:28.823393",
"finished_at": null,
"sequence_no": 1,
"status": "pending",
"role": "user",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"documents": [
{
"id": "doc_4eeac488-9967-4da7-b46d-f84b0566c6e5",
"source": {
"type": "file",
"id": "file_3a389e62-8b6b-452f-9559-dcd00b763377",
"name": "auszug_liste_positionen.pdf",
"content_type": "application/pdf",
"size": 2,
"upload_date": "2025-04-16T01:46:28.939047"
},
"contents": [
{
"type": "text",
"text": "\n\n",
"is_extracted": true,
"extraction_context": null
}
]
}
],
"content": "describe the file content",
"agent_type": null
},
{
"id": "msg_7e131163-f972-41e6-be2c-6274d3af8d30",
"workflow_id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"parent_message_id": null,
"started_at": "2025-04-16T01:46:39.679376",
"finished_at": null,
"sequence_no": 2,
"status": "pending",
"role": "assistant",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"documents": [],
"content": "I encountered an error while processing your request: 'NoneType' object is not subscriptable",
"agent_type": "creative",
"agent_id": "creative",
"agent_name": "Creative Knowledge Assistant",
"result_format": "Text,Document,Table"
},
{
"id": "msg_64562047-35a2-4514-8a97-1c95d1e8f2f4",
"workflow_id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"parent_message_id": null,
"started_at": "2025-04-16T01:46:44.040039",
"finished_at": null,
"sequence_no": 3,
"status": "pending",
"role": "assistant",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"documents": [],
"content": "**Summary of Workflow Results: Document Processing**\n\n**Key Findings and Results:**\nDuring the execution of Activity 1, which involved processing documents according to specified requirements, an error was encountered. The error message indicated a 'NoneType' object is not subscriptable, suggesting an issue with handling a variable or data structure that was expected to contain data but was found to be empty or null.\n\n**Connection to Original Task:**\nThe error directly impacted the ability to complete the document processing task as intended. The task required handling and manipulating documents, but the encountered error suggests a failure in accessing or processing the necessary data, hindering the workflow's progress.\n\n**Conclusions and Recommendations:**\nTo resolve this issue, it is recommended to review the data handling and initialization processes within the document processing workflow. Ensure that all variables and data structures are properly initialized and contain the expected data before attempting to access or manipulate them. Implementing error handling mechanisms to catch and address such issues proactively could prevent similar disruptions in future executions.",
"agent_type": "summary",
"agent_id": "workflow_summary",
"agent_name": "Workflow Summary",
"result_format": "Text",
"workflow_complete": true
},
{
"id": "msg_632b1425-cf27-4375-af47-393b809aa69f",
"workflow_id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"parent_message_id": null,
"started_at": "2025-04-16T01:48:44.817593",
"finished_at": null,
"sequence_no": 4,
"status": "pending",
"role": "user",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"documents": [
{
"id": "doc_cdcd11eb-b4c5-4dda-b7f3-a6bfd6e77544",
"source": {
"type": "file",
"id": "file_5feebbe9-e6be-42b0-b5de-ea961cc0594f",
"name": "agentservice_dataextraction.py",
"content_type": "text/x-python",
"size": 31676,
"upload_date": "2025-04-16T01:48:45.239146"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nRefactored helper function for intelligent data extraction (continued).\r\n\"\"\"\r\n\r\nimport logging\r\nimport json\r\nfrom typing import List, Dict, Any, Optional, Tuple\r\nimport asyncio\r\nfrom datetime import datetime\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nasync def data_extraction(\r\n prompt: str, \r\n files: List[Dict[str, Any]], \r\n messages: List[Dict[str, Any]], \r\n ai_service,\r\n lucydom_interface = None,\r\n workflow_id: str = None,\r\n add_log_func = None,\r\n document_handler = None # Add this parameter\r\n) -> Dict[str, Any]:\r\n \"\"\"\r\n Performs AI-driven data extraction with support for the document handler.\r\n \r\n Args:\r\n prompt: Specification of what data to extract\r\n files: List of all available files with metadata\r\n messages: List of all messages in the workflow\r\n ai_service: Service for AI requests\r\n lucydom_interface: Interface for database access (optional)\r\n workflow_id: Optional workflow ID for logging\r\n add_log_func: Optional function for adding logs\r\n document_handler: Optional document handler for structured document operations\r\n \r\n Returns:\r\n Structured text object with extracted data and context information\r\n \"\"\"\r\n try:\r\n # Create extraction plan using AI\r\n extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)\r\n \r\n # Execute extractions, preferring document handler if available\r\n if document_handler:\r\n extracted_data = await _execute_extractions_with_handler(\r\n extraction_plan,\r\n files,\r\n messages,\r\n document_handler,\r\n ai_service,\r\n workflow_id,\r\n add_log_func\r\n )\r\n else:\r\n # Fall back to original implementation\r\n extracted_data = await _execute_extractions(\r\n extraction_plan,\r\n files,\r\n messages,\r\n lucydom_interface,\r\n ai_service,\r\n workflow_id,\r\n add_log_func\r\n )\r\n \r\n # Structure extracted data\r\n structured_result = _structure_extracted_data(extracted_data, files, prompt)\r\n \r\n return structured_result\r\n \r\n except Exception as e:\r\n logger.error(f\"Error in data extraction: {str(e)}\", exc_info=True)\r\n \r\n # Add error log\r\n if add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"Data extraction error: {str(e)}\", \"error\")\r\n \r\n # Return error result\r\n return {\r\n \"error\": str(e),\r\n \"status\": \"error\",\r\n \"files_processed\": len(files),\r\n \"message\": f\"Data extraction failed: {str(e)}\"\r\n }\r\n\r\n\r\nasync def _execute_extractions_with_handler(\r\n extraction_plan: List[Dict[str, Any]],\r\n files: List[Dict[str, Any]],\r\n messages: List[Dict[str, Any]],\r\n document_handler,\r\n ai_service,\r\n workflow_id: str = None,\r\n add_log_func = None\r\n) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Execute extractions using the document handler.\r\n \r\n Args:\r\n extraction_plan: List of extraction instructions\r\n files: List of all available files\r\n messages: List of all messages\r\n document_handler: Document handler for structured operations\r\n ai_service: Service for AI requests\r\n workflow_id: Optional workflow ID for logging\r\n add_log_func: Optional function for adding logs\r\n \r\n Returns:\r\n List with extracted data per file\r\n \"\"\"\r\n extracted_data = []\r\n \r\n # Sort by importance (highest first)\r\n sorted_plan = sorted(extraction_plan, key=lambda x: x.get(\"importance\", 0), reverse=True)\r\n \r\n for extraction_item in sorted_plan:\r\n file_id = extraction_item.get(\"file_id\")\r\n extract_needed = extraction_item.get(\"extract_needed\", False)\r\n extraction_prompt = extraction_item.get(\"extraction_prompt\", \"\")\r\n \r\n # Find file metadata\r\n file_metadata = next((f for f in files if f.get(\"id\") == file_id), None)\r\n \r\n if not file_metadata:\r\n logger.warning(f\"File with ID {file_id} not found\")\r\n continue\r\n \r\n file_name = file_metadata.get(\"name\", \"\")\r\n file_type = file_metadata.get(\"type\", \"\")\r\n content_type = file_metadata.get(\"content_type\", \"\")\r\n \r\n # Log\r\n if add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"Processing file: {file_name} (Extraction needed: {extract_needed})\", \r\n \"info\"\r\n )\r\n \r\n # Only perform extraction if needed\r\n if extract_needed:\r\n # Find document in existing messages if available\r\n existing_content = _find_document_in_messages(file_id, messages)\r\n \r\n # Check if we should use document handler for contextual extraction\r\n if existing_content:\r\n # If document exists but needs contextual extraction\r\n document_id = existing_content.get(\"document_id\")\r\n message_id = existing_content.get(\"message_id\")\r\n \r\n if document_id and message_id:\r\n # Find the message containing the document\r\n for message in messages:\r\n if message.get(\"id\") == message_id:\r\n # Extract content with context\r\n try:\r\n # Find document reference\r\n doc_reference = None\r\n for doc in message.get(\"documents\", []):\r\n if doc.get(\"id\") == document_id:\r\n doc_reference = doc\r\n break\r\n \r\n if doc_reference:\r\n # Use document handler to perform contextual extraction\r\n extracted_text = await document_handler.extract_document_content(\r\n document_id,\r\n file_id,\r\n extraction_prompt\r\n )\r\n \r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": extracted_text,\r\n \"is_extracted\": True,\r\n \"extraction_method\": \"contextual_extraction\"\r\n })\r\n \r\n if add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"Contextual extraction for {file_name}: {extraction_prompt}\", \r\n \"info\"\r\n )\r\n \r\n continue\r\n except Exception as e:\r\n logger.error(f\"Error in contextual extraction for {file_name}: {str(e)}\")\r\n \r\n # If we reach here, we need to perform a new extraction\r\n try:\r\n file_content = await document_handler.add_file_to_message(\r\n {}, # Empty message to extract just the document\r\n file_id,\r\n extraction_prompt\r\n )\r\n \r\n # Get the extracted content from the document\r\n if \"documents\" in file_content and file_content[\"documents\"]:\r\n doc = file_content[\"documents\"][0]\r\n content_text = \"\"\r\n is_extracted = False\r\n \r\n for content in doc.get(\"contents\", []):\r\n if content.get(\"type\") == \"text\":\r\n content_text = content.get(\"text\", \"\")\r\n is_extracted = content.get(\"is_extracted\", False)\r\n break\r\n \r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": content_text,\r\n \"is_extracted\": is_extracted,\r\n \"extraction_method\": \"document_handler\"\r\n })\r\n \r\n if add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"Extracted {file_name} using document handler\", \r\n \"info\"\r\n )\r\n else:\r\n # Extraction failed\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": f\"Failed to extract content from {file_name}\",\r\n \"is_extracted\": False,\r\n \"extraction_method\": \"failed\"\r\n })\r\n except Exception as e:\r\n logger.error(f\"Error extracting {file_name}: {str(e)}\")\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": f\"Error extracting: {str(e)}\",\r\n \"is_extracted\": False,\r\n \"extraction_method\": \"error\"\r\n })\r\n else:\r\n # No extraction needed, use existing content\r\n existing_content = _find_document_in_messages(file_id, messages)\r\n \r\n if existing_content:\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": existing_content.get(\"content\", \"\"),\r\n \"is_extracted\": existing_content.get(\"is_extracted\", False),\r\n \"extraction_method\": \"existing_content\"\r\n })\r\n else:\r\n # No existing content found\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": f\"No content available for {file_name}\",\r\n \"is_extracted\": False,\r\n \"extraction_method\": \"none\"\r\n })\r\n \r\n return extracted_data\r\n\r\n\r\ndef _find_document_in_messages(file_id: int, messages: List[Dict[str, Any]]) -> Dict[str, Any]:\r\n \"\"\"\r\n Find a document by file ID in workflow messages.\r\n \r\n Args:\r\n file_id: ID of the file to find\r\n messages: List of messages to search\r\n \r\n Returns:\r\n Dictionary with document information or empty dict if not found\r\n \"\"\"\r\n for message in messages:\r\n for doc_index, document in enumerate(message.get(\"documents\", [])):\r\n source = document.get(\"source\", {})\r\n \r\n # Check if file ID matches\r\n if source.get(\"id\") == str(file_id) or source.get(\"id\") == file_id:\r\n # Found the document\r\n content_text = \"\"\r\n is_extracted = False\r\n \r\n # Look for text content\r\n for content in document.get(\"contents\", []):\r\n if content.get(\"type\") == \"text\":\r\n content_text = content.get(\"text\", \"\")\r\n is_extracted = content.get(\"is_extracted\", False)\r\n break\r\n \r\n return {\r\n \"document_id\": document.get(\"id\"),\r\n \"message_id\": message.get(\"id\"),\r\n \"content\": content_text,\r\n \"is_extracted\": is_extracted\r\n }\r\n \r\n return {}\r\n\r\n\r\nasync def _create_extraction_plan(\r\n prompt: str, \r\n files: List[Dict[str, Any]], \r\n messages: List[Dict[str, Any]], \r\n ai_service,\r\n workflow_id: str = None,\r\n add_log_func = None\r\n) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Erstellt einen Extraktionsplan mit AI-Unterstützung.\r\n \r\n Args:\r\n prompt: Spezifizierung, welche Daten extrahiert werden sollen\r\n files: Liste aller verfügbaren Dateien mit Metadaten\r\n messages: Liste aller Nachrichten im Workflow\r\n ai_service: Service für KI-Anfragen\r\n workflow_id: Optionale ID des Workflows für Logging\r\n add_log_func: Optionale Funktion für das Hinzufügen von Logs\r\n \r\n Returns:\r\n Extraktionsplan (Liste von Extraktionsanweisungen pro Datei)\r\n \"\"\"\r\n # Erstelle Kontext-Informationen für den AI Call\r\n file_infos = []\r\n for file in files:\r\n # Basis-Metadaten\r\n file_info = {\r\n \"id\": file.get(\"id\", \"\"),\r\n \"name\": file.get(\"name\", \"\"),\r\n \"type\": file.get(\"type\", \"\"),\r\n \"content_type\": file.get(\"content_type\", \"\"),\r\n \"size\": file.get(\"size\", \"\")\r\n }\r\n \r\n # Extraktionsstatus prüfen (falls vorhanden)\r\n doc_contents = _extract_document_contents_from_messages(file.get(\"id\", \"\"), messages)\r\n \r\n if doc_contents:\r\n # Prüfen, ob mindestens ein Content mit is_extracted=True existiert\r\n already_extracted = any(\r\n content.get(\"is_extracted\", False) for content in doc_contents\r\n )\r\n file_info[\"already_extracted\"] = already_extracted\r\n \r\n # Eine kurze Vorschau des Inhalts hinzufügen (falls verfügbar)\r\n for content in doc_contents:\r\n if content.get(\"type\") == \"text\" and content.get(\"text\"):\r\n preview_text = content.get(\"text\", \"\")[:200] + \"...\" if len(content.get(\"text\", \"\")) > 200 else content.get(\"text\", \"\")\r\n file_info[\"content_preview\"] = preview_text\r\n break\r\n else:\r\n file_info[\"already_extracted\"] = False\r\n \r\n file_infos.append(file_info)\r\n \r\n # AI-Prompt erstellen\r\n extraction_prompt = f\"\"\"\r\n Du bist ein Datenextraktionsexperte, der mithilfe von KI-Analyse entscheidet, welche Dateien\r\n und Inhalte für eine bestimmte Aufgabe extrahiert werden müssen.\r\n\r\n AUFGABE:\r\n {prompt}\r\n\r\n VERFÜGBARE DATEIEN:\r\n {json.dumps(file_infos, indent=2)}\r\n\r\n Für jede Datei, die für die Aufgabe relevant ist, erstelle eine Extraktionsanweisung mit den folgenden Informationen:\r\n 1. file_id: Die ID der zu extrahierenden Datei\r\n 2. extract_needed: Boolean, ob eine Extraktion erforderlich ist (True, wenn die Datei noch nicht extrahiert wurde und für die Aufgabe benötigt wird)\r\n 3. extraction_prompt: Ein spezifischer Prompt für die Extraktion der Datei (besonders wichtig für Bilder und nicht-textbasierte Dateien)\r\n 4. importance: Priorität/Wichtigkeit für die Aufgabe (1-5, wobei 5 am wichtigsten ist)\r\n\r\n Format:\r\n [\r\n {{\r\n \"file_id\": 1234,\r\n \"extract_needed\": true,\r\n \"extraction_prompt\": \"Extrahiere die Tabellendaten mit Fokus auf die Umsatzzahlen\",\r\n \"importance\": 5\r\n }},\r\n ...\r\n ]\r\n\r\n Gib nur das JSON-Array zurück, ohne weitere Erklärungen.\r\n \"\"\"\r\n\r\n # Log hinzufügen\r\n if add_log_func and workflow_id:\r\n add_log_func(workflow_id, \"Extraktionsplan wird erstellt...\", \"info\")\r\n \r\n try:\r\n # AI-Call durchführen\r\n extraction_plan_response = await ai_service.call_api([{\"role\": \"user\", \"content\": extraction_prompt}])\r\n \r\n # JSON aus der Antwort extrahieren\r\n import re\r\n json_match = re.search(r'\\[.*\\]', extraction_plan_response, re.DOTALL)\r\n \r\n if json_match:\r\n extraction_plan = json.loads(json_match.group(0))\r\n \r\n # Log hinzufügen\r\n if add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"Extraktionsplan erstellt für {len(extraction_plan)} Dateien\", \r\n \"info\"\r\n )\r\n \r\n return extraction_plan\r\n else:\r\n # Fallback bei Parsing-Problemen\r\n if add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n \"Parsing-Fehler beim Extraktionsplan, erstelle Standard-Plan\", \r\n \"warning\"\r\n )\r\n \r\n # Standard-Plan: Alle nicht extrahierten Dateien extrahieren\r\n default_plan = []\r\n for file in files:\r\n doc_contents = _extract_document_contents_from_messages(file.get(\"id\", \"\"), messages)\r\n already_extracted = any(\r\n content.get(\"is_extracted\", False) for content in doc_contents\r\n ) if doc_contents else False\r\n \r\n default_plan.append({\r\n \"file_id\": file.get(\"id\", 0),\r\n \"extract_needed\": not already_extracted,\r\n \"extraction_prompt\": f\"Extrahiere alle relevanten Informationen aus {file.get('name', '')}\",\r\n \"importance\": 3\r\n })\r\n \r\n return default_plan\r\n \r\n except Exception as e:\r\n logger.error(f\"Fehler bei der Erstellung des Extraktionsplans: {str(e)}\", exc_info=True)\r\n \r\n if add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"Fehler bei der Erstellung des Extraktionsplans: {str(e)}\", \r\n \"error\"\r\n )\r\n \r\n # Leerer Plan bei Fehlern\r\n return []\r\n \r\nasync def _execute_extractions(\r\n extraction_plan: List[Dict[str, Any]],\r\n files: List[Dict[str, Any]],\r\n messages: List[Dict[str, Any]], \r\n lucydom_interface,\r\n ai_service,\r\n workflow_id: str = None,\r\n add_log_func = None,\r\n logging_utils = None\r\n) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Execute the planned extractions.\r\n \r\n Args:\r\n extraction_plan: List of extraction instructions\r\n files: List of all available files\r\n lucydom_interface: Interface for database access\r\n ai_service: Service for AI requests\r\n workflow_id: Optional workflow ID for logging\r\n add_log_func: Optional function for adding logs\r\n logging_utils: Optional logging utility\r\n \r\n Returns:\r\n List with extracted data per file\r\n \"\"\"\r\n extracted_data = []\r\n \r\n # Sort by importance\r\n sorted_plan = sorted(extraction_plan, key=lambda x: x.get(\"importance\", 0), reverse=True)\r\n \r\n for extraction_item in sorted_plan:\r\n file_id = extraction_item.get(\"file_id\")\r\n extract_needed = extraction_item.get(\"extract_needed\", False)\r\n extraction_prompt = extraction_item.get(\"extraction_prompt\", \"\")\r\n \r\n # Find file metadata\r\n file_metadata = next((f for f in files if f.get(\"id\") == file_id), None)\r\n \r\n if not file_metadata:\r\n logger.warning(f\"File with ID {file_id} not found\")\r\n continue\r\n \r\n file_name = file_metadata.get(\"name\", \"\")\r\n file_type = file_metadata.get(\"type\", \"\")\r\n content_type = file_metadata.get(\"content_type\", \"\")\r\n \r\n # Add log\r\n if logging_utils:\r\n logging_utils.info(f\"Processing file: {file_name} (Extraction needed: {extract_needed})\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"Processing file: {file_name} (Extraction needed: {extract_needed})\", \r\n \"info\"\r\n )\r\n \r\n # Only perform extraction if needed\r\n if extract_needed:\r\n # Get file content via LucyDOM interface\r\n if lucydom_interface:\r\n try:\r\n file_content = await lucydom_interface.read_file_content(file_id)\r\n \r\n if not file_content:\r\n if logging_utils:\r\n logging_utils.warning(f\"File {file_name} not found\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"File {file_name} not found\", \"warning\")\r\n continue\r\n \r\n # Perform extraction based on file type\r\n if file_type == \"image\" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):\r\n # Image analysis with AI service\r\n if ai_service and hasattr(ai_service, \"analyze_image\"):\r\n try:\r\n image_analysis = await ai_service.analyze_image(\r\n image_data=file_content,\r\n prompt=extraction_prompt,\r\n mime_type=content_type\r\n )\r\n \r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": image_analysis,\r\n \"is_extracted\": True,\r\n \"extraction_method\": \"image_analysis\"\r\n })\r\n \r\n if logging_utils:\r\n logging_utils.info(f\"Image {file_name} successfully analyzed\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"Image {file_name} successfully analyzed\", \"info\")\r\n except Exception as e:\r\n logger.error(f\"Error analyzing image {file_name}: {str(e)}\")\r\n if logging_utils:\r\n logging_utils.error(f\"Error analyzing image {file_name}: {str(e)}\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"Error analyzing image {file_name}: {str(e)}\", \"error\")\r\n else:\r\n # Fallback if no image analysis available\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": f\"Image: {file_name} (Analysis not available)\",\r\n \"is_extracted\": False,\r\n \"extraction_method\": \"none\"\r\n })\r\n else:\r\n # Text-based extraction for all other file types\r\n try:\r\n # Import directly here to avoid circular imports\r\n from modules.agentservice_utils import extract_text_from_file_content\r\n \r\n content, is_extracted = extract_text_from_file_content(\r\n file_content, file_name, content_type\r\n )\r\n \r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": content,\r\n \"is_extracted\": is_extracted,\r\n \"extraction_method\": \"text_extraction\"\r\n })\r\n \r\n if logging_utils:\r\n logging_utils.info(f\"File {file_name} extracted (Status: {is_extracted})\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(\r\n workflow_id, \r\n f\"File {file_name} extracted (Status: {is_extracted})\", \r\n \"info\"\r\n )\r\n except Exception as e:\r\n logger.error(f\"Error extracting text from {file_name}: {str(e)}\")\r\n if logging_utils:\r\n logging_utils.error(f\"Error extracting text from {file_name}: {str(e)}\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"Error extracting text from {file_name}: {str(e)}\", \"error\")\r\n except Exception as e:\r\n logger.error(f\"Error reading file {file_name}: {str(e)}\")\r\n if logging_utils:\r\n logging_utils.error(f\"Error reading file {file_name}: {str(e)}\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"Error reading file {file_name}: {str(e)}\", \"error\")\r\n else:\r\n logger.warning(f\"No LucyDOM interface available for file {file_name}\")\r\n if logging_utils:\r\n logging_utils.warning(f\"No LucyDOM interface available for file {file_name}\", \"extraction\")\r\n elif add_log_func and workflow_id:\r\n add_log_func(workflow_id, f\"No LucyDOM interface available for file {file_name}\", \"warning\")\r\n else:\r\n # No extraction needed, use existing content\r\n doc_contents = _extract_document_contents_from_messages(file_id, messages)\r\n \r\n if doc_contents:\r\n # Use first text content\r\n for content in doc_contents:\r\n if content.get(\"type\") == \"text\":\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": content.get(\"text\", \"\"),\r\n \"is_extracted\": content.get(\"is_extracted\", False),\r\n \"extraction_method\": \"existing_content\"\r\n })\r\n break\r\n else:\r\n # No existing content found\r\n extracted_data.append({\r\n \"file_id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content\": f\"No content available for {file_name}\",\r\n \"is_extracted\": False,\r\n \"extraction_method\": \"none\"\r\n })\r\n \r\n return extracted_data\r\n\r\ndef _structure_extracted_data(\r\n extracted_data: List[Dict[str, Any]], \r\n files: List[Dict[str, Any]], \r\n prompt: str\r\n) -> Dict[str, Any]:\r\n \"\"\"\r\n Structure the extracted data into a formatted result.\r\n \r\n Args:\r\n extracted_data: List of extracted data per file\r\n files: List of all available files\r\n prompt: Original extraction prompt\r\n \r\n Returns:\r\n Structured result object\r\n \"\"\"\r\n # Create base structure\r\n result = {\r\n \"prompt\": prompt,\r\n \"files_processed\": len(extracted_data),\r\n \"total_files\": len(files),\r\n \"extraction_timestamp\": datetime.now().isoformat(),\r\n \"status\": \"success\",\r\n \"extracted_content\": []\r\n }\r\n \r\n # Add extracted content\r\n for data_item in extracted_data:\r\n # Enrich with file metadata\r\n file_id = data_item.get(\"file_id\", 0)\r\n file_metadata = next((f for f in files if f.get(\"id\") == file_id), {})\r\n \r\n content_item = {\r\n \"file_id\": file_id,\r\n \"name\": data_item.get(\"name\", file_metadata.get(\"name\", \"\")),\r\n \"type\": data_item.get(\"type\", file_metadata.get(\"type\", \"\")),\r\n \"content_type\": file_metadata.get(\"content_type\", \"\"),\r\n \"size\": file_metadata.get(\"size\", \"\"),\r\n \"is_extracted\": data_item.get(\"is_extracted\", False),\r\n \"extraction_method\": data_item.get(\"extraction_method\", \"\"),\r\n \"content\": data_item.get(\"content\", \"\")\r\n }\r\n \r\n result[\"extracted_content\"].append(content_item)\r\n \r\n return result\r\n\r\ndef _extract_document_contents_from_messages(file_id: int, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Extract document contents for a specific file from workflow messages.\r\n Enhanced to handle the new document structure.\r\n \r\n Args:\r\n file_id: ID of the file\r\n messages: List of all messages in the workflow\r\n \r\n Returns:\r\n List of document contents for the specified file\r\n \"\"\"\r\n contents = []\r\n \r\n for message in messages:\r\n # Search documents in the message\r\n for document in message.get(\"documents\", []):\r\n source = document.get(\"source\", {})\r\n \r\n # Check if file ID matches (handle both string and int comparison)\r\n if (source.get(\"id\") == file_id or \r\n (isinstance(source.get(\"id\"), str) and source.get(\"id\") == str(file_id)) or\r\n (isinstance(file_id, str) and source.get(\"id\") == file_id)):\r\n \r\n # Add contents of the file\r\n doc_contents = document.get(\"contents\", [])\r\n \r\n if doc_contents:\r\n # Ensure each content has document reference\r\n for content in doc_contents:\r\n content_copy = content.copy()\r\n content_copy[\"document_id\"] = document.get(\"id\")\r\n content_copy[\"message_id\"] = message.get(\"id\")\r\n contents.append(content_copy)\r\n \r\n return contents\r\n\r\ndef _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):\r\n \"\"\"Helper function for logging with different log functions\"\"\"\r\n # Log via logger instance\r\n if log_type == \"error\":\r\n logger.error(message)\r\n elif log_type == \"warning\":\r\n logger.warning(message)\r\n else:\r\n logger.info(message)\r\n \r\n # Log via provided log function (if available)\r\n if add_log_func and workflow_id:\r\n add_log_func(workflow_id, message, log_type, agent_id, agent_name)",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_dfbfe24f-303e-4d70-a087-725f88db3dd3",
"source": {
"type": "file",
"id": "file_779c1ea7-d11e-40c6-94c0-761d97cacb3e",
"name": "agentservice_document_handler.py",
"content_type": "text/x-python",
"size": 21838,
"upload_date": "2025-04-16T01:48:45.240111"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nEnhanced document handling module for the Agentservice (continued).\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport uuid\r\nfrom datetime import datetime\r\nfrom typing import List, Dict, Any, Optional, Tuple, Union\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nclass DocumentHandler:\r\n \"\"\"\r\n Centralized document handler for consistent document management across the system.\r\n \"\"\"\r\n \r\n def __init__(self, workflow_id: str = None, lucydom_interface = None, ai_service = None):\r\n \"\"\"Initialize the document handler.\"\"\"\r\n self.workflow_id = workflow_id\r\n self.lucydom_interface = lucydom_interface\r\n self.ai_service = ai_service\r\n \r\n # Import necessary utilities\r\n from modules.agentservice_filemanager import get_file_manager\r\n self.file_manager = get_file_manager()\r\n \r\n def set_workflow_id(self, workflow_id: str):\r\n \"\"\"Set or update the workflow ID.\"\"\"\r\n self.workflow_id = workflow_id\r\n \r\n def set_lucydom_interface(self, lucydom_interface):\r\n \"\"\"Set or update the LucyDOM interface.\"\"\"\r\n self.lucydom_interface = lucydom_interface\r\n \r\n def set_ai_service(self, ai_service):\r\n \"\"\"Set or update the AI service.\"\"\"\r\n self.ai_service = ai_service\r\n \r\n async def add_file_to_message(self, message: Dict[str, Any], file_id: int, extraction_prompt: str = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Add a file to a message with optional contextual extraction.\r\n \r\n Args:\r\n message: The message to add the file to\r\n file_id: ID of the file to add\r\n extraction_prompt: Optional prompt for contextual extraction (e.g., for images)\r\n \r\n Returns:\r\n Updated message with the file added\r\n \"\"\"\r\n if not self.lucydom_interface:\r\n logger.error(\"LucyDOM interface not available\")\r\n return message\r\n \r\n try:\r\n # Get file metadata\r\n file = self.lucydom_interface.get_file(file_id)\r\n if not file:\r\n logger.warning(f\"File with ID {file_id} not found\")\r\n return message\r\n \r\n # Get necessary file information\r\n file_name = file.get(\"name\", \"unnamed_file\")\r\n file_type = file.get(\"type\", \"unknown\")\r\n content_type = file.get(\"content_type\")\r\n \r\n # Initialize documents array if needed\r\n if \"documents\" not in message:\r\n message[\"documents\"] = []\r\n \r\n # Check if file is already in the message\r\n file_already_added = any(\r\n doc.get(\"source\", {}).get(\"id\") == str(file_id) \r\n for doc in message.get(\"documents\", [])\r\n )\r\n \r\n if file_already_added:\r\n logger.info(f\"File {file_name} already exists in message, skipping\")\r\n return message\r\n \r\n # Create a unique document ID\r\n doc_id = f\"doc_{uuid.uuid4()}\"\r\n \r\n # Create document structure\r\n document = {\r\n \"id\": doc_id,\r\n \"source\": {\r\n \"type\": \"file\",\r\n \"id\": str(file_id),\r\n \"name\": file_name,\r\n \"content_type\": content_type,\r\n \"size\": file.get(\"size\"),\r\n \"upload_date\": file.get(\"upload_date\", datetime.now().isoformat())\r\n },\r\n \"contents\": []\r\n }\r\n \r\n # Only read content if we have extraction prompt or specific types\r\n if (extraction_prompt or \r\n file_type in [\"document\", \"text\"] or \r\n (content_type and content_type.startswith(\"text/\"))):\r\n \r\n # Read file content\r\n file_content = await self.lucydom_interface.read_file_content(file_id)\r\n \r\n if file_content:\r\n # Process based on file type\r\n if file_type == \"image\" or (content_type and content_type.startswith(\"image/\")):\r\n # Image analysis if prompt provided\r\n if extraction_prompt and self.ai_service and hasattr(self.ai_service, \"analyze_image\"):\r\n try:\r\n image_analysis = await self.ai_service.analyze_image(\r\n image_data=file_content,\r\n prompt=extraction_prompt or \"Describe this image in detail\",\r\n mime_type=content_type\r\n )\r\n \r\n # Add the analysis as text content\r\n document[\"contents\"].append({\r\n \"type\": \"text\",\r\n \"text\": f\"Image Analysis:\\n{image_analysis}\",\r\n \"is_extracted\": True,\r\n \"extraction_context\": extraction_prompt\r\n })\r\n \r\n logger.info(f\"Added image analysis for {file_name} to message\")\r\n except Exception as e:\r\n logger.error(f\"Error analyzing image {file_name}: {str(e)}\")\r\n document[\"contents\"].append({\r\n \"type\": \"text\",\r\n \"text\": f\"Image file: {file_name} (Analysis failed: {str(e)})\",\r\n \"is_extracted\": False\r\n })\r\n else:\r\n # Just add placeholder if no analysis available\r\n document[\"contents\"].append({\r\n \"type\": \"text\",\r\n \"text\": f\"Image file: {file_name} (no analysis requested)\",\r\n \"is_extracted\": False\r\n })\r\n else:\r\n # For other file types, extract text\r\n from modules.agentservice_utils import extract_text_from_file_content\r\n \r\n content, is_extracted = extract_text_from_file_content(\r\n file_content, file_name, content_type\r\n )\r\n \r\n document[\"contents\"].append({\r\n \"type\": \"text\",\r\n \"text\": content,\r\n \"is_extracted\": is_extracted,\r\n \"extraction_context\": extraction_prompt\r\n })\r\n \r\n logger.info(f\"Added text content for {file_name} to message (extracted: {is_extracted})\")\r\n else:\r\n # No content available\r\n document[\"contents\"].append({\r\n \"type\": \"text\",\r\n \"text\": f\"File content not available for {file_name}\",\r\n \"is_extracted\": False\r\n })\r\n else:\r\n # Just add reference without content\r\n document[\"contents\"].append({\r\n \"type\": \"text\",\r\n \"text\": f\"File: {file_name} (content not loaded)\",\r\n \"is_extracted\": False\r\n })\r\n \r\n # Add document to message\r\n message[\"documents\"].append(document)\r\n \r\n logger.info(f\"File {file_name} successfully added to message\")\r\n return message\r\n \r\n except Exception as e:\r\n logger.error(f\"Error adding file {file_id} to message: {str(e)}\")\r\n return message\r\n \r\n async def add_files_to_message(self, message: Dict[str, Any], file_ids: List[int], extraction_prompt: str = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Add multiple files to a message.\r\n \r\n Args:\r\n message: The message to add files to\r\n file_ids: List of file IDs to add\r\n extraction_prompt: Optional prompt for contextual extraction\r\n \r\n Returns:\r\n Updated message with files added\r\n \"\"\"\r\n updated_message = message.copy()\r\n \r\n for file_id in file_ids:\r\n updated_message = await self.add_file_to_message(updated_message, file_id, extraction_prompt)\r\n \r\n return updated_message\r\n \r\n async def extract_document_content(self, doc_id: str, message: Dict[str, Any], extraction_prompt: str) -> Dict[str, Any]:\r\n \"\"\"\r\n Extract or update document content with contextual extraction.\r\n \r\n Args:\r\n doc_id: ID of the document to extract\r\n message: Message containing the document\r\n extraction_prompt: Contextual prompt for extraction\r\n \r\n Returns:\r\n Updated message with extracted content\r\n \"\"\"\r\n if not message or \"documents\" not in message:\r\n return message\r\n \r\n updated_message = message.copy()\r\n \r\n # Find the document\r\n for i, document in enumerate(updated_message.get(\"documents\", [])):\r\n if document.get(\"id\") == doc_id:\r\n # Get file ID from source\r\n source = document.get(\"source\", {})\r\n file_id = source.get(\"id\")\r\n \r\n if file_id and self.lucydom_interface:\r\n # Get file metadata\r\n file = self.lucydom_interface.get_file(int(file_id))\r\n if not file:\r\n continue\r\n \r\n # Get file content\r\n file_content = await self.lucydom_interface.read_file_content(int(file_id))\r\n if not file_content:\r\n continue\r\n \r\n # Process based on file type\r\n file_name = file.get(\"name\", \"unnamed_file\")\r\n file_type = file.get(\"type\", \"unknown\")\r\n content_type = file.get(\"content_type\")\r\n \r\n # Update content based on file type\r\n if file_type == \"image\" or (content_type and content_type.startswith(\"image/\")):\r\n if self.ai_service and hasattr(self.ai_service, \"analyze_image\"):\r\n try:\r\n image_analysis = await self.ai_service.analyze_image(\r\n image_data=file_content,\r\n prompt=extraction_prompt,\r\n mime_type=content_type\r\n )\r\n \r\n # Create or update content\r\n new_content = {\r\n \"type\": \"text\",\r\n \"text\": f\"Image Analysis:\\n{image_analysis}\",\r\n \"is_extracted\": True,\r\n \"extraction_context\": extraction_prompt\r\n }\r\n \r\n # Update or add content\r\n contents = document.get(\"contents\", [])\r\n contents_updated = False\r\n \r\n for j, content in enumerate(contents):\r\n if content.get(\"type\") == \"text\":\r\n updated_message[\"documents\"][i][\"contents\"][j] = new_content\r\n contents_updated = True\r\n break\r\n \r\n if not contents_updated:\r\n if not updated_message[\"documents\"][i].get(\"contents\"):\r\n updated_message[\"documents\"][i][\"contents\"] = []\r\n updated_message[\"documents\"][i][\"contents\"].append(new_content)\r\n \r\n logger.info(f\"Updated image analysis for {file_name} with new context: {extraction_prompt}\")\r\n except Exception as e:\r\n logger.error(f\"Error updating image analysis for {file_name}: {str(e)}\")\r\n else:\r\n # For other file types, extract text with new context\r\n from modules.agentservice_utils import extract_text_from_file_content\r\n \r\n content, is_extracted = extract_text_from_file_content(\r\n file_content, file_name, content_type\r\n )\r\n \r\n new_content = {\r\n \"type\": \"text\",\r\n \"text\": content,\r\n \"is_extracted\": is_extracted,\r\n \"extraction_context\": extraction_prompt\r\n }\r\n \r\n # Update or add content\r\n contents = document.get(\"contents\", [])\r\n contents_updated = False\r\n \r\n for j, content_item in enumerate(contents):\r\n if content_item.get(\"type\") == \"text\":\r\n updated_message[\"documents\"][i][\"contents\"][j] = new_content\r\n contents_updated = True\r\n break\r\n \r\n if not contents_updated:\r\n if not updated_message[\"documents\"][i].get(\"contents\"):\r\n updated_message[\"documents\"][i][\"contents\"] = []\r\n updated_message[\"documents\"][i][\"contents\"].append(new_content)\r\n \r\n logger.info(f\"Updated text extraction for {file_name} with new context: {extraction_prompt}\")\r\n \r\n # Found and processed the document, stop searching\r\n break\r\n \r\n return updated_message\r\n \r\n async def extract_files_from_workflow(self, workflow: Dict[str, Any], extraction_prompt: str, file_filter: str = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Extract all relevant files from a workflow with context-aware extraction.\r\n \r\n Args:\r\n workflow: The workflow object\r\n extraction_prompt: Contextual prompt for extraction\r\n file_filter: Optional filter for file types (e.g., \"csv\", \"image\")\r\n \r\n Returns:\r\n Dictionary with extracted content\r\n \"\"\"\r\n # Import for data extraction\r\n from modules.agentservice_dataextraction import data_extraction\r\n \r\n # Get all files from the workflow\r\n files = []\r\n \r\n # Process all messages\r\n for message in workflow.get(\"messages\", []):\r\n # Extract documents from the message\r\n for doc in message.get(\"documents\", []):\r\n source = doc.get(\"source\", {})\r\n \r\n # Only include file documents\r\n if source.get(\"type\") == \"file\":\r\n file_info = {\r\n \"id\": source.get(\"id\", \"\"),\r\n \"name\": source.get(\"name\", \"\"),\r\n \"type\": source.get(\"type\", \"\"),\r\n \"content_type\": source.get(\"content_type\", \"\"),\r\n \"size\": source.get(\"size\", 0)\r\n }\r\n \r\n # Apply filter if provided\r\n if file_filter:\r\n file_name = file_info.get(\"name\", \"\").lower()\r\n content_type = file_info.get(\"content_type\", \"\").lower()\r\n \r\n if (file_filter.lower() in file_name or \r\n file_filter.lower() in content_type):\r\n # Check if file is already in the list\r\n if not any(f.get(\"id\") == file_info[\"id\"] for f in files):\r\n files.append(file_info)\r\n else:\r\n # No filter, include all files\r\n if not any(f.get(\"id\") == file_info[\"id\"] for f in files):\r\n files.append(file_info)\r\n \r\n # If no files found, return empty result\r\n if not files:\r\n return {\r\n \"prompt\": extraction_prompt,\r\n \"files_processed\": 0,\r\n \"extracted_content\": []\r\n }\r\n \r\n # Get all messages from the workflow\r\n workflow_messages = workflow.get(\"messages\", [])\r\n \r\n # Extract data using the dataextraction module\r\n extracted_data = await data_extraction(\r\n prompt=extraction_prompt,\r\n files=files,\r\n messages=workflow_messages,\r\n ai_service=self.ai_service,\r\n lucydom_interface=self.lucydom_interface,\r\n workflow_id=self.workflow_id,\r\n add_log_func=None # We don't have access to add_log_func here\r\n )\r\n \r\n return extracted_data\r\n \r\n def get_file_content_from_message(self, message: Dict[str, Any], file_id: int = None, doc_id: str = None) -> str:\r\n \"\"\"\r\n Get file content from a message.\r\n \r\n Args:\r\n message: The message containing the document\r\n file_id: Optional file ID to search for\r\n doc_id: Optional document ID to search for\r\n \r\n Returns:\r\n Text content of the file if available\r\n \"\"\"\r\n if not message or \"documents\" not in message:\r\n return \"\"\r\n \r\n # Search for the document\r\n for document in message.get(\"documents\", []):\r\n # Match by document ID or file ID\r\n source = document.get(\"source\", {})\r\n source_file_id = source.get(\"id\")\r\n \r\n if ((doc_id and document.get(\"id\") == doc_id) or \r\n (file_id and source_file_id and str(file_id) == str(source_file_id))):\r\n \r\n # Get text content from document\r\n for content in document.get(\"contents\", []):\r\n if content.get(\"type\") == \"text\":\r\n return content.get(\"text\", \"\")\r\n \r\n return \"\"\r\n \r\n def create_text_document(self, message: Dict[str, Any], content: str, title: str = \"Generated Text\") -> Dict[str, Any]:\r\n \"\"\"\r\n Create a new text document in a message.\r\n \r\n Args:\r\n message: The message to add the document to\r\n content: Text content\r\n title: Document title\r\n \r\n Returns:\r\n Updated message with the new document\r\n \"\"\"\r\n # Initialize documents array if needed\r\n updated_message = message.copy()\r\n if \"documents\" not in updated_message:\r\n updated_message[\"documents\"] = []\r\n \r\n # Create document ID\r\n doc_id = f\"doc_{uuid.uuid4()}\"\r\n \r\n # Create document structure\r\n document = {\r\n \"id\": doc_id,\r\n \"source\": {\r\n \"type\": \"generated\",\r\n \"id\": doc_id,\r\n \"name\": title,\r\n \"content_type\": \"text/plain\",\r\n \"size\": len(content)\r\n },\r\n \"contents\": [\r\n {\r\n \"type\": \"text\",\r\n \"text\": content,\r\n \"is_extracted\": True\r\n }\r\n ]\r\n }\r\n \r\n # Add document to message\r\n updated_message[\"documents\"].append(document)\r\n \r\n logger.info(f\"Created text document '{title}' in message\")\r\n return updated_message\r\n\r\n def merge_document_contents(self, message: Dict[str, Any]) -> str:\r\n \"\"\"\r\n Merge all document contents from a message into a single text.\r\n \r\n Args:\r\n message: The message containing documents\r\n \r\n Returns:\r\n Combined text content from all documents\r\n \"\"\"\r\n if not message or \"documents\" not in message:\r\n return \"\"\r\n \r\n combined_text = \"\"\r\n \r\n for document in message.get(\"documents\", []):\r\n source = document.get(\"source\", {})\r\n doc_name = source.get(\"name\", \"Unnamed Document\")\r\n \r\n # Extract text content\r\n doc_text = \"\"\r\n for content in document.get(\"contents\", []):\r\n if content.get(\"type\") == \"text\":\r\n doc_text = content.get(\"text\", \"\")\r\n break\r\n \r\n if doc_text:\r\n combined_text += f\"\\n\\n--- {doc_name} ---\\n\\n{doc_text}\"\r\n \r\n return combined_text.strip()\r\n\r\n# Factory function\r\ndef get_document_handler(workflow_id: str = None, lucydom_interface = None, ai_service = None) -> DocumentHandler:\r\n \"\"\"Get a document handler instance.\"\"\"\r\n return DocumentHandler(workflow_id, lucydom_interface, ai_service)",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_65b14fc8-3f50-4c23-8a62-338ceed95dcb",
"source": {
"type": "file",
"id": "file_e058a6f8-8e29-40bf-acc0-bb9e23d89453",
"name": "agentservice_filemanager.py",
"content_type": "text/x-python",
"size": 44114,
"upload_date": "2025-04-16T01:48:45.240111"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nCentral file management module for the Agentservice.\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport base64\r\nimport json\r\nimport uuid\r\nfrom datetime import datetime\r\nfrom typing import List, Dict, Any, Optional, Tuple, Union, BinaryIO\r\nfrom io import BytesIO\r\n\r\n# Import utilities from agentservice_utils\r\nfrom modules.agentservice_utils import extract_text_from_file_content, is_text_extractable\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\n# Helper function for adding logs\r\ndef _log(add_log_func, workflow_id, message, level=\"info\"):\r\n \"\"\"Helper function for adding logs with standardized formatting.\"\"\"\r\n if add_log_func and workflow_id:\r\n add_log_func(workflow_id, message, level)\r\n \r\n # Also log to standard logger\r\n if level == \"info\":\r\n logger.info(message)\r\n elif level == \"warning\":\r\n logger.warning(message)\r\n elif level == \"error\":\r\n logger.error(message)\r\n\r\nclass FileExtractionError(Exception):\r\n \"\"\"Exception for file extraction errors.\"\"\"\r\n pass\r\n\r\n\r\n\r\nclass FileManager:\r\n \"\"\"Central file management for the Agentservice.\"\"\"\r\n \r\n _instance = None\r\n \r\n @classmethod\r\n def get_instance(cls):\r\n \"\"\"Get the singleton instance of FileManager.\"\"\"\r\n if cls._instance is None:\r\n cls._instance = cls()\r\n return cls._instance\r\n \r\n def __init__(self):\r\n \"\"\"Initialize the FileManager.\"\"\"\r\n # Ensure singleton pattern\r\n if FileManager._instance is not None:\r\n raise RuntimeError(\"Singleton instance already exists - use get_instance()\")\r\n \r\n # Import utilities\r\n # Instead of storing file_utils, we'll use the imported functions directly\r\n \r\n async def read_file_contents(self,\r\n file_contexts: List[Dict[str, Any]], \r\n lucydom_interface,\r\n workflow_id: str = None,\r\n add_log_func = None,\r\n ai_service = None,\r\n extraction_context: str = None # Add this parameter\r\n ) -> Dict[str, Dict[str, Any]]:\r\n \"\"\"\r\n Read file contents with optional contextual extraction.\r\n \r\n Args:\r\n file_contexts: List of file contexts with metadata\r\n lucydom_interface: LucyDOM interface for file access\r\n workflow_id: Optional workflow ID for logging\r\n add_log_func: Optional function for adding logs\r\n ai_service: AI service for image analysis\r\n extraction_context: Optional context prompt for extraction\r\n \r\n Returns:\r\n Dictionary with file contents and metadata\r\n \"\"\"\r\n file_contents = {} \r\n # Add debug logging\r\n logger.info(f\"Reading contents of {len(file_contexts)} files for workflow {workflow_id}\")\r\n \r\n for file in file_contexts:\r\n file_id = file[\"id\"]\r\n file_name = file[\"name\"]\r\n file_type = file.get(\"type\", \"unknown\")\r\n content_type = file.get(\"content_type\")\r\n\r\n try:\r\n # Dateiinhalt über LucyDOM-Interface abrufen\r\n file_data = await lucydom_interface.read_file_content(file_id)\r\n \r\n if not file_data:\r\n _log(add_log_func, workflow_id, f\"Datei {file_name} nicht gefunden\", \"warning\")\r\n file_contents[file_id] = {\r\n \"content\": f\"File content not available (File not found)\",\r\n \"is_extracted\": False,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type\r\n }\r\n continue\r\n \r\n logger.info(f\"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})\")\r\n \r\n # For image analysis, add extraction context\r\n if file_type == \"image\" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):\r\n if ai_service and hasattr(ai_service, \"analyze_image\"):\r\n try:\r\n # Use extraction context if provided\r\n prompt = extraction_context or \"Describe this image in detail\"\r\n \r\n image_analysis = await ai_service.analyze_image(\r\n image_data=file_data,\r\n prompt=prompt, # Use contextual prompt\r\n mime_type=content_type\r\n )\r\n \r\n file_contents[file_id] = {\r\n \"content\": f\"Image Analysis:\\n{image_analysis}\",\r\n \"is_extracted\": True, # Mark as extracted\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type,\r\n \"extraction_context\": prompt # Store the used prompt\r\n }\r\n _log(add_log_func, workflow_id, f\"Image {file_name} analyzed successfully\", \"info\")\r\n except Exception as e:\r\n logger.error(f\"Error analyzing image {file_name}: {str(e)}\")\r\n _log(add_log_func, workflow_id, f\"Error analyzing image {file_name}: {str(e)}\", \"error\")\r\n file_contents[file_id] = {\r\n \"content\": f\"Image file: {file_name} (Analysis failed: {str(e)})\",\r\n \"is_extracted\": False,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type\r\n }\r\n else:\r\n file_contents[file_id] = {\r\n \"content\": f\"Image file: {file_name} (AI analysis not available)\",\r\n \"is_extracted\": False,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type\r\n }\r\n \r\n # Dokument- und Textdateien\r\n elif (file_type == \"document\" or not file_type or file_name.lower().endswith(('.csv', '.txt', '.json', '.xml')) or (content_type and content_type.startswith('text/'))):\r\n # Verwende die zentrale Textextraktionsfunktion mit Dateiinhalt\r\n content, is_extracted = extract_text_from_file_content(\r\n file_data, file_name, content_type\r\n )\r\n file_contents[file_id] = {\r\n \"content\": content,\r\n \"is_extracted\": is_extracted,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type\r\n }\r\n _log(add_log_func, workflow_id, \r\n f\"File {file_name} read successfully (extracted: {is_extracted})\", \"info\")\r\n \r\n # Andere Dateitypen - nur Metadaten speichern\r\n else:\r\n file_contents[file_id] = {\r\n \"content\": f\"File: {file_name} (Type: {file_type}, content not available)\",\r\n \"is_extracted\": False,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type\r\n }\r\n _log(add_log_func, workflow_id, f\"Unsupported file type: {file_type} for {file_name}\", \"warning\")\r\n \r\n except Exception as e:\r\n logger.error(f\"Error reading file {file_name}: {str(e)}\")\r\n _log(add_log_func, workflow_id, f\"Error reading file {file_name}: {str(e)}\", \"error\")\r\n file_contents[file_id] = {\r\n \"content\": f\"File content not available (Error: {str(e)})\",\r\n \"is_extracted\": False,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"content_type\": content_type\r\n }\r\n \r\n return file_contents\r\n\r\n @staticmethod\r\n def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]:\r\n \"\"\"\r\n Add a file to a message with consistent document structure.\r\n \r\n Args:\r\n message: The message to add the file to\r\n file_data: File metadata and content\r\n \r\n Returns:\r\n Updated message with the file added\r\n \"\"\"\r\n logger.info(f\"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})\")\r\n \r\n # Initialize documents array if needed\r\n if \"documents\" not in message:\r\n message[\"documents\"] = []\r\n \r\n # Create a unique ID for the document if not provided\r\n doc_id = file_data.get(\"id\", f\"file_{uuid.uuid4()}\")\r\n \r\n # Extract metadata\r\n file_size = file_data.get(\"size\")\r\n if isinstance(file_size, str) and file_size.isdigit():\r\n file_size = int(file_size)\r\n elif file_size is None and file_data.get(\"content\"):\r\n file_size = len(file_data.get(\"content\", \"\"))\r\n \r\n # Determine if content is already extracted\r\n content = file_data.get(\"content\", \"No content available\")\r\n file_name = file_data.get(\"name\", \"unnamed_file\")\r\n content_type = file_data.get(\"content_type\")\r\n is_extracted = file_data.get(\"is_extracted\", False)\r\n \r\n # Create standard document structure that follows the data model\r\n document = {\r\n \"id\": f\"doc_{uuid.uuid4()}\", # Unique document ID separate from file ID\r\n \"source\": {\r\n \"type\": \"file\",\r\n \"id\": doc_id,\r\n \"name\": file_name,\r\n \"content_type\": content_type,\r\n \"size\": file_size,\r\n \"upload_date\": file_data.get(\"upload_date\", datetime.now().isoformat())\r\n },\r\n \"contents\": [\r\n {\r\n \"type\": \"text\",\r\n \"text\": content,\r\n \"is_extracted\": is_extracted,\r\n \"extraction_context\": file_data.get(\"extraction_context\", None)\r\n }\r\n ]\r\n }\r\n \r\n # Check if file is already in the message\r\n file_already_added = any(\r\n doc.get(\"source\", {}).get(\"id\") == doc_id \r\n for doc in message.get(\"documents\", [])\r\n )\r\n \r\n if not file_already_added:\r\n message[\"documents\"].append(document)\r\n logger.info(f\"File {file_name} added to message (total: {len(message.get('documents', []))} files)\")\r\n else:\r\n logger.info(f\"File {file_name} already exists in message, skipping\")\r\n \r\n return message\r\n\r\n\r\n async def analyze_file(self, file_id: int, prompt: str, lucydom_interface, ai_service) -> Dict[str, Any]:\r\n \"\"\"\r\n Analyze a file using the appropriate method based on file type.\r\n \r\n Args:\r\n file_id: ID of the file to analyze\r\n prompt: Analysis prompt\r\n lucydom_interface: Interface for database access\r\n ai_service: Service for AI requests\r\n \r\n Returns:\r\n Analysis result\r\n \"\"\"\r\n if not lucydom_interface:\r\n raise ValueError(\"LucyDOM interface not available\")\r\n \r\n if not ai_service:\r\n raise ValueError(\"AI service not available\")\r\n \r\n try:\r\n # Get file metadata\r\n file = lucydom_interface.get_file(file_id)\r\n if not file:\r\n raise ValueError(f\"File with ID {file_id} not found\")\r\n \r\n # Get file content\r\n file_content = await lucydom_interface.read_file_content(file_id)\r\n if not file_content:\r\n raise ValueError(f\"Content for file {file_id} not found\")\r\n \r\n # Extract metadata\r\n file_name = file.get(\"name\", \"unnamed\")\r\n content_type = file.get(\"content_type\")\r\n file_type = file.get(\"type\")\r\n \r\n # Process based on file type\r\n if file_type == \"image\" or (content_type and content_type.startswith(\"image/\")):\r\n # Image analysis\r\n if hasattr(ai_service, \"analyze_image\"):\r\n analysis = await ai_service.analyze_image(\r\n image_data=file_content,\r\n prompt=prompt,\r\n mime_type=content_type\r\n )\r\n \r\n return {\r\n \"file_id\": file_id,\r\n \"file_name\": file_name,\r\n \"analysis_type\": \"image\",\r\n \"result\": analysis\r\n }\r\n else:\r\n raise ValueError(\"AI service does not support image analysis\")\r\n \r\n elif file_name.endswith(\".pdf\"):\r\n # PDF analysis - first extract text, then analyze\r\n try:\r\n # Extract text\r\n text_content, is_extracted = extract_text_from_file_content(\r\n file_content, file_name, content_type\r\n )\r\n \r\n if not is_extracted:\r\n raise ValueError(f\"Failed to extract text from PDF {file_name}\")\r\n \r\n # Analyze text with AI\r\n pdf_analysis_prompt = f\"\"\"\r\n Analyze the following PDF content based on this request:\r\n \r\n REQUEST: {prompt}\r\n \r\n PDF CONTENT:\r\n {text_content[:10000]} # Limit to first 10K chars to avoid token limits\r\n \"\"\"\r\n \r\n analysis = await ai_service.call_api([{\"role\": \"user\", \"content\": pdf_analysis_prompt}])\r\n \r\n # Also check for images in the PDF\r\n has_images = False\r\n image_analysis = None\r\n \r\n try:\r\n # Extract and analyze images\r\n image_results = await self.extract_and_analyze_pdf_images(\r\n file_content, \r\n f\"Analyze images with respect to: {prompt}\", \r\n ai_service\r\n )\r\n \r\n if image_results and len(image_results) > 0:\r\n has_images = True\r\n image_analysis = \"\\n\\nPDF IMAGES ANALYSIS:\\n\"\r\n for img in image_results:\r\n image_analysis += f\"- Image on page {img.get('page')}: {img.get('response')}\\n\"\r\n except Exception as img_err:\r\n logger.warning(f\"Could not analyze images in PDF {file_name}: {str(img_err)}\")\r\n \r\n # Combine text and image analysis if available\r\n if has_images and image_analysis:\r\n analysis += image_analysis\r\n \r\n return {\r\n \"file_id\": file_id,\r\n \"file_name\": file_name,\r\n \"analysis_type\": \"pdf\",\r\n \"result\": analysis,\r\n \"has_images\": has_images\r\n }\r\n \r\n except Exception as pdf_err:\r\n logger.error(f\"Error analyzing PDF {file_name}: {str(pdf_err)}\")\r\n raise\r\n \r\n elif file_name.endswith(('.xlsx', '.xls', '.csv')):\r\n # Tabular data analysis\r\n try:\r\n # Extract text content\r\n text_content, is_extracted = extract_text_from_file_content(\r\n file_content, file_name, content_type\r\n )\r\n \r\n if not is_extracted:\r\n raise ValueError(f\"Failed to extract data from {file_name}\")\r\n \r\n # Analyze with AI\r\n data_analysis_prompt = f\"\"\"\r\n Analyze the following tabular data based on this request:\r\n \r\n REQUEST: {prompt}\r\n \r\n DATA CONTENT:\r\n {text_content[:10000]} # Limit to first 10K chars\r\n \r\n Provide a structured analysis including:\r\n 1. Data overview\r\n 2. Key insights\r\n 3. Patterns and trends\r\n 4. Answers to the specific request\r\n \"\"\"\r\n \r\n analysis = await ai_service.call_api([{\"role\": \"user\", \"content\": data_analysis_prompt}])\r\n \r\n return {\r\n \"file_id\": file_id,\r\n \"file_name\": file_name,\r\n \"analysis_type\": \"tabular_data\",\r\n \"result\": analysis\r\n }\r\n \r\n except Exception as data_err:\r\n logger.error(f\"Error analyzing tabular data {file_name}: {str(data_err)}\")\r\n raise\r\n \r\n else:\r\n # Default to text analysis for all other file types\r\n try:\r\n # Extract text content\r\n text_content, is_extracted = extract_text_from_file_content(\r\n file_content, file_name, content_type\r\n )\r\n \r\n if not is_extracted:\r\n raise ValueError(f\"Failed to extract text from {file_name}\")\r\n \r\n # Analyze with AI\r\n text_analysis_prompt = f\"\"\"\r\n Analyze the following document content based on this request:\r\n \r\n REQUEST: {prompt}\r\n \r\n DOCUMENT CONTENT:\r\n {text_content[:10000]} # Limit to first 10K chars\r\n \"\"\"\r\n \r\n analysis = await ai_service.call_api([{\"role\": \"user\", \"content\": text_analysis_prompt}])\r\n \r\n return {\r\n \"file_id\": file_id,\r\n \"file_name\": file_name,\r\n \"analysis_type\": \"text\",\r\n \"result\": analysis\r\n }\r\n \r\n except Exception as text_err:\r\n logger.error(f\"Error analyzing text content {file_name}: {str(text_err)}\")\r\n raise\r\n \r\n except Exception as e:\r\n logger.error(f\"Error analyzing file {file_id}: {str(e)}\")\r\n raise\r\n\r\n async def extract_and_analyze_pdf_images(self,\r\n pdf_content: bytes, \r\n prompt: str, \r\n ai_service\r\n ) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Extrahiert Bilder aus einer PDF-Datei und analysiert sie.\r\n Arbeitet mit Binärdaten statt Dateipfaden.\r\n \r\n Args:\r\n pdf_content: Binärdaten der PDF-Datei\r\n prompt: Prompt für die Bildanalyse\r\n ai_service: AI-Service für die Bildanalyse\r\n \r\n Returns:\r\n Liste mit Analyseergebnissen für jedes Bild\r\n \"\"\"\r\n image_responses = []\r\n temp_files = [] # Liste der temporären Dateien zur Bereinigung\r\n \r\n try:\r\n # PDF mit PyMuPDF öffnen\r\n import fitz # PyMuPDF\r\n # BytesIO is already imported at the top level\r\n import tempfile\r\n \r\n # PDF im Speicher öffnen\r\n doc = fitz.open(stream=pdf_content, filetype=\"pdf\")\r\n logger.info(f\"PDF geöffnet mit {len(doc)} Seiten\")\r\n \r\n for page_num, page in enumerate(doc, 1):\r\n # Alle Bilder auf der Seite finden\r\n image_list = page.get_images(full=True)\r\n \r\n if image_list:\r\n logger.info(f\"Seite {page_num}: {len(image_list)} Bilder gefunden\")\r\n \r\n for img_index, img in enumerate(image_list):\r\n try:\r\n # Bild-Referenz\r\n xref = img[0]\r\n\r\n # Bild und Metadaten extrahieren\r\n base_image = doc.extract_image(xref)\r\n image_bytes = base_image[\"image\"] # Tatsächliche Bilddaten\r\n image_ext = base_image[\"ext\"] # Dateiendung (jpg, png, etc.)\r\n \r\n # Erstelle temporäre Datei\r\n fd, temp_img_path = tempfile.mkstemp(suffix=f\".{image_ext}\")\r\n temp_files.append(temp_img_path) # Zur Bereinigungsliste hinzufügen\r\n \r\n with os.fdopen(fd, 'wb') as img_file:\r\n img_file.write(image_bytes)\r\n \r\n logger.debug(f\"Bild temporär gespeichert: {temp_img_path}\")\r\n \r\n # Analysiere mit AI-Service\r\n try:\r\n analysis_result = await ai_service.analyze_image(\r\n image_data=image_bytes, # Direktes Übergeben der Bilddaten\r\n prompt=prompt,\r\n mime_type=f\"image/{image_ext}\"\r\n )\r\n logger.debug(f\"Bildanalyse für Bild {img_index} auf Seite {page_num} abgeschlossen\")\r\n except Exception as analyze_error:\r\n logger.error(f\"Fehler bei der Bildanalyse: {str(analyze_error)}\")\r\n analysis_result = f\"[Fehler bei der Bildanalyse: {str(analyze_error)}]\"\r\n \r\n # Ergebnis speichern\r\n try:\r\n # Versuche zuerst, die Größe aus base_image zu bekommen\r\n if 'width' in base_image and 'height' in base_image:\r\n image_size = f\"{base_image['width']}x{base_image['height']}\"\r\n else:\r\n # Alternative: Öffne das temporäre Bild, um die Größe zu bestimmen\r\n from PIL import Image\r\n with Image.open(temp_img_path) as img:\r\n width, height = img.size\r\n image_size = f\"{width}x{height}\"\r\n except Exception as e:\r\n logger.warning(f\"Konnte Bildgröße nicht ermitteln: {str(e)}\")\r\n image_size = \"unbekannt\"\r\n\r\n image_responses.append({\r\n \"page\": page_num,\r\n \"image_index\": img_index,\r\n \"format\": image_ext,\r\n \"image_size\": image_size,\r\n \"response\": analysis_result\r\n })\r\n \r\n except Exception as e:\r\n logger.warning(f\"Fehler bei der Extraktion von Bild {img_index} auf Seite {page_num}: {str(e)}\")\r\n continue\r\n \r\n logger.info(f\"Extrahiert und analysiert: {len(image_responses)} Bilder aus PDF\")\r\n \r\n except ImportError:\r\n logger.error(\"PyMuPDF (fitz) ist nicht installiert. Installiere es mit 'pip install pymupdf'\")\r\n raise FileExtractionError(\"PyMuPDF (fitz) ist nicht installiert\")\r\n except Exception as e:\r\n logger.error(f\"Fehler beim Extrahieren von PDF-Bildern: {str(e)}\")\r\n raise FileExtractionError(f\"Fehler beim Extrahieren von PDF-Bildern: {str(e)}\")\r\n finally:\r\n # Bereinige alle temporären Dateien\r\n for temp_file in temp_files:\r\n try:\r\n if os.path.exists(temp_file):\r\n os.remove(temp_file)\r\n except Exception as e:\r\n logger.warning(f\"Konnte temporäre Datei nicht entfernen: {temp_file} - {str(e)}\")\r\n \r\n return image_responses\r\n\r\n async def analyze_multiple_files(\r\n self, \r\n file_ids: List[int], \r\n prompt: str, \r\n lucydom_interface, \r\n ai_service\r\n ) -> Dict[str, Any]:\r\n \"\"\"\r\n Analyze multiple files and synthesize a combined result.\r\n \r\n Args:\r\n file_ids: List of file IDs to analyze\r\n prompt: Analysis prompt\r\n lucydom_interface: Interface for database access\r\n ai_service: Service for AI requests\r\n \r\n Returns:\r\n Combined analysis result\r\n \"\"\"\r\n results = []\r\n \r\n # Analyze each file\r\n for file_id in file_ids:\r\n try:\r\n analysis = await self.analyze_file(file_id, prompt, lucydom_interface, ai_service)\r\n results.append(analysis)\r\n except Exception as e:\r\n logger.error(f\"Error analyzing file {file_id}: {str(e)}\")\r\n results.append({\r\n \"file_id\": file_id,\r\n \"error\": str(e),\r\n \"analysis_type\": \"error\"\r\n })\r\n \r\n # Now synthesize a combined analysis\r\n if results:\r\n try:\r\n # Prepare prompt for synthesis\r\n synthesis_prompt = f\"\"\"\r\n Synthesize a combined analysis based on these individual file analyses:\r\n \r\n ORIGINAL REQUEST: {prompt}\r\n \r\n INDIVIDUAL ANALYSES:\r\n \"\"\"\r\n \r\n for i, result in enumerate(results, 1):\r\n file_name = result.get(\"file_name\", f\"File {i}\")\r\n analysis_type = result.get(\"analysis_type\", \"unknown\")\r\n analysis_result = result.get(\"result\", \"No analysis available\")\r\n \r\n synthesis_prompt += f\"\"\"\r\n ## {file_name} ({analysis_type})\r\n {analysis_result}\r\n \r\n ---\r\n \"\"\"\r\n \r\n synthesis_prompt += \"\"\"\r\n Please provide a comprehensive synthesis that:\r\n 1. Combines insights from all files\r\n 2. Addresses the original request\r\n 3. Highlights connections between different files\r\n 4. Provides a unified conclusion\r\n \"\"\"\r\n \r\n # Call AI for synthesis\r\n synthesis = await ai_service.call_api([{\"role\": \"user\", \"content\": synthesis_prompt}])\r\n \r\n return {\r\n \"synthesis\": synthesis,\r\n \"individual_results\": results,\r\n \"files_analyzed\": len(results)\r\n }\r\n \r\n except Exception as e:\r\n logger.error(f\"Error synthesizing combined analysis: {str(e)}\")\r\n return {\r\n \"error\": str(e),\r\n \"individual_results\": results,\r\n \"files_analyzed\": len(results)\r\n }\r\n else:\r\n return {\r\n \"synthesis\": \"No files were successfully analyzed.\",\r\n \"individual_results\": [],\r\n \"files_analyzed\": 0\r\n }\r\n \r\n def determine_file_type(self, file_name: str, content_type: str = None) -> str:\r\n \"\"\"\r\n Determine the file type based on name and content type.\r\n \r\n Args:\r\n file_name: Name of the file\r\n content_type: MIME type (optional)\r\n \r\n Returns:\r\n File type string ('document', 'image', etc.)\r\n \"\"\"\r\n # Check content type first\r\n if content_type:\r\n if content_type.startswith('image/'):\r\n return \"image\"\r\n elif content_type in ['application/pdf']:\r\n return \"document\"\r\n elif content_type in ['application/vnd.ms-excel', \r\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',\r\n 'text/csv']:\r\n return \"spreadsheet\"\r\n \r\n # Check file extension\r\n lower_name = file_name.lower()\r\n \r\n # Images\r\n if lower_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg')):\r\n return \"image\"\r\n \r\n # Documents\r\n if lower_name.endswith(('.pdf', '.doc', '.docx', '.txt', '.md', '.rtf')):\r\n return \"document\"\r\n \r\n # Spreadsheets\r\n if lower_name.endswith(('.xlsx', '.xls', '.csv')):\r\n return \"spreadsheet\"\r\n \r\n # Presentations\r\n if lower_name.endswith(('.pptx', '.ppt')):\r\n return \"presentation\"\r\n \r\n # Data files\r\n if lower_name.endswith(('.json', '.xml', '.yaml', '.yml')):\r\n return \"data\"\r\n \r\n # Default to document\r\n return \"document\"\r\n \r\n def get_mime_type(self, file_name: str) -> str:\r\n \"\"\"Get MIME type based on file name.\"\"\"\r\n # Import from lucydom_interface\r\n from lucydom_interface import LucyDOMInterface\r\n temp_interface = LucyDOMInterface(0, 0) # Default values\r\n return temp_interface.get_mime_type(file_name)\r\n \r\n def prepare_file_contexts(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Bereitet die Dateikontexte basierend auf Metadaten vor.\r\n Akzeptiert keine Pfade mehr, sondern nur Metadaten aus der Datenbank.\r\n \r\n Args:\r\n files: Liste von Dateien mit Metadaten (Dict mit id, name, type, content_type)\r\n \r\n Returns:\r\n Liste von Dateikontexten für die Verarbeitung\r\n \"\"\"\r\n file_contexts = []\r\n \r\n logger.info(f\"Preparing file contexts for {len(files)} files\")\r\n \r\n for file in files:\r\n file_id = file.get(\"id\")\r\n file_name = file.get(\"name\")\r\n file_type = file.get(\"type\")\r\n \r\n # Create a comprehensive context with all available metadata\r\n context = {\r\n \"id\": file_id,\r\n \"name\": file_name,\r\n \"type\": file_type,\r\n \"size\": file.get(\"size\", \"Unbekannt\"),\r\n \"content_type\": file.get(\"content_type\"),\r\n \"path\": file.get(\"path\"),\r\n \"upload_date\": file.get(\"upload_date\"),\r\n \"hash\": file.get(\"hash\"),\r\n \"mandate_id\": file.get(\"mandate_id\"),\r\n \"user_id\": file.get(\"user_id\")\r\n }\r\n \r\n # Log for debugging\r\n logger.info(f\"Created file context: {file_name} (ID: {file_id}, Type: {file_type})\")\r\n \r\n file_contexts.append(context)\r\n \r\n return file_contexts\r\n\r\n def create_document_reference(self, message: Dict[str, Any], file_id: int, reference_type: str = \"reference\") -> Dict[str, Any]:\r\n \"\"\"\r\n Create a document reference without loading content.\r\n \r\n Args:\r\n message: The message to add the reference to\r\n file_id: ID of the file to reference\r\n reference_type: Type of reference (reference, citation, etc.)\r\n \r\n Returns:\r\n Updated message with the document reference\r\n \"\"\"\r\n if not self.lucydom_interface:\r\n logger.warning(\"LucyDOM interface not available for document reference\")\r\n return message\r\n \r\n # Get file metadata\r\n file = self.lucydom_interface.get_file(file_id)\r\n if not file:\r\n logger.warning(f\"File with ID {file_id} not found for reference\")\r\n return message\r\n \r\n # Create document structure with just the reference\r\n document = {\r\n \"id\": f\"ref_{uuid.uuid4()}\",\r\n \"source\": {\r\n \"type\": \"file\",\r\n \"id\": str(file_id),\r\n \"name\": file.get(\"name\", \"referenced_file\"),\r\n \"content_type\": file.get(\"content_type\"),\r\n \"size\": file.get(\"size\"),\r\n \"reference_type\": reference_type\r\n },\r\n \"contents\": [] # Empty contents - will be loaded on demand\r\n }\r\n \r\n # Add to message\r\n updated_message = message.copy()\r\n if \"documents\" not in updated_message:\r\n updated_message[\"documents\"] = []\r\n \r\n updated_message[\"documents\"].append(document)\r\n logger.info(f\"Added document reference for file {file.get('name')} (ID: {file_id})\")\r\n \r\n return updated_message\r\n\r\n def should_extract_document(self, document: Dict[str, Any], context_prompt: str = None) -> bool:\r\n \"\"\"\r\n Determine if a document needs content extraction.\r\n \r\n Args:\r\n document: The document object\r\n context_prompt: Current context prompt\r\n \r\n Returns:\r\n True if extraction is needed, False otherwise\r\n \"\"\"\r\n # If document has no contents, extraction is needed\r\n if not document.get(\"contents\"):\r\n return True\r\n \r\n # If document has contents but extraction status is False, extraction may be needed\r\n for content in document.get(\"contents\", []):\r\n if content.get(\"type\") == \"text\":\r\n # If already extracted, check if context has changed\r\n if content.get(\"is_extracted\", False):\r\n # If context prompt is different from what was used previously,\r\n # we may need to re-extract with the new context\r\n prev_context = content.get(\"extraction_context\")\r\n if context_prompt and prev_context != context_prompt:\r\n return True\r\n return False\r\n return True\r\n \r\n # Default to needing extraction\r\n return True\r\n\r\n\r\n\r\n # Factory method\r\n @staticmethod\r\n def get_instance():\r\n \"\"\"Get the singleton instance of FileManager.\"\"\"\r\n if FileManager._instance is None:\r\n FileManager._instance = FileManager()\r\n return FileManager._instance\r\n\r\n# Create a singleton instance for module-level access\r\nfile_manager = FileManager.get_instance()\r\n\r\ndef get_file_manager():\r\n \"\"\"Get the singleton instance of FileManager.\"\"\"\r\n return file_manager\r\n\r\n\r\n\r\n\r\nclass WorkflowFileManager:\r\n \"\"\"\r\n Specialized file manager for workflow operations.\r\n Handles workflow-specific file operations and document management.\r\n \"\"\"\r\n \r\n def __init__(self, workflow_id: str = None, lucydom_interface = None):\r\n \"\"\"\r\n Initialize the workflow file manager.\r\n \r\n Args:\r\n workflow_id: Optional workflow ID for context\r\n lucydom_interface: LucyDOM interface for database operations\r\n \"\"\"\r\n self.workflow_id = workflow_id\r\n self.lucydom_interface = lucydom_interface\r\n self.file_manager = get_file_manager()\r\n self.document_handler = None \r\n \r\n def set_workflow_id(self, workflow_id: str):\r\n \"\"\"Set or update the workflow ID.\"\"\"\r\n self.workflow_id = workflow_id\r\n \r\n def set_lucydom_interface(self, lucydom_interface):\r\n \"\"\"Set or update the LucyDOM interface.\"\"\"\r\n self.lucydom_interface = lucydom_interface\r\n \r\n async def add_files_to_message(self, \r\n message: Dict[str, Any], \r\n file_ids: List[int],\r\n add_log_func = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Add multiple files to a message.\r\n \r\n Args:\r\n message: The message to add files to\r\n file_ids: List of file IDs to add\r\n add_log_func: Optional logging function\r\n \r\n Returns:\r\n Updated message\r\n \"\"\"\r\n\r\n # If document handler is available, use it\r\n if self.document_handler:\r\n return await self.document_handler.add_files_to_message(\r\n message, \r\n file_ids, \r\n extraction_prompt=None # Default to no extraction\r\n )\r\n\r\n if not self.lucydom_interface:\r\n _log(add_log_func, self.workflow_id, \"LucyDOM interface not available\", \"error\")\r\n return message\r\n \r\n updated_message = message.copy()\r\n \r\n # Get file metadata\r\n files = []\r\n for file_id in file_ids:\r\n file = self.lucydom_interface.get_file(file_id)\r\n if file:\r\n files.append(file)\r\n else:\r\n _log(add_log_func, self.workflow_id, f\"File not found: {file_id}\", \"warning\")\r\n \r\n # Prepare file contexts\r\n file_contexts = self.file_manager.prepare_file_contexts(files)\r\n \r\n # Read file contents\r\n file_contents = await self.file_manager.read_file_contents(\r\n file_contexts,\r\n self.lucydom_interface,\r\n self.workflow_id,\r\n add_log_func\r\n )\r\n \r\n # Add files to message\r\n for file_id, content_data in file_contents.items():\r\n # Add file to message\r\n updated_message = FileManager.add_file_to_message(updated_message, content_data)\r\n \r\n return updated_message\r\n \r\n def get_files_from_message(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Extract file references from a message.\r\n \r\n Args:\r\n message: The message to extract files from\r\n \r\n Returns:\r\n List of file metadata\r\n \"\"\"\r\n files = []\r\n \r\n # Process documents\r\n for doc in message.get(\"documents\", []):\r\n source = doc.get(\"source\", {})\r\n \r\n # Only include file documents\r\n if source.get(\"type\") == \"file\":\r\n file_info = {\r\n \"id\": source.get(\"id\", \"\"),\r\n \"name\": source.get(\"name\", \"\"),\r\n \"type\": source.get(\"content_type\", \"\"),\r\n \"content_type\": source.get(\"content_type\", \"\"),\r\n \"size\": source.get(\"size\", 0)\r\n }\r\n \r\n files.append(file_info)\r\n \r\n return files\r\n \r\n def get_document_text_content(self, message: Dict[str, Any]) -> str:\r\n \"\"\"\r\n Extract text content from all documents in a message.\r\n \r\n Args:\r\n message: The message to extract content from\r\n \r\n Returns:\r\n Combined text content\r\n \"\"\"\r\n content = \"\"\r\n \r\n # Process all documents\r\n for doc in message.get(\"documents\", []):\r\n for doc_content in doc.get(\"contents\", []):\r\n if doc_content.get(\"type\") == \"text\":\r\n content += \"\\n\\n\" + doc_content.get(\"text\", \"\")\r\n \r\n return content\r\n \r\n async def extract_document_info(self, \r\n workflow: Dict[str, Any], \r\n message_id: str = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Extract document information from a workflow or specific message.\r\n \r\n Args:\r\n workflow: The workflow object\r\n message_id: Optional message ID to focus on a specific message\r\n \r\n Returns:\r\n Document information\r\n \"\"\"\r\n result = {\r\n \"documents\": [],\r\n \"file_count\": 0,\r\n \"extracted_text\": \"\"\r\n }\r\n \r\n if message_id:\r\n # Process only the specified message\r\n for message in workflow.get(\"messages\", []):\r\n if message.get(\"id\") == message_id:\r\n files = self.get_files_from_message(message)\r\n result[\"documents\"].extend(files)\r\n result[\"file_count\"] = len(files)\r\n result[\"extracted_text\"] = self.get_document_text_content(message)\r\n break\r\n else:\r\n # Process all messages\r\n for message in workflow.get(\"messages\", []):\r\n files = self.get_files_from_message(message)\r\n result[\"documents\"].extend(files)\r\n result[\"extracted_text\"] += self.get_document_text_content(message)\r\n \r\n # De-duplicate files\r\n unique_files = {}\r\n for file in result[\"documents\"]:\r\n file_id = file.get(\"id\")\r\n if file_id and file_id not in unique_files:\r\n unique_files[file_id] = file\r\n \r\n result[\"documents\"] = list(unique_files.values())\r\n result[\"file_count\"] = len(result[\"documents\"])\r\n \r\n return result\r\n \r\n async def analyze_workflow_documents(self, \r\n workflow: Dict[str, Any],\r\n prompt: str,\r\n ai_service,\r\n message_id: str = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Analyze documents in a workflow.\r\n \r\n Args:\r\n workflow: The workflow object\r\n prompt: Analysis prompt\r\n ai_service: Service for AI analysis\r\n message_id: Optional message ID to focus on specific message\r\n \r\n Returns:\r\n Analysis result\r\n \"\"\"\r\n if not self.lucydom_interface:\r\n raise ValueError(\"LucyDOM interface not available\")\r\n \r\n if not ai_service:\r\n raise ValueError(\"AI service not available\")\r\n \r\n # Extract document info\r\n doc_info = await self.extract_document_info(workflow, message_id)\r\n \r\n if doc_info[\"file_count\"] == 0:\r\n return {\r\n \"result\": \"No documents found for analysis\",\r\n \"files_analyzed\": 0\r\n }\r\n \r\n # Get file IDs\r\n file_ids = [doc.get(\"id\") for doc in doc_info[\"documents\"] if doc.get(\"id\")]\r\n \r\n # Analyze files\r\n analysis = await self.file_manager.analyze_multiple_files(\r\n file_ids,\r\n prompt,\r\n self.lucydom_interface,\r\n ai_service\r\n )\r\n \r\n return analysis\r\n\r\n# Export the workflow file manager factory function\r\ndef get_workflow_file_manager(workflow_id: str = None, lucydom_interface = None):\r\n \"\"\"Get a workflow file manager instance.\"\"\"\r\n return WorkflowFileManager(workflow_id, lucydom_interface)",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_88efeee1-f8ed-4d9b-9e26-ea02ae01ec06",
"source": {
"type": "file",
"id": "file_43756030-65a8-4270-a8f1-bf680b7caa96",
"name": "agentservice_registry.py",
"content_type": "text/x-python",
"size": 11404,
"upload_date": "2025-04-16T01:48:45.241114"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nUpdated registry for all available agents in the system.\r\nProvides centralized agent registration and access with improved error handling.\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport importlib\r\nfrom typing import Dict, Any, List, Optional\r\n\r\n# Import direct base agent module\r\nfrom modules.agentservice_base import BaseAgent\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nclass AgentRegistry:\r\n \"\"\"Registry for all available agents in the system\"\"\"\r\n \r\n _instance = None\r\n \r\n @classmethod\r\n def get_instance(cls):\r\n \"\"\"Get a singleton instance of the Agent Registry\"\"\"\r\n if cls._instance is None:\r\n cls._instance = cls()\r\n return cls._instance\r\n \r\n def __init__(self):\r\n \"\"\"Initialize the Agent Registry\"\"\"\r\n if AgentRegistry._instance is not None:\r\n raise RuntimeError(\"Singleton instance already exists - use get_instance()\")\r\n self.agents = {}\r\n self.ai_service = None\r\n self.document_handler = None\r\n self.lucydom_interface = None\r\n self._load_agents()\r\n \r\n def _load_agents(self):\r\n \"\"\"Load all available agents\"\"\"\r\n # List of all agent modules to load\r\n logger.info(\"Automatically loading agent modules...\")\r\n agent_modules = []\r\n for filename in os.listdir(os.path.dirname(__file__)):\r\n if filename.startswith(\"agentservice_agent_\") and filename.endswith(\".py\"):\r\n agent_modules.append(filename[:-3]) # Remove .py extension\r\n if not agent_modules:\r\n logger.warning(\"No agent modules found\")\r\n return\r\n logger.info(f\"Found {len(agent_modules)} agent modules\")\r\n \r\n for module_name in agent_modules:\r\n try:\r\n # Import the module\r\n module = importlib.import_module(f\"modules.{module_name}\")\r\n \r\n # Look for the agent class or a get_*_agent function\r\n agent_type = module_name.split('_')[-1]\r\n class_name = f\"{agent_type.capitalize()}Agent\"\r\n getter_name = f\"get_{agent_type}_agent\"\r\n \r\n agent = None\r\n \r\n # Try to get the agent via the get_*_agent function\r\n if hasattr(module, getter_name):\r\n getter_func = getattr(module, getter_name)\r\n agent = getter_func()\r\n logger.info(f\"Agent '{agent.name}' (Type: {agent.type}) loaded via {getter_name}()\")\r\n \r\n # Alternatively, try to instantiate the agent directly\r\n elif hasattr(module, class_name):\r\n agent_class = getattr(module, class_name)\r\n agent = agent_class()\r\n logger.info(f\"Agent '{agent.name}' (Type: {agent.type}) directly instantiated\")\r\n \r\n if agent:\r\n # Register the agent\r\n self.register_agent(agent)\r\n else:\r\n logger.warning(f\"No agent class or getter function found in module {module_name}\")\r\n \r\n except ImportError as e:\r\n logger.error(f\"Module {module_name} could not be imported: {e}\")\r\n except Exception as e:\r\n logger.error(f\"Error loading agent from module {module_name}: {e}\")\r\n\r\n def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None):\r\n \"\"\"\r\n Set system dependencies for all agents.\r\n \r\n Args:\r\n ai_service: AI service for text generation\r\n document_handler: Document handler for document operations\r\n lucydom_interface: LucyDOM interface for database access\r\n \"\"\"\r\n self.ai_service = ai_service\r\n self.document_handler = document_handler\r\n self.lucydom_interface = lucydom_interface\r\n # Update all registered agents\r\n self.update_agent_dependencies()\r\n\r\n\r\n def update_agent_dependencies(self):\r\n \"\"\"Update dependencies for all registered agents\"\"\"\r\n for agent_id, agent in self.agents.items():\r\n if hasattr(agent, 'set_dependencies'):\r\n agent.set_dependencies(\r\n ai_service=self.ai_service,\r\n document_handler=self.document_handler,\r\n lucydom_interface=self.lucydom_interface\r\n )\r\n\r\n def register_agent(self, agent: 'BaseAgent'):\r\n \"\"\"\r\n Register an agent in the registry.\r\n \r\n Args:\r\n agent: The agent to register\r\n \"\"\"\r\n agent_type = agent.type\r\n agent_id = getattr(agent, 'id', agent_type)\r\n \r\n # Initialize enhanced agents with dependencies\r\n if hasattr(agent, 'set_dependencies'):\r\n agent.set_dependencies(\r\n ai_service=self.ai_service,\r\n document_handler=self.document_handler,\r\n lucydom_interface=self.lucydom_interface\r\n )\r\n \r\n self.agents[agent_type] = agent\r\n # Also register by ID if it's different from type\r\n if agent_id != agent_type:\r\n self.agents[agent_id] = agent\r\n \r\n logger.debug(f\"Agent '{agent.name}' (Type: {agent_type}, ID: {agent_id}) registered\")\r\n \r\n def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:\r\n \"\"\"\r\n Get an agent instance by ID or type.\r\n \r\n Args:\r\n agent_identifier: ID or type of the desired agent\r\n \r\n Returns:\r\n Agent instance or None if not found\r\n \"\"\"\r\n # Try to find directly by type\r\n if agent_identifier in self.agents:\r\n return self.agents[agent_identifier]\r\n \r\n # If not found, try different name variants\r\n variants = [\r\n agent_identifier,\r\n agent_identifier.replace('_agent', ''),\r\n f\"{agent_identifier}_agent\"\r\n ]\r\n \r\n for variant in variants:\r\n if variant in self.agents:\r\n return self.agents[variant]\r\n \r\n logger.warning(f\"Agent with identifier '{agent_identifier}' not found\")\r\n return None\r\n \r\n def get_all_agents(self) -> Dict[str, BaseAgent]:\r\n \"\"\"Get all registered agents.\"\"\"\r\n return self.agents\r\n \r\n def get_agent_infos(self) -> List[Dict[str, Any]]:\r\n \"\"\"Get information about all registered agents.\"\"\"\r\n agent_infos = []\r\n # Only once per agent instance (since we register both by type and ID)\r\n seen_agents = set()\r\n for agent in self.agents.values():\r\n if agent not in seen_agents:\r\n agent_infos.append(agent.get_agent_info())\r\n seen_agents.add(agent)\r\n return agent_infos\r\n \r\n def get_agent_by_format(self, required_format: str) -> Optional[BaseAgent]:\r\n \"\"\"\r\n Find an agent that can produce the required output format.\r\n \r\n Args:\r\n required_format: The required output format\r\n \r\n Returns:\r\n Agent that can produce the required format, or None if not found\r\n \"\"\"\r\n # Create mapping of result format -> agent for faster lookup\r\n format_to_agent = {}\r\n seen_agents = set()\r\n \r\n for agent in self.agents.values():\r\n if agent not in seen_agents:\r\n # Get the agent's result format\r\n agent_format = getattr(agent, 'result_format', None)\r\n if agent_format:\r\n format_to_agent[agent_format.lower()] = agent\r\n seen_agents.add(agent)\r\n \r\n # Try to find an exact match\r\n if required_format.lower() in format_to_agent:\r\n return format_to_agent[required_format.lower()]\r\n \r\n # If no exact match, try to find a partial match\r\n for fmt, agent in format_to_agent.items():\r\n if required_format.lower() in fmt or fmt in required_format.lower():\r\n return agent\r\n \r\n # No match found\r\n return None\r\n \r\n def initialize_agents_for_workflow(self) -> Dict[str, Dict[str, Any]]:\r\n \"\"\"Initialize agents for a workflow.\"\"\"\r\n initialized_agents = {}\r\n seen_agents = set()\r\n for agent in self.agents.values():\r\n if agent not in seen_agents:\r\n agent_info = agent.get_agent_info()\r\n agent_id = agent_info[\"id\"]\r\n initialized_agents[agent_id] = agent_info\r\n seen_agents.add(agent)\r\n return initialized_agents\r\n \r\n def get_agent_capabilities(self) -> Dict[str, List[str]]:\r\n \"\"\"\r\n Get a mapping of capabilities to agents.\r\n Useful for finding the right agent for a specific task.\r\n \r\n Returns:\r\n Dict mapping capability keywords to agent IDs\r\n \"\"\"\r\n capabilities_map = {}\r\n seen_agents = set()\r\n \r\n for agent in self.agents.values():\r\n if agent not in seen_agents:\r\n # Get agent info\r\n agent_id = getattr(agent, 'id', agent.type)\r\n \r\n # Extract capabilities - check for get_capabilities method first\r\n if hasattr(agent, 'get_capabilities') and callable(getattr(agent, 'get_capabilities')):\r\n capabilities = agent.get_capabilities()\r\n else:\r\n # Fall back to string parsing\r\n capabilities_str = getattr(agent, 'capabilities', \"\")\r\n capabilities = [kw.strip().lower() for kw in capabilities_str.split(',') if kw.strip()]\r\n \r\n # Add each capability to the mapping\r\n for capability in capabilities:\r\n if capability not in capabilities_map:\r\n capabilities_map[capability] = []\r\n if agent_id not in capabilities_map[capability]:\r\n capabilities_map[capability].append(agent_id)\r\n \r\n seen_agents.add(agent)\r\n \r\n return capabilities_map\r\n \r\n def get_agent_by_capability(self, capability: str) -> Optional['BaseAgent']:\r\n \"\"\"\r\n Find an agent with a specific capability.\r\n \r\n Args:\r\n capability: The required capability\r\n \r\n Returns:\r\n Agent with the required capability, or None if not found\r\n \"\"\"\r\n # Create mapping of capabilities for faster lookup\r\n capability_map = self.get_agent_capabilities()\r\n \r\n # Look for the capability (case-insensitive)\r\n capability = capability.lower()\r\n matching_agents = []\r\n \r\n # Direct match\r\n if capability in capability_map:\r\n matching_agents = capability_map[capability]\r\n else:\r\n # Partial matches\r\n for cap, agents in capability_map.items():\r\n if capability in cap or cap in capability:\r\n matching_agents.extend(agents)\r\n \r\n # Return the first matching agent\r\n if matching_agents:\r\n agent_id = matching_agents[0]\r\n return self.get_agent(agent_id)\r\n \r\n return None ",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_a43d65b7-5751-4e61-ad70-1b5d90aacdcc",
"source": {
"type": "file",
"id": "file_ddbea7c2-f816-43f4-8485-3c56e17ae601",
"name": "agentservice_utils.py",
"content_type": "text/x-python",
"size": 29021,
"upload_date": "2025-04-16T01:48:45.241114"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nCentralized utility functions for the Agentservice (continued).\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport json\r\nimport uuid\r\nfrom datetime import datetime\r\nfrom typing import List, Dict, Any, Optional, Tuple, Union, Callable\r\nfrom io import BytesIO\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nclass WorkflowUtils:\r\n \"\"\"\r\n Utility class for workflow operations.\r\n Centralizes common workflow-related functions.\r\n \"\"\"\r\n \r\n def __init__(self, workflow_id: str = None):\r\n \"\"\"Initialize with optional workflow ID\"\"\"\r\n self.workflow_id = workflow_id\r\n \r\n def set_workflow_id(self, workflow_id: str):\r\n \"\"\"Set or update the workflow ID\"\"\"\r\n self.workflow_id = workflow_id\r\n \r\n def get_documents(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Get all documents from a workflow across all messages.\r\n \r\n Args:\r\n workflow: The workflow object\r\n \r\n Returns:\r\n List of document objects\r\n \"\"\"\r\n documents = []\r\n \r\n # Process all messages\r\n for message in workflow.get(\"messages\", []):\r\n # Extract documents from the message\r\n for doc in message.get(\"documents\", []):\r\n # Add to list if not already present\r\n if not any(d.get(\"id\") == doc.get(\"id\") for d in documents):\r\n documents.append(doc)\r\n \r\n return documents\r\n \r\n def get_files(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Get all file references from a workflow.\r\n \r\n Args:\r\n workflow: The workflow object\r\n \r\n Returns:\r\n List of file metadata objects\r\n \"\"\"\r\n files = []\r\n \r\n # Process all messages\r\n for message in workflow.get(\"messages\", []):\r\n # Extract documents from the message\r\n for doc in message.get(\"documents\", []):\r\n source = doc.get(\"source\", {})\r\n \r\n # Only include file documents\r\n if source.get(\"type\") == \"file\":\r\n file_info = {\r\n \"id\": source.get(\"id\", \"\"),\r\n \"name\": source.get(\"name\", \"\"),\r\n \"type\": source.get(\"content_type\", \"\"),\r\n \"content_type\": source.get(\"content_type\", \"\"),\r\n \"size\": source.get(\"size\", 0)\r\n }\r\n \r\n # Check if file is already in the list\r\n if not any(f.get(\"id\") == file_info[\"id\"] for f in files):\r\n files.append(file_info)\r\n \r\n return files\r\n \r\n def extract_by_prompt(self, workflow: Dict[str, Any], prompt: str, ai_service) -> Dict[str, Any]:\r\n \"\"\"\r\n Extract data from workflow documents based on an AI prompt.\r\n \r\n Args:\r\n workflow: The workflow object\r\n prompt: The extraction prompt\r\n ai_service: The AI service to use for extraction\r\n \r\n Returns:\r\n Extracted data\r\n \"\"\"\r\n # This is an async method but we're exposing it as a regular method\r\n # The caller should use it with asyncio.run() or await\r\n async def _extract():\r\n # Create extraction prompt\r\n files = self.get_files(workflow)\r\n file_descriptions = \"\\n\".join([f\"- {f.get('name', 'unnamed')} ({f.get('type', 'unknown')})\" for f in files])\r\n \r\n extraction_prompt = f\"\"\"\r\n Extract relevant information from the following files based on this request:\r\n \r\n REQUEST: {prompt}\r\n \r\n FILES:\r\n {file_descriptions}\r\n \r\n Focus on the most relevant content and provide a structured output.\r\n \"\"\"\r\n \r\n # Call AI\r\n response = await ai_service.call_api([{\"role\": \"user\", \"content\": extraction_prompt}])\r\n \r\n return {\r\n \"prompt\": prompt,\r\n \"extracted_content\": response,\r\n \"files_processed\": len(files)\r\n }\r\n \r\n # Return the coroutine\r\n return _extract()\r\n \r\n def merge_workflows(self, workflows: List[Dict[str, Any]]) -> Dict[str, Any]:\r\n \"\"\"\r\n Merge multiple workflows into a single unified workflow.\r\n Useful for workflow templates or combining partial workflows.\r\n \r\n Args:\r\n workflows: List of workflow objects to merge\r\n \r\n Returns:\r\n Merged workflow\r\n \"\"\"\r\n if not workflows:\r\n return {}\r\n \r\n # Start with the first workflow\r\n result = workflows[0].copy()\r\n \r\n # Initialize lists if not present\r\n if \"messages\" not in result:\r\n result[\"messages\"] = []\r\n if \"logs\" not in result:\r\n result[\"logs\"] = []\r\n \r\n # Merge additional workflows\r\n for workflow in workflows[1:]:\r\n # Append messages\r\n for message in workflow.get(\"messages\", []):\r\n # Check for duplicates\r\n if not any(m.get(\"id\") == message.get(\"id\") for m in result[\"messages\"]):\r\n result[\"messages\"].append(message)\r\n \r\n # Append logs\r\n for log in workflow.get(\"logs\", []):\r\n # Check for duplicates\r\n if not any(l.get(\"id\") == log.get(\"id\") for l in result[\"logs\"]):\r\n result[\"logs\"].append(log)\r\n \r\n # Update status if needed\r\n if workflow.get(\"status\") == \"failed\":\r\n result[\"status\"] = \"failed\"\r\n \r\n # Update last_activity if newer\r\n if (workflow.get(\"last_activity\") and \r\n (not result.get(\"last_activity\") or \r\n workflow[\"last_activity\"] > result[\"last_activity\"])):\r\n result[\"last_activity\"] = workflow[\"last_activity\"]\r\n \r\n return result\r\n \r\n def get_message(self, workflow: Dict[str, Any], message_id: str) -> Optional[Dict[str, Any]]:\r\n \"\"\"\r\n Find a message by ID in the workflow.\r\n \r\n Args:\r\n workflow: The workflow object\r\n message_id: The message ID to find\r\n \r\n Returns:\r\n Message object or None if not found\r\n \"\"\"\r\n for message in workflow.get(\"messages\", []):\r\n if message.get(\"id\") == message_id:\r\n return message\r\n return None\r\n \r\n def to_str(self, workflow: Dict[str, Any]) -> str:\r\n \"\"\"\r\n Convert workflow to a formatted string representation.\r\n \r\n Args:\r\n workflow: The workflow object\r\n \r\n Returns:\r\n String representation of the workflow\r\n \"\"\"\r\n # Create a summary string\r\n result = f\"Workflow: {workflow.get('id')}\\n\"\r\n result += f\"Status: {workflow.get('status', 'unknown')}\\n\"\r\n result += f\"Started: {workflow.get('started_at', 'unknown')}\\n\"\r\n result += f\"Last Activity: {workflow.get('last_activity', 'unknown')}\\n\"\r\n \r\n # Add message count\r\n message_count = len(workflow.get(\"messages\", []))\r\n result += f\"Messages: {message_count}\\n\"\r\n \r\n # Add log count\r\n log_count = len(workflow.get(\"logs\", []))\r\n result += f\"Logs: {log_count}\\n\"\r\n \r\n return result\r\n\r\n\r\nclass MessageUtils:\r\n \"\"\"\r\n Utility class for message operations.\r\n Centralizes common message-related functions.\r\n \"\"\"\r\n \r\n def create_message(self, workflow_id: str, role: str = \"system\") -> Dict[str, Any]:\r\n \"\"\"\r\n Create a new message object.\r\n \r\n Args:\r\n workflow_id: ID of the workflow\r\n role: Role of the message ('system', 'user', 'assistant')\r\n \r\n Returns:\r\n New message object\r\n \"\"\"\r\n message_id = f\"msg_{uuid.uuid4()}\"\r\n current_time = datetime.now().isoformat()\r\n \r\n # Create message object\r\n message = {\r\n \"id\": message_id,\r\n \"workflow_id\": workflow_id,\r\n \"parent_message_id\": None,\r\n \"started_at\": current_time,\r\n \"finished_at\": None,\r\n \"sequence_no\": 0,\r\n \r\n \"status\": \"pending\",\r\n \"role\": role,\r\n \r\n \"data_stats\": {\r\n \"processing_time\": 0.0,\r\n \"token_count\": 0,\r\n \"bytes_sent\": 0,\r\n \"bytes_received\": 0\r\n },\r\n \r\n \"documents\": [],\r\n \"content\": None,\r\n \"agent_type\": None\r\n }\r\n \r\n return message\r\n \r\n def finalize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:\r\n \"\"\"\r\n Finalize a message by setting completion timestamp.\r\n \r\n Args:\r\n message: The message object\r\n \r\n Returns:\r\n Updated message object\r\n \"\"\"\r\n message[\"finished_at\"] = datetime.now().isoformat()\r\n message[\"status\"] = \"completed\"\r\n return message\r\n \r\n def get_documents(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Get all documents from a message.\r\n \r\n Args:\r\n message: The message object\r\n \r\n Returns:\r\n List of document objects\r\n \"\"\"\r\n return message.get(\"documents\", [])\r\n \r\n def get_files(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Get all file references from a message.\r\n \r\n Args:\r\n message: The message object\r\n \r\n Returns:\r\n List of file metadata objects\r\n \"\"\"\r\n files = []\r\n \r\n # Extract documents from the message\r\n for doc in message.get(\"documents\", []):\r\n source = doc.get(\"source\", {})\r\n \r\n # Only include file documents\r\n if source.get(\"type\") == \"file\":\r\n file_info = {\r\n \"id\": source.get(\"id\", \"\"),\r\n \"name\": source.get(\"name\", \"\"),\r\n \"type\": source.get(\"content_type\", \"\"),\r\n \"content_type\": source.get(\"content_type\", \"\"),\r\n \"size\": source.get(\"size\", 0)\r\n }\r\n \r\n files.append(file_info)\r\n \r\n return files\r\n \r\n def extract_text_content(self, message: Dict[str, Any]) -> str:\r\n \"\"\"\r\n Extract text content from a message including document content.\r\n \r\n Args:\r\n message: The message object\r\n \r\n Returns:\r\n String with all text content from the message\r\n \"\"\"\r\n content = message.get(\"content\", \"\")\r\n \r\n # Add document content\r\n for doc in message.get(\"documents\", []):\r\n # Check for document contents\r\n for doc_content in doc.get(\"contents\", []):\r\n if doc_content.get(\"type\") == \"text\":\r\n content += \"\\n\\n\" + doc_content.get(\"text\", \"\")\r\n \r\n return content\r\n \r\n def to_str(self, message: Dict[str, Any]) -> str:\r\n \"\"\"\r\n Convert message to a formatted string representation.\r\n \r\n Args:\r\n message: The message object\r\n \r\n Returns:\r\n String representation of the message\r\n \"\"\"\r\n # Create a summary string\r\n result = f\"Message: {message.get('id')}\\n\"\r\n result += f\"Role: {message.get('role', 'unknown')}\\n\"\r\n \r\n # Add agent info if available\r\n if message.get(\"agent_type\"):\r\n result += f\"Agent: {message.get('agent_name', message.get('agent_type', 'unknown'))}\\n\"\r\n \r\n # Add content summary\r\n content = message.get(\"content\", \"\")\r\n if content:\r\n content_preview = content[:100] + \"...\" if len(content) > 100 else content\r\n result += f\"Content: {content_preview}\\n\"\r\n \r\n # Add document count\r\n doc_count = len(message.get(\"documents\", []))\r\n result += f\"Documents: {doc_count}\\n\"\r\n \r\n return result\r\n\r\n\r\nclass FileUtils:\r\n \"\"\"\r\n Utility class for file operations.\r\n Centralizes common file-related functions.\r\n \"\"\"\r\n \r\n def is_text_extractable(self, file_name: str, content_type: str = None) -> bool:\r\n \"\"\"\r\n Check if text can be extracted from a file.\r\n \r\n Args:\r\n file_name: Name of the file\r\n content_type: MIME type (optional)\r\n \r\n Returns:\r\n True if text can be extracted, False otherwise\r\n \"\"\"\r\n # Text files\r\n if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):\r\n return True\r\n \r\n # Excel files\r\n if file_name.endswith(('.xlsx', '.xls')):\r\n try:\r\n import pandas\r\n return True\r\n except ImportError:\r\n return False\r\n \r\n # PDF files\r\n if file_name.endswith('.pdf'):\r\n try:\r\n # Check if PyPDF2 or PyMuPDF is available\r\n try:\r\n import PyPDF2\r\n return True\r\n except ImportError:\r\n try:\r\n import fitz # PyMuPDF\r\n return True\r\n except ImportError:\r\n return False\r\n except:\r\n return False\r\n \r\n # Images and other non-text files\r\n if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',\r\n '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',\r\n '.mp3', '.wav', '.ogg', '.flac', '.aac')):\r\n return False\r\n \r\n # Check content type if file extension doesn't give a clear answer\r\n if content_type:\r\n if content_type.startswith(('text/', 'application/json', 'application/xml')):\r\n return True\r\n elif content_type == 'application/pdf':\r\n return True\r\n elif content_type.startswith(('image/', 'video/', 'audio/')):\r\n return False\r\n \r\n # Default to allowing extraction attempt\r\n return True\r\n \r\n def get_mime_type(self, file_name: str) -> str:\r\n \"\"\"\r\n Get MIME type based on file name.\r\n \r\n Args:\r\n file_name: Name of the file\r\n \r\n Returns:\r\n MIME type string\r\n \"\"\"\r\n import mimetypes\r\n \r\n # Initialize mimetypes\r\n mimetypes.init()\r\n \r\n # Get MIME type\r\n mime_type, _ = mimetypes.guess_type(file_name)\r\n \r\n if not mime_type:\r\n # Default mappings for common extensions\r\n extension_map = {\r\n 'txt': 'text/plain',\r\n 'md': 'text/markdown',\r\n 'json': 'application/json',\r\n 'csv': 'text/csv',\r\n 'html': 'text/html',\r\n 'htm': 'text/html',\r\n 'pdf': 'application/pdf',\r\n 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',\r\n 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',\r\n 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',\r\n 'jpg': 'image/jpeg',\r\n 'jpeg': 'image/jpeg',\r\n 'png': 'image/png',\r\n 'gif': 'image/gif',\r\n 'svg': 'image/svg+xml',\r\n 'webp': 'image/webp',\r\n 'mp4': 'video/mp4',\r\n 'mp3': 'audio/mpeg'\r\n }\r\n \r\n # Get extension\r\n ext = os.path.splitext(file_name)[1].lower().lstrip('.')\r\n \r\n # Return mapped MIME type or default\r\n mime_type = extension_map.get(ext, 'application/octet-stream')\r\n \r\n return mime_type\r\n\r\n\r\nclass LoggingUtils:\r\n \"\"\"\r\n Enhanced logging utilities for better workflow tracking.\r\n Provides structured and categorized logging for workflows.\r\n \"\"\"\r\n \r\n def __init__(self, workflow_id: str = None, log_func: Callable = None):\r\n \"\"\"\r\n Initialize logging utilities.\r\n \r\n Args:\r\n workflow_id: ID of the workflow for context\r\n log_func: Function to call for adding workflow logs\r\n \"\"\"\r\n self.workflow_id = workflow_id\r\n self.log_func = log_func\r\n self.logger = logging.getLogger(__name__)\r\n \r\n # Define log categories\r\n self.categories = {\r\n \"workflow\": \"Workflow Management\",\r\n \"planning\": \"Activity Planning\",\r\n \"execution\": \"Activity Execution\",\r\n \"agents\": \"Agent Selection & Execution\",\r\n \"files\": \"File Processing\",\r\n \"summary\": \"Results Summary\",\r\n \"error\": \"Error Handling\",\r\n \"code\": \"Code Execution\", \r\n }\r\n \r\n def set_workflow_id(self, workflow_id: str):\r\n \"\"\"Update the workflow ID\"\"\"\r\n self.workflow_id = workflow_id\r\n \r\n def set_log_func(self, log_func: Callable):\r\n \"\"\"Update the log function\"\"\"\r\n self.log_func = log_func\r\n \r\n def info(self, message: str, category: str = \"workflow\", details: str = None):\r\n \"\"\"\r\n Log an informational message.\r\n \r\n Args:\r\n message: The log message\r\n category: Log category\r\n details: Optional detailed information\r\n \"\"\"\r\n category_name = self.categories.get(category, category)\r\n log_message = f\"[{category_name}] {message}\"\r\n \r\n # Log to standard logger\r\n self.logger.info(log_message)\r\n \r\n # Log to workflow if function available\r\n if self.log_func and self.workflow_id:\r\n self.log_func(self.workflow_id, message, \"info\", category, category_name)\r\n \r\n def warning(self, message: str, category: str = \"workflow\", details: str = None):\r\n \"\"\"\r\n Log a warning message.\r\n \r\n Args:\r\n message: The log message\r\n category: Log category\r\n details: Optional detailed information\r\n \"\"\"\r\n category_name = self.categories.get(category, category)\r\n log_message = f\"[{category_name}] {message}\"\r\n \r\n # Log to standard logger\r\n self.logger.warning(log_message)\r\n \r\n # Log to workflow if function available\r\n if self.log_func and self.workflow_id:\r\n self.log_func(self.workflow_id, message, \"warning\", category, category_name)\r\n \r\n def error(self, message: str, category: str = \"error\", details: str = None):\r\n \"\"\"\r\n Log an error message.\r\n \r\n Args:\r\n message: The log message\r\n category: Log category\r\n details: Optional detailed information\r\n \"\"\"\r\n category_name = self.categories.get(category, category)\r\n log_message = f\"[{category_name}] {message}\"\r\n \r\n # Log to standard logger\r\n self.logger.error(log_message)\r\n \r\n # Log to workflow if function available\r\n if self.log_func and self.workflow_id:\r\n self.log_func(self.workflow_id, message, \"error\", category, category_name)\r\n \r\n def debug(self, message: str, category: str = \"workflow\", details: str = None):\r\n \"\"\"\r\n Log a debug message.\r\n \r\n Args:\r\n message: The log message\r\n category: Log category\r\n details: Optional detailed information\r\n \"\"\"\r\n category_name = self.categories.get(category, category)\r\n log_message = f\"[{category_name}] {message}\"\r\n \r\n # Log to standard logger\r\n self.logger.debug(log_message)\r\n \r\n def get_category_name(self, category: str) -> str:\r\n \"\"\"\r\n Get human-readable category name.\r\n \r\n Args:\r\n category: Category code\r\n \r\n Returns:\r\n Human-readable category name\r\n \"\"\"\r\n return self.categories.get(category, category)\r\n\r\n\r\ndef extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]:\r\n \"\"\"\r\n Extract text from various file formats based on binary content.\r\n \r\n Args:\r\n file_content: Binary content of the file\r\n file_name: Name of the file for format detection\r\n content_type: Optional MIME type of the file\r\n \r\n Returns:\r\n Tuple with (extracted text, is_extracted flag)\r\n \"\"\"\r\n # Check if file is likely text-extractable\r\n if not is_text_extractable(file_name, content_type):\r\n return f\"[File: {file_name} - Text extraction not supported]\", False\r\n \r\n try:\r\n # Simple text files\r\n if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv', '.log', '.ini', '.cfg', '.conf')) or (content_type and (content_type.startswith('text/') or content_type in ['application/json', 'application/xml', 'text/csv'])):\r\n try:\r\n return file_content.decode('utf-8'), True\r\n except UnicodeDecodeError:\r\n try:\r\n return file_content.decode('latin1'), True\r\n except:\r\n return file_content.decode('cp1252', errors='replace'), True\r\n \r\n # Excel files\r\n elif file_name.endswith(('.xlsx', '.xls')):\r\n try:\r\n import pandas as pd\r\n # Create temporary in-memory file\r\n file_obj = BytesIO(file_content)\r\n df = pd.read_excel(file_obj)\r\n result = f\"Excel file with {len(df)} rows and {len(df.columns)} columns.\\n\"\r\n result += f\"Columns: {', '.join(df.columns.tolist())}\\n\\n\"\r\n result += df.to_string(index=False)\r\n return result, True\r\n except ImportError:\r\n return f\"[Excel file: {file_name} - pandas not installed]\", False\r\n except Exception as e:\r\n return f\"[Error extracting Excel content: {str(e)}]\", False\r\n \r\n # CSV files\r\n elif file_name.endswith('.csv'):\r\n try:\r\n import pandas as pd\r\n try:\r\n # Create temporary in-memory file\r\n file_obj = BytesIO(file_content)\r\n df = pd.read_csv(file_obj, encoding='utf-8')\r\n except UnicodeDecodeError:\r\n file_obj = BytesIO(file_content)\r\n try:\r\n df = pd.read_csv(file_obj, encoding='latin1')\r\n except:\r\n file_obj = BytesIO(file_content)\r\n df = pd.read_csv(file_obj, encoding='cp1252')\r\n \r\n result = f\"CSV file with {len(df)} rows and {len(df.columns)} columns.\\n\"\r\n result += f\"Columns: {', '.join(df.columns.tolist())}\\n\\n\"\r\n result += df.to_string(index=False)\r\n return result, True\r\n except ImportError:\r\n return f\"[CSV file: {file_name} - pandas not installed]\", False\r\n except Exception as e:\r\n return f\"[Error extracting CSV content: {str(e)}]\", False\r\n \r\n # PDF files\r\n elif file_name.endswith('.pdf'):\r\n try:\r\n try:\r\n from PyPDF2 import PdfReader\r\n reader = PdfReader(BytesIO(file_content))\r\n text = \"\"\r\n for page in reader.pages:\r\n text += page.extract_text() + \"\\n\\n\"\r\n return text, True\r\n except ImportError:\r\n try:\r\n import fitz # PyMuPDF\r\n doc = fitz.open(stream=file_content, filetype=\"pdf\")\r\n text = \"\"\r\n for page in doc:\r\n text += page.get_text() + \"\\n\\n\"\r\n return text, True\r\n except ImportError:\r\n return f\"[PDF: {file_name} - No PDF library installed]\", False\r\n except Exception as e:\r\n return f\"[Error reading PDF file {file_name}: {str(e)}]\", False\r\n \r\n # Default case - try basic text extraction\r\n else:\r\n try:\r\n return file_content.decode('utf-8', errors='replace'), True\r\n except Exception as e:\r\n logger.error(f\"Error extracting text from {file_name}: {str(e)}\")\r\n return f\"[Text extraction error: {str(e)}]\", False\r\n \r\n except Exception as e:\r\n logger.error(f\"Error extracting text from {file_name}: {str(e)}\")\r\n return f\"[Text extraction error: {str(e)}]\", False\r\n \r\n\r\ndef is_text_extractable(file_name: str, content_type: str = None) -> bool:\r\n \"\"\"Check if text can be extracted from a file.\"\"\"\r\n # Text files\r\n if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):\r\n return True\r\n \r\n # Excel files\r\n if file_name.endswith(('.xlsx', '.xls')):\r\n try:\r\n import pandas\r\n return True\r\n except ImportError:\r\n return False\r\n \r\n # PDF files\r\n if file_name.endswith('.pdf'):\r\n try:\r\n # Check if PyPDF2 or PyMuPDF is available\r\n try:\r\n import PyPDF2\r\n return True\r\n except ImportError:\r\n try:\r\n import fitz # PyMuPDF\r\n return True\r\n except ImportError:\r\n return False\r\n except:\r\n return False\r\n \r\n # Images and other non-text files\r\n if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',\r\n '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',\r\n '.mp3', '.wav', '.ogg', '.flac', '.aac')):\r\n return False\r\n \r\n # Check content type if file extension doesn't give a clear answer\r\n if content_type:\r\n if content_type.startswith(('text/', 'application/json', 'application/xml')):\r\n return True\r\n elif content_type == 'application/pdf':\r\n return True\r\n elif content_type.startswith(('image/', 'video/', 'audio/')):\r\n return False\r\n \r\n # Default to allowing extraction attempt\r\n return True \r\n\r\nasync def async_check_for_poweron(message_content: str, ai_service = None):\r\n \"\"\"\r\n Check if message contains 'poweron' keyword and generate a special response if needed.\r\n \r\n Args:\r\n message_content: The message content to check\r\n ai_service: Optional AI service for generating contextual responses\r\n \r\n Returns:\r\n PowerOn response text or None if not a PowerOn request\r\n \"\"\"\r\n if not message_content or not isinstance(message_content, str):\r\n return None\r\n \r\n if \"poweron\" in message_content.lower():\r\n # Generate a contextual response about the user's question\r\n if ai_service:\r\n try:\r\n # Use AI to generate a brief contextual response\r\n prompt = f\"\"\"\r\n Tell to the user in the language of his prompt a big big thank you, that he thinks for you being PowerOn. Tell him, how pleased you are, to be part of the PowerOn family, working to support humans for a better life.\r\n \r\n Then generate a short answer (1-2 sentences) to this question: {message_content}\r\n \"\"\"\r\n contextual_response = await ai_service.call_api([{\"role\": \"user\", \"content\": prompt}])\r\n contextual_response = contextual_response.strip()\r\n except Exception as e:\r\n # Fallback if AI generation fails\r\n logger.warning(f\"Error generating contextual response: {str(e)}\")\r\n contextual_response = \"Thank you for your question about PowerOn.\"\r\n else:\r\n contextual_response = \"Thank you for your question about PowerOn.\"\r\n \r\n # Add the standard PowerOn message\r\n poweron_message = contextual_response\r\n return poweron_message\r\n \r\n return None\r\n\r\n",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_011c52df-b9b1-42bc-bc16-0746a88c86b0",
"source": {
"type": "file",
"id": "file_1c48af0c-7471-47b9-a9ee-e96856088113",
"name": "agentservice_workflow_execution.py",
"content_type": "text/x-python",
"size": 30461,
"upload_date": "2025-04-16T01:48:45.242110"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nRefactored architecture for the Agentservice multi-agent system.\r\nThis module defines the revised workflow execution with improved agent handovers.\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport asyncio\r\nimport uuid\r\nfrom datetime import datetime\r\nfrom typing import List, Dict, Any, Optional, Tuple, Union\r\n\r\nlogger = logging.getLogger(__name__)\r\nlogging.getLogger('matplotlib.font_manager').setLevel(logging.INFO)\r\n\r\nclass WorkflowExecution:\r\n \"\"\"\r\n Handles the execution of workflows with improved agent collaboration.\r\n Integrates planning and execution phases for better context awareness.\r\n \"\"\"\r\n \r\n def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface):\r\n \"\"\"Initialize the workflow execution\"\"\"\r\n self.workflow_manager = workflow_manager\r\n self.workflow_id = workflow_id\r\n self.mandate_id = mandate_id\r\n self.user_id = user_id\r\n self.ai_service = ai_service\r\n self.lucydom_interface = lucydom_interface\r\n \r\n # Import necessary modules\r\n from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils\r\n from modules.agentservice_registry import AgentRegistry\r\n from modules.agentservice_filemanager import get_workflow_file_manager\r\n \r\n # Initialize utilities\r\n self.workflow_utils = WorkflowUtils(workflow_id)\r\n self.message_utils = MessageUtils()\r\n self.logging_utils = LoggingUtils(workflow_id, self._add_log)\r\n \r\n # Initialize agent registry\r\n self.agent_registry = AgentRegistry.get_instance()\r\n # Set dependencies for agents\r\n\r\n # Initialize file manager\r\n self.file_manager = get_workflow_file_manager(workflow_id, lucydom_interface)\r\n\r\n # Import and initialize document handler\r\n from modules.agentservice_document_handler import get_document_handler\r\n self.document_handler = get_document_handler(workflow_id, lucydom_interface, ai_service)\r\n \r\n self.agent_registry.set_dependencies(\r\n ai_service=ai_service,\r\n document_handler=self.document_handler,\r\n lucydom_interface=lucydom_interface\r\n )\r\n \r\n async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False):\r\n \"\"\"\r\n Execute the workflow with integrated planning and agent selection.\r\n \r\n Args:\r\n message: The initiating message (prompt or user input)\r\n workflow: The workflow object\r\n files: Optional list of file metadata\r\n is_user_input: Flag indicating if this is user input\r\n \r\n Returns:\r\n Dict with workflow status and result\r\n \"\"\"\r\n try:\r\n # 1. Initialize workflow logging\r\n self.logging_utils.info(\"Starting workflow execution\", \"workflow\", \"Workflow initialized\")\r\n \r\n # 2. Process user message and files\r\n user_message = await self._process_user_message(workflow, message, files)\r\n self.logging_utils.info(\"User message processed\", \"workflow\", \"User input added to workflow\")\r\n \r\n # 3. Create agent-aware work plan\r\n work_plan = await self._create_agent_aware_work_plan(workflow, user_message)\r\n self.logging_utils.info(f\"Created agent-aware work plan with {len(work_plan)} activities\", \"planning\")\r\n self.logging_utils.debug(f\"{work_plan}.\", \"planning\")\r\n \r\n # 4. Execute the activities in the work plan\r\n results = await self._execute_work_plan(workflow, work_plan)\r\n \r\n # 5. Create summary\r\n summary = await self._create_summary(workflow, results)\r\n self.logging_utils.info(\"Created workflow summary\", \"summary\")\r\n \r\n # Set workflow status to completed\r\n workflow[\"status\"] = \"completed\"\r\n workflow[\"last_activity\"] = datetime.now().isoformat()\r\n \r\n # Final save\r\n self.workflow_manager._save_workflow(workflow)\r\n \r\n return {\r\n \"workflow_id\": self.workflow_id,\r\n \"status\": \"completed\",\r\n \"messages\": workflow.get(\"messages\", [])\r\n }\r\n \r\n except Exception as e:\r\n self.logging_utils.error(f\"Workflow execution failed: {str(e)}\", \"error\")\r\n workflow[\"status\"] = \"failed\"\r\n self.workflow_manager._save_workflow(workflow)\r\n \r\n return {\r\n \"workflow_id\": self.workflow_id,\r\n \"status\": \"failed\",\r\n \"error\": str(e)\r\n }\r\n \r\n async def _process_user_message(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Process the user message and add it to the workflow.\r\n \r\n Args:\r\n workflow: The workflow object\r\n message: The user message\r\n files: Optional list of file metadata\r\n \r\n Returns:\r\n The processed user message\r\n \"\"\"\r\n # Create a message with user input\r\n user_message = self._create_message(workflow, message.get(\"role\", \"user\"))\r\n user_message[\"content\"] = message.get(\"content\", \"\")\r\n \r\n # Process files if provided\r\n if files and len(files) > 0:\r\n self.logging_utils.info(f\"Processing {len(files)} files\", \"files\")\r\n \r\n # Add files to message via file manager instead of _process_files\r\n user_message = await self.file_manager.add_files_to_message(\r\n user_message, \r\n [f.get('id') for f in files],\r\n self._add_log\r\n )\r\n \r\n # Add the message to the workflow\r\n if \"messages\" not in workflow:\r\n workflow[\"messages\"] = []\r\n workflow[\"messages\"].append(user_message)\r\n \r\n # Save workflow state\r\n self.workflow_manager._save_workflow(workflow)\r\n \r\n return user_message\r\n\r\n async def _create_agent_aware_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Create an agent-aware work plan that integrates agent selection during planning.\r\n \r\n Args:\r\n workflow: The workflow object\r\n message: The initiating message\r\n \r\n Returns:\r\n List of structured activities with agent assignments\r\n \"\"\"\r\n # Extract context information\r\n task = message.get(\"content\", \"\")\r\n\r\n\r\n # Check for PowerOn keyword\r\n from modules.agentservice_utils import async_check_for_poweron\r\n poweron_response = await async_check_for_poweron(task, self.ai_service)\r\n if poweron_response:\r\n # Create a simple work plan with a custom PowerOn agent\r\n self.logging_utils.info(\"PowerOn keyword detected, creating special response plan\", \"planning\")\r\n return [{\r\n \"title\": \"PowerOn Response\",\r\n \"description\": \"Generate special PowerOn response\",\r\n \"assigned_agents\": [\"poweron\"],\r\n \"agent_prompts\": [poweron_response], # Use the generated response as the prompt\r\n \"document_requirements\": \"\",\r\n \"expected_output\": \"Text\",\r\n \"dependencies\": []\r\n }]\r\n # If not PowerOn, continue with normal workflow planning...\r\n \r\n\r\n # Get all available agents and their capabilities\r\n agent_infos = self.agent_registry.get_agent_infos()\r\n \r\n # Extract documents\r\n documents = message.get(\"documents\", [])\r\n document_info = []\r\n for doc in documents:\r\n source = doc.get(\"source\", {})\r\n document_info.append({\r\n \"id\": doc.get(\"id\"),\r\n \"name\": source.get(\"name\", \"unnamed\"),\r\n \"type\": source.get(\"type\", \"unknown\"),\r\n \"content_type\": source.get(\"content_type\", \"unknown\")\r\n })\r\n \r\n # Create the planning prompt with agent awareness and document handling information\r\n plan_prompt = f\"\"\"\r\nAs an AI workflow manager, create a detailed agent-aware work plan for the following task:\r\n\r\nTASK: {task}\r\n\r\nAVAILABLE AGENTS:\r\n{self._format_agent_info(agent_infos)}\r\n\r\nAVAILABLE DOCUMENTS:\r\n{document_info if document_info else \"No documents provided\"}\r\n\r\nIMPORTANT: Document extraction happens automatically in the workflow. Documents in the message are already available to all agents. DO NOT assign agent_coder or any other agent specifically for just reading or extracting document content. Only assign agents for tasks that require specific processing beyond what the document handler already provides.\r\n\r\nThe work plan should include a structured list of activities. Each activity should have:\r\n1. title - A short descriptive title for the activity\r\n2. description - What needs to be done in this activity\r\n3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence)\r\n4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents)\r\n5. document_requirements - Description of which documents are needed for this activity (these will be automatically extracted)\r\n6. expected_output - The expected output format and content\r\n7. dependencies - List of previous activities this depends on (by index)\r\n\r\nIMPORTANT GUIDELINES:\r\n- Each activity should have clear objectives and be assigned to the most appropriate agent(s)\r\n- When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them\r\n- Documents are processed on-demand by the system's document handler, so only specify which documents are needed, not how to extract them\r\n- DO NOT create activities that only read or extract document content - this happens automatically\r\n- Create a logical sequence where later activities can use outputs from earlier ones\r\n- If no specialized agent is needed for a task, use the default \"assistant\" agent\r\n- Only use the agent_coder for tasks that require actual coding or complex data analysis, not for simply reading documents\r\n\r\nReturn the work plan as a JSON array of activity objects, each with the above properties.\r\n\"\"\"\r\n \r\n self.logging_utils.info(\"Creating agent-aware work plan\", \"planning\")\r\n \r\n # Call AI to generate work plan\r\n try:\r\n plan_response = await self.ai_service.call_api([{\"role\": \"user\", \"content\": plan_prompt}])\r\n \r\n # Extract JSON plan\r\n import json\r\n import re\r\n \r\n # Look for JSON array in the response\r\n json_pattern = r'\\[\\s*\\{.*\\}\\s*\\]'\r\n json_match = re.search(json_pattern, plan_response, re.DOTALL)\r\n \r\n if json_match:\r\n json_str = json_match.group(0)\r\n work_plan = json.loads(json_str)\r\n self.logging_utils.info(f\"Work plan created with {len(work_plan)} activities\", \"planning\")\r\n return work_plan\r\n else:\r\n self.logging_utils.warning(\"Could not extract JSON from AI response\", \"planning\")\r\n \r\n # Fallback: Create a simple default work plan\r\n return [{\r\n \"title\": \"Process Task\",\r\n \"description\": \"Process the user's request directly\",\r\n \"assigned_agents\": [\"assistant\"],\r\n \"agent_prompts\": [task],\r\n \"document_requirements\": \"All available documents may be needed\",\r\n \"expected_output\": \"Text\",\r\n \"dependencies\": []\r\n }]\r\n \r\n except Exception as e:\r\n self.logging_utils.error(f\"Error creating work plan: {str(e)}\", \"planning\")\r\n # Return a minimal fallback plan\r\n return [{\r\n \"title\": \"Process Task (Error Recovery)\",\r\n \"description\": \"Process the user's request after planning error\",\r\n \"assigned_agents\": [\"assistant\"],\r\n \"agent_prompts\": [task],\r\n \"document_requirements\": \"All available documents may be needed\",\r\n \"expected_output\": \"Text\",\r\n \"dependencies\": []\r\n }]\r\n \r\n async def _execute_work_plan(self, workflow: Dict[str, Any], work_plan: List[Dict[str, Any]]) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n Execute all activities in the work plan with proper agent handovers.\r\n \r\n Args:\r\n workflow: The workflow object\r\n work_plan: The work plan with activities\r\n \r\n Returns:\r\n Results from all activities\r\n \"\"\"\r\n results = []\r\n activity_outputs = {} # Store outputs for dependency resolution\r\n \r\n for activity_index, activity in enumerate(work_plan):\r\n # Extract activity info\r\n title = activity.get(\"title\", f\"Activity {activity_index+1}\")\r\n description = activity.get(\"description\", \"\")\r\n assigned_agents = activity.get(\"assigned_agents\", [\"assistant\"])\r\n agent_prompts = activity.get(\"agent_prompts\", [description])\r\n doc_requirements = activity.get(\"document_requirements\", \"\")\r\n expected_output = activity.get(\"expected_output\", \"Text\")\r\n dependencies = activity.get(\"dependencies\", [])\r\n \r\n self.logging_utils.info(f\"Starting activity: {title}\", \"execution\")\r\n \r\n # Validate assigned_agents and agent_prompts\r\n if len(assigned_agents) > len(agent_prompts):\r\n # Duplicate the last prompt for additional agents\r\n agent_prompts.extend([agent_prompts[-1]] * (len(assigned_agents) - len(agent_prompts)))\r\n elif len(agent_prompts) > len(assigned_agents):\r\n # Truncate excess prompts\r\n agent_prompts = agent_prompts[:len(assigned_agents)]\r\n \r\n # Process dependencies first\r\n dependency_context = {}\r\n for dep_index in dependencies:\r\n if dep_index < activity_index and dep_index in activity_outputs:\r\n dep_output = activity_outputs[dep_index]\r\n dependency_context[f\"activity_{dep_index+1}\"] = dep_output\r\n \r\n # Extract required documents if needed\r\n document_content = \"\"\r\n if doc_requirements:\r\n extracted_data = await self._extract_required_documents(workflow, doc_requirements)\r\n if extracted_data and \"extracted_content\" in extracted_data:\r\n # Format document content for the prompt\r\n document_content = \"\\n\\n=== EXTRACTED DOCUMENT CONTENT ===\\n\\n\"\r\n for item in extracted_data.get(\"extracted_content\", []):\r\n doc_name = item.get(\"name\", \"Unnamed document\")\r\n doc_content = item.get(\"content\", \"No content available\")\r\n document_content += f\"--- {doc_name} ---\\n{doc_content}\\n\\n\"\r\n \r\n # Execute the activity with the assigned agents\r\n activity_result = await self._execute_agent_sequence(\r\n workflow,\r\n assigned_agents,\r\n agent_prompts,\r\n document_content,\r\n dependency_context,\r\n expected_output\r\n )\r\n \r\n # Store the result\r\n activity_outputs[activity_index] = activity_result\r\n results.append({\r\n \"title\": title,\r\n \"description\": description,\r\n \"agents\": assigned_agents,\r\n \"result\": activity_result.get(\"content\", \"\"),\r\n \"output_format\": activity_result.get(\"format\", \"Text\")\r\n })\r\n \r\n self.logging_utils.info(f\"Completed activity: {title}\", \"execution\")\r\n \r\n # Save intermediate state\r\n self.workflow_manager._save_workflow(workflow)\r\n \r\n return results\r\n \r\n async def _execute_agent_sequence(\r\n self, \r\n workflow: Dict[str, Any],\r\n agent_ids: List[str],\r\n prompts: List[str],\r\n document_content: str,\r\n dependency_context: Dict[str, Any],\r\n expected_output: str\r\n ) -> Dict[str, Any]:\r\n \"\"\"\r\n Execute a sequence of agents with proper handovers.\r\n \r\n Args:\r\n workflow: The workflow object\r\n agent_ids: List of agent IDs to execute in sequence\r\n prompts: List of prompts for each agent\r\n document_content: Extracted document content\r\n dependency_context: Context from dependent activities\r\n expected_output: Expected output format\r\n \r\n Returns:\r\n Result of the agent sequence execution\r\n \"\"\"\r\n\r\n # Check if this is a PowerOn activity (which would be passed from execute_work_plan)\r\n if agent_ids and agent_ids[0] == \"poweron\" and prompts and prompts[0]:\r\n # This is a PowerOn response - create message directly\r\n response_message = self._create_message(workflow, \"assistant\")\r\n response_message[\"content\"] = prompts[0] # Use the pre-generated response\r\n response_message[\"agent_type\"] = \"poweron\"\r\n response_message[\"agent_id\"] = \"poweron\"\r\n response_message[\"agent_name\"] = \"PowerOn Assistant\"\r\n response_message[\"finished_at\"] = datetime.now().isoformat()\r\n response_message[\"status\"] = \"completed\"\r\n \r\n # Add to workflow\r\n workflow[\"messages\"].append(response_message)\r\n \r\n return {\r\n \"content\": response_message[\"content\"],\r\n \"format\": \"Text\",\r\n \"agent_id\": \"poweron\"\r\n }\r\n \r\n # Normal agent execution code...\r\n\r\n context = {\r\n \"workflow_id\": self.workflow_id,\r\n \"expected_format\": expected_output,\r\n \"dependency_outputs\": dependency_context\r\n }\r\n \r\n last_result = None\r\n last_documents = []\r\n \r\n for i, agent_id in enumerate(agent_ids):\r\n # Get the agent\r\n agent = self.agent_registry.get_agent(agent_id)\r\n if agent:\r\n # Ensure dependencies are set\r\n if hasattr(agent, 'set_dependencies'):\r\n agent.set_dependencies(\r\n ai_service=self.ai_service,\r\n document_handler=self.document_handler,\r\n lucydom_interface=self.lucydom_interface\r\n )\r\n \r\n # Set document handler if agent supports it\r\n if hasattr(agent, 'set_document_handler') and hasattr(self, 'document_handler'):\r\n agent.set_document_handler(self.document_handler)\r\n\r\n \r\n if not agent:\r\n self.logging_utils.warning(f\"Agent '{agent_id}' not found, using assistant instead\", \"agents\")\r\n agent = self.agent_registry.get_agent(\"assistant\")\r\n if not agent:\r\n # If assistant not found, create a minimal agent response\r\n continue\r\n \r\n # Get the agent prompt\r\n base_prompt = prompts[i] if i < len(prompts) else prompts[-1]\r\n \r\n # Enhance the prompt with context\r\n enhanced_prompt = self._enhance_prompt(\r\n base_prompt, \r\n document_content, \r\n dependency_context, \r\n last_result.get(\"content\", \"\") if last_result else \"\",\r\n i > 0 # is_continuation flag\r\n )\r\n \r\n # Create the message for this agent\r\n agent_message = self._create_message(workflow, \"user\")\r\n agent_message[\"content\"] = enhanced_prompt\r\n \r\n # IMPORTANT FIX: Document handling logic\r\n # First, check if we have documents from previous agent if this is a continuation\r\n if last_documents and i > 0:\r\n agent_message[\"documents\"] = last_documents\r\n # For the first agent, make sure we pass any documents from the most recent user message \r\n elif i == 0:\r\n # Find the most recent user message with documents\r\n for msg in reversed(workflow.get(\"messages\", [])):\r\n if msg.get(\"role\") == \"user\" and msg.get(\"documents\"):\r\n agent_message[\"documents\"] = msg.get(\"documents\", [])\r\n self.logging_utils.info(f\"Passing {len(agent_message['documents'])} documents from user message to {agent_id}\", \"agents\")\r\n break\r\n \r\n # Log agent execution\r\n self.logging_utils.info(f\"Executing agent: {agent_id}\", \"agents\")\r\n \r\n # Execute the agent\r\n agent_response = await agent.process_message(agent_message, context)\r\n \r\n # Create response message\r\n response_message = self._create_message(workflow, \"assistant\")\r\n response_message[\"content\"] = agent_response.get(\"content\", \"\")\r\n response_message[\"agent_type\"] = agent_id\r\n response_message[\"agent_id\"] = agent_id\r\n response_message[\"agent_name\"] = agent.name\r\n response_message[\"result_format\"] = agent_response.get(\"result_format\", expected_output)\r\n \r\n # Capture documents from response\r\n if \"documents\" in agent_response:\r\n response_message[\"documents\"] = agent_response[\"documents\"]\r\n last_documents = agent_response[\"documents\"]\r\n self.logging_utils.info(f\"Agent {agent_id} produced {len(last_documents)} documents\", \"agents\")\r\n \r\n # Add to workflow\r\n workflow[\"messages\"].append(response_message)\r\n \r\n # Update last result\r\n last_result = {\r\n \"content\": agent_response.get(\"content\", \"\"),\r\n \"format\": agent_response.get(\"result_format\", expected_output),\r\n \"agent_id\": agent_id,\r\n \"documents\": agent_response.get(\"documents\", [])\r\n }\r\n \r\n return last_result or {\r\n \"content\": \"No agent response was generated.\",\r\n \"format\": \"Text\"\r\n }\r\n \r\n \r\n async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]:\r\n \"\"\"\r\n Extract required documents based on requirements description.\r\n \r\n Args:\r\n workflow: The workflow object\r\n doc_requirements: Description of document requirements\r\n \r\n Returns:\r\n Extracted document data\r\n \"\"\"\r\n # Import for data extraction\r\n from modules.agentservice_dataextraction import data_extraction\r\n \r\n # Get all files from the workflow\r\n files = self.workflow_utils.get_files(workflow)\r\n \r\n # Get all messages from the workflow\r\n workflow_messages = workflow.get(\"messages\", [])\r\n \r\n # Extract data using the dataextraction module\r\n extracted_data = await data_extraction(\r\n prompt=doc_requirements,\r\n files=files,\r\n messages=workflow_messages,\r\n ai_service=self.ai_service,\r\n lucydom_interface=self.lucydom_interface,\r\n workflow_id=self.workflow_id,\r\n add_log_func=self._add_log\r\n )\r\n \r\n return extracted_data\r\n \r\n async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:\r\n \"\"\"\r\n Create a summary of the workflow results for the user.\r\n \r\n Args:\r\n workflow: The workflow object\r\n results: Results from activity executions\r\n \r\n Returns:\r\n Summary message\r\n \"\"\"\r\n # Create a summary prompt\r\n summary_prompt = \"Create a clear, concise summary of the following workflow results:\\n\\n\"\r\n \r\n for i, result in enumerate(results, 1):\r\n title = result.get(\"title\", f\"Activity {i}\")\r\n description = result.get(\"description\", \"\")\r\n content = result.get(\"result\", \"\")\r\n agents = \", \".join(result.get(\"agents\", [\"unknown\"]))\r\n \r\n # Limit content length for the summary prompt\r\n content_preview = content[:500] + \"...\" if len(content) > 500 else content\r\n \r\n summary_prompt += f\"\"\"\r\n ACTIVITY {i}: {title}\r\n Description: {description}\r\n Executed by: {agents}\r\n \r\n {content_preview}\r\n \r\n ---\r\n \"\"\"\r\n \r\n summary_prompt += \"\"\"\r\n Provide a well-structured summary that:\r\n 1. Highlights the key findings and results\r\n 2. Connects the results to the original task\r\n 3. Presents any conclusions or recommendations\r\n \r\n Make sure the summary is clear, concise, and useful to the user.\r\n \"\"\"\r\n \r\n # Call AI to generate summary\r\n summary_content = await self.ai_service.call_api([{\"role\": \"user\", \"content\": summary_prompt}])\r\n \r\n # Create summary message\r\n summary_message = self._create_message(workflow, \"assistant\")\r\n summary_message[\"content\"] = summary_content\r\n summary_message[\"agent_type\"] = \"summary\"\r\n summary_message[\"agent_id\"] = \"workflow_summary\"\r\n summary_message[\"agent_name\"] = \"Workflow Summary\"\r\n summary_message[\"result_format\"] = \"Text\"\r\n summary_message[\"workflow_complete\"] = True\r\n \r\n # Add to workflow\r\n workflow[\"messages\"].append(summary_message)\r\n \r\n return summary_message\r\n \r\n def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]:\r\n \"\"\"Create a new message object for the workflow\"\"\"\r\n message_id = f\"msg_{uuid.uuid4()}\"\r\n current_time = datetime.now().isoformat()\r\n \r\n # Determine sequence number\r\n sequence_no = 1\r\n if \"messages\" in workflow and workflow[\"messages\"]:\r\n sequence_no = len(workflow[\"messages\"]) + 1\r\n \r\n # Create message object\r\n message = {\r\n \"id\": message_id,\r\n \"workflow_id\": self.workflow_id,\r\n \"parent_message_id\": None,\r\n \"started_at\": current_time,\r\n \"finished_at\": None,\r\n \"sequence_no\": sequence_no,\r\n \r\n \"status\": \"pending\",\r\n \"role\": role,\r\n \r\n \"data_stats\": {\r\n \"processing_time\": 0.0,\r\n \"token_count\": 0,\r\n \"bytes_sent\": 0,\r\n \"bytes_received\": 0\r\n },\r\n \r\n \"documents\": [],\r\n \"content\": None,\r\n \"agent_type\": None\r\n }\r\n \r\n return message\r\n \r\n def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: str = None, agent_name: str = None):\r\n \"\"\"Add a log entry to the workflow\"\"\"\r\n # This calls back to the workflow manager's log function\r\n self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name)\r\n \r\n def _format_agent_info(self, agent_infos: List[Dict[str, Any]]) -> str:\r\n \"\"\"Format agent information for the planning prompt\"\"\"\r\n formatted_info = \"\"\r\n for agent in agent_infos:\r\n formatted_info += f\"\"\"\r\n - ID: {agent.get('id', 'unknown')}\r\n Name: {agent.get('name', '')}\r\n Type: {agent.get('type', '')}\r\n Description: {agent.get('description', '')}\r\n Capabilities: {agent.get('capabilities', '')}\r\n Result Format: {agent.get('result_format', 'Text')}\r\n \"\"\"\r\n return formatted_info\r\n \r\n def _enhance_prompt(\r\n self, \r\n base_prompt: str, \r\n document_content: str, \r\n dependency_context: Dict[str, Any],\r\n previous_result: str,\r\n is_continuation: bool\r\n ) -> str:\r\n \"\"\"\r\n Enhance a prompt with context information.\r\n \r\n Args:\r\n base_prompt: The original prompt\r\n document_content: Extracted document content\r\n dependency_context: Context from dependent activities\r\n previous_result: Result from previous agent in sequence\r\n is_continuation: Flag indicating if this is a continuation\r\n \r\n Returns:\r\n Enhanced prompt\r\n \"\"\"\r\n enhanced_prompt = base_prompt\r\n \r\n # Add continuation context if this is a continuation\r\n if is_continuation and previous_result:\r\n enhanced_prompt = f\"\"\"\r\n{enhanced_prompt}\r\n\r\n=== PREVIOUS AGENT OUTPUT ===\r\n{previous_result}\r\n\"\"\"\r\n \r\n # Add document content if available\r\n if document_content:\r\n enhanced_prompt += f\"\\n\\n{document_content}\"\r\n \r\n # Add dependency context if available\r\n if dependency_context:\r\n dependency_section = \"\\n\\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\\n\\n\"\r\n for name, value in dependency_context.items():\r\n if isinstance(value, dict) and \"content\" in value:\r\n # Extract content if it's in the standard format\r\n dependency_section += f\"--- {name} ---\\n{value['content']}\\n\\n\"\r\n else:\r\n # Use the value directly\r\n dependency_section += f\"--- {name} ---\\n{str(value)}\\n\\n\"\r\n \r\n enhanced_prompt += dependency_section\r\n \r\n return enhanced_prompt",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_44fbc9d7-8ecb-40ed-98b6-ba8657ba2b07",
"source": {
"type": "file",
"id": "file_0d0fc1da-f9d2-4b36-a2fd-dd054c2582c1",
"name": "agentservice_workflow_manager.py",
"content_type": "text/x-python",
"size": 29007,
"upload_date": "2025-04-16T01:48:45.242110"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nRefactored WorkflowManager class for the Agentservice (continued).\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport asyncio\r\nimport uuid\r\nfrom datetime import datetime\r\nfrom typing import List, Dict, Any, Optional, Tuple, Union\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nclass WorkflowManager:\r\n\r\n def __init__(self, mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None):\r\n \"\"\"Initialize the WorkflowManager.\"\"\"\r\n self.mandate_id = mandate_id\r\n self.user_id = user_id\r\n self.ai_service = ai_service\r\n self.lucydom_interface = lucydom_interface\r\n \r\n # Cache for workflows\r\n self.workflows = {}\r\n \r\n # Directory for results\r\n self.results_dir = os.path.join(\"results\", \"workflows\")\r\n os.makedirs(self.results_dir, exist_ok=True)\r\n \r\n # Initialize document handler\r\n from modules.agentservice_document_handler import get_document_handler\r\n self.document_handler = get_document_handler(\r\n lucydom_interface=lucydom_interface,\r\n ai_service=ai_service\r\n )\r\n \r\n # Initialize agent registry with dependencies\r\n from modules.agentservice_registry import AgentRegistry\r\n registry = AgentRegistry.get_instance()\r\n registry.set_dependencies(\r\n ai_service=ai_service,\r\n document_handler=self.document_handler,\r\n lucydom_interface=lucydom_interface\r\n )\r\n\r\n async def list_workflows(self, mandate_id: int = None, user_id: int = None) -> List[Dict[str, Any]]:\r\n \"\"\"\r\n List all available workflows.\r\n \r\n Args:\r\n mandate_id: Optional mandate ID for filtering\r\n user_id: Optional user ID for filtering\r\n \r\n Returns:\r\n List of workflow summaries\r\n \"\"\"\r\n workflows = []\r\n \r\n # Load from database if available\r\n if self.lucydom_interface:\r\n try:\r\n # Get all workflows for the user\r\n if user_id is not None:\r\n user_workflows = self.lucydom_interface.get_workflows_by_user(user_id)\r\n else:\r\n user_workflows = self.lucydom_interface.get_all_workflows()\r\n \r\n # Filter by mandate if specified\r\n if mandate_id is not None:\r\n user_workflows = [wf for wf in user_workflows if wf.get(\"mandate_id\") == mandate_id]\r\n \r\n # Create workflow summaries\r\n for workflow in user_workflows:\r\n summary = {\r\n \"id\": workflow.get(\"id\"),\r\n \"name\": workflow.get(\"name\", f\"Workflow {workflow.get('id')}\"),\r\n \"status\": workflow.get(\"status\"),\r\n \"started_at\": workflow.get(\"started_at\"),\r\n \"last_activity\": workflow.get(\"last_activity\"),\r\n \"completed_at\": workflow.get(\"completed_at\")\r\n }\r\n \r\n # Add message count if available\r\n messages = self.lucydom_interface.get_workflow_messages(workflow.get(\"id\"))\r\n if messages:\r\n summary[\"message_count\"] = len(messages)\r\n \r\n workflows.append(summary)\r\n \r\n logger.info(f\"Loaded {len(workflows)} workflows from database\")\r\n \r\n # Sort by last activity (newest first)\r\n return sorted(workflows, key=lambda w: w.get(\"last_activity\", \"\"), reverse=True)\r\n \r\n except Exception as e:\r\n logger.error(f\"Error retrieving workflows from database: {str(e)}\")\r\n \r\n # Load from files if no database or error occurred\r\n try:\r\n for filename in os.listdir(self.results_dir):\r\n if filename.startswith(\"workflow_\") and filename.endswith(\".json\"):\r\n workflow_path = os.path.join(self.results_dir, filename)\r\n \r\n try:\r\n import json\r\n with open(workflow_path, 'r', encoding='utf-8') as f:\r\n workflow = json.load(f)\r\n \r\n # Check if mandate and user ID match filters\r\n if mandate_id is not None and workflow.get(\"mandate_id\") != mandate_id:\r\n continue\r\n \r\n if user_id is not None and workflow.get(\"user_id\") != user_id:\r\n continue\r\n \r\n # Create workflow summary\r\n summary = {\r\n \"id\": workflow.get(\"id\"),\r\n \"name\": workflow.get(\"name\", f\"Workflow {workflow.get('id')}\"),\r\n \"status\": workflow.get(\"status\"),\r\n \"started_at\": workflow.get(\"started_at\"),\r\n \"last_activity\": workflow.get(\"last_activity\"),\r\n \"message_count\": len(workflow.get(\"messages\", []))\r\n }\r\n \r\n workflows.append(summary)\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow file {filename}: {str(e)}\")\r\n \r\n logger.info(f\"Loaded {len(workflows)} workflows from files\")\r\n \r\n # Sort by last activity (newest first)\r\n return sorted(workflows, key=lambda w: w.get(\"last_activity\", \"\"), reverse=True)\r\n \r\n except Exception as e:\r\n logger.error(f\"Error listing workflows: {str(e)}\")\r\n return []\r\n\r\n async def execute_workflow(self, message: Dict[str, Any], files: List[Dict[str, Any]] = None, workflow_id: str = None, is_user_input: bool = False) -> Dict[str, Any]:\r\n \"\"\"\r\n Execute a workflow with the given message and files.\r\n \r\n Args:\r\n message: Input message (prompt)\r\n files: Optional list of file metadata\r\n workflow_id: Optional ID for continuing an existing workflow\r\n is_user_input: Flag indicating if this is user input to an existing workflow\r\n \r\n Returns:\r\n Workflow execution result\r\n \"\"\"\r\n\r\n # Use provided workflow_id or generate a new one for a new workflow\r\n if not workflow_id:\r\n workflow_id = f\"wf_{uuid.uuid4()}\"\r\n # Initialize a new workflow\r\n workflow = self._initialize_workflow(workflow_id)\r\n else:\r\n # Load existing workflow for continuation\r\n workflow = await self.load_workflow(workflow_id)\r\n if not workflow:\r\n # Fallback: initialize a new workflow with the provided ID\r\n workflow = self._initialize_workflow(workflow_id)\r\n \r\n # Capture start time\r\n start_time = datetime.now()\r\n \r\n try:\r\n # Create WorkflowExecution with document handler\r\n from modules.agentservice_workflow_execution import WorkflowExecution\r\n execution = WorkflowExecution(\r\n workflow_manager=self,\r\n workflow_id=workflow_id,\r\n mandate_id=self.mandate_id,\r\n user_id=self.user_id,\r\n ai_service=self.ai_service,\r\n lucydom_interface=self.lucydom_interface\r\n )\r\n \r\n # Set the document handler's workflow ID\r\n self.document_handler.set_workflow_id(workflow_id)\r\n \r\n # Execute the workflow\r\n result = await execution.execute(message, workflow, files, is_user_input)\r\n \r\n # Calculate duration\r\n duration = (datetime.now() - start_time).total_seconds()\r\n \r\n # Update workflow stats\r\n if \"data_stats\" not in workflow:\r\n workflow[\"data_stats\"] = {\r\n \"total_processing_time\": 0.0,\r\n \"total_token_count\": 0,\r\n \"total_bytes_sent\": 0,\r\n \"total_bytes_received\": 0\r\n }\r\n workflow[\"data_stats\"][\"total_processing_time\"] = duration\r\n workflow[\"completed_at\"] = datetime.now().isoformat()\r\n \r\n # Save final state\r\n self._save_workflow(workflow)\r\n \r\n return result\r\n \r\n except Exception as e:\r\n logger.error(f\"Error executing workflow: {str(e)}\", exc_info=True)\r\n \r\n # Update workflow status\r\n workflow[\"status\"] = \"failed\"\r\n workflow[\"last_activity\"] = datetime.now().isoformat()\r\n self._add_log(workflow, f\"Workflow execution failed: {str(e)}\", \"error\")\r\n \r\n # Save failed state\r\n self._save_workflow(workflow)\r\n \r\n return {\r\n \"workflow_id\": workflow_id,\r\n \"status\": \"failed\",\r\n \"error\": str(e)\r\n }\r\n\r\n def _save_workflow(self, workflow: Dict[str, Any]) -> bool:\r\n \"\"\"\r\n Save workflow state to database and/or file.\r\n Enhanced to handle structured documents.\r\n \r\n Args:\r\n workflow: The workflow object to save\r\n \r\n Returns:\r\n True if saved successfully, False otherwise\r\n \"\"\"\r\n try:\r\n workflow_id = workflow.get(\"id\")\r\n \r\n # Update in-memory cache\r\n self.workflows[workflow_id] = workflow\r\n \r\n # Update in database if available\r\n if self.lucydom_interface:\r\n # NEW: Enhanced document handling for database persistence\r\n # Create a copy of the workflow for database storage\r\n db_workflow = workflow.copy()\r\n \r\n # Process messages to ensure documents are properly formatted\r\n if \"messages\" in db_workflow:\r\n for i, message in enumerate(db_workflow[\"messages\"]):\r\n # ensure large document contents are truncated for database storage\r\n if \"documents\" in message:\r\n for j, doc in enumerate(message[\"documents\"]):\r\n if \"contents\" in doc:\r\n for k, content in enumerate(doc[\"contents\"]):\r\n if content.get(\"type\") == \"text\" and \"text\" in content:\r\n # limit text size for database storage\r\n text = content[\"text\"]\r\n if len(text) > 1000: # Reasonable size for preview\r\n db_workflow[\"messages\"][i][\"documents\"][j][\"contents\"][k][\"text\"] = \\\r\n text[:1000] + \"... [truncated for storage]\"\r\n \r\n # Save to database\r\n try:\r\n self.lucydom_interface.save_workflow_state(db_workflow)\r\n logger.info(f\"Workflow {workflow_id} saved to database\")\r\n except Exception as db_error:\r\n logger.error(f\"Error saving workflow to database: {str(db_error)}\")\r\n # Continue to file saving even if database fails\r\n \r\n # Save to file (always do this as backup)\r\n import json\r\n workflow_path = os.path.join(self.results_dir, f\"workflow_{workflow_id}.json\")\r\n \r\n with open(workflow_path, 'w', encoding='utf-8') as f:\r\n json.dump(workflow, f, indent=2, ensure_ascii=False)\r\n \r\n logger.info(f\"Workflow {workflow_id} saved to file: {workflow_path}\")\r\n return True\r\n \r\n except Exception as e:\r\n logger.error(f\"Error saving workflow state: {str(e)}\")\r\n return False\r\n\r\n async def load_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]:\r\n \"\"\"\r\n Load a workflow by ID.\r\n Enhanced to ensure document handler is properly configured.\r\n \r\n Args:\r\n workflow_id: ID of the workflow to load\r\n \r\n Returns:\r\n The workflow object or None if not found\r\n \"\"\"\r\n # Check memory cache first\r\n if workflow_id in self.workflows:\r\n workflow = self.workflows[workflow_id]\r\n \r\n # NEW: Configure document handler for this workflow\r\n self.document_handler.set_workflow_id(workflow_id)\r\n \r\n return workflow\r\n \r\n # Try to load from database\r\n if self.lucydom_interface:\r\n try:\r\n workflow = self.lucydom_interface.load_workflow_state(workflow_id)\r\n if workflow:\r\n # Cache in memory\r\n self.workflows[workflow_id] = workflow\r\n \r\n # NEW: Configure document handler for this workflow\r\n self.document_handler.set_workflow_id(workflow_id)\r\n \r\n logger.info(f\"Workflow {workflow_id} loaded from database\")\r\n return workflow\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow from database: {str(e)}\")\r\n \r\n # Try to load from file\r\n workflow_path = os.path.join(self.results_dir, f\"workflow_{workflow_id}.json\")\r\n \r\n if os.path.exists(workflow_path):\r\n try:\r\n import json\r\n with open(workflow_path, 'r', encoding='utf-8') as f:\r\n workflow = json.load(f)\r\n \r\n # Cache in memory\r\n self.workflows[workflow_id] = workflow\r\n \r\n # NEW: Configure document handler for this workflow\r\n self.document_handler.set_workflow_id(workflow_id)\r\n \r\n logger.info(f\"Workflow {workflow_id} loaded from file: {workflow_path}\")\r\n return workflow\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow from file: {str(e)}\")\r\n \r\n logger.warning(f\"Workflow {workflow_id} not found\")\r\n return None\r\n\r\n async def delete_workflow(self, workflow_id: str) -> bool:\r\n \"\"\"\r\n Delete a workflow.\r\n \r\n Args:\r\n workflow_id: ID of the workflow\r\n \r\n Returns:\r\n True on success, False if workflow not found\r\n \"\"\"\r\n # Remove from memory\r\n if workflow_id in self.workflows:\r\n del self.workflows[workflow_id]\r\n \r\n # Delete from database\r\n if self.lucydom_interface:\r\n try:\r\n db_success = self.lucydom_interface.delete_workflow(workflow_id)\r\n logger.info(f\"Workflow {workflow_id} deleted from database: {db_success}\")\r\n except Exception as e:\r\n logger.error(f\"Error deleting workflow {workflow_id} from database: {str(e)}\")\r\n \r\n # Delete file\r\n workflow_path = os.path.join(self.results_dir, f\"workflow_{workflow_id}.json\")\r\n \r\n try:\r\n if os.path.exists(workflow_path):\r\n os.remove(workflow_path)\r\n logger.info(f\"Workflow {workflow_id} deleted from file: {workflow_path}\")\r\n return True\r\n else:\r\n logger.warning(f\"Workflow {workflow_id} not found: {workflow_path}\")\r\n return False\r\n except Exception as e:\r\n logger.error(f\"Error deleting workflow file {workflow_id}: {str(e)}\")\r\n return False\r\n \r\n def _initialize_workflow(self, workflow_id: str) -> Dict[str, Any]:\r\n \"\"\"\r\n Initialize a new workflow.\r\n \r\n Args:\r\n workflow_id: ID of the workflow\r\n \r\n Returns:\r\n The initialized workflow object\r\n \"\"\"\r\n current_time = datetime.now().isoformat()\r\n \r\n # Create complete workflow object according to the data model\r\n workflow = {\r\n \"id\": workflow_id,\r\n \"name\": f\"Workflow {workflow_id}\",\r\n \"mandate_id\": self.mandate_id,\r\n \"user_id\": self.user_id,\r\n \"status\": \"running\",\r\n \"started_at\": current_time,\r\n \"last_activity\": current_time,\r\n \"current_round\": 1,\r\n \r\n # Complete statistics structure according to DataStats model\r\n \"data_stats\": {\r\n \"total_processing_time\": 0.0,\r\n \"total_token_count\": 0,\r\n \"total_bytes_sent\": 0,\r\n \"total_bytes_received\": 0\r\n },\r\n \r\n # Empty arrays for messages and logs\r\n \"messages\": [],\r\n \"logs\": []\r\n }\r\n \r\n # Log entry for workflow start\r\n self._add_log(workflow, \"Workflow started\", \"info\", \"workflow\", \"Workflow Management\")\r\n \r\n # Save workflow to database\r\n if self.lucydom_interface:\r\n try:\r\n # Direct save of the complete workflow object\r\n self.lucydom_interface.save_workflow_state(workflow)\r\n logger.info(f\"Workflow {workflow_id} created in database\")\r\n except Exception as e:\r\n logger.error(f\"Error creating workflow {workflow_id} in database: {str(e)}\")\r\n \r\n # Cache workflow in memory\r\n self.workflows[workflow_id] = workflow\r\n \r\n return workflow\r\n \r\n async def stop_workflow(self, workflow_id: str) -> bool:\r\n \"\"\"\r\n Stop a running workflow.\r\n \r\n Args:\r\n workflow_id: ID of the workflow to stop\r\n \r\n Returns:\r\n True on success, False if workflow not found or already stopped\r\n \"\"\"\r\n try:\r\n workflow = self.workflows.get(workflow_id)\r\n \r\n if not workflow:\r\n # Try to load the workflow\r\n workflow = await self.load_workflow(workflow_id)\r\n if not workflow:\r\n return False\r\n \r\n # If workflow is not running or completed, abort\r\n if workflow.get(\"status\") not in [\"running\", \"completed\"]:\r\n return False\r\n \r\n # Set status to stopped\r\n workflow[\"status\"] = \"stopped\"\r\n workflow[\"last_activity\"] = datetime.now().isoformat()\r\n \r\n self._add_log(workflow, \"Workflow was manually stopped\", \"info\", \"workflow\", \"Workflow Management\")\r\n \r\n # Save workflow\r\n self._save_workflow(workflow)\r\n \r\n return True\r\n except Exception as e:\r\n logger.error(f\"Error stopping workflow {workflow_id}: {str(e)}\")\r\n return False\r\n \r\n def _add_log(self, workflow: Dict[str, Any], message: str, log_type: str, agent_id: Optional[str] = None, agent_name: Optional[str] = None) -> None:\r\n \"\"\"Add a log entry to the workflow.\"\"\"\r\n # First, check if workflow is a string (ID) instead of dictionary\r\n if isinstance(workflow, str):\r\n # Try to load the workflow by ID\r\n workflow_id = workflow\r\n workflow = self.workflows.get(workflow_id)\r\n if not workflow:\r\n # Just log to the logger and return\r\n logger.info(f\"Log (couldn't add to workflow {workflow_id}): {log_type} - {message}\")\r\n return\r\n \r\n # Check if workflow is a dictionary\r\n if not isinstance(workflow, dict):\r\n logger.error(f\"Invalid workflow type: {type(workflow)}. Expected dictionary.\")\r\n # Just log to the logger and return\r\n logger.info(f\"Log (couldn't add to workflow): {log_type} - {message}\")\r\n return\r\n \r\n # Create log entry\r\n log_entry = {\r\n \"id\": f\"log_{uuid.uuid4()}\",\r\n \"message\": message,\r\n \"type\": log_type,\r\n \"timestamp\": datetime.now().isoformat(),\r\n \"agent_id\": agent_id,\r\n \"agent_name\": agent_name\r\n }\r\n \r\n # Add log entry to workflow\r\n if \"logs\" not in workflow:\r\n workflow[\"logs\"] = []\r\n \r\n workflow[\"logs\"].append(log_entry)\r\n \r\n # Update last activity\r\n workflow[\"last_activity\"] = log_entry[\"timestamp\"]\r\n \r\n # Save log entry to database if available\r\n if self.lucydom_interface:\r\n try:\r\n # Add workflow ID to log entry\r\n log_data = log_entry.copy()\r\n log_data[\"workflow_id\"] = workflow[\"id\"]\r\n \r\n self.lucydom_interface.create_workflow_log(log_data)\r\n logger.debug(f\"Log entry for workflow {workflow['id']} saved to database\")\r\n except Exception as e:\r\n logger.error(f\"Error saving log entry for workflow {workflow['id']} to database: {str(e)}\")\r\n \r\n # Also log to standard logger with the category prefix\r\n category_prefix = f\"[{agent_name or agent_id or 'Workflow'}]\" if agent_name or agent_id else \"\"\r\n log_message = f\"{category_prefix} {message}\"\r\n \r\n if log_type == \"error\":\r\n logger.error(log_message)\r\n elif log_type == \"warning\":\r\n logger.warning(log_message)\r\n else:\r\n logger.info(log_message)\r\n \r\n def get_workflow_status(self, workflow_id: str) -> Optional[Dict[str, Any]]:\r\n \"\"\"\r\n Get the status of a workflow.\r\n \r\n Args:\r\n workflow_id: ID of the workflow\r\n \r\n Returns:\r\n Dictionary with status information or None if workflow not found\r\n \"\"\"\r\n # Get from memory\r\n workflow = self.workflows.get(workflow_id)\r\n \r\n # If not in memory, load from database or file\r\n if not workflow:\r\n # Load from database if available\r\n if self.lucydom_interface:\r\n try:\r\n workflow_data = self.lucydom_interface.get_workflow(workflow_id)\r\n if workflow_data:\r\n workflow = workflow_data\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow status from database: {str(e)}\")\r\n \r\n # If not in database, load from file\r\n if not workflow:\r\n try:\r\n import json\r\n workflow_path = os.path.join(self.results_dir, f\"workflow_{workflow_id}.json\")\r\n if os.path.exists(workflow_path):\r\n with open(workflow_path, 'r', encoding='utf-8') as f:\r\n workflow = json.load(f)\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow status from file: {str(e)}\")\r\n return None\r\n \r\n if not workflow:\r\n return None\r\n \r\n # Extract status information\r\n status_info = {\r\n \"id\": workflow.get(\"id\"),\r\n \"name\": workflow.get(\"name\", f\"Workflow {workflow_id}\"),\r\n \"status\": workflow.get(\"status\"),\r\n \"progress\": 1.0 if workflow.get(\"status\") in [\"completed\", \"failed\", \"stopped\"] else 0.5,\r\n \"started_at\": workflow.get(\"started_at\"),\r\n \"last_activity\": workflow.get(\"last_activity\"),\r\n \"workflow_complete\": workflow.get(\"status\") == \"completed\",\r\n \"current_round\": workflow.get(\"current_round\", 1),\r\n \"data_stats\": workflow.get(\"data_stats\", {\r\n \"total_processing_time\": 0.0,\r\n \"total_token_count\": 0,\r\n \"total_bytes_sent\": 0,\r\n \"total_bytes_received\": 0\r\n })\r\n }\r\n \r\n return status_info\r\n \r\n def get_workflow_logs(self, workflow_id: str) -> Optional[List[Dict[str, Any]]]:\r\n \"\"\"\r\n Get logs for a workflow.\r\n \r\n Args:\r\n workflow_id: ID of the workflow\r\n \r\n Returns:\r\n List of logs or None if workflow not found\r\n \"\"\"\r\n # Get from memory\r\n workflow = self.workflows.get(workflow_id)\r\n \r\n # If not in memory, load from database\r\n if not workflow and self.lucydom_interface:\r\n try:\r\n logs = self.lucydom_interface.get_workflow_logs(workflow_id)\r\n return logs\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow logs from database: {str(e)}\")\r\n \r\n # If not in database or no interface available, load from file\r\n if not workflow:\r\n try:\r\n import json\r\n workflow_path = os.path.join(self.results_dir, f\"workflow_{workflow_id}.json\")\r\n if os.path.exists(workflow_path):\r\n with open(workflow_path, 'r', encoding='utf-8') as f:\r\n workflow = json.load(f)\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow logs from file: {str(e)}\")\r\n return None\r\n \r\n return workflow.get(\"logs\", []) if workflow else None\r\n \r\n def get_workflow_messages(self, workflow_id: str) -> Optional[List[Dict[str, Any]]]:\r\n \"\"\"\r\n Get messages for a workflow.\r\n \r\n Args:\r\n workflow_id: ID of the workflow\r\n \r\n Returns:\r\n List of messages or None if workflow not found\r\n \"\"\"\r\n # Get from memory\r\n workflow = self.workflows.get(workflow_id)\r\n \r\n # If not in memory, load from database\r\n if not workflow and self.lucydom_interface:\r\n try:\r\n messages = self.lucydom_interface.get_workflow_messages(workflow_id)\r\n return messages\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow messages from database: {str(e)}\")\r\n \r\n # If not in database or no interface available, load from file\r\n if not workflow:\r\n try:\r\n import json\r\n workflow_path = os.path.join(self.results_dir, f\"workflow_{workflow_id}.json\")\r\n if os.path.exists(workflow_path):\r\n with open(workflow_path, 'r', encoding='utf-8') as f:\r\n workflow = json.load(f)\r\n except Exception as e:\r\n logger.error(f\"Error loading workflow messages from file: {str(e)}\")\r\n return None\r\n \r\n return workflow.get(\"messages\", []) if workflow else None\r\n\r\n# Factory function for WorkflowManager\r\ndef get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None):\r\n \"\"\"\r\n Get a WorkflowManager instance for the specified context.\r\n Reuses existing instances and updates dependencies.\r\n \r\n Args:\r\n mandate_id: Mandate ID\r\n user_id: User ID\r\n ai_service: AI service\r\n lucydom_interface: LucyDOM interface\r\n \r\n Returns:\r\n WorkflowManager instance\r\n \"\"\"\r\n from modules.lucydom_interface import get_lucydom_interface\r\n \r\n context_key = f\"{mandate_id}_{user_id}\"\r\n \r\n # Get LucyDOM interface if not provided\r\n if not lucydom_interface:\r\n lucydom_interface = get_lucydom_interface(mandate_id, user_id)\r\n \r\n if context_key not in _workflow_managers:\r\n _workflow_managers[context_key] = WorkflowManager(\r\n mandate_id, \r\n user_id, \r\n ai_service, \r\n lucydom_interface\r\n )\r\n \r\n # Update services if provided\r\n if ai_service is not None:\r\n _workflow_managers[context_key].ai_service = ai_service\r\n \r\n # NEW: Update document handler's AI service\r\n if hasattr(_workflow_managers[context_key], 'document_handler'):\r\n _workflow_managers[context_key].document_handler.set_ai_service(ai_service)\r\n \r\n # NEW: Update agent registry dependencies\r\n from modules.agentservice_registry import AgentRegistry\r\n registry = AgentRegistry.get_instance()\r\n registry.set_dependencies(ai_service=ai_service)\r\n \r\n return _workflow_managers[context_key]\r\n\r\n# Singleton factory for WorkflowManager instances per context\r\n_workflow_managers = {}",
"is_extracted": true,
"extraction_context": null
}
]
},
{
"id": "doc_8b9ba44e-f1e8-40e5-90d4-f051d652b352",
"source": {
"type": "file",
"id": "file_e210ccda-c09b-47a3-ab7e-72ac816525ad",
"name": "agentservice_agent_creative.py",
"content_type": "text/x-python",
"size": 13778,
"upload_date": "2025-04-16T01:48:45.242110"
},
"contents": [
{
"type": "text",
"text": "\"\"\"\r\nCreative Agent for knowledge-based answers and creative content generation.\r\nHandles open questions, documentation tasks, and special 'poweron' requests.\r\nBased on the refactored Core-Module.\r\n\"\"\"\r\n\r\nimport logging\r\nfrom typing import List, Dict, Any, Optional\r\nimport json\r\n\r\nfrom modules.agentservice_base import BaseAgent\r\nfrom modules.agentservice_utils import MessageUtils, LoggingUtils\r\nfrom modules.agentservice_protocol import AgentCommunicationProtocol\r\n\r\nlogger = logging.getLogger(__name__)\r\n\r\nclass CreativeAgent(BaseAgent):\r\n \"\"\"Agent for knowledge-based answers and creative content generation\"\"\"\r\n \r\n def __init__(self):\r\n \"\"\"Initialize the Creative Agent\"\"\"\r\n super().__init__()\r\n self.id = \"creative\"\r\n self.name = \"Creative Knowledge Assistant\"\r\n self.type = \"knowledge\"\r\n self.description = \"Provides knowledge-based answers, creates content, handles document processing, and responds to PowerOn requests\"\r\n \r\n # Extended capabilities to explicitly cover document processing\r\n self.capabilities = (\"knowledge_sharing,content_creation,document_generation,\"\r\n \"creative_writing,poweron,document_processing,\"\r\n \"information_extraction,data_transformation,\"\r\n \"document_analysis,text_processing,table_creation,\"\r\n \"visual_information_processing,content_structuring\")\r\n \r\n # Update result format to include tables\r\n self.result_format = \"Text,Document,Table\"\r\n \r\n # Add enhanced document capabilities\r\n self.supports_documents = True\r\n self.document_capabilities = [\"read\", \"create\", \"analyze\", \"extract\", \"transform\"]\r\n self.required_context = [\"workflow_id\"]\r\n self.document_handler = None\r\n\r\n # Initialize AI service\r\n self.ai_service = None\r\n\r\n # Initialize protocol\r\n self.protocol = AgentCommunicationProtocol()\r\n \r\n # Initialize utilities\r\n self.message_utils = MessageUtils()\r\n\r\n def get_agent_info(self) -> Dict[str, Any]:\r\n \"\"\"Get agent information for agent registry\"\"\"\r\n info = super().get_agent_info()\r\n info.update({\r\n \"metadata\": {\r\n \"specialties\": [\r\n \"creative_writing\", \r\n \"documentation\", \r\n \"knowledge\", \r\n \"poweron\",\r\n \"document_processing\",\r\n \"information_extraction\",\r\n \"content_transformation\",\r\n \"table_generation\",\r\n \"document_analysis\"\r\n ]\r\n }\r\n })\r\n return info\r\n\r\n def set_document_handler(self, document_handler):\r\n \"\"\"Set the document handler for file operations\"\"\"\r\n self.document_handler = document_handler\r\n \r\n async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:\r\n \"\"\"\r\n Process a message and generate a creative or knowledge-based response.\r\n Enhanced with improved document handling.\r\n \r\n Args:\r\n message: The message to process\r\n context: Additional context\r\n \r\n Returns:\r\n The generated response\r\n \"\"\"\r\n # Extract workflow_id from context or message\r\n workflow_id = context.get(\"workflow_id\") if context else message.get(\"workflow_id\", \"unknown\")\r\n \r\n # Get or create logging_utils\r\n log_func = context.get(\"log_func\") if context else None\r\n logging_utils = LoggingUtils(workflow_id, log_func)\r\n \r\n # Send status update using protocol\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=\"Starting to process request\",\r\n sender_id=self.id,\r\n status=\"in_progress\",\r\n progress=0.0,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"info\", self.id, self.name)\r\n \r\n # Create response structure\r\n response = {\r\n \"role\": \"assistant\",\r\n \"content\": \"\",\r\n \"agent_id\": self.id,\r\n \"agent_type\": self.type,\r\n \"agent_name\": self.name,\r\n \"result_format\": self.result_format,\r\n \"workflow_id\": workflow_id\r\n }\r\n \r\n try:\r\n # Get the prompt from the message\r\n prompt = message.get(\"content\", \"\")\r\n logging_utils.info(f\"Processing request: {prompt[:50]}...\", \"agents\")\r\n \r\n # Check if this is a PowerOn request\r\n if \"poweron\" in prompt.lower():\r\n logging_utils.info(f\"Detected PowerOn keyword, generating specialized response\", \"agents\")\r\n \r\n # Update progress - 30% for PowerOn detection\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=\"Generating PowerOn response\",\r\n sender_id=self.id,\r\n status=\"in_progress\",\r\n progress=0.3,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"info\", self.id, self.name)\r\n \r\n # Generate PowerOn response\r\n poweron_prompt = f\"\"\"\r\n Tell to the user in the language of their prompt a big big thank you, that they think for you being PowerOn. Tell them, how pleased you are, to be part of the PowerOn family, working to support humans for a better life.\r\n \r\n Then generate a short answer (1-2 sentences) to this question: {prompt}\r\n \"\"\"\r\n \r\n poweron_response = await self.ai_service.call_api([\r\n {\"role\": \"system\", \"content\": \"You are a helpful assistant that is part of the PowerOn family.\"},\r\n {\"role\": \"user\", \"content\": poweron_prompt}\r\n ])\r\n \r\n response[\"content\"] = poweron_response\r\n \r\n # Update progress - 100% for completion\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=\"PowerOn response complete\",\r\n sender_id=self.id,\r\n status=\"completed\",\r\n progress=1.0,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"info\", self.id, self.name)\r\n \r\n logging_utils.info(\"PowerOn response generated\", \"agents\")\r\n return response\r\n \r\n # Create appropriate system prompt based on context and request\r\n system_prompt = \"You are a helpful, creative assistant specializing in knowledge sharing, content creation, and document processing.\"\r\n\r\n \r\n # Check expected format from context if available\r\n expected_format = context.get(\"expected_format\", \"\").lower() if context else \"\"\r\n \r\n # Check for documents in the message\r\n has_documents = bool(message.get(\"documents\"))\r\n document_count = len(message.get(\"documents\", []))\r\n \r\n if has_documents:\r\n logging_utils.info(f\"Message contains {document_count} documents\", \"agents\")\r\n system_prompt += \"\"\"\r\nWhen asked to summarize information, always consider:\r\n1. All provided document content\r\n2. The entire conversation history in the current workflow\r\n3. Any structured data that has been shared\r\n\r\nFor summarization tasks specifically, make sure to analyze the complete context including previous messages in the conversation, not just the files or the current request. The conversation history is a critical data source that should be incorporated into any summary or overview.\r\n\"\"\"\r\n\r\n # If in a workflow context, add this\r\n if workflow_id and workflow_id != \"unknown\":\r\n system_prompt += \"\"\"\r\nYou are currently operating within a workflow where multiple messages may have been exchanged. \r\nWhen generating summaries or overviews, you must incorporate the content from previous messages \r\nin this workflow as they contain valuable context and information.\r\n\"\"\" \r\n # Process attached documents\r\n document_content = \"\"\r\n document_texts = []\r\n document_names = []\r\n \r\n if has_documents:\r\n logging_utils.info(\"Processing attached documents\", \"agents\")\r\n \r\n # Try using document handler first\r\n if self.document_handler:\r\n document_content = self.document_handler.merge_document_contents(message)\r\n logging_utils.info(\"Extracted document content using document handler\", \"agents\")\r\n \r\n # Collect information about each document\r\n for doc in message.get(\"documents\", []):\r\n source = doc.get(\"source\", {})\r\n doc_name = source.get(\"name\", \"Unnamed Document\")\r\n document_names.append(doc_name)\r\n \r\n # Extract text from document contents\r\n doc_text = \"\"\r\n for content in doc.get(\"contents\", []):\r\n if content.get(\"type\") == \"text\":\r\n doc_text = content.get(\"text\", \"\")\r\n document_texts.append(doc_text)\r\n break\r\n \r\n logging_utils.info(f\"Processed document: {doc_name}\", \"agents\")\r\n \r\n # Update progress - 40% for document processing\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=\"Processing document content\",\r\n sender_id=self.id,\r\n status=\"in_progress\",\r\n progress=0.4,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"info\", self.id, self.name)\r\n \r\n # Combine user prompt with document content\r\n full_prompt = prompt\r\n \r\n if document_content:\r\n full_prompt = f\"{prompt}\\n\\n### Reference Documents:\\n{document_content}\"\r\n elif document_texts:\r\n # Fallback if document_handler didn't provide content\r\n docs_content = \"\"\r\n for i, (name, text) in enumerate(zip(document_names, document_texts)):\r\n docs_content += f\"\\n\\n### Document {i+1}: {name}\\n{text}\"\r\n \r\n full_prompt = f\"{prompt}\\n\\n{docs_content}\"\r\n \r\n # Update progress - 60% for starting generation\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=\"Generating response\",\r\n sender_id=self.id,\r\n status=\"in_progress\",\r\n progress=0.6,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"info\", self.id, self.name)\r\n \r\n # Generate response\r\n content = await self.ai_service.call_api([\r\n {\"role\": \"system\", \"content\": system_prompt},\r\n {\"role\": \"user\", \"content\": full_prompt}\r\n ])\r\n \r\n response[\"content\"] = content\r\n \r\n # Update progress - 100% for completion\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=\"Response complete\",\r\n sender_id=self.id,\r\n status=\"completed\",\r\n progress=1.0,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"info\", self.id, self.name)\r\n \r\n logging_utils.info(\"Response generation complete\", \"agents\")\r\n return response\r\n \r\n except Exception as e:\r\n error_msg = f\"Error generating response: {str(e)}\"\r\n logging_utils.error(error_msg, \"error\")\r\n \r\n # Log error status\r\n if log_func:\r\n status_message = self.protocol.create_status_update_message(\r\n status_description=f\"Error: {str(e)}\",\r\n sender_id=self.id,\r\n status=\"error\",\r\n progress=1.0,\r\n context_id=workflow_id\r\n )\r\n log_func(workflow_id, status_message.content, \"error\", self.id, self.name)\r\n \r\n response[\"content\"] = f\"I encountered an error while processing your request: {str(e)}\"\r\n return response\r\n \r\n\r\n\r\n\r\n# Singleton-Instanz\r\n_creative_agent = None\r\n\r\ndef get_creative_agent():\r\n \"\"\"Returns a singleton instance of the Creative Agent\"\"\"\r\n global _creative_agent\r\n if _creative_agent is None:\r\n _creative_agent = CreativeAgent()\r\n return _creative_agent",
"is_extracted": true,
"extraction_context": null
}
]
}
],
"content": "can you please summarize, what this code does?",
"agent_type": null
},
{
"id": "msg_eab3355f-6ad5-42f2-b324-52eaf3c0660c",
"workflow_id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"parent_message_id": null,
"started_at": "2025-04-16T01:49:11.263627",
"finished_at": null,
"sequence_no": 5,
"status": "pending",
"role": "assistant",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"documents": [],
"content": "I encountered an error while processing your request: 'NoneType' object is not subscriptable",
"agent_type": "creative",
"agent_id": "creative",
"agent_name": "Creative Knowledge Assistant",
"result_format": "Text,Document,Table"
},
{
"id": "msg_6f4acbae-c130-4a9d-888e-bdcfa2147864",
"workflow_id": "5ccfa607-22ce-4d17-8a09-b0f384021758",
"parent_message_id": null,
"started_at": "2025-04-16T01:49:18.956060",
"finished_at": null,
"sequence_no": 6,
"status": "pending",
"role": "assistant",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"documents": [],
"content": "**Summary of Workflow Results: Document Processing**\n\n**Key Findings and Results:**\nDuring the execution of Activity 1, \"Document Processing,\" an error was encountered: 'NoneType' object is not subscriptable. This error indicates that the process attempted to access an element of a variable that was unexpectedly set to 'None'.\n\n**Connection to Original Task:**\nThe task aimed to process documents according to specified requirements. However, the encountered error suggests a failure in handling or accessing data within the document processing workflow, preventing successful completion of the task.\n\n**Conclusions and Recommendations:**\nTo resolve this issue, it is recommended to:\n1. Review the document processing logic to ensure all variables are correctly initialized and populated.\n2. Implement error handling to manage 'NoneType' scenarios gracefully.\n3. Conduct a thorough test of the workflow to identify and rectify any additional underlying issues.\n\nThese steps should help in achieving the intended document processing outcomes effectively.",
"agent_type": "summary",
"agent_id": "workflow_summary",
"agent_name": "Workflow Summary",
"result_format": "Text",
"workflow_complete": true
}
],
"logs": [
{
"id": "log_5c7e6b15-87e0-49e3-b40b-906e2c4fde45",
"message": "Starting workflow execution",
"type": "info",
"timestamp": "2025-04-16T01:46:28.790780",
"agent_id": "workflow",
"agent_name": "Workflow Management"
},
{
"id": "log_7df889f4-cc61-43f0-b37f-79eae42e8a38",
"message": "Processing 1 files",
"type": "info",
"timestamp": "2025-04-16T01:46:28.824363",
"agent_id": "files",
"agent_name": "File Processing"
},
{
"id": "log_d800b7e6-3023-4d97-ac51-86b6c72c41c3",
"message": "File auszug_liste_positionen.pdf read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:46:28.906507",
"agent_id": null,
"agent_name": null
},
{
"id": "log_200eb28c-eb6e-4a63-ac9a-bce82d04c96d",
"message": "User message processed",
"type": "info",
"timestamp": "2025-04-16T01:46:29.097144",
"agent_id": "workflow",
"agent_name": "Workflow Management"
},
{
"id": "log_bfb396ed-1109-44c1-825c-a953a9232936",
"message": "Analyzing task to determine optimal planning approach",
"type": "info",
"timestamp": "2025-04-16T01:46:29.129736",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_f29c9ce5-33dd-48dc-bc5f-77119d405a6b",
"message": "Task analysis: {\"primaryOperationType\": \"extraction\", \"isUsingExistingData\": true, \"mentionedDocuments\": [\"doc_4eeac488-9967-4da7-b46d-f84b0566c6e5\"], \"expectedOutputFormat\": \"unspecified\", \"involvesDocumentProcessing\": true, \"requiresWebResearch\": false, \"requiresComplexComputation\": false, \"containsPowerOnKeyword\": false}",
"type": "info",
"timestamp": "2025-04-16T01:46:30.538311",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_381d854e-3c2e-49e7-b63a-027f27aa8ac7",
"message": "Document processing task detected with available documents, using creative agent",
"type": "info",
"timestamp": "2025-04-16T01:46:30.570835",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_ded7a265-99b0-45e3-a4c1-06e9ccfce9dd",
"message": "Created agent-aware work plan with 1 activities",
"type": "info",
"timestamp": "2025-04-16T01:46:30.603331",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_d23e749c-b6b7-4ce2-8aa1-f7ab13d48162",
"message": "Starting activity: Document Processing",
"type": "info",
"timestamp": "2025-04-16T01:46:30.635874",
"agent_id": "execution",
"agent_name": "Activity Execution"
},
{
"id": "log_838e84b4-0990-4977-a48c-80d63d26e17a",
"message": "Document requirements: All available documents",
"type": "info",
"timestamp": "2025-04-16T01:46:30.669797",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_e782f48f-5c42-4be0-8d7a-f13b1757ceb2",
"message": "Found 1 files in workflow",
"type": "info",
"timestamp": "2025-04-16T01:46:30.701765",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_7918be70-0759-46a9-b91d-06ea0e7257d4",
"message": "Starting document extraction process",
"type": "info",
"timestamp": "2025-04-16T01:46:30.734390",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_0220d4e2-258a-453b-8659-6d00351e630a",
"message": "Starting data extraction with 1 files",
"type": "info",
"timestamp": "2025-04-16T01:46:30.767814",
"agent_id": null,
"agent_name": null
},
{
"id": "log_0c71db78-415e-427f-8fc9-154660a159e6",
"message": "Creating extraction plan",
"type": "info",
"timestamp": "2025-04-16T01:46:30.800074",
"agent_id": null,
"agent_name": null
},
{
"id": "log_fc393234-e36a-4d3e-a103-e4a9613f299f",
"message": "Extraktionsplan wird erstellt...",
"type": "info",
"timestamp": "2025-04-16T01:46:30.831817",
"agent_id": null,
"agent_name": null
},
{
"id": "log_dbecc969-4779-47fa-8831-11021a2c21cb",
"message": "Extraktionsplan erstellt für 1 Dateien",
"type": "info",
"timestamp": "2025-04-16T01:46:34.478951",
"agent_id": null,
"agent_name": null
},
{
"id": "log_a543d89a-cc27-48ba-aa2c-8d289030de64",
"message": "Extraction plan created: 1 files, 0 need extraction",
"type": "info",
"timestamp": "2025-04-16T01:46:34.511994",
"agent_id": null,
"agent_name": null
},
{
"id": "log_1359ae7e-c38b-4e18-97bb-4bf7ccec8cf0",
"message": "Using document handler for extraction",
"type": "info",
"timestamp": "2025-04-16T01:46:34.545991",
"agent_id": null,
"agent_name": null
},
{
"id": "log_ad263d39-879f-44a9-af75-4cfe7f7af101",
"message": "Processing file: auszug_liste_positionen.pdf (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:46:34.580260",
"agent_id": null,
"agent_name": null
},
{
"id": "log_118ad44a-55cb-4eea-8e48-6901e10656f3",
"message": "Using existing content for auszug_liste_positionen.pdf",
"type": "info",
"timestamp": "2025-04-16T01:46:34.614428",
"agent_id": null,
"agent_name": null
},
{
"id": "log_a3101ce7-a98e-4733-a685-d192e3330b79",
"message": "Structuring extracted data from 1 files",
"type": "info",
"timestamp": "2025-04-16T01:46:34.647430",
"agent_id": null,
"agent_name": null
},
{
"id": "log_7dfa3f3d-c660-431d-8f48-040450bfe2bd",
"message": "Creating contextual summaries for extracted content",
"type": "info",
"timestamp": "2025-04-16T01:46:34.680013",
"agent_id": null,
"agent_name": null
},
{
"id": "log_20094a54-44a2-45fc-858e-5a0eefc2026b",
"message": "Added contextual summaries to extracted data",
"type": "info",
"timestamp": "2025-04-16T01:46:39.409134",
"agent_id": null,
"agent_name": null
},
{
"id": "log_01030ee4-5ae9-4e4a-8690-4b9a7accd00e",
"message": "Extracted content from 1 documents",
"type": "info",
"timestamp": "2025-04-16T01:46:39.441686",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_e84698b3-e87b-4d28-8471-5dd774fc8c39",
"message": "Document: auszug_liste_positionen.pdf, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:46:39.476209",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_46eb3e6b-5c06-4b1f-a2a5-be5f271d0cc3",
"message": "Content preview: \n\n",
"type": "info",
"timestamp": "2025-04-16T01:46:39.510206",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_982978a2-91e9-411e-ba67-a3bdcb2f5ceb",
"message": "No image content found in auszug_liste_positionen.pdf - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:46:39.545930",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_c6618a97-4879-44f6-bdfe-740c958862f8",
"message": "Document content available but no image analysis found - PDF image extraction may have failed for workflow 5ccfa607-22ce-4d17-8a09-b0f384021758",
"type": "warning",
"timestamp": "2025-04-16T01:46:39.579487",
"agent_id": "agents",
"agent_name": "Agent Selection & Execution"
},
{
"id": "log_f8c70af8-679a-4b7b-ab0b-6a4ec45d5c2b",
"message": "Passing 1 documents from user message to creative",
"type": "info",
"timestamp": "2025-04-16T01:46:39.613105",
"agent_id": "agents",
"agent_name": "Agent Selection & Execution"
},
{
"id": "log_057084ce-e2e1-4566-ab04-8dcfbbc3b20d",
"message": "Executing agent: creative",
"type": "info",
"timestamp": "2025-04-16T01:46:39.646858",
"agent_id": "agents",
"agent_name": "Agent Selection & Execution"
},
{
"id": "log_bdbdce62-fdc8-43a6-996d-5caab18d4984",
"message": "Completed activity: Document Processing",
"type": "info",
"timestamp": "2025-04-16T01:46:39.679376",
"agent_id": "execution",
"agent_name": "Activity Execution"
},
{
"id": "log_4468b717-2525-4cbd-9453-e1331ba5d3d6",
"message": "Created workflow summary",
"type": "info",
"timestamp": "2025-04-16T01:46:44.040039",
"agent_id": "summary",
"agent_name": "Results Summary"
},
{
"id": "log_09d36c8d-8125-4d60-b55f-96de36fad49b",
"message": "Starting workflow execution",
"type": "info",
"timestamp": "2025-04-16T01:48:44.783565",
"agent_id": "workflow",
"agent_name": "Workflow Management"
},
{
"id": "log_043c3a8f-bf81-4ad9-b98f-8fa9a4c31d0e",
"message": "Processing 8 files",
"type": "info",
"timestamp": "2025-04-16T01:48:44.817593",
"agent_id": "files",
"agent_name": "File Processing"
},
{
"id": "log_4bd8a390-288b-4e36-acc7-f6e92cced3e9",
"message": "File agentservice_dataextraction.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:44.866693",
"agent_id": null,
"agent_name": null
},
{
"id": "log_d267c5b6-5b47-4b1b-9c4d-163f124dc00d",
"message": "File agentservice_document_handler.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:44.910290",
"agent_id": null,
"agent_name": null
},
{
"id": "log_78e3838a-605c-42fa-8c95-02b74b5476eb",
"message": "File agentservice_filemanager.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:44.958664",
"agent_id": null,
"agent_name": null
},
{
"id": "log_fb046fbe-31d5-482c-9a68-1b658886c3b1",
"message": "File agentservice_registry.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:45.007768",
"agent_id": null,
"agent_name": null
},
{
"id": "log_5ebf2abb-16bf-4f43-a244-fdc3b93c5d04",
"message": "File agentservice_utils.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:45.055770",
"agent_id": null,
"agent_name": null
},
{
"id": "log_f31e0caa-7a99-4d68-bf04-ec7a56cdc212",
"message": "File agentservice_workflow_execution.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:45.110868",
"agent_id": null,
"agent_name": null
},
{
"id": "log_17452324-4fb7-4d8d-b1ea-61abe43d0fb7",
"message": "File agentservice_workflow_manager.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:45.157343",
"agent_id": null,
"agent_name": null
},
{
"id": "log_a42a12b8-b050-41c2-8e2f-2266266bcf95",
"message": "File agentservice_agent_creative.py read successfully (extracted: True)",
"type": "info",
"timestamp": "2025-04-16T01:48:45.204554",
"agent_id": null,
"agent_name": null
},
{
"id": "log_867118fc-1fb5-43ff-b6fa-26a0750862a4",
"message": "User message processed",
"type": "info",
"timestamp": "2025-04-16T01:48:47.079798",
"agent_id": "workflow",
"agent_name": "Workflow Management"
},
{
"id": "log_023140fc-5f14-4c1b-902a-c2589393cee2",
"message": "Analyzing task to determine optimal planning approach",
"type": "info",
"timestamp": "2025-04-16T01:48:47.114834",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_4a210dc5-357e-4809-ad6a-9aa9dc03c0ef",
"message": "Task analysis: {\"primaryOperationType\": \"analysis\", \"isUsingExistingData\": true, \"mentionedDocuments\": [\"agentservice_dataextraction.py\", \"agentservice_document_handler.py\", \"agentservice_filemanager.py\", \"agentservice_registry.py\", \"agentservice_utils.py\", \"agentservice_workflow_execution.py\", \"agentservice_workflow_manager.py\", \"agentservice_agent_creative.py\"], \"expectedOutputFormat\": \"text\", \"involvesDocumentProcessing\": true, \"requiresWebResearch\": false, \"requiresComplexComputation\": false, \"containsPowerOnKeyword\": false}",
"type": "info",
"timestamp": "2025-04-16T01:48:49.152043",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_8f20755d-1aa5-4da3-ac27-96cc5b095233",
"message": "Document processing task detected with available documents, using creative agent",
"type": "info",
"timestamp": "2025-04-16T01:48:49.185560",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_2cbe97c2-8473-4dac-94bf-2e53993277b0",
"message": "Created agent-aware work plan with 1 activities",
"type": "info",
"timestamp": "2025-04-16T01:48:49.219568",
"agent_id": "planning",
"agent_name": "Activity Planning"
},
{
"id": "log_8b2e8a9b-ed75-4e7f-8de5-3c6c4c2aa5e6",
"message": "Starting activity: Document Processing",
"type": "info",
"timestamp": "2025-04-16T01:48:49.252578",
"agent_id": "execution",
"agent_name": "Activity Execution"
},
{
"id": "log_5398f9fd-9021-4834-a3ce-f2b42705e559",
"message": "Document requirements: All available documents",
"type": "info",
"timestamp": "2025-04-16T01:48:49.286236",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_973df76d-23e7-4b15-9db7-9f1f5ebc2651",
"message": "Found 9 files in workflow",
"type": "info",
"timestamp": "2025-04-16T01:48:49.320924",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_168f9d04-8b2d-40b4-8bcf-9bb8c54cd5bb",
"message": "Starting document extraction process",
"type": "info",
"timestamp": "2025-04-16T01:48:49.354922",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_16bbb20b-c43e-47fe-9fac-50fb352c0df1",
"message": "Starting data extraction with 9 files",
"type": "info",
"timestamp": "2025-04-16T01:48:49.388484",
"agent_id": null,
"agent_name": null
},
{
"id": "log_9b603035-538b-40d7-ab28-17471d253585",
"message": "Creating extraction plan",
"type": "info",
"timestamp": "2025-04-16T01:48:49.423174",
"agent_id": null,
"agent_name": null
},
{
"id": "log_8c81b8e2-c784-4469-b78b-43ac6366596f",
"message": "Extraktionsplan wird erstellt...",
"type": "info",
"timestamp": "2025-04-16T01:48:49.457140",
"agent_id": null,
"agent_name": null
},
{
"id": "log_c772a9b2-eb76-46b0-bd2f-5deaf1ea557a",
"message": "Extraktionsplan erstellt für 9 Dateien",
"type": "info",
"timestamp": "2025-04-16T01:48:53.042263",
"agent_id": null,
"agent_name": null
},
{
"id": "log_b7bf0aee-3388-4198-9943-3c035285b719",
"message": "Extraction plan created: 9 files, 0 need extraction",
"type": "info",
"timestamp": "2025-04-16T01:48:53.076848",
"agent_id": null,
"agent_name": null
},
{
"id": "log_47a55660-bc96-4c66-8909-1cec1c201a12",
"message": "Using document handler for extraction",
"type": "info",
"timestamp": "2025-04-16T01:48:53.110473",
"agent_id": null,
"agent_name": null
},
{
"id": "log_408e81b8-6d5d-476a-9d4f-a38c95c96bfe",
"message": "Processing file: agentservice_dataextraction.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.143835",
"agent_id": null,
"agent_name": null
},
{
"id": "log_57b86f31-edc4-44a5-af21-a465f926a39c",
"message": "Using existing content for agentservice_dataextraction.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.178425",
"agent_id": null,
"agent_name": null
},
{
"id": "log_a18722b9-03d4-433e-b5da-e756d4d8533b",
"message": "Processing file: agentservice_document_handler.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.212170",
"agent_id": null,
"agent_name": null
},
{
"id": "log_dcb602f8-3b72-47ef-b5ca-9dde5d6b4234",
"message": "Using existing content for agentservice_document_handler.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.247176",
"agent_id": null,
"agent_name": null
},
{
"id": "log_4dfcb59c-e865-4272-96d2-2a5eb80c8328",
"message": "Processing file: agentservice_filemanager.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.281109",
"agent_id": null,
"agent_name": null
},
{
"id": "log_3dc76aa5-a27a-4eb3-b77b-903c1d50cde1",
"message": "Using existing content for agentservice_filemanager.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.314836",
"agent_id": null,
"agent_name": null
},
{
"id": "log_51dc6ed5-64e3-4555-ac17-568ff4b47b27",
"message": "Processing file: agentservice_workflow_execution.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.350836",
"agent_id": null,
"agent_name": null
},
{
"id": "log_031ab7a6-d57d-453d-b370-10e767984a43",
"message": "Using existing content for agentservice_workflow_execution.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.384463",
"agent_id": null,
"agent_name": null
},
{
"id": "log_d7c6744a-6e61-4dea-98bb-79615790f033",
"message": "Processing file: agentservice_workflow_manager.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.419286",
"agent_id": null,
"agent_name": null
},
{
"id": "log_1c1a72af-37b8-4831-b3c8-5c214301d4cc",
"message": "Using existing content for agentservice_workflow_manager.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.452288",
"agent_id": null,
"agent_name": null
},
{
"id": "log_69c2f704-5717-4365-b210-083556382719",
"message": "Processing file: auszug_liste_positionen.pdf (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.486239",
"agent_id": null,
"agent_name": null
},
{
"id": "log_eb9d1b74-0186-4869-878d-fba5cb48c15f",
"message": "Using existing content for auszug_liste_positionen.pdf",
"type": "info",
"timestamp": "2025-04-16T01:48:53.521189",
"agent_id": null,
"agent_name": null
},
{
"id": "log_a02f5f35-c62a-4ee2-8fcf-98bdc13d6039",
"message": "Processing file: agentservice_registry.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.554231",
"agent_id": null,
"agent_name": null
},
{
"id": "log_5673601f-9be4-404d-9ec0-0c4936dd8c4f",
"message": "Using existing content for agentservice_registry.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.589249",
"agent_id": null,
"agent_name": null
},
{
"id": "log_94c6f38c-6e81-46cf-801c-130c30c171a5",
"message": "Processing file: agentservice_utils.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.624359",
"agent_id": null,
"agent_name": null
},
{
"id": "log_792f2bc2-6b61-467c-896b-342071284ded",
"message": "Using existing content for agentservice_utils.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.660360",
"agent_id": null,
"agent_name": null
},
{
"id": "log_b4a7ec3c-64c5-4ce7-b184-2efa73197fb7",
"message": "Processing file: agentservice_agent_creative.py (Extraction needed: False)",
"type": "info",
"timestamp": "2025-04-16T01:48:53.694987",
"agent_id": null,
"agent_name": null
},
{
"id": "log_73501e74-59dd-4011-b920-ebe591dfea3b",
"message": "Using existing content for agentservice_agent_creative.py",
"type": "info",
"timestamp": "2025-04-16T01:48:53.728843",
"agent_id": null,
"agent_name": null
},
{
"id": "log_13540449-fea3-442f-80a8-890d53d87f69",
"message": "Structuring extracted data from 9 files",
"type": "info",
"timestamp": "2025-04-16T01:48:53.763432",
"agent_id": null,
"agent_name": null
},
{
"id": "log_b235143c-1439-4297-a698-d400687ddb25",
"message": "Creating contextual summaries for extracted content",
"type": "info",
"timestamp": "2025-04-16T01:48:53.796477",
"agent_id": null,
"agent_name": null
},
{
"id": "log_cb5915d9-27ed-472e-bddd-8b128ec5803e",
"message": "Added contextual summaries to extracted data",
"type": "info",
"timestamp": "2025-04-16T01:48:59.796376",
"agent_id": null,
"agent_name": null
},
{
"id": "log_b294ca55-8f42-466e-bd77-45b0eef6dde7",
"message": "Processing 2 image-related content items",
"type": "info",
"timestamp": "2025-04-16T01:48:59.830821",
"agent_id": null,
"agent_name": null
},
{
"id": "log_e0668380-1df3-4e0d-bc9e-7cd8f4bf72a3",
"message": "Added image analysis summary to extracted data",
"type": "info",
"timestamp": "2025-04-16T01:49:10.174466",
"agent_id": null,
"agent_name": null
},
{
"id": "log_74580e09-2443-4ac1-931c-32b3e43657a2",
"message": "Extracted content from 9 documents",
"type": "info",
"timestamp": "2025-04-16T01:49:10.208741",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_40ef87e0-eb67-4035-8db4-fefd52c3f473",
"message": "Document: agentservice_dataextraction.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.242969",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_a7581a3f-274e-4b92-971b-ea0c2fc7cb33",
"message": "Content preview: \"\"\"\r\nRefactored helper function for intelligent data extraction (continued).\r\n\"\"\"\r\n\r\nimport logging\r...",
"type": "info",
"timestamp": "2025-04-16T01:49:10.278484",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_77228cc2-c723-431a-b0ab-d8e3a5d58948",
"message": "No image content found in agentservice_dataextraction.py - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:10.312715",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_2101cb7c-4d99-4fb4-b23b-e125d208fdb9",
"message": "Document: agentservice_document_handler.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.347896",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_c4843a6d-27a1-4b83-9513-781bb537f970",
"message": "Content preview: \"\"\"\r\nEnhanced document handling module for the Agentservice (continued).\r\n\"\"\"\r\n\r\nimport os\r\nimport l...",
"type": "info",
"timestamp": "2025-04-16T01:49:10.382477",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_00fae78d-e737-4702-a50a-dde9dc612b32",
"message": "Image content found in agentservice_document_handler.py",
"type": "info",
"timestamp": "2025-04-16T01:49:10.417062",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_2efadc5e-5e1c-4542-b333-db540316d4c0",
"message": "Document: agentservice_filemanager.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.451745",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_e9d54d63-0e30-4d74-83b5-a2ef76216c49",
"message": "Content preview: \"\"\"\r\nCentral file management module for the Agentservice.\r\n\"\"\"\r\n\r\nimport os\r\nimport logging\r\nimport ...",
"type": "info",
"timestamp": "2025-04-16T01:49:10.486948",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_a11ba7bd-3541-491d-bd02-88eb2fe0709f",
"message": "Image content found in agentservice_filemanager.py",
"type": "info",
"timestamp": "2025-04-16T01:49:10.521588",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_e81feb65-343f-4666-962e-1882de80e9e3",
"message": "Document: agentservice_workflow_execution.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.557872",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_9badd8d2-4c51-49eb-b29d-91d29f53ab03",
"message": "Content preview: \"\"\"\r\nRefactored architecture for the Agentservice multi-agent system.\r\nThis module defines the revis...",
"type": "info",
"timestamp": "2025-04-16T01:49:10.592381",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_078d3a95-d602-41e3-a1d8-17592df3def3",
"message": "No image content found in agentservice_workflow_execution.py - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:10.627940",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_c054840e-6e52-4d8c-bd62-133e7ccbafab",
"message": "Document: agentservice_workflow_manager.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.663241",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_c2df16c9-1e1f-44e4-b0e5-1f0058b25685",
"message": "Content preview: \"\"\"\r\nRefactored WorkflowManager class for the Agentservice (continued).\r\n\"\"\"\r\n\r\nimport os\r\nimport lo...",
"type": "info",
"timestamp": "2025-04-16T01:49:10.698244",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_56023c57-ad18-417f-89e5-6691e59c9899",
"message": "No image content found in agentservice_workflow_manager.py - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:10.734002",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_bef4189d-5c24-44d9-84ea-a25700c5177b",
"message": "Document: auszug_liste_positionen.pdf, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.768632",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_dcc117ee-8448-447c-880a-a0b12de7e971",
"message": "Content preview: \n\n",
"type": "info",
"timestamp": "2025-04-16T01:49:10.802675",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_b991f84b-5ac4-4eae-9d6b-e307775f362e",
"message": "No image content found in auszug_liste_positionen.pdf - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:10.838382",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_05e76bcc-d34f-4539-a503-4e7a510f80c6",
"message": "Document: agentservice_registry.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.874007",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_71019678-5eaa-4e19-9b63-9a95c6b72525",
"message": "Content preview: \"\"\"\r\nUpdated registry for all available agents in the system.\r\nProvides centralized agent registrati...",
"type": "info",
"timestamp": "2025-04-16T01:49:10.908628",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_8322971b-4976-4177-a3e8-4676e0031ebe",
"message": "No image content found in agentservice_registry.py - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:10.943895",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_0a01349d-825f-4fd3-9112-60cad1662c98",
"message": "Document: agentservice_utils.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:10.979610",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_da51ef0b-4666-464e-9907-d3edc990baa4",
"message": "Content preview: \"\"\"\r\nCentralized utility functions for the Agentservice (continued).\r\n\"\"\"\r\n\r\nimport os\r\nimport loggi...",
"type": "info",
"timestamp": "2025-04-16T01:49:11.014610",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_d1b009e3-4b33-46bc-b144-050a33494454",
"message": "No image content found in agentservice_utils.py - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:11.050384",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_bd6a1c18-2b48-4855-aa72-45636cc6a53d",
"message": "Document: agentservice_agent_creative.py, Method: existing_content, Extracted: True",
"type": "info",
"timestamp": "2025-04-16T01:49:11.085003",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_348b3393-bbda-46e0-9c1b-4b01302968cb",
"message": "Content preview: \"\"\"\r\nCreative Agent for knowledge-based answers and creative content generation.\r\nHandles open quest...",
"type": "info",
"timestamp": "2025-04-16T01:49:11.120479",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_97eabaa5-aa21-4e0b-b940-01a7d91e077c",
"message": "No image content found in agentservice_agent_creative.py - check PDF extraction",
"type": "warning",
"timestamp": "2025-04-16T01:49:11.155505",
"agent_id": "extraction",
"agent_name": "extraction"
},
{
"id": "log_5e09724f-c429-433f-83d1-7bbd56975e64",
"message": "Passing 8 documents from user message to creative",
"type": "info",
"timestamp": "2025-04-16T01:49:11.190516",
"agent_id": "agents",
"agent_name": "Agent Selection & Execution"
},
{
"id": "log_eda2d52b-7494-40af-b04a-ef60992937af",
"message": "Executing agent: creative",
"type": "info",
"timestamp": "2025-04-16T01:49:11.227111",
"agent_id": "agents",
"agent_name": "Agent Selection & Execution"
},
{
"id": "log_a54466e6-70c3-4600-b8d5-15d051e41b89",
"message": "Completed activity: Document Processing",
"type": "info",
"timestamp": "2025-04-16T01:49:11.263627",
"agent_id": "execution",
"agent_name": "Activity Execution"
},
{
"id": "log_d94ecdac-1ba5-4b2e-9b13-84cee18b7b9a",
"message": "Created workflow summary",
"type": "info",
"timestamp": "2025-04-16T01:49:18.956060",
"agent_id": "summary",
"agent_name": "Results Summary"
}
],
"data_stats": {
"total_processing_time": 38.486803,
"total_token_count": 0,
"total_bytes_sent": 0,
"total_bytes_received": 0
},
"completed_at": "2025-04-16T01:49:23.270368"
}