import os
import logging
import pandas as pd
from typing import Dict, Any, List, Optional, Tuple

# Configure module-level logger
logger = logging.getLogger(__name__)


async def read_file_contents(
    file_contexts: List[Dict[str, Any]],
    upload_dir: str,
    workflow_id: Optional[str] = None,
    add_log_func=None,
    ai_service=None  # Optional AI service used for image / PDF-image analysis
) -> Dict[str, str]:
    """
    Enhanced function to read the contents of all files with proper image
    and document analysis.

    Args:
        file_contexts: List of file contexts with metadata (id, name, type, path)
        upload_dir: Directory for uploads (used to derive missing paths)
        workflow_id: Optional ID of the workflow for logging
        add_log_func: Optional function for adding logs
        ai_service: Optional AI service for image analysis

    Returns:
        Dictionary with file contents (file_id -> content)
    """
    file_contents: Dict[str, str] = {}

    for file in file_contexts:
        file_id = file["id"]
        file_name = file["name"]
        file_type = file.get("type", "unknown")
        file_path = file.get("path", "")

        # If path is not set, try to derive it from the upload directory
        if not file_path and file_name:
            possible_path = os.path.join(upload_dir, file_name)
            if os.path.exists(possible_path):
                file_path = possible_path
                file["path"] = file_path  # Update the path in the context
                logger.debug(f"Found path for file {file_name}: {file_path}")

        # Read file content if path is available
        if file_path and os.path.exists(file_path):
            try:
                # Image files - always perform image analysis if AI service is available
                if file_type == "image" or file_name.lower().endswith(
                        ('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                    file_contents[file_id] = await _read_image(
                        file_path, file_name, workflow_id, add_log_func, ai_service)

                # Document files
                elif file_type == "document" or not file_type:
                    # Simple text files (extension check is case-sensitive, as before)
                    if file_name.endswith(('.txt', '.md', '.json', '.xml',
                                           '.html', '.htm', '.css', '.js')):
                        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                            file_contents[file_id] = f.read()
                        _log(add_log_func, workflow_id,
                             f"Text file {file_name} read successfully", "info")
                    elif file_name.endswith(('.xlsx', '.xls')):
                        file_contents[file_id] = _read_excel(
                            file_path, file_name, workflow_id, add_log_func)
                    elif file_name.endswith('.csv'):
                        file_contents[file_id] = _read_csv(
                            file_path, file_name, workflow_id, add_log_func)
                    elif file_name.endswith('.pdf'):
                        file_contents[file_id] = await _read_pdf(
                            file_path, file_name, workflow_id, add_log_func, ai_service)
                    else:
                        file_contents[file_id] = _read_generic(
                            file_path, file_name, workflow_id, add_log_func)

                # Other file types - just store metadata
                else:
                    file_contents[file_id] = (
                        f"File: {file_name} (Type: {file_type}, content not available)")
                    _log(add_log_func, workflow_id,
                         f"Unsupported file type: {file_type} for {file_name}", "warning")

            except Exception as e:
                logger.error(f"Error reading file {file_name}: {str(e)}")
                _log(add_log_func, workflow_id,
                     f"Error reading file {file_name}: {str(e)}", "error")
                file_contents[file_id] = f"File content not available (Error: {str(e)})"
        else:
            if file_path:
                _log(add_log_func, workflow_id,
                     f"File {file_name} not found: {file_path}", "warning")
            else:
                _log(add_log_func, workflow_id,
                     f"No path available for file {file_name}", "warning")
            file_contents[file_id] = "File content not available (File not found)"

    return file_contents


async def _read_image(file_path: str, file_name: str, workflow_id: Optional[str],
                      add_log_func, ai_service) -> str:
    """Analyze an image file via the AI service; degrade gracefully without one."""
    if not ai_service:
        return f"Image file: {file_name} (AI analysis not available)"
    try:
        _log(add_log_func, workflow_id, f"Analyzing image {file_name}...", "info")
        image_analysis = await ai_service.analyze_image(
            file_path, "Describe this image in detail")
        _log(add_log_func, workflow_id,
             f"Image {file_name} analyzed successfully", "info")
        return f"Image Analysis:\n{image_analysis}"
    except Exception as e:
        logger.error(f"Error analyzing image {file_name}: {str(e)}")
        _log(add_log_func, workflow_id,
             f"Error analyzing image {file_name}: {str(e)}", "error")
        return f"Image file: {file_name} (Analysis failed: {str(e)})"


def _read_excel(file_path: str, file_name: str, workflow_id: Optional[str],
                add_log_func) -> str:
    """Read an Excel workbook and render it as a textual table dump."""
    try:
        df = pd.read_excel(file_path)
        content = f"Excel file with {len(df)} rows and {len(df.columns)} columns.\n"
        content += f"Columns: {', '.join(df.columns.tolist())}\n\n"
        content += df.to_string(index=False)  # Full table, no truncation
        _log(add_log_func, workflow_id,
             f"Excel file {file_name} read successfully", "info")
        return content
    except Exception as e:
        logger.error(f"Error reading Excel file {file_name}: {str(e)}")
        _log(add_log_func, workflow_id,
             f"Error reading Excel file {file_name}: {str(e)}", "error")
        return f"Excel file: {file_name} (Reading failed: {str(e)})"


def _read_csv(file_path: str, file_name: str, workflow_id: Optional[str],
              add_log_func) -> str:
    """Read a CSV file, trying several encodings for robust parsing."""
    try:
        # Try various encodings for robust CSV parsing
        try:
            df = pd.read_csv(file_path, encoding='utf-8')
        except UnicodeDecodeError:
            try:
                df = pd.read_csv(file_path, encoding='latin1')
            except UnicodeDecodeError:
                # Was a bare `except:` before; narrowed to the decode error.
                # Any other failure falls through to the outer handler below.
                df = pd.read_csv(file_path, encoding='cp1252')
        content = f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n"
        content += f"Columns: {', '.join(df.columns.tolist())}\n\n"
        content += df.to_string(index=False)  # Full table, no truncation
        _log(add_log_func, workflow_id,
             f"CSV file {file_name} read successfully", "info")
        return content
    except Exception as e:
        logger.error(f"Error reading CSV file {file_name}: {str(e)}")
        _log(add_log_func, workflow_id,
             f"Error reading CSV file {file_name}: {str(e)}", "error")
        return f"CSV file: {file_name} (Reading failed: {str(e)})"


async def _read_pdf(file_path: str, file_name: str, workflow_id: Optional[str],
                    add_log_func, ai_service) -> str:
    """Extract PDF text (PyPDF2, falling back to PyMuPDF) plus optional AI image analysis."""
    try:
        # Try PyPDF2 first
        try:
            from PyPDF2 import PdfReader
            reader = PdfReader(file_path)
            num_pages = len(reader.pages)
            text = ""
            for page in reader.pages:
                text += page.extract_text() + "\n\n"

            # If AI service is available, also analyze images embedded in the PDF
            if ai_service:
                _log(add_log_func, workflow_id,
                     f"Analyzing PDF images in {file_name}...", "info")
                try:
                    image_analysis_results = await ai_service.extract_and_analyze_pdf_images(
                        file_path,
                        "Describe this image in the context of the document"
                    )
                    if image_analysis_results:
                        image_analysis_text = "\n\n=== PDF IMAGE ANALYSIS ===\n"
                        for result in image_analysis_results:
                            image_analysis_text += (
                                f"\nImage on page {result['page']}: {result['response']}\n")
                        text += image_analysis_text
                        _log(add_log_func, workflow_id,
                             f"Successfully analyzed {len(image_analysis_results)} images in PDF",
                             "info")
                except Exception as img_error:
                    # Image analysis failure is non-fatal; keep the extracted text.
                    logger.error(f"Error analyzing PDF images: {str(img_error)}")
                    _log(add_log_func, workflow_id,
                         f"Error analyzing PDF images: {str(img_error)}", "warning")

            _log(add_log_func, workflow_id,
                 f"PDF file {file_name} read successfully", "info")
            return f"PDF with {num_pages} pages.\nContent:\n{text}"
        except ImportError:
            # Try to use a different PDF library if available
            try:
                import fitz  # PyMuPDF
                doc = fitz.open(file_path)
                text = ""
                for page in doc:
                    text += page.get_text() + "\n\n"
                _log(add_log_func, workflow_id,
                     f"PDF file {file_name} read with PyMuPDF", "info")
                return f"PDF with {len(doc)} pages.\nContent:\n{text}"
            except ImportError:
                _log(add_log_func, workflow_id,
                     "No PDF library installed. Cannot extract PDF content.", "warning")
                return "PDF file (content not available, PDF libraries missing)"
    except Exception as e:
        logger.error(f"Error reading PDF file {file_name}: {str(e)}")
        _log(add_log_func, workflow_id,
             f"Error reading PDF file {file_name}: {str(e)}", "error")
        return f"PDF file: {file_name} (Reading failed: {str(e)})"


def _read_generic(file_path: str, file_name: str, workflow_id: Optional[str],
                  add_log_func) -> str:
    """Best-effort read of an unknown document type as text."""
    try:
        # Probe the file in binary mode first (result currently unused;
        # kept as a readability / permission check, as in the original).
        with open(file_path, 'rb') as f:
            f.read(8)

        # Try to read as text if it appears to be text-based
        try:
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                content = f.read()
            _log(add_log_func, workflow_id,
                 f"File {file_name} read as text", "info")
            return content
        except Exception:
            _log(add_log_func, workflow_id,
                 f"File {file_name} appears to be binary or has unknown format", "warning")
            return "File content not available (Binary or unsupported format)"
    except Exception as e:
        logger.error(f"Error processing file {file_name}: {str(e)}")
        _log(add_log_func, workflow_id,
             f"Error processing file {file_name}: {str(e)}", "error")
        return f"File content not available (Error: {str(e)})"


def format_file_context_text(file_contexts: List[Dict[str, Any]],
                             file_contents: Dict[str, str]) -> str:
    """
    Build a formatted textual representation of all files and their contents.

    Args:
        file_contexts: List of file contexts with metadata
        file_contents: Dictionary with file contents (file_id -> content)

    Returns:
        Formatted text with the file list and full content excerpts
    """
    # File listing header (user-facing strings intentionally kept in German)
    file_context_text = "Verfügbare Dateien:\n" + "\n".join([
        f"- {file['name']} ({file['type']}, {file['size']}, ID: {file['id']})"
        for file in file_contexts
    ])

    # Append file contents (no length limit)
    for file_id, content in file_contents.items():
        file_name = next((f['name'] for f in file_contexts if f['id'] == file_id),
                         "Unbekannte Datei")
        file_context_text += f"\n\n==== DATEIINHALT: {file_name} (ID: {file_id}) ====\n"
        file_context_text += content

    return file_context_text


def prepare_file_contexts(files: List[Dict[str, Any]],
                          upload_dir: str) -> List[Dict[str, Any]]:
    """
    Prepare the file contexts and resolve the full file paths.

    Args:
        files: List of files with metadata (dict with id, name, type)
        upload_dir: Directory for uploads

    Returns:
        List of file contexts with resolved paths
    """
    file_contexts = []
    for file in files:
        file_id = file["id"]
        file_name = file["name"]
        file_type = file["type"]
        file_path = file.get("path", "")

        # If no path is given, try to derive it from the upload directory
        if not file_path and file_name:
            possible_path = os.path.join(upload_dir, file_name)
            if os.path.exists(possible_path):
                file_path = possible_path
                logger.debug(f"Pfad für Datei {file_name} gefunden: {file_path}")

        file_contexts.append({
            "id": file_id,
            "name": file_name,
            "type": file_type,
            "size": file.get("size", "Unbekannt"),
            "path": file_path
        })
    return file_contexts


async def prepare_message_for_ai(
    file_contexts: List[Dict[str, Any]],
    prompt_text: str,
    file_contents: Dict[str, str],
    service_aichat
) -> Dict[str, Any]:
    """
    Enhanced function to prepare a complete message with all file contents
    for the AI model. Ensures proper file content integration and handles
    image analysis results.

    Args:
        file_contexts: List of file contexts with metadata
        prompt_text: The text prompt
        file_contents: Dictionary with file contents
        service_aichat: The AI service instance for special analyses

    Returns:
        A fully formatted message for the AI model
    """
    # Use the AI connector to create the message
    try:
        message = await service_aichat.parse_filedata(
            file_contexts, prompt_text, file_contents)

        # Ensure file contents are correctly integrated
        if (isinstance(message, dict) and message.get("content")
                and isinstance(message["content"], list)):
            # For each file context, ensure its content is included
            for file_context in file_contexts:
                file_id = file_context["id"]
                file_name = file_context["name"]

                # Check if file content is already included (by file name mention)
                file_mentioned = False
                for content_item in message["content"]:
                    if isinstance(content_item, dict) and content_item.get("type") == "text":
                        if file_name in content_item.get("text", ""):
                            file_mentioned = True
                            break

                # If file is not mentioned but we have its content, add it
                if not file_mentioned and file_id in file_contents:
                    content = file_contents[file_id]
                    message["content"].append({
                        "type": "text",
                        "text": f"--- FILE: {file_name} ---\n\n{content}"
                    })
                    logger.info(f"Added missing file content for {file_name} to message")

        return message
    except Exception as e:
        logger.error(f"Error preparing message for AI: {str(e)}")

        # Create a basic message structure if the AI connector fails
        message = {
            "role": "user",
            "content": prompt_text + "\n\n"
        }

        # Manually add file contents
        if file_contents:
            file_content_text = "\n\n=== FILE CONTENTS ===\n\n"
            for file_id, content in file_contents.items():
                # Find file name from contexts
                file_name = next((f["name"] for f in file_contexts if f["id"] == file_id),
                                 f"File {file_id}")
                file_content_text += f"--- FILE: {file_name} ---\n\n{content}\n\n"

            # Append to message (content may be a plain string or a parts list)
            if isinstance(message["content"], str):
                message["content"] += file_content_text
            elif isinstance(message["content"], list):
                message["content"].append({
                    "type": "text",
                    "text": file_content_text
                })

        return message


def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
    """Helper for logging through both the module logger and an optional callback."""
    # Log via the module logger instance
    if log_type == "error":
        logger.error(message)
    elif log_type == "warning":
        logger.warning(message)
    else:
        logger.info(message)

    # Log via the provided log function (if any)
    if add_log_func and workflow_id:
        add_log_func(workflow_id, message, log_type, agent_id, agent_name)


# The following functions are no longer needed since partial file loading
# was removed. They are kept here commented out in case they are reactivated.
"""
def parse_file_access_commands(agent_text: str) -> List[Dict[str, Any]]:
    # Diese Funktion wird vorerst nicht benötigt
    return []

def load_additional_file_content(
    workflow_id: str,
    file_id: str,
    file_contents: Dict[str, str],
    file_contexts: List[Dict[str, Any]],
    add_log_func = None,
    read_complete: bool = False,
    start_pos: int = None,
    end_pos: int = None,
    page_numbers: List[int] = None
) -> Optional[str]:
    # Diese Funktion wird vorerst nicht benötigt
    return None
"""