Phase 2: CodeEditor agent loop and voice hotfixes

This commit is contained in:
patrick-motsch 2026-02-23 22:09:27 +01:00
parent 338f9522a5
commit f6f42d8db7
10 changed files with 605 additions and 90 deletions

7
app.py
View file

@ -461,6 +461,13 @@ app.add_middleware(
max_age=86400, # Increased caching for preflight requests
)
# SlowAPI rate limiter initialization
from modules.auth import limiter
from slowapi.errors import RateLimitExceeded
from slowapi import _rate_limit_exceeded_handler
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# CSRF protection middleware
from modules.auth import CSRFMiddleware
from modules.auth import (

View file

@ -7,6 +7,7 @@ Replaces Azure Speech Services with Google Cloud APIs
import json
import html
import asyncio
import logging
from typing import Dict, Optional, Any
from google.cloud import speech
@ -73,6 +74,11 @@ class ConnectorGoogleSpeech:
Dict containing transcribed text, confidence, and metadata
"""
try:
# Treat sampleRate=0 as unknown (invalid value from client)
if sampleRate is not None and sampleRate <= 0:
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
sampleRate = None
# Auto-detect audio format if not provided
if sampleRate is None or channels is None:
validation = self.validateAudioFormat(audioContent)
@ -164,8 +170,11 @@ class ConnectorGoogleSpeech:
try:
# Use regular recognition for single audio files (not streaming)
# Run in thread pool to avoid blocking the asyncio event loop
logger.info("Using regular recognition for single audio file...")
response = self.speech_client.recognize(config=config, audio=audio)
response = await asyncio.to_thread(
self.speech_client.recognize, config=config, audio=audio
)
logger.debug(f"Google Cloud response: {response}")
except Exception as apiError:
@ -175,7 +184,7 @@ class ConnectorGoogleSpeech:
logger.info("Trying fallback with LINEAR16 encoding...")
fallbackConfig = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000, # Use standard sample rate
sample_rate_hertz=16000,
audio_channel_count=1,
language_code=language,
enable_automatic_punctuation=True,
@ -183,7 +192,9 @@ class ConnectorGoogleSpeech:
)
try:
response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
response = await asyncio.to_thread(
self.speech_client.recognize, config=fallbackConfig, audio=audio
)
logger.debug(f"Google Cloud fallback response: {response}")
except Exception as fallbackError:
logger.error(f"Google Cloud fallback error: {fallbackError}")
@ -297,7 +308,18 @@ class ConnectorGoogleSpeech:
"description": f"LINEAR16 with {std_rate}Hz"
})
# Try with different models
# Detect likely silence before expensive fallback loop
if len(audioContent) > 100:
sampleSlice = audioContent[100:min(500, len(audioContent))]
if len(set(sampleSlice)) < 3:
logger.warning("Audio appears silent (low byte variation) - skipping fallbacks")
return {
"success": False,
"text": "",
"confidence": 0.0,
"error": "No recognition results (silence or unclear audio)"
}
models = ["latest_long", "phone_call", "latest_short"]
for fallback_config in fallback_configs:
@ -305,7 +327,6 @@ class ConnectorGoogleSpeech:
try:
logger.info(f"Trying fallback: {fallback_config['description']} with {model} model...")
# Build fallback config with proper sample rate handling
fallback_config_params = {
"encoding": fallback_config["encoding"],
"audio_channel_count": fallback_config["channels"],
@ -314,12 +335,13 @@ class ConnectorGoogleSpeech:
"model": model
}
# Only add sample_rate_hertz if needed
if fallback_config["use_sample_rate"]:
fallback_config_params["sample_rate_hertz"] = fallback_config["sample_rate"]
fallback_config_obj = speech.RecognitionConfig(**fallback_config_params)
fallback_response = self.speech_client.recognize(config=fallback_config_obj, audio=audio)
fallback_response = await asyncio.to_thread(
self.speech_client.recognize, config=fallback_config_obj, audio=audio
)
if fallback_response.results:
result = fallback_response.results[0]

View file

@ -1,16 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""CodeEditor processor -- single-shot orchestrator (Phase 1).
Loads files, builds prompt, calls AI, parses response, emits SSE events."""
"""CodeEditor processor -- single-shot (Phase 1) and agent loop (Phase 2).
Orchestrates file loading, prompt building, AI calls, response parsing, and SSE emission."""
import logging
import uuid
from typing import List, Optional, Dict, Any
from typing import List, Dict, Any
from modules.features.codeeditor import fileContextManager, promptAssembly, responseParser
from modules.features.codeeditor.datamodelCodeeditor import (
FileEditProposal, ResponseSegment, SegmentTypeEnum
FileEditProposal, SegmentTypeEnum, AgentState
)
from modules.features.codeeditor import toolRegistry
from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@ -23,78 +23,41 @@ async def processMessage(
dbManagement,
interfaceAi,
chatInterface,
eventManager
eventManager,
agentMode: bool = False
):
"""Process a user message: load files, call AI, parse and emit response segments.
"""Process a user message. Dispatches to single-shot or agent loop based on mode."""
if agentMode:
await _processAgentMessage(
workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager
)
else:
await _processSingleShot(
workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager
)
Args:
workflowId: the active workflow ID
userPrompt: user's input text
selectedFileIds: file IDs the user selected as context
dbManagement: interfaceDbManagement instance with user context
interfaceAi: AiObjects instance for AI calls
chatInterface: interfaceDbChat instance for storing messages
eventManager: EventManager for SSE emission
"""
async def _processSingleShot(
workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager
):
"""Phase 1: Single AI call with pre-loaded file context."""
try:
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": "Loading files..."
})
await _emitStatus(eventManager, workflowId, "Loading files...")
fileContexts = await fileContextManager.loadFileContexts(dbManagement, selectedFileIds)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": "Building prompt..."
})
await _emitStatus(eventManager, workflowId, "Building prompt...")
chatHistory = _loadChatHistory(chatInterface, workflowId)
aiRequest = promptAssembly.buildRequest(userPrompt, fileContexts, chatHistory)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": "AI is processing..."
})
await _emitStatus(eventManager, workflowId, "AI is processing...")
aiResponse = await interfaceAi.callWithTextContext(aiRequest)
if aiResponse.errorCount > 0:
logger.error(f"AI call failed: {aiResponse.content}")
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message",
"item": {"role": "assistant", "content": f"Error: {aiResponse.content}"}
})
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": aiResponse.content
})
await _emitError(eventManager, workflowId, aiResponse.content)
return
segments = responseParser.parseResponse(aiResponse.content)
for segment in segments:
messageData = {
"role": "assistant",
"content": segment.content,
"type": segment.type.value,
"createdAt": getUtcTimestamp()
}
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message", "item": messageData
})
if segment.type == SegmentTypeEnum.FILE_EDIT:
proposal = FileEditProposal(
workflowId=workflowId,
fileId=_resolveFileId(segment.fileName, fileContexts),
fileName=segment.fileName,
operation="edit",
oldContent=segment.oldContent,
newContent=segment.newContent
)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "file_edit_proposal", "item": proposal.model_dump()
})
await _emitSegments(eventManager, workflowId, segments, fileContexts)
_logAiStats(aiResponse, workflowId)
await eventManager.emit_event(workflowId, "complete", {
@ -105,12 +68,181 @@ async def processMessage(
})
except Exception as e:
logger.error(f"CodeEditor processing failed for workflow {workflowId}: {e}", exc_info=True)
logger.error(f"CodeEditor single-shot failed for {workflowId}: {e}", exc_info=True)
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": str(e)
})
async def _processAgentMessage(
    workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager
):
    """Phase 2: Agent loop -- multiple AI calls with tool execution until done.

    Runs up to state.maxRounds rounds. Each round: emit a progress event, call
    the AI, emit any text/file-edit segments immediately, then execute every
    tool_call segment and feed the combined tool output back as the next
    request. The loop ends when a response contains no tool calls (completed),
    an AI call reports an error, or the round cap is reached.

    Args:
        workflowId: active workflow ID, used as the SSE routing key.
        userPrompt: the user's task text (embedded directly only in round 1).
        dbManagement: data-access object handed through to tool execution.
        interfaceAi: AI interface exposing callWithTextContext().
        chatInterface: chat persistence interface (not used in this loop).
        eventManager: SSE EventManager for chatdata/complete/error events.
    """
    state = AgentState(workflowId=workflowId)
    try:
        await _emitStatus(eventManager, workflowId, "Agent: Scanning available files...")
        # Metadata-only file list (names/ids/sizes, no contents) keeps the prompt small.
        fileListContext = fileContextManager.buildFileListContext(dbManagement)
        state.conversationHistory.append({"role": "user", "content": userPrompt})
        # First request is built with an empty history: the prompt builder
        # embeds the task directly instead of a conversation transcript.
        aiRequest = promptAssembly.buildAgentRequest(
            userPrompt=userPrompt,
            fileListContext=fileListContext,
            conversationHistory=[]
        )
        while state.status == "running" and state.currentRound < state.maxRounds:
            state.currentRound += 1
            state.totalAiCalls += 1
            await _emitStatus(eventManager, workflowId,
                f"Agent round {state.currentRound}: AI is thinking...")
            # Progress event lets the client render live round/cost counters.
            await eventManager.emit_event(workflowId, "chatdata", {
                "type": "agent_progress",
                "item": {
                    "round": state.currentRound,
                    "totalAiCalls": state.totalAiCalls,
                    "totalToolCalls": state.totalToolCalls,
                    "costCHF": round(state.totalCostCHF, 4),
                }
            })
            aiResponse = await interfaceAi.callWithTextContext(aiRequest)
            # Cost/time are accumulated even for failed calls.
            state.totalCostCHF += aiResponse.priceCHF
            state.totalProcessingTime += aiResponse.processingTime
            if aiResponse.errorCount > 0:
                logger.error(f"Agent AI call failed in round {state.currentRound}: {aiResponse.content}")
                await _emitError(eventManager, workflowId, aiResponse.content)
                state.status = "error"
                break
            _logAiStats(aiResponse, workflowId)
            state.conversationHistory.append({"role": "assistant", "content": aiResponse.content})
            segments = responseParser.parseResponse(aiResponse.content)
            # Emit text and file-edit segments right away; tool calls are run below.
            textAndEditSegments = [s for s in segments if s.type != SegmentTypeEnum.TOOL_CALL]
            if textAndEditSegments:
                await _emitSegments(eventManager, workflowId, textAndEditSegments, [])
            toolCallSegments = [s for s in segments if s.type == SegmentTypeEnum.TOOL_CALL]
            if not toolCallSegments:
                # A response with no tool calls is the agent's "I am done" signal.
                state.status = "completed"
                break
            toolResultTexts = []
            for tc in toolCallSegments:
                state.totalToolCalls += 1
                await _emitStatus(eventManager, workflowId,
                    f"Agent: Running {tc.toolName}...")
                result = await toolRegistry.dispatch(tc.toolName, tc.toolArgs or {}, dbManagement)
                toolResultTexts.append(f"[{tc.toolName}] (success={result.success}):\n{result.result}")
                logger.info(f"Agent tool {tc.toolName}: success={result.success}, time={result.executionTime:.2f}s")
            # All tool outputs of this round go back to the AI as one batched message.
            combinedResults = "\n\n".join(toolResultTexts)
            state.conversationHistory.append({
                "role": "tool_result",
                "content": combinedResults,
                "toolName": "batch"
            })
            # Follow-up request: no new user prompt, full history instead.
            aiRequest = promptAssembly.buildAgentRequest(
                userPrompt=None,
                fileListContext=fileListContext,
                conversationHistory=state.conversationHistory
            )
        if state.currentRound >= state.maxRounds and state.status == "running":
            # Loop exhausted without a completion signal -- tell the user.
            state.status = "max_rounds"
            await eventManager.emit_event(workflowId, "chatdata", {
                "type": "message",
                "item": {
                    "role": "system",
                    "content": f"Agent stopped: maximum rounds ({state.maxRounds}) reached.",
                    "createdAt": getUtcTimestamp()
                }
            })
        # Summary and complete events are emitted for every terminal status.
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "agent_summary",
            "item": {
                "rounds": state.currentRound,
                "totalAiCalls": state.totalAiCalls,
                "totalToolCalls": state.totalToolCalls,
                "costCHF": round(state.totalCostCHF, 4),
                "processingTime": round(state.totalProcessingTime, 1),
                "status": state.status,
            }
        })
        await eventManager.emit_event(workflowId, "complete", {
            "workflowId": workflowId,
            "agentRounds": state.currentRound,
            "totalCostCHF": round(state.totalCostCHF, 4),
            "processingTime": round(state.totalProcessingTime, 1)
        })
    except Exception as e:
        logger.error(f"CodeEditor agent loop failed for {workflowId}: {e}", exc_info=True)
        await eventManager.emit_event(workflowId, "error", {
            "workflowId": workflowId, "error": str(e)
        })
# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------
async def _emitStatus(eventManager, workflowId: str, label: str):
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": label
})
async def _emitError(eventManager, workflowId: str, errorMsg: str):
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message",
"item": {"role": "assistant", "content": f"Error: {errorMsg}"}
})
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": errorMsg
})
async def _emitSegments(eventManager, workflowId: str, segments, fileContexts):
    """Emit each parsed segment as a chat message over SSE; FILE_EDIT segments
    additionally produce a file_edit_proposal event carrying a FileEditProposal."""
    for seg in segments:
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "message",
            "item": {
                "role": "assistant",
                "content": seg.content,
                "type": seg.type.value,
                "createdAt": getUtcTimestamp(),
            },
        })
        if seg.type != SegmentTypeEnum.FILE_EDIT:
            continue
        proposal = FileEditProposal(
            workflowId=workflowId,
            fileId=_resolveFileId(seg.fileName, fileContexts),
            fileName=seg.fileName,
            operation="edit",
            oldContent=seg.oldContent,
            newContent=seg.newContent,
        )
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "file_edit_proposal",
            "item": proposal.model_dump(),
        })
def _loadChatHistory(chatInterface, workflowId: str) -> List[Dict[str, Any]]:
"""Load recent chat messages for multi-turn context."""
try:

View file

@ -13,6 +13,7 @@ class SegmentTypeEnum(str, Enum):
TEXT = "text"
CODE_BLOCK = "code_block"
FILE_EDIT = "file_edit"
TOOL_CALL = "tool_call"
class EditStatusEnum(str, Enum):
@ -40,6 +41,8 @@ class ResponseSegment(BaseModel):
fileName: Optional[str] = None
oldContent: Optional[str] = None
newContent: Optional[str] = None
toolName: Optional[str] = None
toolArgs: Optional[Dict[str, Any]] = None
class FileEditProposal(BaseModel):
@ -65,6 +68,27 @@ class FileVersion(BaseModel):
createdAt: float = Field(default_factory=getUtcTimestamp)
class AgentState(BaseModel):
    """Tracks state across an agent loop execution."""
    # Workflow this agent run belongs to (used as SSE routing key).
    workflowId: str
    # 1-based round counter, incremented at the start of each AI call.
    currentRound: int = 0
    # Hard cap on agent rounds to bound runtime and cost.
    maxRounds: int = 50
    # Total number of AI calls made (one per round).
    totalAiCalls: int = 0
    # Total number of tool executions across all rounds.
    totalToolCalls: int = 0
    # Accumulated AI spend in CHF across all rounds.
    totalCostCHF: float = 0.0
    # Accumulated AI processing time in seconds.
    totalProcessingTime: float = 0.0
    # Ordered user/assistant/tool_result message dicts fed back to the AI.
    conversationHistory: List[Dict[str, Any]] = Field(default_factory=list)
    # One of: "running", "completed", "error", "max_rounds".
    status: str = "running"
class ToolResult(BaseModel):
    """Result from executing a tool."""
    # Name of the tool that was executed (echoed back for logging/display).
    toolName: str
    # Human-readable tool output, or an "Error: ..." message on failure.
    result: str
    # False when the tool raised or the tool name was unknown.
    success: bool = True
    # Wall-clock execution time in seconds.
    executionTime: float = 0.0
TEXT_MIME_TYPES = {
"text/plain", "text/markdown", "text/html", "text/css", "text/csv",
"text/xml", "text/yaml", "text/x-python", "text/x-java",

View file

@ -71,3 +71,12 @@ def listTextFiles(dbManagement) -> List[FileContext]:
))
return textFiles
def buildFileListContext(dbManagement) -> str:
    """Build a compact file list string for the agent prompt (metadata only, no contents)."""
    availableFiles = listTextFiles(dbManagement)
    if not availableFiles:
        return "No text files available."
    entries = "\n".join(
        f"- {item.fileName} (id: {item.fileId}, size: {item.sizeBytes}B)"
        for item in availableFiles
    )
    return f"Total: {len(availableFiles)} text files\n" + entries

View file

@ -89,6 +89,84 @@ def _buildFileContext(fileContexts: List[FileContext]) -> str:
return "\n\n".join(parts)
def buildAgentRequest(
    userPrompt: Optional[str],
    fileListContext: str,
    conversationHistory: List[Dict[str, Any]]
) -> AiCallRequest:
    """Build an AiCallRequest for agent mode with tool definitions and conversation history."""
    # Imported here to avoid a module-level import cycle with the tool registry.
    from modules.features.codeeditor.toolRegistry import formatToolDefinitions

    fullPrompt = _AGENT_SYSTEM_PROMPT.replace("{{TOOL_DEFINITIONS}}", formatToolDefinitions())
    if conversationHistory:
        # Follow-up rounds: send the whole transcript instead of a task section.
        historyText = _buildConversationHistory(conversationHistory)
        context = f"## Available files\n{fileListContext}\n\n## Conversation\n{historyText}"
    else:
        # First round: embed the task directly.
        context = f"## Available files\n{fileListContext}\n\n## Task\n{userPrompt}"
    return AiCallRequest(
        prompt=fullPrompt,
        context=context,
        options=AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            temperature=0.0,
            compressPrompt=False,
            compressContext=False,
            resultFormat="txt"
        )
    )
# System prompt template for agent mode. The {{TOOL_DEFINITIONS}} placeholder
# is substituted with the rendered tool catalog by buildAgentRequest().
_AGENT_SYSTEM_PROMPT = """You are an AI agent for file analysis and editing. You work autonomously by using tools to read files, search content, and propose edits.
## Available tools
{{TOOL_DEFINITIONS}}
## How to call tools
Use this exact format for each tool call:
```tool_call
tool: <tool_name>
args: {"param": "value"}
```
## Rules
- Read files ONE AT A TIME with read_file, never assume file contents
- First create a plan, then execute it step by step
- Use search_files to find relevant files before reading them
- Use list_files to discover what files are available
- For file changes, use ```file_edit``` blocks (same format as before)
- You may combine text explanations, tool calls, and file edits in one response
- When you are DONE and need no more tool calls, simply respond with text only (no tool_call blocks)
- Keep responses focused and efficient
## file_edit format (for changes)
```file_edit
fileName: <filename>
oldContent: |
<exact existing content>
newContent: |
<replacement content>
```"""
def _buildConversationHistory(history: List[Dict[str, Any]]) -> str:
"""Build the full conversation history for agent multi-turn context."""
parts = []
for msg in history:
role = msg.get("role", "unknown")
content = msg.get("content", "")
if role == "tool_result":
toolName = msg.get("toolName", "")
parts.append(f"[Tool Result - {toolName}]:\n{content}")
else:
parts.append(f"[{role}]:\n{content}")
return "\n\n".join(parts)
def _buildChatHistory(chatHistory: List[Dict[str, Any]]) -> str:
"""Build a condensed chat history string for multi-turn context."""
if not chatHistory:

View file

@ -1,9 +1,10 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Response parser for the CodeEditor feature.
Parses AI responses into typed segments (text, code_block, file_edit)."""
Parses AI responses into typed segments (text, code_block, file_edit, tool_call)."""
import logging
import json
import re
from typing import List, Optional
@ -48,6 +49,16 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]:
content=blockContent,
language="text"
))
elif lang == "tool_call":
segment = _parseToolCallBlock(blockContent)
if segment:
segments.append(segment)
else:
segments.append(ResponseSegment(
type=SegmentTypeEnum.CODE_BLOCK,
content=blockContent,
language="text"
))
else:
segments.append(ResponseSegment(
type=SegmentTypeEnum.CODE_BLOCK,
@ -66,6 +77,11 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]:
return segments
def hasToolCalls(segments: List[ResponseSegment]) -> bool:
    """Check if any segments contain tool calls."""
    for segment in segments:
        if segment.type == SegmentTypeEnum.TOOL_CALL:
            return True
    return False
def _collectBlock(lines: List[str], startIdx: int) -> tuple:
"""Collect lines inside a fenced code block until closing ```."""
blockLines = []
@ -137,3 +153,32 @@ def _parseFileEditBlock(blockContent: str) -> Optional[ResponseSegment]:
oldContent=fields["oldContent"],
newContent=fields["newContent"]
)
def _parseToolCallBlock(blockContent: str) -> Optional[ResponseSegment]:
    """Parse a tool_call block into a ResponseSegment with toolName and toolArgs.

    Expects "tool: <name>" and "args: <json>" lines; returns None when no
    tool name is present. Unparseable args are preserved under a "raw" key.
    """
    parsedName = None
    parsedArgs = {}
    for rawLine in blockContent.split("\n"):
        line = rawLine.strip()
        if line.startswith("tool:"):
            parsedName = line[5:].strip()
        elif line.startswith("args:"):
            argsText = line[5:].strip()
            try:
                parsedArgs = json.loads(argsText)
            except json.JSONDecodeError:
                # Keep the raw text so the tool still sees what was sent.
                logger.warning(f"Could not parse tool args as JSON: {argsText}")
                parsedArgs = {"raw": argsText}
    if not parsedName:
        logger.warning("tool_call block missing tool name")
        return None
    return ResponseSegment(
        type=SegmentTypeEnum.TOOL_CALL,
        content=f"Tool: {parsedName}",
        toolName=parsedName,
        toolArgs=parsedArgs
    )

View file

@ -80,10 +80,11 @@ async def streamCodeeditorStart(
request: Request,
instanceId: str = Path(..., description="Feature instance ID"),
workflowId: Optional[str] = Query(None, description="Optional workflow ID to continue"),
mode: str = Query("simple", description="Processing mode: 'simple' (single AI call) or 'agent' (multi-step with tools)"),
userInput: UserInputRequest = Body(...),
context: RequestContext = Depends(getRequestContext)
):
"""Start or continue a CodeEditor workflow with SSE streaming."""
"""Start or continue a CodeEditor workflow with SSE streaming. Supports simple and agent mode."""
try:
mandateId = _validateInstanceAccess(instanceId, context)
chatInterface = _getServiceChat(context, featureInstanceId=instanceId)
@ -116,6 +117,8 @@ async def streamCodeeditorStart(
selectedFileIds = userInput.listFileId or []
agentMode = mode.lower() == "agent"
asyncio.create_task(
codeEditorProcessor.processMessage(
workflowId=workflowId,
@ -124,7 +127,8 @@ async def streamCodeeditorStart(
dbManagement=dbManagement,
interfaceAi=aiObjects,
chatInterface=chatInterface,
eventManager=eventManager
eventManager=eventManager,
agentMode=agentMode
)
)
@ -319,40 +323,52 @@ async def applyEdit(
proposalData: Dict[str, Any] = Body(...),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Accept a file edit proposal and create a new file version."""
"""Accept a file edit proposal. Updates existing file or creates new one."""
try:
_validateInstanceAccess(instanceId, context)
dbManagement = _getDbManagement(context, featureInstanceId=instanceId)
fileId = proposalData.get("fileId")
fileId = proposalData.get("fileId", "")
newContent = proposalData.get("newContent")
fileName = proposalData.get("fileName", "")
if not fileId or newContent is None:
raise HTTPException(status_code=400, detail="fileId and newContent are required")
if newContent is None:
raise HTTPException(status_code=400, detail="newContent is required")
fileItem = dbManagement.getFile(fileId)
if not fileItem:
raise HTTPException(status_code=404, detail=f"File {fileId} not found")
contentBytes = newContent.encode("utf-8")
isNewFile = not fileId or fileId.startswith("unknown-")
success = dbManagement.createFileData(fileId, newContent.encode("utf-8"))
if not success:
raise HTTPException(status_code=500, detail="Failed to store updated file content")
if isNewFile:
mimeType = _guessMimeType(fileName)
fileItem = dbManagement.createFile(fileName, mimeType, contentBytes)
resultFileId = fileItem.id
resultFileName = fileItem.fileName
else:
fileItem = dbManagement.getFile(fileId)
if not fileItem:
raise HTTPException(status_code=404, detail=f"File {fileId} not found")
success = dbManagement.createFileData(fileId, contentBytes)
if not success:
raise HTTPException(status_code=500, detail="Failed to store updated file content")
resultFileId = fileId
resultFileName = fileName or fileItem.fileName
eventManager = get_event_manager()
await eventManager.emit_event(workflowId, "chatdata", {
"type": "file_version",
"item": {
"fileId": fileId,
"fileName": fileName or fileItem.fileName,
"status": "accepted"
"fileId": resultFileId,
"fileName": resultFileName,
"status": "accepted",
"isNew": isNewFile
}
})
return {
"status": "accepted",
"fileId": fileId,
"fileName": fileName or fileItem.fileName
"fileId": resultFileId,
"fileName": resultFileName,
"isNew": isNewFile
}
except HTTPException:
@ -360,3 +376,20 @@ async def applyEdit(
except Exception as e:
logger.error(f"Error applying edit: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
_MIME_MAP = {
".md": "text/markdown", ".txt": "text/plain", ".json": "application/json",
".yaml": "application/yaml", ".yml": "application/yaml", ".xml": "application/xml",
".csv": "text/csv", ".py": "text/x-python", ".js": "text/javascript",
".ts": "text/x-typescript", ".html": "text/html", ".css": "text/css",
".sql": "text/x-sql", ".sh": "text/x-shellscript",
}
def _guessMimeType(fileName: str) -> str:
"""Guess MIME type from file extension."""
if not fileName or "." not in fileName:
return "text/plain"
ext = "." + fileName.rsplit(".", 1)[-1].lower()
return _MIME_MAP.get(ext, "text/plain")

View file

@ -0,0 +1,157 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Tool registry and dispatcher for the CodeEditor agent loop.
Defines available tools and executes them against the file context manager."""
import logging
import time
import fnmatch
from typing import Dict, Any, List
from modules.features.codeeditor.datamodelCodeeditor import ToolResult
logger = logging.getLogger(__name__)
# Tool catalog exposed to the agent loop. The "name" values must match the
# branches handled in dispatch(); "parameters" are human-readable specs that
# formatToolDefinitions() renders into the system prompt.
TOOL_DEFINITIONS = [
    {
        "name": "read_file",
        "description": "Read the full content of a single file by its fileId.",
        "parameters": {"fileId": "string (required)"}
    },
    {
        "name": "list_files",
        "description": "List all available text files with metadata (name, size, mimeType). Optionally filter by glob pattern.",
        "parameters": {"filter": "string (optional, glob pattern e.g. '*.py')"}
    },
    {
        "name": "search_files",
        "description": "Search all file contents for a text query. Returns matching lines with file name and line number.",
        "parameters": {"query": "string (required)", "fileType": "string (optional, extension e.g. 'py')"}
    },
]
async def dispatch(toolName: str, toolArgs: Dict[str, Any], dbManagement) -> ToolResult:
    """Execute a tool by name and wrap its output (or failure) in a ToolResult.

    Unknown tool names and raised exceptions are reported back to the agent
    as success=False results rather than propagated.
    """
    startedAt = time.time()
    try:
        if toolName == "read_file":
            output = await _toolReadFile(toolArgs, dbManagement)
        elif toolName == "list_files":
            output = _toolListFiles(toolArgs, dbManagement)
        elif toolName == "search_files":
            output = await _toolSearchFiles(toolArgs, dbManagement)
        else:
            return ToolResult(toolName=toolName, result=f"Unknown tool: {toolName}",
                              success=False, executionTime=time.time() - startedAt)
        return ToolResult(toolName=toolName, result=output, success=True,
                          executionTime=time.time() - startedAt)
    except Exception as e:
        logger.error(f"Tool {toolName} failed: {e}", exc_info=True)
        return ToolResult(toolName=toolName, result=f"Error: {str(e)}", success=False,
                          executionTime=time.time() - startedAt)
async def _toolReadFile(args: Dict[str, Any], dbManagement) -> str:
"""Read a single file's content."""
fileId = args.get("fileId", "")
if not fileId:
return "Error: fileId is required"
fileItem = dbManagement.getFile(fileId)
if not fileItem:
return f"Error: File {fileId} not found"
fileData = dbManagement.getFileData(fileId)
if not fileData:
return f"Error: No data for file {fileId}"
try:
content = fileData.decode("utf-8")
except UnicodeDecodeError:
return f"Error: File {fileItem.fileName} is not valid UTF-8"
lines = content.split("\n")
numbered = "\n".join([f"{i + 1}|{line}" for i, line in enumerate(lines)])
return f"--- FILE: {fileItem.fileName} (id: {fileId}) ---\n{numbered}\n--- END FILE ---"
def _toolListFiles(args: Dict[str, Any], dbManagement) -> str:
    """List all text files, optionally filtered by a glob pattern on the name."""
    from modules.features.codeeditor.datamodelCodeeditor import isTextFile
    pattern = args.get("filter", "")
    allFiles = dbManagement.getAllFiles()
    if not allFiles:
        return "No files found."
    entries = [
        f"- {f.fileName} (id: {f.id}, size: {f.fileSize}B, type: {f.mimeType})"
        for f in allFiles
        if isTextFile(f.mimeType, f.fileName)
        and (not pattern or fnmatch.fnmatch(f.fileName, pattern))
    ]
    if not entries:
        return "No matching text files found."
    return f"Available files ({len(entries)}):\n" + "\n".join(entries)
async def _toolSearchFiles(args: Dict[str, Any], dbManagement) -> str:
    """Search all text file contents for a case-insensitive query string.

    Args (from toolArgs):
        query: required search text.
        fileType: optional extension filter; accepts "py" or ".py" (the
            leading dot is stripped so both spellings match).

    Returns:
        Matching lines formatted as "fileName:lineNum: text", capped at 50
        hits, or an error / no-match message. Never raises; binary and
        non-text files are skipped.
    """
    from modules.features.codeeditor.datamodelCodeeditor import isTextFile
    query = args.get("query", "")
    if not query:
        return "Error: query is required"
    # Normalize so a caller passing ".py" does not produce a "..py" suffix
    # that would silently filter out every file.
    fileType = args.get("fileType", "").lstrip(".")
    allFiles = dbManagement.getAllFiles()
    if not allFiles:
        return "No files to search."
    hits = []
    maxHits = 50
    queryLower = query.lower()
    for f in allFiles:
        if not isTextFile(f.mimeType, f.fileName):
            continue
        if fileType and not f.fileName.endswith(f".{fileType}"):
            continue
        fileData = dbManagement.getFileData(f.id)
        if not fileData:
            continue
        try:
            content = fileData.decode("utf-8")
        except UnicodeDecodeError:
            # Skip files that are not valid UTF-8 despite a text MIME type.
            continue
        for lineNum, line in enumerate(content.split("\n"), 1):
            if queryLower in line.lower():
                hits.append(f"{f.fileName}:{lineNum}: {line.strip()}")
                if len(hits) >= maxHits:
                    break
        if len(hits) >= maxHits:
            break
    if not hits:
        return f"No matches found for '{query}'."
    result = f"Search results for '{query}' ({len(hits)} matches):\n" + "\n".join(hits)
    if len(hits) >= maxHits:
        result += f"\n... (truncated at {maxHits} matches)"
    return result
def formatToolDefinitions() -> str:
    """Format tool definitions for inclusion in the system prompt."""
    lines = []
    for tool in TOOL_DEFINITIONS:
        paramText = ", ".join(f"{name}: {spec}" for name, spec in tool["parameters"].items())
        lines.append(f"- **{tool['name']}**: {tool['description']}\n Parameters: {{{paramText}}}")
    return "\n".join(lines)

View file

@ -401,14 +401,22 @@ class TeamsbotService:
if len(audioBytes) < 1000:
return
# Detect silent/all-zeros audio early to avoid expensive STT calls
if len(set(audioBytes[100:min(500, len(audioBytes))])) < 3:
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, low byte variation)")
return
if not voiceInterface:
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
return
# Treat sampleRate=0 as unknown (triggers auto-detection)
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
sttResult = await voiceInterface.speechToText(
audioContent=audioBytes,
language=self.config.language or "de-DE",
sampleRate=sampleRate,
sampleRate=effectiveSampleRate,
)
if sttResult and sttResult.get("success") and sttResult.get("text"):