diff --git a/app.py b/app.py index 540d4e4d..68b51af0 100644 --- a/app.py +++ b/app.py @@ -461,6 +461,13 @@ app.add_middleware( max_age=86400, # Increased caching for preflight requests ) +# SlowAPI rate limiter initialization +from modules.auth import limiter +from slowapi.errors import RateLimitExceeded +from slowapi import _rate_limit_exceeded_handler +app.state.limiter = limiter +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + # CSRF protection middleware from modules.auth import CSRFMiddleware from modules.auth import ( diff --git a/modules/connectors/connectorVoiceGoogle.py b/modules/connectors/connectorVoiceGoogle.py index 42efffcf..10720efc 100644 --- a/modules/connectors/connectorVoiceGoogle.py +++ b/modules/connectors/connectorVoiceGoogle.py @@ -7,6 +7,7 @@ Replaces Azure Speech Services with Google Cloud APIs import json import html +import asyncio import logging from typing import Dict, Optional, Any from google.cloud import speech @@ -73,6 +74,11 @@ class ConnectorGoogleSpeech: Dict containing transcribed text, confidence, and metadata """ try: + # Treat sampleRate=0 as unknown (invalid value from client) + if sampleRate is not None and sampleRate <= 0: + logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection") + sampleRate = None + # Auto-detect audio format if not provided if sampleRate is None or channels is None: validation = self.validateAudioFormat(audioContent) @@ -164,8 +170,11 @@ class ConnectorGoogleSpeech: try: # Use regular recognition for single audio files (not streaming) + # Run in thread pool to avoid blocking the asyncio event loop logger.info("Using regular recognition for single audio file...") - response = self.speech_client.recognize(config=config, audio=audio) + response = await asyncio.to_thread( + self.speech_client.recognize, config=config, audio=audio + ) logger.debug(f"Google Cloud response: {response}") except Exception as apiError: @@ -175,7 +184,7 @@ class ConnectorGoogleSpeech: logger.info("Trying fallback with LINEAR16 encoding...") fallbackConfig = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=16000, # Use standard sample rate + sample_rate_hertz=16000, audio_channel_count=1, language_code=language, enable_automatic_punctuation=True, @@ -183,7 +192,9 @@ class ConnectorGoogleSpeech: ) try: - response = self.speech_client.recognize(config=fallbackConfig, audio=audio) + response = await asyncio.to_thread( + self.speech_client.recognize, config=fallbackConfig, audio=audio + ) logger.debug(f"Google Cloud fallback response: {response}") except Exception as fallbackError: logger.error(f"Google Cloud fallback error: {fallbackError}") @@ -297,7 +308,18 @@ class ConnectorGoogleSpeech: "description": f"LINEAR16 with {std_rate}Hz" }) - # Try with different models + # Detect likely silence before expensive fallback loop + if len(audioContent) > 100: + sampleSlice = audioContent[100:min(500, len(audioContent))] + if len(set(sampleSlice)) < 3: + logger.warning("Audio appears silent (low byte variation) - skipping fallbacks") + return { + "success": False, + "text": "", + "confidence": 0.0, + "error": "No recognition results (silence or unclear audio)" + } + models = ["latest_long", "phone_call", "latest_short"] for fallback_config in fallback_configs: @@ -305,7 +327,6 @@ class ConnectorGoogleSpeech: try: logger.info(f"Trying fallback: {fallback_config['description']} with {model} model...") - # Build fallback config with proper sample rate handling fallback_config_params = { "encoding": fallback_config["encoding"], "audio_channel_count": fallback_config["channels"], @@ -314,12 +335,13 @@ class ConnectorGoogleSpeech: "model": model } - # Only add sample_rate_hertz if needed if fallback_config["use_sample_rate"]: fallback_config_params["sample_rate_hertz"] = fallback_config["sample_rate"] fallback_config_obj = speech.RecognitionConfig(**fallback_config_params) - fallback_response = self.speech_client.recognize(config=fallback_config_obj, audio=audio) + fallback_response = await asyncio.to_thread( + self.speech_client.recognize, config=fallback_config_obj, audio=audio + ) if fallback_response.results: result = fallback_response.results[0] diff --git a/modules/features/codeeditor/codeEditorProcessor.py b/modules/features/codeeditor/codeEditorProcessor.py index 7986590d..7e472016 100644 --- a/modules/features/codeeditor/codeEditorProcessor.py +++ b/modules/features/codeeditor/codeEditorProcessor.py @@ -1,16 +1,16 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. -"""CodeEditor processor -- single-shot orchestrator (Phase 1). -Loads files, builds prompt, calls AI, parses response, emits SSE events.""" +"""CodeEditor processor -- single-shot (Phase 1) and agent loop (Phase 2). +Orchestrates file loading, prompt building, AI calls, response parsing, and SSE emission.""" import logging -import uuid -from typing import List, Optional, Dict, Any +from typing import List, Dict, Any from modules.features.codeeditor import fileContextManager, promptAssembly, responseParser from modules.features.codeeditor.datamodelCodeeditor import ( - FileEditProposal, ResponseSegment, SegmentTypeEnum + FileEditProposal, SegmentTypeEnum, AgentState ) +from modules.features.codeeditor import toolRegistry from modules.shared.timeUtils import getUtcTimestamp logger = logging.getLogger(__name__) @@ -23,78 +23,41 @@ async def processMessage( dbManagement, interfaceAi, chatInterface, - eventManager + eventManager, + agentMode: bool = False ): - """Process a user message: load files, call AI, parse and emit response segments. + """Process a user message. Dispatches to single-shot or agent loop based on mode.""" + if agentMode: + await _processAgentMessage( + workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager + ) + else: + await _processSingleShot( + workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager + ) - Args: - workflowId: the active workflow ID - userPrompt: user's input text - selectedFileIds: file IDs the user selected as context - dbManagement: interfaceDbManagement instance with user context - interfaceAi: AiObjects instance for AI calls - chatInterface: interfaceDbChat instance for storing messages - eventManager: EventManager for SSE emission - """ + +async def _processSingleShot( + workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager +): + """Phase 1: Single AI call with pre-loaded file context.""" try: - await eventManager.emit_event(workflowId, "chatdata", { - "type": "status", "label": "Loading files..." - }) - + await _emitStatus(eventManager, workflowId, "Loading files...") fileContexts = await fileContextManager.loadFileContexts(dbManagement, selectedFileIds) - await eventManager.emit_event(workflowId, "chatdata", { - "type": "status", "label": "Building prompt..." - }) - + await _emitStatus(eventManager, workflowId, "Building prompt...") chatHistory = _loadChatHistory(chatInterface, workflowId) - aiRequest = promptAssembly.buildRequest(userPrompt, fileContexts, chatHistory) - await eventManager.emit_event(workflowId, "chatdata", { - "type": "status", "label": "AI is processing..." - }) - + await _emitStatus(eventManager, workflowId, "AI is processing...") aiResponse = await interfaceAi.callWithTextContext(aiRequest) if aiResponse.errorCount > 0: - logger.error(f"AI call failed: {aiResponse.content}") - await eventManager.emit_event(workflowId, "chatdata", { - "type": "message", - "item": {"role": "assistant", "content": f"Error: {aiResponse.content}"} - }) - await eventManager.emit_event(workflowId, "error", { - "workflowId": workflowId, "error": aiResponse.content - }) + await _emitError(eventManager, workflowId, aiResponse.content) return segments = responseParser.parseResponse(aiResponse.content) - - for segment in segments: - messageData = { - "role": "assistant", - "content": segment.content, - "type": segment.type.value, - "createdAt": getUtcTimestamp() - } - - await eventManager.emit_event(workflowId, "chatdata", { - "type": "message", "item": messageData - }) - - if segment.type == SegmentTypeEnum.FILE_EDIT: - proposal = FileEditProposal( - workflowId=workflowId, - fileId=_resolveFileId(segment.fileName, fileContexts), - fileName=segment.fileName, - operation="edit", - oldContent=segment.oldContent, - newContent=segment.newContent - ) - await eventManager.emit_event(workflowId, "chatdata", { - "type": "file_edit_proposal", "item": proposal.model_dump() - }) - + await _emitSegments(eventManager, workflowId, segments, fileContexts) _logAiStats(aiResponse, workflowId) await eventManager.emit_event(workflowId, "complete", { @@ -105,12 +68,181 @@ async def processMessage( }) except Exception as e: - logger.error(f"CodeEditor processing failed for workflow {workflowId}: {e}", exc_info=True) + logger.error(f"CodeEditor single-shot failed for {workflowId}: {e}", exc_info=True) await eventManager.emit_event(workflowId, "error", { "workflowId": workflowId, "error": str(e) }) +async def _processAgentMessage( + workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager +): + """Phase 2: Agent loop -- multiple AI calls with tool execution until done.""" + state = AgentState(workflowId=workflowId) + + try: + await _emitStatus(eventManager, workflowId, "Agent: Scanning available files...") + fileListContext = fileContextManager.buildFileListContext(dbManagement) + + state.conversationHistory.append({"role": "user", "content": userPrompt}) + + aiRequest = promptAssembly.buildAgentRequest( + userPrompt=userPrompt, + fileListContext=fileListContext, + conversationHistory=[] + ) + + while state.status == "running" and state.currentRound < state.maxRounds: + state.currentRound += 1 + state.totalAiCalls += 1 + + await _emitStatus(eventManager, workflowId, + f"Agent round {state.currentRound}: AI is thinking...") + + await eventManager.emit_event(workflowId, "chatdata", { + "type": "agent_progress", + "item": { + "round": state.currentRound, + "totalAiCalls": state.totalAiCalls, + "totalToolCalls": state.totalToolCalls, + "costCHF": round(state.totalCostCHF, 4), + } + }) + + aiResponse = await interfaceAi.callWithTextContext(aiRequest) + state.totalCostCHF += aiResponse.priceCHF + state.totalProcessingTime += aiResponse.processingTime + + if aiResponse.errorCount > 0: + logger.error(f"Agent AI call failed in round {state.currentRound}: {aiResponse.content}") + await _emitError(eventManager, workflowId, aiResponse.content) + state.status = "error" + break + + _logAiStats(aiResponse, workflowId) + + state.conversationHistory.append({"role": "assistant", "content": aiResponse.content}) + + segments = responseParser.parseResponse(aiResponse.content) + + textAndEditSegments = [s for s in segments if s.type != SegmentTypeEnum.TOOL_CALL] + if textAndEditSegments: + await _emitSegments(eventManager, workflowId, textAndEditSegments, []) + + toolCallSegments = [s for s in segments if s.type == SegmentTypeEnum.TOOL_CALL] + + if not toolCallSegments: + state.status = "completed" + break + + toolResultTexts = [] + for tc in toolCallSegments: + state.totalToolCalls += 1 + await _emitStatus(eventManager, workflowId, + f"Agent: Running {tc.toolName}...") + + result = await toolRegistry.dispatch(tc.toolName, tc.toolArgs or {}, dbManagement) + toolResultTexts.append(f"[{tc.toolName}] (success={result.success}):\n{result.result}") + + logger.info(f"Agent tool {tc.toolName}: success={result.success}, time={result.executionTime:.2f}s") + + combinedResults = "\n\n".join(toolResultTexts) + state.conversationHistory.append({ + "role": "tool_result", + "content": combinedResults, + "toolName": "batch" + }) + + aiRequest = promptAssembly.buildAgentRequest( + userPrompt=None, + fileListContext=fileListContext, + conversationHistory=state.conversationHistory + ) + + if state.currentRound >= state.maxRounds and state.status == "running": + state.status = "max_rounds" + await eventManager.emit_event(workflowId, "chatdata", { + "type": "message", + "item": { + "role": "system", + "content": f"Agent stopped: maximum rounds ({state.maxRounds}) reached.", + "createdAt": getUtcTimestamp() + } + }) + + await eventManager.emit_event(workflowId, "chatdata", { + "type": "agent_summary", + "item": { + "rounds": state.currentRound, + "totalAiCalls": state.totalAiCalls, + "totalToolCalls": state.totalToolCalls, + "costCHF": round(state.totalCostCHF, 4), + "processingTime": round(state.totalProcessingTime, 1), + "status": state.status, + } + }) + + await eventManager.emit_event(workflowId, "complete", { + "workflowId": workflowId, + "agentRounds": state.currentRound, + "totalCostCHF": round(state.totalCostCHF, 4), + "processingTime": round(state.totalProcessingTime, 1) + }) + + except Exception as e: + logger.error(f"CodeEditor agent loop failed for {workflowId}: {e}", exc_info=True) + await eventManager.emit_event(workflowId, "error", { + "workflowId": workflowId, "error": str(e) + }) + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +async def _emitStatus(eventManager, workflowId: str, label: str): + await eventManager.emit_event(workflowId, "chatdata", { + "type": "status", "label": label + }) + + +async def _emitError(eventManager, workflowId: str, errorMsg: str): + await eventManager.emit_event(workflowId, "chatdata", { + "type": "message", + "item": {"role": "assistant", "content": f"Error: {errorMsg}"} + }) + await eventManager.emit_event(workflowId, "error", { + "workflowId": workflowId, "error": errorMsg + }) + + +async def _emitSegments(eventManager, workflowId: str, segments, fileContexts): + """Emit parsed segments as SSE events.""" + for segment in segments: + messageData = { + "role": "assistant", + "content": segment.content, + "type": segment.type.value, + "createdAt": getUtcTimestamp() + } + await eventManager.emit_event(workflowId, "chatdata", { + "type": "message", "item": messageData + }) + + if segment.type == SegmentTypeEnum.FILE_EDIT: + proposal = FileEditProposal( + workflowId=workflowId, + fileId=_resolveFileId(segment.fileName, fileContexts), + fileName=segment.fileName, + operation="edit", + oldContent=segment.oldContent, + newContent=segment.newContent + ) + await eventManager.emit_event(workflowId, "chatdata", { + "type": "file_edit_proposal", "item": proposal.model_dump() + }) + + def _loadChatHistory(chatInterface, workflowId: str) -> List[Dict[str, Any]]: """Load recent chat messages for multi-turn context.""" try: diff --git a/modules/features/codeeditor/datamodelCodeeditor.py b/modules/features/codeeditor/datamodelCodeeditor.py index 7f39cbca..72a4f9bc 100644 --- a/modules/features/codeeditor/datamodelCodeeditor.py +++ b/modules/features/codeeditor/datamodelCodeeditor.py @@ -13,6 +13,7 @@ class SegmentTypeEnum(str, Enum): TEXT = "text" CODE_BLOCK = "code_block" FILE_EDIT = "file_edit" + TOOL_CALL = "tool_call" class EditStatusEnum(str, Enum): @@ -40,6 +41,8 @@ class ResponseSegment(BaseModel): fileName: Optional[str] = None oldContent: Optional[str] = None newContent: Optional[str] = None + toolName: Optional[str] = None + toolArgs: Optional[Dict[str, Any]] = None class FileEditProposal(BaseModel): @@ -65,6 +68,27 @@ class FileVersion(BaseModel): createdAt: float = Field(default_factory=getUtcTimestamp) +class AgentState(BaseModel): + """Tracks state across an agent loop execution.""" + workflowId: str + currentRound: int = 0 + maxRounds: int = 50 + totalAiCalls: int = 0 + totalToolCalls: int = 0 + totalCostCHF: float = 0.0 + totalProcessingTime: float = 0.0 + conversationHistory: List[Dict[str, Any]] = Field(default_factory=list) + status: str = "running" + + +class ToolResult(BaseModel): + """Result from executing a tool.""" + toolName: str + result: str + success: bool = True + executionTime: float = 0.0 + + TEXT_MIME_TYPES = { "text/plain", "text/markdown", "text/html", "text/css", "text/csv", "text/xml", "text/yaml", "text/x-python", "text/x-java", diff --git a/modules/features/codeeditor/fileContextManager.py b/modules/features/codeeditor/fileContextManager.py index 558c40c5..d0e2ff1b 100644 --- a/modules/features/codeeditor/fileContextManager.py +++ b/modules/features/codeeditor/fileContextManager.py @@ -71,3 +71,12 @@ def listTextFiles(dbManagement) -> List[FileContext]: )) return textFiles + + +def buildFileListContext(dbManagement) -> str: + """Build a compact file list string for the agent prompt (no content, just metadata).""" + textFiles = listTextFiles(dbManagement) + if not textFiles: + return "No text files available." + lines = [f"- {f.fileName} (id: {f.fileId}, size: {f.sizeBytes}B)" for f in textFiles] + return f"Total: {len(lines)} text files\n" + "\n".join(lines) diff --git a/modules/features/codeeditor/promptAssembly.py b/modules/features/codeeditor/promptAssembly.py index db145524..95f4ce91 100644 --- a/modules/features/codeeditor/promptAssembly.py +++ b/modules/features/codeeditor/promptAssembly.py @@ -89,6 +89,84 @@ def _buildFileContext(fileContexts: List[FileContext]) -> str: return "\n\n".join(parts) +def buildAgentRequest( + userPrompt: Optional[str], + fileListContext: str, + conversationHistory: List[Dict[str, Any]] +) -> AiCallRequest: + """Build an AiCallRequest for agent mode with tool definitions and conversation history.""" + from modules.features.codeeditor.toolRegistry import formatToolDefinitions + + systemPrompt = _AGENT_SYSTEM_PROMPT.replace("{{TOOL_DEFINITIONS}}", formatToolDefinitions()) + + if not conversationHistory: + fullPrompt = systemPrompt + context = f"## Available files\n{fileListContext}\n\n## Task\n{userPrompt}" + else: + fullPrompt = systemPrompt + historyText = _buildConversationHistory(conversationHistory) + context = f"## Available files\n{fileListContext}\n\n## Conversation\n{historyText}" + + return AiCallRequest( + prompt=fullPrompt, + context=context, + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + temperature=0.0, + compressPrompt=False, + compressContext=False, + resultFormat="txt" + ) + ) + + +_AGENT_SYSTEM_PROMPT = """You are an AI agent for file analysis and editing. You work autonomously by using tools to read files, search content, and propose edits. + +## Available tools +{{TOOL_DEFINITIONS}} + +## How to call tools +Use this exact format for each tool call: + +```tool_call +tool: +args: {"param": "value"} +``` + +## Rules +- Read files ONE AT A TIME with read_file, never assume file contents +- First create a plan, then execute it step by step +- Use search_files to find relevant files before reading them +- Use list_files to discover what files are available +- For file changes, use ```file_edit``` blocks (same format as before) +- You may combine text explanations, tool calls, and file edits in one response +- When you are DONE and need no more tool calls, simply respond with text only (no tool_call blocks) +- Keep responses focused and efficient + +## file_edit format (for changes) +```file_edit +fileName: +oldContent: | + +newContent: | + +```""" + + +def _buildConversationHistory(history: List[Dict[str, Any]]) -> str: + """Build the full conversation history for agent multi-turn context.""" + parts = [] + for msg in history: + role = msg.get("role", "unknown") + content = msg.get("content", "") + if role == "tool_result": + toolName = msg.get("toolName", "") + parts.append(f"[Tool Result - {toolName}]:\n{content}") + else: + parts.append(f"[{role}]:\n{content}") + return "\n\n".join(parts) + + def _buildChatHistory(chatHistory: List[Dict[str, Any]]) -> str: """Build a condensed chat history string for multi-turn context.""" if not chatHistory: diff --git a/modules/features/codeeditor/responseParser.py b/modules/features/codeeditor/responseParser.py index 998a0abb..8003e7b2 100644 --- a/modules/features/codeeditor/responseParser.py +++ b/modules/features/codeeditor/responseParser.py @@ -1,9 +1,10 @@ # Copyright (c) 2025 Patrick Motsch # All rights reserved. """Response parser for the CodeEditor feature. -Parses AI responses into typed segments (text, code_block, file_edit).""" +Parses AI responses into typed segments (text, code_block, file_edit, tool_call).""" import logging +import json import re from typing import List, Optional @@ -48,6 +49,16 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]: content=blockContent, language="text" )) + elif lang == "tool_call": + segment = _parseToolCallBlock(blockContent) + if segment: + segments.append(segment) + else: + segments.append(ResponseSegment( + type=SegmentTypeEnum.CODE_BLOCK, + content=blockContent, + language="text" + )) else: segments.append(ResponseSegment( type=SegmentTypeEnum.CODE_BLOCK, @@ -66,6 +77,11 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]: return segments +def hasToolCalls(segments: List[ResponseSegment]) -> bool: + """Check if any segments contain tool calls.""" + return any(s.type == SegmentTypeEnum.TOOL_CALL for s in segments) + + def _collectBlock(lines: List[str], startIdx: int) -> tuple: """Collect lines inside a fenced code block until closing ```.""" blockLines = [] @@ -137,3 +153,32 @@ def _parseFileEditBlock(blockContent: str) -> Optional[ResponseSegment]: oldContent=fields["oldContent"], newContent=fields["newContent"] ) + + +def _parseToolCallBlock(blockContent: str) -> Optional[ResponseSegment]: + """Parse a tool_call block into a ResponseSegment with toolName and toolArgs.""" + toolName = None + toolArgs = {} + + for line in blockContent.split("\n"): + stripped = line.strip() + if stripped.startswith("tool:"): + toolName = stripped[len("tool:"):].strip() + elif stripped.startswith("args:"): + argsStr = stripped[len("args:"):].strip() + try: + toolArgs = json.loads(argsStr) + except json.JSONDecodeError: + logger.warning(f"Could not parse tool args as JSON: {argsStr}") + toolArgs = {"raw": argsStr} + + if not toolName: + logger.warning("tool_call block missing tool name") + return None + + return ResponseSegment( + type=SegmentTypeEnum.TOOL_CALL, + content=f"Tool: {toolName}", + toolName=toolName, + toolArgs=toolArgs + ) diff --git a/modules/features/codeeditor/routeFeatureCodeeditor.py b/modules/features/codeeditor/routeFeatureCodeeditor.py index 421e9bbe..0d76389c 100644 --- a/modules/features/codeeditor/routeFeatureCodeeditor.py +++ b/modules/features/codeeditor/routeFeatureCodeeditor.py @@ -80,10 +80,11 @@ async def streamCodeeditorStart( request: Request, instanceId: str = Path(..., description="Feature instance ID"), workflowId: Optional[str] = Query(None, description="Optional workflow ID to continue"), + mode: str = Query("simple", description="Processing mode: 'simple' (single AI call) or 'agent' (multi-step with tools)"), userInput: UserInputRequest = Body(...), context: RequestContext = Depends(getRequestContext) ): - """Start or continue a CodeEditor workflow with SSE streaming.""" + """Start or continue a CodeEditor workflow with SSE streaming. Supports simple and agent mode.""" try: mandateId = _validateInstanceAccess(instanceId, context) chatInterface = _getServiceChat(context, featureInstanceId=instanceId) @@ -116,6 +117,8 @@ async def streamCodeeditorStart( selectedFileIds = userInput.listFileId or [] + agentMode = mode.lower() == "agent" + asyncio.create_task( codeEditorProcessor.processMessage( workflowId=workflowId, @@ -124,7 +127,8 @@ async def streamCodeeditorStart( dbManagement=dbManagement, interfaceAi=aiObjects, chatInterface=chatInterface, - eventManager=eventManager + eventManager=eventManager, + agentMode=agentMode ) ) @@ -319,40 +323,52 @@ async def applyEdit( proposalData: Dict[str, Any] = Body(...), context: RequestContext = Depends(getRequestContext) ) -> Dict[str, Any]: - """Accept a file edit proposal and create a new file version.""" + """Accept a file edit proposal. Updates existing file or creates new one.""" try: _validateInstanceAccess(instanceId, context) dbManagement = _getDbManagement(context, featureInstanceId=instanceId) - fileId = proposalData.get("fileId") + fileId = proposalData.get("fileId", "") newContent = proposalData.get("newContent") fileName = proposalData.get("fileName", "") - if not fileId or newContent is None: - raise HTTPException(status_code=400, detail="fileId and newContent are required") + if newContent is None: + raise HTTPException(status_code=400, detail="newContent is required") - fileItem = dbManagement.getFile(fileId) - if not fileItem: - raise HTTPException(status_code=404, detail=f"File {fileId} not found") + contentBytes = newContent.encode("utf-8") + isNewFile = not fileId or fileId.startswith("unknown-") - success = dbManagement.createFileData(fileId, newContent.encode("utf-8")) - if not success: - raise HTTPException(status_code=500, detail="Failed to store updated file content") + if isNewFile: + mimeType = _guessMimeType(fileName) + fileItem = dbManagement.createFile(fileName, mimeType, contentBytes) + resultFileId = fileItem.id + resultFileName = fileItem.fileName + else: + fileItem = dbManagement.getFile(fileId) + if not fileItem: + raise HTTPException(status_code=404, detail=f"File {fileId} not found") + success = dbManagement.createFileData(fileId, contentBytes) + if not success: + raise HTTPException(status_code=500, detail="Failed to store updated file content") + resultFileId = fileId + resultFileName = fileName or fileItem.fileName eventManager = get_event_manager() await eventManager.emit_event(workflowId, "chatdata", { "type": "file_version", "item": { - "fileId": fileId, - "fileName": fileName or fileItem.fileName, - "status": "accepted" + "fileId": resultFileId, + "fileName": resultFileName, + "status": "accepted", + "isNew": isNewFile } }) return { "status": "accepted", - "fileId": fileId, - "fileName": fileName or fileItem.fileName + "fileId": resultFileId, + "fileName": resultFileName, + "isNew": isNewFile } except HTTPException: @@ -360,3 +376,20 @@ async def applyEdit( except Exception as e: logger.error(f"Error applying edit: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) + + +_MIME_MAP = { + ".md": "text/markdown", ".txt": "text/plain", ".json": "application/json", + ".yaml": "application/yaml", ".yml": "application/yaml", ".xml": "application/xml", + ".csv": "text/csv", ".py": "text/x-python", ".js": "text/javascript", + ".ts": "text/x-typescript", ".html": "text/html", ".css": "text/css", + ".sql": "text/x-sql", ".sh": "text/x-shellscript", +} + + +def _guessMimeType(fileName: str) -> str: + """Guess MIME type from file extension.""" + if not fileName or "." not in fileName: + return "text/plain" + ext = "." + fileName.rsplit(".", 1)[-1].lower() + return _MIME_MAP.get(ext, "text/plain") diff --git a/modules/features/codeeditor/toolRegistry.py b/modules/features/codeeditor/toolRegistry.py new file mode 100644 index 00000000..69256671 --- /dev/null +++ b/modules/features/codeeditor/toolRegistry.py @@ -0,0 +1,157 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +"""Tool registry and dispatcher for the CodeEditor agent loop. +Defines available tools and executes them against the file context manager.""" + +import logging +import time +import fnmatch +from typing import Dict, Any, List + +from modules.features.codeeditor.datamodelCodeeditor import ToolResult + +logger = logging.getLogger(__name__) + +TOOL_DEFINITIONS = [ + { + "name": "read_file", + "description": "Read the full content of a single file by its fileId.", + "parameters": {"fileId": "string (required)"} + }, + { + "name": "list_files", + "description": "List all available text files with metadata (name, size, mimeType). Optionally filter by glob pattern.", + "parameters": {"filter": "string (optional, glob pattern e.g. '*.py')"} + }, + { + "name": "search_files", + "description": "Search all file contents for a text query. Returns matching lines with file name and line number.", + "parameters": {"query": "string (required)", "fileType": "string (optional, extension e.g. 'py')"} + }, +] + + +async def dispatch(toolName: str, toolArgs: Dict[str, Any], dbManagement) -> ToolResult: + """Execute a tool and return the result.""" + startTime = time.time() + try: + if toolName == "read_file": + result = await _toolReadFile(toolArgs, dbManagement) + elif toolName == "list_files": + result = _toolListFiles(toolArgs, dbManagement) + elif toolName == "search_files": + result = await _toolSearchFiles(toolArgs, dbManagement) + else: + result = f"Unknown tool: {toolName}" + return ToolResult(toolName=toolName, result=result, success=False, + executionTime=time.time() - startTime) + + return ToolResult(toolName=toolName, result=result, success=True, + executionTime=time.time() - startTime) + except Exception as e: + logger.error(f"Tool {toolName} failed: {e}", exc_info=True) + return ToolResult(toolName=toolName, result=f"Error: {str(e)}", success=False, + executionTime=time.time() - startTime) + + +async def _toolReadFile(args: Dict[str, Any], dbManagement) -> str: + """Read a single file's content.""" + fileId = args.get("fileId", "") + if not fileId: + return "Error: fileId is required" + + fileItem = dbManagement.getFile(fileId) + if not fileItem: + return f"Error: File {fileId} not found" + + fileData = dbManagement.getFileData(fileId) + if not fileData: + return f"Error: No data for file {fileId}" + + try: + content = fileData.decode("utf-8") + except UnicodeDecodeError: + return f"Error: File {fileItem.fileName} is not valid UTF-8" + + lines = content.split("\n") + numbered = "\n".join([f"{i + 1}|{line}" for i, line in enumerate(lines)]) + return f"--- FILE: {fileItem.fileName} (id: {fileId}) ---\n{numbered}\n--- END FILE ---" + + +def _toolListFiles(args: Dict[str, Any], dbManagement) -> str: + """List all text files, optionally filtered by glob pattern.""" + from modules.features.codeeditor.datamodelCodeeditor import isTextFile + + filterPattern = args.get("filter", "") + allFiles = dbManagement.getAllFiles() + if not allFiles: + return "No files found." + + lines = [] + for f in allFiles: + if not isTextFile(f.mimeType, f.fileName): + continue + if filterPattern and not fnmatch.fnmatch(f.fileName, filterPattern): + continue + lines.append(f"- {f.fileName} (id: {f.id}, size: {f.fileSize}B, type: {f.mimeType})") + + if not lines: + return "No matching text files found." + return f"Available files ({len(lines)}):\n" + "\n".join(lines) + + +async def _toolSearchFiles(args: Dict[str, Any], dbManagement) -> str: + """Search file contents for a query string.""" + from modules.features.codeeditor.datamodelCodeeditor import isTextFile + + query = args.get("query", "") + if not query: + return "Error: query is required" + + fileType = args.get("fileType", "") + allFiles = dbManagement.getAllFiles() + if not allFiles: + return "No files to search." + + hits = [] + maxHits = 50 + queryLower = query.lower() + + for f in allFiles: + if not isTextFile(f.mimeType, f.fileName): + continue + if fileType and not f.fileName.endswith(f".{fileType}"): + continue + + fileData = dbManagement.getFileData(f.id) + if not fileData: + continue + + try: + content = fileData.decode("utf-8") + except UnicodeDecodeError: + continue + + for lineNum, line in enumerate(content.split("\n"), 1): + if queryLower in line.lower(): + hits.append(f"{f.fileName}:{lineNum}: {line.strip()}") + if len(hits) >= maxHits: + break + if len(hits) >= maxHits: + break + + if not hits: + return f"No matches found for '{query}'." + result = f"Search results for '{query}' ({len(hits)} matches):\n" + "\n".join(hits) + if len(hits) >= maxHits: + result += f"\n... (truncated at {maxHits} matches)" + return result + + +def formatToolDefinitions() -> str: + """Format tool definitions for inclusion in the system prompt.""" + parts = [] + for tool in TOOL_DEFINITIONS: + params = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()]) + parts.append(f"- **{tool['name']}**: {tool['description']}\n Parameters: {{{params}}}") + return "\n".join(parts) diff --git a/modules/features/teamsbot/service.py b/modules/features/teamsbot/service.py index 919b77fa..fa468ff2 100644 --- a/modules/features/teamsbot/service.py +++ b/modules/features/teamsbot/service.py @@ -401,14 +401,22 @@ class TeamsbotService: if len(audioBytes) < 1000: return + # Detect silent/all-zeros audio early to avoid expensive STT calls + if len(set(audioBytes[100:min(500, len(audioBytes))])) < 3: + logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, low byte variation)") + return + if not voiceInterface: logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}") return + # Treat sampleRate=0 as unknown (triggers auto-detection) + effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None + sttResult = await voiceInterface.speechToText( audioContent=audioBytes, language=self.config.language or "de-DE", - sampleRate=sampleRate, + sampleRate=effectiveSampleRate, ) if sttResult and sttResult.get("success") and sttResult.get("text"):