Phase 2: CodeEditor agent loop and voice hotfixes

This commit is contained in:
patrick-motsch 2026-02-23 22:09:27 +01:00
parent 338f9522a5
commit f6f42d8db7
10 changed files with 605 additions and 90 deletions

7
app.py
View file

@ -461,6 +461,13 @@ app.add_middleware(
max_age=86400, # Increased caching for preflight requests
)
# SlowAPI rate limiter initialization
from modules.auth import limiter
from slowapi.errors import RateLimitExceeded
from slowapi import _rate_limit_exceeded_handler
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# CSRF protection middleware
from modules.auth import CSRFMiddleware
from modules.auth import (

View file

@ -7,6 +7,7 @@ Replaces Azure Speech Services with Google Cloud APIs
import json
import html
import asyncio
import logging
from typing import Dict, Optional, Any
from google.cloud import speech
@ -73,6 +74,11 @@ class ConnectorGoogleSpeech:
Dict containing transcribed text, confidence, and metadata
"""
try:
# Treat sampleRate=0 as unknown (invalid value from client)
if sampleRate is not None and sampleRate <= 0:
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
sampleRate = None
# Auto-detect audio format if not provided
if sampleRate is None or channels is None:
validation = self.validateAudioFormat(audioContent)
@ -164,8 +170,11 @@ class ConnectorGoogleSpeech:
try:
# Use regular recognition for single audio files (not streaming)
# Run in thread pool to avoid blocking the asyncio event loop
logger.info("Using regular recognition for single audio file...")
response = self.speech_client.recognize(config=config, audio=audio)
response = await asyncio.to_thread(
self.speech_client.recognize, config=config, audio=audio
)
logger.debug(f"Google Cloud response: {response}")
except Exception as apiError:
@ -175,7 +184,7 @@ class ConnectorGoogleSpeech:
logger.info("Trying fallback with LINEAR16 encoding...")
fallbackConfig = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000, # Use standard sample rate
sample_rate_hertz=16000,
audio_channel_count=1,
language_code=language,
enable_automatic_punctuation=True,
@ -183,7 +192,9 @@ class ConnectorGoogleSpeech:
)
try:
response = self.speech_client.recognize(config=fallbackConfig, audio=audio)
response = await asyncio.to_thread(
self.speech_client.recognize, config=fallbackConfig, audio=audio
)
logger.debug(f"Google Cloud fallback response: {response}")
except Exception as fallbackError:
logger.error(f"Google Cloud fallback error: {fallbackError}")
@ -297,7 +308,18 @@ class ConnectorGoogleSpeech:
"description": f"LINEAR16 with {std_rate}Hz"
})
# Try with different models
# Detect likely silence before expensive fallback loop
if len(audioContent) > 100:
sampleSlice = audioContent[100:min(500, len(audioContent))]
if len(set(sampleSlice)) < 3:
logger.warning("Audio appears silent (low byte variation) - skipping fallbacks")
return {
"success": False,
"text": "",
"confidence": 0.0,
"error": "No recognition results (silence or unclear audio)"
}
models = ["latest_long", "phone_call", "latest_short"]
for fallback_config in fallback_configs:
@ -305,7 +327,6 @@ class ConnectorGoogleSpeech:
try:
logger.info(f"Trying fallback: {fallback_config['description']} with {model} model...")
# Build fallback config with proper sample rate handling
fallback_config_params = {
"encoding": fallback_config["encoding"],
"audio_channel_count": fallback_config["channels"],
@ -314,12 +335,13 @@ class ConnectorGoogleSpeech:
"model": model
}
# Only add sample_rate_hertz if needed
if fallback_config["use_sample_rate"]:
fallback_config_params["sample_rate_hertz"] = fallback_config["sample_rate"]
fallback_config_obj = speech.RecognitionConfig(**fallback_config_params)
fallback_response = self.speech_client.recognize(config=fallback_config_obj, audio=audio)
fallback_response = await asyncio.to_thread(
self.speech_client.recognize, config=fallback_config_obj, audio=audio
)
if fallback_response.results:
result = fallback_response.results[0]

View file

@ -1,16 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""CodeEditor processor -- single-shot orchestrator (Phase 1).
Loads files, builds prompt, calls AI, parses response, emits SSE events."""
"""CodeEditor processor -- single-shot (Phase 1) and agent loop (Phase 2).
Orchestrates file loading, prompt building, AI calls, response parsing, and SSE emission."""
import logging
import uuid
from typing import List, Optional, Dict, Any
from typing import List, Dict, Any
from modules.features.codeeditor import fileContextManager, promptAssembly, responseParser
from modules.features.codeeditor.datamodelCodeeditor import (
FileEditProposal, ResponseSegment, SegmentTypeEnum
FileEditProposal, SegmentTypeEnum, AgentState
)
from modules.features.codeeditor import toolRegistry
from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__)
@ -23,78 +23,41 @@ async def processMessage(
dbManagement,
interfaceAi,
chatInterface,
eventManager
eventManager,
agentMode: bool = False
):
"""Process a user message: load files, call AI, parse and emit response segments.
"""Process a user message. Dispatches to single-shot or agent loop based on mode."""
if agentMode:
await _processAgentMessage(
workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager
)
else:
await _processSingleShot(
workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager
)
Args:
workflowId: the active workflow ID
userPrompt: user's input text
selectedFileIds: file IDs the user selected as context
dbManagement: interfaceDbManagement instance with user context
interfaceAi: AiObjects instance for AI calls
chatInterface: interfaceDbChat instance for storing messages
eventManager: EventManager for SSE emission
"""
async def _processSingleShot(
workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager
):
"""Phase 1: Single AI call with pre-loaded file context."""
try:
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": "Loading files..."
})
await _emitStatus(eventManager, workflowId, "Loading files...")
fileContexts = await fileContextManager.loadFileContexts(dbManagement, selectedFileIds)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": "Building prompt..."
})
await _emitStatus(eventManager, workflowId, "Building prompt...")
chatHistory = _loadChatHistory(chatInterface, workflowId)
aiRequest = promptAssembly.buildRequest(userPrompt, fileContexts, chatHistory)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": "AI is processing..."
})
await _emitStatus(eventManager, workflowId, "AI is processing...")
aiResponse = await interfaceAi.callWithTextContext(aiRequest)
if aiResponse.errorCount > 0:
logger.error(f"AI call failed: {aiResponse.content}")
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message",
"item": {"role": "assistant", "content": f"Error: {aiResponse.content}"}
})
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": aiResponse.content
})
await _emitError(eventManager, workflowId, aiResponse.content)
return
segments = responseParser.parseResponse(aiResponse.content)
for segment in segments:
messageData = {
"role": "assistant",
"content": segment.content,
"type": segment.type.value,
"createdAt": getUtcTimestamp()
}
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message", "item": messageData
})
if segment.type == SegmentTypeEnum.FILE_EDIT:
proposal = FileEditProposal(
workflowId=workflowId,
fileId=_resolveFileId(segment.fileName, fileContexts),
fileName=segment.fileName,
operation="edit",
oldContent=segment.oldContent,
newContent=segment.newContent
)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "file_edit_proposal", "item": proposal.model_dump()
})
await _emitSegments(eventManager, workflowId, segments, fileContexts)
_logAiStats(aiResponse, workflowId)
await eventManager.emit_event(workflowId, "complete", {
@ -105,12 +68,181 @@ async def processMessage(
})
except Exception as e:
logger.error(f"CodeEditor processing failed for workflow {workflowId}: {e}", exc_info=True)
logger.error(f"CodeEditor single-shot failed for {workflowId}: {e}", exc_info=True)
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": str(e)
})
async def _processAgentMessage(
    workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager
):
    """Phase 2: Agent loop -- multiple AI calls with tool execution until done.

    Runs up to state.maxRounds rounds. Each round: emit a progress event, call
    the AI, emit any text/file-edit segments immediately, then execute every
    tool_call segment and feed the combined tool output back as the next
    request. The loop ends when a response contains no tool calls (completed),
    an AI call reports an error, or the round cap is reached.

    Args:
        workflowId: active workflow ID, used as the SSE routing key.
        userPrompt: the user's task text (embedded directly only in round 1).
        dbManagement: data-access object handed through to tool execution.
        interfaceAi: AI interface exposing callWithTextContext().
        chatInterface: chat persistence interface (not used in this loop).
        eventManager: SSE EventManager for chatdata/complete/error events.
    """
    state = AgentState(workflowId=workflowId)
    try:
        await _emitStatus(eventManager, workflowId, "Agent: Scanning available files...")
        # Metadata-only file list (names/ids/sizes, no contents) keeps the prompt small.
        fileListContext = fileContextManager.buildFileListContext(dbManagement)
        state.conversationHistory.append({"role": "user", "content": userPrompt})
        # First request is built with an empty history: the prompt builder
        # embeds the task directly instead of a conversation transcript.
        aiRequest = promptAssembly.buildAgentRequest(
            userPrompt=userPrompt,
            fileListContext=fileListContext,
            conversationHistory=[]
        )
        while state.status == "running" and state.currentRound < state.maxRounds:
            state.currentRound += 1
            state.totalAiCalls += 1
            await _emitStatus(eventManager, workflowId,
                f"Agent round {state.currentRound}: AI is thinking...")
            # Progress event lets the client render live round/cost counters.
            await eventManager.emit_event(workflowId, "chatdata", {
                "type": "agent_progress",
                "item": {
                    "round": state.currentRound,
                    "totalAiCalls": state.totalAiCalls,
                    "totalToolCalls": state.totalToolCalls,
                    "costCHF": round(state.totalCostCHF, 4),
                }
            })
            aiResponse = await interfaceAi.callWithTextContext(aiRequest)
            # Cost/time are accumulated even for failed calls.
            state.totalCostCHF += aiResponse.priceCHF
            state.totalProcessingTime += aiResponse.processingTime
            if aiResponse.errorCount > 0:
                logger.error(f"Agent AI call failed in round {state.currentRound}: {aiResponse.content}")
                await _emitError(eventManager, workflowId, aiResponse.content)
                state.status = "error"
                break
            _logAiStats(aiResponse, workflowId)
            state.conversationHistory.append({"role": "assistant", "content": aiResponse.content})
            segments = responseParser.parseResponse(aiResponse.content)
            # Emit text and file-edit segments right away; tool calls are run below.
            textAndEditSegments = [s for s in segments if s.type != SegmentTypeEnum.TOOL_CALL]
            if textAndEditSegments:
                await _emitSegments(eventManager, workflowId, textAndEditSegments, [])
            toolCallSegments = [s for s in segments if s.type == SegmentTypeEnum.TOOL_CALL]
            if not toolCallSegments:
                # A response with no tool calls is the agent's "I am done" signal.
                state.status = "completed"
                break
            toolResultTexts = []
            for tc in toolCallSegments:
                state.totalToolCalls += 1
                await _emitStatus(eventManager, workflowId,
                    f"Agent: Running {tc.toolName}...")
                result = await toolRegistry.dispatch(tc.toolName, tc.toolArgs or {}, dbManagement)
                toolResultTexts.append(f"[{tc.toolName}] (success={result.success}):\n{result.result}")
                logger.info(f"Agent tool {tc.toolName}: success={result.success}, time={result.executionTime:.2f}s")
            # All tool outputs of this round go back to the AI as one batched message.
            combinedResults = "\n\n".join(toolResultTexts)
            state.conversationHistory.append({
                "role": "tool_result",
                "content": combinedResults,
                "toolName": "batch"
            })
            # Follow-up request: no new user prompt, full history instead.
            aiRequest = promptAssembly.buildAgentRequest(
                userPrompt=None,
                fileListContext=fileListContext,
                conversationHistory=state.conversationHistory
            )
        if state.currentRound >= state.maxRounds and state.status == "running":
            # Loop exhausted without a completion signal -- tell the user.
            state.status = "max_rounds"
            await eventManager.emit_event(workflowId, "chatdata", {
                "type": "message",
                "item": {
                    "role": "system",
                    "content": f"Agent stopped: maximum rounds ({state.maxRounds}) reached.",
                    "createdAt": getUtcTimestamp()
                }
            })
        # Summary and complete events are emitted for every terminal status.
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "agent_summary",
            "item": {
                "rounds": state.currentRound,
                "totalAiCalls": state.totalAiCalls,
                "totalToolCalls": state.totalToolCalls,
                "costCHF": round(state.totalCostCHF, 4),
                "processingTime": round(state.totalProcessingTime, 1),
                "status": state.status,
            }
        })
        await eventManager.emit_event(workflowId, "complete", {
            "workflowId": workflowId,
            "agentRounds": state.currentRound,
            "totalCostCHF": round(state.totalCostCHF, 4),
            "processingTime": round(state.totalProcessingTime, 1)
        })
    except Exception as e:
        logger.error(f"CodeEditor agent loop failed for {workflowId}: {e}", exc_info=True)
        await eventManager.emit_event(workflowId, "error", {
            "workflowId": workflowId, "error": str(e)
        })
# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------
async def _emitStatus(eventManager, workflowId: str, label: str):
await eventManager.emit_event(workflowId, "chatdata", {
"type": "status", "label": label
})
async def _emitError(eventManager, workflowId: str, errorMsg: str):
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message",
"item": {"role": "assistant", "content": f"Error: {errorMsg}"}
})
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": errorMsg
})
async def _emitSegments(eventManager, workflowId: str, segments, fileContexts):
    """Emit each parsed segment as a chat message over SSE; FILE_EDIT segments
    additionally produce a file_edit_proposal event carrying a FileEditProposal."""
    for seg in segments:
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "message",
            "item": {
                "role": "assistant",
                "content": seg.content,
                "type": seg.type.value,
                "createdAt": getUtcTimestamp(),
            },
        })
        if seg.type != SegmentTypeEnum.FILE_EDIT:
            continue
        proposal = FileEditProposal(
            workflowId=workflowId,
            fileId=_resolveFileId(seg.fileName, fileContexts),
            fileName=seg.fileName,
            operation="edit",
            oldContent=seg.oldContent,
            newContent=seg.newContent,
        )
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "file_edit_proposal",
            "item": proposal.model_dump(),
        })
def _loadChatHistory(chatInterface, workflowId: str) -> List[Dict[str, Any]]:
"""Load recent chat messages for multi-turn context."""
try:

View file

@ -13,6 +13,7 @@ class SegmentTypeEnum(str, Enum):
TEXT = "text"
CODE_BLOCK = "code_block"
FILE_EDIT = "file_edit"
TOOL_CALL = "tool_call"
class EditStatusEnum(str, Enum):
@ -40,6 +41,8 @@ class ResponseSegment(BaseModel):
fileName: Optional[str] = None
oldContent: Optional[str] = None
newContent: Optional[str] = None
toolName: Optional[str] = None
toolArgs: Optional[Dict[str, Any]] = None
class FileEditProposal(BaseModel):
@ -65,6 +68,27 @@ class FileVersion(BaseModel):
createdAt: float = Field(default_factory=getUtcTimestamp)
class AgentState(BaseModel):
    """Tracks state across an agent loop execution."""
    # Workflow this agent run belongs to (used as SSE routing key).
    workflowId: str
    # 1-based round counter, incremented at the start of each AI call.
    currentRound: int = 0
    # Hard cap on agent rounds to bound runtime and cost.
    maxRounds: int = 50
    # Total number of AI calls made (one per round).
    totalAiCalls: int = 0
    # Total number of tool executions across all rounds.
    totalToolCalls: int = 0
    # Accumulated AI spend in CHF across all rounds.
    totalCostCHF: float = 0.0
    # Accumulated AI processing time in seconds.
    totalProcessingTime: float = 0.0
    # Ordered user/assistant/tool_result message dicts fed back to the AI.
    conversationHistory: List[Dict[str, Any]] = Field(default_factory=list)
    # One of: "running", "completed", "error", "max_rounds".
    status: str = "running"
class ToolResult(BaseModel):
    """Result from executing a tool."""
    # Name of the tool that was executed (echoed back for logging/display).
    toolName: str
    # Human-readable tool output, or an "Error: ..." message on failure.
    result: str
    # False when the tool raised or the tool name was unknown.
    success: bool = True
    # Wall-clock execution time in seconds.
    executionTime: float = 0.0
TEXT_MIME_TYPES = {
"text/plain", "text/markdown", "text/html", "text/css", "text/csv",
"text/xml", "text/yaml", "text/x-python", "text/x-java",

View file

@ -71,3 +71,12 @@ def listTextFiles(dbManagement) -> List[FileContext]:
))
return textFiles
def buildFileListContext(dbManagement) -> str:
    """Build a compact file list string for the agent prompt (metadata only, no contents)."""
    availableFiles = listTextFiles(dbManagement)
    if not availableFiles:
        return "No text files available."
    entries = "\n".join(
        f"- {item.fileName} (id: {item.fileId}, size: {item.sizeBytes}B)"
        for item in availableFiles
    )
    return f"Total: {len(availableFiles)} text files\n" + entries

View file

@ -89,6 +89,84 @@ def _buildFileContext(fileContexts: List[FileContext]) -> str:
return "\n\n".join(parts)
def buildAgentRequest(
    userPrompt: Optional[str],
    fileListContext: str,
    conversationHistory: List[Dict[str, Any]]
) -> AiCallRequest:
    """Build an AiCallRequest for agent mode with tool definitions and conversation history."""
    # Imported here to avoid a module-level import cycle with the tool registry.
    from modules.features.codeeditor.toolRegistry import formatToolDefinitions

    fullPrompt = _AGENT_SYSTEM_PROMPT.replace("{{TOOL_DEFINITIONS}}", formatToolDefinitions())
    if conversationHistory:
        # Follow-up rounds: send the whole transcript instead of a task section.
        historyText = _buildConversationHistory(conversationHistory)
        context = f"## Available files\n{fileListContext}\n\n## Conversation\n{historyText}"
    else:
        # First round: embed the task directly.
        context = f"## Available files\n{fileListContext}\n\n## Task\n{userPrompt}"
    return AiCallRequest(
        prompt=fullPrompt,
        context=context,
        options=AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            temperature=0.0,
            compressPrompt=False,
            compressContext=False,
            resultFormat="txt"
        )
    )
# System prompt template for agent mode. The {{TOOL_DEFINITIONS}} placeholder
# is substituted with the rendered tool catalog by buildAgentRequest().
_AGENT_SYSTEM_PROMPT = """You are an AI agent for file analysis and editing. You work autonomously by using tools to read files, search content, and propose edits.
## Available tools
{{TOOL_DEFINITIONS}}
## How to call tools
Use this exact format for each tool call:
```tool_call
tool: <tool_name>
args: {"param": "value"}
```
## Rules
- Read files ONE AT A TIME with read_file, never assume file contents
- First create a plan, then execute it step by step
- Use search_files to find relevant files before reading them
- Use list_files to discover what files are available
- For file changes, use ```file_edit``` blocks (same format as before)
- You may combine text explanations, tool calls, and file edits in one response
- When you are DONE and need no more tool calls, simply respond with text only (no tool_call blocks)
- Keep responses focused and efficient
## file_edit format (for changes)
```file_edit
fileName: <filename>
oldContent: |
<exact existing content>
newContent: |
<replacement content>
```"""
def _buildConversationHistory(history: List[Dict[str, Any]]) -> str:
"""Build the full conversation history for agent multi-turn context."""
parts = []
for msg in history:
role = msg.get("role", "unknown")
content = msg.get("content", "")
if role == "tool_result":
toolName = msg.get("toolName", "")
parts.append(f"[Tool Result - {toolName}]:\n{content}")
else:
parts.append(f"[{role}]:\n{content}")
return "\n\n".join(parts)
def _buildChatHistory(chatHistory: List[Dict[str, Any]]) -> str:
"""Build a condensed chat history string for multi-turn context."""
if not chatHistory:

View file

@ -1,9 +1,10 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Response parser for the CodeEditor feature.
Parses AI responses into typed segments (text, code_block, file_edit)."""
Parses AI responses into typed segments (text, code_block, file_edit, tool_call)."""
import logging
import json
import re
from typing import List, Optional
@ -48,6 +49,16 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]:
content=blockContent,
language="text"
))
elif lang == "tool_call":
segment = _parseToolCallBlock(blockContent)
if segment:
segments.append(segment)
else:
segments.append(ResponseSegment(
type=SegmentTypeEnum.CODE_BLOCK,
content=blockContent,
language="text"
))
else:
segments.append(ResponseSegment(
type=SegmentTypeEnum.CODE_BLOCK,
@ -66,6 +77,11 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]:
return segments
def hasToolCalls(segments: List[ResponseSegment]) -> bool:
    """Check if any segments contain tool calls."""
    for segment in segments:
        if segment.type == SegmentTypeEnum.TOOL_CALL:
            return True
    return False
def _collectBlock(lines: List[str], startIdx: int) -> tuple:
"""Collect lines inside a fenced code block until closing ```."""
blockLines = []
@ -137,3 +153,32 @@ def _parseFileEditBlock(blockContent: str) -> Optional[ResponseSegment]:
oldContent=fields["oldContent"],
newContent=fields["newContent"]
)
def _parseToolCallBlock(blockContent: str) -> Optional[ResponseSegment]:
    """Parse a tool_call block into a ResponseSegment with toolName and toolArgs.

    Expects "tool: <name>" and "args: <json>" lines; returns None when no
    tool name is present. Unparseable args are preserved under a "raw" key.
    """
    parsedName = None
    parsedArgs = {}
    for rawLine in blockContent.split("\n"):
        line = rawLine.strip()
        if line.startswith("tool:"):
            parsedName = line[5:].strip()
        elif line.startswith("args:"):
            argsText = line[5:].strip()
            try:
                parsedArgs = json.loads(argsText)
            except json.JSONDecodeError:
                # Keep the raw text so the tool still sees what was sent.
                logger.warning(f"Could not parse tool args as JSON: {argsText}")
                parsedArgs = {"raw": argsText}
    if not parsedName:
        logger.warning("tool_call block missing tool name")
        return None
    return ResponseSegment(
        type=SegmentTypeEnum.TOOL_CALL,
        content=f"Tool: {parsedName}",
        toolName=parsedName,
        toolArgs=parsedArgs
    )

View file

@ -80,10 +80,11 @@ async def streamCodeeditorStart(
request: Request,
instanceId: str = Path(..., description="Feature instance ID"),
workflowId: Optional[str] = Query(None, description="Optional workflow ID to continue"),
mode: str = Query("simple", description="Processing mode: 'simple' (single AI call) or 'agent' (multi-step with tools)"),
userInput: UserInputRequest = Body(...),
context: RequestContext = Depends(getRequestContext)
):
"""Start or continue a CodeEditor workflow with SSE streaming."""
"""Start or continue a CodeEditor workflow with SSE streaming. Supports simple and agent mode."""
try:
mandateId = _validateInstanceAccess(instanceId, context)
chatInterface = _getServiceChat(context, featureInstanceId=instanceId)
@ -116,6 +117,8 @@ async def streamCodeeditorStart(
selectedFileIds = userInput.listFileId or []
agentMode = mode.lower() == "agent"
asyncio.create_task(
codeEditorProcessor.processMessage(
workflowId=workflowId,
@ -124,7 +127,8 @@ async def streamCodeeditorStart(
dbManagement=dbManagement,
interfaceAi=aiObjects,
chatInterface=chatInterface,
eventManager=eventManager
eventManager=eventManager,
agentMode=agentMode
)
)
@ -319,40 +323,52 @@ async def applyEdit(
proposalData: Dict[str, Any] = Body(...),
context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]:
"""Accept a file edit proposal and create a new file version."""
"""Accept a file edit proposal. Updates existing file or creates new one."""
try:
_validateInstanceAccess(instanceId, context)
dbManagement = _getDbManagement(context, featureInstanceId=instanceId)
fileId = proposalData.get("fileId")
fileId = proposalData.get("fileId", "")
newContent = proposalData.get("newContent")
fileName = proposalData.get("fileName", "")
if not fileId or newContent is None:
raise HTTPException(status_code=400, detail="fileId and newContent are required")
if newContent is None:
raise HTTPException(status_code=400, detail="newContent is required")
fileItem = dbManagement.getFile(fileId)
if not fileItem:
raise HTTPException(status_code=404, detail=f"File {fileId} not found")
contentBytes = newContent.encode("utf-8")
isNewFile = not fileId or fileId.startswith("unknown-")
success = dbManagement.createFileData(fileId, newContent.encode("utf-8"))
if not success:
raise HTTPException(status_code=500, detail="Failed to store updated file content")
if isNewFile:
mimeType = _guessMimeType(fileName)
fileItem = dbManagement.createFile(fileName, mimeType, contentBytes)
resultFileId = fileItem.id
resultFileName = fileItem.fileName
else:
fileItem = dbManagement.getFile(fileId)
if not fileItem:
raise HTTPException(status_code=404, detail=f"File {fileId} not found")
success = dbManagement.createFileData(fileId, contentBytes)
if not success:
raise HTTPException(status_code=500, detail="Failed to store updated file content")
resultFileId = fileId
resultFileName = fileName or fileItem.fileName
eventManager = get_event_manager()
await eventManager.emit_event(workflowId, "chatdata", {
"type": "file_version",
"item": {
"fileId": fileId,
"fileName": fileName or fileItem.fileName,
"status": "accepted"
"fileId": resultFileId,
"fileName": resultFileName,
"status": "accepted",
"isNew": isNewFile
}
})
return {
"status": "accepted",
"fileId": fileId,
"fileName": fileName or fileItem.fileName
"fileId": resultFileId,
"fileName": resultFileName,
"isNew": isNewFile
}
except HTTPException:
@ -360,3 +376,20 @@ async def applyEdit(
except Exception as e:
logger.error(f"Error applying edit: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
_MIME_MAP = {
".md": "text/markdown", ".txt": "text/plain", ".json": "application/json",
".yaml": "application/yaml", ".yml": "application/yaml", ".xml": "application/xml",
".csv": "text/csv", ".py": "text/x-python", ".js": "text/javascript",
".ts": "text/x-typescript", ".html": "text/html", ".css": "text/css",
".sql": "text/x-sql", ".sh": "text/x-shellscript",
}
def _guessMimeType(fileName: str) -> str:
"""Guess MIME type from file extension."""
if not fileName or "." not in fileName:
return "text/plain"
ext = "." + fileName.rsplit(".", 1)[-1].lower()
return _MIME_MAP.get(ext, "text/plain")

View file

@ -0,0 +1,157 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Tool registry and dispatcher for the CodeEditor agent loop.
Defines available tools and executes them against the file context manager."""
import logging
import time
import fnmatch
from typing import Dict, Any, List
from modules.features.codeeditor.datamodelCodeeditor import ToolResult
logger = logging.getLogger(__name__)
# Tool catalog exposed to the agent loop. The "name" values must match the
# branches handled in dispatch(); "parameters" are human-readable specs that
# formatToolDefinitions() renders into the system prompt.
TOOL_DEFINITIONS = [
    {
        "name": "read_file",
        "description": "Read the full content of a single file by its fileId.",
        "parameters": {"fileId": "string (required)"}
    },
    {
        "name": "list_files",
        "description": "List all available text files with metadata (name, size, mimeType). Optionally filter by glob pattern.",
        "parameters": {"filter": "string (optional, glob pattern e.g. '*.py')"}
    },
    {
        "name": "search_files",
        "description": "Search all file contents for a text query. Returns matching lines with file name and line number.",
        "parameters": {"query": "string (required)", "fileType": "string (optional, extension e.g. 'py')"}
    },
]
async def dispatch(toolName: str, toolArgs: Dict[str, Any], dbManagement) -> ToolResult:
    """Execute a tool by name and wrap its output (or failure) in a ToolResult.

    Unknown tool names and raised exceptions are reported back to the agent
    as success=False results rather than propagated.
    """
    startedAt = time.time()
    try:
        if toolName == "read_file":
            output = await _toolReadFile(toolArgs, dbManagement)
        elif toolName == "list_files":
            output = _toolListFiles(toolArgs, dbManagement)
        elif toolName == "search_files":
            output = await _toolSearchFiles(toolArgs, dbManagement)
        else:
            return ToolResult(toolName=toolName, result=f"Unknown tool: {toolName}",
                              success=False, executionTime=time.time() - startedAt)
        return ToolResult(toolName=toolName, result=output, success=True,
                          executionTime=time.time() - startedAt)
    except Exception as e:
        logger.error(f"Tool {toolName} failed: {e}", exc_info=True)
        return ToolResult(toolName=toolName, result=f"Error: {str(e)}", success=False,
                          executionTime=time.time() - startedAt)
async def _toolReadFile(args: Dict[str, Any], dbManagement) -> str:
"""Read a single file's content."""
fileId = args.get("fileId", "")
if not fileId:
return "Error: fileId is required"
fileItem = dbManagement.getFile(fileId)
if not fileItem:
return f"Error: File {fileId} not found"
fileData = dbManagement.getFileData(fileId)
if not fileData:
return f"Error: No data for file {fileId}"
try:
content = fileData.decode("utf-8")
except UnicodeDecodeError:
return f"Error: File {fileItem.fileName} is not valid UTF-8"
lines = content.split("\n")
numbered = "\n".join([f"{i + 1}|{line}" for i, line in enumerate(lines)])
return f"--- FILE: {fileItem.fileName} (id: {fileId}) ---\n{numbered}\n--- END FILE ---"
def _toolListFiles(args: Dict[str, Any], dbManagement) -> str:
    """List all text files, optionally filtered by a glob pattern on the name."""
    from modules.features.codeeditor.datamodelCodeeditor import isTextFile
    pattern = args.get("filter", "")
    allFiles = dbManagement.getAllFiles()
    if not allFiles:
        return "No files found."
    entries = [
        f"- {f.fileName} (id: {f.id}, size: {f.fileSize}B, type: {f.mimeType})"
        for f in allFiles
        if isTextFile(f.mimeType, f.fileName)
        and (not pattern or fnmatch.fnmatch(f.fileName, pattern))
    ]
    if not entries:
        return "No matching text files found."
    return f"Available files ({len(entries)}):\n" + "\n".join(entries)
async def _toolSearchFiles(args: Dict[str, Any], dbManagement) -> str:
    """Search all text file contents for a case-insensitive query string.

    Args (from toolArgs):
        query: required search text.
        fileType: optional extension filter; accepts "py" or ".py" (the
            leading dot is stripped so both spellings match).

    Returns:
        Matching lines formatted as "fileName:lineNum: text", capped at 50
        hits, or an error / no-match message. Never raises; binary and
        non-text files are skipped.
    """
    from modules.features.codeeditor.datamodelCodeeditor import isTextFile
    query = args.get("query", "")
    if not query:
        return "Error: query is required"
    # Normalize so a caller passing ".py" does not produce a "..py" suffix
    # that would silently filter out every file.
    fileType = args.get("fileType", "").lstrip(".")
    allFiles = dbManagement.getAllFiles()
    if not allFiles:
        return "No files to search."
    hits = []
    maxHits = 50
    queryLower = query.lower()
    for f in allFiles:
        if not isTextFile(f.mimeType, f.fileName):
            continue
        if fileType and not f.fileName.endswith(f".{fileType}"):
            continue
        fileData = dbManagement.getFileData(f.id)
        if not fileData:
            continue
        try:
            content = fileData.decode("utf-8")
        except UnicodeDecodeError:
            # Skip files that are not valid UTF-8 despite a text MIME type.
            continue
        for lineNum, line in enumerate(content.split("\n"), 1):
            if queryLower in line.lower():
                hits.append(f"{f.fileName}:{lineNum}: {line.strip()}")
                if len(hits) >= maxHits:
                    break
        if len(hits) >= maxHits:
            break
    if not hits:
        return f"No matches found for '{query}'."
    result = f"Search results for '{query}' ({len(hits)} matches):\n" + "\n".join(hits)
    if len(hits) >= maxHits:
        result += f"\n... (truncated at {maxHits} matches)"
    return result
def formatToolDefinitions() -> str:
    """Format tool definitions for inclusion in the system prompt."""
    lines = []
    for tool in TOOL_DEFINITIONS:
        paramText = ", ".join(f"{name}: {spec}" for name, spec in tool["parameters"].items())
        lines.append(f"- **{tool['name']}**: {tool['description']}\n Parameters: {{{paramText}}}")
    return "\n".join(lines)

View file

@ -401,14 +401,22 @@ class TeamsbotService:
if len(audioBytes) < 1000:
return
# Detect silent/all-zeros audio early to avoid expensive STT calls
if len(set(audioBytes[100:min(500, len(audioBytes))])) < 3:
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, low byte variation)")
return
if not voiceInterface:
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
return
# Treat sampleRate=0 as unknown (triggers auto-detection)
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
sttResult = await voiceInterface.speechToText(
audioContent=audioBytes,
language=self.config.language or "de-DE",
sampleRate=sampleRate,
sampleRate=effectiveSampleRate,
)
if sttResult and sttResult.get("success") and sttResult.get("text"):