phase 2 codeeditor and hotfixes voice

This commit is contained in:
patrick-motsch 2026-02-23 22:09:27 +01:00
parent 338f9522a5
commit f6f42d8db7
10 changed files with 605 additions and 90 deletions

7
app.py
View file

@ -461,6 +461,13 @@ app.add_middleware(
max_age=86400, # Increased caching for preflight requests max_age=86400, # Increased caching for preflight requests
) )
# SlowAPI rate limiter initialization
from modules.auth import limiter
from slowapi.errors import RateLimitExceeded
from slowapi import _rate_limit_exceeded_handler
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# CSRF protection middleware # CSRF protection middleware
from modules.auth import CSRFMiddleware from modules.auth import CSRFMiddleware
from modules.auth import ( from modules.auth import (

View file

@ -7,6 +7,7 @@ Replaces Azure Speech Services with Google Cloud APIs
import json import json
import html import html
import asyncio
import logging import logging
from typing import Dict, Optional, Any from typing import Dict, Optional, Any
from google.cloud import speech from google.cloud import speech
@ -73,6 +74,11 @@ class ConnectorGoogleSpeech:
Dict containing transcribed text, confidence, and metadata Dict containing transcribed text, confidence, and metadata
""" """
try: try:
# Treat sampleRate=0 as unknown (invalid value from client)
if sampleRate is not None and sampleRate <= 0:
logger.warning(f"Invalid sampleRate={sampleRate}, treating as unknown for auto-detection")
sampleRate = None
# Auto-detect audio format if not provided # Auto-detect audio format if not provided
if sampleRate is None or channels is None: if sampleRate is None or channels is None:
validation = self.validateAudioFormat(audioContent) validation = self.validateAudioFormat(audioContent)
@ -164,8 +170,11 @@ class ConnectorGoogleSpeech:
try: try:
# Use regular recognition for single audio files (not streaming) # Use regular recognition for single audio files (not streaming)
# Run in thread pool to avoid blocking the asyncio event loop
logger.info("Using regular recognition for single audio file...") logger.info("Using regular recognition for single audio file...")
response = self.speech_client.recognize(config=config, audio=audio) response = await asyncio.to_thread(
self.speech_client.recognize, config=config, audio=audio
)
logger.debug(f"Google Cloud response: {response}") logger.debug(f"Google Cloud response: {response}")
except Exception as apiError: except Exception as apiError:
@ -175,7 +184,7 @@ class ConnectorGoogleSpeech:
logger.info("Trying fallback with LINEAR16 encoding...") logger.info("Trying fallback with LINEAR16 encoding...")
fallbackConfig = speech.RecognitionConfig( fallbackConfig = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=16000, # Use standard sample rate sample_rate_hertz=16000,
audio_channel_count=1, audio_channel_count=1,
language_code=language, language_code=language,
enable_automatic_punctuation=True, enable_automatic_punctuation=True,
@ -183,7 +192,9 @@ class ConnectorGoogleSpeech:
) )
try: try:
response = self.speech_client.recognize(config=fallbackConfig, audio=audio) response = await asyncio.to_thread(
self.speech_client.recognize, config=fallbackConfig, audio=audio
)
logger.debug(f"Google Cloud fallback response: {response}") logger.debug(f"Google Cloud fallback response: {response}")
except Exception as fallbackError: except Exception as fallbackError:
logger.error(f"Google Cloud fallback error: {fallbackError}") logger.error(f"Google Cloud fallback error: {fallbackError}")
@ -297,7 +308,18 @@ class ConnectorGoogleSpeech:
"description": f"LINEAR16 with {std_rate}Hz" "description": f"LINEAR16 with {std_rate}Hz"
}) })
# Try with different models # Detect likely silence before expensive fallback loop
if len(audioContent) > 100:
sampleSlice = audioContent[100:min(500, len(audioContent))]
if len(set(sampleSlice)) < 3:
logger.warning("Audio appears silent (low byte variation) - skipping fallbacks")
return {
"success": False,
"text": "",
"confidence": 0.0,
"error": "No recognition results (silence or unclear audio)"
}
models = ["latest_long", "phone_call", "latest_short"] models = ["latest_long", "phone_call", "latest_short"]
for fallback_config in fallback_configs: for fallback_config in fallback_configs:
@ -305,7 +327,6 @@ class ConnectorGoogleSpeech:
try: try:
logger.info(f"Trying fallback: {fallback_config['description']} with {model} model...") logger.info(f"Trying fallback: {fallback_config['description']} with {model} model...")
# Build fallback config with proper sample rate handling
fallback_config_params = { fallback_config_params = {
"encoding": fallback_config["encoding"], "encoding": fallback_config["encoding"],
"audio_channel_count": fallback_config["channels"], "audio_channel_count": fallback_config["channels"],
@ -314,12 +335,13 @@ class ConnectorGoogleSpeech:
"model": model "model": model
} }
# Only add sample_rate_hertz if needed
if fallback_config["use_sample_rate"]: if fallback_config["use_sample_rate"]:
fallback_config_params["sample_rate_hertz"] = fallback_config["sample_rate"] fallback_config_params["sample_rate_hertz"] = fallback_config["sample_rate"]
fallback_config_obj = speech.RecognitionConfig(**fallback_config_params) fallback_config_obj = speech.RecognitionConfig(**fallback_config_params)
fallback_response = self.speech_client.recognize(config=fallback_config_obj, audio=audio) fallback_response = await asyncio.to_thread(
self.speech_client.recognize, config=fallback_config_obj, audio=audio
)
if fallback_response.results: if fallback_response.results:
result = fallback_response.results[0] result = fallback_response.results[0]

View file

@ -1,16 +1,16 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
"""CodeEditor processor -- single-shot orchestrator (Phase 1). """CodeEditor processor -- single-shot (Phase 1) and agent loop (Phase 2).
Loads files, builds prompt, calls AI, parses response, emits SSE events.""" Orchestrates file loading, prompt building, AI calls, response parsing, and SSE emission."""
import logging import logging
import uuid from typing import List, Dict, Any
from typing import List, Optional, Dict, Any
from modules.features.codeeditor import fileContextManager, promptAssembly, responseParser from modules.features.codeeditor import fileContextManager, promptAssembly, responseParser
from modules.features.codeeditor.datamodelCodeeditor import ( from modules.features.codeeditor.datamodelCodeeditor import (
FileEditProposal, ResponseSegment, SegmentTypeEnum FileEditProposal, SegmentTypeEnum, AgentState
) )
from modules.features.codeeditor import toolRegistry
from modules.shared.timeUtils import getUtcTimestamp from modules.shared.timeUtils import getUtcTimestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -23,53 +23,201 @@ async def processMessage(
dbManagement, dbManagement,
interfaceAi, interfaceAi,
chatInterface, chatInterface,
eventManager eventManager,
agentMode: bool = False
): ):
"""Process a user message: load files, call AI, parse and emit response segments. """Process a user message. Dispatches to single-shot or agent loop based on mode."""
if agentMode:
await _processAgentMessage(
workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager
)
else:
await _processSingleShot(
workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager
)
Args:
workflowId: the active workflow ID async def _processSingleShot(
userPrompt: user's input text workflowId, userPrompt, selectedFileIds, dbManagement, interfaceAi, chatInterface, eventManager
selectedFileIds: file IDs the user selected as context ):
dbManagement: interfaceDbManagement instance with user context """Phase 1: Single AI call with pre-loaded file context."""
interfaceAi: AiObjects instance for AI calls
chatInterface: interfaceDbChat instance for storing messages
eventManager: EventManager for SSE emission
"""
try: try:
await eventManager.emit_event(workflowId, "chatdata", { await _emitStatus(eventManager, workflowId, "Loading files...")
"type": "status", "label": "Loading files..."
})
fileContexts = await fileContextManager.loadFileContexts(dbManagement, selectedFileIds) fileContexts = await fileContextManager.loadFileContexts(dbManagement, selectedFileIds)
await eventManager.emit_event(workflowId, "chatdata", { await _emitStatus(eventManager, workflowId, "Building prompt...")
"type": "status", "label": "Building prompt..."
})
chatHistory = _loadChatHistory(chatInterface, workflowId) chatHistory = _loadChatHistory(chatInterface, workflowId)
aiRequest = promptAssembly.buildRequest(userPrompt, fileContexts, chatHistory) aiRequest = promptAssembly.buildRequest(userPrompt, fileContexts, chatHistory)
await eventManager.emit_event(workflowId, "chatdata", { await _emitStatus(eventManager, workflowId, "AI is processing...")
"type": "status", "label": "AI is processing..."
})
aiResponse = await interfaceAi.callWithTextContext(aiRequest) aiResponse = await interfaceAi.callWithTextContext(aiRequest)
if aiResponse.errorCount > 0: if aiResponse.errorCount > 0:
logger.error(f"AI call failed: {aiResponse.content}") await _emitError(eventManager, workflowId, aiResponse.content)
await eventManager.emit_event(workflowId, "chatdata", {
"type": "message",
"item": {"role": "assistant", "content": f"Error: {aiResponse.content}"}
})
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": aiResponse.content
})
return return
segments = responseParser.parseResponse(aiResponse.content) segments = responseParser.parseResponse(aiResponse.content)
await _emitSegments(eventManager, workflowId, segments, fileContexts)
_logAiStats(aiResponse, workflowId)
await eventManager.emit_event(workflowId, "complete", {
"workflowId": workflowId,
"modelName": aiResponse.modelName,
"priceCHF": aiResponse.priceCHF,
"processingTime": aiResponse.processingTime
})
except Exception as e:
logger.error(f"CodeEditor single-shot failed for {workflowId}: {e}", exc_info=True)
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": str(e)
})
async def _processAgentMessage(
    workflowId, userPrompt, dbManagement, interfaceAi, chatInterface, eventManager
):
    """Phase 2: run the agent loop for one user message.

    Each round calls the AI, emits any non-tool segments of the reply, executes
    the tool calls found in it and feeds their combined output back into the
    next request. The loop ends when a reply contains no tool calls, when an AI
    call reports an error, or when ``state.maxRounds`` is reached. A summary and
    a ``complete`` event are emitted afterwards; unexpected exceptions are
    logged and reported as an ``error`` event.
    """
    state = AgentState(workflowId=workflowId)

    try:
        await _emitStatus(eventManager, workflowId, "Agent: Scanning available files...")
        availableFiles = fileContextManager.buildFileListContext(dbManagement)

        state.conversationHistory.append({"role": "user", "content": userPrompt})

        # The opening request is built from the prompt with an empty history.
        request = promptAssembly.buildAgentRequest(
            userPrompt=userPrompt,
            fileListContext=availableFiles,
            conversationHistory=[]
        )

        while state.status == "running" and state.currentRound < state.maxRounds:
            state.currentRound += 1
            state.totalAiCalls += 1

            await _emitStatus(
                eventManager,
                workflowId,
                f"Agent round {state.currentRound}: AI is thinking..."
            )

            progressItem = {
                "round": state.currentRound,
                "totalAiCalls": state.totalAiCalls,
                "totalToolCalls": state.totalToolCalls,
                "costCHF": round(state.totalCostCHF, 4),
            }
            await eventManager.emit_event(workflowId, "chatdata", {
                "type": "agent_progress",
                "item": progressItem
            })

            reply = await interfaceAi.callWithTextContext(request)
            # Cost and time are accounted for even when the call reports an error.
            state.totalCostCHF += reply.priceCHF
            state.totalProcessingTime += reply.processingTime

            if reply.errorCount > 0:
                logger.error(f"Agent AI call failed in round {state.currentRound}: {reply.content}")
                await _emitError(eventManager, workflowId, reply.content)
                state.status = "error"
                break

            _logAiStats(reply, workflowId)
            state.conversationHistory.append({"role": "assistant", "content": reply.content})

            # Split the parsed reply into tool calls and everything else.
            pendingToolCalls = []
            visibleSegments = []
            for parsedSegment in responseParser.parseResponse(reply.content):
                if parsedSegment.type == SegmentTypeEnum.TOOL_CALL:
                    pendingToolCalls.append(parsedSegment)
                else:
                    visibleSegments.append(parsedSegment)

            if visibleSegments:
                await _emitSegments(eventManager, workflowId, visibleSegments, [])

            # A reply without tool calls means the agent is finished.
            if not pendingToolCalls:
                state.status = "completed"
                break

            toolOutputs = []
            for call in pendingToolCalls:
                state.totalToolCalls += 1
                await _emitStatus(
                    eventManager,
                    workflowId,
                    f"Agent: Running {call.toolName}..."
                )
                outcome = await toolRegistry.dispatch(call.toolName, call.toolArgs or {}, dbManagement)
                toolOutputs.append(f"[{call.toolName}] (success={outcome.success}):\n{outcome.result}")
                logger.info(f"Agent tool {call.toolName}: success={outcome.success}, time={outcome.executionTime:.2f}s")

            # All tool output of this round is recorded as a single history entry.
            state.conversationHistory.append({
                "role": "tool_result",
                "content": "\n\n".join(toolOutputs),
                "toolName": "batch"
            })

            request = promptAssembly.buildAgentRequest(
                userPrompt=None,
                fileListContext=availableFiles,
                conversationHistory=state.conversationHistory
            )

        if state.currentRound >= state.maxRounds and state.status == "running":
            state.status = "max_rounds"
            limitNotice = {
                "role": "system",
                "content": f"Agent stopped: maximum rounds ({state.maxRounds}) reached.",
                "createdAt": getUtcTimestamp()
            }
            await eventManager.emit_event(workflowId, "chatdata", {
                "type": "message",
                "item": limitNotice
            })

        summaryItem = {
            "rounds": state.currentRound,
            "totalAiCalls": state.totalAiCalls,
            "totalToolCalls": state.totalToolCalls,
            "costCHF": round(state.totalCostCHF, 4),
            "processingTime": round(state.totalProcessingTime, 1),
            "status": state.status,
        }
        await eventManager.emit_event(workflowId, "chatdata", {
            "type": "agent_summary",
            "item": summaryItem
        })

        await eventManager.emit_event(workflowId, "complete", {
            "workflowId": workflowId,
            "agentRounds": state.currentRound,
            "totalCostCHF": round(state.totalCostCHF, 4),
            "processingTime": round(state.totalProcessingTime, 1)
        })

    except Exception as e:
        logger.error(f"CodeEditor agent loop failed for {workflowId}: {e}", exc_info=True)
        await eventManager.emit_event(workflowId, "error", {
            "workflowId": workflowId, "error": str(e)
        })
# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------
async def _emitStatus(eventManager, workflowId: str, label: str):
    """Emit a ``chatdata`` event of type ``status`` carrying *label*."""
    statusPayload = {"type": "status", "label": label}
    await eventManager.emit_event(workflowId, "chatdata", statusPayload)
async def _emitError(eventManager, workflowId: str, errorMsg: str):
    """Report *errorMsg* twice: as an assistant chat message, then as an ``error`` event."""
    chatMessage = {
        "type": "message",
        "item": {"role": "assistant", "content": f"Error: {errorMsg}"}
    }
    errorPayload = {"workflowId": workflowId, "error": errorMsg}

    await eventManager.emit_event(workflowId, "chatdata", chatMessage)
    await eventManager.emit_event(workflowId, "error", errorPayload)
async def _emitSegments(eventManager, workflowId: str, segments, fileContexts):
"""Emit parsed segments as SSE events."""
for segment in segments: for segment in segments:
messageData = { messageData = {
"role": "assistant", "role": "assistant",
@ -77,7 +225,6 @@ async def processMessage(
"type": segment.type.value, "type": segment.type.value,
"createdAt": getUtcTimestamp() "createdAt": getUtcTimestamp()
} }
await eventManager.emit_event(workflowId, "chatdata", { await eventManager.emit_event(workflowId, "chatdata", {
"type": "message", "item": messageData "type": "message", "item": messageData
}) })
@ -95,21 +242,6 @@ async def processMessage(
"type": "file_edit_proposal", "item": proposal.model_dump() "type": "file_edit_proposal", "item": proposal.model_dump()
}) })
_logAiStats(aiResponse, workflowId)
await eventManager.emit_event(workflowId, "complete", {
"workflowId": workflowId,
"modelName": aiResponse.modelName,
"priceCHF": aiResponse.priceCHF,
"processingTime": aiResponse.processingTime
})
except Exception as e:
logger.error(f"CodeEditor processing failed for workflow {workflowId}: {e}", exc_info=True)
await eventManager.emit_event(workflowId, "error", {
"workflowId": workflowId, "error": str(e)
})
def _loadChatHistory(chatInterface, workflowId: str) -> List[Dict[str, Any]]: def _loadChatHistory(chatInterface, workflowId: str) -> List[Dict[str, Any]]:
"""Load recent chat messages for multi-turn context.""" """Load recent chat messages for multi-turn context."""

View file

@ -13,6 +13,7 @@ class SegmentTypeEnum(str, Enum):
TEXT = "text" TEXT = "text"
CODE_BLOCK = "code_block" CODE_BLOCK = "code_block"
FILE_EDIT = "file_edit" FILE_EDIT = "file_edit"
TOOL_CALL = "tool_call"
class EditStatusEnum(str, Enum): class EditStatusEnum(str, Enum):
@ -40,6 +41,8 @@ class ResponseSegment(BaseModel):
fileName: Optional[str] = None fileName: Optional[str] = None
oldContent: Optional[str] = None oldContent: Optional[str] = None
newContent: Optional[str] = None newContent: Optional[str] = None
toolName: Optional[str] = None
toolArgs: Optional[Dict[str, Any]] = None
class FileEditProposal(BaseModel): class FileEditProposal(BaseModel):
@ -65,6 +68,27 @@ class FileVersion(BaseModel):
createdAt: float = Field(default_factory=getUtcTimestamp) createdAt: float = Field(default_factory=getUtcTimestamp)
class AgentState(BaseModel):
    """Mutable bookkeeping for a single agent-loop execution."""

    # Workflow this loop belongs to
    workflowId: str

    # Round counter and the limit at which the loop stops
    currentRound: int = 0
    maxRounds: int = 50

    # Running totals accumulated over all rounds
    totalAiCalls: int = 0
    totalToolCalls: int = 0
    totalCostCHF: float = 0.0
    totalProcessingTime: float = 0.0

    # Ordered messages exchanged so far (dicts with at least "role" and "content")
    conversationHistory: List[Dict[str, Any]] = Field(default_factory=list)

    # Loop status; starts as "running"
    status: str = "running"
class ToolResult(BaseModel):
    """Outcome of one tool execution."""

    # Name of the tool that was invoked
    toolName: str

    # Textual output (or error text) produced by the tool
    result: str

    # Whether the execution is reported as successful
    success: bool = True

    # Execution time of the tool call
    executionTime: float = 0.0
TEXT_MIME_TYPES = { TEXT_MIME_TYPES = {
"text/plain", "text/markdown", "text/html", "text/css", "text/csv", "text/plain", "text/markdown", "text/html", "text/css", "text/csv",
"text/xml", "text/yaml", "text/x-python", "text/x-java", "text/xml", "text/yaml", "text/x-python", "text/x-java",

View file

@ -71,3 +71,12 @@ def listTextFiles(dbManagement) -> List[FileContext]:
)) ))
return textFiles return textFiles
def buildFileListContext(dbManagement) -> str:
    """Return a metadata-only listing of the text files for the agent prompt.

    Each entry shows file name, id and size; file contents are not included.
    A fixed placeholder sentence is returned when there are no text files.
    """
    textFiles = listTextFiles(dbManagement)
    if not textFiles:
        return "No text files available."

    entries = []
    for info in textFiles:
        entries.append(f"- {info.fileName} (id: {info.fileId}, size: {info.sizeBytes}B)")

    header = f"Total: {len(entries)} text files\n"
    return header + "\n".join(entries)

View file

@ -89,6 +89,84 @@ def _buildFileContext(fileContexts: List[FileContext]) -> str:
return "\n\n".join(parts) return "\n\n".join(parts)
def buildAgentRequest(
    userPrompt: Optional[str],
    fileListContext: str,
    conversationHistory: List[Dict[str, Any]]
) -> AiCallRequest:
    """Assemble the AiCallRequest used for one agent-mode AI call.

    The prompt is the agent system prompt with the tool definitions filled in.
    The context always starts with the available-files listing, followed by
    the conversation so far when there is one, otherwise by the task text.
    """
    from modules.features.codeeditor.toolRegistry import formatToolDefinitions

    agentPrompt = _AGENT_SYSTEM_PROMPT.replace("{{TOOL_DEFINITIONS}}", formatToolDefinitions())

    if conversationHistory:
        historyText = _buildConversationHistory(conversationHistory)
        context = f"## Available files\n{fileListContext}\n\n## Conversation\n{historyText}"
    else:
        # No history yet: present the user prompt as the task.
        context = f"## Available files\n{fileListContext}\n\n## Task\n{userPrompt}"

    callOptions = AiCallOptions(
        operationType=OperationTypeEnum.DATA_ANALYSE,
        temperature=0.0,
        compressPrompt=False,
        compressContext=False,
        resultFormat="txt"
    )
    return AiCallRequest(prompt=agentPrompt, context=context, options=callOptions)
# System prompt template for agent mode. The literal {{TOOL_DEFINITIONS}}
# placeholder is substituted via str.replace before the prompt is sent, so it
# must be kept verbatim in the text below.
_AGENT_SYSTEM_PROMPT = """You are an AI agent for file analysis and editing. You work autonomously by using tools to read files, search content, and propose edits.
## Available tools
{{TOOL_DEFINITIONS}}
## How to call tools
Use this exact format for each tool call:
```tool_call
tool: <tool_name>
args: {"param": "value"}
```
## Rules
- Read files ONE AT A TIME with read_file, never assume file contents
- First create a plan, then execute it step by step
- Use search_files to find relevant files before reading them
- Use list_files to discover what files are available
- For file changes, use ```file_edit``` blocks (same format as before)
- You may combine text explanations, tool calls, and file edits in one response
- When you are DONE and need no more tool calls, simply respond with text only (no tool_call blocks)
- Keep responses focused and efficient
## file_edit format (for changes)
```file_edit
fileName: <filename>
oldContent: |
<exact existing content>
newContent: |
<replacement content>
```"""
def _buildConversationHistory(history: List[Dict[str, Any]]) -> str:
    """Render the agent conversation as text, one labelled section per message.

    Tool results are labelled with their tool name; every other message is
    labelled with its role. Sections are separated by a blank line.
    """
    def _renderMessage(message: Dict[str, Any]) -> str:
        role = message.get("role", "unknown")
        content = message.get("content", "")
        if role != "tool_result":
            return f"[{role}]:\n{content}"
        toolName = message.get("toolName", "")
        return f"[Tool Result - {toolName}]:\n{content}"

    return "\n\n".join(_renderMessage(message) for message in history)
def _buildChatHistory(chatHistory: List[Dict[str, Any]]) -> str: def _buildChatHistory(chatHistory: List[Dict[str, Any]]) -> str:
"""Build a condensed chat history string for multi-turn context.""" """Build a condensed chat history string for multi-turn context."""
if not chatHistory: if not chatHistory:

View file

@ -1,9 +1,10 @@
# Copyright (c) 2025 Patrick Motsch # Copyright (c) 2025 Patrick Motsch
# All rights reserved. # All rights reserved.
"""Response parser for the CodeEditor feature. """Response parser for the CodeEditor feature.
Parses AI responses into typed segments (text, code_block, file_edit).""" Parses AI responses into typed segments (text, code_block, file_edit, tool_call)."""
import logging import logging
import json
import re import re
from typing import List, Optional from typing import List, Optional
@ -48,6 +49,16 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]:
content=blockContent, content=blockContent,
language="text" language="text"
)) ))
elif lang == "tool_call":
segment = _parseToolCallBlock(blockContent)
if segment:
segments.append(segment)
else:
segments.append(ResponseSegment(
type=SegmentTypeEnum.CODE_BLOCK,
content=blockContent,
language="text"
))
else: else:
segments.append(ResponseSegment( segments.append(ResponseSegment(
type=SegmentTypeEnum.CODE_BLOCK, type=SegmentTypeEnum.CODE_BLOCK,
@ -66,6 +77,11 @@ def parseResponse(rawContent: str) -> List[ResponseSegment]:
return segments return segments
def hasToolCalls(segments: List[ResponseSegment]) -> bool:
    """Return True when at least one segment is a tool call."""
    for segment in segments:
        if segment.type == SegmentTypeEnum.TOOL_CALL:
            return True
    return False
def _collectBlock(lines: List[str], startIdx: int) -> tuple: def _collectBlock(lines: List[str], startIdx: int) -> tuple:
"""Collect lines inside a fenced code block until closing ```.""" """Collect lines inside a fenced code block until closing ```."""
blockLines = [] blockLines = []
@ -137,3 +153,32 @@ def _parseFileEditBlock(blockContent: str) -> Optional[ResponseSegment]:
oldContent=fields["oldContent"], oldContent=fields["oldContent"],
newContent=fields["newContent"] newContent=fields["newContent"]
) )
def _parseToolCallBlock(blockContent: str) -> Optional[ResponseSegment]:
    """Parse a tool_call block into a ResponseSegment with toolName and toolArgs.

    The block is scanned line by line for a ``tool:`` line and an ``args:``
    line. The args value must be a JSON object; anything else (invalid JSON,
    or valid JSON that is a list, string, number or null) is preserved as
    ``{"raw": <text>}`` so that toolArgs is always a dict.

    Returns:
        The TOOL_CALL segment, or None when the block names no tool.
    """
    toolName = None
    toolArgs = {}

    for line in blockContent.split("\n"):
        stripped = line.strip()
        if stripped.startswith("tool:"):
            toolName = stripped[len("tool:"):].strip()
        elif stripped.startswith("args:"):
            argsStr = stripped[len("args:"):].strip()
            try:
                parsedArgs = json.loads(argsStr)
            except json.JSONDecodeError:
                logger.warning(f"Could not parse tool args as JSON: {argsStr}")
                parsedArgs = None
            else:
                if not isinstance(parsedArgs, dict):
                    # json.loads accepts any JSON value, but tools look
                    # arguments up by key, so only an object is usable.
                    logger.warning(f"Tool args are not a JSON object: {argsStr}")
            toolArgs = parsedArgs if isinstance(parsedArgs, dict) else {"raw": argsStr}

    if not toolName:
        logger.warning("tool_call block missing tool name")
        return None

    return ResponseSegment(
        type=SegmentTypeEnum.TOOL_CALL,
        content=f"Tool: {toolName}",
        toolName=toolName,
        toolArgs=toolArgs
    )

View file

@ -80,10 +80,11 @@ async def streamCodeeditorStart(
request: Request, request: Request,
instanceId: str = Path(..., description="Feature instance ID"), instanceId: str = Path(..., description="Feature instance ID"),
workflowId: Optional[str] = Query(None, description="Optional workflow ID to continue"), workflowId: Optional[str] = Query(None, description="Optional workflow ID to continue"),
mode: str = Query("simple", description="Processing mode: 'simple' (single AI call) or 'agent' (multi-step with tools)"),
userInput: UserInputRequest = Body(...), userInput: UserInputRequest = Body(...),
context: RequestContext = Depends(getRequestContext) context: RequestContext = Depends(getRequestContext)
): ):
"""Start or continue a CodeEditor workflow with SSE streaming.""" """Start or continue a CodeEditor workflow with SSE streaming. Supports simple and agent mode."""
try: try:
mandateId = _validateInstanceAccess(instanceId, context) mandateId = _validateInstanceAccess(instanceId, context)
chatInterface = _getServiceChat(context, featureInstanceId=instanceId) chatInterface = _getServiceChat(context, featureInstanceId=instanceId)
@ -116,6 +117,8 @@ async def streamCodeeditorStart(
selectedFileIds = userInput.listFileId or [] selectedFileIds = userInput.listFileId or []
agentMode = mode.lower() == "agent"
asyncio.create_task( asyncio.create_task(
codeEditorProcessor.processMessage( codeEditorProcessor.processMessage(
workflowId=workflowId, workflowId=workflowId,
@ -124,7 +127,8 @@ async def streamCodeeditorStart(
dbManagement=dbManagement, dbManagement=dbManagement,
interfaceAi=aiObjects, interfaceAi=aiObjects,
chatInterface=chatInterface, chatInterface=chatInterface,
eventManager=eventManager eventManager=eventManager,
agentMode=agentMode
) )
) )
@ -319,40 +323,52 @@ async def applyEdit(
proposalData: Dict[str, Any] = Body(...), proposalData: Dict[str, Any] = Body(...),
context: RequestContext = Depends(getRequestContext) context: RequestContext = Depends(getRequestContext)
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Accept a file edit proposal and create a new file version.""" """Accept a file edit proposal. Updates existing file or creates new one."""
try: try:
_validateInstanceAccess(instanceId, context) _validateInstanceAccess(instanceId, context)
dbManagement = _getDbManagement(context, featureInstanceId=instanceId) dbManagement = _getDbManagement(context, featureInstanceId=instanceId)
fileId = proposalData.get("fileId") fileId = proposalData.get("fileId", "")
newContent = proposalData.get("newContent") newContent = proposalData.get("newContent")
fileName = proposalData.get("fileName", "") fileName = proposalData.get("fileName", "")
if not fileId or newContent is None: if newContent is None:
raise HTTPException(status_code=400, detail="fileId and newContent are required") raise HTTPException(status_code=400, detail="newContent is required")
contentBytes = newContent.encode("utf-8")
isNewFile = not fileId or fileId.startswith("unknown-")
if isNewFile:
mimeType = _guessMimeType(fileName)
fileItem = dbManagement.createFile(fileName, mimeType, contentBytes)
resultFileId = fileItem.id
resultFileName = fileItem.fileName
else:
fileItem = dbManagement.getFile(fileId) fileItem = dbManagement.getFile(fileId)
if not fileItem: if not fileItem:
raise HTTPException(status_code=404, detail=f"File {fileId} not found") raise HTTPException(status_code=404, detail=f"File {fileId} not found")
success = dbManagement.createFileData(fileId, contentBytes)
success = dbManagement.createFileData(fileId, newContent.encode("utf-8"))
if not success: if not success:
raise HTTPException(status_code=500, detail="Failed to store updated file content") raise HTTPException(status_code=500, detail="Failed to store updated file content")
resultFileId = fileId
resultFileName = fileName or fileItem.fileName
eventManager = get_event_manager() eventManager = get_event_manager()
await eventManager.emit_event(workflowId, "chatdata", { await eventManager.emit_event(workflowId, "chatdata", {
"type": "file_version", "type": "file_version",
"item": { "item": {
"fileId": fileId, "fileId": resultFileId,
"fileName": fileName or fileItem.fileName, "fileName": resultFileName,
"status": "accepted" "status": "accepted",
"isNew": isNewFile
} }
}) })
return { return {
"status": "accepted", "status": "accepted",
"fileId": fileId, "fileId": resultFileId,
"fileName": fileName or fileItem.fileName "fileName": resultFileName,
"isNew": isNewFile
} }
except HTTPException: except HTTPException:
@ -360,3 +376,20 @@ async def applyEdit(
except Exception as e: except Exception as e:
logger.error(f"Error applying edit: {e}", exc_info=True) logger.error(f"Error applying edit: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
# Lower-case file extension (including the dot) -> MIME type.
_MIME_MAP = {
    ".md": "text/markdown",
    ".txt": "text/plain",
    ".json": "application/json",
    ".yaml": "application/yaml",
    ".yml": "application/yaml",
    ".xml": "application/xml",
    ".csv": "text/csv",
    ".py": "text/x-python",
    ".js": "text/javascript",
    ".ts": "text/x-typescript",
    ".html": "text/html",
    ".css": "text/css",
    ".sql": "text/x-sql",
    ".sh": "text/x-shellscript",
}


def _guessMimeType(fileName: str) -> str:
    """Map a file name to a MIME type using its last extension.

    Names that are empty, have no dot, or carry an extension missing from
    ``_MIME_MAP`` resolve to ``text/plain``.
    """
    _, dot, tail = (fileName or "").rpartition(".")
    if not dot:
        return "text/plain"
    extension = "." + tail.lower()
    return _MIME_MAP.get(extension, "text/plain")

View file

@ -0,0 +1,157 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Tool registry and dispatcher for the CodeEditor agent loop.
Defines available tools and executes them against the file context manager."""
import logging
import time
import fnmatch
from typing import Dict, Any, List
from modules.features.codeeditor.datamodelCodeeditor import ToolResult
logger = logging.getLogger(__name__)
# Catalogue of the tools offered to the agent. Each entry has a "name", a
# human-readable "description" and a "parameters" mapping of parameter name
# to a short textual spec; formatToolDefinitions() renders these as prompt text.
TOOL_DEFINITIONS = [
{
"name": "read_file",
"description": "Read the full content of a single file by its fileId.",
"parameters": {"fileId": "string (required)"}
},
{
"name": "list_files",
"description": "List all available text files with metadata (name, size, mimeType). Optionally filter by glob pattern.",
"parameters": {"filter": "string (optional, glob pattern e.g. '*.py')"}
},
{
"name": "search_files",
"description": "Search all file contents for a text query. Returns matching lines with file name and line number.",
"parameters": {"query": "string (required)", "fileType": "string (optional, extension e.g. 'py')"}
},
]
async def dispatch(toolName: str, toolArgs: Dict[str, Any], dbManagement) -> ToolResult:
    """Execute the named tool and wrap its textual output in a ToolResult.

    Args:
        toolName: one of "read_file", "list_files" or "search_files".
        toolArgs: arguments forwarded to the tool implementation.
        dbManagement: database interface handed through to the tool.

    Returns:
        A ToolResult carrying the tool output and the elapsed time. ``success``
        is False for an unknown tool, for a raised exception, and for output
        that starts with "Error:" (how the tool helpers report invalid
        arguments or missing files). This function does not raise.
    """
    startTime = time.time()

    def _finish(text: str, succeeded: bool) -> ToolResult:
        return ToolResult(toolName=toolName, result=text, success=succeeded,
                          executionTime=time.time() - startTime)

    try:
        if toolName == "read_file":
            result = await _toolReadFile(toolArgs, dbManagement)
        elif toolName == "list_files":
            result = _toolListFiles(toolArgs, dbManagement)
        elif toolName == "search_files":
            result = await _toolSearchFiles(toolArgs, dbManagement)
        else:
            return _finish(f"Unknown tool: {toolName}", False)

        # The helpers return "Error: ..." text instead of raising; reflect that
        # in the success flag so the agent is not told a failed call succeeded.
        return _finish(result, not result.startswith("Error:"))

    except Exception as e:
        logger.error(f"Tool {toolName} failed: {e}", exc_info=True)
        return _finish(f"Error: {str(e)}", False)
async def _toolReadFile(args: Dict[str, Any], dbManagement) -> str:
    """Return one file's UTF-8 content with 1-based line numbers.

    Problems (missing fileId, unknown file, no data, non-UTF-8 bytes) are
    reported as a string starting with "Error:" rather than raised.
    """
    fileId = args.get("fileId", "")
    if not fileId:
        return "Error: fileId is required"

    fileItem = dbManagement.getFile(fileId)
    if not fileItem:
        return f"Error: File {fileId} not found"

    rawBytes = dbManagement.getFileData(fileId)
    if not rawBytes:
        return f"Error: No data for file {fileId}"

    try:
        text = rawBytes.decode("utf-8")
    except UnicodeDecodeError:
        return f"Error: File {fileItem.fileName} is not valid UTF-8"

    numberedLines = []
    for lineNo, lineText in enumerate(text.split("\n"), start=1):
        numberedLines.append(f"{lineNo}|{lineText}")
    numbered = "\n".join(numberedLines)

    return f"--- FILE: {fileItem.fileName} (id: {fileId}) ---\n{numbered}\n--- END FILE ---"
def _toolListFiles(args: Dict[str, Any], dbManagement) -> str:
    """List text files as one line each, optionally restricted by a glob on the file name."""
    from modules.features.codeeditor.datamodelCodeeditor import isTextFile

    globPattern = args.get("filter", "")
    allFiles = dbManagement.getAllFiles()
    if not allFiles:
        return "No files found."

    def _isListed(candidate) -> bool:
        # Only text files are offered; an empty pattern means "no filter".
        if not isTextFile(candidate.mimeType, candidate.fileName):
            return False
        return not globPattern or fnmatch.fnmatch(candidate.fileName, globPattern)

    entries = [
        f"- {f.fileName} (id: {f.id}, size: {f.fileSize}B, type: {f.mimeType})"
        for f in allFiles
        if _isListed(f)
    ]
    if not entries:
        return "No matching text files found."

    return f"Available files ({len(entries)}):\n" + "\n".join(entries)
async def _toolSearchFiles(args: Dict[str, Any], dbManagement) -> str:
    """Search the contents of all text files for a case-insensitive substring.

    Args:
        args: ``query`` (required) and ``fileType`` (optional extension). The
            extension is accepted with or without a leading dot and is matched
            case-insensitively, so "py", ".py" and "PY" are equivalent.
        dbManagement: database interface providing getAllFiles/getFileData.

    Returns:
        One ``fileName:lineNumber: line`` entry per hit, limited to 50 hits,
        or a short message when nothing was searched or found.
    """
    from modules.features.codeeditor.datamodelCodeeditor import isTextFile

    query = args.get("query", "")
    if not query:
        return "Error: query is required"

    # Normalize the extension once; previously ".py" became "..py" and could
    # never match any file name.
    fileType = (args.get("fileType") or "").strip().lstrip(".").lower()
    requiredSuffix = f".{fileType}" if fileType else ""

    allFiles = dbManagement.getAllFiles()
    if not allFiles:
        return "No files to search."

    hits = []
    maxHits = 50
    queryLower = query.lower()

    for f in allFiles:
        if not isTextFile(f.mimeType, f.fileName):
            continue
        if requiredSuffix and not f.fileName.lower().endswith(requiredSuffix):
            continue

        fileData = dbManagement.getFileData(f.id)
        if not fileData:
            continue

        try:
            content = fileData.decode("utf-8")
        except UnicodeDecodeError:
            # Files that are not valid UTF-8 are skipped silently.
            continue

        for lineNum, line in enumerate(content.split("\n"), 1):
            if queryLower in line.lower():
                hits.append(f"{f.fileName}:{lineNum}: {line.strip()}")
                if len(hits) >= maxHits:
                    break

        if len(hits) >= maxHits:
            break

    if not hits:
        return f"No matches found for '{query}'."

    result = f"Search results for '{query}' ({len(hits)} matches):\n" + "\n".join(hits)
    if len(hits) >= maxHits:
        result += f"\n... (truncated at {maxHits} matches)"
    return result
def formatToolDefinitions() -> str:
    """Render TOOL_DEFINITIONS as a bullet list for the system prompt."""
    def _renderTool(tool) -> str:
        paramSpecs = [f"{name}: {spec}" for name, spec in tool["parameters"].items()]
        params = ", ".join(paramSpecs)
        return f"- **{tool['name']}**: {tool['description']}\n Parameters: {{{params}}}"

    return "\n".join(_renderTool(tool) for tool in TOOL_DEFINITIONS)

View file

@ -401,14 +401,22 @@ class TeamsbotService:
if len(audioBytes) < 1000: if len(audioBytes) < 1000:
return return
# Detect silent/all-zeros audio early to avoid expensive STT calls
if len(set(audioBytes[100:min(500, len(audioBytes))])) < 3:
logger.debug(f"[AudioChunk] Skipping silent audio ({len(audioBytes)} bytes, low byte variation)")
return
if not voiceInterface: if not voiceInterface:
logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}") logger.warning(f"[AudioChunk] No voice interface available for session {sessionId}")
return return
# Treat sampleRate=0 as unknown (triggers auto-detection)
effectiveSampleRate = sampleRate if sampleRate and sampleRate > 0 else None
sttResult = await voiceInterface.speechToText( sttResult = await voiceInterface.speechToText(
audioContent=audioBytes, audioContent=audioBytes,
language=self.config.language or "de-DE", language=self.config.language or "de-DE",
sampleRate=sampleRate, sampleRate=effectiveSampleRate,
) )
if sttResult and sttResult.get("success") and sttResult.get("text"): if sttResult and sttResult.get("success") and sttResult.get("text"):