Wired Infomaniak kDrive into AI adapters and tools

This commit is contained in:
ValueOn AG 2026-04-29 01:52:47 +02:00
parent 49f3660d89
commit 052647a52b
13 changed files with 488 additions and 114 deletions

View file

@ -234,6 +234,21 @@ async def listAccessibleDrives(token: str) -> List[Dict[str, Any]]:
return [d for d in drives if isinstance(d, dict) and d.get("id")]
def _lastNumericSegment(segments: List[str]) -> Optional[str]:
"""Return the last all-digit segment (kDrive file/folder IDs are int).
The agent sometimes appends the human-readable filename to a path,
e.g. ``/2980592/12/platform-overview.html``. The kDrive API does
not accept names -- only numeric IDs -- so we strip trailing
non-numeric segments and pick the last integer ID.
Returns ``None`` if no numeric segment exists.
"""
for seg in reversed(segments):
if seg.isdigit():
return seg
return None
class KdriveAdapter(ServiceAdapter):
"""kDrive ServiceAdapter -- browse drives, folders, files.
@ -246,6 +261,14 @@ class KdriveAdapter(ServiceAdapter):
The drive list is cached on the adapter instance so each browse
pays for one ``/2/drive/init`` call at most.
File-vs-folder handling: a DataSource may point at a single file
(e.g. ``/{driveId}/{fileId}`` where ``fileId`` is a regular file).
Calling ``/files/{fileId}/files`` on a file answers
``400 destination_not_a_directory`` -- so :meth:`browse` first
fetches the item's metadata and, if ``type=file``, returns a
one-element list describing the file itself instead of pretending
the directory is empty.
"""
def __init__(self, accessToken: str):
@ -258,6 +281,14 @@ class KdriveAdapter(ServiceAdapter):
self._drives = await listAccessibleDrives(self._token)
return self._drives
async def _fetchItemMeta(self, driveId: str, fileId: str) -> Optional[Dict[str, Any]]:
    """Fetch kDrive metadata for a single file or folder.

    Calls ``/2/drive/{driveId}/files/{fileId}`` and unwraps the payload.

    Returns:
        The metadata dict, or ``None`` when the API call errors or the
        response has an unexpected shape.
    """
    raw = await _infomaniakGet(self._token, f"/2/drive/{driveId}/files/{fileId}")
    if not isinstance(raw, dict) or raw.get("error"):
        return None
    payload = _unwrapData(raw)
    if isinstance(payload, dict):
        return payload
    return None
async def browse(
self,
path: str,
@ -274,7 +305,21 @@ class KdriveAdapter(ServiceAdapter):
if len(segments) == 1:
return await self._listChildren(driveId, fileId=None, limit=limit)
fileId = segments[-1]
fileId = _lastNumericSegment(segments[1:])
if fileId is None:
return []
meta = await self._fetchItemMeta(driveId, fileId)
if meta is not None and meta.get("type") == "file":
return [ExternalEntry(
name=meta.get("name") or fileId,
path=f"/{driveId}/{fileId}",
isFolder=False,
size=meta.get("size"),
mimeType=meta.get("mime_type"),
lastModified=meta.get("last_modified_at"),
metadata={"id": fileId, "kind": "file"},
)]
return await self._listChildren(driveId, fileId=fileId, limit=limit)
async def _listDrives(self) -> List[ExternalEntry]:
@ -341,16 +386,16 @@ class KdriveAdapter(ServiceAdapter):
segments = [s for s in (path or "").strip("/").split("/") if s]
if len(segments) < 2:
return DownloadResult()
driveId, fileId = segments[0], segments[-1]
driveId = segments[0]
# Agent may append the filename: ``/{driveId}/{fileId}/{name}``.
# Pull the last numeric segment instead of trusting segments[-1].
fileId = _lastNumericSegment(segments[1:])
if fileId is None:
return DownloadResult()
meta = await _infomaniakGet(self._token, f"/2/drive/{driveId}/files/{fileId}")
fileName = fileId
mimeType = "application/octet-stream"
if isinstance(meta, dict) and not meta.get("error"):
data = _unwrapData(meta)
if isinstance(data, dict):
fileName = data.get("name") or fileId
mimeType = data.get("mime_type") or mimeType
meta = await self._fetchItemMeta(driveId, fileId)
fileName = (meta or {}).get("name") or fileId
mimeType = (meta or {}).get("mime_type") or "application/octet-stream"
content = await _infomaniakDownload(
self._token, f"/2/drive/{driveId}/files/{fileId}/download"

View file

@ -4,10 +4,13 @@
Document reference models for typed document references in workflows.
"""
from typing import List, Optional
import logging
from typing import Any, List, Optional
from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel
logger = logging.getLogger(__name__)
class DocumentReference(BaseModel):
"""Base class for document references"""
@ -115,3 +118,86 @@ class DocumentReferenceList(BaseModel):
references.append(DocumentListReference(label=refStr))
return cls(references=references)
def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
    """Tolerant coercion of any agent/UI-supplied document list to
    :class:`DocumentReferenceList`.

    Accepts the canonical formats plus the dict-wrapper shapes that
    LLM tool-callers tend to generate when they see a
    ``type=DocumentList`` parameter:

    * ``None`` / ``""`` -> empty list
    * :class:`DocumentReferenceList` -> as-is
    * ``str`` -> single-element string list
    * ``list[str]`` -> :meth:`from_string_list`
    * ``list[dict]`` with ``id`` or ``documentId`` -> item references
    * ``{"documents": [...]}`` / ``{"references": [...]}`` ->
      recurse into the inner list (this is the shape LLMs love)
    * ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single
      item reference
    * any unrecognised input -> empty list with a WARN log; never
      raises (the caller decides whether an empty list is fatal).
    """
    if value is None or value == "":
        return DocumentReferenceList(references=[])
    if isinstance(value, DocumentReferenceList):
        return value
    if isinstance(value, str):
        return DocumentReferenceList.from_string_list([value])
    if isinstance(value, dict):
        # Wrapper shapes first: recurse into the first recognised inner list.
        for innerKey in ("documents", "references", "items", "files"):
            if innerKey in value and isinstance(value[innerKey], list):
                return coerceDocumentReferenceList(value[innerKey])
        docId = value.get("documentId") or value.get("id")
        if docId:
            return DocumentReferenceList(references=[
                DocumentItemReference(
                    documentId=str(docId),
                    fileName=value.get("fileName") or value.get("name"),
                )
            ])
        # Lazy %-args: the message is only rendered if WARN is emitted.
        logger.warning(
            "coerceDocumentReferenceList: unsupported dict shape "
            "(keys=%s); returning empty list.",
            list(value.keys()),
        )
        return DocumentReferenceList(references=[])
    if isinstance(value, list):
        if not value:
            return DocumentReferenceList(references=[])
        # The first element decides the list's interpretation.
        first = value[0]
        if isinstance(first, str):
            return DocumentReferenceList.from_string_list(value)
        if isinstance(first, dict):
            references: List[DocumentReference] = []
            for item in value:
                if not isinstance(item, dict):
                    continue  # silently skip non-dict stragglers
                docId = item.get("documentId") or item.get("id")
                if docId:
                    references.append(DocumentItemReference(
                        documentId=str(docId),
                        fileName=item.get("fileName") or item.get("name"),
                    ))
                elif item.get("label"):
                    references.append(DocumentListReference(
                        label=str(item["label"]),
                        messageId=item.get("messageId"),
                    ))
            return DocumentReferenceList(references=references)
        # Mixed/object list (e.g. inline ActionDocument-like): caller
        # must pre-handle that case before calling this coercer.
        logger.warning(
            "coerceDocumentReferenceList: list element type "
            "%s not recognised; returning empty list.",
            type(first).__name__,
        )
        return DocumentReferenceList(references=[])
    logger.warning(
        "coerceDocumentReferenceList: unsupported value type "
        "%s; returning empty list.",
        type(value).__name__,
    )
    return DocumentReferenceList(references=[])

View file

@ -837,8 +837,20 @@ class ComponentObjects:
"""Checks if a file with the same hash AND fileName already exists for the current user
**within the same scope** (mandateId + featureInstanceId).
Duplicate = same user + same fileHash + same fileName + same scope.
Duplicate = same user + same fileHash + same fileName + same scope + RBAC-visible.
Same hash with different name is allowed (intentional copy by user).
RBAC parity contract: this method must NEVER return a FileItem that
``getFile()`` would not return for the current user. Otherwise callers
(``saveUploadedFile`` / ``createFile``) hand back an id that the very
next ``updateFile`` / ``getFile`` then rejects with
``File with ID ... not found`` -- the well-known "ghost duplicate"
symptom seen when ``interfaceDbComponent`` is initialised without an
``featureInstanceId`` (e.g. via ``serviceHub``) but a same-hash+name
file exists in another featureInstance under the same mandate.
We therefore cross-check the candidate through the RBAC-aware ``getFile``
before returning it; if RBAC blocks it, we treat it as "no duplicate
for this scope" and the caller will create a fresh per-scope copy.
"""
if not self.userId:
return None
@ -869,16 +881,17 @@ class ComponentObjects:
logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file")
return None
return FileItem(
id=fileId,
mandateId=file.get("mandateId", ""),
featureInstanceId=file.get("featureInstanceId", ""),
fileName=file["fileName"],
mimeType=file["mimeType"],
fileHash=file["fileHash"],
fileSize=file["fileSize"],
sysCreatedAt=file.get("sysCreatedAt"),
)
rbacVisible = self.getFile(fileId)
if rbacVisible is None:
logger.info(
f"Duplicate FileItem {fileId} ('{fileName}', hash {fileHash[:12]}...) found via "
f"sysCreatedBy+hash+name match but is not RBAC-visible in current scope "
f"(mandateId={self.mandateId or '-'}, featureInstanceId={self.featureInstanceId or '-'}). "
f"Treating as no-duplicate so a fresh per-scope copy gets created."
)
return None
return rbacVisible
# Class-level cache — built once from the ExtractorRegistry
_extensionToMime: Optional[Dict[str, str]] = None

View file

@ -392,6 +392,18 @@ def buildSystemPrompt(
"- Prefer modular file structures over monolithic files.\n"
"- When generating applications, create separate files for logical components.\n"
"- Always plan the structure before writing code.\n\n"
"### Document references for AI tools (CRITICAL)\n"
"Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
"`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
"- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
" `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
" `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
" in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
" resolver only matches `docItem:` references.\n"
"- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
" and image embeds (`![alt](file:<fileId>)`).\n"
"Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
"`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
)
if toolsFormatted:

View file

@ -9,7 +9,9 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_buildResolverDbFromServices,
_formatToolFileResult,
_getOrCreateTempFolder,
_looksLikeBinary,
_resolveFileScope,
@ -231,11 +233,27 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"datasource:{dsId or directService or 'download'}",
userMessage=f"Downloaded {fileName} from external data source",
)
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
hint = "Use readFile to read the text content." if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg") else "Use readFile to access the content."
hint = (
"Use readFile to read the text content."
if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg")
else "Use readFile to access the content."
)
return ToolResult(
toolCallId="", toolName="downloadFromDataSource", success=True,
data=f"Downloaded '{fileName}' ({len(fileBytes)} bytes) → local file id: {fileItem.id}. {hint}"
data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Downloaded",
extraInfo=hint,
),
)
except Exception as e:
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
@ -308,8 +326,15 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register(
"downloadFromDataSource", _downloadFromDataSource,
description=(
"Download a file or email from a data source into local storage. Returns a local file ID "
"to read with readFile. Accepts either dataSourceId OR connectionId+service. "
"Download a file or email from a data source into local storage. "
"The result line contains TWO ids you must use for different purposes:\n"
" - `documentList ref: docItem:<chatDocId>` -- pass this string verbatim "
" inside the `documentList` parameter of `ai_process`, "
" `ai_summarizeDocument`, `context_extractContent`, `context_neutralizeData`, etc. "
" Always use the `docItem:<chatDocId>` form, NOT the file id, NOT a `{\"documents\":[{\"id\":...}]}` "
" wrapper -- the documentList resolver only matches `docItem:` references against the workflow.\n"
" - `file id: <fileId>` -- pass this to `readFile`, `searchInFileContent`, image embeds (`file:<fileId>`).\n"
"Accepts either dataSourceId OR connectionId+service. "
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
),
parameters={

View file

@ -3,7 +3,8 @@
"""Shared helpers for core agent tools (file scope, binary detection, temp folder)."""
import logging
from typing import Any, Optional
import uuid
from typing import Any, Dict, Optional, Tuple
logger = logging.getLogger(__name__)
@ -78,6 +79,138 @@ def _getOrCreateTempFolder(chatService) -> Optional[str]:
return None
def _attachFileAsChatDocument(
services: Any,
fileItem: Any,
*,
label: str = "agent_tool_output",
userMessage: str = "",
role: str = "assistant",
) -> Optional[str]:
"""Bind a persisted FileItem to the active workflow as a ChatDocument.
This is the **single canonical bridge** between agent-tool-produced
artefacts and the workflow's document model. Mirrors the pattern
used by workflow actions (``workflowProcessor.persistTaskResult`` /
``methodTrustee.extractFromFiles``): every artefact a workflow step
-- including agent tools -- materialises ends up addressable via
``docItem:<chatDocId>`` so downstream tools that consume
``documentList`` can resolve it against
``workflow.messages[*].documents[*].id``.
Without this bind the agent's ``downloadFromDataSource`` /
``writeFile(create)`` / ``renderDocument`` / ``generateImage`` /
``createChart`` outputs are FileItem-only and unreachable from
``getChatDocumentsFromDocumentList`` -- the symptom is
``ai_summarizeDocument`` etc. running with 0 ContentParts.
Args:
services: agent-tool services container (must expose ``.chat``).
fileItem: persisted FileItem (Pydantic obj or dict) returned
from ``saveUploadedFile`` / ``createFile`` /
``saveGeneratedFile``.
label: ``documentsLabel`` for the carrier ChatMessage --
picked up by ``docList:<label>`` references.
userMessage: optional human-readable message text.
role: ``"assistant"`` (default) or ``"tool"``; affects only
display semantics, not resolution.
Returns:
The new ``ChatDocument.id`` on success, or ``None`` when no
active workflow is bound to the chat service (e.g. standalone
agent calls outside a chat workflow). Never raises.
"""
try:
chatService = services.chat
workflow = getattr(chatService, "_workflow", None)
if not workflow or not getattr(workflow, "id", None):
return None
if isinstance(fileItem, dict):
fileId = fileItem.get("id")
fileName = fileItem.get("fileName")
fileSize = fileItem.get("fileSize") or 0
mimeType = fileItem.get("mimeType") or "application/octet-stream"
else:
fileId = getattr(fileItem, "id", None)
fileName = getattr(fileItem, "fileName", None)
fileSize = getattr(fileItem, "fileSize", None) or 0
mimeType = getattr(fileItem, "mimeType", None) or "application/octet-stream"
if not fileId:
logger.warning("_attachFileAsChatDocument: fileItem has no id, skipping bind.")
return None
chatDoc: Dict[str, Any] = {
"id": str(uuid.uuid4()),
"fileId": fileId,
"fileName": fileName or fileId,
"fileSize": fileSize,
"mimeType": mimeType,
"roundNumber": getattr(workflow, "currentRound", None),
"taskNumber": getattr(workflow, "currentTask", None),
"actionNumber": getattr(workflow, "currentAction", None),
}
messageData: Dict[str, Any] = {
"id": f"msg_tool_{uuid.uuid4().hex[:12]}",
"role": role,
"status": "step",
"message": userMessage or f"Tool result: {fileName or fileId}",
"documentsLabel": label,
}
createdMessage = chatService.storeMessageWithDocuments(
workflow, messageData, [chatDoc],
)
if not createdMessage or not getattr(createdMessage, "documents", None):
return None
return createdMessage.documents[0].id
except Exception as e:
logger.warning(f"_attachFileAsChatDocument failed (fileItem id={getattr(fileItem, 'id', None) or (fileItem.get('id') if isinstance(fileItem, dict) else '?')}): {e}")
return None
def _formatToolFileResult(
*,
fileItem: Any,
chatDocId: Optional[str],
actionLabel: str = "Created",
extraInfo: str = "",
) -> str:
"""Render the canonical agent-tool file result message.
Always presents BOTH ids the agent needs:
* ``docItem:<chatDocId>`` -- use as ``documentList`` entry for
tools like ``ai_process`` / ``ai_summarizeDocument`` /
``context_extractContent`` (resolved through ChatDocument).
* ``file id: <fileItem.id>`` -- use as ``fileId`` for direct
reads via ``readFile`` / ``downloadFile`` / image embedding
(``file:<fileItem.id>``).
When no active workflow is bound, ``chatDocId`` is ``None`` and
only the file-id line is shown -- the file is still usable for
direct reads, just not for ``documentList`` references (those
require a workflow context anyway).
"""
if isinstance(fileItem, dict):
fileId = fileItem.get("id", "?")
fileName = fileItem.get("fileName", "")
fileSize = fileItem.get("fileSize", 0)
else:
fileId = getattr(fileItem, "id", "?")
fileName = getattr(fileItem, "fileName", "")
fileSize = getattr(fileItem, "fileSize", 0)
head = f"{actionLabel} '{fileName}' ({fileSize} bytes)" if fileName else f"{actionLabel} file ({fileSize} bytes)"
parts = [head]
if chatDocId:
parts.append(f" documentList ref: docItem:{chatDocId}")
parts.append(f" file id: {fileId}")
if extraInfo:
parts.append(extraInfo)
return "\n".join(parts)
def _buildResolverDbFromServices(services: Any):
"""DB adapter for ConnectorResolver: load UserConnections by id.

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateTempFolder,
_looksLikeBinary,
_resolveFileScope,
@ -316,7 +318,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})")
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"renderDocument:{docName}",
userMessage=f"Rendered document {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({
"type": "fileCreated",
"data": {
@ -340,7 +348,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
"Images: ![alt text](file:fileId) in the markdown."
"Images: ![alt text](file:fileId) in the markdown. "
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
),
parameters={
"type": "object",
@ -588,7 +599,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})")
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"generateImage:{docName}",
userMessage=f"Generated image {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({
"type": "fileCreated",
"data": {
@ -612,7 +629,9 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Generate an image from a text description using AI (DALL-E). "
"The generated image is saved as a file in the workspace. "
"Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. "
"Provide a detailed, descriptive prompt for best results."
"Provide a detailed, descriptive prompt for best results. "
"Each image's result line carries `file id: <fileId>` (for embeds / readFile) and "
"`doc ref: docItem:<chatDocId>` (use inside `documentList` for downstream AI tools)."
),
parameters={
"type": "object",
@ -743,14 +762,24 @@ def _registerMediaTools(registry: ToolRegistry, services):
if tempFolderId and fid != "?":
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"createChart:{fileName}",
userMessage=f"Created chart {fileName}",
)
sideEvents = [{"type": "fileCreated", "data": {
"fileId": fid, "fileName": fileName,
"mimeType": "image/png", "fileSize": len(pngData),
}}]
return ToolResult(
toolCallId="", toolName="createChart", success=True,
data=f"Chart saved as '{fileName}' (id: {fid}, {len(pngData)} bytes). "
f"Embed in documents with: ![{title}](file:{fid})",
data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Chart saved as",
extraInfo=f"Embed in documents with: ![{title}](file:{fid})",
),
sideEvents=sideEvents,
)
@ -764,7 +793,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Create a data chart/graph as a PNG image using matplotlib. "
"Supported types: bar, horizontalBar, line, area, scatter, pie, donut. "
"The chart is saved as a file in the workspace. "
"Use the returned fileId to embed in documents via renderDocument: ![title](file:fileId). "
"Use the returned `file id: <fileId>` to embed in documents via "
"renderDocument: ![title](file:fileId). The result line also carries "
"`doc ref: docItem:<chatDocId>` -- use it inside `documentList` for "
"downstream AI tools that need the chart as a data source. "
"Provide structured data with labels and datasets."
),
parameters={

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateInstanceFolder,
_getOrCreateTempFolder,
_looksLikeBinary,
@ -428,9 +430,19 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"writeFile:{name}",
userMessage=f"Created {name} via writeFile",
)
return ToolResult(
toolCallId="", toolName="writeFile", success=True,
data=f"File '{name}' created (id: {fileItem.id})",
data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Created",
),
sideEvents=[{
"type": "fileCreated",
"data": {
@ -573,7 +585,11 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"- create (default): create a new file (name required).\n"
"- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required)."
"- overwrite: replace entire file content (fileId required).\n"
"On `mode=create` the result line contains BOTH a `documentList ref: docItem:<chatDocId>` "
"(use this for documentList parameters of `ai_process` / `ai_summarizeDocument` / "
"`context_extractContent` etc., always as the literal string `docItem:<id>`) AND a "
"`file id: <fileId>` (use this for `readFile`, `writeFile mode=append`, image embeds)."
),
parameters={
"type": "object",

View file

@ -178,6 +178,33 @@ class AgentService:
if workflowId is None:
workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown"
# Propagate the active workflow into every service's request
# context so agent-tool side effects (e.g. _attachFileAsChatDocument
# for downloadFromDataSource / writeFile / renderDocument) can
# bind their FileItem outputs to the workflow as ChatDocuments.
# Without this, chatService._workflow (= chatService._context.workflow)
# stays None and the documentList resolver finds zero docs --
# which is exactly the "Building structure prompt with 0 valid
# ContentParts" symptom we see when the workspace route calls
# runAgent for an attached single-file data source.
# Mirrors workflowManager._propagateWorkflowToContext.
if workflowId and workflowId != "unknown":
try:
workflow = getattr(self.services, "workflow", None)
if workflow is None or getattr(workflow, "id", None) != workflowId:
workflow = self.services.chat.getWorkflow(workflowId)
if workflow is not None:
self.services.workflow = workflow
ctx = getattr(self.services, "_service_context", None)
if ctx is not None:
ctx.workflow = workflow
for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
svc = getattr(self.services, attr, None)
if svc is not None and hasattr(svc, "_context") and svc._context is not None:
svc._context.workflow = workflow
except Exception as e:
logger.warning(f"runAgent: could not propagate workflow {workflowId} into service contexts: {e}")
resolvedLanguage = userLanguage or ""
enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds)

View file

@ -463,36 +463,38 @@ class ChatService:
Returns:
List of file info dicts.
"""
# `getAllFiles` returns `List[dict]` (each entry is a
# `FileItem.model_dump()` enriched with label columns) -- not
# Pydantic objects -- so we use dict-access throughout.
allFiles = self.interfaceDbComponent.getAllFiles()
results = []
for fileItem in allFiles:
if folderId is not None:
itemFolderId = getattr(fileItem, "folderId", None)
if itemFolderId != folderId:
if fileItem.get("folderId") != folderId:
continue
if tags:
itemTags = getattr(fileItem, "tags", None) or []
itemTags = fileItem.get("tags") or []
if not any(t in itemTags for t in tags):
continue
if search:
searchLower = search.lower()
nameMatch = searchLower in (fileItem.fileName or "").lower()
descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower()
nameMatch = searchLower in (fileItem.get("fileName") or "").lower()
descMatch = searchLower in (fileItem.get("description") or "").lower()
if not nameMatch and not descMatch:
continue
results.append({
"id": fileItem.id,
"fileName": fileItem.fileName,
"mimeType": fileItem.mimeType,
"fileSize": fileItem.fileSize,
"creationDate": fileItem.sysCreatedAt,
"tags": getattr(fileItem, "tags", None),
"folderId": getattr(fileItem, "folderId", None),
"description": getattr(fileItem, "description", None),
"status": getattr(fileItem, "status", None),
"id": fileItem.get("id"),
"fileName": fileItem.get("fileName"),
"mimeType": fileItem.get("mimeType"),
"fileSize": fileItem.get("fileSize"),
"creationDate": fileItem.get("sysCreatedAt"),
"tags": fileItem.get("tags"),
"folderId": fileItem.get("folderId"),
"description": fileItem.get("description"),
"status": fileItem.get("status"),
})
return results

View file

@ -100,12 +100,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# Update progress - preparing parameters
self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
documentListParam = parameters.get("documentList")
inline_content_parts: Optional[List[ContentPart]] = None
# Handle inline ActionDocuments (e.g. from SharePoint/email in automation2 no persistence)
# Inline ActionDocuments (SharePoint/email in automation2, no
# persistence) are list[ActionDocument-like dict] -- handled
# separately because they carry pre-extracted content. Everything
# else is normalised through the tolerant coercer.
is_inline = (
isinstance(documentListParam, list)
and len(documentListParam) > 0
@ -117,28 +123,12 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.info(
f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)"
)
elif documentListParam is None:
documentList = DocumentReferenceList(references=[])
logger.debug(f"ai.process: documentList is None, using empty DocumentReferenceList")
elif isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
logger.info(f"ai.process: Received DocumentReferenceList with {len(documentList.references)} references")
for idx, ref in enumerate(documentList.references):
logger.info(f" Reference {idx + 1}: documentId={ref.documentId}, type={type(ref).__name__}")
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
logger.info(f"ai.process: Converted string to DocumentReferenceList with {len(documentList.references)} references")
elif isinstance(documentListParam, list):
first = documentListParam[0] if documentListParam else None
logger.info(
f"ai.process: documentList is list of {len(documentListParam)} items, "
f"first type={type(first).__name__}, has_documentData={_is_action_document_like(first) if first else False}"
)
documentList = DocumentReferenceList.from_string_list(documentListParam)
logger.info(f"ai.process: Converted list to DocumentReferenceList with {len(documentList.references)} references")
else:
logger.error(f"Invalid documentList type: {type(documentListParam)}")
documentList = DocumentReferenceList(references=[])
documentList = coerceDocumentReferenceList(documentListParam)
logger.info(
f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
f"to DocumentReferenceList with {len(documentList.references)} references"
)
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
resultType = parameters.get("resultType")

View file

@ -5,7 +5,10 @@ import logging
import time
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
logger = logging.getLogger(__name__)
@ -16,20 +19,17 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_extract_{workflowId}_{int(time.time())}"
# Extract documentList from parameters dict
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed
if isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
f"expected DocumentReferenceList, list of strings/dicts, or "
f"a wrapper dict like {{'documents': [...]}}"
)
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')

View file

@ -5,7 +5,10 @@ import logging
import time
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
@ -26,20 +29,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
if not neutralizationEnabled:
logger.info("Neutralization is not enabled, returning documents unchanged")
# Return original documents if neutralization is disabled
# Get documents from documentList
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed
if isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
)
# Get ChatDocuments from documentList
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
@ -65,20 +63,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
return ActionResult.isSuccess(documents=actionDocuments)
# Extract documentList from parameters dict
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed
if isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
)
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')