Wired Infomaniak kDrive into AI adapters and tools

This commit is contained in:
ValueOn AG 2026-04-29 01:52:47 +02:00
parent 49f3660d89
commit 052647a52b
13 changed files with 488 additions and 114 deletions

View file

@ -234,6 +234,21 @@ async def listAccessibleDrives(token: str) -> List[Dict[str, Any]]:
return [d for d in drives if isinstance(d, dict) and d.get("id")]
def _lastNumericSegment(segments: List[str]) -> Optional[str]:
"""Return the last all-digit segment (kDrive file/folder IDs are int).
The agent sometimes appends the human-readable filename to a path,
e.g. ``/2980592/12/platform-overview.html``. The kDrive API does
not accept names -- only numeric IDs -- so we strip trailing
non-numeric segments and pick the last integer ID.
Returns ``None`` if no numeric segment exists.
"""
for seg in reversed(segments):
if seg.isdigit():
return seg
return None
class KdriveAdapter(ServiceAdapter):
"""kDrive ServiceAdapter -- browse drives, folders, files.
@ -246,6 +261,14 @@ class KdriveAdapter(ServiceAdapter):
The drive list is cached on the adapter instance so each browse
pays for one ``/2/drive/init`` call at most.
File-vs-folder handling: a DataSource may point at a single file
(e.g. ``/{driveId}/{fileId}`` where ``fileId`` is a regular file).
Calling ``/files/{fileId}/files`` on a file answers
``400 destination_not_a_directory`` -- so :meth:`browse` first
fetches the item's metadata and, if ``type=file``, returns a
one-element list describing the file itself instead of pretending
the directory is empty.
"""
def __init__(self, accessToken: str):
@ -258,6 +281,14 @@ class KdriveAdapter(ServiceAdapter):
self._drives = await listAccessibleDrives(self._token)
return self._drives
async def _fetchItemMeta(self, driveId: str, fileId: str) -> Optional[Dict[str, Any]]:
    """Fetch kDrive metadata for a single file or folder.

    Calls ``/2/drive/{driveId}/files/{fileId}`` and unwraps the payload.

    Returns:
        The metadata dict, or ``None`` when the API call errors or the
        response has an unexpected shape.
    """
    raw = await _infomaniakGet(self._token, f"/2/drive/{driveId}/files/{fileId}")
    if not isinstance(raw, dict) or raw.get("error"):
        return None
    payload = _unwrapData(raw)
    if isinstance(payload, dict):
        return payload
    return None
async def browse(
self,
path: str,
@ -274,7 +305,21 @@ class KdriveAdapter(ServiceAdapter):
if len(segments) == 1:
return await self._listChildren(driveId, fileId=None, limit=limit)
fileId = segments[-1]
fileId = _lastNumericSegment(segments[1:])
if fileId is None:
return []
meta = await self._fetchItemMeta(driveId, fileId)
if meta is not None and meta.get("type") == "file":
return [ExternalEntry(
name=meta.get("name") or fileId,
path=f"/{driveId}/{fileId}",
isFolder=False,
size=meta.get("size"),
mimeType=meta.get("mime_type"),
lastModified=meta.get("last_modified_at"),
metadata={"id": fileId, "kind": "file"},
)]
return await self._listChildren(driveId, fileId=fileId, limit=limit)
async def _listDrives(self) -> List[ExternalEntry]:
@ -341,16 +386,16 @@ class KdriveAdapter(ServiceAdapter):
segments = [s for s in (path or "").strip("/").split("/") if s]
if len(segments) < 2:
return DownloadResult()
driveId, fileId = segments[0], segments[-1]
driveId = segments[0]
# Agent may append the filename: ``/{driveId}/{fileId}/{name}``.
# Pull the last numeric segment instead of trusting segments[-1].
fileId = _lastNumericSegment(segments[1:])
if fileId is None:
return DownloadResult()
meta = await _infomaniakGet(self._token, f"/2/drive/{driveId}/files/{fileId}")
fileName = fileId
mimeType = "application/octet-stream"
if isinstance(meta, dict) and not meta.get("error"):
data = _unwrapData(meta)
if isinstance(data, dict):
fileName = data.get("name") or fileId
mimeType = data.get("mime_type") or mimeType
meta = await self._fetchItemMeta(driveId, fileId)
fileName = (meta or {}).get("name") or fileId
mimeType = (meta or {}).get("mime_type") or "application/octet-stream"
content = await _infomaniakDownload(
self._token, f"/2/drive/{driveId}/files/{fileId}/download"

View file

@ -4,10 +4,13 @@
Document reference models for typed document references in workflows.
"""
from typing import List, Optional
import logging
from typing import Any, List, Optional
from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel
logger = logging.getLogger(__name__)
class DocumentReference(BaseModel):
"""Base class for document references"""
@ -115,3 +118,86 @@ class DocumentReferenceList(BaseModel):
references.append(DocumentListReference(label=refStr))
return cls(references=references)
def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
    """Tolerant coercion of any agent/UI-supplied document list to
    :class:`DocumentReferenceList`.

    Accepts the canonical formats plus the dict-wrapper shapes that
    LLM tool-callers tend to generate when they see a
    ``type=DocumentList`` parameter:

    * ``None`` / ``""`` -> empty list
    * :class:`DocumentReferenceList` -> as-is
    * ``str`` -> single-element string list
    * ``list[str]`` -> :meth:`from_string_list`
    * ``list[dict]`` with ``id`` or ``documentId`` -> item references
    * ``{"documents": [...]}`` / ``{"references": [...]}`` ->
      recurse into the inner list (this is the shape LLMs love)
    * ``{"id": "..."}`` / ``{"documentId": "..."}`` -> single
      item reference
    * any unrecognised input -> empty list with a WARN log; never
      raises (the caller decides whether an empty list is fatal).
    """
    if value is None or value == "":
        return DocumentReferenceList(references=[])
    if isinstance(value, DocumentReferenceList):
        return value
    if isinstance(value, str):
        return DocumentReferenceList.from_string_list([value])
    if isinstance(value, dict):
        # Wrapper shapes first: recurse into the first recognised inner list.
        for innerKey in ("documents", "references", "items", "files"):
            if innerKey in value and isinstance(value[innerKey], list):
                return coerceDocumentReferenceList(value[innerKey])
        docId = value.get("documentId") or value.get("id")
        if docId:
            return DocumentReferenceList(references=[
                DocumentItemReference(
                    documentId=str(docId),
                    fileName=value.get("fileName") or value.get("name"),
                )
            ])
        # Lazy %-args: the message is only rendered if WARN is emitted.
        logger.warning(
            "coerceDocumentReferenceList: unsupported dict shape "
            "(keys=%s); returning empty list.",
            list(value.keys()),
        )
        return DocumentReferenceList(references=[])
    if isinstance(value, list):
        if not value:
            return DocumentReferenceList(references=[])
        # The first element decides the list's interpretation.
        first = value[0]
        if isinstance(first, str):
            return DocumentReferenceList.from_string_list(value)
        if isinstance(first, dict):
            references: List[DocumentReference] = []
            for item in value:
                if not isinstance(item, dict):
                    continue  # silently skip non-dict stragglers
                docId = item.get("documentId") or item.get("id")
                if docId:
                    references.append(DocumentItemReference(
                        documentId=str(docId),
                        fileName=item.get("fileName") or item.get("name"),
                    ))
                elif item.get("label"):
                    references.append(DocumentListReference(
                        label=str(item["label"]),
                        messageId=item.get("messageId"),
                    ))
            return DocumentReferenceList(references=references)
        # Mixed/object list (e.g. inline ActionDocument-like): caller
        # must pre-handle that case before calling this coercer.
        logger.warning(
            "coerceDocumentReferenceList: list element type "
            "%s not recognised; returning empty list.",
            type(first).__name__,
        )
        return DocumentReferenceList(references=[])
    logger.warning(
        "coerceDocumentReferenceList: unsupported value type "
        "%s; returning empty list.",
        type(value).__name__,
    )
    return DocumentReferenceList(references=[])

View file

@ -837,8 +837,20 @@ class ComponentObjects:
"""Checks if a file with the same hash AND fileName already exists for the current user
**within the same scope** (mandateId + featureInstanceId).
Duplicate = same user + same fileHash + same fileName + same scope.
Duplicate = same user + same fileHash + same fileName + same scope + RBAC-visible.
Same hash with different name is allowed (intentional copy by user).
RBAC parity contract: this method must NEVER return a FileItem that
``getFile()`` would not return for the current user. Otherwise callers
(``saveUploadedFile`` / ``createFile``) hand back an id that the very
next ``updateFile`` / ``getFile`` then rejects with
``File with ID ... not found`` -- the well-known "ghost duplicate"
symptom seen when ``interfaceDbComponent`` is initialised without an
``featureInstanceId`` (e.g. via ``serviceHub``) but a same-hash+name
file exists in another featureInstance under the same mandate.
We therefore cross-check the candidate through the RBAC-aware ``getFile``
before returning it; if RBAC blocks it, we treat it as "no duplicate
for this scope" and the caller will create a fresh per-scope copy.
"""
if not self.userId:
return None
@ -869,16 +881,17 @@ class ComponentObjects:
logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file")
return None
return FileItem(
id=fileId,
mandateId=file.get("mandateId", ""),
featureInstanceId=file.get("featureInstanceId", ""),
fileName=file["fileName"],
mimeType=file["mimeType"],
fileHash=file["fileHash"],
fileSize=file["fileSize"],
sysCreatedAt=file.get("sysCreatedAt"),
)
rbacVisible = self.getFile(fileId)
if rbacVisible is None:
logger.info(
f"Duplicate FileItem {fileId} ('{fileName}', hash {fileHash[:12]}...) found via "
f"sysCreatedBy+hash+name match but is not RBAC-visible in current scope "
f"(mandateId={self.mandateId or '-'}, featureInstanceId={self.featureInstanceId or '-'}). "
f"Treating as no-duplicate so a fresh per-scope copy gets created."
)
return None
return rbacVisible
# Class-level cache — built once from the ExtractorRegistry
_extensionToMime: Optional[Dict[str, str]] = None

View file

@ -392,6 +392,18 @@ def buildSystemPrompt(
"- Prefer modular file structures over monolithic files.\n"
"- When generating applications, create separate files for logical components.\n"
"- Always plan the structure before writing code.\n\n"
"### Document references for AI tools (CRITICAL)\n"
"Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
"`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
"- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
" `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
" `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
" in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
" resolver only matches `docItem:` references.\n"
"- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
" and image embeds (`![alt](file:<fileId>)`).\n"
"Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
"`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
)
if toolsFormatted:

View file

@ -9,7 +9,9 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_buildResolverDbFromServices,
_formatToolFileResult,
_getOrCreateTempFolder,
_looksLikeBinary,
_resolveFileScope,
@ -231,11 +233,27 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"datasource:{dsId or directService or 'download'}",
userMessage=f"Downloaded {fileName} from external data source",
)
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
hint = "Use readFile to read the text content." if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg") else "Use readFile to access the content."
hint = (
"Use readFile to read the text content."
if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg")
else "Use readFile to access the content."
)
return ToolResult(
toolCallId="", toolName="downloadFromDataSource", success=True,
data=f"Downloaded '{fileName}' ({len(fileBytes)} bytes) → local file id: {fileItem.id}. {hint}"
data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Downloaded",
extraInfo=hint,
),
)
except Exception as e:
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
@ -308,8 +326,15 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register(
"downloadFromDataSource", _downloadFromDataSource,
description=(
"Download a file or email from a data source into local storage. Returns a local file ID "
"to read with readFile. Accepts either dataSourceId OR connectionId+service. "
"Download a file or email from a data source into local storage. "
"The result line contains TWO ids you must use for different purposes:\n"
" - `documentList ref: docItem:<chatDocId>` -- pass this string verbatim "
" inside the `documentList` parameter of `ai_process`, "
" `ai_summarizeDocument`, `context_extractContent`, `context_neutralizeData`, etc. "
" Always use the `docItem:<chatDocId>` form, NOT the file id, NOT a `{\"documents\":[{\"id\":...}]}` "
" wrapper -- the documentList resolver only matches `docItem:` references against the workflow.\n"
" - `file id: <fileId>` -- pass this to `readFile`, `searchInFileContent`, image embeds (`file:<fileId>`).\n"
"Accepts either dataSourceId OR connectionId+service. "
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
),
parameters={

View file

@ -3,7 +3,8 @@
"""Shared helpers for core agent tools (file scope, binary detection, temp folder)."""
import logging
from typing import Any, Optional
import uuid
from typing import Any, Dict, Optional, Tuple
logger = logging.getLogger(__name__)
@ -78,6 +79,138 @@ def _getOrCreateTempFolder(chatService) -> Optional[str]:
return None
def _attachFileAsChatDocument(
services: Any,
fileItem: Any,
*,
label: str = "agent_tool_output",
userMessage: str = "",
role: str = "assistant",
) -> Optional[str]:
"""Bind a persisted FileItem to the active workflow as a ChatDocument.
This is the **single canonical bridge** between agent-tool-produced
artefacts and the workflow's document model. Mirrors the pattern
used by workflow actions (``workflowProcessor.persistTaskResult`` /
``methodTrustee.extractFromFiles``): every artefact a workflow step
-- including agent tools -- materialises ends up addressable via
``docItem:<chatDocId>`` so downstream tools that consume
``documentList`` can resolve it against
``workflow.messages[*].documents[*].id``.
Without this bind the agent's ``downloadFromDataSource`` /
``writeFile(create)`` / ``renderDocument`` / ``generateImage`` /
``createChart`` outputs are FileItem-only and unreachable from
``getChatDocumentsFromDocumentList`` -- the symptom is
``ai_summarizeDocument`` etc. running with 0 ContentParts.
Args:
services: agent-tool services container (must expose ``.chat``).
fileItem: persisted FileItem (Pydantic obj or dict) returned
from ``saveUploadedFile`` / ``createFile`` /
``saveGeneratedFile``.
label: ``documentsLabel`` for the carrier ChatMessage --
picked up by ``docList:<label>`` references.
userMessage: optional human-readable message text.
role: ``"assistant"`` (default) or ``"tool"``; affects only
display semantics, not resolution.
Returns:
The new ``ChatDocument.id`` on success, or ``None`` when no
active workflow is bound to the chat service (e.g. standalone
agent calls outside a chat workflow). Never raises.
"""
try:
chatService = services.chat
workflow = getattr(chatService, "_workflow", None)
if not workflow or not getattr(workflow, "id", None):
return None
if isinstance(fileItem, dict):
fileId = fileItem.get("id")
fileName = fileItem.get("fileName")
fileSize = fileItem.get("fileSize") or 0
mimeType = fileItem.get("mimeType") or "application/octet-stream"
else:
fileId = getattr(fileItem, "id", None)
fileName = getattr(fileItem, "fileName", None)
fileSize = getattr(fileItem, "fileSize", None) or 0
mimeType = getattr(fileItem, "mimeType", None) or "application/octet-stream"
if not fileId:
logger.warning("_attachFileAsChatDocument: fileItem has no id, skipping bind.")
return None
chatDoc: Dict[str, Any] = {
"id": str(uuid.uuid4()),
"fileId": fileId,
"fileName": fileName or fileId,
"fileSize": fileSize,
"mimeType": mimeType,
"roundNumber": getattr(workflow, "currentRound", None),
"taskNumber": getattr(workflow, "currentTask", None),
"actionNumber": getattr(workflow, "currentAction", None),
}
messageData: Dict[str, Any] = {
"id": f"msg_tool_{uuid.uuid4().hex[:12]}",
"role": role,
"status": "step",
"message": userMessage or f"Tool result: {fileName or fileId}",
"documentsLabel": label,
}
createdMessage = chatService.storeMessageWithDocuments(
workflow, messageData, [chatDoc],
)
if not createdMessage or not getattr(createdMessage, "documents", None):
return None
return createdMessage.documents[0].id
except Exception as e:
logger.warning(f"_attachFileAsChatDocument failed (fileItem id={getattr(fileItem, 'id', None) or (fileItem.get('id') if isinstance(fileItem, dict) else '?')}): {e}")
return None
def _formatToolFileResult(
*,
fileItem: Any,
chatDocId: Optional[str],
actionLabel: str = "Created",
extraInfo: str = "",
) -> str:
"""Render the canonical agent-tool file result message.
Always presents BOTH ids the agent needs:
* ``docItem:<chatDocId>`` -- use as ``documentList`` entry for
tools like ``ai_process`` / ``ai_summarizeDocument`` /
``context_extractContent`` (resolved through ChatDocument).
* ``file id: <fileItem.id>`` -- use as ``fileId`` for direct
reads via ``readFile`` / ``downloadFile`` / image embedding
(``file:<fileItem.id>``).
When no active workflow is bound, ``chatDocId`` is ``None`` and
only the file-id line is shown -- the file is still usable for
direct reads, just not for ``documentList`` references (those
require a workflow context anyway).
"""
if isinstance(fileItem, dict):
fileId = fileItem.get("id", "?")
fileName = fileItem.get("fileName", "")
fileSize = fileItem.get("fileSize", 0)
else:
fileId = getattr(fileItem, "id", "?")
fileName = getattr(fileItem, "fileName", "")
fileSize = getattr(fileItem, "fileSize", 0)
head = f"{actionLabel} '{fileName}' ({fileSize} bytes)" if fileName else f"{actionLabel} file ({fileSize} bytes)"
parts = [head]
if chatDocId:
parts.append(f" documentList ref: docItem:{chatDocId}")
parts.append(f" file id: {fileId}")
if extraInfo:
parts.append(extraInfo)
return "\n".join(parts)
def _buildResolverDbFromServices(services: Any):
"""DB adapter for ConnectorResolver: load UserConnections by id.

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateTempFolder,
_looksLikeBinary,
_resolveFileScope,
@ -316,7 +318,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})")
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"renderDocument:{docName}",
userMessage=f"Rendered document {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({
"type": "fileCreated",
"data": {
@ -340,7 +348,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
"Images: ![alt text](file:fileId) in the markdown."
"Images: ![alt text](file:fileId) in the markdown. "
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
),
parameters={
"type": "object",
@ -588,7 +599,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})")
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"generateImage:{docName}",
userMessage=f"Generated image {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({
"type": "fileCreated",
"data": {
@ -612,7 +629,9 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Generate an image from a text description using AI (DALL-E). "
"The generated image is saved as a file in the workspace. "
"Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. "
"Provide a detailed, descriptive prompt for best results."
"Provide a detailed, descriptive prompt for best results. "
"Each image's result line carries `file id: <fileId>` (for embeds / readFile) and "
"`doc ref: docItem:<chatDocId>` (use inside `documentList` for downstream AI tools)."
),
parameters={
"type": "object",
@ -743,14 +762,24 @@ def _registerMediaTools(registry: ToolRegistry, services):
if tempFolderId and fid != "?":
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"createChart:{fileName}",
userMessage=f"Created chart {fileName}",
)
sideEvents = [{"type": "fileCreated", "data": {
"fileId": fid, "fileName": fileName,
"mimeType": "image/png", "fileSize": len(pngData),
}}]
return ToolResult(
toolCallId="", toolName="createChart", success=True,
data=f"Chart saved as '{fileName}' (id: {fid}, {len(pngData)} bytes). "
f"Embed in documents with: ![{title}](file:{fid})",
data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Chart saved as",
extraInfo=f"Embed in documents with: ![{title}](file:{fid})",
),
sideEvents=sideEvents,
)
@ -764,7 +793,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Create a data chart/graph as a PNG image using matplotlib. "
"Supported types: bar, horizontalBar, line, area, scatter, pie, donut. "
"The chart is saved as a file in the workspace. "
"Use the returned fileId to embed in documents via renderDocument: ![title](file:fileId). "
"Use the returned `file id: <fileId>` to embed in documents via "
"renderDocument: ![title](file:fileId). The result line also carries "
"`doc ref: docItem:<chatDocId>` -- use it inside `documentList` for "
"downstream AI tools that need the chart as a data source. "
"Provide structured data with labels and datasets."
),
parameters={

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateInstanceFolder,
_getOrCreateTempFolder,
_looksLikeBinary,
@ -428,9 +430,19 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"writeFile:{name}",
userMessage=f"Created {name} via writeFile",
)
return ToolResult(
toolCallId="", toolName="writeFile", success=True,
data=f"File '{name}' created (id: {fileItem.id})",
data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Created",
),
sideEvents=[{
"type": "fileCreated",
"data": {
@ -573,7 +585,11 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"- create (default): create a new file (name required).\n"
"- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required)."
"- overwrite: replace entire file content (fileId required).\n"
"On `mode=create` the result line contains BOTH a `documentList ref: docItem:<chatDocId>` "
"(use this for documentList parameters of `ai_process` / `ai_summarizeDocument` / "
"`context_extractContent` etc., always as the literal string `docItem:<id>`) AND a "
"`file id: <fileId>` (use this for `readFile`, `writeFile mode=append`, image embeds)."
),
parameters={
"type": "object",

View file

@ -178,6 +178,33 @@ class AgentService:
if workflowId is None:
workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown"
# Propagate the active workflow into every service's request
# context so agent-tool side effects (e.g. _attachFileAsChatDocument
# for downloadFromDataSource / writeFile / renderDocument) can
# bind their FileItem outputs to the workflow as ChatDocuments.
# Without this, chatService._workflow (= chatService._context.workflow)
# stays None and the documentList resolver finds zero docs --
# which is exactly the "Building structure prompt with 0 valid
# ContentParts" symptom we see when the workspace route calls
# runAgent for an attached single-file data source.
# Mirrors workflowManager._propagateWorkflowToContext.
if workflowId and workflowId != "unknown":
try:
workflow = getattr(self.services, "workflow", None)
if workflow is None or getattr(workflow, "id", None) != workflowId:
workflow = self.services.chat.getWorkflow(workflowId)
if workflow is not None:
self.services.workflow = workflow
ctx = getattr(self.services, "_service_context", None)
if ctx is not None:
ctx.workflow = workflow
for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
svc = getattr(self.services, attr, None)
if svc is not None and hasattr(svc, "_context") and svc._context is not None:
svc._context.workflow = workflow
except Exception as e:
logger.warning(f"runAgent: could not propagate workflow {workflowId} into service contexts: {e}")
resolvedLanguage = userLanguage or ""
enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds)

View file

@ -463,36 +463,38 @@ class ChatService:
Returns:
List of file info dicts.
"""
# `getAllFiles` returns `List[dict]` (each entry is a
# `FileItem.model_dump()` enriched with label columns) -- not
# Pydantic objects -- so we use dict-access throughout.
allFiles = self.interfaceDbComponent.getAllFiles()
results = []
for fileItem in allFiles:
if folderId is not None:
itemFolderId = getattr(fileItem, "folderId", None)
if itemFolderId != folderId:
if fileItem.get("folderId") != folderId:
continue
if tags:
itemTags = getattr(fileItem, "tags", None) or []
itemTags = fileItem.get("tags") or []
if not any(t in itemTags for t in tags):
continue
if search:
searchLower = search.lower()
nameMatch = searchLower in (fileItem.fileName or "").lower()
descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower()
nameMatch = searchLower in (fileItem.get("fileName") or "").lower()
descMatch = searchLower in (fileItem.get("description") or "").lower()
if not nameMatch and not descMatch:
continue
results.append({
"id": fileItem.id,
"fileName": fileItem.fileName,
"mimeType": fileItem.mimeType,
"fileSize": fileItem.fileSize,
"creationDate": fileItem.sysCreatedAt,
"tags": getattr(fileItem, "tags", None),
"folderId": getattr(fileItem, "folderId", None),
"description": getattr(fileItem, "description", None),
"status": getattr(fileItem, "status", None),
"id": fileItem.get("id"),
"fileName": fileItem.get("fileName"),
"mimeType": fileItem.get("mimeType"),
"fileSize": fileItem.get("fileSize"),
"creationDate": fileItem.get("sysCreatedAt"),
"tags": fileItem.get("tags"),
"folderId": fileItem.get("folderId"),
"description": fileItem.get("description"),
"status": fileItem.get("status"),
})
return results

View file

@ -100,12 +100,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# Update progress - preparing parameters
self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
documentListParam = parameters.get("documentList")
inline_content_parts: Optional[List[ContentPart]] = None
# Handle inline ActionDocuments (e.g. from SharePoint/email in automation2 no persistence)
# Inline ActionDocuments (SharePoint/email in automation2, no
# persistence) are list[ActionDocument-like dict] -- handled
# separately because they carry pre-extracted content. Everything
# else is normalised through the tolerant coercer.
is_inline = (
isinstance(documentListParam, list)
and len(documentListParam) > 0
@ -117,28 +123,12 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.info(
f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)"
)
elif documentListParam is None:
documentList = DocumentReferenceList(references=[])
logger.debug(f"ai.process: documentList is None, using empty DocumentReferenceList")
elif isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
logger.info(f"ai.process: Received DocumentReferenceList with {len(documentList.references)} references")
for idx, ref in enumerate(documentList.references):
logger.info(f" Reference {idx + 1}: documentId={ref.documentId}, type={type(ref).__name__}")
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
logger.info(f"ai.process: Converted string to DocumentReferenceList with {len(documentList.references)} references")
elif isinstance(documentListParam, list):
first = documentListParam[0] if documentListParam else None
logger.info(
f"ai.process: documentList is list of {len(documentListParam)} items, "
f"first type={type(first).__name__}, has_documentData={_is_action_document_like(first) if first else False}"
)
documentList = DocumentReferenceList.from_string_list(documentListParam)
logger.info(f"ai.process: Converted list to DocumentReferenceList with {len(documentList.references)} references")
else:
logger.error(f"Invalid documentList type: {type(documentListParam)}")
documentList = DocumentReferenceList(references=[])
documentList = coerceDocumentReferenceList(documentListParam)
logger.info(
f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
f"to DocumentReferenceList with {len(documentList.references)} references"
)
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
resultType = parameters.get("resultType")

View file

@ -5,7 +5,10 @@ import logging
import time
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
logger = logging.getLogger(__name__)
@ -16,20 +19,17 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_extract_{workflowId}_{int(time.time())}"
# Extract documentList from parameters dict
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed
if isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
f"expected DocumentReferenceList, list of strings/dicts, or "
f"a wrapper dict like {{'documents': [...]}}"
)
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')

View file

@ -5,7 +5,10 @@ import logging
import time
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
@ -26,20 +29,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
if not neutralizationEnabled:
logger.info("Neutralization is not enabled, returning documents unchanged")
# Return original documents if neutralization is disabled
# Get documents from documentList
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed
if isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
)
# Get ChatDocuments from documentList
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
@ -65,20 +63,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
return ActionResult.isSuccess(documents=actionDocuments)
# Extract documentList from parameters dict
documentListParam = parameters.get("documentList")
if not documentListParam:
return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed
if isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
documentList = coerceDocumentReferenceList(documentListParam)
if not documentList.references:
return ActionResult.isFailure(
error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
)
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')