wired infomaniak to ai adapters and tools

This commit is contained in:
ValueOn AG 2026-04-29 01:52:47 +02:00
parent 49f3660d89
commit 052647a52b
13 changed files with 488 additions and 114 deletions

View file

@ -234,6 +234,21 @@ async def listAccessibleDrives(token: str) -> List[Dict[str, Any]]:
return [d for d in drives if isinstance(d, dict) and d.get("id")] return [d for d in drives if isinstance(d, dict) and d.get("id")]
def _lastNumericSegment(segments: List[str]) -> Optional[str]:
"""Return the last all-digit segment (kDrive file/folder IDs are int).
The agent sometimes appends the human-readable filename to a path,
e.g. ``/2980592/12/platform-overview.html``. The kDrive API does
not accept names -- only numeric IDs -- so we strip trailing
non-numeric segments and pick the last integer ID.
Returns ``None`` if no numeric segment exists.
"""
for seg in reversed(segments):
if seg.isdigit():
return seg
return None
class KdriveAdapter(ServiceAdapter): class KdriveAdapter(ServiceAdapter):
"""kDrive ServiceAdapter -- browse drives, folders, files. """kDrive ServiceAdapter -- browse drives, folders, files.
@ -246,6 +261,14 @@ class KdriveAdapter(ServiceAdapter):
The drive list is cached on the adapter instance so each browse The drive list is cached on the adapter instance so each browse
pays for one ``/2/drive/init`` call at most. pays for one ``/2/drive/init`` call at most.
File-vs-folder handling: a DataSource may point at a single file
(e.g. ``/{driveId}/{fileId}`` where ``fileId`` is a regular file).
Calling ``/files/{fileId}/files`` on a file answers
``400 destination_not_a_directory`` -- so :meth:`browse` first
fetches the item's metadata and, if ``type=file``, returns a
one-element list describing the file itself instead of pretending
the directory is empty.
""" """
def __init__(self, accessToken: str): def __init__(self, accessToken: str):
@ -258,6 +281,14 @@ class KdriveAdapter(ServiceAdapter):
self._drives = await listAccessibleDrives(self._token) self._drives = await listAccessibleDrives(self._token)
return self._drives return self._drives
async def _fetchItemMeta(self, driveId: str, fileId: str) -> Optional[Dict[str, Any]]:
    """Fetch metadata for one kDrive file/folder.

    Issues ``GET /2/drive/{driveId}/files/{fileId}`` and unwraps the
    payload.  Any error response or non-dict payload collapses to
    ``None`` so callers get a single "no usable metadata" signal.
    """
    response = await _infomaniakGet(self._token, f"/2/drive/{driveId}/files/{fileId}")
    if isinstance(response, dict) and not response.get("error"):
        payload = _unwrapData(response)
        if isinstance(payload, dict):
            return payload
    return None
async def browse( async def browse(
self, self,
path: str, path: str,
@ -274,7 +305,21 @@ class KdriveAdapter(ServiceAdapter):
if len(segments) == 1: if len(segments) == 1:
return await self._listChildren(driveId, fileId=None, limit=limit) return await self._listChildren(driveId, fileId=None, limit=limit)
fileId = segments[-1] fileId = _lastNumericSegment(segments[1:])
if fileId is None:
return []
meta = await self._fetchItemMeta(driveId, fileId)
if meta is not None and meta.get("type") == "file":
return [ExternalEntry(
name=meta.get("name") or fileId,
path=f"/{driveId}/{fileId}",
isFolder=False,
size=meta.get("size"),
mimeType=meta.get("mime_type"),
lastModified=meta.get("last_modified_at"),
metadata={"id": fileId, "kind": "file"},
)]
return await self._listChildren(driveId, fileId=fileId, limit=limit) return await self._listChildren(driveId, fileId=fileId, limit=limit)
async def _listDrives(self) -> List[ExternalEntry]: async def _listDrives(self) -> List[ExternalEntry]:
@ -341,16 +386,16 @@ class KdriveAdapter(ServiceAdapter):
segments = [s for s in (path or "").strip("/").split("/") if s] segments = [s for s in (path or "").strip("/").split("/") if s]
if len(segments) < 2: if len(segments) < 2:
return DownloadResult() return DownloadResult()
driveId, fileId = segments[0], segments[-1] driveId = segments[0]
# Agent may append the filename: ``/{driveId}/{fileId}/{name}``.
# Pull the last numeric segment instead of trusting segments[-1].
fileId = _lastNumericSegment(segments[1:])
if fileId is None:
return DownloadResult()
meta = await _infomaniakGet(self._token, f"/2/drive/{driveId}/files/{fileId}") meta = await self._fetchItemMeta(driveId, fileId)
fileName = fileId fileName = (meta or {}).get("name") or fileId
mimeType = "application/octet-stream" mimeType = (meta or {}).get("mime_type") or "application/octet-stream"
if isinstance(meta, dict) and not meta.get("error"):
data = _unwrapData(meta)
if isinstance(data, dict):
fileName = data.get("name") or fileId
mimeType = data.get("mime_type") or mimeType
content = await _infomaniakDownload( content = await _infomaniakDownload(
self._token, f"/2/drive/{driveId}/files/{fileId}/download" self._token, f"/2/drive/{driveId}/files/{fileId}/download"

View file

@ -4,10 +4,13 @@
Document reference models for typed document references in workflows. Document reference models for typed document references in workflows.
""" """
from typing import List, Optional import logging
from typing import Any, List, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from modules.shared.i18nRegistry import i18nModel from modules.shared.i18nRegistry import i18nModel
logger = logging.getLogger(__name__)
class DocumentReference(BaseModel): class DocumentReference(BaseModel):
"""Base class for document references""" """Base class for document references"""
@ -115,3 +118,86 @@ class DocumentReferenceList(BaseModel):
references.append(DocumentListReference(label=refStr)) references.append(DocumentListReference(label=refStr))
return cls(references=references) return cls(references=references)
def coerceDocumentReferenceList(value: Any) -> DocumentReferenceList:
    """Best-effort conversion of arbitrary agent/UI input into a
    :class:`DocumentReferenceList`.

    Handles the canonical shapes plus the wrapper shapes LLM
    tool-callers commonly emit for a ``type=DocumentList`` parameter:

    * ``None`` / ``""`` — empty list
    * :class:`DocumentReferenceList` — returned unchanged
    * ``str`` — one-element string list
    * ``list[str]`` — via :meth:`from_string_list`
    * ``list[dict]`` carrying ``id`` / ``documentId`` — item references
    * ``{"documents": [...]}`` and similar wrappers — recurse inward
    * ``{"id": "..."}`` / ``{"documentId": "..."}`` — single item ref

    Anything else yields an empty list plus a WARN log; this function
    never raises — callers decide whether an empty list is fatal.
    """
    if value is None or value == "":
        return DocumentReferenceList(references=[])

    if isinstance(value, DocumentReferenceList):
        return value

    if isinstance(value, str):
        return DocumentReferenceList.from_string_list([value])

    if isinstance(value, dict):
        # Wrapper shapes: unwrap the inner list and re-coerce it.
        for wrapperKey in ("documents", "references", "items", "files"):
            inner = value.get(wrapperKey)
            if isinstance(inner, list):
                return coerceDocumentReferenceList(inner)
        docId = value.get("documentId") or value.get("id")
        if docId:
            singleRef = DocumentItemReference(
                documentId=str(docId),
                fileName=value.get("fileName") or value.get("name"),
            )
            return DocumentReferenceList(references=[singleRef])
        logger.warning(
            f"coerceDocumentReferenceList: unsupported dict shape "
            f"(keys={list(value.keys())}); returning empty list."
        )
        return DocumentReferenceList(references=[])

    if isinstance(value, list):
        if not value:
            return DocumentReferenceList(references=[])
        head = value[0]
        if isinstance(head, str):
            return DocumentReferenceList.from_string_list(value)
        if isinstance(head, dict):
            collected: List[DocumentReference] = []
            for entry in value:
                if not isinstance(entry, dict):
                    continue  # silently skip stray non-dict members
                entryId = entry.get("documentId") or entry.get("id")
                if entryId:
                    collected.append(DocumentItemReference(
                        documentId=str(entryId),
                        fileName=entry.get("fileName") or entry.get("name"),
                    ))
                elif entry.get("label"):
                    collected.append(DocumentListReference(
                        label=str(entry["label"]),
                        messageId=entry.get("messageId"),
                    ))
            return DocumentReferenceList(references=collected)
        # Mixed/object list (e.g. inline ActionDocument-like): caller
        # must pre-handle that case before calling this coercer.
        logger.warning(
            f"coerceDocumentReferenceList: list element type "
            f"{type(head).__name__} not recognised; returning empty list."
        )
        return DocumentReferenceList(references=[])

    logger.warning(
        f"coerceDocumentReferenceList: unsupported value type "
        f"{type(value).__name__}; returning empty list."
    )
    return DocumentReferenceList(references=[])

View file

@ -836,13 +836,25 @@ class ComponentObjects:
def checkForDuplicateFile(self, fileHash: str, fileName: str) -> Optional[FileItem]: def checkForDuplicateFile(self, fileHash: str, fileName: str) -> Optional[FileItem]:
"""Checks if a file with the same hash AND fileName already exists for the current user """Checks if a file with the same hash AND fileName already exists for the current user
**within the same scope** (mandateId + featureInstanceId). **within the same scope** (mandateId + featureInstanceId).
Duplicate = same user + same fileHash + same fileName + same scope. Duplicate = same user + same fileHash + same fileName + same scope + RBAC-visible.
Same hash with different name is allowed (intentional copy by user). Same hash with different name is allowed (intentional copy by user).
RBAC parity contract: this method must NEVER return a FileItem that
``getFile()`` would not return for the current user. Otherwise callers
(``saveUploadedFile`` / ``createFile``) hand back an id that the very
next ``updateFile`` / ``getFile`` then rejects with
``File with ID ... not found`` -- the well-known "ghost duplicate"
symptom seen when ``interfaceDbComponent`` is initialised without an
``featureInstanceId`` (e.g. via ``serviceHub``) but a same-hash+name
file exists in another featureInstance under the same mandate.
We therefore cross-check the candidate through the RBAC-aware ``getFile``
before returning it; if RBAC blocks it, we treat it as "no duplicate
for this scope" and the caller will create a fresh per-scope copy.
""" """
if not self.userId: if not self.userId:
return None return None
recordFilter: dict = { recordFilter: dict = {
"sysCreatedBy": self.userId, "sysCreatedBy": self.userId,
"fileHash": fileHash, "fileHash": fileHash,
@ -857,10 +869,10 @@ class ComponentObjects:
FileItem, FileItem,
recordFilter=recordFilter, recordFilter=recordFilter,
) )
if not matchingFiles: if not matchingFiles:
return None return None
file = matchingFiles[0] file = matchingFiles[0]
fileId = file["id"] fileId = file["id"]
@ -869,16 +881,17 @@ class ComponentObjects:
logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file") logger.warning(f"Duplicate FileItem {fileId} found but FileData missing — treating as new file")
return None return None
return FileItem( rbacVisible = self.getFile(fileId)
id=fileId, if rbacVisible is None:
mandateId=file.get("mandateId", ""), logger.info(
featureInstanceId=file.get("featureInstanceId", ""), f"Duplicate FileItem {fileId} ('{fileName}', hash {fileHash[:12]}...) found via "
fileName=file["fileName"], f"sysCreatedBy+hash+name match but is not RBAC-visible in current scope "
mimeType=file["mimeType"], f"(mandateId={self.mandateId or '-'}, featureInstanceId={self.featureInstanceId or '-'}). "
fileHash=file["fileHash"], f"Treating as no-duplicate so a fresh per-scope copy gets created."
fileSize=file["fileSize"], )
sysCreatedAt=file.get("sysCreatedAt"), return None
)
return rbacVisible
# Class-level cache — built once from the ExtractorRegistry # Class-level cache — built once from the ExtractorRegistry
_extensionToMime: Optional[Dict[str, str]] = None _extensionToMime: Optional[Dict[str, str]] = None

View file

@ -392,6 +392,18 @@ def buildSystemPrompt(
"- Prefer modular file structures over monolithic files.\n" "- Prefer modular file structures over monolithic files.\n"
"- When generating applications, create separate files for logical components.\n" "- When generating applications, create separate files for logical components.\n"
"- Always plan the structure before writing code.\n\n" "- Always plan the structure before writing code.\n\n"
"### Document references for AI tools (CRITICAL)\n"
"Tools that produce a file (`downloadFromDataSource`, `writeFile mode=create`, "
"`renderDocument`, `generateImage`, `createChart`) return a result line with TWO ids:\n"
"- `documentList ref: docItem:<chatDocId>` — pass this STRING VERBATIM as an entry of "
" `documentList` for `ai_process`, `ai_summarizeDocument`, `context_extractContent`, "
" `context_neutralizeData`, etc. Always as the literal `docItem:<id>` — do NOT wrap "
" in `{\"documents\":[{\"id\":...}]}` and do NOT use the file id here, the documentList "
" resolver only matches `docItem:` references.\n"
"- `file id: <fileId>` — use for `readFile`, `searchInFileContent`, `writeFile mode=append`, "
" and image embeds (`![alt](file:<fileId>)`).\n"
"Example: after `downloadFromDataSource` returns `docItem:abc123`, call "
"`ai_summarizeDocument(documentList=[\"docItem:abc123\"], summaryLength=\"medium\")`.\n\n"
) )
if toolsFormatted: if toolsFormatted:

View file

@ -9,7 +9,9 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_buildResolverDbFromServices, _buildResolverDbFromServices,
_formatToolFileResult,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope, _resolveFileScope,
@ -231,11 +233,27 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fileItem.id, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"datasource:{dsId or directService or 'download'}",
userMessage=f"Downloaded {fileName} from external data source",
)
ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else "" ext = fileName.rsplit(".", 1)[-1].lower() if "." in fileName else ""
hint = "Use readFile to read the text content." if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg") else "Use readFile to access the content." hint = (
"Use readFile to read the text content."
if ext in ("doc", "docx", "txt", "csv", "json", "xml", "html", "md", "rtf", "odt", "xls", "xlsx", "pptx", "pdf", "eml", "msg")
else "Use readFile to access the content."
)
return ToolResult( return ToolResult(
toolCallId="", toolName="downloadFromDataSource", success=True, toolCallId="", toolName="downloadFromDataSource", success=True,
data=f"Downloaded '{fileName}' ({len(fileBytes)} bytes) → local file id: {fileItem.id}. {hint}" data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Downloaded",
extraInfo=hint,
),
) )
except Exception as e: except Exception as e:
return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e)) return ToolResult(toolCallId="", toolName="downloadFromDataSource", success=False, error=str(e))
@ -308,8 +326,15 @@ def _registerDataSourceTools(registry: ToolRegistry, services):
registry.register( registry.register(
"downloadFromDataSource", _downloadFromDataSource, "downloadFromDataSource", _downloadFromDataSource,
description=( description=(
"Download a file or email from a data source into local storage. Returns a local file ID " "Download a file or email from a data source into local storage. "
"to read with readFile. Accepts either dataSourceId OR connectionId+service. " "The result line contains TWO ids you must use for different purposes:\n"
" - `documentList ref: docItem:<chatDocId>` -- pass this string verbatim "
" inside the `documentList` parameter of `ai_process`, "
" `ai_summarizeDocument`, `context_extractContent`, `context_neutralizeData`, etc. "
" Always use the `docItem:<chatDocId>` form, NOT the file id, NOT a `{\"documents\":[{\"id\":...}]}` "
" wrapper -- the documentList resolver only matches `docItem:` references against the workflow.\n"
" - `file id: <fileId>` -- pass this to `readFile`, `searchInFileContent`, image embeds (`file:<fileId>`).\n"
"Accepts either dataSourceId OR connectionId+service. "
"For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content." "For email sources (Outlook, Gmail), browse/search only return subjects -- use this to get full content."
), ),
parameters={ parameters={

View file

@ -3,7 +3,8 @@
"""Shared helpers for core agent tools (file scope, binary detection, temp folder).""" """Shared helpers for core agent tools (file scope, binary detection, temp folder)."""
import logging import logging
from typing import Any, Optional import uuid
from typing import Any, Dict, Optional, Tuple
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -78,6 +79,138 @@ def _getOrCreateTempFolder(chatService) -> Optional[str]:
return None return None
def _attachFileAsChatDocument(
    services: Any,
    fileItem: Any,
    *,
    label: str = "agent_tool_output",
    userMessage: str = "",
    role: str = "assistant",
) -> Optional[str]:
    """Register a persisted FileItem with the active workflow as a
    ChatDocument and return the new document id.

    This is the canonical bridge from agent-tool artefacts to the
    workflow's document model: once bound, the artefact is addressable
    as ``docItem:<chatDocId>`` by any downstream tool that consumes a
    ``documentList`` parameter (resolved against
    ``workflow.messages[*].documents[*].id``).  Without this bind, a
    tool-produced file exists only as a FileItem and cannot be found
    by ``getChatDocumentsFromDocumentList``.

    Args:
        services: agent-tool services container (must expose ``.chat``).
        fileItem: persisted FileItem, either a Pydantic object or a dict.
        label: ``documentsLabel`` for the carrier ChatMessage (used by
            ``docList:<label>`` references).
        userMessage: optional human-readable message text.
        role: display role of the carrier message, default "assistant".

    Returns:
        The new ``ChatDocument.id``, or ``None`` when no workflow is
        bound to the chat service or the bind fails.  Never raises.
    """
    try:
        chat = services.chat
        workflow = getattr(chat, "_workflow", None)
        if not workflow or not getattr(workflow, "id", None):
            # Standalone agent call outside a chat workflow — nothing to bind to.
            return None

        # Accept both dict-shaped and attribute-shaped FileItems.
        if isinstance(fileItem, dict):
            fileId = fileItem.get("id")
            fileName = fileItem.get("fileName")
            fileSize = fileItem.get("fileSize") or 0
            mimeType = fileItem.get("mimeType") or "application/octet-stream"
        else:
            fileId = getattr(fileItem, "id", None)
            fileName = getattr(fileItem, "fileName", None)
            fileSize = getattr(fileItem, "fileSize", None) or 0
            mimeType = getattr(fileItem, "mimeType", None) or "application/octet-stream"

        if not fileId:
            logger.warning("_attachFileAsChatDocument: fileItem has no id, skipping bind.")
            return None

        document: Dict[str, Any] = {
            "id": str(uuid.uuid4()),
            "fileId": fileId,
            "fileName": fileName or fileId,
            "fileSize": fileSize,
            "mimeType": mimeType,
            "roundNumber": getattr(workflow, "currentRound", None),
            "taskNumber": getattr(workflow, "currentTask", None),
            "actionNumber": getattr(workflow, "currentAction", None),
        }
        carrier: Dict[str, Any] = {
            "id": f"msg_tool_{uuid.uuid4().hex[:12]}",
            "role": role,
            "status": "step",
            "message": userMessage or f"Tool result: {fileName or fileId}",
            "documentsLabel": label,
        }

        stored = chat.storeMessageWithDocuments(workflow, carrier, [document])
        if stored and getattr(stored, "documents", None):
            return stored.documents[0].id
        return None
    except Exception as e:
        itemId = getattr(fileItem, 'id', None) or (fileItem.get('id') if isinstance(fileItem, dict) else '?')
        logger.warning(f"_attachFileAsChatDocument failed (fileItem id={itemId}): {e}")
        return None
def _formatToolFileResult(
*,
fileItem: Any,
chatDocId: Optional[str],
actionLabel: str = "Created",
extraInfo: str = "",
) -> str:
"""Render the canonical agent-tool file result message.
Always presents BOTH ids the agent needs:
* ``docItem:<chatDocId>`` -- use as ``documentList`` entry for
tools like ``ai_process`` / ``ai_summarizeDocument`` /
``context_extractContent`` (resolved through ChatDocument).
* ``file id: <fileItem.id>`` -- use as ``fileId`` for direct
reads via ``readFile`` / ``downloadFile`` / image embedding
(``file:<fileItem.id>``).
When no active workflow is bound, ``chatDocId`` is ``None`` and
only the file-id line is shown -- the file is still usable for
direct reads, just not for ``documentList`` references (those
require a workflow context anyway).
"""
if isinstance(fileItem, dict):
fileId = fileItem.get("id", "?")
fileName = fileItem.get("fileName", "")
fileSize = fileItem.get("fileSize", 0)
else:
fileId = getattr(fileItem, "id", "?")
fileName = getattr(fileItem, "fileName", "")
fileSize = getattr(fileItem, "fileSize", 0)
head = f"{actionLabel} '{fileName}' ({fileSize} bytes)" if fileName else f"{actionLabel} file ({fileSize} bytes)"
parts = [head]
if chatDocId:
parts.append(f" documentList ref: docItem:{chatDocId}")
parts.append(f" file id: {fileId}")
if extraInfo:
parts.append(extraInfo)
return "\n".join(parts)
def _buildResolverDbFromServices(services: Any): def _buildResolverDbFromServices(services: Any):
"""DB adapter for ConnectorResolver: load UserConnections by id. """DB adapter for ConnectorResolver: load UserConnections by id.

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
_resolveFileScope, _resolveFileScope,
@ -316,7 +318,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})") chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"renderDocument:{docName}",
userMessage=f"Rendered document {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({ sideEvents.append({
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -340,7 +348,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). " "Render markdown into a document file (PDF, DOCX, XLSX, PPTX, CSV, HTML, MD, JSON, TXT). "
"For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with " "For long documents: write markdown with writeFile (mode=create then append chunks), then call this tool with "
"`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. " "`sourceFileId` only (tiny JSON — avoids model output truncation). For short docs you may pass `content` inline. "
"Images: ![alt text](file:fileId) in the markdown." "Images: ![alt text](file:fileId) in the markdown. "
"Each rendered file's result line contains `file id: <fileId>` (for embeds / readFile) AND "
"`doc ref: docItem:<chatDocId>` -- pass the latter inside `documentList` of subsequent "
"`ai_process` / `ai_summarizeDocument` / `context_extractContent` calls."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -588,7 +599,13 @@ def _registerMediaTools(registry: ToolRegistry, services):
tempFolderId = _getOrCreateTempFolder(chatService) tempFolderId = _getOrCreateTempFolder(chatService)
if tempFolderId: if tempFolderId:
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
savedFiles.append(f"- {docName} (id: {fid})") chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"generateImage:{docName}",
userMessage=f"Generated image {docName}",
)
refSuffix = f", doc ref: docItem:{chatDocId}" if chatDocId else ""
savedFiles.append(f"- {docName} (file id: {fid}{refSuffix})")
sideEvents.append({ sideEvents.append({
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -612,7 +629,9 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Generate an image from a text description using AI (DALL-E). " "Generate an image from a text description using AI (DALL-E). "
"The generated image is saved as a file in the workspace. " "The generated image is saved as a file in the workspace. "
"Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. " "Use this when the user asks to create, generate, draw, or design an image, illustration, icon, logo, diagram, or any visual content. "
"Provide a detailed, descriptive prompt for best results." "Provide a detailed, descriptive prompt for best results. "
"Each image's result line carries `file id: <fileId>` (for embeds / readFile) and "
"`doc ref: docItem:<chatDocId>` (use inside `documentList` for downstream AI tools)."
), ),
parameters={ parameters={
"type": "object", "type": "object",
@ -743,14 +762,24 @@ def _registerMediaTools(registry: ToolRegistry, services):
if tempFolderId and fid != "?": if tempFolderId and fid != "?":
chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId}) chatService.interfaceDbComponent.updateFile(fid, {"folderId": tempFolderId})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"createChart:{fileName}",
userMessage=f"Created chart {fileName}",
)
sideEvents = [{"type": "fileCreated", "data": { sideEvents = [{"type": "fileCreated", "data": {
"fileId": fid, "fileName": fileName, "fileId": fid, "fileName": fileName,
"mimeType": "image/png", "fileSize": len(pngData), "mimeType": "image/png", "fileSize": len(pngData),
}}] }}]
return ToolResult( return ToolResult(
toolCallId="", toolName="createChart", success=True, toolCallId="", toolName="createChart", success=True,
data=f"Chart saved as '{fileName}' (id: {fid}, {len(pngData)} bytes). " data=_formatToolFileResult(
f"Embed in documents with: ![{title}](file:{fid})", fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Chart saved as",
extraInfo=f"Embed in documents with: ![{title}](file:{fid})",
),
sideEvents=sideEvents, sideEvents=sideEvents,
) )
@ -764,7 +793,10 @@ def _registerMediaTools(registry: ToolRegistry, services):
"Create a data chart/graph as a PNG image using matplotlib. " "Create a data chart/graph as a PNG image using matplotlib. "
"Supported types: bar, horizontalBar, line, area, scatter, pie, donut. " "Supported types: bar, horizontalBar, line, area, scatter, pie, donut. "
"The chart is saved as a file in the workspace. " "The chart is saved as a file in the workspace. "
"Use the returned fileId to embed in documents via renderDocument: ![title](file:fileId). " "Use the returned `file id: <fileId>` to embed in documents via "
"renderDocument: ![title](file:fileId). The result line also carries "
"`doc ref: docItem:<chatDocId>` -- use it inside `documentList` for "
"downstream AI tools that need the chart as a data source. "
"Provide structured data with labels and datasets." "Provide structured data with labels and datasets."
), ),
parameters={ parameters={

View file

@ -9,6 +9,8 @@ from modules.serviceCenter.services.serviceAgent.datamodelAgent import ToolResul
from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry from modules.serviceCenter.services.serviceAgent.toolRegistry import ToolRegistry
from modules.serviceCenter.services.serviceAgent.coreTools._helpers import ( from modules.serviceCenter.services.serviceAgent.coreTools._helpers import (
_attachFileAsChatDocument,
_formatToolFileResult,
_getOrCreateInstanceFolder, _getOrCreateInstanceFolder,
_getOrCreateTempFolder, _getOrCreateTempFolder,
_looksLikeBinary, _looksLikeBinary,
@ -428,9 +430,19 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId}) dbMgmt.updateFile(fileItem.id, {"folderId": instanceFolderId})
if args.get("tags"): if args.get("tags"):
dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]}) dbMgmt.updateFile(fileItem.id, {"tags": args["tags"]})
chatDocId = _attachFileAsChatDocument(
services, fileItem,
label=f"writeFile:{name}",
userMessage=f"Created {name} via writeFile",
)
return ToolResult( return ToolResult(
toolCallId="", toolName="writeFile", success=True, toolCallId="", toolName="writeFile", success=True,
data=f"File '{name}' created (id: {fileItem.id})", data=_formatToolFileResult(
fileItem=fileItem,
chatDocId=chatDocId,
actionLabel="Created",
),
sideEvents=[{ sideEvents=[{
"type": "fileCreated", "type": "fileCreated",
"data": { "data": {
@ -573,7 +585,11 @@ def _registerWorkspaceTools(registry: ToolRegistry, services):
"- create (default): create a new file (name required).\n" "- create (default): create a new file (name required).\n"
"- append: append content to an existing file (fileId required). " "- append: append content to an existing file (fileId required). "
"Use for large content that exceeds a single tool call (~8000 chars per call).\n" "Use for large content that exceeds a single tool call (~8000 chars per call).\n"
"- overwrite: replace entire file content (fileId required)." "- overwrite: replace entire file content (fileId required).\n"
"On `mode=create` the result line contains BOTH a `documentList ref: docItem:<chatDocId>` "
"(use this for documentList parameters of `ai_process` / `ai_summarizeDocument` / "
"`context_extractContent` etc., always as the literal string `docItem:<id>`) AND a "
"`file id: <fileId>` (use this for `readFile`, `writeFile mode=append`, image embeds)."
), ),
parameters={ parameters={
"type": "object", "type": "object",

View file

@ -178,6 +178,33 @@ class AgentService:
if workflowId is None: if workflowId is None:
workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown" workflowId = getattr(self.services.workflow, "id", "unknown") if self.services.workflow else "unknown"
# Propagate the active workflow into every service's request
# context so agent-tool side effects (e.g. _attachFileAsChatDocument
# for downloadFromDataSource / writeFile / renderDocument) can
# bind their FileItem outputs to the workflow as ChatDocuments.
# Without this, chatService._workflow (= chatService._context.workflow)
# stays None and the documentList resolver finds zero docs --
# which is exactly the "Building structure prompt with 0 valid
# ContentParts" symptom we see when the workspace route calls
# runAgent for an attached single-file data source.
# Mirrors workflowManager._propagateWorkflowToContext.
if workflowId and workflowId != "unknown":
try:
workflow = getattr(self.services, "workflow", None)
if workflow is None or getattr(workflow, "id", None) != workflowId:
workflow = self.services.chat.getWorkflow(workflowId)
if workflow is not None:
self.services.workflow = workflow
ctx = getattr(self.services, "_service_context", None)
if ctx is not None:
ctx.workflow = workflow
for attr in ("chat", "ai", "extraction", "sharepoint", "clickup", "utils", "billing", "generation"):
svc = getattr(self.services, attr, None)
if svc is not None and hasattr(svc, "_context") and svc._context is not None:
svc._context.workflow = workflow
except Exception as e:
logger.warning(f"runAgent: could not propagate workflow {workflowId} into service contexts: {e}")
resolvedLanguage = userLanguage or "" resolvedLanguage = userLanguage or ""
enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds) enrichedPrompt = await self._enrichPromptWithFiles(prompt, fileIds)

View file

@ -463,36 +463,38 @@ class ChatService:
Returns: Returns:
List of file info dicts. List of file info dicts.
""" """
# `getAllFiles` returns `List[dict]` (each entry is a
# `FileItem.model_dump()` enriched with label columns) -- not
# Pydantic objects -- so we use dict-access throughout.
allFiles = self.interfaceDbComponent.getAllFiles() allFiles = self.interfaceDbComponent.getAllFiles()
results = [] results = []
for fileItem in allFiles: for fileItem in allFiles:
if folderId is not None: if folderId is not None:
itemFolderId = getattr(fileItem, "folderId", None) if fileItem.get("folderId") != folderId:
if itemFolderId != folderId:
continue continue
if tags: if tags:
itemTags = getattr(fileItem, "tags", None) or [] itemTags = fileItem.get("tags") or []
if not any(t in itemTags for t in tags): if not any(t in itemTags for t in tags):
continue continue
if search: if search:
searchLower = search.lower() searchLower = search.lower()
nameMatch = searchLower in (fileItem.fileName or "").lower() nameMatch = searchLower in (fileItem.get("fileName") or "").lower()
descMatch = searchLower in (getattr(fileItem, "description", None) or "").lower() descMatch = searchLower in (fileItem.get("description") or "").lower()
if not nameMatch and not descMatch: if not nameMatch and not descMatch:
continue continue
results.append({ results.append({
"id": fileItem.id, "id": fileItem.get("id"),
"fileName": fileItem.fileName, "fileName": fileItem.get("fileName"),
"mimeType": fileItem.mimeType, "mimeType": fileItem.get("mimeType"),
"fileSize": fileItem.fileSize, "fileSize": fileItem.get("fileSize"),
"creationDate": fileItem.sysCreatedAt, "creationDate": fileItem.get("sysCreatedAt"),
"tags": getattr(fileItem, "tags", None), "tags": fileItem.get("tags"),
"folderId": getattr(fileItem, "folderId", None), "folderId": fileItem.get("folderId"),
"description": getattr(fileItem, "description", None), "description": fileItem.get("description"),
"status": getattr(fileItem, "status", None), "status": fileItem.get("status"),
}) })
return results return results

View file

@ -100,12 +100,18 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# Update progress - preparing parameters # Update progress - preparing parameters
self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters") self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
from modules.datamodels.datamodelDocref import DocumentReferenceList from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
documentListParam = parameters.get("documentList") documentListParam = parameters.get("documentList")
inline_content_parts: Optional[List[ContentPart]] = None inline_content_parts: Optional[List[ContentPart]] = None
# Handle inline ActionDocuments (e.g. from SharePoint/email in automation2 no persistence) # Inline ActionDocuments (SharePoint/email in automation2, no
# persistence) are list[ActionDocument-like dict] -- handled
# separately because they carry pre-extracted content. Everything
# else is normalised through the tolerant coercer.
is_inline = ( is_inline = (
isinstance(documentListParam, list) isinstance(documentListParam, list)
and len(documentListParam) > 0 and len(documentListParam) > 0
@ -117,28 +123,12 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.info( logger.info(
f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)" f"ai.process: Extracted {len(inline_content_parts)} ContentParts from {len(documentListParam)} inline ActionDocuments (no persistence)"
) )
elif documentListParam is None:
documentList = DocumentReferenceList(references=[])
logger.debug(f"ai.process: documentList is None, using empty DocumentReferenceList")
elif isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
logger.info(f"ai.process: Received DocumentReferenceList with {len(documentList.references)} references")
for idx, ref in enumerate(documentList.references):
logger.info(f" Reference {idx + 1}: documentId={ref.documentId}, type={type(ref).__name__}")
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
logger.info(f"ai.process: Converted string to DocumentReferenceList with {len(documentList.references)} references")
elif isinstance(documentListParam, list):
first = documentListParam[0] if documentListParam else None
logger.info(
f"ai.process: documentList is list of {len(documentListParam)} items, "
f"first type={type(first).__name__}, has_documentData={_is_action_document_like(first) if first else False}"
)
documentList = DocumentReferenceList.from_string_list(documentListParam)
logger.info(f"ai.process: Converted list to DocumentReferenceList with {len(documentList.references)} references")
else: else:
logger.error(f"Invalid documentList type: {type(documentListParam)}") documentList = coerceDocumentReferenceList(documentListParam)
documentList = DocumentReferenceList(references=[]) logger.info(
f"ai.process: Coerced documentList ({type(documentListParam).__name__}) "
f"to DocumentReferenceList with {len(documentList.references)} references"
)
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only. # Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
resultType = parameters.get("resultType") resultType = parameters.get("resultType")

View file

@ -5,7 +5,10 @@ import logging
import time import time
from typing import Dict, Any from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -16,20 +19,17 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"context_extract_{workflowId}_{int(time.time())}" operationId = f"context_extract_{workflowId}_{int(time.time())}"
# Extract documentList from parameters dict
documentListParam = parameters.get("documentList") documentListParam = parameters.get("documentList")
if not documentListParam: if not documentListParam:
return ActionResult.isFailure(error="documentList is required") return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed documentList = coerceDocumentReferenceList(documentListParam)
if isinstance(documentListParam, DocumentReferenceList): if not documentList.references:
documentList = documentListParam return ActionResult.isFailure(
elif isinstance(documentListParam, str): error=f"documentList could not be parsed (type={type(documentListParam).__name__}); "
documentList = DocumentReferenceList.from_string_list([documentListParam]) f"expected DocumentReferenceList, list of strings/dicts, or "
elif isinstance(documentListParam, list): f"a wrapper dict like {{'documents': [...]}}"
documentList = DocumentReferenceList.from_string_list(documentListParam) )
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId') parentOperationId = parameters.get('parentOperationId')

View file

@ -5,7 +5,10 @@ import logging
import time import time
from typing import Dict, Any from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList from modules.datamodels.datamodelDocref import (
DocumentReferenceList,
coerceDocumentReferenceList,
)
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -26,20 +29,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
if not neutralizationEnabled: if not neutralizationEnabled:
logger.info("Neutralization is not enabled, returning documents unchanged") logger.info("Neutralization is not enabled, returning documents unchanged")
# Return original documents if neutralization is disabled # Return original documents if neutralization is disabled
# Get documents from documentList
documentListParam = parameters.get("documentList") documentListParam = parameters.get("documentList")
if not documentListParam: if not documentListParam:
return ActionResult.isFailure(error="documentList is required") return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed documentList = coerceDocumentReferenceList(documentListParam)
if isinstance(documentListParam, DocumentReferenceList): if not documentList.references:
documentList = documentListParam return ActionResult.isFailure(
elif isinstance(documentListParam, str): error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
documentList = DocumentReferenceList.from_string_list([documentListParam]) )
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
# Get ChatDocuments from documentList # Get ChatDocuments from documentList
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
@ -65,20 +63,15 @@ async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
return ActionResult.isSuccess(documents=actionDocuments) return ActionResult.isSuccess(documents=actionDocuments)
# Extract documentList from parameters dict
documentListParam = parameters.get("documentList") documentListParam = parameters.get("documentList")
if not documentListParam: if not documentListParam:
return ActionResult.isFailure(error="documentList is required") return ActionResult.isFailure(error="documentList is required")
# Convert to DocumentReferenceList if needed documentList = coerceDocumentReferenceList(documentListParam)
if isinstance(documentListParam, DocumentReferenceList): if not documentList.references:
documentList = documentListParam return ActionResult.isFailure(
elif isinstance(documentListParam, str): error=f"documentList could not be parsed (type={type(documentListParam).__name__})"
documentList = DocumentReferenceList.from_string_list([documentListParam]) )
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId') parentOperationId = parameters.get('parentOperationId')