Fix trustee 3-step AI call for documents

2026-02-24 22:49:08 +01:00 · 2026-02-24 22:49:08 +01:00 · f5143611b0
commit f5143611b0
parent ea8cffee3c
2 changed files with 65 additions and 54 deletions
--- a/modules/interfaces/interfaceDbChat.py
+++ b/modules/interfaces/interfaceDbChat.py
@ -669,17 +669,30 @@ class ChatObjects:
            stats = self.getStats(workflowId)
            
            # Validate workflow data against ChatWorkflow model
+            # Explicit type coercion: DB may store numeric fields as TEXT on some platforms
+            def _toInt(v, default=0):
+                try:
+                    return int(v) if v is not None else default
+                except (ValueError, TypeError):
+                    return default
+
+            def _toFloat(v, default=None):
+                try:
+                    return float(v) if v is not None else (default if default is not None else getUtcTimestamp())
+                except (ValueError, TypeError):
+                    return default if default is not None else getUtcTimestamp()
+
            return ChatWorkflow(
                id=workflow["id"],
                status=workflow.get("status", "running"),
                name=workflow.get("name"),
-                currentRound=workflow.get("currentRound", 0) or 0,
-                currentTask=workflow.get("currentTask", 0) or 0,
-                currentAction=workflow.get("currentAction", 0) or 0,
-                totalTasks=workflow.get("totalTasks", 0) or 0,
-                totalActions=workflow.get("totalActions", 0) or 0,
-                lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
-                startedAt=workflow.get("startedAt", getUtcTimestamp()),
+                currentRound=_toInt(workflow.get("currentRound")),
+                currentTask=_toInt(workflow.get("currentTask")),
+                currentAction=_toInt(workflow.get("currentAction")),
+                totalTasks=_toInt(workflow.get("totalTasks")),
+                totalActions=_toInt(workflow.get("totalActions")),
+                lastActivity=_toFloat(workflow.get("lastActivity")),
+                startedAt=_toFloat(workflow.get("startedAt")),
                logs=logs,
                messages=messages,
                stats=stats
--- a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py
+++ b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py
@ -24,11 +24,13 @@ logger = logging.getLogger(__name__)
 ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg")
 MAX_FILES = 50

-# Phase 1: Extract all text + classify document type (one step)
-_CLASSIFICATION_PROMPT = (
-    "Extract ALL text from this document verbatim. Then identify the document type.\n"
-    'Return JSON: {"documentType": "EXPENSE_RECEIPT"|"BANK_DOCUMENT"|"INVOICE"|"CONTRACT"|"UNKNOWN", '
-    '"rawText": "<complete extracted text>"}\n'
+# Phase 1a: Pure OCR / text extraction (no JSON, plain text only)
+_OCR_PROMPT = "Extract ALL readable text from this document. Return ONLY the plain text, nothing else."
+
+# Phase 1b: Classification (text-only, lightweight)
+_CLASSIFY_PROMPT = (
+    "Classify this document text into one of these types. "
+    "Return ONLY the type name, nothing else.\n"
    "EXPENSE_RECEIPT: Quittungen, Tankbelege, Kassenzettel\n"
    "BANK_DOCUMENT: Bankauszuege, Kontoauszuege mit Transaktionslisten\n"
    "INVOICE: Rechnungen mit Rechnungsnummer und Faelligkeitsdatum\n"
@ -67,20 +69,14 @@ _PROMPT_FALLBACK = (
 )


-def _parseClassificationResult(raw: str) -> Tuple[str, str]:
-    """Parse phase 1 AI response: {documentType, rawText}. Returns (documentType, rawText)."""
-    from modules.shared.jsonUtils import stripCodeFences, extractFirstBalancedJson
-
-    documentType = "UNKNOWN"
-    rawText = ""
-    cleaned = extractFirstBalancedJson(stripCodeFences((raw or "").strip()))
-    try:
-        data = json.loads(cleaned)
-        documentType = (data.get("documentType") or "UNKNOWN").strip().upper().replace(" ", "_")
-        rawText = (data.get("rawText") or data.get("raw_text") or "").strip()
-    except Exception as e:
-        logger.debug("Parse classification result: %s", e)
-    return (documentType, rawText)
+def _parseDocumentType(raw: str) -> str:
+    """Normalise a plain-text classification reply to a known document type; returns "UNKNOWN" if none is named."""
+    # Tuple (not set): iteration order must not depend on the per-process string hash seed.
+    _VALID_TYPES = ("EXPENSE_RECEIPT", "BANK_DOCUMENT", "INVOICE", "CONTRACT", "UNKNOWN")
+    cleaned = (raw or "").strip().upper().replace(" ", "_").replace('"', "").replace("'", "")
+    # If the reply names several types, the one mentioned first wins (deterministic tie-break).
+    hits = [(cleaned.find(t), t) for t in _VALID_TYPES if t in cleaned]
+    return min(hits)[1] if hits else "UNKNOWN"


 def _buildStructuringPrompt(documentType: str, expenseList: str, bankList: str) -> str:
@ -170,7 +166,7 @@ async def _extractWithAi(
    bankList: str,
    featureInstanceId: str,
 ) -> Dict[str, Any]:
-    """Run 2-phase AI extraction: (1) classify + full text, (2) structure by type. Returns { documentType, extractedData, fileId, fileName }."""
+    """3-step extraction: (1a) OCR/text via Vision AI, (1b) classify text, (2) structure by type. Returns { documentType, extractedData, fileId, fileName }."""
    await self.services.ai.ensureAiObjectsInitialized()
    from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference

@ -178,42 +174,44 @@ async def _extractWithAi(
        references=[DocumentItemReference(documentId=chatDocumentId, fileName=fileName)]
    )

+    # --- Step 1a: Pure text extraction (Vision AI for images, text extraction for text PDFs) ---
    try:
-        self.services.utils.writeDebugFile(_CLASSIFICATION_PROMPT, "trustee_classification_prompt")
+        self.services.utils.writeDebugFile(_OCR_PROMPT, "trustee_ocr_prompt")
    except Exception:
        pass

-    options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_EXTRACT)
-    try:
-        phase1Response = await self.services.ai.callAiContent(
-            prompt=_CLASSIFICATION_PROMPT,
-            options=options,
-            documentList=docList,
-            contentParts=None,
-            outputFormat="json",
-            generationIntent="extract",
-        )
-    except Exception:
-        options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_EXTRACT)
-        phase1Response = await self.services.ai.callAiContent(
-            prompt=_CLASSIFICATION_PROMPT,
-            options=options,
-            documentList=docList,
-            contentParts=None,
-            outputFormat="csv",
-            generationIntent="extract",
-        )
+    ocrOptions = AiCallOptions(resultFormat="text", operationType=OperationTypeEnum.DATA_EXTRACT)
+    ocrResponse = await self.services.ai.callAiContent(
+        prompt=_OCR_PROMPT,
+        options=ocrOptions,
+        documentList=docList,
+        contentParts=None,
+        outputFormat="txt",
+        generationIntent="extract",
+    )

-    if not phase1Response or not phase1Response.documents:
+    if not ocrResponse or not ocrResponse.documents:
        return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}

-    raw1 = phase1Response.documents[0].documentData
-    if isinstance(raw1, bytes):
-        raw1 = raw1.decode("utf-8")
-    documentType, rawText = _parseClassificationResult(raw1 or "")
+    rawText = ocrResponse.documents[0].documentData
+    if isinstance(rawText, bytes):
+        rawText = rawText.decode("utf-8")
+    rawText = (rawText or "").strip()
+
+    try:
+        self.services.utils.writeDebugFile(rawText[:5000] if rawText else "(empty)", "trustee_ocr_result")
+    except Exception:
+        pass

    if not rawText:
-        return {"documentType": documentType or "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}
+        return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}
+
+    # --- Step 1b: Classify the extracted text (lightweight text-only call, no Vision AI) ---
+    classifyPrompt = f"{_CLASSIFY_PROMPT}\n\nTEXT:\n{rawText[:3000]}"
+    classifyRequest = AiCallRequest(prompt=classifyPrompt, context="", options=AiCallOptions(resultFormat="text"))
+    classifyResponse = await self.services.ai.callAi(classifyRequest)
+    documentType = _parseDocumentType(classifyResponse.content if hasattr(classifyResponse, "content") else "")
+    logger.info("Document classified: type=%s, rawText_length=%d, file=%s", documentType, len(rawText), fileName)

    structuringPrompt = _buildStructuringPrompt(documentType, expenseList, bankList)
    try: