diff --git a/modules/interfaces/interfaceDbChat.py b/modules/interfaces/interfaceDbChat.py index 56743b50..cd688ca7 100644 --- a/modules/interfaces/interfaceDbChat.py +++ b/modules/interfaces/interfaceDbChat.py @@ -669,17 +669,30 @@ class ChatObjects: stats = self.getStats(workflowId) # Validate workflow data against ChatWorkflow model + # Explicit type coercion: DB may store numeric fields as TEXT on some platforms + def _toInt(v, default=0): + try: + return int(v) if v is not None else default + except (ValueError, TypeError): + return default + + def _toFloat(v, default=None): + try: + return float(v) if v is not None else (default if default is not None else getUtcTimestamp()) + except (ValueError, TypeError): + return default if default is not None else getUtcTimestamp() + return ChatWorkflow( id=workflow["id"], status=workflow.get("status", "running"), name=workflow.get("name"), - currentRound=workflow.get("currentRound", 0) or 0, - currentTask=workflow.get("currentTask", 0) or 0, - currentAction=workflow.get("currentAction", 0) or 0, - totalTasks=workflow.get("totalTasks", 0) or 0, - totalActions=workflow.get("totalActions", 0) or 0, - lastActivity=workflow.get("lastActivity", getUtcTimestamp()), - startedAt=workflow.get("startedAt", getUtcTimestamp()), + currentRound=_toInt(workflow.get("currentRound")), + currentTask=_toInt(workflow.get("currentTask")), + currentAction=_toInt(workflow.get("currentAction")), + totalTasks=_toInt(workflow.get("totalTasks")), + totalActions=_toInt(workflow.get("totalActions")), + lastActivity=_toFloat(workflow.get("lastActivity")), + startedAt=_toFloat(workflow.get("startedAt")), logs=logs, messages=messages, stats=stats diff --git a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py index fe3473f9..88f2c544 100644 --- a/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py +++ b/modules/workflows/methods/methodTrustee/actions/extractFromFiles.py @@ -24,11 +24,13 @@ logger = logging.getLogger(__name__) ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg") MAX_FILES = 50 -# Phase 1: Extract all text + classify document type (one step) -_CLASSIFICATION_PROMPT = ( - "Extract ALL text from this document verbatim. Then identify the document type.\n" - 'Return JSON: {"documentType": "EXPENSE_RECEIPT"|"BANK_DOCUMENT"|"INVOICE"|"CONTRACT"|"UNKNOWN", ' - '"rawText": ""}\n' +# Phase 1a: Pure OCR / text extraction (no JSON, plain text only) +_OCR_PROMPT = "Extract ALL readable text from this document. Return ONLY the plain text, nothing else." + +# Phase 1b: Classification (text-only, lightweight) +_CLASSIFY_PROMPT = ( + "Classify this document text into one of these types. " + "Return ONLY the type name, nothing else.\n" "EXPENSE_RECEIPT: Quittungen, Tankbelege, Kassenzettel\n" "BANK_DOCUMENT: Bankauszuege, Kontoauszuege mit Transaktionslisten\n" "INVOICE: Rechnungen mit Rechnungsnummer und Faelligkeitsdatum\n" @@ -67,20 +69,14 @@ _PROMPT_FALLBACK = ( ) -def _parseClassificationResult(raw: str) -> Tuple[str, str]: - """Parse phase 1 AI response: {documentType, rawText}. Returns (documentType, rawText).""" - from modules.shared.jsonUtils import stripCodeFences, extractFirstBalancedJson - - documentType = "UNKNOWN" - rawText = "" - cleaned = extractFirstBalancedJson(stripCodeFences((raw or "").strip())) - try: - data = json.loads(cleaned) - documentType = (data.get("documentType") or "UNKNOWN").strip().upper().replace(" ", "_") - rawText = (data.get("rawText") or data.get("raw_text") or "").strip() - except Exception as e: - logger.debug("Parse classification result: %s", e) - return (documentType, rawText) +def _parseDocumentType(raw: str) -> str: + """Parse classification response (plain type name). Returns normalised document type.""" + _VALID_TYPES = {"EXPENSE_RECEIPT", "BANK_DOCUMENT", "INVOICE", "CONTRACT", "UNKNOWN"} + cleaned = (raw or "").strip().upper().replace(" ", "_").replace('"', "").replace("'", "") + for t in _VALID_TYPES: + if t in cleaned: + return t + return "UNKNOWN" def _buildStructuringPrompt(documentType: str, expenseList: str, bankList: str) -> str: @@ -170,7 +166,7 @@ async def _extractWithAi( bankList: str, featureInstanceId: str, ) -> Dict[str, Any]: - """Run 2-phase AI extraction: (1) classify + full text, (2) structure by type. Returns { documentType, extractedData, fileId, fileName }.""" + """3-step extraction: (1a) OCR/text via Vision AI, (1b) classify text, (2) structure by type.""" await self.services.ai.ensureAiObjectsInitialized() from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference @@ -178,42 +174,44 @@ async def _extractWithAi( references=[DocumentItemReference(documentId=chatDocumentId, fileName=fileName)] ) + # --- Step 1a: Pure text extraction (Vision AI for images, text extraction for text PDFs) --- try: - self.services.utils.writeDebugFile(_CLASSIFICATION_PROMPT, "trustee_classification_prompt") + self.services.utils.writeDebugFile(_OCR_PROMPT, "trustee_ocr_prompt") except Exception: pass - options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_EXTRACT) - try: - phase1Response = await self.services.ai.callAiContent( - prompt=_CLASSIFICATION_PROMPT, - options=options, - documentList=docList, - contentParts=None, - outputFormat="json", - generationIntent="extract", - ) - except Exception: - options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_EXTRACT) - phase1Response = await self.services.ai.callAiContent( - prompt=_CLASSIFICATION_PROMPT, - options=options, - documentList=docList, - contentParts=None, - outputFormat="csv", - generationIntent="extract", - ) + ocrOptions = AiCallOptions(resultFormat="text", operationType=OperationTypeEnum.DATA_EXTRACT) + ocrResponse = await self.services.ai.callAiContent( + prompt=_OCR_PROMPT, + options=ocrOptions, + documentList=docList, + contentParts=None, + outputFormat="txt", + generationIntent="extract", + ) - if not phase1Response or not phase1Response.documents: + if not ocrResponse or not ocrResponse.documents: return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName} - raw1 = phase1Response.documents[0].documentData - if isinstance(raw1, bytes): - raw1 = raw1.decode("utf-8") - documentType, rawText = _parseClassificationResult(raw1 or "") + rawText = ocrResponse.documents[0].documentData + if isinstance(rawText, bytes): + rawText = rawText.decode("utf-8") + rawText = (rawText or "").strip() + + try: + self.services.utils.writeDebugFile(rawText[:5000] if rawText else "(empty)", "trustee_ocr_result") + except Exception: + pass if not rawText: - return {"documentType": documentType or "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName} + return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName} + + # --- Step 1b: Classify the extracted text (lightweight text-only call, no Vision AI) --- + classifyPrompt = f"{_CLASSIFY_PROMPT}\n\nTEXT:\n{rawText[:3000]}" + classifyRequest = AiCallRequest(prompt=classifyPrompt, context="", options=AiCallOptions(resultFormat="text")) + classifyResponse = await self.services.ai.callAi(classifyRequest) + documentType = _parseDocumentType(classifyResponse.content if hasattr(classifyResponse, "content") else "") + logger.info("Document classified: type=%s, rawText_length=%d, file=%s", documentType, len(rawText), fileName) structuringPrompt = _buildStructuringPrompt(documentType, expenseList, bankList) try: