Fix trustee 3-step AI call for documents
This commit is contained in:
parent
ea8cffee3c
commit
f5143611b0
2 changed files with 65 additions and 54 deletions
|
|
@ -669,17 +669,30 @@ class ChatObjects:
|
||||||
stats = self.getStats(workflowId)
|
stats = self.getStats(workflowId)
|
||||||
|
|
||||||
# Validate workflow data against ChatWorkflow model
|
# Validate workflow data against ChatWorkflow model
|
||||||
|
# Explicit type coercion: DB may store numeric fields as TEXT on some platforms
|
||||||
|
def _toInt(v, default=0):
|
||||||
|
try:
|
||||||
|
return int(v) if v is not None else default
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
def _toFloat(v, default=None):
|
||||||
|
try:
|
||||||
|
return float(v) if v is not None else (default if default is not None else getUtcTimestamp())
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return default if default is not None else getUtcTimestamp()
|
||||||
|
|
||||||
return ChatWorkflow(
|
return ChatWorkflow(
|
||||||
id=workflow["id"],
|
id=workflow["id"],
|
||||||
status=workflow.get("status", "running"),
|
status=workflow.get("status", "running"),
|
||||||
name=workflow.get("name"),
|
name=workflow.get("name"),
|
||||||
currentRound=workflow.get("currentRound", 0) or 0,
|
currentRound=_toInt(workflow.get("currentRound")),
|
||||||
currentTask=workflow.get("currentTask", 0) or 0,
|
currentTask=_toInt(workflow.get("currentTask")),
|
||||||
currentAction=workflow.get("currentAction", 0) or 0,
|
currentAction=_toInt(workflow.get("currentAction")),
|
||||||
totalTasks=workflow.get("totalTasks", 0) or 0,
|
totalTasks=_toInt(workflow.get("totalTasks")),
|
||||||
totalActions=workflow.get("totalActions", 0) or 0,
|
totalActions=_toInt(workflow.get("totalActions")),
|
||||||
lastActivity=workflow.get("lastActivity", getUtcTimestamp()),
|
lastActivity=_toFloat(workflow.get("lastActivity")),
|
||||||
startedAt=workflow.get("startedAt", getUtcTimestamp()),
|
startedAt=_toFloat(workflow.get("startedAt")),
|
||||||
logs=logs,
|
logs=logs,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
stats=stats
|
stats=stats
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,13 @@ logger = logging.getLogger(__name__)
|
||||||
ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg")
|
ALLOWED_EXTENSIONS = (".pdf", ".jpg", ".jpeg")
|
||||||
MAX_FILES = 50
|
MAX_FILES = 50
|
||||||
|
|
||||||
# Phase 1: Extract all text + classify document type (one step)
|
# Phase 1a: Pure OCR / text extraction (no JSON, plain text only)
|
||||||
_CLASSIFICATION_PROMPT = (
|
_OCR_PROMPT = "Extract ALL readable text from this document. Return ONLY the plain text, nothing else."
|
||||||
"Extract ALL text from this document verbatim. Then identify the document type.\n"
|
|
||||||
'Return JSON: {"documentType": "EXPENSE_RECEIPT"|"BANK_DOCUMENT"|"INVOICE"|"CONTRACT"|"UNKNOWN", '
|
# Phase 1b: Classification (text-only, lightweight)
|
||||||
'"rawText": "<complete extracted text>"}\n'
|
_CLASSIFY_PROMPT = (
|
||||||
|
"Classify this document text into one of these types. "
|
||||||
|
"Return ONLY the type name, nothing else.\n"
|
||||||
"EXPENSE_RECEIPT: Quittungen, Tankbelege, Kassenzettel\n"
|
"EXPENSE_RECEIPT: Quittungen, Tankbelege, Kassenzettel\n"
|
||||||
"BANK_DOCUMENT: Bankauszuege, Kontoauszuege mit Transaktionslisten\n"
|
"BANK_DOCUMENT: Bankauszuege, Kontoauszuege mit Transaktionslisten\n"
|
||||||
"INVOICE: Rechnungen mit Rechnungsnummer und Faelligkeitsdatum\n"
|
"INVOICE: Rechnungen mit Rechnungsnummer und Faelligkeitsdatum\n"
|
||||||
|
|
@ -67,20 +69,14 @@ _PROMPT_FALLBACK = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _parseClassificationResult(raw: str) -> Tuple[str, str]:
|
def _parseDocumentType(raw: str) -> str:
|
||||||
"""Parse phase 1 AI response: {documentType, rawText}. Returns (documentType, rawText)."""
|
"""Parse classification response (plain type name). Returns normalised document type."""
|
||||||
from modules.shared.jsonUtils import stripCodeFences, extractFirstBalancedJson
|
_VALID_TYPES = {"EXPENSE_RECEIPT", "BANK_DOCUMENT", "INVOICE", "CONTRACT", "UNKNOWN"}
|
||||||
|
cleaned = (raw or "").strip().upper().replace(" ", "_").replace('"', "").replace("'", "")
|
||||||
documentType = "UNKNOWN"
|
for t in _VALID_TYPES:
|
||||||
rawText = ""
|
if t in cleaned:
|
||||||
cleaned = extractFirstBalancedJson(stripCodeFences((raw or "").strip()))
|
return t
|
||||||
try:
|
return "UNKNOWN"
|
||||||
data = json.loads(cleaned)
|
|
||||||
documentType = (data.get("documentType") or "UNKNOWN").strip().upper().replace(" ", "_")
|
|
||||||
rawText = (data.get("rawText") or data.get("raw_text") or "").strip()
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug("Parse classification result: %s", e)
|
|
||||||
return (documentType, rawText)
|
|
||||||
|
|
||||||
|
|
||||||
def _buildStructuringPrompt(documentType: str, expenseList: str, bankList: str) -> str:
|
def _buildStructuringPrompt(documentType: str, expenseList: str, bankList: str) -> str:
|
||||||
|
|
@ -170,7 +166,7 @@ async def _extractWithAi(
|
||||||
bankList: str,
|
bankList: str,
|
||||||
featureInstanceId: str,
|
featureInstanceId: str,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Run 2-phase AI extraction: (1) classify + full text, (2) structure by type. Returns { documentType, extractedData, fileId, fileName }."""
|
"""3-step extraction: (1a) OCR/text via Vision AI, (1b) classify text, (2) structure by type."""
|
||||||
await self.services.ai.ensureAiObjectsInitialized()
|
await self.services.ai.ensureAiObjectsInitialized()
|
||||||
from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference
|
from modules.datamodels.datamodelDocref import DocumentReferenceList, DocumentItemReference
|
||||||
|
|
||||||
|
|
@ -178,42 +174,44 @@ async def _extractWithAi(
|
||||||
references=[DocumentItemReference(documentId=chatDocumentId, fileName=fileName)]
|
references=[DocumentItemReference(documentId=chatDocumentId, fileName=fileName)]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# --- Step 1a: Pure text extraction (Vision AI for images, text extraction for text PDFs) ---
|
||||||
try:
|
try:
|
||||||
self.services.utils.writeDebugFile(_CLASSIFICATION_PROMPT, "trustee_classification_prompt")
|
self.services.utils.writeDebugFile(_OCR_PROMPT, "trustee_ocr_prompt")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
options = AiCallOptions(resultFormat="json", operationType=OperationTypeEnum.DATA_EXTRACT)
|
ocrOptions = AiCallOptions(resultFormat="text", operationType=OperationTypeEnum.DATA_EXTRACT)
|
||||||
try:
|
ocrResponse = await self.services.ai.callAiContent(
|
||||||
phase1Response = await self.services.ai.callAiContent(
|
prompt=_OCR_PROMPT,
|
||||||
prompt=_CLASSIFICATION_PROMPT,
|
options=ocrOptions,
|
||||||
options=options,
|
documentList=docList,
|
||||||
documentList=docList,
|
contentParts=None,
|
||||||
contentParts=None,
|
outputFormat="txt",
|
||||||
outputFormat="json",
|
generationIntent="extract",
|
||||||
generationIntent="extract",
|
)
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
options = AiCallOptions(resultFormat="csv", operationType=OperationTypeEnum.DATA_EXTRACT)
|
|
||||||
phase1Response = await self.services.ai.callAiContent(
|
|
||||||
prompt=_CLASSIFICATION_PROMPT,
|
|
||||||
options=options,
|
|
||||||
documentList=docList,
|
|
||||||
contentParts=None,
|
|
||||||
outputFormat="csv",
|
|
||||||
generationIntent="extract",
|
|
||||||
)
|
|
||||||
|
|
||||||
if not phase1Response or not phase1Response.documents:
|
if not ocrResponse or not ocrResponse.documents:
|
||||||
return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}
|
return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}
|
||||||
|
|
||||||
raw1 = phase1Response.documents[0].documentData
|
rawText = ocrResponse.documents[0].documentData
|
||||||
if isinstance(raw1, bytes):
|
if isinstance(rawText, bytes):
|
||||||
raw1 = raw1.decode("utf-8")
|
rawText = rawText.decode("utf-8")
|
||||||
documentType, rawText = _parseClassificationResult(raw1 or "")
|
rawText = (rawText or "").strip()
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.services.utils.writeDebugFile(rawText[:5000] if rawText else "(empty)", "trustee_ocr_result")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
if not rawText:
|
if not rawText:
|
||||||
return {"documentType": documentType or "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}
|
return {"documentType": "UNKNOWN", "extractedData": [], "fileId": fileId, "fileName": fileName}
|
||||||
|
|
||||||
|
# --- Step 1b: Classify the extracted text (lightweight text-only call, no Vision AI) ---
|
||||||
|
classifyPrompt = f"{_CLASSIFY_PROMPT}\n\nTEXT:\n{rawText[:3000]}"
|
||||||
|
classifyRequest = AiCallRequest(prompt=classifyPrompt, context="", options=AiCallOptions(resultFormat="text"))
|
||||||
|
classifyResponse = await self.services.ai.callAi(classifyRequest)
|
||||||
|
documentType = _parseDocumentType(classifyResponse.content if hasattr(classifyResponse, "content") else "")
|
||||||
|
logger.info("Document classified: type=%s, rawText_length=%d, file=%s", documentType, len(rawText), fileName)
|
||||||
|
|
||||||
structuringPrompt = _buildStructuringPrompt(documentType, expenseList, bankList)
|
structuringPrompt = _buildStructuringPrompt(documentType, expenseList, bankList)
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue