fixed vision for pre-extracted content
This commit is contained in:
parent
23bb1ff5d3
commit
a2315d6ace
1 changed files with 93 additions and 9 deletions
|
|
@ -1135,7 +1135,8 @@ If no trackable items can be identified, return: {{"kpis": []}}
|
|||
fileName=preExtracted["originalDocument"]["fileName"],
|
||||
mimeType=preExtracted["originalDocument"]["mimeType"],
|
||||
fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize),
|
||||
fileId=doc.fileId # Behalte fileId vom JSON
|
||||
fileId=doc.fileId, # Behalte fileId vom JSON
|
||||
messageId=doc.messageId if hasattr(doc, 'messageId') else None # Behalte messageId falls vorhanden
|
||||
)
|
||||
resolvedDocuments.append(originalDoc)
|
||||
else:
|
||||
|
|
@ -1264,6 +1265,39 @@ If no trackable items can be identified, return: {{"kpis": []}}
|
|||
logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}")
|
||||
return None
|
||||
|
||||
async def _extractTextFromImage(self, imagePart: ContentPart, extractionPrompt: str) -> Optional[str]:
    """
    Extract text from an image part by sending it to the AI service.

    Args:
        imagePart: ContentPart holding the image (expected typeGroup="image").
        extractionPrompt: Prompt for the text extraction. When empty or None,
            a generic "extract all text" prompt is used instead.

    Returns:
        The stripped text returned by the AI service, or None when the
        service returns no usable text or any error occurs.
    """
    defaultPrompt = "Extract all text content from this image. Return only the extracted text, no additional formatting."
    try:
        # The import sits inside the try block, so an import failure is
        # handled like any other extraction error (warning + None).
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions

        # Build the AI call request carrying the image part.
        request = AiCallRequest(
            prompt=extractionPrompt or defaultPrompt,
            context="",
            options=AiCallOptions(operationType="extraction"),
            contentParts=[imagePart]
        )

        # Hand the request to the AI service for processing.
        response = await self.services.ai.call(request)

        if not response or not response.content:
            return None

        # A whitespace-only answer contains no text; report it as None so
        # the result matches the documented "text or None" contract.
        extractedText = response.content.strip()
        return extractedText or None
    except Exception as e:
        # Best-effort path: read the id defensively so that the handler
        # itself cannot raise when imagePart is None or lacks an id.
        partId = getattr(imagePart, "id", "<unknown>")
        logger.warning("Error extracting text from image %s: %s", partId, e)
        return None
|
||||
|
||||
def _buildIntentAnalysisPrompt(
|
||||
self,
|
||||
userPrompt: str,
|
||||
|
|
@ -1420,12 +1454,22 @@ Return ONLY valid JSON following the structure above.
|
|||
# Wenn Intent "render" für Images hat, erstelle auch object Part
|
||||
if "render" in partIntent and part.typeGroup == "image" and part.data:
|
||||
# Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part
|
||||
# 1. Extracted Part (bereits vorhanden)
|
||||
part.metadata["intent"] = "extract"
|
||||
part.metadata["fromExtractContent"] = True
|
||||
part.metadata["skipExtraction"] = True
|
||||
part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
|
||||
allContentParts.append(part)
|
||||
# 1. Extracted Part - prüfe ob "extract" Intent vorhanden ist
|
||||
if "extract" in partIntent:
|
||||
# Image hat sowohl extract als auch render Intent
|
||||
# Extracted Part: Wird mit Vision AI verarbeitet (skipExtraction=False)
|
||||
part.metadata["intent"] = "extract"
|
||||
part.metadata["fromExtractContent"] = True
|
||||
part.metadata["skipExtraction"] = False # WICHTIG: Vision AI-Verarbeitung nötig!
|
||||
part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
|
||||
allContentParts.append(part)
|
||||
else:
|
||||
# Nur render Intent - kein Text-Extraktion nötig
|
||||
part.metadata["intent"] = "render"
|
||||
part.metadata["fromExtractContent"] = True
|
||||
part.metadata["skipExtraction"] = True
|
||||
part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
|
||||
allContentParts.append(part)
|
||||
|
||||
# 2. Object Part für Rendering (base64 data ist bereits im extracted Part)
|
||||
objectPart = ContentPart(
|
||||
|
|
@ -1444,11 +1488,51 @@ Return ONLY valid JSON following the structure above.
|
|||
}
|
||||
)
|
||||
allContentParts.append(objectPart)
|
||||
elif part.typeGroup == "image" and "extract" in partIntent:
|
||||
# Image mit extract Intent: Vision AI-Verarbeitung nötig
|
||||
# Verarbeite Image mit Vision AI, um Text zu extrahieren
|
||||
try:
|
||||
extractedText = await self._extractTextFromImage(part, intent.extractionPrompt if intent else "Extract all text content from this image")
|
||||
if extractedText:
|
||||
# Erstelle neuen Text-Part mit extrahiertem Text
|
||||
textPart = ContentPart(
|
||||
id=f"extracted_{part.id}",
|
||||
label=f"Extracted text from {part.label or 'Image'}",
|
||||
typeGroup="text",
|
||||
mimeType="text/plain",
|
||||
data=extractedText,
|
||||
metadata={
|
||||
"contentFormat": "extracted",
|
||||
"documentId": document.id,
|
||||
"intent": "extract",
|
||||
"originalFileName": preExtracted["originalDocument"]["fileName"],
|
||||
"relatedImagePartId": part.id,
|
||||
"extractionPrompt": intent.extractionPrompt if intent else "Extract all text content from this image"
|
||||
}
|
||||
)
|
||||
allContentParts.append(textPart)
|
||||
logger.info(f"✅ Extracted text from image {part.id} using Vision AI")
|
||||
|
||||
# Wenn auch render Intent vorhanden, füge Image-Part hinzu
|
||||
if "render" in partIntent:
|
||||
part.metadata["intent"] = "render"
|
||||
part.metadata["fromExtractContent"] = True
|
||||
part.metadata["skipExtraction"] = True
|
||||
part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
|
||||
allContentParts.append(part)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to extract text from image {part.id}: {str(e)}, adding image as-is")
|
||||
# Fallback: Füge Image-Part hinzu ohne Text-Extraktion
|
||||
part.metadata["intent"] = "extract"
|
||||
part.metadata["fromExtractContent"] = True
|
||||
part.metadata["skipExtraction"] = False
|
||||
part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
|
||||
allContentParts.append(part)
|
||||
else:
|
||||
# Normales extracted Part
|
||||
# Normales extracted Part (kein Image oder kein extract Intent)
|
||||
part.metadata["intent"] = partIntent[0] if partIntent else "extract"
|
||||
part.metadata["fromExtractContent"] = True
|
||||
part.metadata["skipExtraction"] = True
|
||||
part.metadata["skipExtraction"] = True # Bereits extrahiert
|
||||
part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
|
||||
allContentParts.append(part)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue