59 lines
2.3 KiB
Python
59 lines
2.3 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
"""
|
|
Extract Data action for AI operations.
|
|
Extracts structured data from documents (key-value pairs, entities, facts, etc.).
|
|
"""
|
|
|
|
import logging
|
|
from typing import Dict, Any
|
|
from modules.workflows.methods.methodBase import action
|
|
from modules.datamodels.datamodelChat import ActionResult
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
    - Input requirements: documentList (required); optional dataStructure, fields.
    - Output format: JSON by default, or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to extract data from.
    - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
    - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
    - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
    """
    # NOTE(review): the docstring above is kept word-for-word; the @action
    # decorator may consume it at runtime - confirm before rewording it.

    # Guard clause: nothing to extract from without at least one document.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # dict.get() only applies its default when the key is absent, so an
    # explicit None (or empty value) must be mapped to the default by hand.
    dataStructure = parameters.get("dataStructure") or "nested"
    fields = parameters.get("fields") or []
    resultType = parameters.get("resultType", "json")

    # A bare string would otherwise be joined character by character
    # ("name" -> "n, a, m, e"); any other non-collection is wrapped the same way.
    if not isinstance(fields, (list, tuple, set)):
        fields = [fields]
    # str.join() raises TypeError on non-string items, so coerce each entry
    # and drop None placeholders.
    fieldNames = [str(field) for field in fields if field is not None]

    aiPrompt = "Extract structured data from the provided document(s)."
    if fieldNames:
        fieldsStr = ", ".join(fieldNames)
        aiPrompt += f" Extract the following specific fields: {fieldsStr}."
    else:
        aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."

    structureInstructions = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record.",
    }
    # Coerce to str before normalising so a non-string value cannot raise
    # AttributeError; unknown values fall back to the "nested" instruction.
    structureKey = str(dataStructure).strip().lower()
    aiPrompt += f" {structureInstructions.get(structureKey, structureInstructions['nested'])}"

    aiPrompt += " Ensure all extracted data is accurate and complete."

    # Delegate the actual AI call to the generic process action on self.
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType,
    })
|
|
|