# gateway/tests/functional/test06_workflow_prompt_variations.py

#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Workflow Test with Prompt Variations - Tests different workflow scenarios:
1. Simple prompt for short answer (no documents)
2. Merge 2 documents and output as Word document
3. Structured data output as Excel file
"""

import asyncio
import json
import sys
import os
import time
from typing import Dict, Any, List, Optional

# Make the gateway root importable. This file lives in tests/functional/, so
# the root is two directory levels above this file's directory. This runs
# before the `modules.*` imports that follow, which presumably rely on it.
_gateway_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if _gateway_path not in sys.path:
    # Prepend (rather than append) so this path is searched first.
    sys.path.insert(0, _gateway_path)

# Import the service initialization
from modules.services import getInterface as getServices
from modules.datamodels.datamodelChat import UserInputRequest, WorkflowModeEnum
from modules.datamodels.datamodelUam import User
from modules.features.workflow import chatStart
import modules.interfaces.interfaceDbChat as interfaceDbChat


class WorkflowPromptVariationsTester:
    def __init__(self):
        """Bind the tester to the root user and build the service layer for it."""
        from modules.interfaces.interfaceDbApp import getRootInterface

        # The root user is used so the tests are not limited by permissions.
        self.testUser = getRootInterface().currentUser

        # Second argument is the workflow; none exists at construction time.
        self.services = getServices(self.testUser, None)

        # Filled in by runAllTests().
        self.testResults = {}

    async def initialize(self):
        """Set the root logger to INFO and print the identity of the test user."""
        import logging

        # INFO level makes workflow progress messages visible during the run.
        rootLogger = logging.getLogger()
        rootLogger.setLevel(logging.INFO)

        user = self.testUser
        print(f"Initialized test with user: {user.id}")
        print(f"Mandate ID: {user.mandateId}")

    def _createFile(self, fileName: str, mimeType: str, content: str) -> str:
        """Store ``content`` as a new file and return the new file's ID.

        The text is UTF-8 encoded and handed to the component interface twice:
        once to ``createFile`` (with name and MIME type) and once to
        ``createFileData`` (keyed by the created file's ID).
        """
        payload = content.encode('utf-8')
        dbComponent = self.services.interfaceDbComponent

        record = dbComponent.createFile(
            name=fileName,
            mimeType=mimeType,
            content=payload
        )
        dbComponent.createFileData(record.id, payload)
        return record.id

    async def _startWorkflow(self, prompt: str, fileIds: Optional[List[str]] = None) -> Any:
        """Start a dynamic-mode chat workflow for the test user.

        Args:
            prompt: User prompt sent to the workflow.
            fileIds: IDs of previously created files to attach. ``None`` is
                treated as "no files".

        Returns:
            The workflow object returned by ``chatStart``.
        """
        attachedFileIds = [] if fileIds is None else fileIds

        print(f"\nPrompt: {prompt}")
        print(f"Number of files: {len(attachedFileIds)}")
        if attachedFileIds:
            print(f"File IDs: {attachedFileIds}")

        # Request payload; the user language is fixed to English for the tests.
        userInput = UserInputRequest(
            prompt=prompt,
            listFileId=attachedFileIds,
            userLanguage="en"
        )

        # workflowId=None requests a new workflow. Per the original note,
        # chatStart returns before the workflow has finished, so completion
        # has to be polled separately.
        workflow = await chatStart(
            currentUser=self.testUser,
            userInput=userInput,
            workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC,
            workflowId=None
        )

        print(f"✅ Workflow started with ID: {workflow.id}")
        print(f"   Status: {workflow.status}")
        print(f"   Mode: {workflow.workflowMode}")

        return workflow

    async def _waitForWorkflowCompletion(self, workflow: Any, maxWaitTime: Optional[int] = None) -> bool:
        """Poll the stored workflow until it reaches a terminal status.

        Args:
            workflow: The workflow object to wait for (only ``id`` and, as a
                fallback for the timeout message, ``status`` are read).
            maxWaitTime: Maximum wait time in seconds. If None, wait
                indefinitely. ``0`` is a real timeout and expires immediately.

        Returns:
            True only if the workflow ended with status ``"completed"``.
            False on timeout, if the workflow cannot be found, or if it ended
            as ``"stopped"`` or ``"failed"``.
        """
        hasTimeout = maxWaitTime is not None
        if hasTimeout:
            print(f"Maximum wait time: {maxWaitTime} seconds")
        else:
            print("Waiting indefinitely (no timeout)")

        # Monotonic clock: elapsed time must not jump if the wall clock changes.
        startTime = time.monotonic()
        checkInterval = 2  # Seconds between status polls
        lastStatus = None

        while True:
            if hasTimeout and time.monotonic() - startTime >= maxWaitTime:
                # Report the most recently polled status; the passed-in
                # workflow object is a snapshot from start time.
                finalStatus = lastStatus if lastStatus is not None else workflow.status
                print(f"\n⚠️  Workflow did not complete within {maxWaitTime} seconds")
                print(f"   Final status: {finalStatus}")
                return False

            # NOTE: the local must NOT be named `interfaceDbChat`. Assigning to
            # that name would make it function-local and the module lookup on
            # the right-hand side would raise UnboundLocalError.
            dbChat = interfaceDbChat.getInterface(self.testUser)
            currentWorkflow = dbChat.getWorkflow(workflow.id)

            if not currentWorkflow:
                print("❌ Workflow not found in database")
                return False

            currentStatus = currentWorkflow.status
            elapsed = int(time.monotonic() - startTime)

            # Only log transitions to keep the output readable.
            if currentStatus != lastStatus:
                print(f"Workflow status: {currentStatus} (elapsed: {elapsed}s)")
                lastStatus = currentStatus

            # Terminal states end the wait; only "completed" counts as success.
            if currentStatus in ("completed", "stopped", "failed"):
                print(f"\n✅ Workflow finished with status: {currentStatus} (elapsed: {elapsed}s)")
                return currentStatus == "completed"

            await asyncio.sleep(checkInterval)

    def _analyzeWorkflowResults(self, workflow: Any) -> Dict[str, Any]:
        """Reload the workflow, collect its messages and documents, and print a summary.

        Args:
            workflow: Workflow object whose ``id`` identifies the stored
                workflow to analyze.

        Returns:
            ``{"error": "Workflow not found"}`` if the workflow cannot be
            loaded; otherwise a dict with workflow metadata, message counts
            (total / user / assistant), the document count and the list of
            de-duplicated document dicts under ``"documents"``.
        """
        def _asDict(obj: Any) -> Dict[str, Any]:
            # Normalize a message/document to a plain dict: dicts pass through,
            # objects exposing .dict() are converted, other objects fall back
            # to their attribute dict.
            if isinstance(obj, dict):
                return obj
            if hasattr(obj, 'dict'):
                return obj.dict()
            if hasattr(obj, '__dict__'):
                return obj.__dict__
            return {}

        # NOTE: the local must NOT be named `interfaceDbChat`. Assigning to
        # that name would shadow the module inside this function and raise
        # UnboundLocalError on the right-hand side lookup.
        dbChat = interfaceDbChat.getInterface(self.testUser)
        storedWorkflow = dbChat.getWorkflow(workflow.id)

        if not storedWorkflow:
            return {"error": "Workflow not found"}

        # Unified chat data; entries of type "message" carry the chat messages.
        chatData = dbChat.getUnifiedChatData(storedWorkflow.id, None)

        messages = []
        allDocuments = []
        seenDocIds = set()  # every id/fileId of documents already collected

        for item in chatData.get("items", []):
            if item.get("type") != "message":
                continue
            message = item.get("item")
            if not message:
                continue

            msgDict = _asDict(message)
            messages.append(msgDict)

            for doc in msgDict.get("documents") or []:
                docDict = _asDict(doc)

                # A document is identified by "id", falling back to "fileId";
                # documents with neither are skipped.
                docId = docDict.get("id") or docDict.get("fileId")
                if not docId or docId in seenDocIds:
                    continue

                seenDocIds.update(
                    key for key in (docDict.get("id"), docDict.get("fileId")) if key
                )
                allDocuments.append(docDict)

        userMessages = [m for m in messages if m.get("role") == "user"]
        assistantMessages = [m for m in messages if m.get("role") == "assistant"]

        results = {
            "workflowId": storedWorkflow.id,
            "status": storedWorkflow.status,
            "workflowMode": str(storedWorkflow.workflowMode) if hasattr(storedWorkflow, 'workflowMode') else None,
            "currentRound": storedWorkflow.currentRound,
            "totalTasks": storedWorkflow.totalTasks,
            "totalActions": storedWorkflow.totalActions,
            "messageCount": len(messages),
            "userMessageCount": len(userMessages),
            "assistantMessageCount": len(assistantMessages),
            "documentCount": len(allDocuments),
            "documents": allDocuments
        }

        print(f"  Workflow ID: {results['workflowId']}")
        print(f"  Status: {results['status']}")
        print(f"  Messages: {results['messageCount']} (User: {results['userMessageCount']}, Assistant: {results['assistantMessageCount']})")
        print(f"  Documents: {results['documentCount']}")

        # List each collected document with its name and size.
        if allDocuments:
            print("  Generated documents:")
            for doc in allDocuments:
                fileName = doc.get("fileName") or doc.get("documentName") or "unknown"
                fileSize = doc.get("fileSize") or doc.get("size") or 0
                print(f"    - {fileName} ({fileSize} bytes)")

        return results

    async def testSimplePrompt(self) -> Dict[str, Any]:
        """Test 1: Simple prompt for a short answer (no documents)."""
        print("\n" + "="*80)
        print("TEST 1: SIMPLE PROMPT FOR SHORT ANSWER")
        print("="*80)

        try:
            prompt = "What is the capital of France? Answer in one sentence."

            workflow = await self._startWorkflow(prompt, [])
            completed = await self._waitForWorkflowCompletion(workflow, maxWaitTime=120)
            results = self._analyzeWorkflowResults(workflow)

            return {
                "testName": "Simple Prompt",
                "completed": completed,
                "results": results
            }
        except Exception as e:
            import traceback
            print(f"❌ Test failed: {type(e).__name__}: {str(e)}")
            return {
                "testName": "Simple Prompt",
                "completed": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }

    async def testMergeDocumentsToWord(self) -> Dict[str, Any]:
        """Test 2: Merge 2 documents and output as Word document."""
        print("\n" + "="*80)
        print("TEST 2: MERGE 2 DOCUMENTS AND OUTPUT AS WORD")
        print("="*80)

        try:
            # Create first document
            doc1Content = """Project Overview

This document outlines the key objectives for our new software project.
The project aims to develop a modern web application with the following features:
- User authentication and authorization
- Real-time data synchronization
- Responsive design for mobile and desktop
- Integration with third-party APIs

Timeline: 6 months
Budget: $500,000
"""

            # Create second document
            doc2Content = """Technical Specifications

Architecture:
- Frontend: React with TypeScript
- Backend: Python with FastAPI
- Database: PostgreSQL
- Deployment: Docker containers on AWS

Key Requirements:
- Support for 10,000 concurrent users
- 99.9% uptime SLA
- End-to-end encryption for sensitive data
- Comprehensive logging and monitoring

Team Size: 8 developers, 2 designers, 1 project manager
"""

            print("\nCreating documents to merge...")
            doc1Id = self._createFile("project_overview.txt", "text/plain", doc1Content)
            print(f"✅ Created document 1 with ID: {doc1Id}")

            doc2Id = self._createFile("technical_specs.txt", "text/plain", doc2Content)
            print(f"✅ Created document 2 with ID: {doc2Id}")

            prompt = "Merge these two documents into a single comprehensive Word document. Include both the project overview and technical specifications in a well-formatted document with proper headings and sections."

            workflow = await self._startWorkflow(prompt, [doc1Id, doc2Id])
            completed = await self._waitForWorkflowCompletion(workflow, maxWaitTime=300)
            results = self._analyzeWorkflowResults(workflow)

            # Check if Word document was created
            wordDocFound = False
            if results.get("documents"):
                for doc in results["documents"]:
                    fileName = doc.get("fileName", "").lower()
                    if fileName.endswith(".docx") or fileName.endswith(".doc"):
                        wordDocFound = True
                        print(f"  ✅ Word document found: {doc.get('fileName')}")

            if not wordDocFound:
                print("  ⚠️  Warning: No Word document (.docx or .doc) found in results")

            return {
                "testName": "Merge Documents to Word",
                "completed": completed,
                "wordDocumentFound": wordDocFound,
                "results": results
            }
        except Exception as e:
            import traceback
            print(f"❌ Test failed: {type(e).__name__}: {str(e)}")
            return {
                "testName": "Merge Documents to Word",
                "completed": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }

    async def testStructuredDataToExcel(self) -> Dict[str, Any]:
        """Test 3: Structured data output as Excel file."""
        print("\n" + "="*80)
        print("TEST 3: STRUCTURED DATA OUTPUT AS EXCEL")
        print("="*80)

        try:
            # Create structured data as JSON
            structuredData = {
                "employees": [
                    {"id": 1, "name": "John Doe", "department": "Engineering", "salary": 95000, "startDate": "2020-01-15"},
                    {"id": 2, "name": "Jane Smith", "department": "Marketing", "salary": 85000, "startDate": "2019-03-20"},
                    {"id": 3, "name": "Bob Johnson", "department": "Engineering", "salary": 100000, "startDate": "2018-06-10"},
                    {"id": 4, "name": "Alice Williams", "department": "HR", "salary": 75000, "startDate": "2021-09-05"},
                    {"id": 5, "name": "Charlie Brown", "department": "Sales", "salary": 80000, "startDate": "2020-11-12"},
                    {"id": 6, "name": "Diana Prince", "department": "Engineering", "salary": 110000, "startDate": "2017-04-22"},
                    {"id": 7, "name": "Edward Norton", "department": "Marketing", "salary": 90000, "startDate": "2019-08-30"},
                    {"id": 8, "name": "Fiona Green", "department": "HR", "salary": 78000, "startDate": "2022-01-18"}
                ],
                "departments": [
                    {"name": "Engineering", "budget": 500000, "headCount": 3},
                    {"name": "Marketing", "budget": 300000, "headCount": 2},
                    {"name": "HR", "budget": 200000, "headCount": 2},
                    {"name": "Sales", "budget": 250000, "headCount": 1}
                ]
            }

            jsonContent = json.dumps(structuredData, indent=2)

            print("\nCreating structured data file...")
            dataFileId = self._createFile("employee_data.json", "application/json", jsonContent)
            print(f"✅ Created data file with ID: {dataFileId}")

            prompt = "Create an Excel file from this structured data. Include two sheets: one for employees with all their details, and one for departments with summary information. Format the data nicely with proper column headers and make it easy to read."

            workflow = await self._startWorkflow(prompt, [dataFileId])
            completed = await self._waitForWorkflowCompletion(workflow, maxWaitTime=300)
            results = self._analyzeWorkflowResults(workflow)

            # Check if Excel document was created
            excelDocFound = False
            if results.get("documents"):
                for doc in results["documents"]:
                    fileName = doc.get("fileName", "").lower()
                    if fileName.endswith(".xlsx") or fileName.endswith(".xls"):
                        excelDocFound = True
                        print(f"  ✅ Excel document found: {doc.get('fileName')}")

            if not excelDocFound:
                print("  ⚠️  Warning: No Excel document (.xlsx or .xls) found in results")

            return {
                "testName": "Structured Data to Excel",
                "completed": completed,
                "excelDocumentFound": excelDocFound,
                "results": results
            }
        except Exception as e:
            import traceback
            print(f"❌ Test failed: {type(e).__name__}: {str(e)}")
            return {
                "testName": "Structured Data to Excel",
                "completed": False,
                "error": str(e),
                "traceback": traceback.format_exc()
            }

    async def runAllTests(self):
        """Run the three scenarios in order, store and print a pass/fail summary.

        A scenario counts as passed when its result dict has a truthy
        ``completed`` entry.

        Returns:
            ``self.testResults``: per-test results under ``test1``..``test3``
            plus a ``summary``; or a dict with ``error`` and ``traceback`` if
            the suite itself raised.
        """
        import traceback

        divider = "=" * 80
        print("\n" + divider)
        print("WORKFLOW PROMPT VARIATIONS TEST SUITE")
        print(divider)

        def verdict(outcome):
            # Human-readable pass/fail marker for one scenario result.
            return '✅ PASSED' if outcome.get('completed') else '❌ FAILED'

        try:
            await self.initialize()

            # Scenarios run sequentially, not concurrently.
            simpleOutcome = await self.testSimplePrompt()
            wordOutcome = await self.testMergeDocumentsToWord()
            excelOutcome = await self.testStructuredDataToExcel()
            outcomes = (simpleOutcome, wordOutcome, excelOutcome)

            passedCount = sum(1 for outcome in outcomes if outcome.get("completed"))
            failedCount = sum(1 for outcome in outcomes if not outcome.get("completed"))

            self.testResults = {
                "test1": simpleOutcome,
                "test2": wordOutcome,
                "test3": excelOutcome,
                "summary": {
                    "totalTests": 3,
                    "passedTests": passedCount,
                    "failedTests": failedCount
                }
            }
            summary = self.testResults['summary']

            print("\n" + divider)
            print("TEST SUITE SUMMARY")
            print(divider)
            print(f"Test 1 - Simple Prompt: {verdict(simpleOutcome)}")
            print(f"Test 2 - Merge to Word: {verdict(wordOutcome)}")
            if wordOutcome.get('wordDocumentFound'):
                print("  Word document created: ✅")
            print(f"Test 3 - Data to Excel: {verdict(excelOutcome)}")
            if excelOutcome.get('excelDocumentFound'):
                print("  Excel document created: ✅")
            print(f"\nTotal: {summary['passedTests']}/{summary['totalTests']} tests passed")

            return self.testResults

        except Exception as exc:
            # Suite-level failure: report it in the result dict, do not re-raise.
            trace = traceback.format_exc()
            print(f"\n❌ Test suite failed with error: {type(exc).__name__}: {str(exc)}")
            print(f"Traceback:\n{trace}")
            self.testResults = {
                "error": str(exc),
                "traceback": trace
            }
            return self.testResults


async def main():
    """Run the full prompt-variation suite and dump the results as JSON."""
    results = await WorkflowPromptVariationsTester().runAllTests()

    # Final machine-readable dump; default=str stringifies any value that
    # json cannot serialize natively.
    separator = "=" * 80
    print("\n" + separator)
    print("FINAL RESULTS (JSON)")
    print(separator)
    print(json.dumps(results, indent=2, default=str))


if __name__ == "__main__":
    asyncio.run(main())