From 73c1126200174ba2f4de6ec550f0ec4eb79d84e5 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Fri, 10 Oct 2025 23:46:24 +0200
Subject: [PATCH] integration fixes: web research crawl config, URL deduplication, Tavily country filter, merge strategy overrides, debug file dumps
---
config.ini | 7 +-
modules/connectors/connectorAiTavily.py | 6 +-
modules/interfaces/interfaceAiObjects.py | 148 +++++++----
modules/interfaces/interfaceDbChatObjects.py | 27 +-
modules/services/serviceAi/mainServiceAi.py | 146 +++++++++--
.../serviceGeneration/prompt_builder.py | 8 +-
.../serviceWorkflow/mainServiceWorkflow.py | 40 ++-
modules/workflows/methods/methodAi.py | 10 +-
modules/workflows/methods/methodDocument.py | 53 ++--
modules/workflows/methods/methodOutlook.py | 231 ++++++++++--------
.../workflows/processing/modes/modeReact.py | 62 ++++-
.../shared/promptGenerationActionsReact.py | 3 +
12 files changed, 485 insertions(+), 256 deletions(-)
diff --git a/config.ini b/config.ini
index 780a9e08..ab0b6712 100644
--- a/config.ini
+++ b/config.ini
@@ -29,4 +29,9 @@ Web_Search_MIN_RESULTS = 1
# Web Crawl configuration
Web_Crawl_TIMEOUT = 30
Web_Crawl_MAX_RETRIES = 3
-Web_Crawl_RETRY_DELAY = 2
\ No newline at end of file
+Web_Crawl_RETRY_DELAY = 2
+
+# Web Research configuration
+Web_Research_MAX_DEPTH = 2
+Web_Research_MAX_LINKS_PER_DOMAIN = 4
+Web_Research_CRAWL_TIMEOUT_MINUTES = 10
\ No newline at end of file
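
Note on the new Web_Research_* keys: a minimal sketch of how they are read, mirroring the
consumption code added to mainServiceAi.py later in this patch (values come back from
config.ini as strings, hence the int() conversions and string defaults):

    # Sketch, assuming APP_CONFIG is the accessor imported from modules.shared.configuration
    from modules.shared.configuration import APP_CONFIG

    max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
    max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
    crawl_timeout_seconds = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10")) * 60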
diff --git a/modules/connectors/connectorAiTavily.py b/modules/connectors/connectorAiTavily.py
index f86c49b2..b7631ea3 100644
--- a/modules/connectors/connectorAiTavily.py
+++ b/modules/connectors/connectorAiTavily.py
@@ -271,6 +271,7 @@ class ConnectorWeb:
include_domains: list[str] | None = None,
exclude_domains: list[str] | None = None,
language: str | None = None,
+ country: str | None = None,
include_answer: bool | None = None,
include_raw_content: bool | None = None,
) -> list[WebSearchResult]:
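
Note on the new country parameter: a small sketch of the kwargs construction it feeds into,
mirrored from the branches in the hunk below; the base query/max_results arguments are
assumptions here because the start of the signature is not part of this hunk:

    kwargs = {"query": "pension fund regulations", "max_results": 5}  # assumed base arguments
    country = "Switzerland"           # Tavily expects a full English country name
    include_domains: list[str] = []   # empty lists are now skipped rather than forwarded

    if include_domains is not None and len(include_domains) > 0:
        kwargs["include_domains"] = include_domains
    if country is not None:
        kwargs["country"] = country
    # response = await self.client.search(**kwargs)   # as in the connector code below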
@@ -290,17 +291,20 @@ class ConnectorWeb:
kwargs["time_range"] = time_range
if topic is not None:
kwargs["topic"] = topic
- if include_domains is not None:
+ if include_domains is not None and len(include_domains) > 0:
kwargs["include_domains"] = include_domains
if exclude_domains is not None:
kwargs["exclude_domains"] = exclude_domains
if language is not None:
kwargs["language"] = language
+ if country is not None:
+ kwargs["country"] = country
if include_answer is not None:
kwargs["include_answer"] = include_answer
if include_raw_content is not None:
kwargs["include_raw_content"] = include_raw_content
+ logger.debug(f"Tavily.search kwargs: {kwargs}")
response = await self.client.search(**kwargs)
return [
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 3e9c744d..4ef6a69a 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -1,4 +1,5 @@
import logging
+import asyncio
from typing import Dict, Any, List, Union, Tuple, Optional
from dataclasses import dataclass
@@ -694,7 +695,22 @@ class AiObjects:
logger.warning(f"Failed to extract links from content: {e}")
return []
- async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10) -> Dict[str, str]:
+ def _normalizeUrl(self, url: str) -> str:
+ """Normalize URL to handle variations that should be considered duplicates."""
+ if not url:
+ return url
+
+ # Remove trailing slashes and fragments
+ url = url.rstrip('/')
+ if '#' in url:
+ url = url.split('#')[0]
+
+ # Handle common URL variations
+        url = 'https://' + url[len('http://'):] if url.startswith('http://') else url  # Normalize protocol prefix only
+
+ return url
+
+ async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10, global_processed_urls: Optional[set] = None) -> Dict[str, str]:
"""
Recursively crawl URLs up to specified depth.
@@ -703,76 +719,100 @@ class AiObjects:
max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
extract_depth: Tavily extract depth setting
max_per_domain: Maximum URLs per domain per level
+ global_processed_urls: Optional global set to track processed URLs across sessions
Returns:
Dictionary mapping URL -> content for all crawled pages
"""
logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
- # URL index to track all processed URLs
+ # URL index to track all processed URLs (local + global)
processed_urls = set()
+ if global_processed_urls is not None:
+ # Use global index if provided, otherwise create local one
+ processed_urls = global_processed_urls
+ logger.info(f"Using global URL index with {len(processed_urls)} already processed URLs")
+ else:
+ logger.info("Using local URL index for this crawl session")
+
all_content = {}
# Current level URLs to process
current_level_urls = urls.copy()
- for depth in range(1, max_depth + 1):
- logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
- logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
-
- # URLs found at this level (for next iteration)
- next_level_urls = []
-
- for url in current_level_urls:
- if url in processed_urls:
- logger.debug(f"URL {url} already processed, skipping")
- continue
+ try:
+ for depth in range(1, max_depth + 1):
+ logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
+ logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
- try:
- logger.info(f"Processing URL at depth {depth}: {url}")
+ # URLs found at this level (for next iteration)
+ next_level_urls = []
+
+ for url in current_level_urls:
+ # Normalize URL for duplicate checking
+ normalized_url = self._normalizeUrl(url)
+ if normalized_url in processed_urls:
+ logger.debug(f"URL {url} (normalized: {normalized_url}) already processed, skipping")
+ continue
- # Read page content
- content = await self.readPage(url, extract_depth)
- if content:
- all_content[url] = content
- processed_urls.add(url)
- logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
+ try:
+ logger.info(f"Processing URL at depth {depth}: {url}")
+ logger.debug(f"Total processed URLs so far: {len(processed_urls)}")
- # Get URLs from this page for next level
- page_urls = await self.getUrlsFromPage(url, extract_depth)
- logger.info(f"Found {len(page_urls)} URLs on {url}")
-
- # Filter URLs and add to next level
- filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
- logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
-
- # Add new URLs to next level (avoiding already processed ones)
- new_urls_count = 0
- for new_url in filtered_urls:
- if new_url not in processed_urls:
- next_level_urls.append(new_url)
- new_urls_count += 1
-
- logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
- else:
- logger.warning(f"✗ No content extracted from {url}")
- processed_urls.add(url) # Mark as processed to avoid retry
-
- except Exception as e:
- logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
- processed_urls.add(url) # Mark as processed to avoid retry
+ # Read page content
+ content = await self.readPage(url, extract_depth)
+ if content:
+ all_content[url] = content
+ processed_urls.add(normalized_url)
+ logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
+
+ # Get URLs from this page for next level
+ page_urls = await self.getUrlsFromPage(url, extract_depth)
+ logger.info(f"Found {len(page_urls)} URLs on {url}")
+
+ # Filter URLs and add to next level
+ filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
+ logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
+
+ # Add new URLs to next level (avoiding already processed ones)
+ new_urls_count = 0
+ for new_url in filtered_urls:
+ normalized_new_url = self._normalizeUrl(new_url)
+ if normalized_new_url not in processed_urls:
+ next_level_urls.append(new_url)
+ new_urls_count += 1
+ else:
+ logger.debug(f"URL {new_url} (normalized: {normalized_new_url}) already processed, skipping")
+
+ logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
+ else:
+ logger.warning(f"✗ No content extracted from {url}")
+ processed_urls.add(normalized_url) # Mark as processed to avoid retry
+
+ except Exception as e:
+ logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
+ processed_urls.add(normalized_url) # Mark as processed to avoid retry
+
+ # Prepare for next iteration
+ current_level_urls = next_level_urls
+ logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
+
+ # Stop if no more URLs to process
+ if not current_level_urls:
+ logger.info(f"No more URLs found at depth {depth}, stopping recursion")
+ break
- # Prepare for next iteration
- current_level_urls = next_level_urls
- logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
+ logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
+ logger.info(f"Total URLs processed (including skipped): {len(processed_urls)}")
+ logger.info(f"Unique URLs found: {len(all_content)}")
+ return all_content
- # Stop if no more URLs to process
- if not current_level_urls:
- logger.info(f"No more URLs found at depth {depth}, stopping recursion")
- break
-
- logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
- return all_content
+ except asyncio.TimeoutError:
+ logger.warning(f"Crawling timed out, returning partial results: {len(all_content)} pages crawled so far")
+ return all_content
+ except Exception as e:
+ logger.error(f"Crawling failed with error: {e}, returning partial results: {len(all_content)} pages crawled so far")
+ return all_content
async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:
"""Use Perplexity AI to provide the best answers for web-related queries."""
diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py
index 319c1703..42c96158 100644
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@@ -1052,8 +1052,11 @@ class ChatObjects:
def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
"""
- Store message and documents for debugging purposes in fileshare.
- Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/documents
+ Store message and documents (metadata and file bytes) for debugging purposes.
+ Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/
+ - message.json, message_text.txt
+ - document_###_metadata.json
+          - document_###_<original filename> (actual file bytes)
Args:
message: ChatMessage object to store
@@ -1156,6 +1159,26 @@ class ChatObjects:
json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
logger.info(f"Debug: Stored document metadata for {doc.fileName}")
+
+ # Also store the actual file bytes next to metadata for debugging
+ try:
+ # Lazy import to avoid circular deps at module load
+ from modules.interfaces import interfaceDbComponentObjects as comp
+ componentInterface = comp.getInterface(self.currentUser)
+ file_bytes = componentInterface.getFileData(doc.fileId)
+ if file_bytes:
+ # Build a safe filename preserving original name
+ safe_name = doc.fileName or f"document_{i+1:03d}"
+ # Avoid path traversal
+ safe_name = os.path.basename(safe_name)
+ doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
+ with open(doc_file_path, "wb") as df:
+ df.write(file_bytes)
+ logger.info(f"Debug: Stored document file bytes: {doc_file_path} ({len(file_bytes)} bytes)")
+ else:
+ logger.warning(f"Debug: No file bytes returned for fileId {doc.fileId}")
+ except Exception as e:
+ logger.error(f"Debug: Failed to store document file for {doc.fileName} (fileId {doc.fileId}): {e}")
logger.info(f"Debug: Stored message and documents in {message_path}")
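
Note on the debug dumps: a small sketch of inspecting what gets written for one message folder,
assuming the fileshare is mounted locally; the folder and label names are illustrative, the real
ones carry the round/task/action/timestamp of the message:

    import json, os

    label_folder = "gateway/test-chat/messages/m_1_2_send_20251010/attachments"  # illustrative path
    with open(os.path.join(label_folder, "message.json"), encoding="utf-8") as f:
        message_meta = json.load(f)
    for name in sorted(os.listdir(label_folder)):
        if name.endswith("_metadata.json"):
            print("document metadata:", name)
        elif name.startswith("document_"):
            print("raw file bytes:", name, os.path.getsize(os.path.join(label_folder, name)), "bytes")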
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 16619a52..b790b14a 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -13,6 +13,7 @@ from modules.datamodels.datamodelWeb import (
WebSearchResultItem,
)
from modules.interfaces.interfaceAiObjects import AiObjects
+from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@@ -100,6 +101,9 @@ class AiService:
logger.info(f"User Query: {request.user_prompt}")
logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}")
+ # Global URL index to track all processed URLs across the entire research session
+ global_processed_urls = set()
+
# Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===")
@@ -129,7 +133,7 @@ class AiService:
Return ONLY this JSON format:
{{
"user_prompt": "search query based on user query above",
- "country": "country_code_or_null",
+ "country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
"language": "language_code_or_null",
"topic": "general|news|academic_or_null",
"time_range": "d|w|m|y_or_null",
@@ -194,10 +198,32 @@ class AiService:
}
# Add parameters only if they have valid values
- if ai_country and ai_country not in ['null', '', 'none', 'undefined']:
- search_kwargs["country"] = ai_country
- elif request.options.country and request.options.country not in ['null', '', 'none', 'undefined']:
- search_kwargs["country"] = request.options.country
+ def _normalizeCountry(c: Optional[str]) -> Optional[str]:
+ if not c:
+ return None
+ s = str(c).strip()
+ if not s or s.lower() in ['null', 'none', 'undefined']:
+ return None
+ # Map common codes to full English names when easy to do without extra deps
+ mapping = {
+ 'ch': 'Switzerland', 'che': 'Switzerland',
+ 'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
+ 'at': 'Austria', 'aut': 'Austria',
+                'us': 'United States', 'usa': 'United States', 'united states': 'United States',
+ 'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
+ }
+ key = s.lower()
+ if key in mapping:
+ return mapping[key]
+            # Otherwise assume the value is already a full English country name and pass it through (Tavily accepts English names)
+ return s
+
+ norm_ai_country = _normalizeCountry(ai_country)
+ norm_req_country = _normalizeCountry(request.options.country)
+ if norm_ai_country:
+ search_kwargs["country"] = norm_ai_country
+ elif norm_req_country:
+ search_kwargs["country"] = norm_req_country
if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
search_kwargs["language"] = ai_language
@@ -213,9 +239,36 @@ class AiService:
search_kwargs["time_range"] = ai_time_range
elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
search_kwargs["time_range"] = request.options.time_range
+
+ # Constrain by expected domains if provided by AI
+ try:
+ include_domains = []
+ for p in expected_patterns or []:
+ if not isinstance(p, str):
+ continue
+ # Extract bare domain from pattern or URL
+ import re
+ m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
+ if m:
+ domain = m.group(1).lower()
+ # strip leading www.
+ if domain.startswith('www.'):
+ domain = domain[4:]
+ include_domains.append(domain)
+ # Deduplicate
+ if include_domains:
+ seen = set()
+ uniq = []
+ for d in include_domains:
+ if d not in seen:
+ seen.add(d)
+ uniq.append(d)
+ search_kwargs["include_domains"] = uniq
+ except Exception:
+ pass
# Log the parameters being used
- logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}")
+ logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
search_results = await self.aiObjects.search_websites(**search_kwargs)
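
Note on _normalizeCountry: the helper is defined inline above, so these are illustrative
expectations rather than an importable test, but they show the intended behavior:

    # Expected outputs for typical inputs
    assert _normalizeCountry("ch") == "Switzerland"           # mapped ISO-style code
    assert _normalizeCountry("DE") == "Germany"               # lookup is case-insensitive
    assert _normalizeCountry("Switzerland") == "Switzerland"  # full names pass through unchanged
    assert _normalizeCountry(" null ") is None                # sentinel strings are dropped
    assert _normalizeCountry(None) is None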
@@ -232,6 +285,8 @@ class AiService:
seen.add(u)
search_urls.append(u)
+ logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
+
if not search_urls:
logger.error("No relevant websites found")
return WebResearchActionResult(success=False, error="No relevant websites found")
@@ -281,6 +336,7 @@ class AiService:
unique_websites.append(url)
websites = unique_websites
+        logger.info(f"After AI selection deduplication: {len(websites)} unique URLs remain")
logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
for i, url in enumerate(websites, 1):
@@ -305,18 +361,40 @@ class AiService:
logger.debug(f" {i}. {url}")
# Step 3+4+5: Recursive crawling with configurable depth
- logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {request.options.pages_search_depth}) ===")
+ # Get configuration parameters
+ max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
+ max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
+ crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
+ crawl_timeout_seconds = crawl_timeout_minutes * 60
+
+ # Use the configured max_depth or the request's pages_search_depth, whichever is smaller
+ effective_depth = min(max_depth, request.options.pages_search_depth)
+
+ logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
- logger.info(f"Search depth: {request.options.pages_search_depth} levels")
- logger.info(f"DEBUG: request.options.pages_search_depth = {request.options.pages_search_depth}")
+ logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
+ logger.info(f"Max links per domain: {max_links_per_domain}")
+ logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
# Use recursive crawling with URL index to avoid duplicates
- allContent = await self.aiObjects.crawlRecursively(
- urls=selectedWebsites,
- max_depth=request.options.pages_search_depth,
- extract_depth=request.options.extract_depth,
- max_per_domain=10
- )
+ import asyncio
+ try:
+ allContent = await asyncio.wait_for(
+ self.aiObjects.crawlRecursively(
+ urls=selectedWebsites,
+ max_depth=effective_depth,
+ extract_depth=request.options.extract_depth,
+ max_per_domain=max_links_per_domain,
+ global_processed_urls=global_processed_urls
+ ),
+ timeout=crawl_timeout_seconds
+ )
+ logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
+ except asyncio.TimeoutError:
+            logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes")
+            # asyncio.wait_for cancels the coroutine, so any partial results collected inside
+            # crawlRecursively are lost here; fall back to an empty result and report the failure below
+            allContent = {}
if not allContent:
logger.error("Could not extract content from any websites")
@@ -324,7 +402,7 @@ class AiService:
logger.info(f"=== WEB RESEARCH COMPLETED ===")
logger.info(f"Successfully crawled {len(allContent)} URLs total")
- logger.info(f"Crawl depth: {request.options.pages_search_depth} levels")
+ logger.info(f"Crawl depth: {effective_depth} levels")
# Create simple result with raw content
sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
@@ -346,7 +424,10 @@ class AiService:
additional_links=additional_links,
individual_content=allContent, # Individual URL -> content mapping
debug_info={
- "crawl_depth": request.options.pages_search_depth,
+ "crawl_depth": effective_depth,
+ "max_configured_depth": max_depth,
+ "max_links_per_domain": max_links_per_domain,
+ "crawl_timeout_minutes": crawl_timeout_minutes,
"total_urls_crawled": len(allContent),
"main_urls": len(selectedWebsites),
"additional_urls": len(additional_links)
@@ -398,9 +479,17 @@ class AiService:
"mergeStrategy": {
"groupBy": "typeGroup",
"orderBy": "id",
- "mergeType": "concatenate"
+ "mergeType": "concatenate" # Default fallback
},
}
+
+ # Override mergeStrategy if provided in options
+ if options and hasattr(options, 'mergeStrategy') and options.mergeStrategy:
+ extractionOptions["mergeStrategy"] = options.mergeStrategy
+        else:
+            # No mergeStrategy was supplied in options, so the default concatenate strategy set above is kept.
+            # Deriving a format-specific strategy for JSON/CSV output from outputFormat would be a later refinement.
+            pass
processedContents: List[str] = []
@@ -641,12 +730,21 @@ class AiService:
# Merge AI results using ExtractionService
from modules.datamodels.datamodelExtraction import MergeStrategy
- mergeStrategy = MergeStrategy(
- groupBy="typeGroup",
- orderBy="id",
- mergeType="concatenate",
- chunkSeparator="\n\n---\n\n"
- )
+ # Use mergeStrategy from options if available, otherwise default
+ if options and hasattr(options, 'mergeStrategy') and options.mergeStrategy:
+ mergeStrategy = MergeStrategy(
+ groupBy=options.mergeStrategy.get("groupBy", "typeGroup"),
+ orderBy=options.mergeStrategy.get("orderBy", "id"),
+ mergeType=options.mergeStrategy.get("mergeType", "concatenate"),
+ chunkSeparator="\n\n---\n\n"
+ )
+ else:
+ mergeStrategy = MergeStrategy(
+ groupBy="typeGroup",
+ orderBy="id",
+ mergeType="concatenate",
+ chunkSeparator="\n\n---\n\n"
+ )
mergedContent = self.extractionService.mergeAiResults(
extractionResult,
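
Note on the mergeStrategy override: a sketch of the shared fallback logic used in both code paths
above, written as a hypothetical helper (build_merge_strategy is not part of this patch; the field
names mirror the MergeStrategy construction in the hunk above):

    from modules.datamodels.datamodelExtraction import MergeStrategy

    def build_merge_strategy(options):
        # Use options.mergeStrategy when present, otherwise the concatenate defaults.
        override = (getattr(options, "mergeStrategy", None) or {}) if options else {}
        return MergeStrategy(
            groupBy=override.get("groupBy", "typeGroup"),
            orderBy=override.get("orderBy", "id"),
            mergeType=override.get("mergeType", "concatenate"),
            chunkSeparator="\n\n---\n\n",
        )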
diff --git a/modules/services/serviceGeneration/prompt_builder.py b/modules/services/serviceGeneration/prompt_builder.py
index 208c4c18..89f6bfe9 100644
--- a/modules/services/serviceGeneration/prompt_builder.py
+++ b/modules/services/serviceGeneration/prompt_builder.py
@@ -43,8 +43,9 @@ You are generating a document in {output_format.upper()} format for the title: "
Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
-- Use only factual information extracted from the supplied source documents.
-- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
+- Work with whatever data is available from the source documents - partial data is better than no data.
+- If some information is missing, create the best possible document with what you have available.
+- Do not refuse to generate the document due to incomplete data - always proceed with available information.
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
FILENAME:
@@ -55,7 +56,8 @@ Rules:
Common policy:
- Use the actual data from the source documents to create the content.
-- Do not generate placeholder text or templates.
+- If data is incomplete, work with what you have and create a meaningful document.
+- Always generate the document - never refuse due to missing information.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()
diff --git a/modules/services/serviceWorkflow/mainServiceWorkflow.py b/modules/services/serviceWorkflow/mainServiceWorkflow.py
index 180779b5..c26de8c3 100644
--- a/modules/services/serviceWorkflow/mainServiceWorkflow.py
+++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py
@@ -125,7 +125,9 @@ class WorkflowService:
break
if not message_found:
- logger.warning(f"Message with ID {message_id} not found in workflow. Available message IDs: {[str(msg.id) for msg in workflow.messages]}")
+ available_ids = [str(msg.id) for msg in workflow.messages]
+ logger.error(f"Message with ID {message_id} not found in workflow. Available message IDs: {available_ids}")
+ raise ValueError(f"Document reference not found: docList:{message_id}:{label}")
elif len(parts) >= 2:
# Format: docList: