#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Unit tests for cleanEmailBody.

Covers: HTML→text normalisation, quoted-reply removal, signature removal,
whitespace collapse and truncation. The utility is used during Outlook
bootstrap; buggy cleaning would leak quoted threads / signatures into every
embedding.
"""

import os
import sys

# Make the repository root importable when this file is run directly.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.."))

from modules.serviceCenter.services.serviceKnowledge.subTextClean import (
    cleanEmailBody,
)


def test_strips_html_tags_and_scripts():
    """Markup is removed and the script body does not survive as text."""
    # NOTE(review): minimal HTML fixture with inline tags and a <script>
    # element, so that the "<" and "alert" assertions below are meaningful.
    # Confirm it matches the intended coverage.
    html = (
        "<html><body>"
        "<p>Hello <b>world</b></p>"
        "<script>alert('x')</script>"
        "</body></html>"
    )
    cleaned = cleanEmailBody(html)
    assert "Hello" in cleaned
    assert "world" in cleaned
    assert "<" not in cleaned
    assert "alert" not in cleaned


def test_strips_quoted_reply_english():
    """An English 'On ... wrote:' header and the quoted lines are dropped."""
    body = (
        "Actual answer from me.\n\n"
        "On Mon, 1 Jan 2024 at 10:00, Someone wrote:\n"
        "> Original question?\n"
        "> Second line.\n"
    )
    cleaned = cleanEmailBody(body)
    assert "Actual answer" in cleaned
    assert "Original question" not in cleaned
    assert "wrote:" not in cleaned


def test_strips_quoted_reply_german():
    """A German 'Am ... schrieb ...:' quoted reply is dropped."""
    body = (
        "Meine Antwort.\n\n"
        "Am 1. Januar 2024 um 10:00 schrieb Max Muster :\n"
        "> Ursprüngliche Frage?\n"
    )
    cleaned = cleanEmailBody(body)
    assert "Meine Antwort" in cleaned
    assert "Ursprüngliche Frage" not in cleaned


def test_strips_signature_after_dashes():
    """Everything after a '--' signature delimiter line is removed."""
    body = (
        "Kurze Nachricht.\n"
        "\n"
        "--\n"
        "Max Muster\n"
        "Vorstand, Beispiel GmbH\n"
    )
    cleaned = cleanEmailBody(body)
    assert "Kurze Nachricht" in cleaned
    assert "Beispiel GmbH" not in cleaned


def test_strips_signature_salutation_de():
    """The name following a German closing salutation is removed."""
    body = (
        "Die eigentliche Information steht hier.\n\n"
        "Mit freundlichen Grüßen\n"
        "Max Muster"
    )
    cleaned = cleanEmailBody(body)
    assert "eigentliche Information" in cleaned
    assert "Max Muster" not in cleaned


def test_truncate_to_max_chars():
    """Output length is bounded by maxChars plus one ellipsis character."""
    body = "abc " * 5000
    cleaned = cleanEmailBody(body, maxChars=200)
    assert len(cleaned) <= 201  # includes trailing ellipsis


def test_empty_input_returns_empty_string():
    """Empty and None inputs both yield an empty string."""
    assert cleanEmailBody("") == ""
    assert cleanEmailBody(None) == ""  # type: ignore[arg-type]


def test_collapses_whitespace():
    """Runs of spaces and of blank lines are collapsed."""
    # The input must contain real runs of spaces; checking for a single
    # space would fail on any output that keeps words separated.
    body = "A   lot   of    spaces\n\n\n\nand blank lines"
    cleaned = cleanEmailBody(body)
    assert "  " not in cleaned
    assert "\n\n\n" not in cleaned


if __name__ == "__main__":
    test_strips_html_tags_and_scripts()
    test_strips_quoted_reply_english()
    test_strips_quoted_reply_german()
    test_strips_signature_after_dashes()
    test_strips_signature_salutation_de()
    test_truncate_to_max_chars()
    test_empty_input_returns_empty_string()
    test_collapses_whitespace()
    print("OK — cleanEmailBody tests passed")