From e952b4c9eef55d6e6d7231cea78d870e387df81c Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Mon, 13 Apr 2026 09:43:56 +0200
Subject: [PATCH] neutralization: itemize PII categories in prompt, add dob/nationality types
---
.../mainServiceNeutralization.py | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py
index 0911d0b7..7b680edc 100644
--- a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py
+++ b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py
@@ -90,16 +90,24 @@ class NeutralizationService:
_NEUT_INSTRUCTION = (
"Analyze the following text and identify ALL sensitive content that must be neutralized:\n"
- "1. Personal data (PII): names of persons, email addresses, phone numbers, "
- "physical addresses, ID numbers, dates of birth, financial data (IBAN, account numbers), "
- "social security numbers\n"
+ "1. Personal data (PII):\n"
+ " - Full names of persons\n"
+ " - Email addresses\n"
+ " - Phone numbers\n"
+ " - Physical addresses (street, city, postal code)\n"
+ " - ID numbers (passport, driver license, AHV/SSN)\n"
+ " - Dates of birth (e.g. '14.03.1982', '1982-03-14', 'March 14, 1982', 'born in 1982')\n"
+ " - Age when it identifies a person\n"
+ " - Financial data (IBAN, account numbers, salary, balances)\n"
+ " - Nationality, citizenship, place of origin\n"
"2. Protected business logic: proprietary algorithms, trade secrets, confidential "
"processes, internal procedures, code snippets that reveal implementation details\n"
"3. Named entities: company names, product names, project names, brand names\n\n"
"Return ONLY a JSON array (no markdown, no explanation):\n"
- '[{"text":"exact substring","type":"name|email|phone|address|id|financial|logic|company|product|location|other"}]\n\n'
+ '[{"text":"exact substring","type":"name|email|phone|address|id|dob|financial|nationality|logic|company|product|location|other"}]\n\n'
"Rules:\n"
"- Every entry's 'text' must be an exact, verbatim substring of the input.\n"
+ "- Dates of birth MUST always be captured — use type 'dob'.\n"
"- Do NOT include generic words, common language constructs or non-sensitive terms.\n"
"- If nothing is sensitive, return [].\n\n"
)