From e952b4c9eef55d6e6d7231cea78d870e387df81c Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 13 Apr 2026 09:43:56 +0200 Subject: [PATCH] i18n tags tools --- .../mainServiceNeutralization.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py index 0911d0b7..7b680edc 100644 --- a/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py +++ b/modules/features/neutralization/serviceNeutralization/mainServiceNeutralization.py @@ -90,16 +90,24 @@ class NeutralizationService: _NEUT_INSTRUCTION = ( "Analyze the following text and identify ALL sensitive content that must be neutralized:\n" - "1. Personal data (PII): names of persons, email addresses, phone numbers, " - "physical addresses, ID numbers, dates of birth, financial data (IBAN, account numbers), " - "social security numbers\n" + "1. Personal data (PII):\n" + " - Full names of persons\n" + " - Email addresses\n" + " - Phone numbers\n" + " - Physical addresses (street, city, postal code)\n" + " - ID numbers (passport, driver license, AHV/SSN)\n" + " - Dates of birth (e.g. '14.03.1982', '1982-03-14', 'March 14, 1982', 'born in 1982')\n" + " - Age when it identifies a person\n" + " - Financial data (IBAN, account numbers, salary, balances)\n" + " - Nationality, citizenship, place of origin\n" "2. Protected business logic: proprietary algorithms, trade secrets, confidential " "processes, internal procedures, code snippets that reveal implementation details\n" "3. Named entities: company names, product names, project names, brand names\n\n" "Return ONLY a JSON array (no markdown, no explanation):\n" - '[{"text":"exact substring","type":"name|email|phone|address|id|financial|logic|company|product|location|other"}]\n\n' + '[{"text":"exact substring","type":"name|email|phone|address|id|dob|financial|nationality|logic|company|product|location|other"}]\n\n' "Rules:\n" "- Every entry's 'text' must be an exact, verbatim substring of the input.\n" + "- Dates of birth MUST always be captured — use type 'dob'.\n" "- Do NOT include generic words, common language constructs or non-sensitive terms.\n" "- If nothing is sensitive, return [].\n\n" )