# gateway/tool_db_export_migration.py
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Datenbank Export-Tool für Migration.
Dieses Script exportiert alle Daten aus ALLEN PowerOn PostgreSQL-Datenbanken
in eine JSON-Datei, die als Migrationsdatensatz verwendet werden kann.
Datenbanken:
- poweron_app (User, Mandate, RBAC, Features, etc.)
- poweron_chat (Chat-Konversationen und Nachrichten)
- poweron_management (Workflows, Prompts, Connections, etc.)
- poweron_realestate (Real Estate Daten)
- poweron_trustee (Trustee Daten)
Verwendung:
python tool_db_export_migration.py [--output <pfad>] [--pretty]
Optionen:
--output, -o Pfad zur Ausgabedatei (Standard: migration_export_<timestamp>.json)
--pretty, -p JSON formatiert ausgeben (für bessere Lesbarkeit)
--exclude Komma-getrennte Liste von Tabellen, die ausgeschlossen werden sollen
--include-meta System-Metadaten (_createdAt, _modifiedAt, etc.) beibehalten
--db Nur bestimmte Datenbank(en) exportieren (komma-getrennt)
"""
import os
import sys
import json
import argparse
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional
from pathlib import Path
import psycopg2
import psycopg2.extras
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
# All PowerOn databases
ALL_DATABASES = [
    "poweron_app",          # Main app: users, mandates, RBAC, features
    "poweron_chat",         # Chat conversations
    "poweron_management",   # Workflows, prompts, connections
    "poweron_realestate",   # Real estate
    "poweron_trustee",      # Trustee
]
def _loadEnvConfig() -> Dict[str, str]:
    """Loads the configuration directly from the .env file."""
    config = {}
    envPath = Path(__file__).parent / '.env'
    if not envPath.exists():
        logger.warning(f"Environment file not found at {envPath}")
        return config
    # Try several encodings
    encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252']
    for encoding in encodings:
        try:
            with open(envPath, 'r', encoding=encoding) as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    if '=' in line:
                        key, value = line.split('=', 1)
                        config[key.strip()] = value.strip()
            # Loaded successfully
            return config
        except UnicodeDecodeError:
            continue
        except Exception as e:
            logger.error(f"Error loading .env file with {encoding}: {e}")
            continue
    logger.error("Could not load .env file with any encoding")
    return config
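# Minimal sketch of the .env entries this tool reads via _getConfigValue()
# below (key names are taken from this file; the values are illustrative only):
#
#   DB_HOST=localhost
#   DB_PORT=5432
#   DB_USER=poweron
#   DB_PASSWORD_SECRET=changeme
#   APP_ENV_LABEL=dev
#   APP_LOGGING_LOG_DIR=/var/log/poweron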
# Load the global configuration
_ENV_CONFIG = _loadEnvConfig()

def _getConfigValue(key: str, default: Optional[str] = None) -> Optional[str]:
    """Returns a configuration value (.env first, then the process environment)."""
    return _ENV_CONFIG.get(key, os.environ.get(key, default))
def _databaseExists(dbDatabase: str) -> bool:
    """Checks whether a database exists."""
    dbHost = _getConfigValue("DB_HOST", "localhost")
    dbUser = _getConfigValue("DB_USER")
    dbPassword = _getConfigValue("DB_PASSWORD_SECRET")
    dbPort = int(_getConfigValue("DB_PORT", "5432"))
    try:
        # Connect to the maintenance database "postgres" for the check
        conn = psycopg2.connect(
            host=dbHost,
            port=dbPort,
            database="postgres",
            user=dbUser,
            password=dbPassword
        )
        conn.autocommit = True
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT 1 FROM pg_database WHERE datname = %s",
                (dbDatabase,)
            )
            exists = cursor.fetchone() is not None
        conn.close()
        return exists
    except Exception as e:
        logger.error(f"Error checking database {dbDatabase}: {e}")
        return False
def _getDbConnection(dbDatabase: str):
    """Creates a connection to a specific PostgreSQL database."""
    # Check first whether the database exists
    if not _databaseExists(dbDatabase):
        logger.warning(f"Database '{dbDatabase}' does not exist - skipped")
        return None
    dbHost = _getConfigValue("DB_HOST", "localhost")
    dbUser = _getConfigValue("DB_USER")
    dbPassword = _getConfigValue("DB_PASSWORD_SECRET")
    dbPort = int(_getConfigValue("DB_PORT", "5432"))
    try:
        conn = psycopg2.connect(
            host=dbHost,
            port=dbPort,
            database=dbDatabase,
            user=dbUser,
            password=dbPassword,
            cursor_factory=psycopg2.extras.RealDictCursor
        )
        conn.set_client_encoding('UTF8')
        return conn
    except Exception as e:
        logger.error(f"Database connection to {dbDatabase} failed: {e}")
        raise
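# Note: RealDictCursor makes every fetched row behave like a dict, which is why
# the helpers below can index rows by column name (e.g. row["table_name"]).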
def _getTables(conn) -> List[str]:
    """Returns all table names in the database."""
    with conn.cursor() as cursor:
        cursor.execute("""
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'public'
              AND table_type = 'BASE TABLE'
            ORDER BY table_name
        """)
        tables = [row["table_name"] for row in cursor.fetchall()]
    return tables
def _getTableData(conn, tableName: str, includeMeta: bool = False) -> List[Dict[str, Any]]:
    """Reads all data from a table."""
    with conn.cursor() as cursor:
        cursor.execute(f'SELECT * FROM "{tableName}"')
        rows = cursor.fetchall()
    records = []
    for row in rows:
        record = dict(row)
        # Optionally strip system metadata
        if not includeMeta:
            metaFields = ["_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"]
            for field in metaFields:
                record.pop(field, None)
        # JSONB columns already arrive as dicts/lists from psycopg2;
        # normalize numeric values to plain int/float
        for key, value in record.items():
            if isinstance(value, (int, float)):
                record[key] = float(value) if isinstance(value, float) else int(value)
        records.append(record)
    return records
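# Values that are not JSON-native (datetime, Decimal, UUID, ...) are left as-is
# here and serialized later via json.dump(..., default=str) in exportDatabase().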
def _getTableRowCount(conn, tableName: str) -> int:
    """Counts the number of rows in a table."""
    with conn.cursor() as cursor:
        cursor.execute(f'SELECT COUNT(*) as count FROM "{tableName}"')
        result = cursor.fetchone()
        return result["count"] if result else 0
def _exportSingleDatabase(
    dbDatabase: str,
    excludeTables: List[str],
    includeMeta: bool
) -> Optional[Dict[str, Any]]:
    """Exports a single database."""
    conn = _getDbConnection(dbDatabase)
    if conn is None:
        return None
    try:
        allTables = _getTables(conn)
        # Exclude system tables
        systemTables = ["_system"]
        tablesToExport = [
            t for t in allTables
            if t not in systemTables and t not in excludeTables
        ]
        dbExport = {
            "tables": {},
            "summary": {},
            "tableCount": len(tablesToExport),
            "totalRecords": 0
        }
        for tableName in tablesToExport:
            try:
                records = _getTableData(conn, tableName, includeMeta)
                rowCount = len(records)
                dbExport["totalRecords"] += rowCount
                dbExport["tables"][tableName] = records
                dbExport["summary"][tableName] = {"recordCount": rowCount}
                if rowCount > 0:
                    logger.info(f"  {tableName}: {rowCount} records")
            except Exception as e:
                logger.error(f"  Error in table {tableName}: {e}")
                dbExport["tables"][tableName] = []
                dbExport["summary"][tableName] = {"recordCount": 0, "error": str(e)}
        return dbExport
    finally:
        conn.close()
def exportDatabase(
    outputPath: Optional[str] = None,
    prettyPrint: bool = False,
    excludeTables: Optional[List[str]] = None,
    includeMeta: bool = False,
    onlyDatabases: Optional[List[str]] = None
) -> str:
    """
    Exports all databases into a JSON file.

    Args:
        outputPath: Path to the output file (optional)
        prettyPrint: Pretty-print the JSON output
        excludeTables: List of tables to exclude
        includeMeta: Keep system metadata
        onlyDatabases: Export only these databases

    Returns:
        Path to the created export file
    """
    excludeTables = excludeTables or []
    # Which databases should be exported?
    databasesToExport = onlyDatabases if onlyDatabases else ALL_DATABASES
    # Generate the default output path (in the log directory)
    if not outputPath:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        logDir = _getConfigValue("APP_LOGGING_LOG_DIR")
        if logDir and os.path.isabs(logDir):
            outputDir = logDir
        else:
            outputDir = os.path.join(os.path.dirname(__file__), "local", "logs")
        os.makedirs(outputDir, exist_ok=True)
        outputPath = os.path.join(outputDir, f"migration_export_{timestamp}.json")
    logger.info(f"Starting export of {len(databasesToExport)} database(s)...")
    logger.info(f"Databases: {', '.join(databasesToExport)}")
    # Build the export structure
    exportData = {
        "meta": {
            "exportedAt": datetime.utcnow().isoformat() + "Z",
            "exportedFrom": _getConfigValue("APP_ENV_LABEL", "unknown"),
            "version": "1.0",
            "databaseCount": 0,
            "totalTables": 0,
            "totalRecords": 0,
            "excludedTables": excludeTables,
            "includesMeta": includeMeta
        },
        "databases": {}
    }
    # Export each database
    for dbName in databasesToExport:
        logger.info(f"Exporting database: {dbName}")
        dbExport = _exportSingleDatabase(dbName, excludeTables, includeMeta)
        if dbExport is not None:
            exportData["databases"][dbName] = dbExport
            exportData["meta"]["databaseCount"] += 1
            exportData["meta"]["totalTables"] += dbExport["tableCount"]
            exportData["meta"]["totalRecords"] += dbExport["totalRecords"]
            logger.info(f"  -> {dbExport['tableCount']} tables, {dbExport['totalRecords']} records")
        else:
            logger.info("  -> Skipped (does not exist)")
    # Write the JSON file
    logger.info(f"Writing export file: {outputPath}")
    with open(outputPath, "w", encoding="utf-8") as f:
        if prettyPrint:
            json.dump(exportData, f, indent=2, ensure_ascii=False, default=str)
        else:
            json.dump(exportData, f, ensure_ascii=False, default=str)
    # Compute the file size
    fileSize = os.path.getsize(outputPath)
    fileSizeStr = _formatFileSize(fileSize)
    logger.info("Export completed!")
    logger.info(f"  Databases:   {exportData['meta']['databaseCount']}")
    logger.info(f"  Tables:      {exportData['meta']['totalTables']}")
    logger.info(f"  Records:     {exportData['meta']['totalRecords']}")
    logger.info(f"  File size:   {fileSizeStr}")
    logger.info(f"  Output file: {outputPath}")
    return outputPath
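# Example of calling the exporter from other code (illustrative only, assuming
# the databases and .env entries described above are in place):
#
#   exportPath = exportDatabase(prettyPrint=True, onlyDatabases=["poweron_app"])
#   print(exportPath)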
def _formatFileSize(sizeBytes: int) -> str:
    """Formats a file size into a human-readable string."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if sizeBytes < 1024:
            return f"{sizeBytes:.2f} {unit}"
        sizeBytes /= 1024
    return f"{sizeBytes:.2f} TB"
def printDatabaseSummary():
    """Prints a summary of all databases."""
    print("\n" + "=" * 70)
    print("DATABASE SUMMARY - ALL POWERON DATABASES")
    print("=" * 70)
    print(f"Environment: {_getConfigValue('APP_ENV_LABEL', 'unknown')}")
    print(f"Host: {_getConfigValue('DB_HOST', 'localhost')}")
    print("=" * 70)
    grandTotalRecords = 0
    grandTotalTables = 0
    for dbName in ALL_DATABASES:
        print(f"\n{dbName}")
        print("-" * 70)
        conn = _getDbConnection(dbName)
        if conn is None:
            print("  (database does not exist)")
            continue
        try:
            tables = _getTables(conn)
            dbTotalRecords = 0
            print(f"  {'Table':<45} {'Records':>15}")
            print(f"  {'-' * 45} {'-' * 15}")
            for tableName in tables:
                if tableName.startswith("_"):
                    continue  # Skip system tables
                count = _getTableRowCount(conn, tableName)
                dbTotalRecords += count
                if count > 0:  # Show only non-empty tables
                    print(f"  {tableName:<45} {count:>15}")
            print(f"  {'-' * 45} {'-' * 15}")
            print(f"  {'Total':<45} {dbTotalRecords:>15}")
            grandTotalRecords += dbTotalRecords
            grandTotalTables += len([t for t in tables if not t.startswith("_")])
        finally:
            conn.close()
    print("\n" + "=" * 70)
    print("OVERALL SUMMARY")
    print(f"  Databases: {len(ALL_DATABASES)}")
    print(f"  Tables:    {grandTotalTables}")
    print(f"  Records:   {grandTotalRecords}")
    print("=" * 70 + "\n")
def main():
    parser = argparse.ArgumentParser(
        description="Exports all PowerOn database data for migration",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Databases:
  poweron_app         - Users, mandates, RBAC, features
  poweron_chat        - Chat conversations
  poweron_management  - Workflows, prompts, connections
  poweron_realestate  - Real estate data
  poweron_trustee     - Trustee data

Examples:
  python tool_db_export_migration.py
  python tool_db_export_migration.py --pretty
  python tool_db_export_migration.py -o backup.json --pretty
  python tool_db_export_migration.py --db poweron_app,poweron_chat
  python tool_db_export_migration.py --exclude Token,AuthEvent --include-meta
  python tool_db_export_migration.py --summary
"""
    )
    parser.add_argument(
        "-o", "--output",
        help="Path to the output file",
        type=str,
        default=None
    )
    parser.add_argument(
        "-p", "--pretty",
        help="Pretty-print the JSON output",
        action="store_true"
    )
    parser.add_argument(
        "--exclude",
        help="Comma-separated list of tables to exclude",
        type=str,
        default=""
    )
    parser.add_argument(
        "--include-meta",
        help="Keep system metadata (_createdAt, etc.)",
        action="store_true"
    )
    parser.add_argument(
        "--db",
        help="Export only specific database(s) (comma-separated)",
        type=str,
        default=""
    )
    parser.add_argument(
        "--summary",
        help="Show only the summary (no export)",
        action="store_true"
    )
    args = parser.parse_args()
    # Show only the summary
    if args.summary:
        printDatabaseSummary()
        return
    # Parse the exclude list
    excludeTables = []
    if args.exclude:
        excludeTables = [t.strip() for t in args.exclude.split(",") if t.strip()]
    # Parse the database list
    onlyDatabases = None
    if args.db:
        onlyDatabases = [db.strip() for db in args.db.split(",") if db.strip()]
    # Run the export
    try:
        outputPath = exportDatabase(
            outputPath=args.output,
            prettyPrint=args.pretty,
            excludeTables=excludeTables,
            includeMeta=args.include_meta,
            onlyDatabases=onlyDatabases
        )
        print(f"\nExport successful: {outputPath}\n")
    except Exception as e:
        logger.error(f"Export failed: {e}")
        sys.exit(1)
if __name__ == "__main__":
    main()