""" Script to analyze codebase for snake_case naming violations that should be camelStyle. Excludes routes (decorated endpoint functions) and JSON field names. """ import ast import os import re from collections import defaultdict from pathlib import Path from typing import Dict, List, Tuple import csv # Patterns to exclude (external library interfaces, etc.) EXCLUDE_PATTERNS = [ r'@.*\.(get|post|put|delete|patch|options|head)', # FastAPI route decorators r'self\.(db|db_|model|orm)', # Database ORM attributes r'\.(objects|query|filter|get|all)', # ORM methods r'(request|response|response_model|status_code)', # FastAPI params r'(snake_case|kebab-case)', # String literals ] # External library attribute patterns (should not be changed) EXTERNAL_LIB_ATTRIBUTES = { 'pydantic', 'fastapi', 'sqlalchemy', 'psycopg', 'requests', 'aiohttp', 'azure', 'google', 'openai', 'anthropic', 'reportlab', 'docx', 'pptx', 'openpyxl', 'json', 'logging', 'datetime', 'typing' } def isRouteFile(filePath: str) -> bool: """Check if file is a route file""" return 'routes' in filePath or 'route' in os.path.basename(filePath).lower() def shouldExcludeName(name: str, context: str = "") -> bool: """Check if a name should be excluded from analysis""" # Skip if it's a builtin or external library attribute if name.startswith('__') and name.endswith('__'): return True # Skip if context suggests external library usage for pattern in EXCLUDE_PATTERNS: if re.search(pattern, context, re.IGNORECASE): return True return False def isSnakeCase(name: str) -> bool: """Check if a name is snake_case""" if not name or name.startswith('_'): return False # Check if contains underscore and is not all caps return '_' in name and not name.isupper() def analyzeFile(filePath: str) -> Dict[str, List[str]]: """Analyze a Python file for naming violations""" violations = { 'functions': [], 'parameters': [], 'variables': [] } try: with open(filePath, 'r', encoding='utf-8') as f: content = f.read() tree = ast.parse(content, filename=filePath) except (SyntaxError, UnicodeDecodeError): return violations # Track current context currentClass = None inRouteDecorator = False class NamingAnalyzer(ast.NodeVisitor): def __init__(self): self.violations = violations self.currentClass = None self.inRouteDecorator = False self.functionDefs = [] def visit_FunctionDef(self, node): # Check if this is a route endpoint (has FastAPI decorator) isRouteEndpoint = False for decorator in node.decorator_list: if isinstance(decorator, ast.Attribute): if decorator.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']: isRouteEndpoint = True break elif isinstance(decorator, ast.Call): if isinstance(decorator.func, ast.Attribute): if decorator.func.attr in ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']: isRouteEndpoint = True break # Skip route endpoint function names # But we still need to check their parameters and variables funcName = node.name if not isRouteEndpoint and isSnakeCase(funcName) and not shouldExcludeName(funcName): self.violations['functions'].append(f"{funcName} (line {node.lineno})") # Analyze parameters for arg in node.args.args: if arg.arg != 'self' and arg.arg != 'cls': paramName = arg.arg if isSnakeCase(paramName) and not shouldExcludeName(paramName): self.violations['parameters'].append(f"{paramName} in {funcName} (line {node.lineno})") # Analyze function body for local variables for stmt in node.body: self.visit(stmt) def visit_ClassDef(self, node): oldClass = self.currentClass self.currentClass = node.name self.generic_visit(node) self.currentClass = oldClass def visit_Assign(self, node): for target in node.targets: if isinstance(target, ast.Name): varName = target.id # Skip constants (ALL_CAPS), builtins, and private (_xxx) if varName.isupper() or varName.startswith('_'): continue # Local variables should be camelStyle if isSnakeCase(varName) and not shouldExcludeName(varName): self.violations['variables'].append(f"{varName} (line {node.lineno})") def visit_For(self, node): if isinstance(node.target, ast.Name): varName = node.target.id if isSnakeCase(varName) and not shouldExcludeName(varName): self.violations['variables'].append(f"{varName} (line {node.lineno})") self.generic_visit(node) def visit_With(self, node): if node.items: for item in node.items: if item.optional_vars: if isinstance(item.optional_vars, ast.Name): varName = item.optional_vars.id if isSnakeCase(varName) and not shouldExcludeName(varName): self.violations['variables'].append(f"{varName} (line {node.lineno})") self.generic_visit(node) analyzer = NamingAnalyzer() analyzer.visit(tree) return violations def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]: """Analyze entire codebase""" results = defaultdict(lambda: { 'functions': 0, 'parameters': 0, 'variables': 0, 'details': { 'functions': [], 'parameters': [], 'variables': [] } }) # Handle both absolute and relative paths rootPath = Path(rootDir) if not rootPath.exists(): # Try relative to current directory rootPath = Path('.').resolve() / rootDir if not rootPath.exists(): # Try just current directory if we're already in gateway rootPath = Path('.') # Find all Python files for pyFile in rootPath.rglob('*.py'): # Skip route files for function name analysis (but analyze their internals) filePath = str(pyFile.relative_to(rootPath)) # Skip test files and special scripts if 'test' in filePath.lower() or 'tool_' in filePath or '__pycache__' in filePath: continue violations = analyzeFile(str(pyFile)) # Check if there are any violations totalViolations = len(violations['functions']) + len(violations['parameters']) + len(violations['variables']) if totalViolations > 0: moduleName = filePath.replace('\\', '/') results[moduleName]['functions'] = len(violations['functions']) results[moduleName]['parameters'] = len(violations['parameters']) results[moduleName]['variables'] = len(violations['variables']) results[moduleName]['details'] = violations return results def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'): """Generate CSV report""" with open(outputFile, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total']) # Sort by total violations sortedResults = sorted( results.items(), key=lambda x: x[1]['functions'] + x[1]['parameters'] + x[1]['variables'], reverse=True ) rowsWritten = 0 for module, stats in sortedResults: total = stats['functions'] + stats['parameters'] + stats['variables'] if total > 0: writer.writerow([ module, stats['functions'], stats['parameters'], stats['variables'], total ]) rowsWritten += 1 if rowsWritten == 0: print("WARNING: No rows written to CSV despite finding violations!") print(f"CSV report generated: {outputFile}") print(f"Total modules analyzed: {len(results)}") # Print summary totalFuncs = sum(r['functions'] for r in results.values()) totalParams = sum(r['parameters'] for r in results.values()) totalVars = sum(r['variables'] for r in results.values()) print(f"\nSummary:") print(f" Function names: {totalFuncs}") print(f" Parameter names: {totalParams}") print(f" Variable names: {totalVars}") print(f" Total violations: {totalFuncs + totalParams + totalVars}") if __name__ == '__main__': print("Analyzing codebase for naming violations...") results = analyzeCodebase('gateway') # Write CSV to gateway directory outputPath = Path('gateway') / 'naming_violations_report.csv' generateCSV(results, str(outputPath))