gateway/analyze_naming_violations.py

242 lines
9.4 KiB
Python

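"""
Script to analyze the codebase for snake_case names that should be camelCase
(called "camelStyle" in this file).
Function names of route endpoints (functions carrying an HTTP-method decorator)
are not reported, although their parameters and local variables still are.
JSON field names are excluded.
"""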
import ast
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple
import csv
# Regular expressions that shouldExcludeName() searches for, ignoring case,
# in the optional ``context`` string it is given. A match on any of them
# means the name is treated as part of an external interface and skipped.
EXCLUDE_PATTERNS = [
    # FastAPI route decorators
    r'@.*\.(get|post|put|delete|patch|options|head)',
    # Database ORM attributes
    r'self\.(db|db_|model|orm)',
    # ORM methods
    r'\.(objects|query|filter|get|all)',
    # FastAPI params
    r'(request|response|response_model|status_code)',
    # String literals
    r'(snake_case|kebab-case)',
]
# Top-level names of external libraries whose attributes must keep their
# original spelling.
# NOTE(review): nothing in the visible part of this file reads this set;
# confirm whether it is still needed.
EXTERNAL_LIB_ATTRIBUTES = {
    'pydantic',
    'fastapi',
    'sqlalchemy',
    'psycopg',
    'requests',
    'aiohttp',
    'azure',
    'google',
    'openai',
    'anthropic',
    'reportlab',
    'docx',
    'pptx',
    'openpyxl',
    'json',
    'logging',
    'datetime',
    'typing',
}
def isRouteFile(filePath: str) -> bool:
    """Report whether ``filePath`` looks like a route module.

    The answer is True when the path contains ``routes`` anywhere
    (case-sensitive), or when its final path component contains ``route``
    in any letter case.
    """
    if 'routes' in filePath:
        return True
    baseName = os.path.basename(filePath)
    return 'route' in baseName.lower()
def shouldExcludeName(name: str, context: str = "") -> bool:
    """Decide whether ``name`` is exempt from the naming analysis.

    A name is exempt when it is a dunder (starts and ends with a double
    underscore), or when ``context`` matches any entry of
    ``EXCLUDE_PATTERNS`` (searched case-insensitively).
    """
    isDunder = name.startswith('__') and name.endswith('__')
    if isDunder:
        return True
    # The patterns are applied to the surrounding context, not to the name.
    return any(
        re.search(pattern, context, re.IGNORECASE) is not None
        for pattern in EXCLUDE_PATTERNS
    )
def isSnakeCase(name: str) -> bool:
    """Tell whether ``name`` counts as snake_case for this analysis.

    Empty names and names with a leading underscore never count. Otherwise
    the name counts when it contains an underscore and is not written
    entirely in upper case (ALL_CAPS constants are left alone).
    """
    if name and not name.startswith('_'):
        hasUnderscore = '_' in name
        return hasUnderscore and not name.isupper()
    return False
def analyzeFile(filePath: str) -> Dict[str, List[str]]:
    """Collect snake_case naming violations from one Python source file.

    The file is parsed with ``ast`` and walked once. Three kinds of names
    are reported, each as a human-readable string:

    * ``'functions'``  - ``"<name> (line N)"`` for every ``def`` / ``async def``
      whose name is snake_case, except functions decorated with an
      HTTP-method decorator (``@x.get``, ``@x.post(...)``, ...).
    * ``'parameters'`` - ``"<param> in <func> (line N)"`` for every parameter
      (positional-only, regular, ``*args``, keyword-only, ``**kwargs``) other
      than ``self`` / ``cls``. Route endpoints are included here.
    * ``'variables'``  - ``"<name> (line N)"`` for names bound by plain
      assignment, ``for`` / ``async for`` targets and ``with`` / ``async with``
      ``as`` targets, including names inside tuple/list unpacking.

    Args:
        filePath: Path of the file to analyze; read as UTF-8.

    Returns:
        A dict with the keys ``'functions'``, ``'parameters'`` and
        ``'variables'``. All three lists are empty when the file cannot be
        decoded or parsed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    violations: Dict[str, List[str]] = {
        'functions': [],
        'parameters': [],
        'variables': []
    }
    try:
        with open(filePath, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content, filename=filePath)
    except (SyntaxError, UnicodeDecodeError, ValueError):
        # Best effort: an unparseable file contributes no violations.
        # ValueError covers sources with NUL bytes, which Python versions
        # before 3.12 reject with ValueError instead of SyntaxError.
        return violations

    routeMethods = {'get', 'post', 'put', 'delete', 'patch', 'options', 'head'}

    def isRouteDecorator(decorator) -> bool:
        """True for ``@obj.get`` and ``@obj.get(...)`` style decorators."""
        target = decorator.func if isinstance(decorator, ast.Call) else decorator
        return isinstance(target, ast.Attribute) and target.attr in routeMethods

    def isViolation(name: str) -> bool:
        """True when ``name`` is snake_case and not exempt."""
        return isSnakeCase(name) and not shouldExcludeName(name)

    def boundNames(target):
        """Yield every plain identifier bound by an assignment-style target."""
        if isinstance(target, ast.Name):
            yield target.id
        elif isinstance(target, (ast.Tuple, ast.List)):
            # ``a, (b, c) = ...`` / ``for k, v in ...``
            for element in target.elts:
                yield from boundNames(element)
        elif isinstance(target, ast.Starred):
            # ``head, *rest = ...``
            yield from boundNames(target.value)
        # Attribute and subscript targets bind no new local name.

    class NamingAnalyzer(ast.NodeVisitor):
        """AST visitor that appends findings to the enclosing ``violations``."""

        def __init__(self):
            self.violations = violations

        def recordVariables(self, target, lineno):
            for varName in boundNames(target):
                if isViolation(varName):
                    self.violations['variables'].append(f"{varName} (line {lineno})")

        def visit_FunctionDef(self, node):
            funcName = node.name
            # Route endpoint names are skipped, but their parameters and
            # bodies are still analyzed below.
            isRouteEndpoint = any(isRouteDecorator(d) for d in node.decorator_list)
            if not isRouteEndpoint and isViolation(funcName):
                self.violations['functions'].append(f"{funcName} (line {node.lineno})")

            # Gather every kind of parameter in signature order.
            signature = node.args
            params = list(getattr(signature, 'posonlyargs', [])) + list(signature.args)
            if signature.vararg is not None:
                params.append(signature.vararg)
            params.extend(signature.kwonlyargs)
            if signature.kwarg is not None:
                params.append(signature.kwarg)
            for param in params:
                paramName = param.arg
                if paramName in ('self', 'cls'):
                    continue
                if isViolation(paramName):
                    self.violations['parameters'].append(
                        f"{paramName} in {funcName} (line {node.lineno})"
                    )

            # Only the body is walked; decorators, defaults and annotations
            # are deliberately not visited.
            for stmt in node.body:
                self.visit(stmt)

        # ``async def`` has its own node type; without this alias coroutine
        # names and parameters would never be checked.
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Assign(self, node):
            # NOTE(review): annotated assignments (ast.AnnAssign) are still not
            # inspected; whether class-level annotated fields should count
            # needs a project decision.
            for target in node.targets:
                self.recordVariables(target, node.lineno)

        def visit_For(self, node):
            self.recordVariables(node.target, node.lineno)
            self.generic_visit(node)

        visit_AsyncFor = visit_For

        def visit_With(self, node):
            for item in node.items:
                if item.optional_vars is not None:
                    self.recordVariables(item.optional_vars, node.lineno)
            self.generic_visit(node)

        visit_AsyncWith = visit_With

    NamingAnalyzer().visit(tree)
    return violations
def analyzeCodebase(rootDir: str = 'gateway') -> Dict[str, Dict[str, int]]:
    """Run analyzeFile() over every ``*.py`` file below ``rootDir``.

    If ``rootDir`` does not exist, the same name is tried under the resolved
    current directory, and finally the current directory itself is used.
    Files whose path relative to the root contains ``test`` (any case),
    ``tool_`` or ``__pycache__`` are skipped.

    Returns:
        A mapping from the file's root-relative path (forward slashes) to a
        dict holding the integer counts ``'functions'``, ``'parameters'`` and
        ``'variables'`` plus ``'details'``, the raw analyzeFile() result.
        Only files with at least one violation get an entry.
    """
    def blankEntry():
        return {
            'functions': 0,
            'parameters': 0,
            'variables': 0,
            'details': {
                'functions': [],
                'parameters': [],
                'variables': []
            }
        }

    results = defaultdict(blankEntry)

    # Locate the directory to scan: as given, then below the current
    # directory, then the current directory (already inside gateway).
    rootPath = Path(rootDir)
    if not rootPath.exists():
        underCwd = Path('.').resolve() / rootDir
        rootPath = underCwd if underCwd.exists() else Path('.')

    categories = ('functions', 'parameters', 'variables')
    for pyFile in rootPath.rglob('*.py'):
        filePath = str(pyFile.relative_to(rootPath))
        # Test files, tool scripts and bytecode caches are not analyzed.
        isSkipped = (
            'test' in filePath.lower()
            or 'tool_' in filePath
            or '__pycache__' in filePath
        )
        if isSkipped:
            continue

        violations = analyzeFile(str(pyFile))
        counts = {category: len(violations[category]) for category in categories}
        if sum(counts.values()) <= 0:
            continue

        moduleName = filePath.replace('\\', '/')
        entry = results[moduleName]
        entry.update(counts)
        entry['details'] = violations
    return results
def generateCSV(results: Dict[str, Dict[str, int]], outputFile: str = 'naming_violations.csv'):
    """Write the per-module violation counts to a CSV file and print a summary.

    The CSV has the header ``Module, Function Names, Parameter Names,
    Variable Names, Total`` followed by one row per module with a non-zero
    total, ordered by total descending (ties keep the order of ``results``).

    Args:
        results: Mapping of module path to a dict with the integer counts
            ``'functions'``, ``'parameters'`` and ``'variables'``; other
            keys are ignored.
        outputFile: Destination path; overwritten if it exists. The parent
            directory must already exist.
    """
    def totalOf(stats):
        return stats['functions'] + stats['parameters'] + stats['variables']

    # sorted() is stable, also with reverse=True, so equal totals keep
    # their input order.
    ranked = sorted(results.items(), key=lambda item: totalOf(item[1]), reverse=True)

    rowsWritten = 0
    with open(outputFile, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Module', 'Function Names', 'Parameter Names', 'Variable Names', 'Total'])
        for module, stats in ranked:
            total = totalOf(stats)
            if total > 0:
                writer.writerow([
                    module,
                    stats['functions'],
                    stats['parameters'],
                    stats['variables'],
                    total
                ])
                rowsWritten += 1

    # Warn only when modules were reported yet nothing reached the file.
    # An empty ``results`` is the clean case and must not trigger it.
    if results and rowsWritten == 0:
        print("WARNING: No rows written to CSV despite finding violations!")
    print(f"CSV report generated: {outputFile}")
    # NOTE(review): this is the number of entries in ``results``, which may
    # be fewer than the number of files actually scanned.
    print(f"Total modules analyzed: {len(results)}")

    # Print summary
    totalFuncs = sum(r['functions'] for r in results.values())
    totalParams = sum(r['parameters'] for r in results.values())
    totalVars = sum(r['variables'] for r in results.values())
    print("\nSummary:")
    print(f" Function names: {totalFuncs}")
    print(f" Parameter names: {totalParams}")
    print(f" Variable names: {totalVars}")
    print(f" Total violations: {totalFuncs + totalParams + totalVars}")
if __name__ == '__main__':
    print("Analyzing codebase for naming violations...")
    results = analyzeCodebase('gateway')
    # Write the CSV into the gateway directory when it exists below the
    # current directory. analyzeCodebase() falls back to scanning the current
    # directory when it does not (script launched from inside gateway), so
    # the report must follow it there instead of failing on a missing folder.
    outputDir = Path('gateway')
    if not outputDir.is_dir():
        outputDir = Path('.')
    outputPath = outputDir / 'naming_violations_report.csv'
    generateCSV(results, str(outputPath))