# gateway/tool_stats_durations_from_log.py
# Snapshot: 2025-09-22 23:34:47 +02:00 — 132 lines, 4.3 KiB, Python
import argparse
import csv
import re
from datetime import datetime
from typing import Iterable, List, Optional, Tuple
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Pre-compiled patterns, hoisted to module level so they are built once
# rather than looked up/compiled on every call of the per-line parser.
# Layout: ^TIMESTAMP - LEVEL - LOGGER - ...
_LOGGER_RE = re.compile(
    r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - \w+ - ([\w\.]+) - "
)
# The function name is the final " - "-separated token on the line.
_FUNC_RE = re.compile(r" - (\w+)$")


def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[datetime]]:
    """
    Extract (logger, function, timestamp) from a single log line.

    Expected format example (one line):
        2025-09-18 16:35:04 - INFO - modules.workflows._transfer.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask

    Returns:
        (logger, function, timestamp_dt) on success, or (None, None, None)
        if any of the three parts cannot be parsed.
    """
    # The timestamp occupies the first 19 characters of a well-formed line.
    if len(line) < 19:
        return None, None, None
    try:
        ts = datetime.strptime(line[:19], TIMESTAMP_FORMAT)
    except ValueError:
        return None, None, None
    # Logger name sits between the level and the free-text message.
    m_logger = _LOGGER_RE.search(line)
    if not m_logger:
        return None, None, None
    m_func = _FUNC_RE.search(line.rstrip())
    if not m_func:
        return None, None, None
    return m_logger.group(1), m_func.group(1), ts
def iter_jobs(
    lines: Iterable[str], ignore_substrings: Optional[List[str]] = None
) -> Iterable[Tuple[datetime, datetime, str]]:
    """
    Yield (start_ts, end_ts, job_label) for consecutive runs of the same job.

    A job label is "{logger}.{function}" derived from each parsed line.
    Consecutive lines carrying the same label merge into one group; a
    one-line group yields start_ts == end_ts (i.e. a 0-second duration).
    Lines containing any of *ignore_substrings*, or lines that fail to
    parse, are skipped entirely.
    """
    skip_tokens = ignore_substrings if ignore_substrings else []
    # The currently open run, or None: (label, start_ts, end_ts).
    group: Optional[Tuple[str, datetime, datetime]] = None
    for raw in lines:
        # Optional ignores by substring anywhere in the line.
        if any(token in raw for token in skip_tokens):
            continue
        logger, func, ts = parse_line(raw)
        if not (logger and func and ts):
            continue
        label = f"{logger}.{func}"
        if group is None:
            # First parseable line: open the initial run.
            group = (label, ts, ts)
        elif group[0] == label:
            # Same job as the open run: extend its end timestamp.
            group = (label, group[1], ts)
        else:
            # Different job: emit the finished run, open a new one.
            yield group[1], group[2], group[0]
            group = (label, ts, ts)
    # Flush whatever run is still open at end of input.
    if group is not None:
        yield group[1], group[2], group[0]
def main() -> None:
    """CLI entry point: parse a poweron.log and write job durations as CSV."""
    arg_parser = argparse.ArgumentParser(
        description="Extract job durations from poweron.log into CSV"
    )
    arg_parser.add_argument("log_path", help="Path to poweron.log")
    arg_parser.add_argument(
        "--output",
        "-o",
        default="job_durations.csv",
        help="Output CSV path (default: job_durations.csv)",
    )
    arg_parser.add_argument(
        "--ignore",
        nargs="*",
        default=[
            # Default ignore examples; add/remove as needed
            "Starting action",  # e.g., "Task 1 - Starting action 3/4"
        ],
        help="List of substrings; lines containing any will be ignored",
    )
    opts = arg_parser.parse_args()

    # errors="ignore" keeps the scan going past any mis-encoded log bytes.
    with open(opts.log_path, "r", encoding="utf-8", errors="ignore") as log_file:
        with open(opts.output, "w", newline="", encoding="utf-8") as csv_file:
            out = csv.writer(csv_file)
            out.writerow(["start_timestamp", "duration_seconds", "job_label"])
            for begin, finish, label in iter_jobs(
                log_file, ignore_substrings=opts.ignore
            ):
                seconds = int((finish - begin).total_seconds())
                out.writerow([begin.strftime(TIMESTAMP_FORMAT), seconds, label])


if __name__ == "__main__":
    main()