import argparse
import csv
import re
from datetime import datetime
from typing import Iterable, List, Optional, Tuple

TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Compiled once at import time: parse_line() runs once per log line, so
# hoisting the patterns out of the call keeps the hot loop free of the
# re-module cache lookup.
# ^TIMESTAMP - LEVEL - LOGGER - ...   (logger is the dotted module path)
_LOGGER_RE = re.compile(
    r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - \w+ - ([\w\.]+) - "
)
# Function name is the very last " - "-separated token on the line.
_FUNC_RE = re.compile(r" - (\w+)$")


def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[datetime]]:
    """
    Extract (logger, function, timestamp) from a log line.

    Expected format example (single line):
        2025-09-18 16:35:04 - INFO - modules.workflows._transfer.handlingTasks
        - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572
        - executeTask

    Returns (logger, function, timestamp_dt), or (None, None, None) when the
    line does not match the expected format.
    """
    # Timestamp occupies the first 19 characters in these logs.
    if len(line) < 19:
        return None, None, None
    try:
        ts = datetime.strptime(line[:19], TIMESTAMP_FORMAT)
    except ValueError:
        return None, None, None

    m_logger = _LOGGER_RE.search(line)
    if not m_logger:
        return None, None, None
    logger = m_logger.group(1)

    m_func = _FUNC_RE.search(line.rstrip())
    if not m_func:
        return None, None, None
    func = m_func.group(1)

    return logger, func, ts


def iter_jobs(
    lines: Iterable[str],
    ignore_substrings: Optional[List[str]] = None,
) -> Iterable[Tuple[datetime, datetime, str]]:
    """
    Yield (start_ts, end_ts, job_label) tuples for sequential groups of the
    same job.

    - A job label is "{logger}.{function}" based on each parsed line.
    - Consecutive lines with the same job label are grouped together.
    - A single-line group has start_ts == end_ts (duration 0 seconds).
    - Lines containing any of ignore_substrings are skipped.
    - Lines that fail to parse (see parse_line) are skipped silently.
    """
    ignore_substrings = ignore_substrings or []

    current_label: Optional[str] = None
    current_start: Optional[datetime] = None
    current_end: Optional[datetime] = None

    for line in lines:
        # Optional ignores by substring match anywhere in the line.
        if any(substr in line for substr in ignore_substrings):
            continue

        logger, func, ts = parse_line(line)
        if not logger or not func or not ts:
            continue

        job_label = f"{logger}.{func}"

        if current_label is None:
            # First parsable line: open the initial group.
            current_label = job_label
            current_start = ts
            current_end = ts
            continue

        if job_label == current_label:
            # Same job as the open group: extend its end timestamp.
            current_end = ts
            continue

        # Different job: close the previous group and start a new one.
        # (current_start/current_end are always set together with
        # current_label, so no extra None checks are needed here.)
        yield current_start, current_end, current_label
        current_label = job_label
        current_start = ts
        current_end = ts

    # Flush the last open group, if any.
    if current_label is not None and current_start is not None and current_end is not None:
        yield current_start, current_end, current_label


def main() -> None:
    """CLI entry point: read a poweron.log file and write job durations as CSV."""
    parser = argparse.ArgumentParser(
        description="Extract job durations from poweron.log into CSV"
    )
    parser.add_argument("log_path", help="Path to poweron.log")
    parser.add_argument(
        "--output",
        "-o",
        default="job_durations.csv",
        help="Output CSV path (default: job_durations.csv)",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=[
            # Default ignore examples; add/remove as needed
            "Starting action",  # e.g., "Task 1 - Starting action 3/4"
        ],
        help="List of substrings; lines containing any will be ignored",
    )
    args = parser.parse_args()

    # errors="ignore" tolerates stray bytes in the log; newline="" is the
    # csv-module requirement for output files.
    with open(args.log_path, "r", encoding="utf-8", errors="ignore") as f_in, open(
        args.output, "w", newline="", encoding="utf-8"
    ) as f_out:
        writer = csv.writer(f_out)
        writer.writerow(["start_timestamp", "duration_seconds", "job_label"])
        for start_ts, end_ts, job_label in iter_jobs(f_in, ignore_substrings=args.ignore):
            duration_seconds = int((end_ts - start_ts).total_seconds())
            writer.writerow(
                [start_ts.strftime(TIMESTAMP_FORMAT), duration_seconds, job_label]
            )


if __name__ == "__main__":
    main()