132 lines
4.3 KiB
Python
132 lines
4.3 KiB
Python
import argparse
|
|
import csv
|
|
import re
|
|
from datetime import datetime
|
|
from typing import Iterable, List, Optional, Tuple
|
|
|
|
|
|
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Number of characters a timestamp rendered with TIMESTAMP_FORMAT occupies,
# e.g. "2025-09-18 16:35:04".
_TIMESTAMP_WIDTH = 19

# "<timestamp> - <LEVEL> - <logger> - " prefix; group 1 is the dotted logger name.
_LOGGER_RE = re.compile(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - \w+ - ([\w\.]+) - ")

# Trailing " - <function>" token. Besides plain identifiers this accepts the
# angle-bracketed names Python reports for code that is not inside a regular
# function (e.g. "<module>", "<lambda>", "<genexpr>"), so those lines are no
# longer silently discarded.
_FUNCTION_RE = re.compile(r" - (\w+|<\w+>)$")


def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[datetime]]:
    """
    Extract (logger, function, timestamp) from a single log line.

    Expected format example (single line):
      2025-09-18 16:35:04 - INFO - modules.workflows._transfer.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask

    The timestamp is the first 19 characters, the logger is the third
    " - "-separated field, and the function is the last token after the final
    " - ". Trailing whitespace (including the newline) is ignored.

    Returns (logger, function, timestamp_dt), or (None, None, None) when the
    line does not match the expected layout or the timestamp is not a valid
    date/time.
    """
    # Cheap structural check first: continuation lines (tracebacks, wrapped
    # messages) are rejected here without paying for strptime.
    logger_match = _LOGGER_RE.match(line)
    if logger_match is None:
        return None, None, None

    # The regex only checks digit positions; strptime rejects impossible
    # values such as month 13 or hour 25.
    try:
        ts = datetime.strptime(line[:_TIMESTAMP_WIDTH], TIMESTAMP_FORMAT)
    except ValueError:
        return None, None, None

    func_match = _FUNCTION_RE.search(line.rstrip())
    if func_match is None:
        return None, None, None

    return logger_match.group(1), func_match.group(1), ts
|
|
|
|
|
|
def iter_jobs(
    lines: Iterable[str], ignore_substrings: Optional[List[str]] = None
) -> Iterable[Tuple[datetime, datetime, str]]:
    """
    Yield (start_ts, end_ts, job_label) for each run of consecutive same-job lines.

    - A job label is "{logger}.{function}" built from each parsed line.
    - Consecutive parsed lines with the same job label form one group;
      start_ts is the timestamp of its first line, end_ts that of its last.
    - A group with a single line has start_ts == end_ts (duration 0 seconds).
    - Lines containing any of ignore_substrings are skipped, as are lines that
      cannot be parsed; skipped lines do not split a group.
    - Empty strings in ignore_substrings are disregarded: "" is a substring of
      every line and would otherwise suppress all output.

    Args:
        lines: Log lines in file order (e.g. an open text file).
        ignore_substrings: Substrings marking lines to skip; None means none.
    """
    # Materialise once so a one-shot iterable is not exhausted after the first
    # line, and drop empty markers (see docstring).
    markers = tuple(marker for marker in (ignore_substrings or ()) if marker)

    # The currently open group as (start_ts, end_ts, job_label), or None
    # before the first parsable line.
    group: Optional[Tuple[datetime, datetime, str]] = None

    for line in lines:
        if any(marker in line for marker in markers):
            continue

        logger, func, ts = parse_line(line)
        if not logger or not func or not ts:
            continue

        job_label = f"{logger}.{func}"

        if group is not None and group[2] == job_label:
            # Same job as the previous line: extend the open group.
            group = (group[0], ts, job_label)
            continue

        # Job changed: emit the finished group (if any) and open a new one.
        if group is not None:
            yield group
        group = (ts, ts, job_label)

    # Flush the last open group.
    if group is not None:
        yield group
|
|
|
|
|
|
def main(argv: Optional[List[str]] = None) -> None:
    """
    Command-line entry point: read a log file and write job durations as CSV.

    The CSV has a header row followed by one row per job group with the
    columns start_timestamp, duration_seconds (whole seconds between the
    group's first and last line) and job_label.

    Args:
        argv: Argument list to parse instead of the process command line.
            None (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: On invalid arguments, or when the output path refers to
            the input log itself.
    """
    import os  # Local import: only the same-file guard below needs it.

    parser = argparse.ArgumentParser(description="Extract job durations from poweron.log into CSV")
    parser.add_argument("log_path", help="Path to poweron.log")
    parser.add_argument(
        "--output",
        "-o",
        default="job_durations.csv",
        help="Output CSV path (default: job_durations.csv)",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=[
            # Default ignore examples; add/remove as needed
            "Starting action",  # e.g., "Task 1 - Starting action 3/4"
        ],
        help="List of substrings; lines containing any will be ignored",
    )
    args = parser.parse_args(argv)

    # Opening the output in "w" mode truncates it immediately. If it is the
    # same file as the input, the log would be wiped before a line is read.
    source_path = os.path.normcase(os.path.realpath(args.log_path))
    target_path = os.path.normcase(os.path.realpath(args.output))
    if source_path == target_path:
        parser.error("output path must differ from the input log (it would be overwritten)")

    # "utf-8-sig" strips a leading byte-order mark if present, which would
    # otherwise make the first line's timestamp unparsable; undecodable bytes
    # are still dropped (best-effort read).
    with open(args.log_path, "r", encoding="utf-8-sig", errors="ignore") as f_in, open(
        args.output, "w", newline="", encoding="utf-8"
    ) as f_out:
        writer = csv.writer(f_out)
        writer.writerow(["start_timestamp", "duration_seconds", "job_label"])

        for start_ts, end_ts, job_label in iter_jobs(f_in, ignore_substrings=args.ignore):
            duration_seconds = int((end_ts - start_ts).total_seconds())
            writer.writerow([start_ts.strftime(TIMESTAMP_FORMAT), duration_seconds, job_label])


if __name__ == "__main__":
    main()
|
|
|
|
|