commit 4d4ad8d49b
5 changed files with 176 additions and 35312 deletions
(One binary file not shown; two file diffs suppressed because they are too large.)
@@ -236,26 +236,55 @@ class ConnectorGoogleSpeech:
         fallback_configs = []
 
         if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
-            # Try LINEAR16 with detected sample rate
-            fallback_configs.append({
-                "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
-                "sample_rate": sample_rate,
-                "channels": channels,
-                "use_sample_rate": True,
-                "description": f"LINEAR16 with {sample_rate}Hz"
-            })
-
-            # Try LINEAR16 with standard sample rates
-            for std_rate in [16000, 8000, 22050, 44100]:
-                if std_rate != sample_rate:
-                    fallback_configs.append({
-                        "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
-                        "sample_rate": std_rate,
-                        "channels": 1,
-                        "use_sample_rate": True,
-                        "description": f"LINEAR16 with {std_rate}Hz"
-                    })
+            # For WEBM_OPUS, don't try LINEAR16 with the detected sample rate, as it causes conflicts
+            if audio_format != "webm_opus":
+                # Try LINEAR16 with the detected sample rate for non-WEBM formats
+                fallback_configs.append({
+                    "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
+                    "sample_rate": sample_rate,
+                    "channels": channels,
+                    "use_sample_rate": True,
+                    "description": f"LINEAR16 with {sample_rate}Hz"
+                })
+
+            # For WEBM_OPUS, only try compatible sample rates or skip the sample rate entirely
+            if audio_format == "webm_opus":
+                # Try WEBM_OPUS without a sample rate (let Google read it from the header)
+                fallback_configs.append({
+                    "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
+                    "sample_rate": 48000,
+                    "channels": 1,
+                    "use_sample_rate": False,  # Don't specify a sample rate
+                    "description": "WEBM_OPUS with auto sample rate"
+                })
+                # Try WEBM_OPUS with an explicit 48000Hz (matching the header)
+                fallback_configs.append({
+                    "encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
+                    "sample_rate": 48000,
+                    "channels": 1,
+                    "use_sample_rate": True,
+                    "description": "WEBM_OPUS with 48000Hz"
+                })
+                # Try LINEAR16 with 48000Hz as a last resort (may not work with WEBM data)
+                fallback_configs.append({
+                    "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
+                    "sample_rate": 48000,  # Match the WEBM header
+                    "channels": 1,
+                    "use_sample_rate": True,
+                    "description": "LINEAR16 with 48000Hz (WEBM compatible)"
+                })
+            else:
+                # For other formats, try standard sample rates
+                for std_rate in [16000, 8000, 22050, 44100]:
+                    if std_rate != sample_rate:
+                        fallback_configs.append({
+                            "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
+                            "sample_rate": std_rate,
+                            "channels": 1,
+                            "use_sample_rate": True,
+                            "description": f"LINEAR16 with {std_rate}Hz"
+                        })
 
         # Try with different models
         models = ["latest_long", "phone_call", "latest_short"]
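For context, the list built above is meant to be walked entry by entry until one configuration transcribes successfully. Below is a minimal sketch of such a consumer, assuming the google-cloud-speech v1 client; the try_fallbacks helper, its parameters, and the retry-on-any-exception policy are illustrative assumptions, not part of this commit.

from google.cloud import speech

def try_fallbacks(client, content, fallback_configs, language_code="en-US"):
    # Hypothetical consumer of the fallback list built in the diff above.
    audio = speech.RecognitionAudio(content=content)
    for cfg in fallback_configs:
        kwargs = {
            "encoding": cfg["encoding"],
            "audio_channel_count": cfg["channels"],
            "language_code": language_code,
        }
        # Pin the sample rate only when the entry asks for it; otherwise let
        # Google read it from the container header (the WEBM_OPUS auto case).
        if cfg["use_sample_rate"]:
            kwargs["sample_rate_hertz"] = cfg["sample_rate"]
        # A "model" field (e.g. "latest_long" from the models list) could be
        # passed through kwargs the same way.
        try:
            return client.recognize(config=speech.RecognitionConfig(**kwargs), audio=audio)
        except Exception:
            continue  # move on to the candidate described by the next entry
    return None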
tool_durations_from_log.py (new file, 132 lines)
@@ -0,0 +1,132 @@
import argparse
import csv
import re
from datetime import datetime
from typing import Iterable, List, Optional, Tuple


TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[datetime]]:
    """
    Extract (logger, function, timestamp) from a log line.

    Expected format example (single line):
    2025-09-18 16:35:04 - INFO - modules.chat.handling.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask

    Returns (logger, function, timestamp_dt) or (None, None, None) if not matched.
    """
    # The timestamp is the first 19 characters in the given logs
    if len(line) < 19:
        return None, None, None

    ts_str = line[:19]
    try:
        ts = datetime.strptime(ts_str, TIMESTAMP_FORMAT)
    except ValueError:
        return None, None, None

    # Extract the logger name between the first two " - " segments following the level:
    # ^TIMESTAMP - LEVEL - LOGGER - ...
    m_logger = re.search(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - \w+ - ([\w\.]+) - ", line)
    if not m_logger:
        return None, None, None
    logger = m_logger.group(1)

    # The function is the very last token after the final " - "
    m_func = re.search(r" - (\w+)$", line.rstrip())
    if not m_func:
        return None, None, None
    func = m_func.group(1)

    return logger, func, ts


def iter_jobs(
    lines: Iterable[str], ignore_substrings: Optional[List[str]] = None
) -> Iterable[Tuple[datetime, datetime, str]]:
    """
    Yields tuples of (start_ts, end_ts, job_label) for sequential groups of the same job.

    - A job label is "{logger}.{function}" based on each parsed line.
    - Consecutive lines with the same job label are grouped together.
    - If a group has a single line, start_ts == end_ts and the duration becomes 0 seconds.
    - Lines containing any of ignore_substrings are skipped.
    """
    ignore_substrings = ignore_substrings or []

    current_label: Optional[str] = None
    current_start: Optional[datetime] = None
    current_end: Optional[datetime] = None

    for line in lines:
        # Optional ignores by substring within the entire line
        if any(substr in line for substr in ignore_substrings):
            continue

        logger, func, ts = parse_line(line)
        if not logger or not func or not ts:
            continue

        job_label = f"{logger}.{func}"

        if current_label is None:
            current_label = job_label
            current_start = ts
            current_end = ts
            continue

        if job_label == current_label:
            # Extend the current group
            current_end = ts
            continue

        # Close the previous group and start a new one
        if current_label is not None and current_start is not None and current_end is not None:
            yield current_start, current_end, current_label

        current_label = job_label
        current_start = ts
        current_end = ts

    # Flush the last open group
    if current_label is not None and current_start is not None and current_end is not None:
        yield current_start, current_end, current_label


def main() -> None:
    parser = argparse.ArgumentParser(description="Extract job durations from poweron.log into CSV")
    parser.add_argument("log_path", help="Path to poweron.log")
    parser.add_argument(
        "--output",
        "-o",
        default="job_durations.csv",
        help="Output CSV path (default: job_durations.csv)",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=[
            # Default ignore examples; add/remove as needed
            "Starting action",  # e.g., "Task 1 - Starting action 3/4"
        ],
        help="List of substrings; lines containing any will be ignored",
    )
    args = parser.parse_args()

    with open(args.log_path, "r", encoding="utf-8", errors="ignore") as f_in, open(
        args.output, "w", newline="", encoding="utf-8"
    ) as f_out:
        writer = csv.writer(f_out)
        writer.writerow(["start_timestamp", "duration_seconds", "job_label"])

        for start_ts, end_ts, job_label in iter_jobs(f_in, ignore_substrings=args.ignore):
            duration_seconds = int((end_ts - start_ts).total_seconds())
            writer.writerow([start_ts.strftime(TIMESTAMP_FORMAT), duration_seconds, job_label])


if __name__ == "__main__":
    main()
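Usage, assuming the log file sits next to the script:

python tool_durations_from_log.py poweron.log -o job_durations.csv --ignore "Starting action"

This writes one CSV row per consecutive run of the same {logger}.{function} label, with the group's start timestamp and its duration in whole seconds.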
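To see the grouping rule in isolation, here is a small check on two synthetic lines in the documented format (the message text and the D:\app path are made up for the example; the logger and function names come from the docstring above):

from tool_durations_from_log import iter_jobs

sample = [
    r"2025-09-18 16:35:04 - INFO - modules.chat.handling.handlingTasks - msg - D:\app\handlingTasks.py:572 - executeTask",
    r"2025-09-18 16:35:09 - INFO - modules.chat.handling.handlingTasks - msg - D:\app\handlingTasks.py:580 - executeTask",
]
for start, end, label in iter_jobs(sample):
    print(start, int((end - start).total_seconds()), label)
# 2025-09-18 16:35:04 5 modules.chat.handling.handlingTasks.executeTask

Both lines parse to the same job label, so they collapse into one group spanning five seconds.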