Merge pull request #50 from valueonag/int

fixed voice
This commit is contained in:
ValueOn AG 2025-09-19 14:32:33 +02:00 committed by GitHub
commit 4d4ad8d49b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 176 additions and 35312 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@@ -236,26 +236,55 @@ class ConnectorGoogleSpeech:
fallback_configs = []
if encoding != speech.RecognitionConfig.AudioEncoding.LINEAR16:
# Try LINEAR16 with detected sample rate
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": sample_rate,
"channels": channels,
"use_sample_rate": True,
"description": f"LINEAR16 with {sample_rate}Hz"
})
# Try LINEAR16 with standard sample rates
for std_rate in [16000, 8000, 22050, 44100]:
if std_rate != sample_rate:
# For WEBM_OPUS, don't try LINEAR16 with detected sample rate as it causes conflicts
if audio_format != "webm_opus":
# Try LINEAR16 with detected sample rate for non-WEBM formats
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": std_rate,
"channels": 1,
"sample_rate": sample_rate,
"channels": channels,
"use_sample_rate": True,
"description": f"LINEAR16 with {std_rate}Hz"
"description": f"LINEAR16 with {sample_rate}Hz"
})
# For WEBM_OPUS, only try compatible sample rates or skip sample rate specification
if audio_format == "webm_opus":
# Try WEBM_OPUS without sample rate specification (let Google read from header)
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
"sample_rate": 48000,
"channels": 1,
"use_sample_rate": False, # Don't specify sample rate
"description": f"WEBM_OPUS with auto sample rate"
})
# Try WEBM_OPUS with explicit 48000Hz (matching header)
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
"sample_rate": 48000,
"channels": 1,
"use_sample_rate": True,
"description": f"WEBM_OPUS with 48000Hz"
})
# Try LINEAR16 with 48000Hz as last resort (may not work with WEBM data)
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": 48000, # Match the WEBM header
"channels": 1,
"use_sample_rate": True,
"description": f"LINEAR16 with 48000Hz (WEBM compatible)"
})
else:
# For other formats, try standard sample rates
for std_rate in [16000, 8000, 22050, 44100]:
if std_rate != sample_rate:
fallback_configs.append({
"encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
"sample_rate": std_rate,
"channels": 1,
"use_sample_rate": True,
"description": f"LINEAR16 with {std_rate}Hz"
})
# Try with different models
models = ["latest_long", "phone_call", "latest_short"]

132
tool_durations_from_log.py Normal file
View file

@@ -0,0 +1,132 @@
import argparse
import csv
import re
from datetime import datetime
from typing import Iterable, List, Optional, Tuple
# Log timestamps look like "2025-09-18 16:35:04" (first 19 chars of a line).
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[datetime]]:
    """
    Extract (logger, function, timestamp) from a single log line.

    Expected format example (single line):
        2025-09-18 16:35:04 - INFO - modules.chat.handling.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask

    Returns (logger, function, timestamp_dt), or (None, None, None) when the
    line does not match the expected shape.
    """
    no_match = (None, None, None)

    # A well-formed line starts with a 19-character timestamp.
    if len(line) < 19:
        return no_match
    try:
        ts = datetime.strptime(line[:19], TIMESTAMP_FORMAT)
    except ValueError:
        return no_match

    # Logger name is the third " - " field: ^TIMESTAMP - LEVEL - LOGGER - ...
    logger_match = re.match(
        r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - \w+ - ([\w\.]+) - ", line
    )
    if logger_match is None:
        return no_match

    # Function name is the very last " - "-separated token on the line.
    func_match = re.search(r" - (\w+)$", line.rstrip())
    if func_match is None:
        return no_match

    return logger_match.group(1), func_match.group(1), ts
def iter_jobs(
    lines: Iterable[str], ignore_substrings: Optional[List[str]] = None
) -> Iterable[Tuple[datetime, datetime, str]]:
    """
    Yield (start_ts, end_ts, job_label) for each run of consecutive lines
    sharing the same job label.

    - A job label is "{logger}.{function}" derived from each parsed line.
    - Consecutive lines with the same label are merged into one group.
    - A single-line group has start_ts == end_ts (i.e. zero duration).
    - Lines containing any of *ignore_substrings* are skipped entirely.
    """
    skip_tokens = ignore_substrings if ignore_substrings else []

    label: Optional[str] = None
    start: Optional[datetime] = None
    end: Optional[datetime] = None

    for raw in lines:
        # Optional ignores: substring match anywhere in the raw line.
        if any(token in raw for token in skip_tokens):
            continue

        logger, func, ts = parse_line(raw)
        if not (logger and func and ts):
            continue

        current = f"{logger}.{func}"

        if label is None:
            # First accepted line opens the first group.
            label, start, end = current, ts, ts
        elif current == label:
            # Same job: extend the open group.
            end = ts
        else:
            # Different job: emit the finished group, then open a new one.
            yield start, end, label
            label, start, end = current, ts, ts

    # Emit the trailing open group, if any line was accepted at all.
    if label is not None and start is not None and end is not None:
        yield start, end, label
def main() -> None:
    """CLI entry point: parse a poweron.log and write job durations as CSV."""
    parser = argparse.ArgumentParser(
        description="Extract job durations from poweron.log into CSV"
    )
    parser.add_argument("log_path", help="Path to poweron.log")
    parser.add_argument(
        "--output",
        "-o",
        default="job_durations.csv",
        help="Output CSV path (default: job_durations.csv)",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=[
            # Default ignore examples; add/remove as needed
            "Starting action",  # e.g., "Task 1 - Starting action 3/4"
        ],
        help="List of substrings; lines containing any will be ignored",
    )
    args = parser.parse_args()

    # errors="ignore" keeps the run alive on stray non-UTF-8 bytes in the log.
    with open(args.log_path, "r", encoding="utf-8", errors="ignore") as log_file, open(
        args.output, "w", newline="", encoding="utf-8"
    ) as csv_file:
        out = csv.writer(csv_file)
        out.writerow(["start_timestamp", "duration_seconds", "job_label"])
        for start, end, label in iter_jobs(log_file, ignore_substrings=args.ignore):
            # Durations are truncated to whole seconds, matching log precision.
            seconds = int((end - start).total_seconds())
            out.writerow([start.strftime(TIMESTAMP_FORMAT), seconds, label])
# Allow use both as an importable module and as a standalone script.
if __name__ == "__main__":
    main()