Commit c45b0361 authored by Lisa

fix: harden browser attach and detect virtual mic

parent b37dff0d
......@@ -19,6 +19,7 @@ Implements the interface expected by hermes_node_agent.py.
import asyncio
import base64
import inspect
import json
import logging
from typing import Dict, Any, Optional
from pathlib import Path
......@@ -42,7 +43,118 @@ class BrowserController:
self.default_context: Optional[BrowserContext] = None
self.pages: Dict[str, Page] = {} # page_id -> page
self.lock = asyncio.Lock()
def _ingest_attached_browser_state(self) -> None:
    """Populate internal context/page maps from an attached browser session.

    Resets ``self.contexts``, ``self.pages`` and ``self.default_context``
    and then, when ``self.browser`` is set, registers every context the
    browser exposes:

    * the first context is stored under ``"default"`` and also becomes
      ``self.default_context``;
    * every later context is stored under ``"attached_ctx_<index>"``;
    * every page is stored under ``"page_<n>"``, where ``n`` counts across
      all contexts in iteration order.
    """
    # Reset first so nothing from earlier state is carried over.
    self.contexts = {}
    self.pages = {}
    self.default_context = None

    if not self.browser:
        return

    # getattr / `or []` tolerate a browser object whose `contexts`
    # attribute is missing or None.
    all_contexts = list(getattr(self.browser, "contexts", []) or [])
    if all_contexts:
        self.default_context = all_contexts[0]

    page_index = 0
    for idx, ctx in enumerate(all_contexts):
        ctx_name = "default" if idx == 0 else f"attached_ctx_{idx}"
        self.contexts[ctx_name] = ctx
        for page in getattr(ctx, "pages", []) or []:
            self.pages[f"page_{page_index}"] = page
            page_index += 1
async def _fetch_cdp_target_counts(self, cdp_url: str) -> Dict[str, Any]:
    """Inspect the raw CDP target list for attach diagnostics.

    Runs ``curl -fsS <cdp_url>/json/list`` in a subprocess and tallies the
    returned targets by their ``type`` field.

    Args:
        cdp_url: Base HTTP URL of the CDP endpoint; trailing slashes are
            stripped before ``/json/list`` is appended.

    Returns:
        ``{"ok": True, "total": <int>, "counts": {<type>: <int>}}`` on
        success, otherwise ``{"ok": False, "error": <str>}``. Failures
        (curl missing, non-zero exit, timeout, malformed payload) are
        reported through the return value rather than raised.
    """
    json_list_url = cdp_url.rstrip("/") + "/json/list"
    timeout_s = 5
    try:
        proc = await asyncio.create_subprocess_exec(
            "curl",
            "-fsS",
            json_list_url,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout_s)
        except asyncio.TimeoutError:
            # wait_for only cancels communicate(); the child itself keeps
            # running unless we kill and reap it explicitly.
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # already exited between the timeout and the kill
            await proc.wait()
            return {"ok": False, "error": f"curl timed out after {timeout_s}s"}

        if proc.returncode != 0:
            err = (stderr or b"").decode("utf-8", errors="replace").strip()
            return {"ok": False, "error": err or f"curl exit {proc.returncode}"}

        # Empty output is treated as an empty target list.
        targets = json.loads((stdout or b"[]").decode("utf-8", errors="replace"))
        if not isinstance(targets, list):
            return {
                "ok": False,
                "error": f"unexpected /json/list payload: {type(targets).__name__}",
            }

        counts: Dict[str, int] = {}
        for target in targets:
            # Entries that are not JSON objects are counted as "unknown"
            # instead of aborting the whole diagnostic.
            if isinstance(target, dict):
                kind = str(target.get("type", "unknown"))
            else:
                kind = "unknown"
            counts[kind] = counts.get(kind, 0) + 1
        return {
            "ok": True,
            "total": len(targets),
            "counts": counts,
        }
    except Exception as e:
        # Diagnostics are best-effort: report the failure, never raise.
        # Some exceptions stringify to "", so fall back to the class name.
        return {"ok": False, "error": str(e) or type(e).__name__}
async def _connect_over_cdp_with_retries(self, browser_type: str, endpoint: str, cdp_url: str, config: Dict[str, Any]):
    """Attach to a CDP endpoint with progressive timeout backoff and diagnostics.

    Args:
        browser_type: ``"chromium"``, ``"firefox"`` or ``"webkit"``; any
            other value falls back to the chromium launcher.
        endpoint: Address passed to ``connect_over_cdp``.
        cdp_url: Base URL used only for the pre-attach target inspection.
        config: Optional tuning keys:
            ``connect_timeout_ms`` (default 5000) - timeout of attempt 1;
            ``connect_timeout_step_ms`` (default 5000) - added per retry;
            ``connect_retries`` (default 5) - clamped to the range 1..5.

    Returns:
        A ``(browser, meta)`` tuple where ``meta`` holds ``attempts``,
        ``timeout_ms`` (of the successful attempt) and
        ``target_diagnostics``.

    Raises:
        RuntimeError: If every attempt fails. The last underlying error is
            attached as ``__cause__``.
    """
    base_timeout_ms = int(config.get("connect_timeout_ms", 5000))
    max_retries = max(1, min(5, int(config.get("connect_retries", 5))))
    timeout_step_ms = int(config.get("connect_timeout_step_ms", 5000))

    browser_launcher = {
        "chromium": self.playwright.chromium,
        "firefox": self.playwright.firefox,
        "webkit": self.playwright.webkit,
    }.get(browser_type, self.playwright.chromium)

    # Best-effort snapshot of what the endpoint exposes before attaching;
    # a failure here is logged but does not prevent the attach attempts.
    diagnostics = await self._fetch_cdp_target_counts(cdp_url)
    if diagnostics.get("ok"):
        logger.info(
            "CDP pre-attach targets at %s: total=%s counts=%s",
            cdp_url,
            diagnostics.get("total"),
            diagnostics.get("counts"),
        )
    else:
        logger.warning("CDP pre-attach target inspection failed at %s: %s", cdp_url, diagnostics.get("error"))

    last_error = None
    for attempt in range(1, max_retries + 1):
        # Each retry gets a longer timeout than the previous one.
        timeout_ms = base_timeout_ms + ((attempt - 1) * timeout_step_ms)
        logger.info(
            "CDP attach attempt %s/%s to %s (browser=%s timeout_ms=%s)",
            attempt,
            max_retries,
            endpoint,
            browser_type,
            timeout_ms,
        )
        try:
            browser = await browser_launcher.connect_over_cdp(endpoint, timeout=timeout_ms)
        except Exception as e:
            last_error = e
            logger.warning("CDP attach attempt %s/%s failed: %s", attempt, max_retries, e)
            if attempt < max_retries:
                # Pause between attempts: 1.5s per attempt so far, capped at 5s.
                await asyncio.sleep(min(1.5 * attempt, 5.0))
            continue

        logger.info(
            "CDP attach succeeded on attempt %s/%s to %s",
            attempt,
            max_retries,
            endpoint,
        )
        return browser, {
            "attempts": attempt,
            "timeout_ms": timeout_ms,
            "target_diagnostics": diagnostics,
        }

    # Chain the last failure so its traceback is preserved for callers.
    raise RuntimeError(
        f"connect_over_cdp failed after {max_retries} attempts "
        f"(start_timeout_ms={base_timeout_ms}, step_ms={timeout_step_ms}): {last_error}"
    ) from last_error
async def initialize(self):
"""Initialize Playwright."""
if not HAS_PLAYWRIGHT:
......@@ -81,6 +193,7 @@ class BrowserController:
attach = config.get("attach", False)
cdp_url = config.get("cdp_url", "http://localhost:9222")
browser_type = config.get("browser_type", "chromium")
attach_meta: Dict[str, Any] = {}
try:
if attach:
......@@ -91,17 +204,18 @@ class BrowserController:
"webkit": f"{cdp_url.replace(':9222', ':9223') if ':9222' in cdp_url else cdp_url}"
}
endpoint = endpoints.get(browser_type, cdp_url)
if browser_type == "chromium":
self.browser = await self.playwright.chromium.connect_over_cdp(endpoint)
elif browser_type == "firefox":
self.browser = await self.playwright.firefox.connect_over_cdp(endpoint)
elif browser_type == "webkit":
self.browser = await self.playwright.webkit.connect_over_cdp(endpoint)
else:
self.browser = await self.playwright.chromium.connect_over_cdp(endpoint)
logger.info(f"Attached to existing {browser_type} browser at {endpoint}")
self.browser, attach_meta = await self._connect_over_cdp_with_retries(
browser_type,
endpoint,
cdp_url,
config,
)
self._ingest_attached_browser_state()
logger.info(
f"Attached to existing {browser_type} browser at {endpoint} "
f"(contexts={len(self.contexts)}, pages={len(self.pages)})"
)
else:
# Launch new browser
browser_types = {
......@@ -124,7 +238,14 @@ class BrowserController:
)
logger.info(f"Launched {browser_type} browser (headless={headless})")
return {"success": True, "browser": browser_type, "headless": headless, "mode": "attach" if attach else "launch"}
result = {"success": True, "browser": browser_type, "headless": headless, "mode": "attach" if attach else "launch"}
if attach:
result["contexts"] = list(self.contexts.keys())
result["page_ids"] = list(self.pages.keys())
result["attach_attempts"] = attach_meta.get("attempts")
result["attach_timeout_ms"] = attach_meta.get("timeout_ms")
result["target_diagnostics"] = attach_meta.get("target_diagnostics")
return result
except Exception as e:
logger.error(f"Failed to launch/attach browser: {e}")
......
This diff is collapsed.
......@@ -634,6 +634,7 @@ class PosixAudioController(AudioControllerBase):
def capability_info(self) -> Dict[str, Any]:
monitor_ready, monitor_name = self._default_monitor_source()
input_ready, input_source = self._default_input_source()
inject_ready, inject_source, inject_sink = self._virtual_mic_info()
return {
'platform': 'linux',
'backend': self.backend,
......@@ -641,11 +642,13 @@ class PosixAudioController(AudioControllerBase):
'can_capture_output': monitor_ready,
'can_capture_input': input_ready,
'can_play_audio': bool(self.ffplay or self.aplay or self.ffmpeg),
'can_inject_mic': False,
'can_inject_mic': inject_ready,
'capture_output_ready': monitor_ready,
'capture_output_backend': 'pulseaudio-monitor' if monitor_ready else None,
'default_output_monitor': monitor_name,
'default_input_source': input_source,
'virtual_mic_source': inject_source,
'virtual_mic_sink': inject_sink,
'ffmpeg': bool(self.ffmpeg),
'ffplay': bool(self.ffplay),
'pactl': bool(self.pactl),
......@@ -690,6 +693,26 @@ class PosixAudioController(AudioControllerBase):
return True, f'{sink}.monitor'
return False, None
def _virtual_mic_info(self) -> tuple[bool, Optional[str], Optional[str]]:
    """Report whether a known virtual-microphone source/sink pair exists.

    Queries ``pactl list short sinks`` and ``pactl list short sources``
    through ``self._run_quiet`` and looks for one of the preferred
    source/sink name pairs, in priority order.

    Returns:
        ``(True, source_name, sink_name)`` for the first preferred pair whose
        source and sink are both present; ``(False, None, None)`` when no
        pair matches or ``self.pactl`` is not set.
    """
    if not self.pactl:
        return False, None, None

    def _short_names(kind: str) -> set:
        """Return the second tab-separated column of ``pactl list short <kind>``."""
        result = self._run_quiet([self.pactl, 'list', 'short', kind])
        if not result.get('success'):
            return set()
        # `or ''` guards against a result whose stdout is None.
        return {
            line.split('\t')[1]
            for line in (result.get('stdout') or '').splitlines()
            if '\t' in line
        }

    sink_names = _short_names('sinks')
    source_names = _short_names('sources')

    preferred_pairs = [
        ('hermes_virtual_mic', 'hermes_mic_sink'),
        ('hermes_mic_sink.monitor', 'hermes_mic_sink'),
    ]
    for source_name, sink_name in preferred_pairs:
        if source_name in source_names and sink_name in sink_names:
            return True, source_name, sink_name
    return False, None, None
def _expand_output_path(self, path: Optional[str], suffix: str) -> str:
if path:
return str(Path(path).expanduser())
......@@ -741,6 +764,7 @@ class PosixAudioController(AudioControllerBase):
def get_audio_status(self) -> Dict[str, Any]:
monitor_ready, monitor_name = self._default_monitor_source()
input_ready, input_source = self._default_input_source()
inject_ready, inject_source, inject_sink = self._virtual_mic_info()
status = {
'success': True,
'backend': self.backend,
......@@ -750,6 +774,9 @@ class PosixAudioController(AudioControllerBase):
'capture_output_ready': monitor_ready,
'capture_input_ready': input_ready,
'can_play_audio': bool(self.ffplay or self.aplay or self.ffmpeg),
'can_inject_mic': inject_ready,
'virtual_mic_source': inject_source,
'virtual_mic_sink': inject_sink,
}
if self.pactl:
info = self._run_quiet([self.pactl, 'info'])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment