Commit c45b0361 authored by Lisa

fix: harden browser attach and detect virtual mic

parent b37dff0d
...@@ -19,6 +19,7 @@ Implements the interface expected by hermes_node_agent.py. ...@@ -19,6 +19,7 @@ Implements the interface expected by hermes_node_agent.py.
import asyncio import asyncio
import base64 import base64
import inspect import inspect
import json
import logging import logging
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from pathlib import Path from pathlib import Path
...@@ -43,6 +44,117 @@ class BrowserController: ...@@ -43,6 +44,117 @@ class BrowserController:
self.pages: Dict[str, Page] = {} # page_id -> page self.pages: Dict[str, Page] = {} # page_id -> page
self.lock = asyncio.Lock() self.lock = asyncio.Lock()
def _ingest_attached_browser_state(self) -> None:
    """Rebuild the context/page registries from the attached browser.

    Clears ``self.contexts``, ``self.pages`` and ``self.default_context``,
    then, if a browser is attached, registers every context it exposes.
    The first context is stored under the name ``"default"`` (and becomes
    ``self.default_context``); later ones are named ``attached_ctx_<n>``.
    Pages are numbered ``page_0``, ``page_1``, ... across all contexts in
    iteration order.
    """
    self.contexts = {}
    self.pages = {}
    self.default_context = None

    if not self.browser:
        return

    attached_contexts = list(getattr(self.browser, "contexts", []) or [])
    for position, context in enumerate(attached_contexts):
        if position == 0:
            label = "default"
            self.default_context = context
        else:
            label = f"attached_ctx_{position}"
        self.contexts[label] = context

        for attached_page in list(getattr(context, "pages", []) or []):
            # The page map was just reset and only grows here, so its size
            # is always the next free running index.
            self.pages[f"page_{len(self.pages)}"] = attached_page
async def _fetch_cdp_target_counts(self, cdp_url: str) -> Dict[str, Any]:
    """Inspect the raw CDP target list for attach diagnostics.

    Runs ``curl -fsS <cdp_url>/json/list`` and tallies the returned targets
    by their ``type`` field.

    Args:
        cdp_url: Base HTTP URL of the CDP endpoint; a trailing ``/`` is
            stripped before ``/json/list`` is appended.

    Returns:
        ``{"ok": True, "total": <int>, "counts": {<type>: <int>}}`` on
        success, otherwise ``{"ok": False, "error": <message>}``. This
        method never raises for ordinary failures; it is best-effort
        diagnostics only.
    """
    timeout_s = 5
    json_list_url = cdp_url.rstrip("/") + "/json/list"
    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "curl",
            "-fsS",
            json_list_url,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout_s)
        if proc.returncode != 0:
            err = (stderr or b"").decode("utf-8", errors="replace").strip()
            return {"ok": False, "error": err or f"curl exit {proc.returncode}"}

        targets = json.loads((stdout or b"[]").decode("utf-8", errors="replace"))
        if not isinstance(targets, list):
            return {
                "ok": False,
                "error": f"unexpected /json/list payload type: {type(targets).__name__}",
            }

        counts: Dict[str, int] = {}
        for target in targets:
            # Entries that are not JSON objects have no "type"; bucket them
            # as "unknown" instead of failing the whole inspection.
            kind = str(target.get("type", "unknown")) if isinstance(target, dict) else "unknown"
            counts[kind] = counts.get(kind, 0) + 1
        return {
            "ok": True,
            "total": len(targets),
            "counts": counts,
        }
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so spell the reason out explicitly.
        return {"ok": False, "error": f"timed out after {timeout_s}s fetching {json_list_url}"}
    except Exception as e:
        return {"ok": False, "error": str(e) or type(e).__name__}
    finally:
        # wait_for() only cancels communicate(); the child itself keeps
        # running unless it is killed and reaped here.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass
            await proc.wait()
async def _connect_over_cdp_with_retries(self, browser_type: str, endpoint: str, cdp_url: str, config: Dict[str, Any]):
    """Attach to a CDP endpoint with progressive timeout backoff and diagnostics.

    Args:
        browser_type: ``"chromium"``, ``"firefox"`` or ``"webkit"``; any
            other value falls back to the chromium launcher.
        endpoint: Endpoint passed to ``connect_over_cdp``.
        cdp_url: Base URL used only for the pre-attach target inspection.
        config: Optional tuning keys: ``connect_timeout_ms`` (default 5000),
            ``connect_retries`` (default 5, clamped to 1..5) and
            ``connect_timeout_step_ms`` (default 5000).

    Returns:
        A ``(browser, meta)`` tuple where ``meta`` holds ``attempts``,
        ``timeout_ms`` (the timeout of the successful attempt) and
        ``target_diagnostics``.

    Raises:
        RuntimeError: If every attempt fails. The last underlying error is
            attached as ``__cause__``.
    """
    base_timeout_ms = int(config.get("connect_timeout_ms", 5000))
    max_retries = max(1, min(5, int(config.get("connect_retries", 5))))
    timeout_step_ms = int(config.get("connect_timeout_step_ms", 5000))

    browser_launcher = {
        "chromium": self.playwright.chromium,
        "firefox": self.playwright.firefox,
        "webkit": self.playwright.webkit,
    }.get(browser_type, self.playwright.chromium)

    # Best-effort look at what the endpoint exposes before attaching; a
    # failure here is logged but does not prevent the attach attempts.
    diagnostics = await self._fetch_cdp_target_counts(cdp_url)
    if diagnostics.get("ok"):
        logger.info(
            "CDP pre-attach targets at %s: total=%s counts=%s",
            cdp_url,
            diagnostics.get("total"),
            diagnostics.get("counts"),
        )
    else:
        logger.warning("CDP pre-attach target inspection failed at %s: %s", cdp_url, diagnostics.get("error"))

    last_error = None
    for attempt in range(1, max_retries + 1):
        # The timeout grows linearly: base, base + step, base + 2 * step, ...
        timeout_ms = base_timeout_ms + ((attempt - 1) * timeout_step_ms)
        logger.info(
            "CDP attach attempt %s/%s to %s (browser=%s timeout_ms=%s)",
            attempt,
            max_retries,
            endpoint,
            browser_type,
            timeout_ms,
        )
        try:
            browser = await browser_launcher.connect_over_cdp(endpoint, timeout=timeout_ms)
        except Exception as e:
            last_error = e
            logger.warning("CDP attach attempt %s/%s failed: %s", attempt, max_retries, e)
            if attempt < max_retries:
                # Pause between attempts, capped at 5 seconds.
                await asyncio.sleep(min(1.5 * attempt, 5.0))
        else:
            logger.info(
                "CDP attach succeeded on attempt %s/%s to %s",
                attempt,
                max_retries,
                endpoint,
            )
            return browser, {
                "attempts": attempt,
                "timeout_ms": timeout_ms,
                "target_diagnostics": diagnostics,
            }

    # Chain the last failure so callers keep the original traceback.
    raise RuntimeError(
        f"connect_over_cdp failed after {max_retries} attempts "
        f"(start_timeout_ms={base_timeout_ms}, step_ms={timeout_step_ms}): {last_error}"
    ) from last_error
async def initialize(self): async def initialize(self):
"""Initialize Playwright.""" """Initialize Playwright."""
if not HAS_PLAYWRIGHT: if not HAS_PLAYWRIGHT:
...@@ -81,6 +193,7 @@ class BrowserController: ...@@ -81,6 +193,7 @@ class BrowserController:
attach = config.get("attach", False) attach = config.get("attach", False)
cdp_url = config.get("cdp_url", "http://localhost:9222") cdp_url = config.get("cdp_url", "http://localhost:9222")
browser_type = config.get("browser_type", "chromium") browser_type = config.get("browser_type", "chromium")
attach_meta: Dict[str, Any] = {}
try: try:
if attach: if attach:
...@@ -91,17 +204,18 @@ class BrowserController: ...@@ -91,17 +204,18 @@ class BrowserController:
"webkit": f"{cdp_url.replace(':9222', ':9223') if ':9222' in cdp_url else cdp_url}" "webkit": f"{cdp_url.replace(':9222', ':9223') if ':9222' in cdp_url else cdp_url}"
} }
endpoint = endpoints.get(browser_type, cdp_url) endpoint = endpoints.get(browser_type, cdp_url)
self.browser, attach_meta = await self._connect_over_cdp_with_retries(
browser_type,
endpoint,
cdp_url,
config,
)
self._ingest_attached_browser_state()
if browser_type == "chromium": logger.info(
self.browser = await self.playwright.chromium.connect_over_cdp(endpoint) f"Attached to existing {browser_type} browser at {endpoint} "
elif browser_type == "firefox": f"(contexts={len(self.contexts)}, pages={len(self.pages)})"
self.browser = await self.playwright.firefox.connect_over_cdp(endpoint) )
elif browser_type == "webkit":
self.browser = await self.playwright.webkit.connect_over_cdp(endpoint)
else:
self.browser = await self.playwright.chromium.connect_over_cdp(endpoint)
logger.info(f"Attached to existing {browser_type} browser at {endpoint}")
else: else:
# Launch new browser # Launch new browser
browser_types = { browser_types = {
...@@ -124,7 +238,14 @@ class BrowserController: ...@@ -124,7 +238,14 @@ class BrowserController:
) )
logger.info(f"Launched {browser_type} browser (headless={headless})") logger.info(f"Launched {browser_type} browser (headless={headless})")
return {"success": True, "browser": browser_type, "headless": headless, "mode": "attach" if attach else "launch"} result = {"success": True, "browser": browser_type, "headless": headless, "mode": "attach" if attach else "launch"}
if attach:
result["contexts"] = list(self.contexts.keys())
result["page_ids"] = list(self.pages.keys())
result["attach_attempts"] = attach_meta.get("attempts")
result["attach_timeout_ms"] = attach_meta.get("timeout_ms")
result["target_diagnostics"] = attach_meta.get("target_diagnostics")
return result
except Exception as e: except Exception as e:
logger.error(f"Failed to launch/attach browser: {e}") logger.error(f"Failed to launch/attach browser: {e}")
......
This diff is collapsed.
...@@ -634,6 +634,7 @@ class PosixAudioController(AudioControllerBase): ...@@ -634,6 +634,7 @@ class PosixAudioController(AudioControllerBase):
def capability_info(self) -> Dict[str, Any]: def capability_info(self) -> Dict[str, Any]:
monitor_ready, monitor_name = self._default_monitor_source() monitor_ready, monitor_name = self._default_monitor_source()
input_ready, input_source = self._default_input_source() input_ready, input_source = self._default_input_source()
inject_ready, inject_source, inject_sink = self._virtual_mic_info()
return { return {
'platform': 'linux', 'platform': 'linux',
'backend': self.backend, 'backend': self.backend,
...@@ -641,11 +642,13 @@ class PosixAudioController(AudioControllerBase): ...@@ -641,11 +642,13 @@ class PosixAudioController(AudioControllerBase):
'can_capture_output': monitor_ready, 'can_capture_output': monitor_ready,
'can_capture_input': input_ready, 'can_capture_input': input_ready,
'can_play_audio': bool(self.ffplay or self.aplay or self.ffmpeg), 'can_play_audio': bool(self.ffplay or self.aplay or self.ffmpeg),
'can_inject_mic': False, 'can_inject_mic': inject_ready,
'capture_output_ready': monitor_ready, 'capture_output_ready': monitor_ready,
'capture_output_backend': 'pulseaudio-monitor' if monitor_ready else None, 'capture_output_backend': 'pulseaudio-monitor' if monitor_ready else None,
'default_output_monitor': monitor_name, 'default_output_monitor': monitor_name,
'default_input_source': input_source, 'default_input_source': input_source,
'virtual_mic_source': inject_source,
'virtual_mic_sink': inject_sink,
'ffmpeg': bool(self.ffmpeg), 'ffmpeg': bool(self.ffmpeg),
'ffplay': bool(self.ffplay), 'ffplay': bool(self.ffplay),
'pactl': bool(self.pactl), 'pactl': bool(self.pactl),
...@@ -690,6 +693,26 @@ class PosixAudioController(AudioControllerBase): ...@@ -690,6 +693,26 @@ class PosixAudioController(AudioControllerBase):
return True, f'{sink}.monitor' return True, f'{sink}.monitor'
return False, None return False, None
def _virtual_mic_info(self) -> tuple[bool, Optional[str], Optional[str]]:
    """Report whether a known virtual-microphone source/sink pair exists.

    Lists sinks and sources via ``pactl list short ...`` and looks for one
    of the preferred (source, sink) name pairs, in priority order.

    Returns:
        ``(True, source_name, sink_name)`` for the first preferred pair
        whose source and sink are both present; ``(False, None, None)``
        when ``pactl`` is unavailable, a listing fails, or no pair matches.
    """
    if not self.pactl:
        return False, None, None

    def _device_names(kind: str) -> set:
        # One parser for both listings. Rows are split on tabs and the
        # second field is taken as the device name; a failed listing or a
        # missing/None stdout yields no names instead of raising.
        listing = self._run_quiet([self.pactl, 'list', 'short', kind])
        if not listing.get('success'):
            return set()
        names = set()
        for row in (listing.get('stdout') or '').splitlines():
            fields = row.split('\t')
            if len(fields) > 1:
                names.add(fields[1])
        return names

    sink_names = _device_names('sinks')
    source_names = _device_names('sources')

    preferred_pairs = [
        ('hermes_virtual_mic', 'hermes_mic_sink'),
        ('hermes_mic_sink.monitor', 'hermes_mic_sink'),
    ]
    for source_name, sink_name in preferred_pairs:
        if source_name in source_names and sink_name in sink_names:
            return True, source_name, sink_name
    return False, None, None
def _expand_output_path(self, path: Optional[str], suffix: str) -> str: def _expand_output_path(self, path: Optional[str], suffix: str) -> str:
if path: if path:
return str(Path(path).expanduser()) return str(Path(path).expanduser())
...@@ -741,6 +764,7 @@ class PosixAudioController(AudioControllerBase): ...@@ -741,6 +764,7 @@ class PosixAudioController(AudioControllerBase):
def get_audio_status(self) -> Dict[str, Any]: def get_audio_status(self) -> Dict[str, Any]:
monitor_ready, monitor_name = self._default_monitor_source() monitor_ready, monitor_name = self._default_monitor_source()
input_ready, input_source = self._default_input_source() input_ready, input_source = self._default_input_source()
inject_ready, inject_source, inject_sink = self._virtual_mic_info()
status = { status = {
'success': True, 'success': True,
'backend': self.backend, 'backend': self.backend,
...@@ -750,6 +774,9 @@ class PosixAudioController(AudioControllerBase): ...@@ -750,6 +774,9 @@ class PosixAudioController(AudioControllerBase):
'capture_output_ready': monitor_ready, 'capture_output_ready': monitor_ready,
'capture_input_ready': input_ready, 'capture_input_ready': input_ready,
'can_play_audio': bool(self.ffplay or self.aplay or self.ffmpeg), 'can_play_audio': bool(self.ffplay or self.aplay or self.ffmpeg),
'can_inject_mic': inject_ready,
'virtual_mic_source': inject_source,
'virtual_mic_sink': inject_sink,
} }
if self.pactl: if self.pactl:
info = self._run_quiet([self.pactl, 'info']) info = self._run_quiet([self.pactl, 'info'])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment