suby
/
mic-system


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185
							from __future__ import annotations

from dataclasses import asdict, dataclass
from datetime import datetime
import base64
from collections import deque
from concurrent.futures import Future, ProcessPoolExecutor
import math
import os
from pathlib import Path
import threading
import time

import numpy as np
from scipy import signal
import sounddevice as sd

from agc import AgcProcessor
from beamforming import beamform_delay_and_sum
from recorder import RecorderStatus, WavRecorder


def _gcc_phat_job(sig: np.ndarray, refsig: np.ndarray, sample_rate: int, max_tau: float) -> float:
    n = sig.size + refsig.size
    sig_fft = np.fft.rfft(sig, n=n)
    ref_fft = np.fft.rfft(refsig, n=n)
    cross = sig_fft * np.conj(ref_fft)
    denom = np.abs(cross)
    cross = cross / np.maximum(denom, 1e-10)
    cc = np.fft.irfft(cross, n=n)

    max_shift = int(min(n // 2, max_tau * sample_rate))
    if max_shift <= 0:
        return 0.0

    cc_window = np.concatenate((cc[-max_shift:], cc[: max_shift + 1]))
    shift = int(np.argmax(np.abs(cc_window)) - max_shift)
    return float(shift) / float(sample_rate)


def _estimate_speech_angle_job(
    mic1: np.ndarray,
    mic2: np.ndarray,
    sample_rate: int,
    mic_spacing: float,
    prev_angle_deg: float,
    prev_noise_floor: float,
) -> tuple[float, bool, float]:
    if mic1.size < 64 or mic2.size < 64:
        return float(prev_angle_deg), False, float(prev_noise_floor)

    high = min(3400.0, 0.45 * sample_rate)
    low = min(300.0, high * 0.5)
    if high <= low + 1.0:
        return float(prev_angle_deg), False, float(prev_noise_floor)

    try:
        sos = signal.butter(4, [low, high], btype="bandpass", fs=sample_rate, output="sos")
    except ValueError:
        return float(prev_angle_deg), False, float(prev_noise_floor)

    speech1 = signal.sosfilt(sos, mic1).astype(np.float32, copy=False)
    speech2 = signal.sosfilt(sos, mic2).astype(np.float32, copy=False)

    speech_energy = 0.5 * (np.mean(speech1 * speech1) + np.mean(speech2 * speech2))
    full_energy = 0.5 * (np.mean(mic1 * mic1) + np.mean(mic2 * mic2))
    speech_ratio = float(speech_energy / max(full_energy, 1e-12))

    noise_floor = 0.995 * float(prev_noise_floor) + 0.005 * float(speech_energy)
    speech_threshold = max(2.5e-7, noise_floor * 2.0)
    speech_detected = bool(speech_energy > speech_threshold and speech_ratio > 0.08)
    if not speech_detected:
        return float(prev_angle_deg), False, float(noise_floor)

    max_tau = mic_spacing / 343.0
    tau = _gcc_phat_job(speech1, speech2, sample_rate, max_tau=max_tau)
    sin_theta = np.clip((tau * 343.0) / max(mic_spacing, 1e-6), -1.0, 1.0)
    raw_angle = float(np.rad2deg(np.arcsin(sin_theta)))
    raw_angle = float(np.clip(raw_angle, -90.0, 90.0))

    angle = 0.88 * float(prev_angle_deg) + 0.12 * raw_angle
    return float(angle), True, float(noise_floor)


# Processing sample rates (Hz) accepted by AudioEngine.update_settings.
ALLOWED_SAMPLE_RATES = {16000, 22050, 24000, 32000, 48000}
# Values accepted for AudioSettings.mode.
ALLOWED_MODES = {"mic1", "mic2", "mono_mix", "beamforming"}
# Recording sources accepted by AudioEngine.start_recording.
ALLOWED_SOURCES = {"mic1", "mic2", "mono_mix", "beam", "compare_all", "hifi_raw"}
# Signals that may be selected as the monitor source.
ALLOWED_MONITOR_SOURCES = {"mic1", "mic2", "mono_mix", "beam"}
# Microphones selectable as the hi-fi source.
ALLOWED_HIFI_MICS = {"mic1", "mic2"}


@dataclass
class AudioSettings:
    """User-adjustable settings of the audio engine.

    Instances are replaced as a whole by ``AudioEngine.update_settings``,
    which validates and clamps incoming values to the ranges noted below.
    """

    # One of ALLOWED_MODES.
    mode: str = "beamforming"
    # Input gain in dB; clamped to 0..30 by update_settings.
    gain_db: float = 0.0
    # Enables the AGC processors.
    agc: bool = True
    # AGC attack time in ms; clamped to 1..50 by update_settings.
    attack_ms: float = 6.0
    # AGC release time in ms; clamped to 50..1000 by update_settings.
    release_ms: float = 280.0
    noise_suppression: bool = True
    speech_gate: bool = False
    hum_filter: bool = True
    limiter: bool = True
    beam_clarity: bool = True
    # When set, the callback bypasses processing and runs at the hardware rate.
    hifi_mode: bool = False
    # One of ALLOWED_HIFI_MICS.
    hifi_mic: str = "mic1"
    # Manual beam angle in degrees; clamped to -90..90 by update_settings.
    angle: float = 0.0
    # When set (in beamforming mode) the beam angle is estimated automatically.
    auto_beam: bool = True
    # Forced to False by update_settings whenever hifi_mode is enabled.
    monitor_on: bool = False
    # One of ALLOWED_MONITOR_SOURCES.
    monitor_source: str = "beam"
    # Processing sample rate in Hz; one of ALLOWED_SAMPLE_RATES.
    sample_rate: int = 16000


class AudioEngine:
    """Two-microphone capture and processing engine built on sounddevice.

    Audio arrives through an input-stream callback, is processed per chunk
    according to the current ``AudioSettings``, and can be recorded to WAV
    files and exposed to clients as packets.
    """

    # Number of microphones reported in the status dictionary.
    CHANNELS = 2
    # NOTE(review): the stream is opened as int32 and the callback keeps the
    # top 24 bits — confirm this is what BIT_DEPTH is meant to describe.
    BIT_DEPTH = 32
    # Block size (frames) requested from the input stream.
    CHUNK_SIZE = 2048
    # Rate (Hz) at which the input stream is opened.
    HARDWARE_SAMPLE_RATE = 48000
    # Microphone ring geometry; used only to derive MIC_SPACING below.
    RING_DIAMETER_M = 0.06
    RING_SLOTS = 4
    SLOT_STEP = 1  # neighboring slots (90 degrees). Use 2 for opposite mics.
    # Chord length between two slots SLOT_STEP apart on the ring, in metres.
    MIC_SPACING = RING_DIAMETER_M * math.sin(math.pi * SLOT_STEP / RING_SLOTS)
    # Audio received during this many seconds after stream start is dropped.
    STARTUP_IGNORE_SECONDS = 0.30
    # Speech-gate tuning; presumably consumed by _update_speech_gate — confirm.
    SPEECH_GATE_HOLD_SECONDS = 0.85
    SPEECH_GATE_FLOOR = 0.55
    SPEECH_GATE_ATTACK_SECONDS = 0.012
    SPEECH_GATE_RELEASE_SECONDS = 0.360
    # Noise-suppression tuning; presumably consumed by
    # _apply_noise_suppression — confirm.
    NOISE_SUPPRESS_OPEN_FLOOR = 0.72
    NOISE_SUPPRESS_CLOSED_FLOOR = 0.40
    NOISE_SUPPRESS_OPEN_STRENGTH = 0.30
    NOISE_SUPPRESS_CLOSED_STRENGTH = 0.55
    # Hum-filter tuning (high-pass cutoff, notch frequency and Q); presumably
    # consumed by _apply_hum_filter — confirm.
    HUM_HPF_CUTOFF_HZ = 75.0
    HUM_NOTCH_HZ = 50.0
    HUM_NOTCH_Q = 22.0
    # Beam-clarity tuning; presumably consumed by _apply_beam_clarity_blend —
    # confirm.
    BEAM_CLARITY_BLEND = 0.22
    BEAM_PRESENCE_BOOST = 0.20

    def __init__(self, recordings_dir: Path) -> None:
        """Initialise engine state; the input stream is not opened here.

        Args:
            recordings_dir: Directory handed to the WAV recorders.
        """
        # Guards state shared between the audio callback and the public API.
        self._lock = threading.Lock()
        self._stream: sd.InputStream | None = None
        self._stream_channels = self.CHANNELS
        self._input_device_name = "unknown"
        self._settings = AudioSettings()
        self._running = False
        # The callback drops audio until time.monotonic() passes this value.
        self._startup_deadline = 0.0
        self._auto_angle_deg = 0.0
        self._noise_floor = 1e-7
        self._vad_noise_floor = 1e-7
        # Filter caches and per-signal filter state; presumably maintained by
        # the filter helpers — confirm.
        self._speech_sos_cache: dict[int, np.ndarray | None] = {}
        self._hpf_sos_cache: dict[int, np.ndarray | None] = {}
        self._notch_cache: dict[int, tuple[np.ndarray, np.ndarray] | None] = {}
        self._hpf_state: dict[tuple[str, int], np.ndarray] = {}
        self._notch_state: dict[tuple[str, int], np.ndarray] = {}
        self._last_speech_detected = False
        # In auto-beam mode an angle job is submitted at most once every
        # `_angle_update_interval` callbacks.
        self._angle_update_counter = 0
        self._angle_update_interval = 4
        cpu_total = max(1, int(os.cpu_count() or 1))
        self._cpu_workers = max(1, min(4, cpu_total))
        # One worker fewer than `_cpu_workers`, but between 1 and 3.
        self._angle_workers = max(1, min(3, self._cpu_workers - 1))
        # Upper bound on angle jobs that may be outstanding at once.
        self._max_pending_angle_jobs = max(1, min(2, self._angle_workers))
        self._angle_executor: ProcessPoolExecutor | None = ProcessPoolExecutor(max_workers=self._angle_workers)
        self._angle_futures: deque[Future] = deque()
        self._speech_gate_hold_chunks = 0
        self._speech_gate_gain = 1.0
        # Per-signal noise power estimates, keyed by signal name.
        self._ns_noise_power = {
            "mic1": 1e-7,
            "mic2": 1e-7,
            "mono_mix": 1e-7,
            "beam": 1e-7,
        }
        self._presence_prev = {"beam": 0.0}

        self._recordings_dir = Path(recordings_dir)
        # Recorder for single-file recordings.
        self._recorder = WavRecorder(recordings_dir)
        # Recorders and filenames of an active "compare_all" recording.
        self._compare_recorders: dict[str, WavRecorder] = {}
        self._compare_filenames: dict[str, str] = {}
        # Optional recording length limit in seconds, and a latch so the
        # callback requests the automatic stop only once.
        self._record_duration_limit_sec: float | None = None
        self._auto_stop_requested = False
        self._monitor_queue: deque[np.ndarray] = deque()
        self._monitor_queue_samples = 0

        # One AGC per processed signal, created at the default sample rate.
        self._agc_mic1 = AgcProcessor(self._settings.sample_rate)
        self._agc_mic2 = AgcProcessor(self._settings.sample_rate)
        self._agc_beam = AgcProcessor(self._settings.sample_rate)

        self._latest_frame: dict[str, object] = self._make_empty_frame()
        self._stt_bridge = None

    def start(self) -> None:
        with self._lock:
            if self._running:
                return
        self._open_stream()

    def stop(self) -> None:
        with self._lock:
            stream = self._stream
            self._stream = None
            self._running = False
            angle_executor = self._angle_executor
            self._angle_executor = None
            angle_futures = list(self._angle_futures)
            self._angle_futures.clear()

        if stream is not None:
            stream.stop()
            stream.close()

        self.stop_recording()
        for fut in angle_futures:
            fut.cancel()
        if angle_executor is not None:
            angle_executor.shutdown(wait=False, cancel_futures=True)

    def is_running(self) -> bool:
        with self._lock:
            return self._running

    def set_stt_bridge(self, bridge) -> None:
        """Store ``bridge`` on the engine, replacing any previous one.

        NOTE(review): presumably a speech-to-text bridge fed elsewhere in the
        engine — confirm against the code that reads ``_stt_bridge``.
        """
        self._stt_bridge = bridge

    def get_settings(self) -> dict[str, object]:
        with self._lock:
            return asdict(self._settings)

    def get_status(self) -> dict[str, object]:
        rec_status = self._current_recording_status()
        return {
            "recording": rec_status.recording,
            "mic_count": self.CHANNELS,
            "hardware_sample_rate": self.HARDWARE_SAMPLE_RATE,
            "mic_spacing_m": self.MIC_SPACING,
            "auto_beam_angle_deg": self._auto_angle_deg,
            "input_device": self._input_device_name,
            "settings": self.get_settings(),
            "recording_status": asdict(rec_status),
            "audio_running": self.is_running(),
        }

    def get_latest_packet(self) -> dict[str, object]:
        """Build the ``audio_data`` packet for the most recent frame.

        The latest frame is copied under the lock and, when monitoring is on,
        a chunk of monitor audio is taken from the monitor queue. Waveforms
        are downsampled for display and hidden signals are sent as empty lists
        with an RMS of ``0.0``.

        Returns:
            A dictionary with waveform lists, RMS levels, beam/speech/monitor
            flags, recording state and the monitor audio as a base64 string
            (empty when there is none).
        """
        no_samples = np.empty(0, dtype=np.float32)

        with self._lock:
            snapshot = dict(self._latest_frame)

            def wave(key: str) -> np.ndarray:
                return np.asarray(snapshot.get(key, no_samples), dtype=np.float32)

            def flag(key: str, fallback: bool) -> bool:
                return bool(snapshot.get(key, fallback))

            mic1 = wave("mic1")
            mic2 = wave("mic2")
            beam = wave("beam")
            mono_mix = wave("mono_mix")
            show_mic2 = flag("show_mic2", True)
            show_beam = flag("show_beam", False)
            show_mono_mix = flag("show_mono_mix", False)
            beam_angle_deg = float(snapshot.get("beam_angle_deg", 0.0))
            auto_beam = flag("auto_beam", True)
            speech_detected = flag("speech_detected", False)
            speech_gate_open = flag("speech_gate_open", True)
            hifi_mode = flag("hifi_mode", False)
            monitor_on = flag("monitor_on", False)
            monitor_source = str(snapshot.get("monitor_source", "beam"))
            recording = flag("recording", False)
            rec_duration = float(snapshot.get("rec_duration", 0.0))

            if hifi_mode:
                monitor_rate = int(self.HARDWARE_SAMPLE_RATE)
            else:
                monitor_rate = int(self._settings.sample_rate)

            monitor_chunk = None
            if monitor_on:
                # At most 80 ms of audio per packet, but never fewer than 512 samples.
                budget = max(512, int(monitor_rate * 0.08))
                monitor_chunk = self._pop_monitor_chunk(max_samples=budget)

        def trace(samples: np.ndarray, visible: bool = True) -> list:
            return self._downsample_for_ui(samples).tolist() if visible else []

        def level(samples: np.ndarray, visible: bool = True):
            return self._rms(samples) if visible else 0.0

        packet = {
            "type": "audio_data",
            "mic1": trace(mic1),
            "mic2": trace(mic2, show_mic2),
            "beam": trace(beam, show_beam),
            "mono_mix": trace(mono_mix, show_mono_mix),
            "rms_mic1": level(mic1),
            "rms_mic2": level(mic2, show_mic2),
            "rms_beam": level(beam, show_beam),
            "rms_mono_mix": level(mono_mix, show_mono_mix),
            "beam_angle_deg": beam_angle_deg,
            "auto_beam": auto_beam,
            "speech_detected": speech_detected,
            "speech_gate_open": speech_gate_open,
            "hifi_mode": hifi_mode,
            "monitor_on": monitor_on,
            "monitor_source": monitor_source,
            "monitor_sr": monitor_rate,
            "monitor_chunk_b64": "",
            "recording": recording,
            "rec_duration": rec_duration,
        }
        has_monitor_audio = monitor_chunk is not None and monitor_chunk.size > 0
        if has_monitor_audio:
            packet["monitor_chunk_b64"] = self._encode_pcm16_base64(monitor_chunk)
        return packet

    def update_settings(self, updates: dict[str, object]) -> dict[str, object]:
        """Apply a partial settings update and return the resulting settings.

        Keys missing from ``updates`` keep their current value. Choice fields
        fall back to the current value when the new one is not allowed,
        numeric fields are clamped to their ranges, and enabling hi-fi mode
        forces monitoring off. The AGC processors are reconfigured with the
        new sample rate and attack/release times.

        Args:
            updates: Mapping of setting name to requested value.

        Returns:
            The settings after the update, as a dictionary.
        """

        def choose(key: str, fallback: str, allowed: set) -> str:
            candidate = str(updates.get(key, fallback))
            return candidate if candidate in allowed else fallback

        def bounded(key: str, fallback: float, lower: float, upper: float) -> float:
            return float(np.clip(float(updates.get(key, fallback)), lower, upper))

        def flag(key: str, fallback: bool) -> bool:
            return bool(updates.get(key, fallback))

        with self._lock:
            previous = self._settings

            mode = choose("mode", previous.mode, ALLOWED_MODES)

            sample_rate = int(updates.get("sample_rate", previous.sample_rate))
            if sample_rate not in ALLOWED_SAMPLE_RATES:
                sample_rate = previous.sample_rate

            gain_db = bounded("gain_db", previous.gain_db, 0.0, 30.0)
            agc = flag("agc", previous.agc)
            attack_ms = bounded("attack_ms", previous.attack_ms, 1.0, 50.0)
            release_ms = bounded("release_ms", previous.release_ms, 50.0, 1000.0)

            noise_suppression = flag("noise_suppression", previous.noise_suppression)
            speech_gate = flag("speech_gate", previous.speech_gate)
            hum_filter = flag("hum_filter", previous.hum_filter)
            limiter = flag("limiter", previous.limiter)
            beam_clarity = flag("beam_clarity", previous.beam_clarity)
            hifi_mode = flag("hifi_mode", previous.hifi_mode)
            hifi_mic = choose("hifi_mic", previous.hifi_mic, ALLOWED_HIFI_MICS)

            angle = bounded("angle", previous.angle, -90.0, 90.0)
            auto_beam = flag("auto_beam", previous.auto_beam)
            # Monitoring is not available in hi-fi mode.
            monitor_on = flag("monitor_on", previous.monitor_on) and not hifi_mode
            monitor_source = choose("monitor_source", previous.monitor_source, ALLOWED_MONITOR_SOURCES)

            self._settings = AudioSettings(
                mode=mode,
                gain_db=gain_db,
                agc=agc,
                attack_ms=attack_ms,
                release_ms=release_ms,
                noise_suppression=noise_suppression,
                speech_gate=speech_gate,
                hum_filter=hum_filter,
                limiter=limiter,
                beam_clarity=beam_clarity,
                hifi_mode=hifi_mode,
                hifi_mic=hifi_mic,
                angle=angle,
                auto_beam=auto_beam,
                monitor_on=monitor_on,
                monitor_source=monitor_source,
                sample_rate=sample_rate,
            )

            if not auto_beam:
                # Manual steering: the reported beam angle follows the setting.
                self._auto_angle_deg = angle
            if not monitor_on:
                # Also covers hi-fi mode, which forced monitor_on off above.
                self._clear_monitor_queue_locked()

            for agc_unit in (self._agc_mic1, self._agc_mic2, self._agc_beam):
                agc_unit.update(
                    sample_rate=sample_rate,
                    attack_ms=attack_ms,
                    release_ms=release_ms,
                )

        return self.get_settings()

    def start_recording(self, source: str, duration_sec: float | None = None) -> dict[str, object]:
        """Start a WAV recording and describe what was started.

        In hi-fi mode the requested source is ignored and a single mono file
        is recorded at the hardware sample rate. Otherwise ``"compare_all"``
        records mic1, the mono mix and the beam into three files, and any
        other source records one mono file at the processing sample rate
        (``"hifi_raw"`` falls back to the configured hi-fi microphone).

        Args:
            source: One of ``ALLOWED_SOURCES``; case and surrounding
                whitespace are ignored.
            duration_sec: Optional length limit. Values that are missing,
                unparseable or not positive mean "no limit"; other values are
                clamped to 1..3600 seconds.

        Returns:
            ``{"source", "filenames", "duration_limit_sec"}`` where the limit
            is ``0.0`` when no limit applies.

        Raises:
            ValueError: If ``source`` is not an allowed recording source.
            RuntimeError: If a recording is already active.
        """
        source = source.lower().strip()
        if source not in ALLOWED_SOURCES:
            raise ValueError("Invalid recording source")
        if self._current_recording_status().recording:
            raise RuntimeError("Recording is already active")

        limit = None
        if duration_sec is not None:
            try:
                duration_value = float(duration_sec)
            except (TypeError, ValueError):
                duration_value = 0.0
            if duration_value > 0.0:
                limit = float(np.clip(duration_value, 1.0, 3600.0))

        with self._lock:
            self._record_duration_limit_sec = limit
            self._auto_stop_requested = False

        settings = self.get_settings()
        sample_rate = int(settings["sample_rate"])
        hifi_mode = bool(settings.get("hifi_mode", False))
        hifi_mic = str(settings.get("hifi_mic", "mic1"))
        if hifi_mic not in ALLOWED_HIFI_MICS:
            hifi_mic = "mic1"

        if hifi_mode:
            timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            filename = self._recorder.start(
                source="hifi_raw",
                sample_rate=self.HARDWARE_SAMPLE_RATE,
                channels=1,
                filename=f"rec_{timestamp}_hifi_{hifi_mic}_48k.wav",
            )
            with self._lock:
                self._compare_recorders = {}
                self._compare_filenames = {}
            return {
                "source": "hifi_raw",
                "filenames": [filename],
                "duration_limit_sec": limit or 0.0,
            }

        if source == "hifi_raw":
            # Hi-fi mode is off: record the selected hi-fi microphone normally.
            source = hifi_mic

        if source == "compare_all":
            timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            recorders = {
                "mic1": WavRecorder(self._recordings_dir),
                "mono_mix": WavRecorder(self._recordings_dir),
                "beam": WavRecorder(self._recordings_dir),
            }
            filenames = {
                "mic1": f"rec_{timestamp}_mic1.wav",
                "mono_mix": f"rec_{timestamp}_mono_mix.wav",
                "beam": f"rec_{timestamp}_beam.wav",
            }
            started: list[WavRecorder] = []
            try:
                for key, recorder in recorders.items():
                    recorder.start(
                        source=key,
                        sample_rate=sample_rate,
                        channels=1,
                        filename=filenames[key],
                    )
                    started.append(recorder)
            except Exception:
                # These recorders are not yet registered on the engine, so
                # nothing else could ever stop them; close them here.
                for recorder in started:
                    try:
                        recorder.stop()
                    except Exception:
                        # Best-effort cleanup: the start() failure re-raised
                        # below is the error the caller needs to see.
                        continue
                raise
            with self._lock:
                self._compare_recorders = recorders
                self._compare_filenames = filenames
            return {
                "source": source,
                "filenames": [filenames["mic1"], filenames["mono_mix"], filenames["beam"]],
                "duration_limit_sec": limit or 0.0,
            }

        channels = 1
        filename = self._recorder.start(
            source=source,
            sample_rate=sample_rate,
            channels=channels,
        )
        with self._lock:
            self._compare_recorders = {}
            self._compare_filenames = {}
        return {
            "source": source,
            "filenames": [filename],
            "duration_limit_sec": limit or 0.0,
        }

    def stop_recording(self) -> RecorderStatus:
        with self._lock:
            compare_recorders = self._compare_recorders
            compare_filenames = dict(self._compare_filenames)
            self._compare_recorders = {}
            self._compare_filenames = {}
            self._record_duration_limit_sec = None
            self._auto_stop_requested = False

        if compare_recorders:
            max_duration = 0.0
            sample_rate = self._settings.sample_rate
            for recorder in compare_recorders.values():
                status = recorder.stop()
                max_duration = max(max_duration, status.duration_sec)
                sample_rate = status.sample_rate
            return RecorderStatus(
                recording=False,
                filename=",".join(compare_filenames.values()),
                duration_sec=max_duration,
                channels=1,
                sample_rate=sample_rate,
                source="compare_all",
            )

        return self._recorder.stop()

    def get_recording_status(self) -> RecorderStatus:
        """Return the current recording status (public wrapper)."""
        return self._current_recording_status()

    def _current_recording_status(self) -> RecorderStatus:
        with self._lock:
            compare_recorders = dict(self._compare_recorders)
            compare_filenames = dict(self._compare_filenames)

        if compare_recorders:
            statuses = [recorder.get_status() for recorder in compare_recorders.values()]
            is_recording = any(status.recording for status in statuses)
            duration = max((status.duration_sec for status in statuses), default=0.0)
            sample_rate = statuses[0].sample_rate if statuses else self._settings.sample_rate
            return RecorderStatus(
                recording=is_recording,
                filename=",".join(compare_filenames.values()),
                duration_sec=duration,
                channels=1,
                sample_rate=sample_rate,
                source="compare_all",
            )

        return self._recorder.get_status()

    def _open_stream(self) -> None:
        """Open and start the input stream, then mark the engine as running.

        The stream is opened at the hardware sample rate with int32 samples
        and ``_audio_callback`` as its callback. If starting fails, the opened
        stream is closed before the error is re-raised so the device is not
        left held by a stream nothing references. After a successful start,
        the stream details are published under the lock and the start-up
        deadline is armed.
        """
        device_idx, device_name, input_channels = self._resolve_input_device()

        stream = sd.InputStream(
            samplerate=self.HARDWARE_SAMPLE_RATE,
            device=device_idx,
            channels=input_channels,
            dtype="int32",
            blocksize=self.CHUNK_SIZE,
            callback=self._audio_callback,
        )
        try:
            stream.start()
        except Exception:
            stream.close()
            raise

        with self._lock:
            self._stream = stream
            self._stream_channels = input_channels
            self._input_device_name = device_name
            self._running = True
            # The callback discards audio until this deadline has passed.
            self._startup_deadline = time.monotonic() + self.STARTUP_IGNORE_SECONDS

    def _restart_stream(self) -> None:
        with self._lock:
            stream = self._stream
            self._stream = None
            self._running = False

        if stream is not None:
            stream.stop()
            stream.close()

        self._open_stream()

    def _audio_callback(self, indata: np.ndarray, frames: int, time_info: dict, status: sd.CallbackFlags) -> None:
        del frames, time_info
        if status:
            return

        with self._lock:
            if not self._running:
                return
            if time.monotonic() < self._startup_deadline:
                return
            settings = self._settings
            stream_channels = self._stream_channels

        pcm32 = indata.astype(np.int32, copy=False)
        pcm24 = pcm32 >> 8
        normalized = np.clip(pcm24.astype(np.float32) / 8388608.0, -1.0, 1.0)

        mic1_raw = normalized[:, 0]
        mic2_raw = normalized[:, 1] if stream_channels > 1 else mic1_raw.copy()
        processing_rate = self.HARDWARE_SAMPLE_RATE if settings.hifi_mode else settings.sample_rate

        if (not settings.hifi_mode) and settings.sample_rate != self.HARDWARE_SAMPLE_RATE:
            mic1_raw = self._resample_audio(mic1_raw, self.HARDWARE_SAMPLE_RATE, settings.sample_rate)
            mic2_raw = self._resample_audio(mic2_raw, self.HARDWARE_SAMPLE_RATE, settings.sample_rate)
            common_len = min(mic1_raw.size, mic2_raw.size)
            mic1_raw = mic1_raw[:common_len]
            mic2_raw = mic2_raw[:common_len]

        if settings.hifi_mode:
            hifi_mic = settings.hifi_mic if settings.hifi_mic in ALLOWED_HIFI_MICS else "mic1"
            hifi_signal = mic1_raw if hifi_mic == "mic1" else mic2_raw
            mic1_proc = hifi_signal
            mic2_proc = mic2_raw if hifi_mic == "mic1" else mic1_raw
            mono_mix_proc = hifi_signal
            beam_proc = hifi_signal
            angle_to_use = 0.0
            speech_active = True
            gate_open = True
        else:
            gain_linear = 10.0 ** (settings.gain_db / 20.0)
            mic1_base = np.clip(mic1_raw * gain_linear, -1.0, 1.0)
            mic2_base = np.clip(mic2_raw * gain_linear, -1.0, 1.0)
            mono_base = np.clip(0.5 * (mic1_base + mic2_base), -1.0, 1.0)
            angle_to_use = settings.angle
            speech_detected = False
            if settings.mode == "beamforming" and settings.auto_beam:
                latest_done: Future | None = None
                pending: deque[Future] = deque()
                while self._angle_futures:
                    fut = self._angle_futures.popleft()
                    if fut.done():
                        latest_done = fut
                    else:
                        pending.append(fut)
                self._angle_futures = pending

                if latest_done is not None:
                    try:
                        angle_est, speech_est, noise_floor = latest_done.result(timeout=0)
                        self._auto_angle_deg = float(angle_est)
                        self._last_speech_detected = bool(speech_est)
                        self._noise_floor = max(float(noise_floor), 1e-9)
                    except Exception:
                        pass

                self._angle_update_counter += 1
                if (
                    self._angle_update_counter >= self._angle_update_interval
                    and len(self._angle_futures) < self._max_pending_angle_jobs
                    and self._angle_executor is not None
                ):
                    self._angle_update_counter = 0
                    try:
                        fut = self._angle_executor.submit(
                            _estimate_speech_angle_job,
                            mic1_base.astype(np.float32, copy=True),
                            mic2_base.astype(np.float32, copy=True),
                            int(processing_rate),
                            float(self.MIC_SPACING),
                            float(self._auto_angle_deg),
                            float(self._noise_floor),
                        )
                        self._angle_futures.append(fut)
                    except Exception:
                        pass

                angle_to_use = self._auto_angle_deg
                speech_detected = self._last_speech_detected
            else:
                while self._angle_futures:
                    self._angle_futures.popleft().cancel()
                if not settings.auto_beam:
                    self._auto_angle_deg = settings.angle
                speech_detected = False

            speech_presence = self._estimate_speech_presence_fast(mic1_base, mic2_base)
            speech_active = bool(speech_detected or speech_presence)

            beam_proc = beamform_delay_and_sum(
                mic1_base,
                mic2_base,
                angle_deg=angle_to_use,
                sample_rate=processing_rate,
                mic_spacing=self.MIC_SPACING,
            )
            if settings.beam_clarity:
                beam_proc = self._apply_beam_clarity_blend(beam_proc, mono_base)
            gate_open, gate_gain = self._update_speech_gate(
                speech_detected=speech_active,
                sample_rate=processing_rate,
                chunk_len=max(beam_proc.size, 1),
                enabled=settings.speech_gate,
            )

            if settings.agc:
                mic1_proc = self._agc_mic1.process(mic1_base, speech_hint=gate_open)
                mic2_proc = self._agc_mic2.process(mic2_base, speech_hint=gate_open)
                beam_proc = self._agc_beam.process(beam_proc, speech_hint=gate_open)
            else:
                mic1_proc = mic1_base
                mic2_proc = mic2_base
            mono_mix_proc = np.clip(0.5 * (mic1_proc + mic2_proc), -1.0, 1.0)

            if settings.hum_filter:
                mic1_proc = self._apply_hum_filter(mic1_proc, processing_rate, "mic1")
                mic2_proc = self._apply_hum_filter(mic2_proc, processing_rate, "mic2")
                beam_proc = self._apply_hum_filter(beam_proc, processing_rate, "beam")
                mono_mix_proc = np.clip(0.5 * (mic1_proc + mic2_proc), -1.0, 1.0)
                mono_mix_proc = self._apply_hum_filter(mono_mix_proc, processing_rate, "mono_mix")

            if settings.noise_suppression:
                mic1_proc = self._apply_noise_suppression(mic1_proc, "mic1", speech_active=gate_open)
                mic2_proc = self._apply_noise_suppression(mic2_proc, "mic2", speech_active=gate_open)
                mono_mix_proc = self._apply_noise_suppression(mono_mix_proc, "mono_mix", speech_active=gate_open)
                beam_proc = self._apply_noise_suppression(beam_proc, "beam", speech_active=gate_open)

            if settings.speech_gate:
                mic1_proc = np.clip(mic1_proc * gate_gain, -1.0, 1.0).astype(np.float32, copy=False)
                mic2_proc = np.clip(mic2_proc * gate_gain, -1.0, 1.0).astype(np.float32, copy=False)
                mono_mix_proc = np.clip(mono_mix_proc * gate_gain, -1.0, 1.0).astype(np.float32, copy=False)
                beam_proc = np.clip(beam_proc * gate_gain, -1.0, 1.0).astype(np.float32, copy=False)

            if settings.limiter:
                mic1_proc = self._apply_limiter(mic1_proc)
                mic2_proc = self._apply_limiter(mic2_proc)
                mono_mix_proc = self._apply_limiter(mono_mix_proc)
                beam_proc = self._apply_limiter(beam_proc)

        rec_status = self._current_recording_status()
        if rec_status.recording:
            self._write_recording_chunk(rec_status.source, mic1_raw, mic2_raw, mic1_proc, mono_mix_proc, beam_proc)
            rec_status = self._current_recording_status()

        should_auto_stop = False
        with self._lock:
            duration_limit = self._record_duration_limit_sec
            if (
                rec_status.recording
                and duration_limit is not None
                and rec_status.duration_sec >= duration_limit
                and not self._auto_stop_requested
            ):
                self._auto_stop_requested = True
                should_auto_stop = True

        if should_auto_stop:
            threading.Thread(target=self.stop_recording, daemon=True).start()

        show_beam = (settings.mode == "beamforming") and (not settings.hifi_mode)
        show_mono_mix = (settings.mode == "mono_mix") and (not settings.hifi_mode)
        show_mic2 = not settings.hifi_mode

        if settings.monitor_on:
            monitor_map = {
                "mic1": mic1_proc,
                "mic2": mic2_proc,
                "mono_mix": mono_mix_proc,
                "beam": beam_proc,
            }
            monitor_signal = monitor_map.get(settings.monitor_source, beam_proc)
            with self._lock:
                self._push_monitor_chunk_locked(monitor_signal, processing_rate)

        frame = {
            "mic1": mic1_proc.astype(np.float32, copy=False),
            "mic2": mic2_proc.astype(np.float32, copy=False),
            "beam": beam_proc.astype(np.float32, copy=False),
            "mono_mix": mono_mix_proc.astype(np.float32, copy=False),
            "show_mic2": show_mic2,
            "show_beam": show_beam,
            "show_mono_mix": show_mono_mix,
            "beam_angle_deg": float(angle_to_use),
            "auto_beam": settings.auto_beam,
            "speech_detected": speech_active,
            "speech_gate_open": gate_open,
            "hifi_mode": settings.hifi_mode,
            "monitor_on": settings.monitor_on,
            "monitor_source": settings.monitor_source,
            "recording": rec_status.recording,
            "rec_duration": rec_status.duration_sec,
        }
        # Feed processed audio to STT bridge
        if self._stt_bridge is not None:
            if settings.mode == 'beamforming':
                stt_signal = beam_proc
            elif settings.mode == 'mono_mix':
                stt_signal = mono_mix_proc
            else:
                stt_signal = mic1_proc
            try:
                self._stt_bridge.feed_audio(stt_signal, processing_rate)
            except Exception:
                pass

        with self._lock:
            self._latest_frame = frame

    def _write_recording_chunk(
        self,
        source: str,
        mic1_raw: np.ndarray,
        mic2_raw: np.ndarray,
        mic1_proc: np.ndarray,
        mono_mix_proc: np.ndarray,
        beam_proc: np.ndarray,
    ) -> None:
        if source == "compare_all":
            with self._lock:
                mic1_rec = self._compare_recorders.get("mic1")
                mix_rec = self._compare_recorders.get("mono_mix")
                beam_rec = self._compare_recorders.get("beam")
            if mic1_rec is not None:
                mic1_rec.write(mic1_proc)
            if mix_rec is not None:
                mix_rec.write(mono_mix_proc)
            if beam_rec is not None:
                beam_rec.write(beam_proc)
            return

        if source == "mic1":
            self._recorder.write(mic1_raw)
            return
        if source == "mic2":
            self._recorder.write(mic2_raw)
            return
        if source == "mono_mix":
            self._recorder.write(mono_mix_proc)
            return
        if source == "beam":
            self._recorder.write(beam_proc)
            return
        if source == "hifi_raw":
            with self._lock:
                hifi_mic = self._settings.hifi_mic
            if hifi_mic == "mic2":
                self._recorder.write(mic2_raw)
            else:
                self._recorder.write(mic1_raw)
            return

    def _apply_beam_clarity_blend(self, beam: np.ndarray, mono: np.ndarray) -> np.ndarray:
        if beam.size == 0 or mono.size == 0:
            return beam.astype(np.float32, copy=False)
        n = min(beam.size, mono.size)
        if n <= 0:
            return beam.astype(np.float32, copy=False)
        blend = float(np.clip(self.BEAM_CLARITY_BLEND, 0.0, 0.5))
        out = ((1.0 - blend) * beam[:n] + blend * mono[:n]).astype(np.float32, copy=False)

        prev = float(self._presence_prev.get("beam", 0.0))
        hp = np.empty_like(out)
        hp[0] = out[0] - prev
        if out.size > 1:
            hp[1:] = out[1:] - out[:-1]
        self._presence_prev["beam"] = float(out[-1])

        out = out + float(self.BEAM_PRESENCE_BOOST) * hp
        return np.clip(out, -1.0, 1.0).astype(np.float32, copy=False)

    def _update_speech_gate(
        self,
        *,
        speech_detected: bool,
        sample_rate: int,
        chunk_len: int,
        enabled: bool,
    ) -> tuple[bool, float]:
        if not enabled:
            self._speech_gate_hold_chunks = 0
            self._speech_gate_gain = 1.0
            return True, 1.0

        chunk_seconds = max(float(chunk_len) / float(sample_rate), 1e-6)
        hold_chunks = max(1, int(round(self.SPEECH_GATE_HOLD_SECONDS / chunk_seconds)))
        if speech_detected:
            self._speech_gate_hold_chunks = hold_chunks
        elif self._speech_gate_hold_chunks > 0:
            self._speech_gate_hold_chunks -= 1

        gate_open = bool(speech_detected or self._speech_gate_hold_chunks > 0)
        target_gain = 1.0 if gate_open else self.SPEECH_GATE_FLOOR
        tau = self.SPEECH_GATE_ATTACK_SECONDS if target_gain > self._speech_gate_gain else self.SPEECH_GATE_RELEASE_SECONDS
        coeff = np.exp(-chunk_seconds / max(tau, 1e-4))
        self._speech_gate_gain = float(coeff * self._speech_gate_gain + (1.0 - coeff) * target_gain)
        return gate_open, self._speech_gate_gain

    def _apply_noise_suppression(self, audio: np.ndarray, key: str, *, speech_active: bool) -> np.ndarray:
        if audio.size == 0:
            return audio.astype(np.float32, copy=False)

        power = float(np.mean(audio * audio, dtype=np.float64))
        prev_noise = float(self._ns_noise_power.get(key, 1e-7))
        alpha = 0.01 if speech_active else 0.07
        noise = (1.0 - alpha) * prev_noise + alpha * power
        noise = max(noise, 1e-9)
        self._ns_noise_power[key] = noise

        ratio = noise / max(power, 1e-9)
        if speech_active:
            strength = self.NOISE_SUPPRESS_OPEN_STRENGTH
            floor = self.NOISE_SUPPRESS_OPEN_FLOOR
        else:
            strength = self.NOISE_SUPPRESS_CLOSED_STRENGTH
            floor = self.NOISE_SUPPRESS_CLOSED_FLOOR

        gain = float(np.clip(1.0 - strength * ratio, floor, 1.0))
        out = audio.astype(np.float32, copy=False) * gain
        return np.clip(out, -1.0, 1.0).astype(np.float32, copy=False)

    def _apply_hum_filter(self, audio: np.ndarray, sample_rate: int, channel_key: str) -> np.ndarray:
        if audio.size == 0:
            return audio.astype(np.float32, copy=False)

        out = audio.astype(np.float32, copy=False)
        state_key = (channel_key, int(sample_rate))

        sos = self._get_hpf_sos(sample_rate)
        if sos is not None:
            zi = self._hpf_state.get(state_key)
            if zi is None or zi.shape != (sos.shape[0], 2):
                zi = np.zeros((sos.shape[0], 2), dtype=np.float32)
            out, zi_new = signal.sosfilt(sos, out, zi=zi)
            self._hpf_state[state_key] = zi_new.astype(np.float32, copy=False)
            out = out.astype(np.float32, copy=False)

        notch = self._get_notch_coeff(sample_rate)
        if notch is not None:
            b, a = notch
            zi = self._notch_state.get(state_key)
            expected = max(len(a), len(b)) - 1
            if zi is None or zi.size != expected:
                zi = np.zeros(expected, dtype=np.float32)
            out, zi_new = signal.lfilter(b, a, out, zi=zi)
            self._notch_state[state_key] = zi_new.astype(np.float32, copy=False)
            out = out.astype(np.float32, copy=False)

        return out

    def _get_hpf_sos(self, sample_rate: int) -> np.ndarray | None:
        cached = self._hpf_sos_cache.get(sample_rate, "__missing__")
        if isinstance(cached, np.ndarray):
            return cached
        if cached is None:
            return None
        cutoff = min(self.HUM_HPF_CUTOFF_HZ, 0.45 * sample_rate)
        if cutoff < 20.0:
            self._hpf_sos_cache[sample_rate] = None
            return None
        try:
            sos = signal.butter(2, cutoff, btype="highpass", fs=sample_rate, output="sos")
        except ValueError:
            self._hpf_sos_cache[sample_rate] = None
            return None
        self._hpf_sos_cache[sample_rate] = sos
        return sos

    def _get_notch_coeff(self, sample_rate: int) -> tuple[np.ndarray, np.ndarray] | None:
        cached = self._notch_cache.get(sample_rate, "__missing__")
        if isinstance(cached, tuple):
            return cached
        if cached is None:
            return None
        nyquist = 0.5 * sample_rate
        if nyquist <= self.HUM_NOTCH_HZ * 1.2:
            self._notch_cache[sample_rate] = None
            return None
        w0 = self.HUM_NOTCH_HZ / nyquist
        try:
            b, a = signal.iirnotch(w0, self.HUM_NOTCH_Q)
        except ValueError:
            self._notch_cache[sample_rate] = None
            return None
        coeff = (b.astype(np.float32), a.astype(np.float32))
        self._notch_cache[sample_rate] = coeff
        return coeff

    @staticmethod
    def _apply_limiter(audio: np.ndarray) -> np.ndarray:
        if audio.size == 0:
            return audio.astype(np.float32, copy=False)
        x = np.clip(audio.astype(np.float32, copy=False), -1.0, 1.0)
        threshold = 0.82
        abs_x = np.abs(x)
        if not np.any(abs_x > threshold):
            return x.astype(np.float32, copy=False)

        out = x.copy()
        over = abs_x > threshold
        norm = (abs_x[over] - threshold) / max(1.0 - threshold, 1e-6)
        compressed = threshold + (1.0 - threshold) * (np.tanh(2.2 * norm) / np.tanh(2.2))
        out[over] = np.sign(x[over]) * compressed
        return out.astype(np.float32, copy=False)

    @staticmethod
    def _downsample_for_ui(audio: np.ndarray, target_points: int = 320) -> np.ndarray:
        if audio.size <= target_points:
            return audio.astype(np.float32, copy=False)

        step = int(np.ceil(audio.size / target_points))
        sampled = audio[::step]
        if sampled.size > target_points:
            sampled = sampled[:target_points]
        return sampled.astype(np.float32, copy=False)

    @staticmethod
    def _rms(audio: np.ndarray) -> float:
        if audio.size == 0:
            return 0.0
        return float(np.sqrt(np.mean(np.square(audio), dtype=np.float64)))

    @staticmethod
    def _resample_audio(audio: np.ndarray, source_rate: int, target_rate: int) -> np.ndarray:
        if audio.size == 0 or source_rate == target_rate:
            return audio.astype(np.float32, copy=False)

        if source_rate == 48000 and target_rate == 16000:
            usable = audio.size - (audio.size % 3)
            if usable <= 0:
                return audio.astype(np.float32, copy=False)
            # Fast decimation-by-3 tuned for speech workloads on Zero 2W.
            grouped = audio[:usable].reshape(-1, 3)
            return grouped.mean(axis=1, dtype=np.float32).astype(np.float32, copy=False)

        gcd = math.gcd(source_rate, target_rate)
        up = target_rate // gcd
        down = source_rate // gcd
        resampled = signal.resample_poly(audio, up=up, down=down)
        return resampled.astype(np.float32, copy=False)

    def _resolve_input_device(self) -> tuple[int | None, str, int]:
        """Pick the audio input device to open.

        Selection order:

        1. The sounddevice default input, if it reports at least one input
           channel.
        2. The first device with at least ``self.CHANNELS`` inputs whose
           lower-cased name contains ``google``, ``voicehat``, ``i2s`` or
           ``mic``.
        3. The first device with at least ``self.CHANNELS`` inputs.
        4. The first device with any input channel at all.

        Returns:
            ``(device_index, device_name, channels)`` where ``channels`` is
            ``min(self.CHANNELS, max_input_channels)`` of the chosen device.

        Raises:
            RuntimeError: if no device exposes an input channel.
        """
        try:
            default_input = sd.default.device[0]
            if isinstance(default_input, int) and default_input >= 0:
                info = sd.query_devices(default_input, "input")
                max_inputs = int(info.get("max_input_channels", 0))
                if max_inputs > 0:
                    return int(default_input), str(info.get("name", f"device-{default_input}")), min(self.CHANNELS, max_inputs)
        except Exception:
            # Best effort only: any failure probing the default device falls
            # through to the full device scan below.
            pass

        preferred: tuple[int, dict] | None = None
        full_fallback: tuple[int, dict] | None = None
        any_fallback: tuple[int, dict] | None = None

        for idx, raw_info in enumerate(sd.query_devices()):
            info = dict(raw_info)
            max_inputs = int(info.get("max_input_channels", 0))
            if max_inputs <= 0:
                continue

            device = (idx, info)
            # Last resort: remember the first device that can capture at all.
            if any_fallback is None:
                any_fallback = device
            if max_inputs < self.CHANNELS:
                continue

            name = str(info.get("name", "")).lower()
            if any(tag in name for tag in ("google", "voicehat", "i2s", "mic")):
                preferred = device
                break
            # Kept separate from ``any_fallback`` so that a device with enough
            # channels wins over an earlier device with too few.
            if full_fallback is None:
                full_fallback = device

        chosen = preferred or full_fallback or any_fallback
        if chosen is None:
            raise RuntimeError("No audio input device available")

        idx, info = chosen
        max_inputs = int(info.get("max_input_channels", 1))
        channels = min(self.CHANNELS, max_inputs)
        return int(idx), str(info.get("name", f"device-{idx}")), channels

    def _push_monitor_chunk_locked(self, chunk: np.ndarray, sample_rate: int) -> None:
        samples = chunk.astype(np.float32, copy=True)
        if samples.size == 0:
            return
        self._monitor_queue.append(samples)
        self._monitor_queue_samples += samples.size
        max_samples = max(sample_rate * 2, 2048)
        while self._monitor_queue_samples > max_samples and self._monitor_queue:
            dropped = self._monitor_queue.popleft()
            self._monitor_queue_samples -= dropped.size

    def _pop_monitor_chunk(self, max_samples: int) -> np.ndarray | None:
        if self._monitor_queue_samples <= 0 or not self._monitor_queue:
            return None

        take: list[np.ndarray] = []
        collected = 0
        while self._monitor_queue and collected < max_samples:
            chunk = self._monitor_queue[0]
            remaining = max_samples - collected
            if chunk.size <= remaining:
                take.append(chunk)
                collected += chunk.size
                self._monitor_queue.popleft()
            else:
                take.append(chunk[:remaining])
                self._monitor_queue[0] = chunk[remaining:]
                collected += remaining
                break

        self._monitor_queue_samples -= collected
        if not take:
            return None
        if len(take) == 1:
            return take[0]
        return np.concatenate(take).astype(np.float32, copy=False)

    @staticmethod
    def _encode_pcm16_base64(audio: np.ndarray) -> str:
        pcm16 = (np.clip(audio, -1.0, 1.0) * 32767.0).astype(np.int16)
        return base64.b64encode(pcm16.tobytes()).decode("ascii")

    def _clear_monitor_queue_locked(self) -> None:
        self._monitor_queue.clear()
        self._monitor_queue_samples = 0

    def _estimate_speech_presence_fast(self, mic1: np.ndarray, mic2: np.ndarray) -> bool:
        if mic1.size == 0 or mic2.size == 0:
            return False
        energy = 0.5 * (np.mean(mic1 * mic1) + np.mean(mic2 * mic2))
        energy = float(max(energy, 1e-10))

        if energy < self._vad_noise_floor:
            alpha = 0.05
        else:
            alpha = 0.004
        self._vad_noise_floor = (1.0 - alpha) * self._vad_noise_floor + alpha * energy
        threshold = max(2.2e-7, self._vad_noise_floor * 1.9)
        return bool(energy > threshold)

    @staticmethod
    def _gcc_phat(sig: np.ndarray, refsig: np.ndarray, sample_rate: int, max_tau: float) -> float:
        n = sig.size + refsig.size
        sig_fft = np.fft.rfft(sig, n=n)
        ref_fft = np.fft.rfft(refsig, n=n)
        cross = sig_fft * np.conj(ref_fft)
        denom = np.abs(cross)
        cross = cross / np.maximum(denom, 1e-10)
        cc = np.fft.irfft(cross, n=n)

        max_shift = int(min(n // 2, max_tau * sample_rate))
        if max_shift <= 0:
            return 0.0

        cc_window = np.concatenate((cc[-max_shift:], cc[: max_shift + 1]))
        shift = int(np.argmax(np.abs(cc_window)) - max_shift)
        return float(shift) / float(sample_rate)

    def _estimate_speech_angle(self, mic1: np.ndarray, mic2: np.ndarray, sample_rate: int) -> tuple[float, bool]:
        if mic1.size < 64 or mic2.size < 64:
            return self._auto_angle_deg, False

        high = min(3400.0, 0.45 * sample_rate)
        low = min(300.0, high * 0.5)
        if high <= low + 1.0:
            return self._auto_angle_deg, False

        sos = self._get_speech_sos(sample_rate, low, high)
        if sos is None:
            return self._auto_angle_deg, False
        speech1 = signal.sosfilt(sos, mic1).astype(np.float32, copy=False)
        speech2 = signal.sosfilt(sos, mic2).astype(np.float32, copy=False)

        speech_energy = 0.5 * (np.mean(speech1 * speech1) + np.mean(speech2 * speech2))
        full_energy = 0.5 * (np.mean(mic1 * mic1) + np.mean(mic2 * mic2))
        speech_ratio = float(speech_energy / max(full_energy, 1e-12))

        self._noise_floor = 0.995 * self._noise_floor + 0.005 * float(speech_energy)
        speech_threshold = max(2.5e-7, self._noise_floor * 2.0)
        speech_detected = bool(speech_energy > speech_threshold and speech_ratio > 0.08)
        if not speech_detected:
            return self._auto_angle_deg, False

        max_tau = self.MIC_SPACING / 343.0
        tau = self._gcc_phat(speech1, speech2, sample_rate, max_tau=max_tau)
        sin_theta = np.clip((tau * 343.0) / self.MIC_SPACING, -1.0, 1.0)
        raw_angle = float(np.rad2deg(np.arcsin(sin_theta)))
        raw_angle = float(np.clip(raw_angle, -90.0, 90.0))

        self._auto_angle_deg = 0.88 * self._auto_angle_deg + 0.12 * raw_angle
        return self._auto_angle_deg, True

    def _get_speech_sos(self, sample_rate: int, low: float, high: float) -> np.ndarray | None:
        cached = self._speech_sos_cache.get(sample_rate, "__missing__")
        if isinstance(cached, np.ndarray):
            return cached
        if cached is None:
            return None
        try:
            sos = signal.butter(4, [low, high], btype="bandpass", fs=sample_rate, output="sos")
        except ValueError:
            self._speech_sos_cache[sample_rate] = None
            return None
        self._speech_sos_cache[sample_rate] = sos
        return sos

    @staticmethod
    def _make_empty_frame() -> dict[str, object]:
        empty = np.empty(0, dtype=np.float32)
        return {
            "mic1": empty,
            "mic2": empty,
            "beam": empty,
            "mono_mix": empty,
            "show_mic2": True,
            "show_beam": False,
            "show_mono_mix": False,
            "beam_angle_deg": 0.0,
            "auto_beam": True,
            "speech_detected": False,
            "speech_gate_open": True,
            "hifi_mode": False,
            "monitor_on": False,
            "monitor_source": "beam",
            "recording": False,
            "rec_duration": 0.0,
        }