// ── STT Integration ──

/**
 * Client-side speech-to-text state.
 *
 * The settings fields mirror the `stt_*` keys sent by sttSendSettings();
 * the remaining fields hold the live transcript.
 */
const sttState = {
  enabled: false,
  language: "pl",
  timestamps: true,
  diarize: true,
  itn: true,
  detect_emotion: false,
  server_vad: false,
  vad_threshold: 0.3,
  vad_pad_ms: 400,
  vad_min_ms: 100,
  connected: false,
  segments: [], // finalized segments
  partialText: "", // current partial
  speakerMap: {}, // SPEAKER_00 -> {name, score}
};

/** DOM elements of the STT panel, looked up once by id. */
const sttEls = {
  enabled: document.getElementById("sttEnabled"),
  status: document.getElementById("sttStatus"),
  controls: document.getElementById("sttControls"),
  language: document.getElementById("sttLanguage"),
  timestamps: document.getElementById("sttTimestamps"),
  diarize: document.getElementById("sttDiarize"),
  itn: document.getElementById("sttItn"),
  emotion: document.getElementById("sttEmotion"),
  vad: document.getElementById("sttVad"),
  vadControls: document.getElementById("sttVadControls"),
  vadThreshold: document.getElementById("sttVadThreshold"),
  vadThresholdValue: document.getElementById("sttVadThresholdValue"),
  vadPadMs: document.getElementById("sttVadPadMs"),
  vadPadMsValue: document.getElementById("sttVadPadMsValue"),
  output: document.getElementById("sttOutput"),
};

/** Emit the current STT settings to the server as a `stt_settings` client message. */
function sttSendSettings() {
  socket.emit("client_message", {
    type: "stt_settings",
    stt_enabled: sttState.enabled,
    stt_language: sttState.language,
    stt_timestamps: sttState.timestamps,
    stt_diarize: sttState.diarize,
    stt_itn: sttState.itn,
    stt_detect_emotion: sttState.detect_emotion,
    stt_server_vad: sttState.server_vad,
    stt_vad_threshold: sttState.vad_threshold,
    stt_vad_pad_ms: sttState.vad_pad_ms,
    stt_vad_min_ms: sttState.vad_min_ms,
  });
}

/**
 * Format a number of seconds as zero-padded "MM:SS".
 *
 * @param {number} seconds - Time in seconds; 0 is valid and yields "00:00".
 * @returns {string} "MM:SS", or "" when the value is missing or not finite.
 */
function sttFormatTime(seconds) {
  const total = Number(seconds);
  // Keep 0 as a real value, but reject undefined/null/NaN/"" and +-Infinity.
  if ((!seconds && seconds !== 0) || !Number.isFinite(total)) return "";
  const minutes = Math.floor(total / 60);
  const secs = Math.floor(total % 60);
  return String(minutes).padStart(2, "0") + ":" + String(secs).padStart(2, "0");
}

/**
 * Map a speaker id to one of eight class names, "stt-speaker-0" .. "stt-speaker-7".
 *
 * @param {string} speakerId - Speaker id; the first run of digits selects the class.
 * @returns {string} The class name, or "" for an empty id.
 */
function sttSpeakerColor(speakerId) {
  if (!speakerId) return "";
  // String() so a numeric id does not throw on .match().
  const digits = String(speakerId).match(/(\d+)/);
  const idx = digits ? parseInt(digits[1], 10) % 8 : 0;
  return "stt-speaker-" + idx;
}

/**
 * Resolve the label shown for a speaker.
 *
 * @param {string} speakerId - Speaker id as received in a segment.
 * @returns {string} The mapped name from sttState.speakerMap when it is set and
 *   not "Unknown"; otherwise the id with "SPEAKER_" replaced by "Mowca ".
 */
function sttSpeakerName(speakerId) {
  if (!speakerId) return "";
  // Own-property check so ids like "constructor" never resolve to Object.prototype members.
  const known = Object.prototype.hasOwnProperty.call(sttState.speakerMap, speakerId)
    ? sttState.speakerMap[speakerId]
    : undefined;
  if (known && known.name && known.name !== "Unknown") {
    return known.name;
  }
  return String(speakerId).replace("SPEAKER_", "Mowca ");
}

/**
 * Bucket a confidence value into a class name.
 *
 * @param {number} conf - Confidence value.
 * @returns {string} "conf-high" (>= 0.85), "conf-mid" (>= 0.6) or "conf-low".
 */
function sttConfidenceClass(conf) {
  if (conf >= 0.85) return "conf-high";
  if (conf >= 0.6) return "conf-mid";
  return "conf-low";
}

/**
 * Create an element holding plain text and append it to `parent`.
 *
 * The text is assigned through textContent, so it is never parsed as HTML.
 *
 * @param {Node} parent - Node to append to.
 * @param {string} tagName - Element tag to create.
 * @param {string} text - Text content.
 * @param {string} [className] - Optional class attribute value.
 * @returns {HTMLElement} The created element.
 */
function sttAppendText(parent, tagName, text, className) {
  const node = document.createElement(tagName);
  if (className) node.className = className;
  node.textContent = text;
  parent.appendChild(node);
  return node;
}

/**
 * Build the DOM for one transcript segment.
 *
 * All message-supplied text (speaker names, words, segment text, emotion) is
 * inserted as text nodes rather than concatenated into innerHTML, so markup in
 * a transcription cannot be injected into the page.
 *
 * NOTE(review): only class names visible in this file are applied
 * (stt-segment, partial, stt-speaker-N, conf-*). Confirm against the stylesheet
 * whether the timestamp, badge and emotion spans need their own classes.
 *
 * @param {object} seg - Segment; reads duration, speakers, words, text,
 *   confidence and emotion.
 * @param {boolean} isPartial - True for the in-progress partial segment.
 * @returns {HTMLDivElement} The rendered segment element.
 */
function sttRenderSegment(seg, isPartial) {
  const root = document.createElement("div");
  root.className = "stt-segment" + (isPartial ? " partial" : "");

  // Timestamp (a missing or zero duration shows nothing).
  if (seg.duration && sttState.timestamps) {
    sttAppendText(root, "span", "[" + sttFormatTime(seg.duration) + "]");
  }

  const speakers = Array.isArray(seg.speakers) ? seg.speakers : [];
  const words = Array.isArray(seg.words) ? seg.words : [];

  if (speakers.length > 0 && sttState.diarize) {
    // One row per speaker turn.
    speakers.forEach((sp) => {
      const row = document.createElement("div");
      sttAppendText(row, "span", sttSpeakerName(sp.speaker), sttSpeakerColor(sp.speaker));
      if (sp.start !== undefined && sttState.timestamps) {
        sttAppendText(row, "span", sttFormatTime(sp.start) + "-" + sttFormatTime(sp.end));
      }
      sttAppendText(row, "span", sp.text || "");
      root.appendChild(row);
    });
  } else if (words.length > 0) {
    // Per-word confidence classes.
    words.forEach((w) => {
      // `??` rather than `||`: a real confidence of 0 must stay 0 (conf-low),
      // only a missing value defaults to 1.0.
      const cls = sttConfidenceClass(w.confidence ?? 1.0);
      sttAppendText(root, "span", w.word ?? "", cls);
      root.appendChild(document.createTextNode(" "));
    });
  } else {
    sttAppendText(root, "span", seg.text || "");
  }

  // Confidence badge: finalized segments only; a missing or zero value shows no badge.
  if (seg.confidence && !isPartial) {
    sttAppendText(root, "span", Math.round(seg.confidence * 100) + "%");
  }

  // Emotion label.
  if (seg.emotion && sttState.detect_emotion) {
    sttAppendText(root, "span", seg.emotion);
  }

  return root;
}

/** Re-render every finalized segment plus the current partial, then scroll to the bottom. */
function sttRenderAll() {
  const output = sttEls.output;
  const batch = document.createDocumentFragment();

  sttState.segments.forEach((seg) => {
    batch.appendChild(sttRenderSegment(seg, false));
  });
  if (sttState.partialText) {
    batch.appendChild(sttRenderSegment({ text: sttState.partialText }, true));
  }

  output.textContent = "";
  output.appendChild(batch);

  // Auto-scroll to bottom
  output.scrollTop = output.scrollHeight;
}

/**
 * Handle one `stt_message` payload.
 *
 * Recognized types: "stt_status", "partial", "final" and "speaker_update";
 * anything else is ignored.
 *
 * @param {object} msg - Message with a `type` field.
 */
function sttHandleMessage(msg) {
  if (!msg || !msg.type) return;

  switch (msg.type) {
    case "stt_status":
      sttState.connected = Boolean(msg.connected);
      sttUpdateStatus();
      return;

    case "partial":
      sttState.partialText = msg.text || "";
      sttRenderAll();
      return;

    case "final":
      sttState.partialText = "";
      // Finals without text only clear the partial.
      if (msg.text) {
        sttState.segments.push(msg);
      }
      if (msg.speaker_map) {
        Object.assign(sttState.speakerMap, msg.speaker_map);
      }
      sttRenderAll();
      return;

    case "speaker_update":
      if (!msg.speaker_map) return;
      Object.assign(sttState.speakerMap, msg.speaker_map);
      sttRenderAll();
      return;

    default:
      return;
  }
}

/** Update the status element's text and background from the enabled/connected flags. */
function sttUpdateStatus() {
  const badge = sttEls.status;
  if (!sttState.enabled) {
    badge.textContent = "STT: wylaczone";
    badge.style.background = "";
  } else if (sttState.connected) {
    badge.textContent = "STT: polaczone";
    badge.style.background = "rgba(81, 207, 102, 0.25)";
  } else {
    badge.textContent = "STT: laczenie...";
    badge.style.background = "rgba(255, 210, 0, 0.2)";
  }
}

/** Show the STT controls only when enabled, and the VAD controls only when server VAD is on. */
function sttRefreshControls() {
  sttEls.controls.classList.toggle("hidden", !sttState.enabled);
  sttEls.vadControls.classList.toggle("hidden", !sttState.server_vad);
}
/**
 * Attach change/input listeners to the STT controls.
 *
 * Each handler copies the control value into sttState and sends the new
 * settings to the server. Handlers for flags that affect how segments are
 * drawn also re-render the transcript.
 */
function sttBindControls() {
  sttEls.enabled.addEventListener("change", () => {
    sttState.enabled = sttEls.enabled.checked;
    if (!sttState.enabled) {
      // Disabling drops the connection state and the whole transcript.
      sttState.connected = false;
      sttState.segments = [];
      sttState.partialText = "";
      sttState.speakerMap = {};
      sttRenderAll();
    }
    sttRefreshControls();
    sttUpdateStatus();
    sttSendSettings();
  });

  sttEls.language.addEventListener("change", () => {
    sttState.language = sttEls.language.value;
    // Clear transcript on language change
    sttState.segments = [];
    sttState.partialText = "";
    sttRenderAll();
    sttSendSettings();
  });

  // sttRenderSegment reads these three flags, so the segments already on
  // screen must be redrawn when one changes; otherwise the transcript stays
  // stale until the next STT message arrives.
  const bindDisplayToggle = (checkbox, key) => {
    checkbox.addEventListener("change", () => {
      sttState[key] = checkbox.checked;
      sttRenderAll();
      sttSendSettings();
    });
  };
  bindDisplayToggle(sttEls.timestamps, "timestamps");
  bindDisplayToggle(sttEls.diarize, "diarize");
  bindDisplayToggle(sttEls.emotion, "detect_emotion");

  sttEls.itn.addEventListener("change", () => {
    sttState.itn = sttEls.itn.checked;
    sttSendSettings();
  });

  sttEls.vad.addEventListener("change", () => {
    sttState.server_vad = sttEls.vad.checked;
    sttRefreshControls();
    sttSendSettings();
  });

  // Sliders: update the label on every "input", send settings only on "change".
  sttEls.vadThreshold.addEventListener("input", () => {
    sttState.vad_threshold = parseFloat(sttEls.vadThreshold.value);
    sttEls.vadThresholdValue.textContent = sttState.vad_threshold.toFixed(2);
  });
  sttEls.vadThreshold.addEventListener("change", sttSendSettings);

  sttEls.vadPadMs.addEventListener("input", () => {
    sttState.vad_pad_ms = parseInt(sttEls.vadPadMs.value, 10);
    sttEls.vadPadMsValue.textContent = String(sttState.vad_pad_ms);
  });
  sttEls.vadPadMs.addEventListener("change", sttSendSettings);
}

/**
 * Merge `stt_*` fields from a status/settings payload into sttState and sync
 * the controls to match.
 *
 * Fields that are `undefined` are left untouched. Numeric fields that are
 * null or not finite are ignored so the sliders never display "NaN".
 *
 * @param {object} data - Payload that may carry `stt_*` keys; null/undefined
 *   is treated as an empty object.
 */
function sttLoadFromStatus(data) {
  const src = data || {};
  const shownBefore = [sttState.timestamps, sttState.diarize, sttState.detect_emotion];

  const setFlag = (key, value) => {
    if (value !== undefined) sttState[key] = Boolean(value);
  };
  const setNumber = (key, value) => {
    if (value === undefined || value === null) return;
    const parsed = Number(value);
    if (Number.isFinite(parsed)) sttState[key] = parsed;
  };

  setFlag("enabled", src.stt_enabled);
  if (src.stt_language !== undefined) sttState.language = src.stt_language;
  setFlag("timestamps", src.stt_timestamps);
  setFlag("diarize", src.stt_diarize);
  setFlag("itn", src.stt_itn);
  setFlag("detect_emotion", src.stt_detect_emotion);
  setFlag("server_vad", src.stt_server_vad);
  setNumber("vad_threshold", src.stt_vad_threshold);
  setNumber("vad_pad_ms", src.stt_vad_pad_ms);
  setNumber("vad_min_ms", src.stt_vad_min_ms);
  setFlag("connected", src.stt_connected);

  sttEls.enabled.checked = sttState.enabled;
  sttEls.language.value = sttState.language;
  sttEls.timestamps.checked = sttState.timestamps;
  sttEls.diarize.checked = sttState.diarize;
  sttEls.itn.checked = sttState.itn;
  sttEls.emotion.checked = sttState.detect_emotion;
  sttEls.vad.checked = sttState.server_vad;
  sttEls.vadThreshold.value = String(sttState.vad_threshold);
  sttEls.vadThresholdValue.textContent = sttState.vad_threshold.toFixed(2);
  sttEls.vadPadMs.value = String(sttState.vad_pad_ms);
  sttEls.vadPadMsValue.textContent = String(sttState.vad_pad_ms);

  sttRefreshControls();
  sttUpdateStatus();

  // Redraw only when a flag that affects rendering actually changed, so
  // routine status pushes do not rebuild the transcript or move its scroll position.
  const shownAfter = [sttState.timestamps, sttState.diarize, sttState.detect_emotion];
  if (shownAfter.some((flag, i) => flag !== shownBefore[i])) {
    sttRenderAll();
  }
}

/**
 * Copy the audio settings object from a status payload into the shared `state`.
 * Used by both the HTTP status load and the "status" socket event.
 *
 * @param {object} settings - The `settings` object of a status payload.
 */
function sttApplyCoreSettings(settings) {
  state.mode = settings.mode;
  state.gain_db = settings.gain_db;
  state.agc = settings.agc;
  state.attack_ms = settings.attack_ms;
  state.release_ms = settings.release_ms;
  state.noise_suppression = Boolean(settings.noise_suppression);
  state.speech_gate = Boolean(settings.speech_gate);
  state.hum_filter = Boolean(settings.hum_filter);
  state.limiter = Boolean(settings.limiter);
  state.beam_clarity = Boolean(settings.beam_clarity);
  state.hifi_mode = Boolean(settings.hifi_mode);
  state.hifi_mic = settings.hifi_mic || "mic1";
  state.angle = settings.angle;
  state.auto_beam = Boolean(settings.auto_beam);
  state.monitor_on = Boolean(settings.monitor_on);
  state.monitor_source = settings.monitor_source || "beam";
  state.sample_rate = settings.sample_rate;
}

// Hook into existing socket events
socket.on("stt_message", sttHandleMessage);

// Replace the existing status loader with one that also loads STT state.
// The previous implementation is kept in _origLoadStatus but is not called.
var _origLoadStatus = loadStatus;
loadStatus = async function() {
  const response = await fetch("/api/status");
  if (!response.ok) {
    // Fail loudly instead of applying an error body as if it were a status.
    throw new Error("GET /api/status failed with HTTP " + response.status);
  }
  const data = await response.json();

  if (data.settings) {
    sttApplyCoreSettings(data.settings);
  }
  state.auto_angle = Number(data.auto_beam_angle_deg ?? state.angle ?? 0);
  state.speech_detected = false;
  state.recording = Boolean(data.recording);

  let audioLabel;
  if (data.audio_error) {
    audioLabel = "Audio: blad (" + data.audio_error + ")";
  } else if (data.audio_running) {
    audioLabel = "Audio: aktywne";
  } else {
    audioLabel = "Audio: zatrzymane";
  }
  els.audioStatus.textContent = audioLabel;

  syncUiFromState();
  sttLoadFromStatus(data);
};

// Also hook into the status WS event. socket.off("status") removes every
// listener registered for "status" before this point; the handler below
// replaces them.
var _origStatusHandler = null; // unused; kept so the global name stays defined
socket.off("status");
socket.on("status", (payload) => {
  if (payload && payload.settings) {
    sttApplyCoreSettings(payload.settings);
    syncUiFromState();
  }
  sttLoadFromStatus(payload || {});
});

// Hook stt_settings_applied: reload STT state from the settings the server applied.
socket.on("server_ack", (payload) => {
  if (payload && payload.type === "stt_settings_applied" && payload.settings) {
    sttLoadFromStatus(payload.settings);
  }
});

// Initialize STT controls
sttBindControls();