| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371 |
- // ── STT Integration ──
/**
 * Client-side STT state: the settings mirrored to the server by
 * sttSendSettings() plus the transcript data collected from incoming messages.
 */
const sttState = {
  // Recognition settings.
  enabled: false,
  language: "pl",
  timestamps: true,
  diarize: true,
  itn: true,
  detect_emotion: false,

  // Server-side VAD settings.
  server_vad: false,
  vad_threshold: 0.3,
  vad_pad_ms: 400,
  vad_min_ms: 100,

  // Connection flag and transcript data.
  connected: false,
  segments: [], // finalized segments, in arrival order
  partialText: "", // text of the current partial result
  speakerMap: {}, // speaker id (e.g. SPEAKER_00) -> { name, score }
};
/**
 * Cached references to the STT panel's DOM elements.
 * Each key maps to the element whose id is listed in the table below; an id
 * that is missing from the page yields `null` for that key.
 */
const sttEls = Object.fromEntries(
  Object.entries({
    enabled: "sttEnabled",
    status: "sttStatus",
    controls: "sttControls",
    language: "sttLanguage",
    timestamps: "sttTimestamps",
    diarize: "sttDiarize",
    itn: "sttItn",
    emotion: "sttEmotion",
    vad: "sttVad",
    vadControls: "sttVadControls",
    vadThreshold: "sttVadThreshold",
    vadThresholdValue: "sttVadThresholdValue",
    vadPadMs: "sttVadPadMs",
    vadPadMsValue: "sttVadPadMsValue",
    output: "sttOutput",
  }).map(([key, id]) => [key, document.getElementById(id)])
);
/**
 * Sends the current STT settings from `sttState` to the server as a single
 * "client_message" event of type "stt_settings".
 */
function sttSendSettings() {
  const {
    enabled,
    language,
    timestamps,
    diarize,
    itn,
    detect_emotion: detectEmotion,
    server_vad: serverVad,
    vad_threshold: vadThreshold,
    vad_pad_ms: vadPadMs,
    vad_min_ms: vadMinMs,
  } = sttState;

  const payload = {
    type: "stt_settings",
    stt_enabled: enabled,
    stt_language: language,
    stt_timestamps: timestamps,
    stt_diarize: diarize,
    stt_itn: itn,
    stt_detect_emotion: detectEmotion,
    stt_server_vad: serverVad,
    stt_vad_threshold: vadThreshold,
    stt_vad_pad_ms: vadPadMs,
    stt_vad_min_ms: vadMinMs,
  };

  socket.emit("client_message", payload);
}
/**
 * Formats a time offset in seconds as "MM:SS" (minutes are not wrapped into
 * hours, so 3700 becomes "61:40").
 *
 * @param {number|string|null|undefined} seconds - Offset in seconds; numeric
 *   strings are accepted.
 * @returns {string} The formatted time, or "" when the value is missing or is
 *   not a finite number.
 */
function sttFormatTime(seconds) {
  if (!seconds && seconds !== 0) return "";
  const total = Number(seconds);
  // Non-numeric or infinite input would otherwise render as "NaN:NaN".
  if (!Number.isFinite(total)) return "";
  // Clamp negative offsets to zero instead of printing "-1:-1".
  const clamped = Math.max(0, total);
  const minutes = Math.floor(clamped / 60);
  const secs = Math.floor(clamped % 60);
  return String(minutes).padStart(2, "0") + ":" + String(secs).padStart(2, "0");
}
/**
 * Maps a speaker id to one of eight CSS colour classes, "stt-speaker-0" to
 * "stt-speaker-7", using the first run of digits found in the id.
 *
 * @param {string|number|null|undefined} speakerId - Speaker id such as
 *   "SPEAKER_03"; non-string ids are converted to strings first.
 * @returns {string} The CSS class, or "" when no id is given. Ids without any
 *   digits map to "stt-speaker-0".
 */
function sttSpeakerColor(speakerId) {
  // Numeric id 0 is a valid speaker; every other falsy value means "no speaker".
  if (!speakerId && speakerId !== 0) return "";
  const paletteSize = 8;
  // String() keeps non-string ids (e.g. plain numbers) from throwing on .match.
  const digits = String(speakerId).match(/(\d+)/);
  const index = digits ? parseInt(digits[1], 10) % paletteSize : 0;
  return "stt-speaker-" + index;
}
/**
 * Returns the display name for a speaker id.
 *
 * Uses the name stored in `sttState.speakerMap` when one exists and is not
 * "Unknown"; otherwise falls back to the id with its "SPEAKER_" prefix
 * replaced by "Mowca ".
 *
 * @param {string|number|null|undefined} speakerId - Speaker id such as
 *   "SPEAKER_01"; non-string ids are converted to strings first.
 * @returns {string} The display name, or "" when no id is given.
 */
function sttSpeakerName(speakerId) {
  // Numeric id 0 is a valid speaker; every other falsy value means "no speaker".
  if (!speakerId && speakerId !== 0) return "";
  const id = String(speakerId);
  const speakers = sttState.speakerMap;
  // Own-property check: ids such as "constructor" must not resolve to members
  // inherited from Object.prototype.
  const known = Object.prototype.hasOwnProperty.call(speakers, id)
    ? speakers[id]
    : undefined;
  if (known && known.name && known.name !== "Unknown") {
    return known.name;
  }
  return id.replace("SPEAKER_", "Mowca ");
}
/**
 * Picks the CSS class for a confidence value: "conf-high" from 0.85,
 * "conf-mid" from 0.6, and "conf-low" for everything else (including values
 * that do not compare as numbers).
 */
function sttConfidenceClass(conf) {
  const tiers = [
    [0.85, "conf-high"],
    [0.6, "conf-mid"],
  ];
  const tier = tiers.find(([minimum]) => conf >= minimum);
  return tier ? tier[1] : "conf-low";
}
/**
 * Builds the DOM node for one transcript segment.
 *
 * Layout, in order: an optional "[MM:SS]" timestamp taken from `seg.duration`,
 * then either one row per diarized speaker, or one span per word with a
 * confidence class, or the plain text; then an optional confidence badge
 * (final segments only) and an optional emotion label.
 *
 * All text coming from the segment is inserted through `textContent` or text
 * nodes, never parsed as HTML, so markup inside a transcript, a word, a
 * speaker name or an emotion label is displayed literally.
 *
 * @param {object} seg - Segment data; reads `duration`, `speakers`, `words`,
 *   `text`, `confidence` and `emotion`.
 * @param {boolean} isPartial - True for the in-progress partial result.
 * @returns {HTMLDivElement} The rendered segment element.
 */
function sttRenderSegment(seg, isPartial) {
  // Creates a <span> holding literal text; an empty className leaves the span unclassed.
  function makeSpan(className, text) {
    const el = document.createElement("span");
    if (className) el.className = className;
    el.textContent = text == null ? "" : String(text);
    return el;
  }

  const root = document.createElement("div");
  root.className = "stt-segment" + (isPartial ? " partial" : "");

  // Timestamp
  if (seg.duration && sttState.timestamps) {
    root.appendChild(makeSpan("stt-timestamp", "[" + sttFormatTime(seg.duration) + "]"));
  }

  const speakers = seg.speakers;
  const words = seg.words;
  if (speakers && speakers.length > 0 && sttState.diarize) {
    // Speaker segments with diarization
    speakers.forEach(function (sp) {
      const row = document.createElement("div");
      row.setAttribute("style", "margin: 0.2rem 0;");
      row.appendChild(
        makeSpan("stt-speaker " + sttSpeakerColor(sp.speaker), sttSpeakerName(sp.speaker))
      );
      if (sp.start !== undefined && sttState.timestamps) {
        row.appendChild(
          makeSpan("stt-timestamp", sttFormatTime(sp.start) + "-" + sttFormatTime(sp.end))
        );
      }
      row.appendChild(makeSpan("", sp.text || ""));
      root.appendChild(row);
    });
  } else if (words && words.length > 0) {
    // Words with confidence
    words.forEach(function (w) {
      // `??` rather than `||`: an explicit confidence of 0 must render as low,
      // only a missing confidence defaults to 1.0.
      const cls = sttConfidenceClass(w.confidence ?? 1.0);
      root.appendChild(makeSpan("stt-word " + cls, w.word));
      root.appendChild(document.createTextNode(" "));
    });
  } else {
    root.appendChild(makeSpan("", seg.text || ""));
  }

  // Confidence badge
  if (seg.confidence && !isPartial) {
    root.appendChild(makeSpan("stt-confidence", Math.round(seg.confidence * 100) + "%"));
  }

  // Emotion
  if (seg.emotion && sttState.detect_emotion) {
    root.appendChild(makeSpan("stt-emotion", seg.emotion));
  }

  return root;
}
/**
 * Rebuilds the transcript view from scratch: every finalized segment in
 * `sttState.segments`, followed by the current partial text when there is
 * one, then scrolls the output container to its bottom.
 */
function sttRenderAll() {
  const container = sttEls.output;
  container.innerHTML = "";

  for (const segment of sttState.segments) {
    container.appendChild(sttRenderSegment(segment, false));
  }

  const pending = sttState.partialText;
  if (pending) {
    container.appendChild(sttRenderSegment({ text: pending }, true));
  }

  // Keep the newest text in view.
  container.scrollTop = container.scrollHeight;
}
/**
 * Dispatches one incoming STT message by its `type`:
 *  - "stt_status":     stores the connection flag and refreshes the status badge.
 *  - "partial":        replaces the partial text and re-renders.
 *  - "final":          clears the partial text, appends the message as a
 *                      segment when it has text, merges any speaker map,
 *                      and re-renders.
 *  - "speaker_update": merges the speaker map (when present) and re-renders.
 * Messages without a type, or with any other type, are ignored.
 */
function sttHandleMessage(msg) {
  if (!msg || !msg.type) {
    return;
  }

  switch (msg.type) {
    case "stt_status":
      sttState.connected = Boolean(msg.connected);
      sttUpdateStatus();
      break;

    case "partial":
      sttState.partialText = msg.text || "";
      sttRenderAll();
      break;

    case "final":
      sttState.partialText = "";
      if (msg.text) {
        sttState.segments.push(msg);
      }
      if (msg.speaker_map) {
        Object.assign(sttState.speakerMap, msg.speaker_map);
      }
      sttRenderAll();
      break;

    case "speaker_update":
      if (msg.speaker_map) {
        Object.assign(sttState.speakerMap, msg.speaker_map);
        sttRenderAll();
      }
      break;

    default:
      break;
  }
}
/**
 * Updates the status badge text and background for the three possible
 * states: disabled, enabled and connected, or enabled but not yet connected.
 */
function sttUpdateStatus() {
  let label;
  let background;

  if (!sttState.enabled) {
    label = "STT: wylaczone";
    background = "";
  } else if (sttState.connected) {
    label = "STT: polaczone";
    background = "rgba(81, 207, 102, 0.25)";
  } else {
    label = "STT: laczenie...";
    background = "rgba(255, 210, 0, 0.2)";
  }

  const badge = sttEls.status;
  badge.textContent = label;
  badge.style.background = background;
}
/**
 * Shows the STT controls only while STT is enabled, and the VAD controls
 * only while server-side VAD is on, by toggling the "hidden" class.
 */
function sttRefreshControls() {
  const { enabled, server_vad: serverVad } = sttState;
  const { controls, vadControls } = sttEls;
  controls.classList.toggle("hidden", !enabled);
  vadControls.classList.toggle("hidden", !serverVad);
}
/**
 * Wires the STT panel's inputs to `sttState`.
 *
 * Every "change" event stores the new value and sends the settings to the
 * server. In addition:
 *  - disabling STT drops the connection flag, the transcript and the speaker map;
 *  - changing the language clears the transcript;
 *  - the timestamps, diarize and emotion toggles re-render the transcript
 *    immediately, because sttRenderSegment reads those flags; without the
 *    re-render the view would stay stale until the next server message;
 *  - the VAD sliders update state and their value labels on "input", but only
 *    send the settings on "change".
 */
function sttBindControls() {
  // Binds a checkbox to a boolean field of sttState. `rerender` is set for
  // flags that affect how already-received segments are displayed.
  function bindCheckbox(el, stateKey, rerender) {
    el.addEventListener("change", function () {
      sttState[stateKey] = el.checked;
      if (rerender) {
        sttRenderAll();
      }
      sttSendSettings();
    });
  }

  sttEls.enabled.addEventListener("change", function () {
    sttState.enabled = sttEls.enabled.checked;
    if (!sttState.enabled) {
      sttState.connected = false;
      sttState.segments = [];
      sttState.partialText = "";
      sttState.speakerMap = {};
      sttRenderAll();
    }
    sttRefreshControls();
    sttUpdateStatus();
    sttSendSettings();
  });

  sttEls.language.addEventListener("change", function () {
    sttState.language = sttEls.language.value;
    // Clear transcript on language change
    sttState.segments = [];
    sttState.partialText = "";
    sttRenderAll();
    sttSendSettings();
  });

  bindCheckbox(sttEls.timestamps, "timestamps", true);
  bindCheckbox(sttEls.diarize, "diarize", true);
  bindCheckbox(sttEls.itn, "itn", false);
  bindCheckbox(sttEls.emotion, "detect_emotion", true);

  sttEls.vad.addEventListener("change", function () {
    sttState.server_vad = sttEls.vad.checked;
    sttRefreshControls();
    sttSendSettings();
  });

  sttEls.vadThreshold.addEventListener("input", function () {
    sttState.vad_threshold = parseFloat(sttEls.vadThreshold.value);
    sttEls.vadThresholdValue.textContent = sttState.vad_threshold.toFixed(2);
  });
  sttEls.vadThreshold.addEventListener("change", sttSendSettings);

  sttEls.vadPadMs.addEventListener("input", function () {
    sttState.vad_pad_ms = parseInt(sttEls.vadPadMs.value, 10);
    sttEls.vadPadMsValue.textContent = String(sttState.vad_pad_ms);
  });
  sttEls.vadPadMs.addEventListener("change", sttSendSettings);
}
/**
 * Merges STT fields from a status/ack payload into `sttState`, then mirrors
 * the resulting state into the form controls and refreshes the panel.
 *
 * Only keys that are present (not `undefined`) in `data` overwrite state;
 * booleans go through Boolean(), numeric VAD values through Number(), and the
 * language is stored as received.
 *
 * @param {object} data - Payload that may contain `stt_*` keys.
 */
function sttLoadFromStatus(data) {
  const keepAsIs = (value) => value;
  // [payload key, sttState key, converter]
  const fields = [
    ["stt_enabled", "enabled", Boolean],
    ["stt_language", "language", keepAsIs],
    ["stt_timestamps", "timestamps", Boolean],
    ["stt_diarize", "diarize", Boolean],
    ["stt_itn", "itn", Boolean],
    ["stt_detect_emotion", "detect_emotion", Boolean],
    ["stt_server_vad", "server_vad", Boolean],
    ["stt_vad_threshold", "vad_threshold", Number],
    ["stt_vad_pad_ms", "vad_pad_ms", Number],
    ["stt_vad_min_ms", "vad_min_ms", Number],
    ["stt_connected", "connected", Boolean],
  ];
  for (const [payloadKey, stateKey, convert] of fields) {
    const incoming = data[payloadKey];
    if (incoming !== undefined) {
      sttState[stateKey] = convert(incoming);
    }
  }

  // Mirror the state into the checkboxes and the language selector.
  sttEls.enabled.checked = sttState.enabled;
  sttEls.language.value = sttState.language;
  sttEls.timestamps.checked = sttState.timestamps;
  sttEls.diarize.checked = sttState.diarize;
  sttEls.itn.checked = sttState.itn;
  sttEls.emotion.checked = sttState.detect_emotion;
  sttEls.vad.checked = sttState.server_vad;

  // Sliders and their value labels.
  const threshold = sttState.vad_threshold;
  const padMs = String(sttState.vad_pad_ms);
  sttEls.vadThreshold.value = String(threshold);
  sttEls.vadThresholdValue.textContent = threshold.toFixed(2);
  sttEls.vadPadMs.value = padMs;
  sttEls.vadPadMsValue.textContent = padMs;

  sttRefreshControls();
  sttUpdateStatus();
}
// Hook into existing socket events
socket.on("stt_message", sttHandleMessage);

/**
 * Copies the audio settings from a status payload into the shared `state`
 * object. Used by both the HTTP status loader and the "status" socket handler
 * so the two paths cannot drift apart.
 *
 * @param {object} settings - The `settings` object of a status payload.
 */
function sttApplyAudioSettings(settings) {
  state.mode = settings.mode;
  state.gain_db = settings.gain_db;
  state.agc = settings.agc;
  state.attack_ms = settings.attack_ms;
  state.release_ms = settings.release_ms;
  state.noise_suppression = Boolean(settings.noise_suppression);
  state.speech_gate = Boolean(settings.speech_gate);
  state.hum_filter = Boolean(settings.hum_filter);
  state.limiter = Boolean(settings.limiter);
  state.beam_clarity = Boolean(settings.beam_clarity);
  state.hifi_mode = Boolean(settings.hifi_mode);
  state.hifi_mic = settings.hifi_mic || "mic1";
  state.angle = settings.angle;
  state.auto_beam = Boolean(settings.auto_beam);
  state.monitor_on = Boolean(settings.monitor_on);
  state.monitor_source = settings.monitor_source || "beam";
  state.sample_rate = settings.sample_rate;
}

// Patch the existing status loader so it also loads STT state.
// The previous loader is kept in _origLoadStatus; this replacement does not call it.
var _origLoadStatus = loadStatus;
loadStatus = async function() {
  var response = await fetch("/api/status");
  var data = await response.json();
  if (data.settings) {
    sttApplyAudioSettings(data.settings);
  }
  // Fall back to the manual angle, then to 0, when no auto-beam angle is reported.
  state.auto_angle = Number(data.auto_beam_angle_deg ?? state.angle ?? 0);
  state.speech_detected = false;
  state.recording = Boolean(data.recording);
  els.audioStatus.textContent = data.audio_error
    ? "Audio: blad (" + data.audio_error + ")"
    : data.audio_running
      ? "Audio: aktywne"
      : "Audio: zatrzymane";
  syncUiFromState();
  sttLoadFromStatus(data);
};

// Also hook into the "status" socket event: every previously registered
// "status" listener is removed and replaced by one that also loads STT state.
var _origStatusHandler = null;
socket.off("status");
socket.on("status", function(payload) {
  if (payload && payload.settings) {
    sttApplyAudioSettings(payload.settings);
    syncUiFromState();
  }
  sttLoadFromStatus(payload || {});
});
// When the server acknowledges applied STT settings, load the settings it
// echoes back into the local state and controls.
socket.on("server_ack", function (ack) {
  if (!ack || ack.type !== "stt_settings_applied" || !ack.settings) {
    return;
  }
  sttLoadFromStatus(ack.settings);
});

// Initialize STT controls
sttBindControls();
|