// ── STT Integration ──
/**
 * Mutable client-side state of the STT (speech-to-text) panel.
 * The option fields are what sttSendSettings transmits to the server;
 * the remaining fields describe the connection and the transcript being shown.
 */
const sttState = {
  // Recognition options.
  enabled: false,
  language: "pl",
  timestamps: true,
  diarize: true,
  itn: true,
  detect_emotion: false,

  // Server-side voice activity detection options.
  server_vad: false,
  vad_threshold: 0.3,
  vad_pad_ms: 400,
  vad_min_ms: 100,

  // Connection flag, updated from "stt_status" messages and status payloads.
  connected: false,

  // Transcript data.
  segments: [], // finalized segments, in arrival order
  partialText: "", // text of the current (not yet final) partial result
  speakerMap: {}, // e.g. SPEAKER_00 -> {name, score}
};
/**
 * DOM elements used by the STT panel, looked up once by id when this script runs.
 * Each key maps to the element with the listed id (null when no such element exists).
 */
const sttEls = Object.fromEntries(
  Object.entries({
    enabled: "sttEnabled",
    status: "sttStatus",
    controls: "sttControls",
    language: "sttLanguage",
    timestamps: "sttTimestamps",
    diarize: "sttDiarize",
    itn: "sttItn",
    emotion: "sttEmotion",
    vad: "sttVad",
    vadControls: "sttVadControls",
    vadThreshold: "sttVadThreshold",
    vadThresholdValue: "sttVadThresholdValue",
    vadPadMs: "sttVadPadMs",
    vadPadMsValue: "sttVadPadMsValue",
    output: "sttOutput",
  }).map(([key, elementId]) => [key, document.getElementById(elementId)])
);
/**
 * Sends the current STT options to the server as a "client_message" socket
 * event of type "stt_settings". Every option is prefixed with "stt_" on the wire.
 */
function sttSendSettings() {
  const {
    enabled,
    language,
    timestamps,
    diarize,
    itn,
    detect_emotion,
    server_vad,
    vad_threshold,
    vad_pad_ms,
    vad_min_ms,
  } = sttState;

  const payload = {
    type: "stt_settings",
    stt_enabled: enabled,
    stt_language: language,
    stt_timestamps: timestamps,
    stt_diarize: diarize,
    stt_itn: itn,
    stt_detect_emotion: detect_emotion,
    stt_server_vad: server_vad,
    stt_vad_threshold: vad_threshold,
    stt_vad_pad_ms: vad_pad_ms,
    stt_vad_min_ms: vad_min_ms,
  };

  socket.emit("client_message", payload);
}
/**
 * Formats a duration in seconds as zero-padded "MM:SS".
 * Minutes are not wrapped into hours, so 3700 seconds yields "61:40".
 *
 * @param {number} seconds - Duration in seconds; fractions are floored.
 * @returns {string} The formatted time, or "" for any falsy input other than 0.
 */
function sttFormatTime(seconds) {
  // Falsy values (undefined, null, NaN, "") mean "no timestamp"; a literal 0 is valid.
  const isMissing = !seconds && seconds !== 0;
  if (isMissing) {
    return "";
  }
  const pad = (value) => String(value).padStart(2, "0");
  const minutes = Math.floor(seconds / 60);
  const remainder = Math.floor(seconds % 60);
  return `${pad(minutes)}:${pad(remainder)}`;
}
/**
 * Maps a speaker id to one of eight CSS class names, "stt-speaker-0" through "stt-speaker-7".
 * The first run of digits in the id, modulo 8, selects the class; ids without digits use 0.
 *
 * @param {string} speakerId - Speaker id such as "SPEAKER_03".
 * @returns {string} The class name, or "" when speakerId is falsy.
 */
function sttSpeakerColor(speakerId) {
  if (!speakerId) {
    return "";
  }
  const digits = speakerId.match(/(\d+)/);
  let paletteIndex = 0;
  if (digits) {
    paletteIndex = parseInt(digits[1], 10) % 8;
  }
  return `stt-speaker-${paletteIndex}`;
}
/**
 * Resolves the display name for a diarization speaker id.
 *
 * Uses the name stored in sttState.speakerMap when there is one and it is not
 * the placeholder "Unknown"; otherwise falls back to the id with its first
 * "SPEAKER_" occurrence replaced by "Mowca ".
 *
 * @param {string} speakerId - Speaker id such as "SPEAKER_00".
 * @returns {string} The display name, or "" when speakerId is falsy.
 */
function sttSpeakerName(speakerId) {
  if (!speakerId) {
    return "";
  }
  const speakerMap = sttState.speakerMap;
  // Only own entries count: speakerMap is a plain object, so an id such as
  // "constructor" or "toString" would otherwise resolve to an inherited
  // Object.prototype member and its function name would be shown as the speaker.
  const mapped = Object.prototype.hasOwnProperty.call(speakerMap, speakerId)
    ? speakerMap[speakerId]
    : undefined;
  if (mapped && mapped.name && mapped.name !== "Unknown") {
    return mapped.name;
  }
  return speakerId.replace("SPEAKER_", "Mowca ");
}
/**
 * Picks the CSS class for a recognition confidence value.
 *
 * @param {number} conf - Confidence value compared against 0.85 and 0.6.
 * @returns {string} "conf-high" for >= 0.85, "conf-mid" for >= 0.6, otherwise
 *   "conf-low" (which also covers undefined and NaN).
 */
function sttConfidenceClass(conf) {
  // Ordered from the highest threshold down; the first tier reached wins.
  const tiers = [
    [0.85, "conf-high"],
    [0.6, "conf-mid"],
  ];
  for (const [minimum, className] of tiers) {
    if (conf >= minimum) {
      return className;
    }
  }
  return "conf-low";
}
/**
 * Builds a detached div element (class "stt-segment", plus "partial" when
 * isPartial is truthy) for one transcript segment and returns it without
 * attaching it to the document.
 *
 * The content is accumulated in a string and assigned through innerHTML:
 *   - a timestamp prefix when seg.duration is truthy and timestamps are enabled;
 *   - one entry per seg.speakers item when diarization is enabled and the list
 *     is non-empty, otherwise the individual seg.words, otherwise seg.text;
 *   - a rounded confidence percentage for non-partial segments;
 *   - seg.emotion when emotion detection is enabled.
 *
 * NOTE(review): the string literals in this function contain no HTML tags, two
 * of them are split across lines, and colorClass / cls are computed but never
 * used. It looks like the markup was stripped from this copy of the file;
 * restore it from the original before relying on this function (TODO confirm).
 * NOTE(review): speaker names, word text, segment text and emotion are
 * concatenated into innerHTML without escaping; if they can contain markup
 * they should be escaped or inserted via textContent (TODO confirm the source
 * of these values).
 * NOTE(review): the truthiness checks treat 0 as missing, so a word confidence
 * of 0 is classified as 1.0 and a duration or confidence of 0 is not rendered.
 *
 * @param {object} seg - Segment to render; the fields read here are duration,
 *   speakers, words, text, confidence and emotion.
 * @param {boolean} isPartial - Truthy for an in-progress (partial) result.
 * @returns {HTMLDivElement} The newly created element.
 */
function sttRenderSegment(seg, isPartial) {
var div = document.createElement("div");
div.className = "stt-segment" + (isPartial ? " partial" : "");
var html = "";
// Timestamp prefix, only when the segment has a truthy duration and timestamps are enabled
if (seg.duration && sttState.timestamps) {
html += '[' + sttFormatTime(seg.duration) + ']';
}
// Speaker segments with diarization
if (seg.speakers && seg.speakers.length > 0 && sttState.diarize) {
seg.speakers.forEach(function(sp) {
var spName = sttSpeakerName(sp.speaker);
var colorClass = sttSpeakerColor(sp.speaker);
html += '
';
html += '' + spName + '';
if (sp.start !== undefined && sttState.timestamps) {
html += '' + sttFormatTime(sp.start) + '-' + sttFormatTime(sp.end) + '';
}
html += '' + (sp.text || "") + '';
html += '
';
});
} else {
// Words with confidence
if (seg.words && seg.words.length > 0) {
seg.words.forEach(function(w) {
var cls = sttConfidenceClass(w.confidence || 1.0);
html += '' + w.word + ' ';
});
} else {
html += '' + (seg.text || "") + '';
}
}
// Confidence badge
if (seg.confidence && !isPartial) {
html += '' + Math.round(seg.confidence * 100) + '%';
}
// Emotion
if (seg.emotion && sttState.detect_emotion) {
html += '' + seg.emotion + '';
}
div.innerHTML = html;
return div;
}
/**
 * Re-renders the whole transcript into the STT output element: clears it,
 * appends every finalized segment, then the current partial text (if any),
 * and finally scrolls the element to its bottom.
 */
function sttRenderAll() {
  const container = sttEls.output;
  container.innerHTML = "";

  for (const segment of sttState.segments) {
    container.appendChild(sttRenderSegment(segment, false));
  }

  const pendingText = sttState.partialText;
  if (pendingText) {
    container.appendChild(sttRenderSegment({ text: pendingText }, true));
  }

  // Keep the newest line visible.
  container.scrollTop = container.scrollHeight;
}
/**
 * Dispatches one incoming STT message by its type:
 *   - "stt_status": stores the connection flag and refreshes the status label;
 *   - "partial": replaces the partial text and re-renders;
 *   - "final": clears the partial text, stores the message as a segment when it
 *     has text, merges an optional speaker_map, and re-renders;
 *   - "speaker_update": merges speaker_map (when present) and re-renders.
 * Messages without a type, and unknown types, are ignored.
 *
 * @param {object} msg - Message object with a `type` field.
 */
function sttHandleMessage(msg) {
  if (!msg || !msg.type) {
    return;
  }

  switch (msg.type) {
    case "stt_status":
      sttState.connected = Boolean(msg.connected);
      sttUpdateStatus();
      break;

    case "partial":
      sttState.partialText = msg.text || "";
      sttRenderAll();
      break;

    case "final":
      sttState.partialText = "";
      if (msg.text) {
        // The whole message object is kept as the segment.
        sttState.segments.push(msg);
      }
      if (msg.speaker_map) {
        Object.assign(sttState.speakerMap, msg.speaker_map);
      }
      sttRenderAll();
      break;

    case "speaker_update":
      if (msg.speaker_map) {
        Object.assign(sttState.speakerMap, msg.speaker_map);
        sttRenderAll();
      }
      break;

    default:
      break;
  }
}
/**
 * Updates the STT status element's text and background colour from the state:
 * disabled, enabled and connected, or enabled and still connecting.
 */
function sttUpdateStatus() {
  let label;
  let background;

  if (!sttState.enabled) {
    label = "STT: wylaczone";
    background = "";
  } else if (sttState.connected) {
    label = "STT: polaczone";
    background = "rgba(81, 207, 102, 0.25)";
  } else {
    label = "STT: laczenie...";
    background = "rgba(255, 210, 0, 0.2)";
  }

  sttEls.status.textContent = label;
  sttEls.status.style.background = background;
}
/**
 * Shows or hides the STT control panels via the "hidden" class: the main panel
 * is hidden while STT is disabled, the VAD panel while server VAD is off.
 */
function sttRefreshControls() {
  const { controls, vadControls } = sttEls;
  const hideMainPanel = !sttState.enabled;
  const hideVadPanel = !sttState.server_vad;

  controls.classList.toggle("hidden", hideMainPanel);
  vadControls.classList.toggle("hidden", hideVadPanel);
}
/**
 * Attaches the event listeners that keep sttState in sync with the STT controls.
 *
 * Checkbox and language changes are sent to the server immediately. The two
 * VAD sliders update the state and their numeric label on every "input" event
 * and are sent only on "change".
 */
function sttBindControls() {
  // Binds a checkbox in sttEls to a boolean field of sttState. On "change" the
  // field is updated, the optional afterChange hook runs, then settings are sent.
  const bindCheckbox = (elementKey, stateKey, afterChange) => {
    sttEls[elementKey].addEventListener("change", () => {
      sttState[stateKey] = sttEls[elementKey].checked;
      if (afterChange) {
        afterChange();
      }
      sttSendSettings();
    });
  };

  bindCheckbox("enabled", "enabled", () => {
    if (!sttState.enabled) {
      // Turning STT off resets the connection flag and discards the transcript.
      sttState.connected = false;
      sttState.segments = [];
      sttState.partialText = "";
      sttState.speakerMap = {};
      sttRenderAll();
    }
    sttRefreshControls();
    sttUpdateStatus();
  });

  sttEls.language.addEventListener("change", () => {
    sttState.language = sttEls.language.value;
    // A language change clears the transcript (the speaker map is kept).
    sttState.segments = [];
    sttState.partialText = "";
    sttRenderAll();
    sttSendSettings();
  });

  bindCheckbox("timestamps", "timestamps");
  bindCheckbox("diarize", "diarize");
  bindCheckbox("itn", "itn");
  bindCheckbox("emotion", "detect_emotion");
  bindCheckbox("vad", "server_vad", () => {
    sttRefreshControls();
  });

  sttEls.vadThreshold.addEventListener("input", () => {
    const threshold = parseFloat(sttEls.vadThreshold.value);
    sttState.vad_threshold = threshold;
    sttEls.vadThresholdValue.textContent = threshold.toFixed(2);
  });
  sttEls.vadThreshold.addEventListener("change", sttSendSettings);

  sttEls.vadPadMs.addEventListener("input", () => {
    const padMs = parseInt(sttEls.vadPadMs.value, 10);
    sttState.vad_pad_ms = padMs;
    sttEls.vadPadMsValue.textContent = String(padMs);
  });
  sttEls.vadPadMs.addEventListener("change", sttSendSettings);
}
/**
 * Copies any STT fields present in a status/settings payload into sttState,
 * then pushes the resulting state into the controls and refreshes panel
 * visibility and the status label.
 *
 * Fields that are undefined in `data` leave the corresponding state untouched.
 *
 * @param {object} data - Payload that may contain the "stt_*" fields listed below.
 */
function sttLoadFromStatus(data) {
  const keepAsIs = (value) => value;

  // [payload key, sttState key, coercion applied to the incoming value]
  const fieldTable = [
    ["stt_enabled", "enabled", Boolean],
    ["stt_language", "language", keepAsIs],
    ["stt_timestamps", "timestamps", Boolean],
    ["stt_diarize", "diarize", Boolean],
    ["stt_itn", "itn", Boolean],
    ["stt_detect_emotion", "detect_emotion", Boolean],
    ["stt_server_vad", "server_vad", Boolean],
    ["stt_vad_threshold", "vad_threshold", Number],
    ["stt_vad_pad_ms", "vad_pad_ms", Number],
    ["stt_vad_min_ms", "vad_min_ms", Number],
    ["stt_connected", "connected", Boolean],
  ];
  for (const [payloadKey, stateKey, coerce] of fieldTable) {
    const incoming = data[payloadKey];
    if (incoming !== undefined) {
      sttState[stateKey] = coerce(incoming);
    }
  }

  // Mirror the state into the controls.
  sttEls.enabled.checked = sttState.enabled;
  sttEls.language.value = sttState.language;
  const checkboxTable = [
    ["timestamps", "timestamps"],
    ["diarize", "diarize"],
    ["itn", "itn"],
    ["emotion", "detect_emotion"],
    ["vad", "server_vad"],
  ];
  for (const [elementKey, stateKey] of checkboxTable) {
    sttEls[elementKey].checked = sttState[stateKey];
  }

  const threshold = sttState.vad_threshold;
  sttEls.vadThreshold.value = String(threshold);
  sttEls.vadThresholdValue.textContent = threshold.toFixed(2);

  const padMs = String(sttState.vad_pad_ms);
  sttEls.vadPadMs.value = padMs;
  sttEls.vadPadMsValue.textContent = padMs;

  sttRefreshControls();
  sttUpdateStatus();
}
// Route every "stt_message" socket event to sttHandleMessage, which dispatches
// on the message's `type` field.
socket.on("stt_message", sttHandleMessage);
// Replace the existing loadStatus with a version that fetches /api/status,
// applies the audio settings to `state`, and then also loads the STT fields
// from the same response.
// The previous implementation is kept in _origLoadStatus but is not called here.
var _origLoadStatus = loadStatus;
loadStatus = async function() {
  const response = await fetch("/api/status");
  const data = await response.json();

  const settings = data.settings;
  if (settings) {
    Object.assign(state, {
      mode: settings.mode,
      gain_db: settings.gain_db,
      agc: settings.agc,
      attack_ms: settings.attack_ms,
      release_ms: settings.release_ms,
      noise_suppression: Boolean(settings.noise_suppression),
      speech_gate: Boolean(settings.speech_gate),
      hum_filter: Boolean(settings.hum_filter),
      limiter: Boolean(settings.limiter),
      beam_clarity: Boolean(settings.beam_clarity),
      hifi_mode: Boolean(settings.hifi_mode),
      hifi_mic: settings.hifi_mic || "mic1",
      angle: settings.angle,
      auto_beam: Boolean(settings.auto_beam),
      monitor_on: Boolean(settings.monitor_on),
      monitor_source: settings.monitor_source || "beam",
      sample_rate: settings.sample_rate,
    });
  }

  // Falls back to the (possibly just updated) manual angle, then to 0.
  state.auto_angle = Number(data.auto_beam_angle_deg ?? state.angle ?? 0);
  state.speech_detected = false;
  state.recording = Boolean(data.recording);

  let audioLabel;
  if (data.audio_error) {
    audioLabel = `Audio: blad (${data.audio_error})`;
  } else if (data.audio_running) {
    audioLabel = "Audio: aktywne";
  } else {
    audioLabel = "Audio: zatrzymane";
  }
  els.audioStatus.textContent = audioLabel;

  syncUiFromState();
  sttLoadFromStatus(data);
};
// Replace every previously registered "status" socket listener with one that
// applies the audio settings (when the payload carries them) and always
// forwards the payload to the STT state loader.
var _origStatusHandler = null;
socket.off("status");
socket.on("status", (payload) => {
  const settings = payload && payload.settings;
  if (settings) {
    Object.assign(state, {
      mode: settings.mode,
      gain_db: settings.gain_db,
      agc: settings.agc,
      attack_ms: settings.attack_ms,
      release_ms: settings.release_ms,
      noise_suppression: Boolean(settings.noise_suppression),
      speech_gate: Boolean(settings.speech_gate),
      hum_filter: Boolean(settings.hum_filter),
      limiter: Boolean(settings.limiter),
      beam_clarity: Boolean(settings.beam_clarity),
      hifi_mode: Boolean(settings.hifi_mode),
      hifi_mic: settings.hifi_mic || "mic1",
      angle: settings.angle,
      auto_beam: Boolean(settings.auto_beam),
      monitor_on: Boolean(settings.monitor_on),
      monitor_source: settings.monitor_source || "beam",
      sample_rate: settings.sample_rate,
    });
    syncUiFromState();
  }
  // An empty object stands in for a missing payload so no STT field changes.
  sttLoadFromStatus(payload || {});
});
// When the server acknowledges applied STT settings, load the settings it
// reports back into the local state and controls.
socket.on("server_ack", (payload) => {
  const isSttSettingsAck = payload?.type === "stt_settings_applied";
  if (isSttSettingsAck && payload.settings) {
    sttLoadFromStatus(payload.settings);
  }
});

// Attach the STT control listeners once, when this script runs.
sttBindControls();