v0.2.0: System tray, IPC status, VAD, hotkey grab, and polish

- Add system tray icon with Exit menu (tray-icon/muda)
- Add IPC daemon status via named pipe (Windows) / Unix socket (Linux)
- Add `mouth status` command to query running daemon
- Add daemon lock to prevent multiple instances
- Hide Windows console window when running as daemon
- Wire up Silero VAD model download and speech filtering
- Switch hotkey listener from rdev::listen to rdev::grab to consume hotkeys
- Add hotkey capture mode in interactive config (press keys instead of typing)
- Add all missing key names (brackets, punctuation, numpad, etc.)
- Fix ONNX tensor type mismatches (encoder wants i64, decoder wants i32)
- Add 300ms lead-in silence to compensate for mic startup latency
- Add 300ms of trailing recording after stop so the end of speech is not clipped
- Add 50ms silence before audio feedback blips for device warmup
- Reduce overlay size (150x18, was 200x36)
- Add PolyForm Noncommercial 1.0.0 license
- Flesh out user-focused README
- Update release script with Gitea/GitHub forge support

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 22:04:39 +01:00
parent f9d65ff850
commit 0cea6a4b28
19 changed files with 1948 additions and 490 deletions
+5 -1
View File
@@ -84,7 +84,11 @@ pub fn play_blip_down() {
}
fn play_blip_internal(freq_start: f32, freq_end: f32, duration_ms: u64) -> Result<()> {
let samples = generate_blip(freq_start, freq_end, duration_ms);
// Prepend silence so the audio device has time to warm up
let silence_ms = 50u64;
let silence_samples = (44100u64 * silence_ms / 1000) as usize;
let mut samples = vec![0i16; silence_samples];
samples.extend(generate_blip(freq_start, freq_end, duration_ms));
let wav_data = encode_wav(&samples, 44100);
let (_stream, stream_handle) = OutputStream::try_default()?;
+37 -8
View File
@@ -1,7 +1,9 @@
use anyhow::Result;
use dialoguer::{Input, Select};
use std::time::Duration;
use crate::config::{Accelerator, Config, OverlayPosition, PasteMethod, RecordingMode};
use crate::hotkey::capture_hotkey;
pub fn show() -> Result<()> {
let config = Config::load()?;
@@ -20,10 +22,7 @@ pub fn reset() -> Result<()> {
pub fn interactive() -> Result<()> {
let mut config = Config::load()?;
config.hotkey = Input::new()
.with_prompt("Hotkey")
.default(config.hotkey)
.interact_text()?;
config.hotkey = prompt_hotkey("Hotkey", &config.hotkey)?;
let mode_idx = Select::new()
.with_prompt("Recording mode")
@@ -38,10 +37,7 @@ pub fn interactive() -> Result<()> {
_ => RecordingMode::Toggle,
};
config.cancel_key = Input::new()
.with_prompt("Cancel key")
.default(config.cancel_key)
.interact_text()?;
config.cancel_key = prompt_hotkey("Cancel key", &config.cancel_key)?;
config.model = Input::new()
.with_prompt("Model")
@@ -125,3 +121,36 @@ pub fn interactive() -> Result<()> {
println!("\nConfig saved to {}", Config::path()?.display());
Ok(())
}
/// Ask the user how they want to set a hotkey and return the resulting string.
///
/// Three options are offered: capture a live key press (10 second timeout),
/// type the combination by hand, or keep `current`. A capture that times out
/// also falls back to `current`.
///
/// # Errors
/// Propagates any error from the interactive `dialoguer` prompts.
fn prompt_hotkey(label: &str, current: &str) -> Result<String> {
    const OPTIONS: [&str; 3] = ["Press the key combination", "Type it manually", "Keep current"];

    let choice = Select::new()
        .with_prompt(format!("{label} (current: {current})"))
        .items(&OPTIONS)
        .default(0)
        .interact()?;

    let chosen: String = match choice {
        // Live capture.
        0 => {
            println!("Press your desired key combination (timeout: 10s)...");
            if let Some(hotkey) = capture_hotkey(Duration::from_secs(10)) {
                println!(" Captured: {hotkey}");
                hotkey
            } else {
                println!(" No keypress detected, keeping current: {current}");
                current.to_string()
            }
        }
        // Manual entry, pre-filled with the current value.
        1 => Input::new()
            .with_prompt(label)
            .default(current.to_string())
            .interact_text()?,
        // "Keep current" (and any unexpected index).
        _ => current.to_string(),
    };

    Ok(chosen)
}
+93 -50
View File
@@ -1,18 +1,31 @@
use anyhow::{Context, Result};
use std::sync::mpsc;
use std::sync::{mpsc, Arc};
use std::thread;
use tracing::info;
use crate::config::{Config, OverlayPosition};
use crate::config::Config;
use crate::coordinator::Coordinator;
use crate::hotkey;
use crate::ipc;
use crate::model_cache;
use crate::overlay;
use crate::recorder;
use crate::shared_state::SharedState;
use crate::transcriber::Transcriber;
pub fn run() -> Result<()> {
let config = Config::load()?;
// Check if already running
if ipc::is_daemon_running() {
eprintln!("Mouth is already running.");
std::process::exit(1);
}
// Hide Windows console window
#[cfg(windows)]
hide_console();
info!("Mouth v{} starting", env!("CARGO_PKG_VERSION"));
info!("Mode: {:?}", config.mode);
info!("Hotkey: {}", config.hotkey);
@@ -30,10 +43,25 @@ pub fn run() -> Result<()> {
let transcriber = Transcriber::new(&model_paths, &config.accelerator, config.gpu_device)
.context("Failed to load transcription engine")?;
// Step 3: VAD (not yet bundled)
// Step 3: VAD
let vad = if config.vad_enabled {
info!("VAD enabled but Silero model not yet bundled — skipping");
None
info!("Loading Silero VAD...");
match model_cache::ensure_vad_model() {
Ok(vad_path) => match crate::vad::Vad::new(vad_path.to_str().unwrap_or_default()) {
Ok(v) => {
info!("VAD loaded");
Some(v)
}
Err(e) => {
tracing::warn!("Failed to load VAD, continuing without it: {e}");
None
}
},
Err(e) => {
tracing::warn!("Failed to download VAD model, continuing without it: {e}");
None
}
}
} else {
None
};
@@ -44,12 +72,29 @@ pub fn run() -> Result<()> {
let cancel_combo = hotkey::parse_hotkey(&config.cancel_key)
.with_context(|| format!("Invalid cancel key: {}", config.cancel_key))?;
// Step 5: Set up channels
// Step 5: Create shared state
let shared_state = Arc::new(SharedState::new(
config.model.clone(),
format!("{:?}", config.accelerator).to_lowercase(),
));
// Step 6: Start IPC listener
let ipc_state = Arc::clone(&shared_state);
thread::Builder::new()
.name("mouth-ipc".into())
.spawn(move || {
if let Err(e) = ipc::start_ipc_listener(ipc_state) {
tracing::error!("IPC listener failed: {e}");
}
})
.context("Failed to spawn IPC thread")?;
// Step 7: Set up channels
let (hotkey_tx, hotkey_rx) = mpsc::channel();
let (recorder_cmd_tx, recorder_cmd_rx) = mpsc::channel();
let (audio_tx, audio_rx) = mpsc::channel();
// Step 6: Spawn background threads
// Step 8: Spawn background threads
let device_name = config.input_device.clone();
thread::Builder::new()
.name("mouth-recorder".into())
@@ -65,52 +110,50 @@ pub fn run() -> Result<()> {
})
.context("Failed to spawn hotkey thread")?;
// Step 7: Start overlay + coordinator
if config.overlay_position != OverlayPosition::None {
let (event_loop, proxy) = overlay::create_event_loop()
.map_err(|e| anyhow::anyhow!("Failed to create overlay event loop: {e}"))?;
// Step 9: Start overlay + coordinator
// Always create the event loop (needed for tray icon even when overlay is hidden)
let (event_loop, proxy) = overlay::create_event_loop()
.map_err(|e| anyhow::anyhow!("Failed to create overlay event loop: {e}"))?;
let overlay_position = config.overlay_position.clone();
let coord_proxy = Some(proxy);
let overlay_position = config.overlay_position.clone();
// Coordinator runs on a background thread
let coord_config = config.clone();
thread::Builder::new()
.name("mouth-coordinator".into())
.spawn(move || {
let mut coordinator = Coordinator::new(
coord_config,
transcriber,
vad,
recorder_cmd_tx,
audio_rx,
hotkey_rx,
coord_proxy,
);
coordinator.run();
})
.context("Failed to spawn coordinator thread")?;
// Coordinator runs on a background thread
let coord_config = config.clone();
let coord_state = Arc::clone(&shared_state);
thread::Builder::new()
.name("mouth-coordinator".into())
.spawn(move || {
let mut coordinator = Coordinator::new(
coord_config,
coord_state,
transcriber,
vad,
recorder_cmd_tx,
audio_rx,
hotkey_rx,
Some(proxy),
);
coordinator.run();
})
.context("Failed to spawn coordinator thread")?;
println!("Mouth is running. Press {} to record. Ctrl+C to quit.", config.hotkey);
// Overlay event loop runs on main thread (blocking)
overlay::run_event_loop(event_loop, overlay_position)
.map_err(|e| anyhow::anyhow!("Overlay event loop error: {e}"))?;
} else {
// No overlay — coordinator runs on main thread
println!("Mouth is running. Press {} to record. Ctrl+C to quit.", config.hotkey);
let mut coordinator = Coordinator::new(
config,
transcriber,
vad,
recorder_cmd_tx,
audio_rx,
hotkey_rx,
None,
);
coordinator.run();
}
// Overlay event loop runs on main thread (blocking)
// Tray icon is created inside the overlay app
overlay::run_event_loop(event_loop, overlay_position)
.map_err(|e| anyhow::anyhow!("Overlay event loop error: {e}"))?;
ipc::cleanup();
Ok(())
}
/// Hide the console window attached to this process, if there is one.
#[cfg(windows)]
fn hide_console() {
    use windows_sys::Win32::System::Console::GetConsoleWindow;
    use windows_sys::Win32::UI::WindowsAndMessaging::{ShowWindow, SW_HIDE};

    // SAFETY: GetConsoleWindow takes no arguments; it only returns a handle.
    let console = unsafe { GetConsoleWindow() };
    if console.is_null() {
        // No console attached — nothing to hide.
        return;
    }

    // SAFETY: `console` was just returned by GetConsoleWindow and checked to
    // be non-null.
    unsafe {
        ShowWindow(console, SW_HIDE);
    }
}
+20 -7
View File
@@ -1,11 +1,24 @@
use anyhow::Result;
pub fn status() -> Result<()> {
let version = env!("CARGO_PKG_VERSION");
use crate::ipc;
// TODO: Phase 10 — connect to daemon IPC socket/pipe and query status
// For now, just show version info
println!("Mouth v{version}");
println!("Status: not yet implemented (requires daemon IPC)");
Ok(())
/// Print the status reported by the running daemon.
///
/// If the daemon cannot be queried for any reason, prints
/// "Mouth is not running." to stderr and exits the process with code 1.
pub fn status() -> Result<()> {
    let status = match ipc::query_daemon_status() {
        Ok(status) => status,
        Err(_) => {
            eprintln!("Mouth is not running.");
            std::process::exit(1)
        }
    };

    println!("Mouth v{}", status.version);
    println!("State: {}", status.state);
    println!("Model: {}", status.model);
    println!("Accelerator: {}", status.accelerator);

    // Break the uptime into hours / minutes / seconds for display.
    let total = status.uptime_secs;
    let (hours, mins, secs) = (total / 3600, (total % 3600) / 60, total % 60);
    println!("Uptime: {}h {}m {}s", hours, mins, secs);

    Ok(())
}
+28 -11
View File
@@ -1,4 +1,4 @@
use std::sync::mpsc;
use std::sync::{mpsc, Arc};
use std::thread;
use tracing::{debug, error, info, warn};
use winit::event_loop::EventLoopProxy;
@@ -9,6 +9,7 @@ use crate::hotkey::HotkeyEvent;
use crate::overlay::{OverlayEvent, OverlayState};
use crate::paste;
use crate::recorder::{AudioData, RecorderCommand};
use crate::shared_state::SharedState;
use crate::transcriber::Transcriber;
use crate::vad::Vad;
@@ -24,6 +25,7 @@ enum State {
pub struct Coordinator {
config: Config,
state: State,
shared_state: Arc<SharedState>,
transcriber: Transcriber,
vad: Option<Vad>,
recorder_tx: mpsc::Sender<RecorderCommand>,
@@ -35,6 +37,7 @@ pub struct Coordinator {
impl Coordinator {
pub fn new(
config: Config,
shared_state: Arc<SharedState>,
transcriber: Transcriber,
vad: Option<Vad>,
recorder_tx: mpsc::Sender<RecorderCommand>,
@@ -45,6 +48,7 @@ impl Coordinator {
Self {
config,
state: State::Idle,
shared_state,
transcriber,
vad,
recorder_tx,
@@ -54,6 +58,16 @@ impl Coordinator {
}
}
/// Record the coordinator's new state and mirror it into the shared state
/// as a lowercase label, which is what the IPC status response reports.
fn set_state(&mut self, state: State) {
    let label = match state {
        State::Idle => "idle",
        State::Recording => "recording",
        State::Transcribing => "transcribing",
    };
    self.state = state;
    self.shared_state.set_state(label);
}
/// Run the coordinator loop. This blocks until shutdown.
pub fn run(&mut self) {
info!("Coordinator started");
@@ -111,7 +125,7 @@ impl Coordinator {
fn start_recording(&mut self) {
info!("Recording started");
self.state = State::Recording;
self.set_state(State::Recording);
self.set_overlay(OverlayState::Recording);
if self.config.audio_feedback {
@@ -120,23 +134,26 @@ impl Coordinator {
if self.recorder_tx.send(RecorderCommand::Start).is_err() {
error!("Failed to send start command to recorder");
self.state = State::Idle;
self.set_state(State::Idle);
self.set_overlay(OverlayState::Hidden);
}
}
fn stop_recording(&mut self) {
info!("Recording stopped, starting transcription");
self.state = State::Transcribing;
self.set_state(State::Transcribing);
self.set_overlay(OverlayState::Transcribing);
if self.config.audio_feedback {
audio_feedback::play_blip_down();
}
// Keep recording briefly after the stop signal so trailing speech isn't clipped
thread::sleep(std::time::Duration::from_millis(300));
if self.recorder_tx.send(RecorderCommand::Stop).is_err() {
error!("Failed to send stop command to recorder");
self.state = State::Idle;
self.set_state(State::Idle);
self.set_overlay(OverlayState::Hidden);
return;
}
@@ -148,7 +165,7 @@ impl Coordinator {
}
Err(_) => {
error!("Failed to receive audio data");
self.state = State::Idle;
self.set_state(State::Idle);
self.set_overlay(OverlayState::Error);
self.delayed_hide_overlay();
}
@@ -157,7 +174,7 @@ impl Coordinator {
fn cancel_recording(&mut self) {
info!("Recording cancelled");
self.state = State::Idle;
self.set_state(State::Idle);
if self.recorder_tx.send(RecorderCommand::Stop).is_err() {
warn!("Failed to send stop command to recorder");
@@ -176,7 +193,7 @@ impl Coordinator {
Ok(filtered) => {
if filtered.is_empty() {
info!("No speech detected by VAD");
self.state = State::Idle;
self.set_state(State::Idle);
self.set_overlay(OverlayState::Hidden);
return;
}
@@ -199,7 +216,7 @@ impl Coordinator {
Ok(text) => {
if text.is_empty() {
info!("Empty transcription");
self.state = State::Idle;
self.set_state(State::Idle);
self.set_overlay(OverlayState::Hidden);
return;
}
@@ -218,11 +235,11 @@ impl Coordinator {
}
self.delayed_hide_overlay();
self.state = State::Idle;
self.set_state(State::Idle);
}
Err(e) => {
error!("Transcription failed: {e}");
self.state = State::Idle;
self.set_state(State::Idle);
self.set_overlay(OverlayState::Error);
self.delayed_hide_overlay();
}
+247 -26
View File
@@ -1,5 +1,6 @@
use anyhow::{bail, Result};
use rdev::{self, Event, EventType, Key};
use std::cell::RefCell;
use std::sync::mpsc;
use std::time::{Duration, Instant};
use tracing::{debug, error, info};
@@ -164,77 +165,297 @@ fn parse_key(s: &str) -> Result<Key> {
"7" => Key::Num7,
"8" => Key::Num8,
"9" => Key::Num9,
// Punctuation / symbol keys
"[" | "leftbracket" => Key::LeftBracket,
"]" | "rightbracket" => Key::RightBracket,
";" | "semicolon" => Key::SemiColon,
"'" | "quote" => Key::Quote,
"`" | "backquote" | "backtick" => Key::BackQuote,
"\\" | "backslash" => Key::BackSlash,
"," | "comma" => Key::Comma,
"." | "dot" | "period" => Key::Dot,
"/" | "slash" => Key::Slash,
"-" | "minus" => Key::Minus,
"=" | "equal" | "equals" => Key::Equal,
// Additional non-character keys
"printscreen" | "prtsc" => Key::PrintScreen,
"scrolllock" => Key::ScrollLock,
"pause" | "break" => Key::Pause,
"numlock" => Key::NumLock,
"capslock" => Key::CapsLock,
// Numpad
"kp0" | "numpad0" => Key::Kp0,
"kp1" | "numpad1" => Key::Kp1,
"kp2" | "numpad2" => Key::Kp2,
"kp3" | "numpad3" => Key::Kp3,
"kp4" | "numpad4" => Key::Kp4,
"kp5" | "numpad5" => Key::Kp5,
"kp6" | "numpad6" => Key::Kp6,
"kp7" | "numpad7" => Key::Kp7,
"kp8" | "numpad8" => Key::Kp8,
"kp9" | "numpad9" => Key::Kp9,
"kpenter" | "numpadenter" => Key::KpReturn,
"kpminus" | "numpadminus" => Key::KpMinus,
"kpplus" | "numpadplus" => Key::KpPlus,
"kpmultiply" | "numpadmultiply" => Key::KpMultiply,
"kpdivide" | "numpaddivide" => Key::KpDivide,
"kpdelete" | "numpaddelete" => Key::KpDelete,
_ => bail!("Unknown key: {s}"),
};
Ok(key)
}
/// Convert an rdev `Key` into the name used for it in the config file.
///
/// Returns `None` for any key that has no entry in the table below; the
/// modifier keys are among those.
// NOTE(review): the names are presumably all accepted by `parse_key`; only
// part of that function is visible here, so confirm the two tables agree.
fn key_to_string(key: &Key) -> Option<String> {
    let name: &'static str = match key {
        // Whitespace, editing and navigation
        Key::Space => "space",
        Key::Return => "enter",
        Key::Escape => "escape",
        Key::Tab => "tab",
        Key::Backspace => "backspace",
        Key::Delete => "delete",
        Key::Insert => "insert",
        Key::Home => "home",
        Key::End => "end",
        Key::PageUp => "pageup",
        Key::PageDown => "pagedown",
        Key::UpArrow => "up",
        Key::DownArrow => "down",
        Key::LeftArrow => "left",
        Key::RightArrow => "right",

        // Function keys
        Key::F1 => "f1",
        Key::F2 => "f2",
        Key::F3 => "f3",
        Key::F4 => "f4",
        Key::F5 => "f5",
        Key::F6 => "f6",
        Key::F7 => "f7",
        Key::F8 => "f8",
        Key::F9 => "f9",
        Key::F10 => "f10",
        Key::F11 => "f11",
        Key::F12 => "f12",

        // Letters
        Key::KeyA => "a",
        Key::KeyB => "b",
        Key::KeyC => "c",
        Key::KeyD => "d",
        Key::KeyE => "e",
        Key::KeyF => "f",
        Key::KeyG => "g",
        Key::KeyH => "h",
        Key::KeyI => "i",
        Key::KeyJ => "j",
        Key::KeyK => "k",
        Key::KeyL => "l",
        Key::KeyM => "m",
        Key::KeyN => "n",
        Key::KeyO => "o",
        Key::KeyP => "p",
        Key::KeyQ => "q",
        Key::KeyR => "r",
        Key::KeyS => "s",
        Key::KeyT => "t",
        Key::KeyU => "u",
        Key::KeyV => "v",
        Key::KeyW => "w",
        Key::KeyX => "x",
        Key::KeyY => "y",
        Key::KeyZ => "z",

        // Top-row digits
        Key::Num0 => "0",
        Key::Num1 => "1",
        Key::Num2 => "2",
        Key::Num3 => "3",
        Key::Num4 => "4",
        Key::Num5 => "5",
        Key::Num6 => "6",
        Key::Num7 => "7",
        Key::Num8 => "8",
        Key::Num9 => "9",

        // Punctuation / symbols
        Key::LeftBracket => "[",
        Key::RightBracket => "]",
        Key::SemiColon => ";",
        Key::Quote => "'",
        Key::BackQuote => "`",
        Key::BackSlash => "\\",
        Key::Comma => ",",
        Key::Dot => ".",
        Key::Slash => "/",
        Key::Minus => "-",
        Key::Equal => "=",

        // Locks and system keys
        Key::PrintScreen => "printscreen",
        Key::ScrollLock => "scrolllock",
        Key::Pause => "pause",
        Key::NumLock => "numlock",
        Key::CapsLock => "capslock",

        // Numpad
        Key::Kp0 => "kp0",
        Key::Kp1 => "kp1",
        Key::Kp2 => "kp2",
        Key::Kp3 => "kp3",
        Key::Kp4 => "kp4",
        Key::Kp5 => "kp5",
        Key::Kp6 => "kp6",
        Key::Kp7 => "kp7",
        Key::Kp8 => "kp8",
        Key::Kp9 => "kp9",
        Key::KpReturn => "kpenter",
        Key::KpMinus => "kpminus",
        Key::KpPlus => "kpplus",
        Key::KpMultiply => "kpmultiply",
        Key::KpDivide => "kpdivide",
        Key::KpDelete => "kpdelete",

        // Everything else has no config name.
        _ => return None,
    };

    Some(String::from(name))
}
/// Report whether `key` is one of the modifier keys (either ctrl, alt or
/// AltGr, either shift, either meta).
fn is_modifier(key: &Key) -> bool {
    const MODIFIERS: [Key; 8] = [
        Key::ControlLeft,
        Key::ControlRight,
        Key::Alt,
        Key::AltGr,
        Key::ShiftLeft,
        Key::ShiftRight,
        Key::MetaLeft,
        Key::MetaRight,
    ];
    MODIFIERS.contains(key)
}
/// Capture a hotkey combination from a real key press.
///
/// Spawns a listener thread and waits up to `timeout` for the first press of
/// a non-modifier key that `key_to_string` can name. Modifiers held at that
/// moment are prefixed in the fixed order ctrl, alt, shift, meta and joined
/// with `+` (e.g. "ctrl+[").
///
/// Returns `None` if nothing was captured before the timeout, or if the
/// listener thread ended without sending anything.
// NOTE(review): the listener thread is never joined or signalled to stop, so
// it presumably stays alive (blocked in `rdev::listen`) after this returns —
// confirm that is acceptable when this is called more than once.
pub fn capture_hotkey(timeout: Duration) -> Option<String> {
    let (combo_tx, combo_rx) = mpsc::channel();

    std::thread::spawn(move || {
        let mut held = ModifierState::default();

        let on_event = move |event: Event| match event.event_type {
            EventType::KeyRelease(key) => {
                held.update(&key, false);
            }
            EventType::KeyPress(key) => {
                held.update(&key, true);

                // A modifier on its own is not a hotkey; wait for a real key.
                if is_modifier(&key) {
                    return;
                }

                if let Some(key_name) = key_to_string(&key) {
                    let mut parts: Vec<String> = [
                        (held.ctrl, "ctrl"),
                        (held.alt, "alt"),
                        (held.shift, "shift"),
                        (held.meta, "meta"),
                    ]
                    .iter()
                    .filter(|(is_down, _)| *is_down)
                    .map(|(_, name)| name.to_string())
                    .collect();
                    parts.push(key_name);

                    // The receiver may already be gone; that is fine.
                    let _ = combo_tx.send(parts.join("+"));
                }
            }
            _ => {}
        };

        let _ = rdev::listen(on_event);
    });

    combo_rx.recv_timeout(timeout).ok()
}
/// Start the global hotkey listener on the current thread (blocking).
/// Sends HotkeyEvents to the provided channel.
/// Uses `rdev::grab` to intercept and consume hotkey events so they don't
/// reach the focused application.
pub fn listen(
hotkey: HotkeyCombination,
cancel_key: HotkeyCombination,
tx: mpsc::Sender<HotkeyEvent>,
) {
let debounce_duration = Duration::from_millis(30);
let mut last_event_time = Instant::now() - debounce_duration;
let mut modifier_state = ModifierState::default();
let mut hotkey_held = false;
info!("Hotkey listener started");
info!("Hotkey listener started (grab mode)");
debug!("Hotkey: {:?}", hotkey);
debug!("Cancel: {:?}", cancel_key);
let callback = move |event: Event| {
// rdev::grab requires Fn (not FnMut), so wrap mutable state in RefCell
struct GrabState {
last_event_time: Instant,
modifier_state: ModifierState,
hotkey_held: bool,
}
let state = RefCell::new(GrabState {
last_event_time: Instant::now() - debounce_duration,
modifier_state: ModifierState::default(),
hotkey_held: false,
});
let callback = move |event: Event| -> Option<Event> {
let mut s = state.borrow_mut();
let now = Instant::now();
match event.event_type {
EventType::KeyPress(key) => {
modifier_state.update(&key, true);
s.modifier_state.update(&key, true);
// Check cancel key
if key == cancel_key.key && modifier_state.all_held(&cancel_key.modifiers) {
if now.duration_since(last_event_time) >= debounce_duration {
last_event_time = now;
// Check cancel key — swallow it
if key == cancel_key.key && s.modifier_state.all_held(&cancel_key.modifiers) {
if now.duration_since(s.last_event_time) >= debounce_duration {
s.last_event_time = now;
debug!("Cancel key pressed");
if tx.send(HotkeyEvent::Cancel).is_err() {
error!("Failed to send cancel event");
}
}
return;
return None;
}
// Check hotkey
if key == hotkey.key && modifier_state.all_held(&hotkey.modifiers) {
if now.duration_since(last_event_time) >= debounce_duration && !hotkey_held {
last_event_time = now;
hotkey_held = true;
// Check hotkey — swallow it
if key == hotkey.key && s.modifier_state.all_held(&hotkey.modifiers) {
if now.duration_since(s.last_event_time) >= debounce_duration && !s.hotkey_held {
s.last_event_time = now;
s.hotkey_held = true;
debug!("Hotkey pressed");
if tx.send(HotkeyEvent::Pressed).is_err() {
error!("Failed to send pressed event");
}
}
return None;
}
Some(event)
}
EventType::KeyRelease(key) => {
modifier_state.update(&key, false);
s.modifier_state.update(&key, false);
// Check hotkey release (for push-to-talk)
if key == hotkey.key && hotkey_held {
if now.duration_since(last_event_time) >= debounce_duration {
last_event_time = now;
hotkey_held = false;
// Check hotkey release — swallow it
if key == hotkey.key && s.hotkey_held {
if now.duration_since(s.last_event_time) >= debounce_duration {
s.last_event_time = now;
s.hotkey_held = false;
debug!("Hotkey released");
if tx.send(HotkeyEvent::Released).is_err() {
error!("Failed to send released event");
}
}
return None;
}
Some(event)
}
_ => {}
_ => Some(event),
}
};
if let Err(e) = rdev::listen(callback) {
error!("Hotkey listener error: {:?}", e);
if let Err(e) = rdev::grab(callback) {
error!("Hotkey grab error: {:?}", e);
}
}
+233
View File
@@ -0,0 +1,233 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::io::{Read, Write};
use std::sync::Arc;
use tracing::{debug, info};
use crate::shared_state::SharedState;
/// Status response sent over IPC.
///
/// The listener serializes this to JSON and `query_daemon_status` parses it
/// back on the client side.
#[derive(Debug, Serialize, Deserialize)]
pub struct DaemonStatus {
/// Daemon version, taken from `CARGO_PKG_VERSION` when the daemon was built.
pub version: String,
/// Current state label as stored in the shared state (e.g. "idle").
pub state: String,
/// Model name the daemon was started with.
pub model: String,
/// Accelerator name the daemon was started with.
pub accelerator: String,
/// Whole seconds elapsed since the daemon's shared state was created.
pub uptime_secs: u64,
}
/// Return the IPC endpoint for the current platform: a Unix domain socket
/// path on Unix, a named-pipe path on Windows.
// NOTE(review): the Unix path is a fixed name under /tmp, so it is shared by
// every user on the machine — confirm that is intended.
pub fn ipc_path() -> String {
    #[cfg(unix)]
    const ENDPOINT: &str = "/tmp/mouth.sock";
    #[cfg(windows)]
    const ENDPOINT: &str = r"\\.\pipe\mouth";

    ENDPOINT.to_string()
}
/// Report whether a daemon answered a status query.
///
/// Any failure to connect, read, or parse the response counts as
/// "not running".
pub fn is_daemon_running() -> bool {
    matches!(query_daemon_status(), Ok(_))
}
/// Connect to the daemon's IPC endpoint and read back its status.
///
/// On Unix the daemon writes the JSON as soon as a client connects, so the
/// client only reads (with a 2 second read timeout). On Windows the client
/// first writes a newline to trigger the response, then performs one read.
///
/// # Errors
/// Fails if the endpoint cannot be opened, the read/write fails, or the
/// response is not valid `DaemonStatus` JSON.
pub fn query_daemon_status() -> Result<DaemonStatus> {
    let path = ipc_path();

    #[cfg(unix)]
    let raw: String = {
        use std::os::unix::net::UnixStream;

        let mut stream = UnixStream::connect(&path)
            .with_context(|| format!("Could not connect to daemon at {path}"))?;
        // Best effort: a failure to set the timeout is ignored.
        let _ = stream.set_read_timeout(Some(std::time::Duration::from_secs(2)));

        let mut response = String::new();
        stream.read_to_string(&mut response)?;
        response
    };

    #[cfg(windows)]
    let raw: String = {
        use std::fs::OpenOptions;

        let mut pipe = OpenOptions::new()
            .read(true)
            .write(true)
            .open(&path)
            .with_context(|| format!("Could not connect to daemon at {path}"))?;

        // Write a newline to trigger the server to respond.
        pipe.write_all(b"\n")?;
        pipe.flush()?;

        // Single read into a fixed buffer, since read_to_string waits for EOF.
        let mut chunk = vec![0u8; 4096];
        let len = pipe.read(&mut chunk)?;
        String::from_utf8_lossy(&chunk[..len]).into_owned()
    };

    serde_json::from_str(&raw).context("Invalid status response from daemon")
}
/// Run the IPC status listener on the calling thread.
///
/// This blocks while serving clients, so call it from a dedicated thread.
/// It logs the endpoint and then hands off to the platform-specific listener.
pub fn start_ipc_listener(shared_state: Arc<SharedState>) -> Result<()> {
    let path = ipc_path();
    info!("Starting IPC listener at {path}");

    #[cfg(unix)]
    let outcome = unix_listener(&path, shared_state);
    #[cfg(windows)]
    let outcome = windows_listener(&path, shared_state);

    outcome
}
#[cfg(unix)]
fn unix_listener(path: &str, shared_state: Arc<SharedState>) -> Result<()> {
use std::os::unix::net::UnixListener;
// Clean up stale socket
if std::path::Path::new(path).exists() {
if is_daemon_running() {
anyhow::bail!("Another instance of Mouth is already running");
}
std::fs::remove_file(path).ok();
}
let listener = UnixListener::bind(path).context("Failed to bind IPC socket")?;
info!("IPC listener ready");
for stream in listener.incoming() {
match stream {
Ok(mut stream) => {
let status = build_status(&shared_state);
match serde_json::to_string(&status) {
Ok(json) => {
if let Err(e) = stream.write_all(json.as_bytes()) {
debug!("Failed to write IPC response: {e}");
}
}
Err(e) => {
warn!("Failed to serialize status: {e}");
}
}
}
Err(e) => {
debug!("IPC accept error: {e}");
}
}
}
Ok(())
}
/// Serve daemon status over a Windows named pipe at `path` (blocking).
///
/// Each loop iteration handles exactly one client: create a pipe instance,
/// wait for a client to connect, read one trigger byte, write the
/// JSON-encoded status, then disconnect and close the handle. The loop has
/// no exit, so this function does not return.
///
/// The results of `ReadFile`, `WriteFile` and `FlushFileBuffers` are not
/// checked; a failed exchange simply ends with the disconnect below.
// NOTE(review): the pipe is created with PIPE_UNLIMITED_INSTANCES, so this
// call presumably does not by itself stop a second daemon from creating the
// same pipe name — confirm the single-instance check elsewhere is sufficient.
#[cfg(windows)]
fn windows_listener(path: &str, shared_state: Arc<SharedState>) -> Result<()> {
use windows_sys::Win32::Foundation::{CloseHandle, INVALID_HANDLE_VALUE};
use windows_sys::Win32::Storage::FileSystem::{
FlushFileBuffers, ReadFile, WriteFile, PIPE_ACCESS_DUPLEX,
};
use windows_sys::Win32::System::Pipes::{
ConnectNamedPipe, CreateNamedPipeW, DisconnectNamedPipe,
PIPE_READMODE_BYTE, PIPE_TYPE_BYTE, PIPE_UNLIMITED_INSTANCES, PIPE_WAIT,
};
// UTF-16 copy of the pipe name with a trailing NUL, as the wide-string API expects.
let wide_path: Vec<u16> = path.encode_utf16().chain(std::iter::once(0)).collect();
info!("IPC listener ready");
loop {
// SAFETY: `wide_path` is NUL-terminated and outlives the call; the
// security-attributes pointer is null.
let handle = unsafe {
CreateNamedPipeW(
wide_path.as_ptr(),
PIPE_ACCESS_DUPLEX,
PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT,
PIPE_UNLIMITED_INSTANCES,
4096,
4096,
0,
std::ptr::null(),
)
};
if handle == INVALID_HANDLE_VALUE {
// Back off for a second before retrying so a persistent failure does not spin.
tracing::error!("Failed to create named pipe");
std::thread::sleep(std::time::Duration::from_secs(1));
continue;
}
// Wait for a client to connect
// SAFETY: `handle` was checked against INVALID_HANDLE_VALUE above; the
// overlapped pointer is null.
let connected = unsafe { ConnectNamedPipe(handle, std::ptr::null_mut()) };
if connected == 0 {
let err = std::io::Error::last_os_error();
// ERROR_PIPE_CONNECTED (535) means client already connected — that's ok
if err.raw_os_error() != Some(535) {
debug!("ConnectNamedPipe error: {err}");
// SAFETY: `handle` is still open here and is not used again this iteration.
unsafe { CloseHandle(handle) };
continue;
}
}
// Read the trigger byte from the client (just 1 byte to unblock)
let mut read_buf = [0u8; 1];
let mut bytes_read: u32 = 0;
// SAFETY: `read_buf` is 1 byte long and the requested length is 1;
// `bytes_read` is a live local.
unsafe {
ReadFile(
handle,
read_buf.as_mut_ptr(),
1,
&mut bytes_read,
std::ptr::null_mut(),
);
}
// Write the status response
let status = build_status(&shared_state);
// A serialization failure is skipped silently; the client then gets no payload.
if let Ok(json) = serde_json::to_string(&status) {
let bytes = json.as_bytes();
let mut written: u32 = 0;
// SAFETY: the pointer and length both come from `bytes`, which stays
// alive for the duration of the call; `written` is a live local.
unsafe {
WriteFile(
handle,
bytes.as_ptr().cast(),
bytes.len() as u32,
&mut written,
std::ptr::null_mut(),
);
FlushFileBuffers(handle);
}
}
// SAFETY: `handle` is open at this point and is not used after being closed.
unsafe {
DisconnectNamedPipe(handle);
CloseHandle(handle);
}
}
}
/// Snapshot the shared state into a `DaemonStatus` ready to be serialized.
fn build_status(shared_state: &SharedState) -> DaemonStatus {
    let version = env!("CARGO_PKG_VERSION").to_string();
    let state = shared_state.get_state();
    let model = shared_state.model.clone();
    let accelerator = shared_state.accelerator.clone();
    let uptime_secs = shared_state.uptime_secs();

    DaemonStatus {
        version,
        state,
        model,
        accelerator,
        uptime_secs,
    }
}
/// Remove the IPC socket file on Unix; does nothing on other platforms.
///
/// Best effort: a failure to remove the file (for example because it is
/// already gone) is ignored.
pub fn cleanup() {
    #[cfg(unix)]
    let _ = std::fs::remove_file(ipc_path());
}
+2
View File
@@ -3,10 +3,12 @@ mod cli;
mod config;
mod coordinator;
mod hotkey;
mod ipc;
mod model_cache;
mod overlay;
mod paste;
mod recorder;
mod shared_state;
mod transcriber;
mod vad;
+17
View File
@@ -82,6 +82,23 @@ pub fn ensure_model(model_name: &str) -> Result<ModelPaths> {
})
}
/// Fetch the Silero VAD ONNX model through the HuggingFace Hub API and
/// return the local path reported for it.
///
/// # Errors
/// Fails if the Hub API client cannot be created or the file cannot be
/// obtained from the repository.
pub fn ensure_vad_model() -> Result<PathBuf> {
    let repo_id = "onnx-community/silero-vad";
    let model_file = "onnx/model.onnx";

    let api = Api::new().context("Failed to create HuggingFace Hub API")?;
    let repo = api.model(repo_id.to_string());
    info!("Ensuring Silero VAD model from {repo_id}");

    let fetched = repo
        .get(model_file)
        .with_context(|| format!("Failed to download VAD model from {repo_id}"));
    if let Ok(path) = &fetched {
        debug!("VAD model: {}", path.display());
    }
    fetched
}
/// Check if model files are already cached.
pub fn is_model_cached(model_name: &str) -> bool {
ensure_model(model_name).is_ok()
+139 -2
View File
@@ -8,8 +8,8 @@ use winit::window::{Window, WindowAttributes, WindowId, WindowLevel};
use crate::config::OverlayPosition;
const OVERLAY_WIDTH: u32 = 200;
const OVERLAY_HEIGHT: u32 = 36;
const OVERLAY_WIDTH: u32 = 150;
const OVERLAY_HEIGHT: u32 = 18;
/// State of the overlay display.
#[derive(Debug, Clone, Copy, PartialEq)]
@@ -34,6 +34,8 @@ struct OverlayApp {
surface: Option<softbuffer::Surface<std::rc::Rc<Window>, std::rc::Rc<Window>>>,
state: OverlayState,
position: OverlayPosition,
_tray_icon: Option<tray_icon::TrayIcon>,
tray_exit_id: Option<tray_icon::menu::MenuId>,
}
impl OverlayApp {
@@ -99,6 +101,43 @@ impl OverlayApp {
window.set_visible(visible);
}
}
/// Build the system tray icon with a single "Exit" menu entry.
///
/// On success the tray handle and the id of the Exit item are stored on
/// `self`. Every failure is logged as a warning and leaves `self` untouched,
/// so the app simply runs without a tray icon.
fn create_tray_icon(&mut self) {
    use tray_icon::menu::{Menu, MenuItem};
    use tray_icon::TrayIconBuilder;

    let menu = Menu::new();
    let exit_item = MenuItem::new("Exit", true, None);
    let exit_id = exit_item.id().clone();
    if let Err(e) = menu.append(&exit_item) {
        warn!("Failed to add tray menu item: {e}");
        return;
    }

    let icon = match load_tray_icon() {
        Ok(icon) => icon,
        Err(e) => {
            warn!("Failed to load tray icon: {e}");
            return;
        }
    };

    let built = TrayIconBuilder::new()
        .with_menu(Box::new(menu))
        .with_tooltip("Mouth — Speech to Text")
        .with_icon(icon)
        .build();
    let tray = match built {
        Ok(tray) => tray,
        Err(e) => {
            warn!("Failed to create tray icon: {e}");
            return;
        }
    };

    info!("System tray icon created");
    // Keep the handle stored on `self` alongside the Exit item's id.
    self._tray_icon = Some(tray);
    self.tray_exit_id = Some(exit_id);
}
}
impl ApplicationHandler<OverlayEvent> for OverlayApp {
@@ -154,6 +193,9 @@ impl ApplicationHandler<OverlayEvent> for OverlayApp {
error!("Failed to create overlay window: {e}");
}
}
// Create tray icon (must be done on the main/event-loop thread)
self.create_tray_icon();
}
fn user_event(&mut self, event_loop: &ActiveEventLoop, event: OverlayEvent) {
@@ -176,6 +218,99 @@ impl ApplicationHandler<OverlayEvent> for OverlayApp {
self.draw();
}
}
/// Poll the tray menu for a pending event and exit the loop on "Exit".
///
/// At most one menu event is taken per call, and nothing is polled when no
/// tray icon was created. On exit the IPC socket is cleaned up before the
/// event loop is asked to stop.
fn about_to_wait(&mut self, event_loop: &ActiveEventLoop) {
    // Without a tray icon there is no menu to poll.
    let exit_id = match self.tray_exit_id.as_ref() {
        Some(id) => id,
        None => return,
    };

    let event = match tray_icon::menu::MenuEvent::receiver().try_recv() {
        Ok(event) => event,
        Err(_) => return,
    };

    if event.id() != exit_id {
        return;
    }

    info!("Exit requested via tray icon");
    crate::ipc::cleanup();
    event_loop.exit();
}
}
/// Render the 32x32 tray icon procedurally: a white microphone glyph whose
/// shape is carried entirely by the alpha channel.
///
/// # Errors
/// Returns whatever `tray_icon::Icon::from_rgba` reports for the pixel buffer.
fn load_tray_icon() -> Result<tray_icon::Icon, Box<dyn std::error::Error>> {
    const S: u32 = 32;

    /// Opacity for the pixel whose centre is `(fx, fy)`: the maximum over the
    /// four glyph parts (capsule, cradle arc, stem, base). The caller clamps
    /// the result to `0.0..=1.0`.
    fn coverage(fx: f32, fy: f32) -> f32 {
        let cx = S as f32 / 2.0;
        let off_axis = (fx - cx).abs();
        let mut alpha: f32 = 0.0;

        // Microphone body: capsule centred on x = cx, spanning y = 3..18,
        // radius 5.5, with a 1px soft edge.
        const MIC_TOP: f32 = 3.0;
        const MIC_BOT: f32 = 18.0;
        const MIC_R: f32 = 5.5;
        let nearest_y = fy.clamp(MIC_TOP + MIC_R, MIC_BOT - MIC_R);
        let mic_dist = ((fx - cx).powi(2) + (fy - nearest_y).powi(2)).sqrt();
        if mic_dist <= MIC_R {
            alpha = 1.0;
        } else if mic_dist <= MIC_R + 1.0 {
            alpha = alpha.max(MIC_R + 1.0 - mic_dist); // anti-alias
        }

        // Cradle: lower half of a ring centred at y = 14, radius 8.5,
        // 2.2px thick.
        const ARC_CY: f32 = 14.0;
        const ARC_R: f32 = 8.5;
        const ARC_THICKNESS: f32 = 2.2;
        let ring_inner = ARC_R - ARC_THICKNESS / 2.0;
        let ring_outer = ARC_R + ARC_THICKNESS / 2.0;
        let dx = fx - cx;
        let dy = fy - ARC_CY;
        let ring_dist = (dx * dx + dy * dy).sqrt();
        if fy >= ARC_CY && ring_dist >= ring_inner && ring_dist <= ring_outer {
            let edge_outer = (ring_outer - ring_dist).min(1.0).max(0.0);
            let edge_inner = (ring_dist - ring_inner).min(1.0).max(0.0);
            alpha = alpha.max(edge_outer.min(edge_inner));
        }

        // Stem: vertical bar from y = 22 to y = 27, half-width 1.2.
        const STEM_TOP: f32 = 22.0;
        const STEM_BOT: f32 = 27.0;
        const STEM_W: f32 = 1.2;
        if fy >= STEM_TOP && fy <= STEM_BOT && off_axis <= STEM_W {
            alpha = alpha.max((STEM_W - off_axis).min(1.0));
        }

        // Base: horizontal bar from y = 27 to y = 29, half-width 5.
        const BASE_Y: f32 = 27.0;
        const BASE_H: f32 = 2.0;
        const BASE_HW: f32 = 5.0;
        if fy >= BASE_Y && fy <= BASE_Y + BASE_H && off_axis <= BASE_HW {
            alpha = alpha.max((BASE_HW - off_axis).min(1.0));
        }

        alpha
    }

    let mut pixels = vec![0u8; (S * S * 4) as usize];
    // Row-major RGBA: chunk `i` is the pixel at column `i % S`, row `i / S`.
    for (i, rgba) in pixels.chunks_exact_mut(4).enumerate() {
        let x = i as u32 % S;
        let y = i as u32 / S;
        let alpha = coverage(x as f32 + 0.5, y as f32 + 0.5);
        let a = (alpha.clamp(0.0, 1.0) * 255.0) as u8;
        // White icon with alpha (looks good on both light and dark taskbars)
        rgba.copy_from_slice(&[255, 255, 255, a]);
    }

    Ok(tray_icon::Icon::from_rgba(pixels, S, S)?)
}
/// Create an event loop and return the proxy for sending events.
@@ -195,6 +330,8 @@ pub fn run_event_loop(
surface: None,
state: OverlayState::Hidden,
position,
_tray_icon: None,
tray_exit_id: None,
};
event_loop.run_app(&mut app)
+9 -1
View File
@@ -7,6 +7,9 @@ use std::sync::{Arc, Mutex};
use tracing::{debug, error, info, warn};
const TARGET_SAMPLE_RATE: u32 = 16000;
/// Silence prepended to recordings to give the model a clean lead-in,
/// compensating for mic startup latency.
const LEAD_IN_MS: u32 = 300;
/// Commands sent to the recorder.
#[derive(Debug)]
@@ -252,8 +255,13 @@ pub fn run(
debug!("Resampled to {} samples at {}Hz", samples.len(), TARGET_SAMPLE_RATE);
// Prepend silence to compensate for mic startup latency
let lead_in_samples = (TARGET_SAMPLE_RATE * LEAD_IN_MS / 1000) as usize;
let mut padded = vec![0.0f32; lead_in_samples];
padded.extend_from_slice(&samples);
let audio = AudioData {
samples,
samples: padded,
sample_rate: TARGET_SAMPLE_RATE,
};
+35
View File
@@ -0,0 +1,35 @@
use std::sync::RwLock;
use std::time::Instant;
/// Thread-safe shared state accessible by the coordinator, IPC listener, and tray icon.
pub struct SharedState {
pub state: RwLock<String>,
pub model: String,
pub accelerator: String,
pub started_at: Instant,
}
impl SharedState {
pub fn new(model: String, accelerator: String) -> Self {
Self {
state: RwLock::new("idle".to_string()),
model,
accelerator,
started_at: Instant::now(),
}
}
pub fn set_state(&self, state: &str) {
if let Ok(mut s) = self.state.write() {
*s = state.to_string();
}
}
pub fn get_state(&self) -> String {
self.state.read().map(|s| s.clone()).unwrap_or_else(|_| "unknown".to_string())
}
pub fn uptime_secs(&self) -> u64 {
self.started_at.elapsed().as_secs()
}
}
+6 -6
View File
@@ -22,7 +22,7 @@ pub struct Transcriber {
encoder: Session,
decoder: Session,
vocab: Vec<String>,
blank_id: i64,
blank_id: i32,
vocab_size: usize,
}
@@ -45,7 +45,7 @@ impl Transcriber {
let vocab = load_vocab(&paths.vocab)?;
let vocab_size = vocab.len();
let blank_id = (vocab_size - 1) as i64; // <blk> is the last token
let blank_id = (vocab_size - 1) as i32; // <blk> is the last token
info!("Vocab loaded: {vocab_size} tokens, blank_id={blank_id}");
Ok(Self {
@@ -121,7 +121,7 @@ impl Transcriber {
Ok((enc_data.to_vec(), feat_dim, encoded_length))
}
fn tdt_greedy_decode(&mut self, encoder_output: &[f32], feat_dim: usize, encoded_length: usize) -> Result<Vec<i64>> {
fn tdt_greedy_decode(&mut self, encoder_output: &[f32], feat_dim: usize, encoded_length: usize) -> Result<Vec<i32>> {
// Determine decoder LSTM state dimensions by inspecting input metadata
// Default fallback values
let mut state_shape: [usize; 3] = [1, 1, 640];
@@ -168,7 +168,7 @@ impl Transcriber {
let frame = Array3::from_shape_vec([1, feat_dim, 1], frame_data)?;
let targets = ndarray::Array2::from_shape_vec((1, 1), vec![prev_token])?;
let target_length = ndarray::Array1::from_vec(vec![1i64]);
let target_length = ndarray::Array1::from_vec(vec![1i32]);
let outputs = self.decoder.run(vec![
make_input("encoder_outputs", Value::from_array(frame)?.into_dyn()),
@@ -186,7 +186,7 @@ impl Transcriber {
let token_logits = &output_data[..self.vocab_size];
let duration_logits = &output_data[self.vocab_size..];
let token_id = argmax(token_logits) as i64;
let token_id = argmax(token_logits) as i32;
let duration = if !duration_logits.is_empty() {
argmax(duration_logits)
} else {
@@ -225,7 +225,7 @@ impl Transcriber {
Ok(tokens)
}
fn tokens_to_text(&self, tokens: &[i64]) -> String {
fn tokens_to_text(&self, tokens: &[i32]) -> String {
let mut text = String::new();
for &token_id in tokens {
if token_id >= 0 && (token_id as usize) < self.vocab.len() {