Implement core speech-to-text pipeline

All major components: hotkey listener (rdev), audio capture (cpal),
resampling (rubato), VAD (Silero ONNX), Parakeet v3 TDT transcription
(ort), overlay window (winit+softbuffer), paste simulation (enigo+arboard),
audio feedback (rodio), YAML config, CLI with clap, HuggingFace model
download. ~2400 lines of Rust across 16 source files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 16:47:46 +01:00
parent 6b737f92fe
commit 9b0bf7d9e3
22 changed files with 7750 additions and 0 deletions
+201
View File
@@ -0,0 +1,201 @@
use std::num::NonZeroU32;
use tracing::{debug, error, info, warn};
use winit::application::ApplicationHandler;
use winit::dpi::{LogicalSize, PhysicalPosition};
use winit::event::WindowEvent;
use winit::event_loop::{ActiveEventLoop, EventLoop, EventLoopProxy};
use winit::window::{Window, WindowAttributes, WindowId, WindowLevel};
use crate::config::OverlayPosition;
/// Requested width of the overlay window. Passed to `LogicalSize` when the
/// window is created, so the unit is logical pixels.
const OVERLAY_WIDTH: u32 = 200;
/// Requested height of the overlay window. Passed to `LogicalSize` when the
/// window is created, so the unit is logical pixels.
const OVERLAY_HEIGHT: u32 = 36;
/// State of the overlay display.
///
/// The state selects the fill colour used when the overlay is drawn and
/// whether the overlay window is visible at all.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum OverlayState {
    /// The overlay window is not shown.
    Hidden,
    /// Drawn in red.
    Recording,
    /// Drawn in amber.
    Transcribing,
    /// Drawn in green.
    Done,
    /// Drawn in red (the same colour value as `Recording`).
    Error,
}
/// Events sent to the overlay from the coordinator.
///
/// These are the user events carried by the winit event loop.
#[derive(Debug, Clone)]
pub enum OverlayEvent {
    /// Switch the overlay to the given state; the handler then updates the
    /// window's visibility and redraws it.
    SetState(OverlayState),
    /// Ask the event loop to exit.
    Shutdown,
}
/// The overlay application handler for winit.
struct OverlayApp {
    /// The overlay window; `None` until `resumed` has created it (and it stays
    /// `None` if window creation fails).
    window: Option<std::rc::Rc<Window>>,
    /// Software-rendering surface for `window`; `None` when it has not been
    /// created yet or its creation failed, in which case drawing is skipped.
    surface: Option<softbuffer::Surface<std::rc::Rc<Window>, std::rc::Rc<Window>>>,
    /// State currently being displayed.
    state: OverlayState,
    /// Configured placement, consulted once when the window is created.
    position: OverlayPosition,
}
impl OverlayApp {
    /// Corner radius of the rounded overlay rectangle, in physical pixels.
    const CORNER_RADIUS: usize = 8;

    /// Report whether pixel `(x, y)` lies inside a `width` x `height` rectangle
    /// whose four corners are rounded with `radius`.
    ///
    /// The caller must guarantee `radius <= width / 2` and
    /// `radius <= height / 2`; that precondition is what keeps the
    /// subtractions below from underflowing.
    fn in_rounded_rect(x: usize, y: usize, width: usize, height: usize, radius: usize) -> bool {
        let near_x_edge = x < radius || x >= width - radius;
        let near_y_edge = y < radius || y >= height - radius;
        if !(near_x_edge && near_y_edge) {
            // Not inside one of the four corner squares: always filled.
            return true;
        }

        // Centre of the circle that rounds the corner this pixel belongs to.
        let cx = if x < radius { radius } else { width - radius - 1 };
        let cy = if y < radius { radius } else { height - radius - 1 };
        let dx = x.abs_diff(cx);
        let dy = y.abs_diff(cy);
        dx * dx + dy * dy <= radius * radius
    }

    /// Fill the overlay surface with the colour of the current state, leaving
    /// the pixels outside the rounded corners at `0x00000000`.
    ///
    /// Drawing is skipped silently when there is no window or surface, when
    /// the window has a zero-sized inner area, or when the surface cannot be
    /// resized or mapped.
    fn draw(&mut self) {
        let Some(surface) = &mut self.surface else { return };
        let Some(window) = &self.window else { return };

        let size = window.inner_size();
        let (Some(w), Some(h)) = (NonZeroU32::new(size.width), NonZeroU32::new(size.height))
        else {
            return;
        };
        if surface.resize(w, h).is_err() {
            return;
        }
        let Ok(mut buffer) = surface.buffer_mut() else { return };

        let color: u32 = match self.state {
            OverlayState::Hidden => 0x00000000,
            OverlayState::Recording => 0xFFDD3333,    // Red
            OverlayState::Transcribing => 0xFFDDAA33, // Amber
            OverlayState::Done => 0xFF33AA33,         // Green
            OverlayState::Error => 0xFFDD3333,        // Red
        };

        // u32 -> usize is a widening conversion on 32- and 64-bit targets.
        let width = size.width as usize;
        let height = size.height as usize;
        // Clamp the radius so that windows smaller than twice the nominal
        // radius cannot underflow the corner arithmetic.
        let radius = Self::CORNER_RADIUS.min(width / 2).min(height / 2);

        for (y, row) in buffer.chunks_exact_mut(width).take(height).enumerate() {
            for (x, pixel) in row.iter_mut().enumerate() {
                *pixel = if Self::in_rounded_rect(x, y, width, height, radius) {
                    color
                } else {
                    0x00000000
                };
            }
        }

        // Presenting is best-effort: a failed present only loses this frame.
        let _ = buffer.present();
    }

    /// Show the window in every state except `OverlayState::Hidden`.
    /// Does nothing when the window has not been created.
    fn update_visibility(&self) {
        if let Some(window) = &self.window {
            window.set_visible(self.state != OverlayState::Hidden);
        }
    }
}
impl ApplicationHandler<OverlayEvent> for OverlayApp {
fn resumed(&mut self, event_loop: &ActiveEventLoop) {
if self.window.is_some() {
return;
}
let attrs = WindowAttributes::default()
.with_title("Mouth")
.with_inner_size(LogicalSize::new(OVERLAY_WIDTH, OVERLAY_HEIGHT))
.with_resizable(false)
.with_decorations(false)
.with_transparent(true)
.with_window_level(WindowLevel::AlwaysOnTop)
.with_visible(false);
match event_loop.create_window(attrs) {
Ok(window) => {
let window = std::rc::Rc::new(window);
// Position at top center of primary monitor
if let Some(monitor) = window.current_monitor() {
let screen_size = monitor.size();
let pos = match self.position {
OverlayPosition::Top => PhysicalPosition::new(
(screen_size.width - OVERLAY_WIDTH) / 2,
10,
),
OverlayPosition::Bottom => PhysicalPosition::new(
(screen_size.width - OVERLAY_WIDTH) / 2,
screen_size.height - OVERLAY_HEIGHT - 50,
),
OverlayPosition::None => PhysicalPosition::new(0, 0),
};
window.set_outer_position(pos);
}
let context = softbuffer::Context::new(window.clone()).ok();
let surface = context.and_then(|ctx| {
softbuffer::Surface::new(&ctx, window.clone()).ok()
});
if surface.is_none() {
warn!("Could not create softbuffer surface — overlay rendering disabled");
}
self.surface = surface;
self.window = Some(window);
info!("Overlay window created");
}
Err(e) => {
error!("Failed to create overlay window: {e}");
}
}
}
fn user_event(&mut self, event_loop: &ActiveEventLoop, event: OverlayEvent) {
match event {
OverlayEvent::SetState(state) => {
debug!("Overlay state: {:?} -> {:?}", self.state, state);
self.state = state;
self.update_visibility();
self.draw();
}
OverlayEvent::Shutdown => {
info!("Overlay shutting down");
event_loop.exit();
}
}
}
fn window_event(&mut self, _event_loop: &ActiveEventLoop, _id: WindowId, event: WindowEvent) {
if let WindowEvent::RedrawRequested = event {
self.draw();
}
}
}
/// Build the overlay's winit event loop together with a proxy for it.
///
/// The proxy is what other code uses to send [`OverlayEvent`]s into the loop.
///
/// # Errors
///
/// Returns the `EventLoopError` reported by winit if the loop cannot be built.
pub fn create_event_loop() -> Result<(EventLoop<OverlayEvent>, EventLoopProxy<OverlayEvent>), winit::error::EventLoopError> {
    EventLoop::<OverlayEvent>::with_user_event()
        .build()
        .map(|event_loop| {
            let proxy = event_loop.create_proxy();
            (event_loop, proxy)
        })
}
/// Drive `event_loop` with a fresh overlay handler until the loop exits.
///
/// The handler starts in [`OverlayState::Hidden`] with no window or surface;
/// `position` is stored on it for use when the window is created.
///
/// # Errors
///
/// Propagates the `EventLoopError` returned by winit's `run_app`.
pub fn run_event_loop(
    event_loop: EventLoop<OverlayEvent>,
    position: OverlayPosition,
) -> Result<(), winit::error::EventLoopError> {
    let mut overlay = OverlayApp {
        position,
        state: OverlayState::Hidden,
        surface: None,
        window: None,
    };
    event_loop.run_app(&mut overlay)
}