Implement core speech-to-text pipeline

All major components: hotkey listener (rdev), audio capture (cpal),
resampling (rubato), VAD (Silero ONNX), Parakeet v3 TDT transcription
(ort), overlay window (winit+softbuffer), paste simulation (enigo+arboard),
audio feedback (rodio), YAML config, CLI with clap, HuggingFace model
download. ~2400 lines of Rust across 16 source files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 16:47:46 +01:00
parent 6b737f92fe
commit 9b0bf7d9e3
22 changed files with 7750 additions and 0 deletions
+127
View File
@@ -0,0 +1,127 @@
use anyhow::Result;
use dialoguer::{Input, Select};
use crate::config::{Accelerator, Config, OverlayPosition, PasteMethod, RecordingMode};
/// Prints the current configuration to stdout as YAML.
///
/// # Errors
///
/// Returns an error if `Config::load` fails or if the configuration cannot
/// be serialized to YAML.
pub fn show() -> Result<()> {
    let current = Config::load()?;
    let rendered = serde_yaml::to_string(&current)?;
    println!("{}", rendered);
    Ok(())
}
/// Saves a default-constructed configuration and reports where it was written.
///
/// # Errors
///
/// Returns an error if saving the configuration fails, or if the config
/// path cannot be resolved afterwards for the confirmation message.
pub fn reset() -> Result<()> {
    // Save first, then resolve the path, so a path failure cannot mask a save failure.
    Config::default().save()?;
    let location = Config::path()?;
    println!("Config reset to defaults at {}", location.display());
    Ok(())
}
/// Walks the user through every configuration setting on the terminal and
/// saves the result.
///
/// The existing configuration is loaded first and each prompt uses the
/// current value as its default. Prompts are asked in this order: hotkey,
/// recording mode, cancel key, model, accelerator, GPU device index, paste
/// method, overlay position, audio feedback, VAD, language.
///
/// The configuration is only saved after the final prompt; if any prompt
/// fails, the function returns early and nothing is written.
///
/// # Errors
///
/// Returns an error if loading the configuration, any prompt interaction,
/// saving, or resolving the config path fails.
pub fn interactive() -> Result<()> {
    // Shows a single-choice menu and returns the index of the chosen label.
    fn pick(prompt: &str, labels: &[&str], preselected: usize) -> Result<usize> {
        let chosen = Select::new()
            .with_prompt(prompt)
            .items(labels)
            .default(preselected)
            .interact()?;
        Ok(chosen)
    }

    let mut config = Config::load()?;

    // --- Hotkey -----------------------------------------------------------
    let hotkey = Input::new()
        .with_prompt("Hotkey")
        .default(config.hotkey)
        .interact_text()?;
    config.hotkey = hotkey;

    // --- Recording mode ---------------------------------------------------
    let mode_preselected = match config.mode {
        RecordingMode::PushToTalk => 0,
        RecordingMode::Toggle => 1,
    };
    let mode_choice = pick("Recording mode", &["push_to_talk", "toggle"], mode_preselected)?;
    config.mode = if mode_choice == 0 {
        RecordingMode::PushToTalk
    } else {
        RecordingMode::Toggle
    };

    // --- Cancel key -------------------------------------------------------
    let cancel_key = Input::new()
        .with_prompt("Cancel key")
        .default(config.cancel_key)
        .interact_text()?;
    config.cancel_key = cancel_key;

    // --- Model ------------------------------------------------------------
    let model = Input::new()
        .with_prompt("Model")
        .default(config.model)
        .interact_text()?;
    config.model = model;

    // --- Accelerator ------------------------------------------------------
    let accel_preselected = match config.accelerator {
        Accelerator::Auto => 0,
        Accelerator::Cpu => 1,
        Accelerator::Cuda => 2,
        Accelerator::DirectMl => 3,
    };
    let accel_choice = pick(
        "Accelerator",
        &["auto", "cpu", "cuda", "directml"],
        accel_preselected,
    )?;
    // The index is a plain usize, so the last label doubles as the fallback arm.
    config.accelerator = match accel_choice {
        0 => Accelerator::Auto,
        1 => Accelerator::Cpu,
        2 => Accelerator::Cuda,
        _ => Accelerator::DirectMl,
    };

    // --- GPU device -------------------------------------------------------
    let gpu_device = Input::new()
        .with_prompt("GPU device index")
        .default(config.gpu_device)
        .interact_text()?;
    config.gpu_device = gpu_device;

    // --- Paste method -----------------------------------------------------
    let paste_preselected = match config.paste_method {
        PasteMethod::CtrlV => 0,
        PasteMethod::ShiftInsert => 1,
        PasteMethod::CtrlShiftV => 2,
        PasteMethod::ClipboardOnly => 3,
    };
    let paste_choice = pick(
        "Paste method",
        &["ctrl_v", "shift_insert", "ctrl_shift_v", "clipboard_only"],
        paste_preselected,
    )?;
    config.paste_method = match paste_choice {
        0 => PasteMethod::CtrlV,
        1 => PasteMethod::ShiftInsert,
        2 => PasteMethod::CtrlShiftV,
        _ => PasteMethod::ClipboardOnly,
    };

    // --- Overlay position -------------------------------------------------
    let overlay_preselected = match config.overlay_position {
        OverlayPosition::Top => 0,
        OverlayPosition::Bottom => 1,
        OverlayPosition::None => 2,
    };
    let overlay_choice = pick(
        "Overlay position",
        &["top", "bottom", "none"],
        overlay_preselected,
    )?;
    config.overlay_position = match overlay_choice {
        0 => OverlayPosition::Top,
        1 => OverlayPosition::Bottom,
        _ => OverlayPosition::None,
    };

    // --- Audio feedback ---------------------------------------------------
    // Index 0 is the affirmative label, so an enabled flag preselects 0.
    let feedback_choice = pick(
        "Audio feedback",
        &["yes", "no"],
        usize::from(!config.audio_feedback),
    )?;
    config.audio_feedback = feedback_choice == 0;

    // --- VAD --------------------------------------------------------------
    let vad_choice = pick(
        "VAD (voice activity detection)",
        &["enabled", "disabled"],
        usize::from(!config.vad_enabled),
    )?;
    config.vad_enabled = vad_choice == 0;

    // --- Language ---------------------------------------------------------
    let language = Input::new()
        .with_prompt("Language")
        .default(config.language)
        .interact_text()?;
    config.language = language;

    // Save only after every prompt has succeeded.
    config.save()?;
    let location = Config::path()?;
    println!("\nConfig saved to {}", location.display());
    Ok(())
}