Implement core speech-to-text pipeline

All major components: hotkey listener (rdev), audio capture (cpal),
resampling (rubato), VAD (Silero ONNX), Parakeet v3 TDT transcription
(ort), overlay window (winit+softbuffer), paste simulation (enigo+arboard),
audio feedback (rodio), YAML config, CLI with clap, HuggingFace model
download. ~2400 lines of Rust across 16 source files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 16:47:46 +01:00
parent 6b737f92fe
commit 9b0bf7d9e3
22 changed files with 7750 additions and 0 deletions
+82
View File
@@ -0,0 +1,82 @@
mod audio_feedback;
mod cli;
mod config;
mod coordinator;
mod hotkey;
mod model_cache;
mod overlay;
mod paste;
mod recorder;
mod transcriber;
mod vad;
use clap::{Parser, Subcommand};
// Top-level command-line definition for the `mouth` binary, parsed via clap's derive API.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments on
// these items into help text, which would change the program's output.
#[derive(Parser)]
#[command(name = "mouth", version, about = "Offline speech-to-text with global hotkey and paste")]
struct Cli {
// Optional subcommand. `main` handles `None` in the same match arm as `Commands::Run`.
#[command(subcommand)]
command: Option<Commands>,
}
// Subcommands accepted by the `mouth` CLI; `main` dispatches each variant to a
// handler function under the `cli` module.
// The `///` lines below are picked up by clap as user-facing help text, so they are
// program output rather than ordinary comments and must not be reworded casually.
#[derive(Subcommand)]
enum Commands {
/// Start the mouth daemon
Run,
// With neither flag set, `main` calls `cli::config_cmd::interactive()`.
// NOTE(review): nothing here declares `--show` and `--reset` as mutually exclusive;
// `main` tests `show` first, so passing both ignores `--reset` — confirm this is intended.
/// View or edit configuration
Config {
/// Print current config to stdout
#[arg(long)]
show: bool,
/// Reset config to defaults
#[arg(long)]
reset: bool,
},
// Without `--download`, `main` calls `cli::models_cmd::list()` instead.
/// Manage speech-to-text models
Models {
/// Download the configured model
#[arg(long)]
download: bool,
},
/// Show daemon status, loaded model, and version
Status,
}
/// Program entry point: installs the tracing subscriber, parses the command
/// line, and hands control to the matching handler in the `cli` module.
///
/// # Errors
///
/// Returns whatever error the selected handler reports.
fn main() -> anyhow::Result<()> {
    // Use the filter from the default environment variable when it is present and
    // parses; otherwise fall back to the "info" directive.
    let log_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
    tracing_subscriber::fmt().with_env_filter(log_filter).init();

    let args = Cli::parse();
    match args.command {
        // Omitting the subcommand behaves exactly like an explicit `run`.
        None | Some(Commands::Run) => cli::run_cmd::run(),

        // Arm order encodes precedence: `show` is tested before `reset`, so it
        // wins when both flags are supplied; with neither, go interactive.
        Some(Commands::Config { show: true, .. }) => cli::config_cmd::show(),
        Some(Commands::Config { reset: true, .. }) => cli::config_cmd::reset(),
        Some(Commands::Config { .. }) => cli::config_cmd::interactive(),

        Some(Commands::Models { download: true }) => cli::models_cmd::download(),
        Some(Commands::Models { download: false }) => cli::models_cmd::list(),

        Some(Commands::Status) => cli::status_cmd::status(),
    }
}