Implement core speech-to-text pipeline
Adds all major components: hotkey listener (rdev), audio capture (cpal), resampling (rubato), VAD (Silero ONNX), Parakeet v3 TDT transcription (ort), overlay window (winit+softbuffer), paste simulation (enigo+arboard), audio feedback (rodio), YAML config, CLI with clap, and HuggingFace model download. About 2,400 lines of Rust across 16 source files.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+184
@@ -0,0 +1,184 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RecordingMode {
|
||||
PushToTalk,
|
||||
Toggle,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum PasteMethod {
|
||||
CtrlV,
|
||||
ShiftInsert,
|
||||
CtrlShiftV,
|
||||
ClipboardOnly,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum OverlayPosition {
|
||||
Top,
|
||||
Bottom,
|
||||
None,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Accelerator {
|
||||
Auto,
|
||||
Cpu,
|
||||
Cuda,
|
||||
DirectMl,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Config {
|
||||
/// Hotkey to activate recording
|
||||
#[serde(default = "defaults::hotkey")]
|
||||
pub hotkey: String,
|
||||
|
||||
/// Recording mode
|
||||
#[serde(default = "defaults::mode")]
|
||||
pub mode: RecordingMode,
|
||||
|
||||
/// Cancel hotkey (only active while recording)
|
||||
#[serde(default = "defaults::cancel_key")]
|
||||
pub cancel_key: String,
|
||||
|
||||
/// Speech-to-text model identifier
|
||||
#[serde(default = "defaults::model")]
|
||||
pub model: String,
|
||||
|
||||
/// Inference accelerator
|
||||
#[serde(default = "defaults::accelerator")]
|
||||
pub accelerator: Accelerator,
|
||||
|
||||
/// GPU device index (when accelerator is cuda/directml)
|
||||
#[serde(default)]
|
||||
pub gpu_device: u32,
|
||||
|
||||
/// How to paste transcribed text
|
||||
#[serde(default = "defaults::paste_method")]
|
||||
pub paste_method: PasteMethod,
|
||||
|
||||
/// Keep transcribed text on clipboard after pasting
|
||||
#[serde(default = "defaults::yes")]
|
||||
pub copy_to_clipboard: bool,
|
||||
|
||||
/// Overlay position on screen
|
||||
#[serde(default = "defaults::overlay_position")]
|
||||
pub overlay_position: OverlayPosition,
|
||||
|
||||
/// Play audio feedback sounds
|
||||
#[serde(default = "defaults::yes")]
|
||||
pub audio_feedback: bool,
|
||||
|
||||
/// Audio input device name (null = system default)
|
||||
#[serde(default)]
|
||||
pub input_device: Option<String>,
|
||||
|
||||
/// Enable VAD to trim silence
|
||||
#[serde(default = "defaults::yes")]
|
||||
pub vad_enabled: bool,
|
||||
|
||||
/// Language hint for model
|
||||
#[serde(default = "defaults::language")]
|
||||
pub language: String,
|
||||
}
|
||||
|
||||
mod defaults {
|
||||
use super::*;
|
||||
|
||||
pub fn hotkey() -> String {
|
||||
"ctrl+space".into()
|
||||
}
|
||||
pub fn mode() -> RecordingMode {
|
||||
RecordingMode::PushToTalk
|
||||
}
|
||||
pub fn cancel_key() -> String {
|
||||
"escape".into()
|
||||
}
|
||||
pub fn model() -> String {
|
||||
"parakeet-tdt-0.6b-v3".into()
|
||||
}
|
||||
pub fn accelerator() -> Accelerator {
|
||||
Accelerator::Auto
|
||||
}
|
||||
pub fn paste_method() -> PasteMethod {
|
||||
PasteMethod::CtrlV
|
||||
}
|
||||
pub fn overlay_position() -> OverlayPosition {
|
||||
OverlayPosition::Top
|
||||
}
|
||||
pub fn yes() -> bool {
|
||||
true
|
||||
}
|
||||
pub fn language() -> String {
|
||||
"en".into()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
hotkey: defaults::hotkey(),
|
||||
mode: defaults::mode(),
|
||||
cancel_key: defaults::cancel_key(),
|
||||
model: defaults::model(),
|
||||
accelerator: defaults::accelerator(),
|
||||
gpu_device: 0,
|
||||
paste_method: defaults::paste_method(),
|
||||
copy_to_clipboard: true,
|
||||
overlay_position: defaults::overlay_position(),
|
||||
audio_feedback: true,
|
||||
input_device: None,
|
||||
vad_enabled: true,
|
||||
language: defaults::language(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Returns the platform-appropriate config directory.
|
||||
pub fn dir() -> Result<PathBuf> {
|
||||
let dir = dirs::config_dir()
|
||||
.context("Could not determine config directory")?
|
||||
.join("mouth");
|
||||
Ok(dir)
|
||||
}
|
||||
|
||||
/// Returns the path to the config file.
|
||||
pub fn path() -> Result<PathBuf> {
|
||||
Ok(Self::dir()?.join("config.yaml"))
|
||||
}
|
||||
|
||||
/// Load config from disk, falling back to defaults if file doesn't exist.
|
||||
pub fn load() -> Result<Self> {
|
||||
let path = Self::path()?;
|
||||
if !path.exists() {
|
||||
return Ok(Self::default());
|
||||
}
|
||||
let contents = std::fs::read_to_string(&path)
|
||||
.with_context(|| format!("Failed to read config from {}", path.display()))?;
|
||||
let config: Config = serde_yaml::from_str(&contents)
|
||||
.with_context(|| format!("Failed to parse config from {}", path.display()))?;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
/// Save config to disk, creating the directory if needed.
|
||||
pub fn save(&self) -> Result<()> {
|
||||
let path = Self::path()?;
|
||||
if let Some(parent) = path.parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.with_context(|| format!("Failed to create config directory {}", parent.display()))?;
|
||||
}
|
||||
let yaml = serde_yaml::to_string(self).context("Failed to serialize config")?;
|
||||
std::fs::write(&path, yaml)
|
||||
.with_context(|| format!("Failed to write config to {}", path.display()))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user