Implement core speech-to-text pipeline
Adds all major components: hotkey listener (rdev), audio capture (cpal), resampling (rubato), VAD (Silero ONNX), Parakeet v3 TDT transcription (ort), overlay window (winit + softbuffer), paste simulation (enigo + arboard), audio feedback (rodio), YAML config, a CLI built with clap, and HuggingFace model download. Roughly 2,400 lines of Rust across 16 source files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+61
@@ -0,0 +1,61 @@
# Crate metadata for the `mouth` binary.
[package]
name = "mouth"
version = "0.1.0"
edition = "2024"
description = "Offline speech-to-text with global hotkey and paste"

# Runtime dependencies, grouped by the part of the pipeline they serve.
[dependencies]
# CLI
clap = { version = "4", features = ["derive"] }

# Config
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"
dirs = "6"

# Interactive config TUI
dialoguer = "0.11"

# Logging
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# Async
tokio = { version = "1", features = ["full"] }

# Global hotkey
rdev = "0.5"

# Audio capture
cpal = "0.15"

# Audio resampling
rubato = "0.16"

# ONNX inference (Parakeet v3 + Silero VAD)
# NOTE(review): `ort` is pinned to a release candidate; revisit this
# requirement once a stable 2.0 is published.
ort = { version = "2.0.0-rc.12", features = ["download-binaries"] }
ndarray = "0.17"

# Model download from HuggingFace
hf-hub = "0.4"
indicatif = "0.17"

# Clipboard
arboard = "3"

# Keyboard simulation
enigo = { version = "0.3", features = ["serde"] }

# Overlay window
winit = "0.30"
softbuffer = "0.4"

# Audio feedback
rodio = "0.20"

# System info
num_cpus = "1"

# Error handling
anyhow = "1"
thiserror = "2"
|
||||
Reference in New Issue
Block a user