Files
mouth/Cargo.toml
T
steve 9b0bf7d9e3 Implement core speech-to-text pipeline
All major components: hotkey listener (rdev), audio capture (cpal),
resampling (rubato), VAD (Silero ONNX), Parakeet v3 TDT transcription
(ort), overlay window (winit+softbuffer), paste simulation (enigo+arboard),
audio feedback (rodio), YAML config, CLI with clap, HuggingFace model
download. ~2400 lines of Rust across 16 source files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 16:47:46 +01:00

62 lines
1.0 KiB
TOML

# Package metadata for the `mouth` crate.
[package]
name = "mouth"
version = "0.1.0"
# Rust 2024 edition: building this crate needs a toolchain that supports it
# (Rust 1.85 or newer).
edition = "2024"
description = "Offline speech-to-text with global hotkey and paste"

# Dependencies are grouped by the role they play in the application and
# sorted alphabetically inside each group.
[dependencies]
# Command-line interface
clap = { version = "4", features = ["derive"] }

# Configuration (YAML)
# NOTE(review): serde_yaml 0.9 appears to be deprecated upstream; confirm and
# consider a maintained alternative before relying on it long-term.
dirs = "6"
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"

# Interactive configuration TUI
dialoguer = "0.11"

# Logging
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# Async runtime
tokio = { version = "1", features = ["full"] }

# Global hotkey listener
rdev = "0.5"

# Audio capture
cpal = "0.15"

# Audio resampling
rubato = "0.16"

# ONNX inference (Parakeet v3 + Silero VAD)
# NOTE(review): ort is requested as a pre-release; this requirement also
# admits later 2.x pre-releases and releases. Confirm whether an exact pin
# (`=2.0.0-rc.12`) is wanted.
# NOTE(review): presumably ndarray must stay on the version ort is built
# against; verify when bumping either crate.
ndarray = "0.17"
ort = { version = "2.0.0-rc.12", features = ["download-binaries"] }

# Model download from HuggingFace
hf-hub = "0.4"
indicatif = "0.17"

# Clipboard access
arboard = "3"

# Keyboard simulation
enigo = { version = "0.3", features = ["serde"] }

# Overlay window
softbuffer = "0.4"
winit = "0.30"

# Audio feedback
rodio = "0.20"

# System information
num_cpus = "1"

# Error handling
anyhow = "1"
thiserror = "2"