From 0cea6a4b28daaa9103694d4c760a84d3c20c1fba Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Fri, 10 Apr 2026 22:04:39 +0100 Subject: [PATCH] v0.2.0: System tray, IPC status, VAD, hotkey grab, and polish - Add system tray icon with Exit menu (tray-icon/muda) - Add IPC daemon status via named pipe (Windows) / Unix socket (Linux) - Add `mouth status` command to query running daemon - Add daemon lock to prevent multiple instances - Hide Windows console window when running as daemon - Wire up Silero VAD model download and speech filtering - Switch hotkey listener from rdev::listen to rdev::grab to consume hotkeys - Add hotkey capture mode in interactive config (press keys instead of typing) - Add all missing key names (brackets, punctuation, numpad, etc.) - Fix ONNX tensor type mismatches (encoder wants i64, decoder wants i32) - Add 300ms lead-in silence to compensate for mic startup latency - Add 300ms trailing recording after stop for speech not to be clipped - Add 50ms silence before audio feedback blips for device warmup - Reduce overlay size (150x18, was 200x36) - Add PolyForm Noncommercial 1.0.0 license - Flesh out user-focused README - Update release script with Gitea/GitHub forge support Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 762 ++++++++++++++++++++++++++++++++++++++---- Cargo.toml | 14 +- LICENSE | 131 ++++++++ README.md | 113 ++++++- plan.md | 287 ---------------- release.sh | 148 ++++++-- src/audio_feedback.rs | 6 +- src/cli/config_cmd.rs | 45 ++- src/cli/run_cmd.rs | 143 +++++--- src/cli/status_cmd.rs | 27 +- src/coordinator.rs | 39 ++- src/hotkey.rs | 273 +++++++++++++-- src/ipc.rs | 233 +++++++++++++ src/main.rs | 2 + src/model_cache.rs | 17 + src/overlay.rs | 141 +++++++- src/recorder.rs | 10 +- src/shared_state.rs | 35 ++ src/transcriber.rs | 12 +- 19 files changed, 1948 insertions(+), 490 deletions(-) create mode 100644 LICENSE delete mode 100644 plan.md create mode 100644 src/ipc.rs create mode 100644 src/shared_state.rs diff 
--git a/Cargo.lock b/Cargo.lock index 77bc5e3..98dace6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -187,6 +187,29 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "175571dd1d178ced59193a6fc02dde1b972eb0bc56c892cde9beeceac5bf0f6b" +[[package]] +name = "atk" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241b621213072e993be4f6f3a9e4b45f65b7e6faad43001be957184b7bb1824b" +dependencies = [ + "atk-sys", + "glib", + "libc", +] + +[[package]] +name = "atk-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5e48b684b0ca77d2bbadeef17424c2ea3c897d44d566a1617e7e8f30614d086" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -226,7 +249,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.117", ] [[package]] @@ -240,6 +263,9 @@ name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +dependencies = [ + "serde_core", +] [[package]] name = "block" @@ -256,6 +282,15 @@ dependencies = [ "objc2 0.5.2", ] +[[package]] +name = "block2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" +dependencies = [ + "objc2 0.6.4", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -279,7 +314,7 @@ checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -300,6 +335,31 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cairo-rs" +version = "0.18.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca26ef0159422fb77631dc9d17b102f253b876fe1586b03b803e63a309b4ee2" +dependencies = [ + "bitflags 2.11.0", + "cairo-sys-rs", + "glib", + "libc", + "once_cell", + "thiserror 1.0.69", +] + +[[package]] +name = "cairo-sys-rs" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685c9fa8e590b8b3d678873528d83411db17242a73fccaed827770ea0fedda51" +dependencies = [ + "glib-sys", + "libc", + "system-deps", +] + [[package]] name = "calloop" version = "0.13.0" @@ -353,6 +413,16 @@ dependencies = [ "nom", ] +[[package]] +name = "cfg-expr" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" +dependencies = [ + "smallvec", + "target-lexicon", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -373,7 +443,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", - "libloading", + "libloading 0.8.9", ] [[package]] @@ -404,10 +474,10 @@ version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -650,6 +720,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -748,7 +827,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -757,7 +836,7 @@ version = "0.5.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ab8ecd87370524b461f8557c119c405552c396ed91fc0a8eec68679eab26f94a" dependencies = [ - "libloading", + "libloading 0.8.9", ] [[package]] @@ -853,6 +932,16 @@ dependencies = [ "xkeysym", ] +[[package]] +name = "epoll" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e74d68fe2927dbf47aa976d14d93db9b23dced457c7bb2bdc6925a16d31b736e" +dependencies = [ + "bitflags 2.11.0", + "libc", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -875,6 +964,29 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "evdev-rs" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b92abc30d5fd1e4f6440dee4d626abc68f4a9b5014dc1de575901e23c2e02321" +dependencies = [ + "bitflags 1.3.2", + "evdev-sys", + "libc", + "log", +] + +[[package]] +name = "evdev-sys" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdcf0d489f4d9a80ac2b3b35b92fdd8fcf68d33bb67f947afe5cd36e482de576" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "fastrand" version = "2.4.1" @@ -898,7 +1010,7 @@ checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -910,6 +1022,16 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "field-offset" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38e2275cc4e4fc009b0669731a1e5ab7ebf11f469eaede2bab9309a5b4d6057f" +dependencies = [ + "memoffset", + "rustc_version", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -965,7 +1087,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - 
"syn", + "syn 2.0.117", ] [[package]] @@ -1045,7 +1167,7 @@ checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1077,6 +1199,64 @@ dependencies = [ "slab", ] +[[package]] +name = "gdk" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9f245958c627ac99d8e529166f9823fb3b838d1d41fd2b297af3075093c2691" +dependencies = [ + "cairo-rs", + "gdk-pixbuf", + "gdk-sys", + "gio", + "glib", + "libc", + "pango", +] + +[[package]] +name = "gdk-pixbuf" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50e1f5f1b0bfb830d6ccc8066d18db35c487b1b2b1e8589b5dfe9f07e8defaec" +dependencies = [ + "gdk-pixbuf-sys", + "gio", + "glib", + "libc", + "once_cell", +] + +[[package]] +name = "gdk-pixbuf-sys" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9839ea644ed9c97a34d129ad56d38a25e6756f99f3a88e15cd39c20629caf7" +dependencies = [ + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "system-deps", +] + +[[package]] +name = "gdk-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c2d13f38594ac1e66619e188c6d5a1adb98d11b2fcf7894fc416ad76aa2f3f7" +dependencies = [ + "cairo-sys-rs", + "gdk-pixbuf-sys", + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "pango-sys", + "pkg-config", + "system-deps", +] + [[package]] name = "gethostname" version = "1.1.0" @@ -1123,12 +1303,154 @@ dependencies = [ "wasip3", ] +[[package]] +name = "gio" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fc8f532f87b79cbc51a79748f16a6828fb784be93145a322fa14d06d354c73" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "gio-sys", + "glib", + "libc", + "once_cell", + "pin-project-lite", + "smallvec", + "thiserror 
1.0.69", +] + +[[package]] +name = "gio-sys" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37566df850baf5e4cb0dfb78af2e4b9898d817ed9263d1090a2df958c64737d2" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", + "winapi", +] + +[[package]] +name = "glib" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233daaf6e83ae6a12a52055f568f9d7cf4671dabb78ff9560ab6da230ce00ee5" +dependencies = [ + "bitflags 2.11.0", + "futures-channel", + "futures-core", + "futures-executor", + "futures-task", + "futures-util", + "gio-sys", + "glib-macros", + "glib-sys", + "gobject-sys", + "libc", + "memchr", + "once_cell", + "smallvec", + "thiserror 1.0.69", +] + +[[package]] +name = "glib-macros" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb0228f477c0900c880fd78c8759b95c7636dbd7842707f49e132378aa2acdc" +dependencies = [ + "heck 0.4.1", + "proc-macro-crate 2.0.2", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "glib-sys" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063ce2eb6a8d0ea93d2bf8ba1957e78dbab6be1c2220dd3daca57d5a9d869898" +dependencies = [ + "libc", + "system-deps", +] + [[package]] name = "glob" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "gobject-sys" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0850127b514d1c4a4654ead6dedadb18198999985908e6ffe4436f53c785ce44" +dependencies = [ + "glib-sys", + "libc", + "system-deps", +] + +[[package]] +name = "gtk" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd56fb197bfc42bd5d2751f4f017d44ff59fbb58140c6b49f9b3b2bdab08506a" 
+dependencies = [ + "atk", + "cairo-rs", + "field-offset", + "futures-channel", + "gdk", + "gdk-pixbuf", + "gio", + "glib", + "gtk-sys", + "gtk3-macros", + "libc", + "pango", + "pkg-config", +] + +[[package]] +name = "gtk-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f29a1c21c59553eb7dd40e918be54dccd60c52b049b75119d5d96ce6b624414" +dependencies = [ + "atk-sys", + "cairo-sys-rs", + "gdk-pixbuf-sys", + "gdk-sys", + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "pango-sys", + "system-deps", +] + +[[package]] +name = "gtk3-macros" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ff3c5b21f14f0736fed6dcfc0bfb4225ebf5725f3c0209edeec181e4d73e9d" +dependencies = [ + "proc-macro-crate 1.3.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "h2" version = "0.4.13" @@ -1174,6 +1496,12 @@ version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1458,7 +1786,7 @@ dependencies = [ "byteorder-lite", "moxcms", "num-traits", - "png", + "png 0.18.1", "tiff", ] @@ -1487,6 +1815,26 @@ dependencies = [ "web-time", ] +[[package]] +name = "inotify" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46dd0a94b393c730779ccfd2a872b67b1eb67be3fc33082e733bdb38b5fde4d4" +dependencies = [ + "bitflags 1.3.2", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = 
[ + "libc", +] + [[package]] name = "ipnet" version = "2.12.0" @@ -1567,7 +1915,7 @@ dependencies = [ "quote", "rustc_version", "simd_cesu8", - "syn", + "syn 2.0.117", ] [[package]] @@ -1595,7 +1943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" dependencies = [ "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1620,6 +1968,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "keyboard-types" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b750dcadc39a09dbadd74e118f6dd6598df77fa01df0cfcdc52c28dece74528a" +dependencies = [ + "bitflags 2.11.0", + "serde", + "unicode-segmentation", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1643,12 +2002,46 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "libappindicator" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03589b9607c868cc7ae54c0b2a22c8dc03dd41692d48f2d7df73615c6a95dc0a" +dependencies = [ + "glib", + "gtk", + "gtk-sys", + "libappindicator-sys", + "log", +] + +[[package]] +name = "libappindicator-sys" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf" +dependencies = [ + "gtk-sys", + "libloading 0.7.4", + "once_cell", +] + [[package]] name = "libc" version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libloading" version = "0.8.9" @@ -1671,6 +2064,25 @@ dependencies = [ "redox_syscall 0.7.4", ] +[[package]] +name 
= "libxdo" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00333b8756a3d28e78def82067a377de7fa61b24909000aeaa2b446a948d14db" +dependencies = [ + "libxdo-sys", +] + +[[package]] +name = "libxdo-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db23b9e7e2b7831bbd8aac0bbeeeb7b68cbebc162b227e7052e8e55829a09212" +dependencies = [ + "libc", + "x11", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -1768,6 +2180,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -1821,12 +2242,15 @@ dependencies = [ "rodio", "rubato", "serde", + "serde_json", "serde_yaml", "softbuffer", "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", + "tray-icon", + "windows-sys 0.59.0", "winit", ] @@ -1840,6 +2264,26 @@ dependencies = [ "pxfm", ] +[[package]] +name = "muda" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdae9c00e61cc0579bcac625e8ad22104c60548a025bfc972dc83868a28e1484" +dependencies = [ + "crossbeam-channel", + "dpi", + "gtk", + "keyboard-types", + "libxdo", + "objc2 0.5.2", + "objc2-app-kit 0.2.2", + "objc2-foundation 0.2.2", + "once_cell", + "png 0.17.16", + "thiserror 1.0.69", + "windows-sys 0.59.0", +] + [[package]] name = "native-tls" version = "0.2.18" @@ -1961,7 +2405,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2008,10 +2452,10 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ - 
"proc-macro-crate", + "proc-macro-crate 3.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2061,7 +2505,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "libc", "objc2 0.5.2", "objc2-core-data", @@ -2078,6 +2522,7 @@ checksum = "d49e936b501e5c5bf01fda3a9452ff86dc3ea98ad5f283e1455153142d97518c" dependencies = [ "bitflags 2.11.0", "objc2 0.6.4", + "objc2-core-foundation", "objc2-core-graphics", "objc2-foundation 0.3.2", ] @@ -2089,7 +2534,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74dd3b56391c7a0596a295029734d3c1c5e7e510a4cb30245f8221ccea96b009" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", "objc2-foundation 0.2.2", @@ -2101,7 +2546,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5ff520e9c33812fd374d8deecef01d4a840e7b41862d849513de77e44aa4889" dependencies = [ - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", ] @@ -2113,7 +2558,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", ] @@ -2148,7 +2593,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" dependencies = [ - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", "objc2-metal", @@ -2160,7 +2605,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "000cfee34e683244f284252ee206a27953279d370e309649dc3ee317b37e5781" dependencies = [ - "block2", + "block2 0.5.1", "objc2 0.5.2", 
"objc2-contacts", "objc2-foundation 0.2.2", @@ -2179,7 +2624,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "dispatch", "libc", "objc2 0.5.2", @@ -2192,6 +2637,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" dependencies = [ "bitflags 2.11.0", + "block2 0.6.2", "objc2 0.6.4", "objc2-core-foundation", ] @@ -2213,7 +2659,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1a1ae721c5e35be65f01a03b6d2ac13a54cb4fa70d8a5da293d7b0020261398" dependencies = [ - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-app-kit 0.2.2", "objc2-foundation 0.2.2", @@ -2226,7 +2672,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", ] @@ -2238,7 +2684,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", "objc2-metal", @@ -2273,7 +2719,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8bb46798b20cd6b91cbd113524c490f1686f4c4e8f49502431415f3512e2b6f" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-cloud-kit", "objc2-core-data", @@ -2293,7 +2739,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44fa5f9748dbfe1ca6c0b79ad20725a11eca7c2218bceb4b005cb1be26273bfe" dependencies = [ - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-foundation 0.2.2", ] @@ -2305,7 
+2751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76cfcbf642358e8689af64cee815d139339f3ed8ad05103ed5eaf73db8d84cb3" dependencies = [ "bitflags 2.11.0", - "block2", + "block2 0.5.1", "objc2 0.5.2", "objc2-core-location", "objc2-foundation 0.2.2", @@ -2378,7 +2824,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2448,6 +2894,31 @@ dependencies = [ "ttf-parser", ] +[[package]] +name = "pango" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca27ec1eb0457ab26f3036ea52229edbdb74dee1edd29063f5b9b010e7ebee4" +dependencies = [ + "gio", + "glib", + "libc", + "once_cell", + "pango-sys", +] + +[[package]] +name = "pango-sys" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436737e391a843e5933d6d9aa102cb126d501e815b83601365a948a518555dc5" +dependencies = [ + "glib-sys", + "gobject-sys", + "libc", + "system-deps", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -2503,7 +2974,7 @@ checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2524,6 +2995,19 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" +[[package]] +name = "png" +version = "0.17.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "png" version = "0.18.1" @@ -2591,7 +3075,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" 
dependencies = [ "proc-macro2", - "syn", + "syn 2.0.117", ] [[package]] @@ -2603,13 +3087,57 @@ dependencies = [ "num-integer", ] +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit 0.19.15", +] + +[[package]] +name = "proc-macro-crate" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b00f26d3400549137f92511a46ac1cd8ce37cb5598a96d382381458b992a5d24" +dependencies = [ + "toml_datetime 0.6.3", + "toml_edit 0.20.2", +] + [[package]] name = "proc-macro-crate" version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" dependencies = [ - "toml_edit", + "toml_edit 0.25.11+spec-1.1.0", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", ] [[package]] @@ -2714,6 +3242,9 @@ dependencies = [ "core-foundation 0.7.0", "core-foundation-sys 0.7.0", "core-graphics 0.19.2", + "epoll", + "evdev-rs", + "inotify", "lazy_static", "libc", "winapi", @@ -3079,7 +3610,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3095,6 +3626,15 @@ dependencies = [ "zmij", ] +[[package]] +name = 
"serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -3351,6 +3891,16 @@ dependencies = [ "symphonia-core", ] +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.117" @@ -3379,7 +3929,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3403,6 +3953,25 @@ dependencies = [ "libc", ] +[[package]] +name = "system-deps" +version = "6.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" +dependencies = [ + "cfg-expr", + "heck 0.5.0", + "pkg-config", + "toml", + "version-compare", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "tempfile" version = "3.27.0" @@ -3442,7 +4011,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3453,7 +4022,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3512,7 +4081,7 @@ checksum = "0324504befd01cab6e0c994f34b2ffa257849ee019d3fb3b64fb2c858887d89e" dependencies = [ "as-raw-xcb-connection", "ctor-lite", - "libloading", + "libloading 0.8.9", "pkg-config", "tracing", ] @@ 
-3567,7 +4136,7 @@ checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3603,6 +4172,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "185d8ab0dfbb35cf1399a6344d8484209c088f75f8f68230da55d48d95d43e3d" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime 0.6.3", + "toml_edit 0.20.2", +] + +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" +dependencies = [ + "serde", +] + [[package]] name = "toml_datetime" version = "1.1.1+spec-1.1.0" @@ -3612,6 +4202,30 @@ dependencies = [ "serde_core", ] +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime 0.6.3", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "396e4d48bbb2b7554c944bde63101b5ae446cff6ec4a24227428f15eb72ef338" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime 0.6.3", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.25.11+spec-1.1.0" @@ -3619,9 +4233,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ "indexmap", - "toml_datetime", + "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", - "winnow", + "winnow 1.0.1", ] [[package]] @@ -3630,7 +4244,7 @@ version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow", + "winnow 1.0.1", ] [[package]] @@ -3697,7 +4311,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3749,6 +4363,27 @@ dependencies = [ "strength_reduce", ] +[[package]] +name = "tray-icon" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadd75f5002e2513eaa19b2365f533090cc3e93abd38788452d9ea85cff7b48a" +dependencies = [ + "crossbeam-channel", + "dirs", + "libappindicator", + "muda", + "objc2 0.6.4", + "objc2-app-kit 0.3.2", + "objc2-core-foundation", + "objc2-core-graphics", + "objc2-foundation 0.3.2", + "once_cell", + "png 0.17.16", + "thiserror 2.0.18", + "windows-sys 0.59.0", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -3889,6 +4524,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version-compare" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c2856837ef78f57382f06b2b8563a2f512f7185d732608fd9176cb3b8edf0e" + [[package]] name = "version_check" version = "0.9.5" @@ -3980,7 +4621,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -4284,7 +4925,7 @@ checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4295,7 +4936,7 @@ checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4602,7 +5243,7 @@ dependencies = [ "android-activity", "atomic-waker", "bitflags 2.11.0", - "block2", + "block2 0.5.1", "bytemuck", "calloop", "cfg_aliases", @@ 
-4644,6 +5285,15 @@ dependencies = [ "xkbcommon-dl", ] +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "1.0.1" @@ -4669,7 +5319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -4680,10 +5330,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "indexmap", "prettyplease", - "syn", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -4699,7 +5349,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -4777,7 +5427,7 @@ dependencies = [ "as-raw-xcb-connection", "gethostname", "libc", - "libloading", + "libloading 0.8.9", "once_cell", "rustix 1.1.4", "x11rb-protocol", @@ -4844,7 +5494,7 @@ checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -4865,7 +5515,7 @@ checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -4885,7 +5535,7 @@ checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -4925,7 +5575,7 @@ checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 
e679589..5c4686d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,9 @@ [package] name = "mouth" -version = "0.1.0" +version = "0.2.0" edition = "2024" description = "Offline speech-to-text with global hotkey and paste" +license-file = "LICENSE" [dependencies] # CLI @@ -24,7 +25,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } tokio = { version = "1", features = ["full"] } # Global hotkey -rdev = "0.5" +rdev = { version = "0.5", features = ["unstable_grab"] } # Audio capture cpal = "0.15" @@ -56,6 +57,15 @@ rodio = "0.20" # System info num_cpus = "1" +# System tray +tray-icon = "0.19" + +# IPC status +serde_json = "1" + # Error handling anyhow = "1" thiserror = "2" + +[target.'cfg(windows)'.dependencies] +windows-sys = { version = "0.59", features = ["Win32_System_Console", "Win32_UI_WindowsAndMessaging", "Win32_System_Pipes", "Win32_System_IO", "Win32_Storage_FileSystem", "Win32_Foundation", "Win32_Security"] } diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1a71cb6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,131 @@ +# PolyForm Noncommercial License 1.0.0 + +<https://polyformproject.org/licenses/noncommercial/1.0.0> + +## Acceptance + +In order to get any license under these terms, you must agree +to them as both strict obligations and conditions to all +your licenses. + +## Copyright License + +The licensor grants you a copyright license for the +software to do everything you might do with the software +that would otherwise infringe the licensor's copyright +in it for any permitted purpose. However, you may +only distribute the software according to [Distribution +License](#distribution-license) and make changes or new works +based on the software according to [Changes and New Works +License](#changes-and-new-works-license). + +## Distribution License + +The licensor grants you an additional copyright license +to distribute copies of the software.
Your license +to distribute covers distributing the software with +changes and new works permitted by [Changes and New Works +License](#changes-and-new-works-license). + +## Notices + +You must ensure that anyone who gets a copy of any part of +the software from you also gets a copy of these terms or the +URL for them above, as well as copies of any plain-text lines +beginning with `Required Notice:` that the licensor provided +with the software. For example: + +> Required Notice: Copyright Yoyodyne, Inc. (http://example.com) + +## Changes and New Works License + +The licensor grants you an additional copyright license to +make changes and new works based on the software for any +permitted purpose. + +## Patent License + +The licensor grants you a patent license for the software that +covers patent claims the licensor can license, or becomes able +to license, that you would infringe by using the software. + +## Noncommercial Purposes + +Any noncommercial purpose is a permitted purpose. + +## Personal Uses + +Personal use for research, experiment, and testing for +the benefit of public knowledge, personal study, private +entertainment, hobby projects, amateur pursuits, or religious +observance, without any anticipated commercial application, +is use for a permitted purpose. + +## Noncommercial Organizations + +Use by any charitable organization, educational institution, +public research organization, public safety or health +organization, environmental protection organization, +or government institution is use for a permitted purpose +regardless of the source of funding or obligations resulting +from the funding. + +## Fair Use + +You may have "fair use" rights for the software under the +law. These terms do not limit them. + +## No Other Rights + +These terms do not allow you to sublicense or transfer any of +your licenses to anyone else, or prevent the licensor from +granting licenses to anyone else. These terms do not imply +any other licenses. 
+ +## Patent Defense + +If you make any written claim that the software infringes or +contributes to infringement of any patent, your patent license +for the software granted under these terms ends immediately. If +your company makes such a claim, your patent license ends +immediately for work on behalf of your company. + +## Violations + +The first time you are notified in writing that you have +violated any of these terms, or done anything with the software +not covered by your licenses, your licenses can nonetheless +continue if you come into full compliance with these terms, +and take practical steps to correct past violations, within +32 days of receiving notice. Otherwise, all your licenses +end immediately. + +## No Liability + +***As far as the law allows, the software comes as is, without +any warranty or condition, and the licensor will not be liable +to you for any damages arising out of these terms or the use +or nature of the software, under any kind of legal claim.*** + +## Definitions + +The **licensor** is the individual or entity offering these +terms, and the **software** is the software the licensor makes +available under these terms. + +**You** refers to the individual or entity agreeing to these +terms. + +**Your company** is any legal entity, sole proprietorship, +or other kind of organization that you work for, plus all +organizations that have control over, are under the control of, +or are under common control with that organization. **Control** +means ownership of substantially all the assets of an entity, +or the power to direct its management and policies by vote, +contract, or otherwise. Control can be direct or indirect. + +**Your licenses** are all the licenses granted to you for the +software under these terms. + +**Use** means anything you do with the software requiring one +of your licenses. 
diff --git a/README.md b/README.md index 0f56082..78de918 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,114 @@ # Mouth -`Mouth` is a utility that sits in the background waiting for you to hit a global hot key - when you press, it listens to you, quickly translates your voice in to text using a local LLM model and pastes it in to your application where the cursor currently sits. No internet required! +Offline speech-to-text with a global hotkey. Press a key, speak, and transcribed text is pasted at your cursor. No cloud services, no API keys — everything runs locally. + +Uses [Parakeet TDT 0.6B v3](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx) for transcription and [Silero VAD](https://huggingface.co/onnx-community/silero-vad) for voice activity detection, both via ONNX Runtime. + +## Quick Start + +1. Download `mouth.exe` (Windows) or build from source +2. Run `mouth` — models download automatically on first launch (~800MB one-time) +3. Press your hotkey (default: `Ctrl+Space`), speak, release — text appears at your cursor + +Mouth runs in the background with a system tray icon. Right-click the tray icon to exit. 
+ +## Usage + +| Command | Description | +| ----------------------- | ------------------------- | +| mouth | Run the daemon (default) | +| mouth config | Interactive configuration | +| mouth config --show | Print current config | +| mouth config --reset | Reset to defaults | +| mouth models | List available models | +| mouth models --download | Download configured model | +| mouth status | Show daemon status | + + +## Configuration + +Config file location: +- **Windows:** `%APPDATA%\mouth\config.yaml` +- **Linux/macOS:** `~/.config/mouth/config.yaml` + +Run `mouth config` for an interactive setup, or edit the YAML directly: + +```yaml +hotkey: "ctrl+space" +mode: push_to_talk # push_to_talk or toggle +cancel_key: "escape" +model: "parakeet-tdt-0.6b-v3" +accelerator: auto # auto, cpu, cuda, directml +gpu_device: 0 +paste_method: ctrl_v # ctrl_v, shift_insert, ctrl_shift_v, clipboard_only +copy_to_clipboard: true +overlay_position: top # top, bottom, none +audio_feedback: true +input_device: null # null = system default +vad_enabled: true +language: en +``` + +### Recording Modes + +- **push_to_talk** — Hold the hotkey while speaking, release to transcribe +- **toggle** — Press once to start recording, press again to stop and transcribe + +### Hotkey Format + +Hotkeys are written as modifier+key combinations: + +- Modifiers: `ctrl`, `alt`, `shift`, `meta` (Win key) +- Keys: letters (`a`-`z`), numbers (`0`-`9`), function keys (`f1`-`f12`), punctuation (`[`, `]`, `;`, etc.), and special keys (`space`, `enter`, `escape`, `tab`, etc.) + +Examples: `ctrl+space`, `alt+r`, `ctrl+shift+[`, `f9` + +When running `mouth config`, you can press the key combination directly instead of typing it. 
+ +### Paste Methods + +- **ctrl_v** — Simulates Ctrl+V (works in most apps) +- **shift_insert** — Simulates Shift+Insert (useful for terminals) +- **ctrl_shift_v** — Simulates Ctrl+Shift+V (plain text paste) +- **clipboard_only** — Copies to clipboard without pasting + +## Overlay + +A small colour-coded bar appears at the top (or bottom) of your screen: + +- **Red** — Recording +- **Amber** — Transcribing +- **Green** — Done + +Set `overlay_position: none` to disable. + +## Building from Source + +Requires Rust 1.85+ (the crate uses the 2024 edition). + +```bash +# Linux dependencies (Ubuntu/Debian) +sudo apt-get install libssl-dev libasound2-dev libpulse-dev \ + libx11-dev libxcb-shape0-dev libxcb-xfixes0-dev libxkbcommon-dev \ + libwayland-dev libgtk-3-dev libxtst-dev libxdo-dev cmake + +# Build +cargo build --release + +# Cross-compile for Windows from Linux (requires cargo-xwin) +cargo xwin build --release --target x86_64-pc-windows-msvc +``` + +## How It Works + +1. A global hotkey listener intercepts your configured key combination (consuming it so it doesn't reach other apps) +2. Audio is captured from your microphone and resampled to 16kHz +3. Silero VAD trims silence from the recording +4. The Parakeet TDT model transcribes speech to text via ONNX Runtime +5. Text is placed on the clipboard and pasted at your cursor + +All processing happens locally. No data leaves your machine. + +## License + +[PolyForm Noncommercial 1.0.0](LICENSE) — free for personal and non-commercial use. For commercial licensing, contact the author. diff --git a/plan.md b/plan.md deleted file mode 100644 index c7478dd..0000000 --- a/plan.md +++ /dev/null @@ -1,287 +0,0 @@ -# Mouth — Implementation Plan - -## Overview - -Mouth is a single-binary, offline speech-to-text tool for Windows (with Linux/macOS support where possible). Press a hotkey, speak, and transcribed text is pasted at your cursor. Configured entirely via YAML.
- -## Architecture - -``` -┌─────────────┐ ┌───────────┐ ┌─────────────┐ ┌────────────┐ -│ Hotkey │────▶│ Recorder │────▶│ Transcriber │────▶│ Paste │ -│ Listener │ │ (cpal) │ │ (ort/ONNX) │ │ (enigo) │ -│ (rdev) │ │ │ │ │ │ │ -└─────────────┘ └───────────┘ └─────────────┘ └────────────┘ - │ │ │ │ - │ ▼ │ │ - │ ┌───────────┐ │ │ - │ │ VAD │ │ │ - │ │ (silero) │ │ │ - │ └───────────┘ │ │ - │ │ │ - ▼ ▼ ▼ -┌──────────────────────────────────────────────────────────────────────┐ -│ Overlay (winit) │ -│ State: idle → recording → transcribing → done │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -### Component Communication - -All components communicate via channels (`std::sync::mpsc` or `tokio::sync`). The main thread owns the overlay window (required by most windowing systems). A coordinator task receives events from hotkey/recorder/transcriber and drives state transitions. - -``` -HotkeyEvent(Pressed/Released) ──┐ -AudioReady(Vec) ───────────┼──▶ Coordinator ──▶ OverlayState -TranscriptionDone(String) ──────┘ ──▶ PasteAction -CancelRequested ────────────────┘ -``` - -## Crate Dependencies - -| Crate | Purpose | Notes | -|-------|---------|-------| -| `rdev` | Global hotkey capture | Cross-platform key events, no focus required | -| `cpal` | Audio capture | Cross-platform mic input | -| `rubato` | Audio resampling | Resample to 16kHz for Parakeet | -| `ort` | ONNX Runtime | Run Parakeet v3 + Silero VAD | -| `hf-hub` | Model download | Download from HuggingFace, standard cache dir | -| `enigo` | Keyboard simulation | Simulate Ctrl+V, Shift+Insert, etc. 
| -| `arboard` | Clipboard access | Read/write clipboard, save/restore | -| `winit` | Windowing | Minimal overlay window | -| `softbuffer` | Pixel rendering | Draw coloured overlay (no GPU needed for overlay) | -| `serde` + `serde_yaml` | Config | Deserialize YAML config | -| `clap` | CLI | Subcommands: `run`, `config`, `models` | -| `dialoguer` | Interactive TUI | `mouth config` interactive setup | -| `rodio` | Audio playback | Blip up/down sounds | -| `indicatif` | Progress bars | Model download progress | -| `dirs` | Platform dirs | Config/cache paths | -| `tracing` | Logging | Structured logging | - -## Config File - -Location: `~/.config/mouth/config.yaml` (Linux/macOS), `%APPDATA%\mouth\config.yaml` (Windows) - -```yaml -# Hotkey to activate recording -hotkey: "ctrl+space" - -# Recording mode: push_to_talk or toggle -mode: push_to_talk - -# Cancel hotkey (only active while recording) -cancel_key: "escape" - -# Speech-to-text model -model: "parakeet-tdt-0.6b-v3" - -# Inference accelerator: auto, cpu, cuda, directml -accelerator: auto - -# GPU device index (only used when accelerator is cuda/directml) -gpu_device: 0 - -# How to paste text -paste_method: ctrl_v # ctrl_v | shift_insert | ctrl_shift_v | clipboard_only - -# Also keep transcribed text on clipboard after pasting -copy_to_clipboard: true - -# Overlay position on screen -overlay_position: top # top | bottom | none - -# Audio feedback -audio_feedback: true - -# Audio input device (null = system default) -input_device: null - -# VAD: trim silence from audio before transcription -vad_enabled: true - -# Language (for model hint, if supported) -language: en -``` - -## CLI Interface - -``` -mouth run # Start the daemon (default if no subcommand) -mouth config # Interactive TUI to edit config -mouth config --show # Print current config to stdout -mouth config --reset # Reset config to defaults -mouth models # List available/downloaded models -mouth models download # Download configured model (if not cached) 
-mouth status # Show daemon status, loaded model, app version -``` - -## Implementation Phases - -### Phase 1: Project Skeleton + Config - -- Cargo.toml with all dependencies -- Config struct with serde, defaults, load/save -- CLI with clap (run, config, models subcommands) -- `mouth config` interactive TUI with dialoguer -- Platform-aware config/cache directory resolution - -### Phase 2: Hotkey Listener - -- Global hotkey capture using rdev -- Support configurable key combinations (parse from string like "ctrl+space") -- Push-to-talk mode: record on press, stop on release -- Toggle mode: start on first press, stop on second press -- Cancel on Escape while recording -- Debounce rapid key events (~30ms) - -### Phase 3: Audio Capture + VAD - -- Open mic input via cpal (default device or configured) -- Convert to f32 mono -- Resample to 16kHz via rubato -- Buffer audio chunks during recording -- Run Silero VAD to trim leading/trailing silence -- Produce final `Vec` of clean speech at 16kHz - -### Phase 4: Model Management - -- Use hf-hub to download Parakeet v3 ONNX model from HuggingFace -- Store in standard HF cache (`~/.cache/huggingface/hub/`) -- Show download progress with indicatif -- `mouth models` command to list/download models -- Auto-download on first run if model not cached - -### Phase 5: Transcription - -- Load Parakeet v3 ONNX model via ort -- Auto-detect GPU (DirectML on Windows, CUDA if available, CPU fallback) -- Respect accelerator override from config -- Run inference on captured audio -- Return transcribed text string - -### Phase 6: Overlay - -- Create a small always-on-top window using winit -- Render with softbuffer (simple coloured rectangle + text) -- States and colours: - - Recording: red pulsing indicator - - Transcribing: amber/yellow - - Done: brief green flash, then hide - - Error: brief red flash with error hint -- Window flags (Windows): `WS_EX_TOPMOST | WS_EX_TOOLWINDOW | WS_EX_NOACTIVATE` -- Position: centered horizontally at top or 
bottom of current monitor -- No focus steal, no taskbar entry - -### Phase 7: Paste System - -- Save current clipboard content (if preserving) -- Set transcribed text to clipboard via arboard -- Simulate keypress via enigo based on paste_method: - - `ctrl_v`: Ctrl+V (Cmd+V on macOS) - - `shift_insert`: Shift+Insert - - `ctrl_shift_v`: Ctrl+Shift+V - - `clipboard_only`: no keypress, just clipboard -- Restore previous clipboard content (unless copy_to_clipboard is true) -- Small delay between clipboard set and paste simulation (~50ms) - -### Phase 8: Audio Feedback - -- Bundle two short PCM blip sounds in the binary (via `include_bytes!`) -- "Blip up" on recording start -- "Blip down" on recording stop / transcription complete -- Play via rodio on a separate thread (non-blocking) -- Respect audio_feedback config flag - -### Phase 9: Coordinator + Integration - -- Wire all components together with channel-based message passing -- Main thread: overlay window event loop (winit requires this) -- Spawned threads/tasks: hotkey listener, audio recorder, transcriber -- Coordinator receives events, drives state machine: - ``` - Idle ──[hotkey press]──▶ Recording - Recording ──[hotkey release/press]──▶ Transcribing - Recording ──[cancel]──▶ Idle - Transcribing ──[result]──▶ Pasting ──▶ Idle - Transcribing ──[error]──▶ Error ──▶ Idle - ``` -- Graceful shutdown on SIGINT / tray quit - -### Phase 10: Daemon IPC + Status - -- The running daemon listens on a local Unix domain socket (Linux/macOS) or named pipe (Windows) for status queries -- Socket/pipe path: `/tmp/mouth.sock` (Linux/macOS), `\\.\pipe\mouth` (Windows) -- `mouth status` connects and requests current state; daemon responds with JSON: - ```json - { - "version": "0.1.0", - "state": "idle", - "model": "parakeet-tdt-0.6b-v3", - "accelerator": "directml", - "uptime_secs": 3420 - } - ``` -- If the daemon is not running, `mouth status` reports "Mouth is not running" and exits with code 1 -- Also used internally to prevent 
launching a second daemon instance (lock check) - -### Phase 11: Polish + Distribution - -- Error handling: user-friendly messages for common failures (no mic, model not found, etc.) -- Windows installer via `cargo-wix` or distribute as standalone .exe -- Test on Windows 10/11 primarily -- Test on Linux (X11 + Wayland) and macOS as secondary -- Update CLAUDE.md with build/run/test instructions -- Write user-facing README with setup instructions - -## Risks & Mitigations - -| Risk | Impact | Mitigation | -|------|--------|------------| -| Parakeet v3 ONNX model compatibility with `ort` | Blocks core functionality | Test early in Phase 5; Parakeet v2 as fallback | -| `rdev` hotkey reliability on Windows | Broken UX | Test early in Phase 2; fallback to Win32 `RegisterHotKey` | -| Overlay focus stealing | Annoying | Use proper window flags; test with various foreground apps | -| Audio resampling quality | Poor transcription | Use rubato SincInterpolation (high quality) | -| Binary size with bundled ONNX Runtime | Large download | ONNX Runtime is ~20-40MB; acceptable for a single-binary tool | -| winit event loop blocking | Unresponsive | All heavy work on background threads; overlay is lightweight | - -## File Structure - -``` -mouth/ -├── Cargo.toml -├── CLAUDE.md -├── README.md -├── plan.md -├── config.yaml.example -├── resources/ -│ ├── blip_up.pcm # bundled audio feedback -│ └── blip_down.pcm -└── src/ - ├── main.rs # CLI entry, clap setup - ├── config.rs # Config struct, YAML load/save, defaults - ├── hotkey.rs # Global hotkey listener (rdev) - ├── recorder.rs # Audio capture (cpal + rubato + VAD) - ├── vad.rs # Silero VAD wrapper - ├── transcriber.rs # ONNX inference, model loading, GPU detection - ├── model_cache.rs # HuggingFace download, cache management - ├── overlay.rs # Minimal overlay window (winit + softbuffer) - ├── paste.rs # Clipboard + paste simulation - ├── audio_feedback.rs # Blip sounds via rodio - ├── coordinator.rs # State machine, channel hub - 
└── cli/ - ├── mod.rs - ├── run.rs # `mouth run` handler - ├── config_cmd.rs # `mouth config` TUI - ├── models_cmd.rs # `mouth models` handler - └── status_cmd.rs # `mouth status` handler -``` - -## Not In Scope (v1) - -- LLM post-processing of transcriptions -- Transcription history / database -- Multiple model support (v1 is Parakeet v3 only, architecture supports adding more later) -- Auto-submit (Enter after paste) -- Multi-language UI -- Tray icon / system tray integration -- Translate-to-English mode diff --git a/release.sh b/release.sh index 72485a1..e03c8a2 100755 --- a/release.sh +++ b/release.sh @@ -1,11 +1,22 @@ #!/usr/bin/env bash set -euo pipefail +# ============================================================ +# Release configuration +# ============================================================ +REPO="https://gitea.dcglab.co.uk/steve/mouth" +FORGE="gitea" # "gitea" (uses tea CLI) or "github" (uses gh CLI) + +# ============================================================ +# Derived variables +# ============================================================ VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') RELEASE_DIR="release/v${VERSION}" BINARY_NAME="mouth" +TAG="v${VERSION}" -echo "=== Mouth Release Build v${VERSION} ===" +echo "=== Mouth Release Build ${TAG} ===" +echo "Forge: ${FORGE} (${REPO})" echo "" # Ensure we're in the project root @@ -14,6 +25,22 @@ if [ ! -f Cargo.toml ]; then exit 1 fi +# Check CLI tools +if [ "${FORGE}" = "gitea" ]; then + if ! command -v tea &>/dev/null; then + echo "ERROR: 'tea' CLI not found. Install: https://gitea.com/gitea/tea" + exit 1 + fi +elif [ "${FORGE}" = "github" ]; then + if ! command -v gh &>/dev/null; then + echo "ERROR: 'gh' CLI not found. Install: https://cli.github.com" + exit 1 + fi +else + echo "ERROR: Unknown forge '${FORGE}'. Must be 'gitea' or 'github'." 
+ exit 1 +fi + # Clean previous release artifacts for this version rm -rf "${RELEASE_DIR}" mkdir -p "${RELEASE_DIR}" @@ -32,19 +59,12 @@ build_target() { if cargo build --release --target "${target}" 2>&1; then local binary="target/${target}/release/${BINARY_NAME}${ext}" if [ -f "${binary}" ]; then - local archive="${RELEASE_DIR}/${BINARY_NAME}-v${VERSION}-${target}" + local archive="${RELEASE_DIR}/${BINARY_NAME}-${TAG}-${target}" if [ -n "${ext}" ]; then - # Windows: zip - local zip_name="${archive}.zip" - zip -j "${zip_name}" "${binary}" 2>/dev/null || { - # Fallback if zip not installed - cp "${binary}" "${archive}${ext}" - echo " -> ${archive}${ext}" - BUILT+=("${archive}${ext}") - return - } - echo " -> ${zip_name}" - BUILT+=("${zip_name}") + # Windows: ship the exe directly + cp "${binary}" "${archive}${ext}" + echo " -> ${archive}${ext}" + BUILT+=("${archive}${ext}") else # Linux/macOS: tar.gz local tar_name="${archive}.tar.gz" @@ -71,30 +91,15 @@ build_target() { build_target "x86_64-unknown-linux-gnu" "Linux x86_64" # Windows x86_64 (MSVC target via cargo-xwin) -# ort requires the MSVC target — the GNU/MinGW target has no prebuilt -# ONNX Runtime binaries. cargo-xwin cross-compiles using the MSVC -# toolchain from Linux without needing a Windows machine. 
-# -# Install once: -# cargo install cargo-xwin -# rustup target add x86_64-pc-windows-msvc -# if command -v cargo-xwin &>/dev/null && rustup target list --installed | grep -q x86_64-pc-windows-msvc; then echo "--- Building Windows x86_64 (x86_64-pc-windows-msvc via cargo-xwin) ---" if cargo xwin build --release --target x86_64-pc-windows-msvc 2>&1; then local_binary="target/x86_64-pc-windows-msvc/release/${BINARY_NAME}.exe" if [ -f "${local_binary}" ]; then - archive="${RELEASE_DIR}/${BINARY_NAME}-v${VERSION}-x86_64-pc-windows-msvc" - zip_name="${archive}.zip" - zip -j "${zip_name}" "${local_binary}" 2>/dev/null || { - cp "${local_binary}" "${archive}.exe" - echo " -> ${archive}.exe" - BUILT+=("${archive}.exe") - } - if [ -f "${zip_name}" ]; then - echo " -> ${zip_name}" - BUILT+=("${zip_name}") - fi + archive="${RELEASE_DIR}/${BINARY_NAME}-${TAG}-x86_64-pc-windows-msvc.exe" + cp "${local_binary}" "${archive}" + echo " -> ${archive}" + BUILT+=("${archive}") else echo " WARN: Binary not found" FAILED+=("Windows x86_64 (MSVC)") @@ -149,3 +154,82 @@ if [ ${#BUILT[@]} -gt 0 ]; then cat checksums-sha256.txt cd - > /dev/null fi + +# ============================================================ +# Publish release +# ============================================================ + +if [ ${#BUILT[@]} -eq 0 ]; then + echo "" + echo "No successful builds — skipping release publish." + exit 1 +fi + +echo "" +read -rp "Publish release ${TAG} to ${FORGE}? [y/N] " confirm +if [[ ! "${confirm}" =~ ^[Yy]$ ]]; then + echo "Skipped. Artifacts are in ${RELEASE_DIR}/" + exit 0 +fi + +# Ensure the git tag exists +if ! git rev-parse "${TAG}" &>/dev/null; then + echo "Creating git tag ${TAG}..." 
+ git tag -a "${TAG}" -m "Release ${TAG}" + git push origin "${TAG}" +fi + +# Collect all release files (artifacts + checksums) +RELEASE_FILES=() +for b in "${BUILT[@]}"; do + RELEASE_FILES+=("${b}") +done +RELEASE_FILES+=("${RELEASE_DIR}/checksums-sha256.txt") + +RELEASE_TITLE="Mouth ${TAG}" +RELEASE_BODY="## Mouth ${TAG} + +### Downloads +$(for b in "${BUILT[@]}"; do echo "- $(basename "${b}")"; done) + +### Checksums (SHA256) +\`\`\` +$(cat "${RELEASE_DIR}/checksums-sha256.txt") +\`\`\` +" + +if [ "${FORGE}" = "gitea" ]; then + echo "Publishing to Gitea via tea..." + + # Strip the scheme and host from the REPO URL, leaving owner/repo + REPO_OWNER_NAME=$(echo "${REPO}" | sed 's|.*://[^/]*/||') + + # Create the release + tea release create \ + --repo "${REPO_OWNER_NAME}" \ + --tag "${TAG}" \ + --title "${RELEASE_TITLE}" \ + --note "${RELEASE_BODY}" + + # Upload assets + for f in "${RELEASE_FILES[@]}"; do + echo " Uploading $(basename "${f}")..." + tea release asset create \ + --repo "${REPO_OWNER_NAME}" \ + --tag "${TAG}" \ + --name "$(basename "${f}")" \ + --file "${f}" + done + +elif [ "${FORGE}" = "github" ]; then + echo "Publishing to GitHub via gh..." + + gh release create "${TAG}" \ + --repo "${REPO}" \ + --title "${RELEASE_TITLE}" \ + --notes "${RELEASE_BODY}" \ + "${RELEASE_FILES[@]}" +fi + +echo "" +echo "=== Release ${TAG} published to ${FORGE}!
===" diff --git a/src/audio_feedback.rs b/src/audio_feedback.rs index f3d2e31..367249d 100644 --- a/src/audio_feedback.rs +++ b/src/audio_feedback.rs @@ -84,7 +84,11 @@ pub fn play_blip_down() { } fn play_blip_internal(freq_start: f32, freq_end: f32, duration_ms: u64) -> Result<()> { - let samples = generate_blip(freq_start, freq_end, duration_ms); + // Prepend silence so the audio device has time to warm up + let silence_ms = 50u64; + let silence_samples = (44100u64 * silence_ms / 1000) as usize; + let mut samples = vec![0i16; silence_samples]; + samples.extend(generate_blip(freq_start, freq_end, duration_ms)); let wav_data = encode_wav(&samples, 44100); let (_stream, stream_handle) = OutputStream::try_default()?; diff --git a/src/cli/config_cmd.rs b/src/cli/config_cmd.rs index 2f2c827..d96e34e 100644 --- a/src/cli/config_cmd.rs +++ b/src/cli/config_cmd.rs @@ -1,7 +1,9 @@ use anyhow::Result; use dialoguer::{Input, Select}; +use std::time::Duration; use crate::config::{Accelerator, Config, OverlayPosition, PasteMethod, RecordingMode}; +use crate::hotkey::capture_hotkey; pub fn show() -> Result<()> { let config = Config::load()?; @@ -20,10 +22,7 @@ pub fn reset() -> Result<()> { pub fn interactive() -> Result<()> { let mut config = Config::load()?; - config.hotkey = Input::new() - .with_prompt("Hotkey") - .default(config.hotkey) - .interact_text()?; + config.hotkey = prompt_hotkey("Hotkey", &config.hotkey)?; let mode_idx = Select::new() .with_prompt("Recording mode") @@ -38,10 +37,7 @@ pub fn interactive() -> Result<()> { _ => RecordingMode::Toggle, }; - config.cancel_key = Input::new() - .with_prompt("Cancel key") - .default(config.cancel_key) - .interact_text()?; + config.cancel_key = prompt_hotkey("Cancel key", &config.cancel_key)?; config.model = Input::new() .with_prompt("Model") @@ -125,3 +121,36 @@ pub fn interactive() -> Result<()> { println!("\nConfig saved to {}", Config::path()?.display()); Ok(()) } + +/// Prompt the user to either press a key combination 
or type it manually. +fn prompt_hotkey(label: &str, current: &str) -> Result { + let choice = Select::new() + .with_prompt(format!("{label} (current: {current})")) + .items(&["Press the key combination", "Type it manually", "Keep current"]) + .default(0) + .interact()?; + + match choice { + 0 => { + println!("Press your desired key combination (timeout: 10s)..."); + match capture_hotkey(Duration::from_secs(10)) { + Some(hotkey) => { + println!(" Captured: {hotkey}"); + Ok(hotkey) + } + None => { + println!(" No keypress detected, keeping current: {current}"); + Ok(current.to_string()) + } + } + } + 1 => { + let value = Input::new() + .with_prompt(label) + .default(current.to_string()) + .interact_text()?; + Ok(value) + } + _ => Ok(current.to_string()), + } +} diff --git a/src/cli/run_cmd.rs b/src/cli/run_cmd.rs index 73cc5f1..9ccc1ad 100644 --- a/src/cli/run_cmd.rs +++ b/src/cli/run_cmd.rs @@ -1,18 +1,31 @@ use anyhow::{Context, Result}; -use std::sync::mpsc; +use std::sync::{mpsc, Arc}; use std::thread; use tracing::info; -use crate::config::{Config, OverlayPosition}; +use crate::config::Config; use crate::coordinator::Coordinator; use crate::hotkey; +use crate::ipc; use crate::model_cache; use crate::overlay; use crate::recorder; +use crate::shared_state::SharedState; use crate::transcriber::Transcriber; pub fn run() -> Result<()> { let config = Config::load()?; + + // Check if already running + if ipc::is_daemon_running() { + eprintln!("Mouth is already running."); + std::process::exit(1); + } + + // Hide Windows console window + #[cfg(windows)] + hide_console(); + info!("Mouth v{} starting", env!("CARGO_PKG_VERSION")); info!("Mode: {:?}", config.mode); info!("Hotkey: {}", config.hotkey); @@ -30,10 +43,25 @@ pub fn run() -> Result<()> { let transcriber = Transcriber::new(&model_paths, &config.accelerator, config.gpu_device) .context("Failed to load transcription engine")?; - // Step 3: VAD (not yet bundled) + // Step 3: VAD let vad = if config.vad_enabled { - 
info!("VAD enabled but Silero model not yet bundled — skipping"); - None + info!("Loading Silero VAD..."); + match model_cache::ensure_vad_model() { + Ok(vad_path) => match crate::vad::Vad::new(vad_path.to_str().unwrap_or_default()) { + Ok(v) => { + info!("VAD loaded"); + Some(v) + } + Err(e) => { + tracing::warn!("Failed to load VAD, continuing without it: {e}"); + None + } + }, + Err(e) => { + tracing::warn!("Failed to download VAD model, continuing without it: {e}"); + None + } + } } else { None }; @@ -44,12 +72,29 @@ pub fn run() -> Result<()> { let cancel_combo = hotkey::parse_hotkey(&config.cancel_key) .with_context(|| format!("Invalid cancel key: {}", config.cancel_key))?; - // Step 5: Set up channels + // Step 5: Create shared state + let shared_state = Arc::new(SharedState::new( + config.model.clone(), + format!("{:?}", config.accelerator).to_lowercase(), + )); + + // Step 6: Start IPC listener + let ipc_state = Arc::clone(&shared_state); + thread::Builder::new() + .name("mouth-ipc".into()) + .spawn(move || { + if let Err(e) = ipc::start_ipc_listener(ipc_state) { + tracing::error!("IPC listener failed: {e}"); + } + }) + .context("Failed to spawn IPC thread")?; + + // Step 7: Set up channels let (hotkey_tx, hotkey_rx) = mpsc::channel(); let (recorder_cmd_tx, recorder_cmd_rx) = mpsc::channel(); let (audio_tx, audio_rx) = mpsc::channel(); - // Step 6: Spawn background threads + // Step 8: Spawn background threads let device_name = config.input_device.clone(); thread::Builder::new() .name("mouth-recorder".into()) @@ -65,52 +110,50 @@ pub fn run() -> Result<()> { }) .context("Failed to spawn hotkey thread")?; - // Step 7: Start overlay + coordinator - if config.overlay_position != OverlayPosition::None { - let (event_loop, proxy) = overlay::create_event_loop() - .map_err(|e| anyhow::anyhow!("Failed to create overlay event loop: {e}"))?; + // Step 9: Start overlay + coordinator + // Always create the event loop (needed for tray icon even when overlay is hidden) 
+ let (event_loop, proxy) = overlay::create_event_loop() + .map_err(|e| anyhow::anyhow!("Failed to create overlay event loop: {e}"))?; - let overlay_position = config.overlay_position.clone(); - let coord_proxy = Some(proxy); + let overlay_position = config.overlay_position.clone(); - // Coordinator runs on a background thread - let coord_config = config.clone(); - thread::Builder::new() - .name("mouth-coordinator".into()) - .spawn(move || { - let mut coordinator = Coordinator::new( - coord_config, - transcriber, - vad, - recorder_cmd_tx, - audio_rx, - hotkey_rx, - coord_proxy, - ); - coordinator.run(); - }) - .context("Failed to spawn coordinator thread")?; + // Coordinator runs on a background thread + let coord_config = config.clone(); + let coord_state = Arc::clone(&shared_state); + thread::Builder::new() + .name("mouth-coordinator".into()) + .spawn(move || { + let mut coordinator = Coordinator::new( + coord_config, + coord_state, + transcriber, + vad, + recorder_cmd_tx, + audio_rx, + hotkey_rx, + Some(proxy), + ); + coordinator.run(); + }) + .context("Failed to spawn coordinator thread")?; - println!("Mouth is running. Press {} to record. Ctrl+C to quit.", config.hotkey); - - // Overlay event loop runs on main thread (blocking) - overlay::run_event_loop(event_loop, overlay_position) - .map_err(|e| anyhow::anyhow!("Overlay event loop error: {e}"))?; - } else { - // No overlay — coordinator runs on main thread - println!("Mouth is running. Press {} to record. 
Ctrl+C to quit.", config.hotkey); - - let mut coordinator = Coordinator::new( - config, - transcriber, - vad, - recorder_cmd_tx, - audio_rx, - hotkey_rx, - None, - ); - coordinator.run(); - } + // Overlay event loop runs on main thread (blocking) + // Tray icon is created inside the overlay app + overlay::run_event_loop(event_loop, overlay_position) + .map_err(|e| anyhow::anyhow!("Overlay event loop error: {e}"))?; + ipc::cleanup(); Ok(()) } + +#[cfg(windows)] +fn hide_console() { + use windows_sys::Win32::System::Console::GetConsoleWindow; + use windows_sys::Win32::UI::WindowsAndMessaging::{ShowWindow, SW_HIDE}; + unsafe { + let console = GetConsoleWindow(); + if !console.is_null() { + ShowWindow(console, SW_HIDE); + } + } +} diff --git a/src/cli/status_cmd.rs b/src/cli/status_cmd.rs index 3e1b98e..457b164 100644 --- a/src/cli/status_cmd.rs +++ b/src/cli/status_cmd.rs @@ -1,11 +1,24 @@ use anyhow::Result; -pub fn status() -> Result<()> { - let version = env!("CARGO_PKG_VERSION"); +use crate::ipc; - // TODO: Phase 10 — connect to daemon IPC socket/pipe and query status - // For now, just show version info - println!("Mouth v{version}"); - println!("Status: not yet implemented (requires daemon IPC)"); - Ok(()) +pub fn status() -> Result<()> { + match ipc::query_daemon_status() { + Ok(status) => { + println!("Mouth v{}", status.version); + println!("State: {}", status.state); + println!("Model: {}", status.model); + println!("Accelerator: {}", status.accelerator); + + let hours = status.uptime_secs / 3600; + let mins = (status.uptime_secs % 3600) / 60; + let secs = status.uptime_secs % 60; + println!("Uptime: {}h {}m {}s", hours, mins, secs); + Ok(()) + } + Err(_) => { + eprintln!("Mouth is not running."); + std::process::exit(1); + } + } } diff --git a/src/coordinator.rs b/src/coordinator.rs index 9f04348..570cab5 100644 --- a/src/coordinator.rs +++ b/src/coordinator.rs @@ -1,4 +1,4 @@ -use std::sync::mpsc; +use std::sync::{mpsc, Arc}; use std::thread; use 
tracing::{debug, error, info, warn}; use winit::event_loop::EventLoopProxy; @@ -9,6 +9,7 @@ use crate::hotkey::HotkeyEvent; use crate::overlay::{OverlayEvent, OverlayState}; use crate::paste; use crate::recorder::{AudioData, RecorderCommand}; +use crate::shared_state::SharedState; use crate::transcriber::Transcriber; use crate::vad::Vad; @@ -24,6 +25,7 @@ enum State { pub struct Coordinator { config: Config, state: State, + shared_state: Arc, transcriber: Transcriber, vad: Option, recorder_tx: mpsc::Sender, @@ -35,6 +37,7 @@ pub struct Coordinator { impl Coordinator { pub fn new( config: Config, + shared_state: Arc, transcriber: Transcriber, vad: Option, recorder_tx: mpsc::Sender, @@ -45,6 +48,7 @@ impl Coordinator { Self { config, state: State::Idle, + shared_state, transcriber, vad, recorder_tx, @@ -54,6 +58,16 @@ impl Coordinator { } } + fn set_state(&mut self, state: State) { + self.state = state; + let label = match state { + State::Idle => "idle", + State::Recording => "recording", + State::Transcribing => "transcribing", + }; + self.shared_state.set_state(label); + } + /// Run the coordinator loop. This blocks until shutdown. 
pub fn run(&mut self) { info!("Coordinator started"); @@ -111,7 +125,7 @@ impl Coordinator { fn start_recording(&mut self) { info!("Recording started"); - self.state = State::Recording; + self.set_state(State::Recording); self.set_overlay(OverlayState::Recording); if self.config.audio_feedback { @@ -120,23 +134,26 @@ impl Coordinator { if self.recorder_tx.send(RecorderCommand::Start).is_err() { error!("Failed to send start command to recorder"); - self.state = State::Idle; + self.set_state(State::Idle); self.set_overlay(OverlayState::Hidden); } } fn stop_recording(&mut self) { info!("Recording stopped, starting transcription"); - self.state = State::Transcribing; + self.set_state(State::Transcribing); self.set_overlay(OverlayState::Transcribing); if self.config.audio_feedback { audio_feedback::play_blip_down(); } + // Keep recording briefly after the stop signal so trailing speech isn't clipped + thread::sleep(std::time::Duration::from_millis(300)); + if self.recorder_tx.send(RecorderCommand::Stop).is_err() { error!("Failed to send stop command to recorder"); - self.state = State::Idle; + self.set_state(State::Idle); self.set_overlay(OverlayState::Hidden); return; } @@ -148,7 +165,7 @@ impl Coordinator { } Err(_) => { error!("Failed to receive audio data"); - self.state = State::Idle; + self.set_state(State::Idle); self.set_overlay(OverlayState::Error); self.delayed_hide_overlay(); } @@ -157,7 +174,7 @@ impl Coordinator { fn cancel_recording(&mut self) { info!("Recording cancelled"); - self.state = State::Idle; + self.set_state(State::Idle); if self.recorder_tx.send(RecorderCommand::Stop).is_err() { warn!("Failed to send stop command to recorder"); @@ -176,7 +193,7 @@ impl Coordinator { Ok(filtered) => { if filtered.is_empty() { info!("No speech detected by VAD"); - self.state = State::Idle; + self.set_state(State::Idle); self.set_overlay(OverlayState::Hidden); return; } @@ -199,7 +216,7 @@ impl Coordinator { Ok(text) => { if text.is_empty() { info!("Empty 
transcription"); - self.state = State::Idle; + self.set_state(State::Idle); self.set_overlay(OverlayState::Hidden); return; } @@ -218,11 +235,11 @@ impl Coordinator { } self.delayed_hide_overlay(); - self.state = State::Idle; + self.set_state(State::Idle); } Err(e) => { error!("Transcription failed: {e}"); - self.state = State::Idle; + self.set_state(State::Idle); self.set_overlay(OverlayState::Error); self.delayed_hide_overlay(); } diff --git a/src/hotkey.rs b/src/hotkey.rs index 8c1241d..7a0342f 100644 --- a/src/hotkey.rs +++ b/src/hotkey.rs @@ -1,5 +1,6 @@ use anyhow::{bail, Result}; use rdev::{self, Event, EventType, Key}; +use std::cell::RefCell; use std::sync::mpsc; use std::time::{Duration, Instant}; use tracing::{debug, error, info}; @@ -164,77 +165,297 @@ fn parse_key(s: &str) -> Result { "7" => Key::Num7, "8" => Key::Num8, "9" => Key::Num9, + // Punctuation / symbol keys + "[" | "leftbracket" => Key::LeftBracket, + "]" | "rightbracket" => Key::RightBracket, + ";" | "semicolon" => Key::SemiColon, + "'" | "quote" => Key::Quote, + "`" | "backquote" | "backtick" => Key::BackQuote, + "\\" | "backslash" => Key::BackSlash, + "," | "comma" => Key::Comma, + "." 
| "dot" | "period" => Key::Dot, + "/" | "slash" => Key::Slash, + "-" | "minus" => Key::Minus, + "=" | "equal" | "equals" => Key::Equal, + // Additional non-character keys + "printscreen" | "prtsc" => Key::PrintScreen, + "scrolllock" => Key::ScrollLock, + "pause" | "break" => Key::Pause, + "numlock" => Key::NumLock, + "capslock" => Key::CapsLock, + // Numpad + "kp0" | "numpad0" => Key::Kp0, + "kp1" | "numpad1" => Key::Kp1, + "kp2" | "numpad2" => Key::Kp2, + "kp3" | "numpad3" => Key::Kp3, + "kp4" | "numpad4" => Key::Kp4, + "kp5" | "numpad5" => Key::Kp5, + "kp6" | "numpad6" => Key::Kp6, + "kp7" | "numpad7" => Key::Kp7, + "kp8" | "numpad8" => Key::Kp8, + "kp9" | "numpad9" => Key::Kp9, + "kpenter" | "numpadenter" => Key::KpReturn, + "kpminus" | "numpadminus" => Key::KpMinus, + "kpplus" | "numpadplus" => Key::KpPlus, + "kpmultiply" | "numpadmultiply" => Key::KpMultiply, + "kpdivide" | "numpaddivide" => Key::KpDivide, + "kpdelete" | "numpaddelete" => Key::KpDelete, _ => bail!("Unknown key: {s}"), }; Ok(key) } +/// Convert an rdev Key back to the config string representation. 
+fn key_to_string(key: &Key) -> Option { + let s = match key { + Key::Space => "space", + Key::Return => "enter", + Key::Escape => "escape", + Key::Tab => "tab", + Key::Backspace => "backspace", + Key::Delete => "delete", + Key::Insert => "insert", + Key::Home => "home", + Key::End => "end", + Key::PageUp => "pageup", + Key::PageDown => "pagedown", + Key::UpArrow => "up", + Key::DownArrow => "down", + Key::LeftArrow => "left", + Key::RightArrow => "right", + Key::F1 => "f1", + Key::F2 => "f2", + Key::F3 => "f3", + Key::F4 => "f4", + Key::F5 => "f5", + Key::F6 => "f6", + Key::F7 => "f7", + Key::F8 => "f8", + Key::F9 => "f9", + Key::F10 => "f10", + Key::F11 => "f11", + Key::F12 => "f12", + Key::KeyA => "a", + Key::KeyB => "b", + Key::KeyC => "c", + Key::KeyD => "d", + Key::KeyE => "e", + Key::KeyF => "f", + Key::KeyG => "g", + Key::KeyH => "h", + Key::KeyI => "i", + Key::KeyJ => "j", + Key::KeyK => "k", + Key::KeyL => "l", + Key::KeyM => "m", + Key::KeyN => "n", + Key::KeyO => "o", + Key::KeyP => "p", + Key::KeyQ => "q", + Key::KeyR => "r", + Key::KeyS => "s", + Key::KeyT => "t", + Key::KeyU => "u", + Key::KeyV => "v", + Key::KeyW => "w", + Key::KeyX => "x", + Key::KeyY => "y", + Key::KeyZ => "z", + Key::Num0 => "0", + Key::Num1 => "1", + Key::Num2 => "2", + Key::Num3 => "3", + Key::Num4 => "4", + Key::Num5 => "5", + Key::Num6 => "6", + Key::Num7 => "7", + Key::Num8 => "8", + Key::Num9 => "9", + Key::LeftBracket => "[", + Key::RightBracket => "]", + Key::SemiColon => ";", + Key::Quote => "'", + Key::BackQuote => "`", + Key::BackSlash => "\\", + Key::Comma => ",", + Key::Dot => ".", + Key::Slash => "/", + Key::Minus => "-", + Key::Equal => "=", + Key::PrintScreen => "printscreen", + Key::ScrollLock => "scrolllock", + Key::Pause => "pause", + Key::NumLock => "numlock", + Key::CapsLock => "capslock", + Key::Kp0 => "kp0", + Key::Kp1 => "kp1", + Key::Kp2 => "kp2", + Key::Kp3 => "kp3", + Key::Kp4 => "kp4", + Key::Kp5 => "kp5", + Key::Kp6 => "kp6", + Key::Kp7 => "kp7", + 
Key::Kp8 => "kp8", + Key::Kp9 => "kp9", + Key::KpReturn => "kpenter", + Key::KpMinus => "kpminus", + Key::KpPlus => "kpplus", + Key::KpMultiply => "kpmultiply", + Key::KpDivide => "kpdivide", + Key::KpDelete => "kpdelete", + _ => return None, + }; + Some(s.to_string()) +} + +/// Returns true if the key is a modifier (ctrl, alt, shift, meta). +fn is_modifier(key: &Key) -> bool { + matches!( + key, + Key::ControlLeft + | Key::ControlRight + | Key::Alt + | Key::AltGr + | Key::ShiftLeft + | Key::ShiftRight + | Key::MetaLeft + | Key::MetaRight + ) +} + +/// Capture a hotkey combination by listening for an actual keypress. +/// Blocks until the user presses a non-modifier key while optionally holding modifiers. +/// Returns the hotkey string (e.g. "ctrl+[") or None on timeout/error. +pub fn capture_hotkey(timeout: Duration) -> Option { + let (tx, rx) = mpsc::channel(); + + std::thread::spawn(move || { + let mut modifier_state = ModifierState::default(); + + let callback = move |event: Event| { + match event.event_type { + EventType::KeyPress(key) => { + modifier_state.update(&key, true); + + // Ignore pure modifier presses — wait for a real key + if is_modifier(&key) { + return; + } + + if let Some(key_name) = key_to_string(&key) { + let mut parts = Vec::new(); + if modifier_state.ctrl { + parts.push("ctrl".to_string()); + } + if modifier_state.alt { + parts.push("alt".to_string()); + } + if modifier_state.shift { + parts.push("shift".to_string()); + } + if modifier_state.meta { + parts.push("meta".to_string()); + } + parts.push(key_name); + let _ = tx.send(parts.join("+")); + } + } + EventType::KeyRelease(key) => { + modifier_state.update(&key, false); + } + _ => {} + } + }; + + let _ = rdev::listen(callback); + }); + + rx.recv_timeout(timeout).ok() +} + /// Start the global hotkey listener on the current thread (blocking). -/// Sends HotkeyEvents to the provided channel. 
+/// Uses `rdev::grab` to intercept and consume hotkey events so they don't +/// reach the focused application. pub fn listen( hotkey: HotkeyCombination, cancel_key: HotkeyCombination, tx: mpsc::Sender, ) { let debounce_duration = Duration::from_millis(30); - let mut last_event_time = Instant::now() - debounce_duration; - let mut modifier_state = ModifierState::default(); - let mut hotkey_held = false; - info!("Hotkey listener started"); + info!("Hotkey listener started (grab mode)"); debug!("Hotkey: {:?}", hotkey); debug!("Cancel: {:?}", cancel_key); - let callback = move |event: Event| { + // rdev::grab requires Fn (not FnMut), so wrap mutable state in RefCell + struct GrabState { + last_event_time: Instant, + modifier_state: ModifierState, + hotkey_held: bool, + } + let state = RefCell::new(GrabState { + last_event_time: Instant::now() - debounce_duration, + modifier_state: ModifierState::default(), + hotkey_held: false, + }); + + let callback = move |event: Event| -> Option { + let mut s = state.borrow_mut(); let now = Instant::now(); match event.event_type { EventType::KeyPress(key) => { - modifier_state.update(&key, true); + s.modifier_state.update(&key, true); - // Check cancel key - if key == cancel_key.key && modifier_state.all_held(&cancel_key.modifiers) { - if now.duration_since(last_event_time) >= debounce_duration { - last_event_time = now; + // Check cancel key — swallow it + if key == cancel_key.key && s.modifier_state.all_held(&cancel_key.modifiers) { + if now.duration_since(s.last_event_time) >= debounce_duration { + s.last_event_time = now; debug!("Cancel key pressed"); if tx.send(HotkeyEvent::Cancel).is_err() { error!("Failed to send cancel event"); } } - return; + return None; } - // Check hotkey - if key == hotkey.key && modifier_state.all_held(&hotkey.modifiers) { - if now.duration_since(last_event_time) >= debounce_duration && !hotkey_held { - last_event_time = now; - hotkey_held = true; + // Check hotkey — swallow it + if key == hotkey.key 
&& s.modifier_state.all_held(&hotkey.modifiers) { + if now.duration_since(s.last_event_time) >= debounce_duration && !s.hotkey_held { + s.last_event_time = now; + s.hotkey_held = true; debug!("Hotkey pressed"); if tx.send(HotkeyEvent::Pressed).is_err() { error!("Failed to send pressed event"); } } + return None; } + + Some(event) } EventType::KeyRelease(key) => { - modifier_state.update(&key, false); + s.modifier_state.update(&key, false); - // Check hotkey release (for push-to-talk) - if key == hotkey.key && hotkey_held { - if now.duration_since(last_event_time) >= debounce_duration { - last_event_time = now; - hotkey_held = false; + // Check hotkey release — swallow it + if key == hotkey.key && s.hotkey_held { + if now.duration_since(s.last_event_time) >= debounce_duration { + s.last_event_time = now; + s.hotkey_held = false; debug!("Hotkey released"); if tx.send(HotkeyEvent::Released).is_err() { error!("Failed to send released event"); } } + return None; } + + Some(event) } - _ => {} + _ => Some(event), } }; - if let Err(e) = rdev::listen(callback) { - error!("Hotkey listener error: {:?}", e); + if let Err(e) = rdev::grab(callback) { + error!("Hotkey grab error: {:?}", e); } } diff --git a/src/ipc.rs b/src/ipc.rs new file mode 100644 index 0000000..8caa475 --- /dev/null +++ b/src/ipc.rs @@ -0,0 +1,233 @@ +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use std::io::{Read, Write}; +use std::sync::Arc; +use tracing::{debug, info}; + +use crate::shared_state::SharedState; + +/// Status response sent over IPC. +#[derive(Debug, Serialize, Deserialize)] +pub struct DaemonStatus { + pub version: String, + pub state: String, + pub model: String, + pub accelerator: String, + pub uptime_secs: u64, +} + +/// Returns the platform-specific IPC path. 
+pub fn ipc_path() -> String { + #[cfg(unix)] + { + "/tmp/mouth.sock".to_string() + } + #[cfg(windows)] + { + r"\\.\pipe\mouth".to_string() + } +} + +/// Check if a daemon is already running by attempting to connect. +pub fn is_daemon_running() -> bool { + query_daemon_status().is_ok() +} + +/// Query the running daemon for its status. +pub fn query_daemon_status() -> Result { + let path = ipc_path(); + + #[cfg(unix)] + { + use std::os::unix::net::UnixStream; + let mut stream = UnixStream::connect(&path) + .with_context(|| format!("Could not connect to daemon at {path}"))?; + stream + .set_read_timeout(Some(std::time::Duration::from_secs(2))) + .ok(); + let mut buf = String::new(); + stream.read_to_string(&mut buf)?; + let status: DaemonStatus = + serde_json::from_str(&buf).context("Invalid status response from daemon")?; + Ok(status) + } + + #[cfg(windows)] + { + use std::fs::OpenOptions; + let mut file = OpenOptions::new() + .read(true) + .write(true) + .open(&path) + .with_context(|| format!("Could not connect to daemon at {path}"))?; + // Write a newline to trigger the server to respond + file.write_all(b"\n")?; + file.flush()?; + // Read response — use a fixed buffer since read_to_string waits for EOF + let mut buf = vec![0u8; 4096]; + let n = file.read(&mut buf)?; + let text = String::from_utf8_lossy(&buf[..n]); + let status: DaemonStatus = + serde_json::from_str(&text).context("Invalid status response from daemon")?; + Ok(status) + } +} + +/// Start the IPC listener on the current thread (blocking). +/// Call this from a dedicated thread. 
+pub fn start_ipc_listener(shared_state: Arc) -> Result<()> { + let path = ipc_path(); + info!("Starting IPC listener at {path}"); + + #[cfg(unix)] + { + unix_listener(&path, shared_state) + } + + #[cfg(windows)] + { + windows_listener(&path, shared_state) + } +} + +#[cfg(unix)] +fn unix_listener(path: &str, shared_state: Arc) -> Result<()> { + use std::os::unix::net::UnixListener; + + // Clean up stale socket + if std::path::Path::new(path).exists() { + if is_daemon_running() { + anyhow::bail!("Another instance of Mouth is already running"); + } + std::fs::remove_file(path).ok(); + } + + let listener = UnixListener::bind(path).context("Failed to bind IPC socket")?; + info!("IPC listener ready"); + + for stream in listener.incoming() { + match stream { + Ok(mut stream) => { + let status = build_status(&shared_state); + match serde_json::to_string(&status) { + Ok(json) => { + if let Err(e) = stream.write_all(json.as_bytes()) { + debug!("Failed to write IPC response: {e}"); + } + } + Err(e) => { + warn!("Failed to serialize status: {e}"); + } + } + } + Err(e) => { + debug!("IPC accept error: {e}"); + } + } + } + + Ok(()) +} + +#[cfg(windows)] +fn windows_listener(path: &str, shared_state: Arc) -> Result<()> { + use windows_sys::Win32::Foundation::{CloseHandle, INVALID_HANDLE_VALUE}; + use windows_sys::Win32::Storage::FileSystem::{ + FlushFileBuffers, ReadFile, WriteFile, PIPE_ACCESS_DUPLEX, + }; + use windows_sys::Win32::System::Pipes::{ + ConnectNamedPipe, CreateNamedPipeW, DisconnectNamedPipe, + PIPE_READMODE_BYTE, PIPE_TYPE_BYTE, PIPE_UNLIMITED_INSTANCES, PIPE_WAIT, + }; + + let wide_path: Vec = path.encode_utf16().chain(std::iter::once(0)).collect(); + + info!("IPC listener ready"); + + loop { + let handle = unsafe { + CreateNamedPipeW( + wide_path.as_ptr(), + PIPE_ACCESS_DUPLEX, + PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, + PIPE_UNLIMITED_INSTANCES, + 4096, + 4096, + 0, + std::ptr::null(), + ) + }; + + if handle == INVALID_HANDLE_VALUE { + 
tracing::error!("Failed to create named pipe"); + std::thread::sleep(std::time::Duration::from_secs(1)); + continue; + } + + // Wait for a client to connect + let connected = unsafe { ConnectNamedPipe(handle, std::ptr::null_mut()) }; + if connected == 0 { + let err = std::io::Error::last_os_error(); + // ERROR_PIPE_CONNECTED (535) means client already connected — that's ok + if err.raw_os_error() != Some(535) { + debug!("ConnectNamedPipe error: {err}"); + unsafe { CloseHandle(handle) }; + continue; + } + } + + // Read the trigger byte from the client (just 1 byte to unblock) + let mut read_buf = [0u8; 1]; + let mut bytes_read: u32 = 0; + unsafe { + ReadFile( + handle, + read_buf.as_mut_ptr(), + 1, + &mut bytes_read, + std::ptr::null_mut(), + ); + } + + // Write the status response + let status = build_status(&shared_state); + if let Ok(json) = serde_json::to_string(&status) { + let bytes = json.as_bytes(); + let mut written: u32 = 0; + unsafe { + WriteFile( + handle, + bytes.as_ptr().cast(), + bytes.len() as u32, + &mut written, + std::ptr::null_mut(), + ); + FlushFileBuffers(handle); + } + } + + unsafe { + DisconnectNamedPipe(handle); + CloseHandle(handle); + } + } +} + +fn build_status(shared_state: &SharedState) -> DaemonStatus { + DaemonStatus { + version: env!("CARGO_PKG_VERSION").to_string(), + state: shared_state.get_state(), + model: shared_state.model.clone(), + accelerator: shared_state.accelerator.clone(), + uptime_secs: shared_state.uptime_secs(), + } +} + +/// Clean up the IPC socket (Unix only). 
+pub fn cleanup() { + #[cfg(unix)] + { + let path = ipc_path(); + std::fs::remove_file(&path).ok(); + } +} diff --git a/src/main.rs b/src/main.rs index 0c9d423..4ec3990 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,10 +3,12 @@ mod cli; mod config; mod coordinator; mod hotkey; +mod ipc; mod model_cache; mod overlay; mod paste; mod recorder; +mod shared_state; mod transcriber; mod vad; diff --git a/src/model_cache.rs b/src/model_cache.rs index 6678e48..5249060 100644 --- a/src/model_cache.rs +++ b/src/model_cache.rs @@ -82,6 +82,23 @@ pub fn ensure_model(model_name: &str) -> Result { }) } +/// Ensure the Silero VAD model is downloaded and return its path. +pub fn ensure_vad_model() -> Result { + let repo_id = "onnx-community/silero-vad"; + let model_file = "onnx/model.onnx"; + + let api = Api::new().context("Failed to create HuggingFace Hub API")?; + let repo = api.model(repo_id.to_string()); + + info!("Ensuring Silero VAD model from {repo_id}"); + let path = repo + .get(model_file) + .with_context(|| format!("Failed to download VAD model from {repo_id}"))?; + debug!("VAD model: {}", path.display()); + + Ok(path) +} + /// Check if model files are already cached. pub fn is_model_cached(model_name: &str) -> bool { ensure_model(model_name).is_ok() diff --git a/src/overlay.rs b/src/overlay.rs index 69e428a..6383576 100644 --- a/src/overlay.rs +++ b/src/overlay.rs @@ -8,8 +8,8 @@ use winit::window::{Window, WindowAttributes, WindowId, WindowLevel}; use crate::config::OverlayPosition; -const OVERLAY_WIDTH: u32 = 200; -const OVERLAY_HEIGHT: u32 = 36; +const OVERLAY_WIDTH: u32 = 150; +const OVERLAY_HEIGHT: u32 = 18; /// State of the overlay display. 
#[derive(Debug, Clone, Copy, PartialEq)] @@ -34,6 +34,8 @@ struct OverlayApp { surface: Option, std::rc::Rc>>, state: OverlayState, position: OverlayPosition, + _tray_icon: Option, + tray_exit_id: Option, } impl OverlayApp { @@ -99,6 +101,43 @@ impl OverlayApp { window.set_visible(visible); } } + + fn create_tray_icon(&mut self) { + use tray_icon::menu::{Menu, MenuItem}; + use tray_icon::TrayIconBuilder; + + let menu = Menu::new(); + let exit_item = MenuItem::new("Exit", true, None); + let exit_id = exit_item.id().clone(); + if let Err(e) = menu.append(&exit_item) { + warn!("Failed to add tray menu item: {e}"); + return; + } + + let icon = match load_tray_icon() { + Ok(i) => i, + Err(e) => { + warn!("Failed to load tray icon: {e}"); + return; + } + }; + + match TrayIconBuilder::new() + .with_menu(Box::new(menu)) + .with_tooltip("Mouth — Speech to Text") + .with_icon(icon) + .build() + { + Ok(tray) => { + info!("System tray icon created"); + self._tray_icon = Some(tray); + self.tray_exit_id = Some(exit_id); + } + Err(e) => { + warn!("Failed to create tray icon: {e}"); + } + } + } } impl ApplicationHandler for OverlayApp { @@ -154,6 +193,9 @@ impl ApplicationHandler for OverlayApp { error!("Failed to create overlay window: {e}"); } } + + // Create tray icon (must be done on the main/event-loop thread) + self.create_tray_icon(); } fn user_event(&mut self, event_loop: &ActiveEventLoop, event: OverlayEvent) { @@ -176,6 +218,99 @@ impl ApplicationHandler for OverlayApp { self.draw(); } } + + fn about_to_wait(&mut self, event_loop: &ActiveEventLoop) { + // Poll tray menu events + if let Some(exit_id) = &self.tray_exit_id { + if let Ok(event) = tray_icon::menu::MenuEvent::receiver().try_recv() { + if event.id() == exit_id { + info!("Exit requested via tray icon"); + crate::ipc::cleanup(); + event_loop.exit(); + } + } + } + } +} + +fn load_tray_icon() -> Result> { + const S: u32 = 32; + let mut pixels = vec![0u8; (S * S * 4) as usize]; + + let cx = S as f32 / 2.0; + + for y 
in 0..S { + for x in 0..S { + let fx = x as f32 + 0.5; + let fy = y as f32 + 0.5; + let idx = ((y * S + x) * 4) as usize; + + let mut alpha: f32 = 0.0; + + // Microphone body: rounded rectangle (capsule shape) + // Center x=16, from y=3 to y=18, radius 5 + let mic_top = 3.0; + let mic_bot = 18.0; + let mic_r = 5.5; + let mic_cx = cx; + { + let dy = fy.clamp(mic_top + mic_r, mic_bot - mic_r); + let dist = ((fx - mic_cx).powi(2) + (fy - dy).powi(2)).sqrt(); + if dist <= mic_r { + alpha = 1.0; + } else if dist <= mic_r + 1.0 { + alpha = alpha.max(mic_r + 1.0 - dist); // anti-alias + } + } + + // Cradle arc: U-shape below mic, from y=14 to y=22 + { + let arc_cy = 14.0; + let arc_r = 8.5; + let arc_thickness = 2.2; + let dx = fx - cx; + let dy = fy - arc_cy; + let dist = (dx * dx + dy * dy).sqrt(); + if fy >= arc_cy && dist >= arc_r - arc_thickness / 2.0 && dist <= arc_r + arc_thickness / 2.0 { + let edge_outer = (arc_r + arc_thickness / 2.0 - dist).min(1.0).max(0.0); + let edge_inner = (dist - (arc_r - arc_thickness / 2.0)).min(1.0).max(0.0); + alpha = alpha.max(edge_outer.min(edge_inner)); + } + } + + // Stem: vertical line from arc bottom to near bottom + { + let stem_top = 22.0; + let stem_bot = 27.0; + let stem_w = 1.2; + if fy >= stem_top && fy <= stem_bot && (fx - cx).abs() <= stem_w { + let edge = (stem_w - (fx - cx).abs()).min(1.0); + alpha = alpha.max(edge); + } + } + + // Base: horizontal line at bottom + { + let base_y = 27.0; + let base_h = 2.0; + let base_hw = 5.0; + if fy >= base_y && fy <= base_y + base_h && (fx - cx).abs() <= base_hw { + let edge = (base_hw - (fx - cx).abs()).min(1.0); + alpha = alpha.max(edge); + } + } + + let a = (alpha.clamp(0.0, 1.0) * 255.0) as u8; + // White icon with alpha (looks good on both light and dark taskbars) + pixels[idx] = 255; // R + pixels[idx + 1] = 255; // G + pixels[idx + 2] = 255; // B + pixels[idx + 3] = a; // A + } + } + + let icon = tray_icon::Icon::from_rgba(pixels, S, S)?; + Ok(icon) } /// Create an event 
loop and return the proxy for sending events. @@ -195,6 +330,8 @@ pub fn run_event_loop( surface: None, state: OverlayState::Hidden, position, + _tray_icon: None, + tray_exit_id: None, }; event_loop.run_app(&mut app) diff --git a/src/recorder.rs b/src/recorder.rs index 2be6932..2f4525d 100644 --- a/src/recorder.rs +++ b/src/recorder.rs @@ -7,6 +7,9 @@ use std::sync::{Arc, Mutex}; use tracing::{debug, error, info, warn}; const TARGET_SAMPLE_RATE: u32 = 16000; +/// Silence prepended to recordings to give the model a clean lead-in, +/// compensating for mic startup latency. +const LEAD_IN_MS: u32 = 300; /// Commands sent to the recorder. #[derive(Debug)] @@ -252,8 +255,13 @@ pub fn run( debug!("Resampled to {} samples at {}Hz", samples.len(), TARGET_SAMPLE_RATE); + // Prepend silence to compensate for mic startup latency + let lead_in_samples = (TARGET_SAMPLE_RATE * LEAD_IN_MS / 1000) as usize; + let mut padded = vec![0.0f32; lead_in_samples]; + padded.extend_from_slice(&samples); + let audio = AudioData { - samples, + samples: padded, sample_rate: TARGET_SAMPLE_RATE, }; diff --git a/src/shared_state.rs b/src/shared_state.rs new file mode 100644 index 0000000..57b6ae2 --- /dev/null +++ b/src/shared_state.rs @@ -0,0 +1,35 @@ +use std::sync::RwLock; +use std::time::Instant; + +/// Thread-safe shared state accessible by the coordinator, IPC listener, and tray icon. 
+pub struct SharedState { + pub state: RwLock, + pub model: String, + pub accelerator: String, + pub started_at: Instant, +} + +impl SharedState { + pub fn new(model: String, accelerator: String) -> Self { + Self { + state: RwLock::new("idle".to_string()), + model, + accelerator, + started_at: Instant::now(), + } + } + + pub fn set_state(&self, state: &str) { + if let Ok(mut s) = self.state.write() { + *s = state.to_string(); + } + } + + pub fn get_state(&self) -> String { + self.state.read().map(|s| s.clone()).unwrap_or_else(|_| "unknown".to_string()) + } + + pub fn uptime_secs(&self) -> u64 { + self.started_at.elapsed().as_secs() + } +} diff --git a/src/transcriber.rs b/src/transcriber.rs index 6e7b6d8..f99eebe 100644 --- a/src/transcriber.rs +++ b/src/transcriber.rs @@ -22,7 +22,7 @@ pub struct Transcriber { encoder: Session, decoder: Session, vocab: Vec, - blank_id: i64, + blank_id: i32, vocab_size: usize, } @@ -45,7 +45,7 @@ impl Transcriber { let vocab = load_vocab(&paths.vocab)?; let vocab_size = vocab.len(); - let blank_id = (vocab_size - 1) as i64; // is the last token + let blank_id = (vocab_size - 1) as i32; // is the last token info!("Vocab loaded: {vocab_size} tokens, blank_id={blank_id}"); Ok(Self { @@ -121,7 +121,7 @@ impl Transcriber { Ok((enc_data.to_vec(), feat_dim, encoded_length)) } - fn tdt_greedy_decode(&mut self, encoder_output: &[f32], feat_dim: usize, encoded_length: usize) -> Result> { + fn tdt_greedy_decode(&mut self, encoder_output: &[f32], feat_dim: usize, encoded_length: usize) -> Result> { // Determine decoder LSTM state dimensions by inspecting input metadata // Default fallback values let mut state_shape: [usize; 3] = [1, 1, 640]; @@ -168,7 +168,7 @@ impl Transcriber { let frame = Array3::from_shape_vec([1, feat_dim, 1], frame_data)?; let targets = ndarray::Array2::from_shape_vec((1, 1), vec![prev_token])?; - let target_length = ndarray::Array1::from_vec(vec![1i64]); + let target_length = ndarray::Array1::from_vec(vec![1i32]); let 
outputs = self.decoder.run(vec![ make_input("encoder_outputs", Value::from_array(frame)?.into_dyn()), @@ -186,7 +186,7 @@ impl Transcriber { let token_logits = &output_data[..self.vocab_size]; let duration_logits = &output_data[self.vocab_size..]; - let token_id = argmax(token_logits) as i64; + let token_id = argmax(token_logits) as i32; let duration = if !duration_logits.is_empty() { argmax(duration_logits) } else { @@ -225,7 +225,7 @@ impl Transcriber { Ok(tokens) } - fn tokens_to_text(&self, tokens: &[i64]) -> String { + fn tokens_to_text(&self, tokens: &[i32]) -> String { let mut text = String::new(); for &token_id in tokens { if token_id >= 0 && (token_id as usize) < self.vocab.len() {