diff --git a/.gitignore b/.gitignore index c9e5566..f71fe57 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,8 @@ coverage.html # skips paths beginning with _ or ., but ignore explicitly so nothing # checked in here can leak into a release tarball. /_diag/ + +# Dev-only one-shot binaries (cmd/_*) — never shipped. Go's build +# tooling already skips paths starting with _, but ignore explicitly +# so an accidental `git add cmd/.` can't sneak them into a release. +/cmd/_*/ diff --git a/cmd/server/main.go b/cmd/server/main.go index a083a6d..cb3a207 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -12,8 +12,10 @@ import ( "syscall" "time" + "gitea.dcglab.co.uk/steve/restic-manager/internal/alert" "gitea.dcglab.co.uk/steve/restic-manager/internal/auth" "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/config" rmhttp "gitea.dcglab.co.uk/steve/restic-manager/internal/server/http" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance" @@ -82,19 +84,24 @@ func run() error { hub := ws.NewHub() jobHub := ws.NewJobHub() + notifHub := notification.NewHub(st, aead, cfg.BaseURL) + alertEngine := alert.NewEngine(st, notifHub) + renderer, err := ui.New() if err != nil { return fmt.Errorf("ui: %w", err) } deps := rmhttp.Deps{ - Cfg: cfg, - Store: st, - AEAD: aead, - Hub: hub, - JobHub: jobHub, - UI: renderer, - Version: version, + Cfg: cfg, + Store: st, + AEAD: aead, + Hub: hub, + JobHub: jobHub, + AlertEngine: alertEngine, + NotificationHub: notifHub, + UI: renderer, + Version: version, } // First-run bootstrap: if the users table is empty, mint a one-time @@ -126,6 +133,8 @@ func run() error { ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer stop() + go alertEngine.Run(ctx) + errCh := make(chan error, 1) go func() { slog.Info("server listening", "addr", cfg.Listen, "version", version) @@ -175,8 +184,11 @@ func run() error { } case <-offlineTick.C: cutoff := time.Now().Add(-90 * time.Second) - if n, err := st.MarkHostsOfflineStale(ctx, cutoff); err == nil && n > 0 { - slog.Info("marked hosts offline (stale heartbeat)", "n", n) + if ids, err := st.MarkHostsOfflineStaleReturnIDs(ctx, cutoff); err == nil && len(ids) > 0 { + slog.Info("marked hosts offline (stale heartbeat)", "n", len(ids)) + for _, id := range ids { + alertEngine.NotifyHostOffline(id) + } } case <-pendingDrainTick.C: srv.DrainAllDue(ctx) diff --git a/docs/superpowers/plans/2026-05-04-p3-alerts.md b/docs/superpowers/plans/2026-05-04-p3-alerts.md new file mode 100644 index 0000000..63dee93 --- /dev/null +++ b/docs/superpowers/plans/2026-05-04-p3-alerts.md @@ -0,0 +1,3410 @@ +# P3 Alerts Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build the alerts subsystem (engine + three notification channels + UI) per `docs/superpowers/specs/2026-05-04-p3-alerts-design.md`. End state: a hardcoded six-rule engine raises alerts on real events; webhook / ntfy / SMTP channels notify on raise/ack/resolve; operators see alerts at `/alerts` and configure channels at `/settings/notifications`. + +**Architecture:** Three loosely-coupled units behind one `AlertEngine` goroutine — event hooks fed by existing call sites (MarkJobFinished, offline sweeper, ws hello), 60s ticker for stale-schedule + auto-resolution, fan-out via `notification.Hub`. All persisted state in two new tables (`notification_channels`, `notification_log`) plus one new column on the existing `alerts` table. + +**Tech Stack:** Go 1.25, modernc.org/sqlite, chi router, html/template, AEAD-encrypted blobs (existing `crypto.AEAD`), `net/smtp` + `crypto/tls` for SMTP, `net/http` for webhook + ntfy. + +--- + +## File Structure + +| File | Status | Purpose | +| --- | --- | --- | +| `internal/store/migrations/0013_alerts_last_seen.sql` | Create | Adds `alerts.last_seen_at` column. | +| `internal/store/migrations/0014_notifications.sql` | Create | New `notification_channels` + `notification_log` tables. | +| `internal/store/alerts.go` | Modify | Existing file ships the `Alert` type only. Add `RaiseOrTouch`, `Acknowledge`, `Resolve`, `AutoResolve`, `ListAlerts`, `GetAlert`. | +| `internal/store/notification_channels.go` | Create | CRUD for `notification_channels` (encrypted config blob), `AppendNotificationLog`. | +| `internal/notification/payload.go` | Create | `Event` enum + `Payload` struct shared across channels. | +| `internal/notification/channel.go` | Create | `Channel` interface; helpers (build link, etc). | +| `internal/notification/webhook.go` | Create | Webhook impl (HTTP POST + bearer + custom header). | +| `internal/notification/ntfy.go` | Create | Ntfy impl (POST with Title/Priority/Tags/Click). | +| `internal/notification/smtp.go` | Create | SMTP impl using `net/smtp` + `crypto/tls`. | +| `internal/notification/hub.go` | Create | Per-event fan-out across enabled channels; logs results. | +| `internal/alert/engine.go` | Create | Goroutine, event channels, ticker, rule dispatch. | +| `internal/alert/rules.go` | Create | Rule registry + per-rule logic for the six rules. | +| `internal/server/http/ui_alerts.go` | Create | `/alerts` GET + `acknowledge` / `resolve` POST handlers. | +| `internal/server/http/ui_notifications.go` | Create | `/settings/notifications` CRUD + `POST /api/notifications/{id}/test`. | +| `internal/server/http/server.go` | Modify | Wire the new routes; add `Engine` to `Deps`. | +| `internal/server/ui/ui.go` | Modify | Add `alerts.html`, `notifications.html`, `notification_edit.html`, `settings.html`, `partials/alert_row.html`, `partials/crit_banner.html` to commonPaths. | +| `web/templates/pages/alerts.html` | Create | Fleet alerts list + filter strip. | +| `web/templates/pages/settings.html` | Create | Settings shell with sub-tabs (Notifications / Users / Auth). | +| `web/templates/pages/notifications.html` | Create | Channel list (Notifications sub-tab). | +| `web/templates/pages/notification_edit.html` | Create | Channel kind picker + per-kind form + test result + payload preview. | +| `web/templates/partials/alert_row.html` | Create | One alert row (used standalone + on swap). | +| `web/templates/partials/crit_banner.html` | Create | Dashboard-top critical banner. | +| `web/templates/pages/dashboard.html` | Modify | Render the crit banner partial. | +| `web/templates/partials/nav.html` | Modify | Show alert count badge on Alerts tab. | +| `cmd/server/main.go` | Modify | Construct `alert.Engine` + `notification.Hub` + start engine goroutine. | +| `internal/server/ws/handler.go` | Modify | Hook `engine.NotifyHostOnline` on hello + `NotifyJobFinished` after MarkJobFinished. | +| `tasks.md` | Modify | Tick P3-05/06/07 with as-shipped notes. | + +Tests live in `_test.go` files alongside the source (existing convention). + +--- + +## Slice A — Schema groundwork + +### Task A1: Migration 0013 — `alerts.last_seen_at` + +**Files:** +- Create: `internal/store/migrations/0013_alerts_last_seen.sql` +- Test: `internal/store/migrate_test.go` (existing — gains a small assertion) + +- [ ] **Step 1: Write the failing test** + +Append to `internal/store/migrate_test.go`: + +```go +func TestMigration0013AlertsLastSeen(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + defer st.Close() + + // Column must exist after migration. Best signal: PRAGMA table_info. + rows, err := st.DB().Query(`SELECT name FROM pragma_table_info('alerts')`) + if err != nil { + t.Fatalf("pragma: %v", err) + } + defer rows.Close() + cols := map[string]bool{} + for rows.Next() { + var n string + _ = rows.Scan(&n) + cols[n] = true + } + if !cols["last_seen_at"] { + t.Fatalf("alerts.last_seen_at not present after migration; cols=%v", cols) + } +} +``` + +- [ ] **Step 2: Run to verify it fails** + +```sh +go test ./internal/store/ -run TestMigration0013AlertsLastSeen -count=1 +``` +Expected: FAIL — `alerts.last_seen_at not present`. + +- [ ] **Step 3: Write the migration** + +`internal/store/migrations/0013_alerts_last_seen.sql`: + +```sql +-- 0013_alerts_last_seen.sql +-- +-- Add alerts.last_seen_at to support open-alert dedup with +-- recurrence-tracking. The engine bumps this column on every tick +-- where a rule still matches an existing open alert, so the UI can +-- render "still happening · Ns ago" without sending a fresh +-- notification. +-- +-- Column-level ALTER per CLAUDE.md (no rebuild — alerts has inbound +-- FK from acknowledged_by → users; rebuild would risk cascade). +-- Backfill last_seen_at = created_at for any pre-existing rows so +-- the column is non-null in practice (stays nullable in the schema +-- for forwards-compat with rows that haven't been touched yet). + +ALTER TABLE alerts ADD COLUMN last_seen_at TEXT; +UPDATE alerts SET last_seen_at = created_at WHERE last_seen_at IS NULL; +``` + +- [ ] **Step 4: Run test to verify it passes** + +```sh +go test ./internal/store/ -run TestMigration0013AlertsLastSeen -count=1 +``` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```sh +git add internal/store/migrations/0013_alerts_last_seen.sql internal/store/migrate_test.go +git commit -m "store: migration 0013 — alerts.last_seen_at" +``` + +--- + +### Task A2: Migration 0014 — `notification_channels` + `notification_log` + +**Files:** +- Create: `internal/store/migrations/0014_notifications.sql` +- Test: `internal/store/migrate_test.go` + +- [ ] **Step 1: Append failing test** + +```go +func TestMigration0014NotificationsTables(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + defer st.Close() + + for _, want := range []string{"notification_channels", "notification_log"} { + var n int + if err := st.DB().QueryRow( + `SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?`, want, + ).Scan(&n); err != nil { + t.Fatalf("scan: %v", err) + } + if n != 1 { + t.Errorf("table %q missing after migration", want) + } + } + + // Sanity: kind CHECK accepts all three v1 kinds. + for _, k := range []string{"webhook", "ntfy", "smtp"} { + _, err := st.DB().Exec( + `INSERT INTO notification_channels (id, kind, name, config, created_at, updated_at) + VALUES (?, ?, ?, x'00', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')`, + "test-"+k, k, "test-"+k) + if err != nil { + t.Errorf("insert %q rejected by CHECK: %v", k, err) + } + } +} +``` + +- [ ] **Step 2: Run to verify it fails** + +```sh +go test ./internal/store/ -run TestMigration0014NotificationsTables -count=1 +``` +Expected: FAIL — both tables missing. + +- [ ] **Step 3: Write the migration** + +`internal/store/migrations/0014_notifications.sql`: + +```sql +-- 0014_notifications.sql +-- +-- Notification channels (operator-configured destinations: webhook, +-- ntfy, SMTP) and the dispatch log. Both are net-new — no rebuild +-- pattern needed. +-- +-- config is an AEAD-encrypted JSON blob. Per-kind shape lives in +-- internal/notification/{webhook,ntfy,smtp}.go. The CHECK keeps wire +-- consistency — adding a new kind requires a follow-up migration +-- (forces the implementer to think about it). + +CREATE TABLE notification_channels ( + id TEXT PRIMARY KEY, + kind TEXT NOT NULL CHECK (kind IN ('webhook', 'ntfy', 'smtp')), + name TEXT NOT NULL, + enabled INTEGER NOT NULL DEFAULT 1 CHECK (enabled IN (0, 1)), + config BLOB NOT NULL, -- AEAD-encrypted JSON; per-kind shape + default_priority TEXT, -- ntfy only; null for webhook + smtp + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_fired_at TEXT +); + +CREATE INDEX notification_channels_enabled + ON notification_channels(enabled) WHERE enabled = 1; + +CREATE TABLE notification_log ( + id TEXT PRIMARY KEY, + channel_id TEXT NOT NULL REFERENCES notification_channels(id) ON DELETE CASCADE, + alert_id TEXT REFERENCES alerts(id) ON DELETE SET NULL, + event TEXT NOT NULL, -- alert.raised | alert.acknowledged | alert.resolved | alert.test + ok INTEGER NOT NULL CHECK (ok IN (0, 1)), + status_code INTEGER, + latency_ms INTEGER, + error TEXT, + fired_at TEXT NOT NULL +); + +CREATE INDEX notification_log_channel + ON notification_log(channel_id, fired_at DESC); +CREATE INDEX notification_log_alert + ON notification_log(alert_id); +``` + +- [ ] **Step 4: Run test to verify it passes** + +```sh +go test ./internal/store/ -run TestMigration0014NotificationsTables -count=1 +``` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```sh +git add internal/store/migrations/0014_notifications.sql internal/store/migrate_test.go +git commit -m "store: migration 0014 — notification_channels + notification_log" +``` + +--- + +### Task A3: Alerts store API — `RaiseOrTouch`, `Acknowledge`, `Resolve`, `AutoResolve`, `ListAlerts`, `GetAlert` + +**Files:** +- Modify: `internal/store/alerts.go` +- Test: `internal/store/alerts_test.go` (create) +- Modify: `internal/store/types.go` (extend `Alert` with `LastSeenAt *time.Time` — check current shape first) + +- [ ] **Step 1: Extend the Alert type** + +Read `internal/store/types.go` for the existing `Alert` struct. Add `LastSeenAt *time.Time` after `CreatedAt`. The whole struct should look like: + +```go +type Alert struct { + ID string + HostID *string + Kind string + Severity string + Message string + CreatedAt time.Time + LastSeenAt *time.Time + AcknowledgedAt *time.Time + AcknowledgedBy *string + ResolvedAt *time.Time +} +``` + +- [ ] **Step 2: Write the failing test** + +`internal/store/alerts_test.go`: + +```go +package store + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" +) + +func newTestStoreWithHost(t *testing.T) (*Store, string) { + t.Helper() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { _ = st.Close() }) + hostID := ulid.Make().String() + if err := st.CreateHost(context.Background(), Host{ + ID: hostID, Name: "h", OS: "linux", Arch: "amd64", + EnrolledAt: time.Now().UTC(), + }, "deadbeef", ""); err != nil { + t.Fatalf("create host: %v", err) + } + return st, hostID +} + +func TestRaiseOrTouchInsertsThenTouches(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + t0 := time.Now().UTC() + id1, didRaise, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", + "Backup failed: 401", t0) + if err != nil { + t.Fatalf("first raise: %v", err) + } + if !didRaise { + t.Fatalf("first call must didRaise=true") + } + if id1 == "" { + t.Fatalf("expected non-empty id") + } + + // Second call within the same open window should touch, not insert. + t1 := t0.Add(60 * time.Second) + id2, didRaise2, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", + "Backup failed: 401 (still)", t1) + if err != nil { + t.Fatalf("touch: %v", err) + } + if didRaise2 { + t.Fatalf("second call must didRaise=false") + } + if id2 != id1 { + t.Fatalf("touch returned a different id: got %q want %q", id2, id1) + } + + // last_seen_at and message must be updated. + got, err := st.GetAlert(ctx, id1) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.LastSeenAt == nil || !got.LastSeenAt.Equal(t1) { + t.Errorf("last_seen_at: got %v want %v", got.LastSeenAt, t1) + } + if got.Message != "Backup failed: 401 (still)" { + t.Errorf("message not refreshed: %q", got.Message) + } +} + +func TestResolveAndReRaiseStartsFreshAlert(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + t0 := time.Now().UTC() + id1, _, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "first", t0) + if err != nil { + t.Fatalf("raise: %v", err) + } + if err := st.Resolve(ctx, id1, t0.Add(time.Minute)); err != nil { + t.Fatalf("resolve: %v", err) + } + + id2, didRaise, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "second", t0.Add(2*time.Minute)) + if err != nil { + t.Fatalf("re-raise: %v", err) + } + if !didRaise { + t.Fatalf("post-resolve raise must didRaise=true") + } + if id2 == id1 { + t.Fatalf("re-raise reused the resolved id; want a fresh row") + } +} + +func TestAcknowledgeKeepsAlertOpen(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + id, _, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "m", time.Now().UTC()) + if err != nil { + t.Fatalf("raise: %v", err) + } + userID := "u-1" + if err := st.Acknowledge(ctx, id, userID, time.Now().UTC()); err != nil { + t.Fatalf("ack: %v", err) + } + got, err := st.GetAlert(ctx, id) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.AcknowledgedAt == nil { + t.Errorf("acknowledged_at not set") + } + if got.AcknowledgedBy == nil || *got.AcknowledgedBy != userID { + t.Errorf("acknowledged_by: got %v want %q", got.AcknowledgedBy, userID) + } + if got.ResolvedAt != nil { + t.Errorf("ack must not set resolved_at; got %v", got.ResolvedAt) + } +} + +func TestAutoResolveClearsOpenAlerts(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + t0 := time.Now().UTC() + id, _, _ := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "m", t0) + if err := st.AutoResolve(ctx, hostID, "backup_failed", t0.Add(time.Minute)); err != nil { + t.Fatalf("auto-resolve: %v", err) + } + got, _ := st.GetAlert(ctx, id) + if got.ResolvedAt == nil { + t.Errorf("expected resolved_at set") + } +} + +func TestListAlertsFilters(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + t0 := time.Now().UTC() + + // One open warning + one resolved info. + _, _, _ = st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "open", t0) + id2, _, _ := st.RaiseOrTouch(ctx, hostID, "stale_schedule", "info", "done", t0) + _ = st.Resolve(ctx, id2, t0.Add(time.Minute)) + + open, err := st.ListAlerts(ctx, AlertFilter{Status: "open"}) + if err != nil { + t.Fatalf("list open: %v", err) + } + if len(open) != 1 || open[0].Severity != "warning" { + t.Errorf("open filter: got %+v", open) + } + + all, err := st.ListAlerts(ctx, AlertFilter{Status: "all"}) + if err != nil { + t.Fatalf("list all: %v", err) + } + if len(all) != 2 { + t.Errorf("all filter: got %d, want 2", len(all)) + } +} +``` + +- [ ] **Step 3: Run to verify it fails** + +```sh +go test ./internal/store/ -run "TestRaiseOrTouchInsertsThenTouches|TestResolveAndReRaiseStartsFreshAlert|TestAcknowledgeKeepsAlertOpen|TestAutoResolveClearsOpenAlerts|TestListAlertsFilters" -count=1 +``` +Expected: FAIL — methods don't exist yet. + +- [ ] **Step 4: Implement** + +Append to `internal/store/alerts.go`: + +```go +// AlertFilter narrows ListAlerts. +type AlertFilter struct { + Status string // "open" | "acknowledged" | "resolved" | "all" | "" + Severity string // "info" | "warning" | "critical" | "" + HostID string // empty = any host + Search string // substring match on message + Limit int // 0 = no limit +} + +// RaiseOrTouch implements the dedup + last_seen_at bump pattern. If +// an alert with (host_id, kind, resolved_at IS NULL) already exists, +// it touches last_seen_at + message and returns (id, false). Otherwise +// inserts a fresh row and returns (id, true). Caller fires a +// notification only when didRaise=true. +func (s *Store) RaiseOrTouch(ctx context.Context, hostID, kind, severity, message string, when time.Time) (id string, didRaise bool, err error) { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return "", false, fmt.Errorf("store: begin: %w", err) + } + defer func() { _ = tx.Rollback() }() + + row := tx.QueryRowContext(ctx, + `SELECT id FROM alerts WHERE host_id = ? AND kind = ? AND resolved_at IS NULL LIMIT 1`, + hostID, kind) + var existing string + switch err := row.Scan(&existing); { + case err == nil: + _, uerr := tx.ExecContext(ctx, + `UPDATE alerts SET last_seen_at = ?, message = ? WHERE id = ?`, + when.UTC().Format(time.RFC3339Nano), message, existing) + if uerr != nil { + return "", false, fmt.Errorf("store: touch alert: %w", uerr) + } + if err := tx.Commit(); err != nil { + return "", false, err + } + return existing, false, nil + case errors.Is(err, sql.ErrNoRows): + // fall through to insert + default: + return "", false, fmt.Errorf("store: lookup alert: %w", err) + } + + id = ulid.Make().String() + whenStr := when.UTC().Format(time.RFC3339Nano) + _, err = tx.ExecContext(ctx, + `INSERT INTO alerts (id, host_id, kind, severity, message, created_at, last_seen_at) + VALUES (?, ?, ?, ?, ?, ?, ?)`, + id, hostID, kind, severity, message, whenStr, whenStr) + if err != nil { + return "", false, fmt.Errorf("store: insert alert: %w", err) + } + if err := tx.Commit(); err != nil { + return "", false, err + } + return id, true, nil +} + +// Acknowledge sets acknowledged_at + acknowledged_by; does NOT set +// resolved_at. Idempotent — re-acknowledging just refreshes the timestamp. +func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Time) error { + res, err := s.db.ExecContext(ctx, + `UPDATE alerts SET acknowledged_at = ?, acknowledged_by = ? + WHERE id = ? AND resolved_at IS NULL`, + when.UTC().Format(time.RFC3339Nano), userID, id) + if err != nil { + return fmt.Errorf("store: ack alert: %w", err) + } + n, _ := res.RowsAffected() + if n == 0 { + return ErrNotFound + } + return nil +} + +// Resolve marks the alert resolved. Idempotent on already-resolved rows +// (no-op). +func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error { + _, err := s.db.ExecContext(ctx, + `UPDATE alerts SET resolved_at = ? + WHERE id = ? AND resolved_at IS NULL`, + when.UTC().Format(time.RFC3339Nano), id) + if err != nil { + return fmt.Errorf("store: resolve alert: %w", err) + } + return nil +} + +// AutoResolve closes every open alert for the (host_id, kind) pair. +// Used by the engine when a rule's underlying condition clears (e.g. +// next backup succeeded so backup_failed clears). +func (s *Store) AutoResolve(ctx context.Context, hostID, kind string, when time.Time) error { + _, err := s.db.ExecContext(ctx, + `UPDATE alerts SET resolved_at = ? + WHERE host_id = ? AND kind = ? AND resolved_at IS NULL`, + when.UTC().Format(time.RFC3339Nano), hostID, kind) + if err != nil { + return fmt.Errorf("store: auto-resolve: %w", err) + } + return nil +} + +// GetAlert reads one row. +func (s *Store) GetAlert(ctx context.Context, id string) (*Alert, error) { + row := s.db.QueryRowContext(ctx, + `SELECT id, host_id, kind, severity, message, created_at, last_seen_at, + acknowledged_at, acknowledged_by, resolved_at + FROM alerts WHERE id = ?`, id) + return scanAlert(row.Scan) +} + +// ListAlerts is the filtered list. Sort: open-first, then by created_at desc. +func (s *Store) ListAlerts(ctx context.Context, f AlertFilter) ([]Alert, error) { + q := `SELECT id, host_id, kind, severity, message, created_at, last_seen_at, + acknowledged_at, acknowledged_by, resolved_at FROM alerts` + conds := []string{} + args := []any{} + switch f.Status { + case "open": + conds = append(conds, "resolved_at IS NULL AND acknowledged_at IS NULL") + case "acknowledged": + conds = append(conds, "resolved_at IS NULL AND acknowledged_at IS NOT NULL") + case "resolved": + conds = append(conds, "resolved_at IS NOT NULL") + case "all", "": + // no-op + } + if f.Severity != "" { + conds = append(conds, "severity = ?") + args = append(args, f.Severity) + } + if f.HostID != "" { + conds = append(conds, "host_id = ?") + args = append(args, f.HostID) + } + if f.Search != "" { + conds = append(conds, "message LIKE ?") + args = append(args, "%"+f.Search+"%") + } + if len(conds) > 0 { + q += " WHERE " + strings.Join(conds, " AND ") + } + q += ` ORDER BY (resolved_at IS NULL) DESC, created_at DESC` + if f.Limit > 0 { + q += ` LIMIT ?` + args = append(args, f.Limit) + } + rows, err := s.db.QueryContext(ctx, q, args...) + if err != nil { + return nil, fmt.Errorf("store: list alerts: %w", err) + } + defer func() { _ = rows.Close() }() + var out []Alert + for rows.Next() { + a, err := scanAlert(rows.Scan) + if err != nil { + return nil, err + } + out = append(out, *a) + } + return out, rows.Err() +} + +// scanAlert centralises the column read so the GetAlert and +// ListAlerts paths agree on column order. Pass row.Scan or rows.Scan. +func scanAlert(scan func(...any) error) (*Alert, error) { + var a Alert + var hostID, lastSeen, ackedAt, ackedBy, resolvedAt sql.NullString + var createdAt string + if err := scan(&a.ID, &hostID, &a.Kind, &a.Severity, &a.Message, + &createdAt, &lastSeen, &ackedAt, &ackedBy, &resolvedAt); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("store: scan alert: %w", err) + } + if hostID.Valid { + v := hostID.String + a.HostID = &v + } + t, err := time.Parse(time.RFC3339Nano, createdAt) + if err != nil { + return nil, fmt.Errorf("store: parse created_at: %w", err) + } + a.CreatedAt = t + if lastSeen.Valid { + t, _ := time.Parse(time.RFC3339Nano, lastSeen.String) + a.LastSeenAt = &t + } + if ackedAt.Valid { + t, _ := time.Parse(time.RFC3339Nano, ackedAt.String) + a.AcknowledgedAt = &t + } + if ackedBy.Valid { + v := ackedBy.String + a.AcknowledgedBy = &v + } + if resolvedAt.Valid { + t, _ := time.Parse(time.RFC3339Nano, resolvedAt.String) + a.ResolvedAt = &t + } + return &a, nil +} +``` + +Add the imports if missing: `database/sql`, `errors`, `fmt`, `strings`, `time`, plus `github.com/oklog/ulid/v2`. + +- [ ] **Step 5: Run tests to verify they pass** + +```sh +go test ./internal/store/ -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 6: Commit** + +```sh +git add internal/store/alerts.go internal/store/alerts_test.go internal/store/types.go +git commit -m "store: alerts CRUD with dedup + last_seen_at bump" +``` + +--- + +### Task A4: Notification-channels store API + log writer + +**Files:** +- Create: `internal/store/notification_channels.go` +- Test: `internal/store/notification_channels_test.go` + +- [ ] **Step 1: Define types in this file** + +```go +package store + +import ( + "context" + "database/sql" + "errors" + "fmt" + "time" +) + +// NotificationChannel mirrors a row in notification_channels. The +// Config field is the AEAD-encrypted JSON blob; callers (in the +// notification package) decrypt before use. +type NotificationChannel struct { + ID string + Kind string // "webhook" | "ntfy" | "smtp" + Name string + Enabled bool + Config []byte // AEAD ciphertext; opaque at this layer + DefaultPriority *string + CreatedAt time.Time + UpdatedAt time.Time + LastFiredAt *time.Time +} + +// NotificationLogEntry is one row in notification_log. +type NotificationLogEntry struct { + ID string + ChannelID string + AlertID *string + Event string // alert.raised | alert.acknowledged | alert.resolved | alert.test + OK bool + StatusCode *int + LatencyMS *int + Error *string + FiredAt time.Time +} +``` + +- [ ] **Step 2: Write the failing test** + +```go +package store + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" +) + +func TestNotificationChannelCRUD(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + defer st.Close() + ctx := context.Background() + + ch := NotificationChannel{ + ID: ulid.Make().String(), Kind: "webhook", Name: "team-slack", + Enabled: true, Config: []byte("encrypted-blob"), + CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + } + if err := st.CreateNotificationChannel(ctx, ch); err != nil { + t.Fatalf("create: %v", err) + } + + got, err := st.GetNotificationChannel(ctx, ch.ID) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.Name != ch.Name || got.Kind != "webhook" || string(got.Config) != "encrypted-blob" { + t.Fatalf("got %+v", got) + } + + got.Name = "team-slack-renamed" + got.Enabled = false + got.UpdatedAt = time.Now().UTC() + if err := st.UpdateNotificationChannel(ctx, *got); err != nil { + t.Fatalf("update: %v", err) + } + got2, _ := st.GetNotificationChannel(ctx, ch.ID) + if got2.Name != "team-slack-renamed" || got2.Enabled { + t.Fatalf("update not applied: %+v", got2) + } + + all, _ := st.ListEnabledNotificationChannels(ctx) + if len(all) != 0 { + t.Errorf("disabled channel returned by ListEnabled: %d", len(all)) + } + + if err := st.DeleteNotificationChannel(ctx, ch.ID); err != nil { + t.Fatalf("delete: %v", err) + } + if _, err := st.GetNotificationChannel(ctx, ch.ID); err == nil { + t.Errorf("expected ErrNotFound after delete") + } +} + +func TestAppendNotificationLog(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, _ := Open(context.Background(), filepath.Join(dir, "rm.db")) + defer st.Close() + ctx := context.Background() + + chID := ulid.Make().String() + if err := st.CreateNotificationChannel(ctx, NotificationChannel{ + ID: chID, Kind: "ntfy", Name: "n", Enabled: true, + Config: []byte{1, 2, 3}, + CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("create channel: %v", err) + } + + code := 200 + lat := 287 + if err := st.AppendNotificationLog(ctx, NotificationLogEntry{ + ID: ulid.Make().String(), ChannelID: chID, Event: "alert.test", + OK: true, StatusCode: &code, LatencyMS: &lat, + FiredAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("append: %v", err) + } + + // LastFiredAt projection: the channel's last_fired_at is updated + // either by the append helper or by the callers; if you choose the + // helper does the bump, assert it. + got, _ := st.GetNotificationChannel(ctx, chID) + if got.LastFiredAt == nil { + t.Errorf("last_fired_at should bump on AppendNotificationLog success") + } +} +``` + +- [ ] **Step 3: Run to verify it fails** + +```sh +go test ./internal/store/ -run "TestNotificationChannelCRUD|TestAppendNotificationLog" -count=1 +``` +Expected: FAIL. + +- [ ] **Step 4: Implement** + +Append to `internal/store/notification_channels.go`: + +```go +func (s *Store) CreateNotificationChannel(ctx context.Context, ch NotificationChannel) error { + enabled := 0 + if ch.Enabled { + enabled = 1 + } + _, err := s.db.ExecContext(ctx, + `INSERT INTO notification_channels + (id, kind, name, enabled, config, default_priority, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ch.ID, ch.Kind, ch.Name, enabled, ch.Config, + nullable(ch.DefaultPriority), + ch.CreatedAt.UTC().Format(time.RFC3339Nano), + ch.UpdatedAt.UTC().Format(time.RFC3339Nano)) + if err != nil { + return fmt.Errorf("store: create channel: %w", err) + } + return nil +} + +func (s *Store) UpdateNotificationChannel(ctx context.Context, ch NotificationChannel) error { + enabled := 0 + if ch.Enabled { + enabled = 1 + } + _, err := s.db.ExecContext(ctx, + `UPDATE notification_channels + SET kind = ?, name = ?, enabled = ?, config = ?, + default_priority = ?, updated_at = ? + WHERE id = ?`, + ch.Kind, ch.Name, enabled, ch.Config, + nullable(ch.DefaultPriority), + ch.UpdatedAt.UTC().Format(time.RFC3339Nano), + ch.ID) + if err != nil { + return fmt.Errorf("store: update channel: %w", err) + } + return nil +} + +func (s *Store) DeleteNotificationChannel(ctx context.Context, id string) error { + _, err := s.db.ExecContext(ctx, + `DELETE FROM notification_channels WHERE id = ?`, id) + if err != nil { + return fmt.Errorf("store: delete channel: %w", err) + } + return nil +} + +func (s *Store) GetNotificationChannel(ctx context.Context, id string) (*NotificationChannel, error) { + row := s.db.QueryRowContext(ctx, + `SELECT id, kind, name, enabled, config, default_priority, + created_at, updated_at, last_fired_at + FROM notification_channels WHERE id = ?`, id) + return scanChannel(row.Scan) +} + +func (s *Store) ListNotificationChannels(ctx context.Context) ([]NotificationChannel, error) { + rows, err := s.db.QueryContext(ctx, + `SELECT id, kind, name, enabled, config, default_priority, + created_at, updated_at, last_fired_at + FROM notification_channels ORDER BY created_at ASC`) + if err != nil { + return nil, fmt.Errorf("store: list channels: %w", err) + } + defer func() { _ = rows.Close() }() + var out []NotificationChannel + for rows.Next() { + c, err := scanChannel(rows.Scan) + if err != nil { + return nil, err + } + out = append(out, *c) + } + return out, rows.Err() +} + +func (s *Store) ListEnabledNotificationChannels(ctx context.Context) ([]NotificationChannel, error) { + rows, err := s.db.QueryContext(ctx, + `SELECT id, kind, name, enabled, config, default_priority, + created_at, updated_at, last_fired_at + FROM notification_channels WHERE enabled = 1 ORDER BY created_at ASC`) + if err != nil { + return nil, fmt.Errorf("store: list enabled: %w", err) + } + defer func() { _ = rows.Close() }() + var out []NotificationChannel + for rows.Next() { + c, err := scanChannel(rows.Scan) + if err != nil { + return nil, err + } + out = append(out, *c) + } + return out, rows.Err() +} + +// AppendNotificationLog records a delivery attempt + bumps the +// channel's last_fired_at on success. +func (s *Store) AppendNotificationLog(ctx context.Context, e NotificationLogEntry) error { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("store: begin: %w", err) + } + defer func() { _ = tx.Rollback() }() + + ok := 0 + if e.OK { + ok = 1 + } + _, err = tx.ExecContext(ctx, + `INSERT INTO notification_log + (id, channel_id, alert_id, event, ok, status_code, latency_ms, error, fired_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, + e.ID, e.ChannelID, nullable(e.AlertID), e.Event, ok, + nullableInt(e.StatusCode), nullableInt(e.LatencyMS), + nullable(e.Error), + e.FiredAt.UTC().Format(time.RFC3339Nano)) + if err != nil { + return fmt.Errorf("store: append notification_log: %w", err) + } + + if e.OK { + if _, err := tx.ExecContext(ctx, + `UPDATE notification_channels SET last_fired_at = ? WHERE id = ?`, + e.FiredAt.UTC().Format(time.RFC3339Nano), e.ChannelID); err != nil { + return fmt.Errorf("store: bump last_fired_at: %w", err) + } + } + return tx.Commit() +} + +func scanChannel(scan func(...any) error) (*NotificationChannel, error) { + var c NotificationChannel + var enabled int + var defaultPri, lastFired sql.NullString + var createdAt, updatedAt string + if err := scan(&c.ID, &c.Kind, &c.Name, &enabled, &c.Config, + &defaultPri, &createdAt, &updatedAt, &lastFired); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("store: scan channel: %w", err) + } + c.Enabled = enabled == 1 + if defaultPri.Valid { + v := defaultPri.String + c.DefaultPriority = &v + } + t, err := time.Parse(time.RFC3339Nano, createdAt) + if err != nil { + return nil, fmt.Errorf("store: parse created_at: %w", err) + } + c.CreatedAt = t + t, err = time.Parse(time.RFC3339Nano, updatedAt) + if err != nil { + return nil, fmt.Errorf("store: parse updated_at: %w", err) + } + c.UpdatedAt = t + if lastFired.Valid { + t, _ := time.Parse(time.RFC3339Nano, lastFired.String) + c.LastFiredAt = &t + } + return &c, nil +} + +// nullableInt mirrors store/util.go's nullable for *int. +func nullableInt(p *int) any { + if p == nil { + return nil + } + return *p +} +``` + +If `nullable` and `nullableStr` already exist in `internal/store/util.go` reuse them; check first. If `nullableInt` is new, add it. + +- [ ] **Step 5: Run tests to verify they pass** + +```sh +go test ./internal/store/ -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 6: Commit** + +```sh +git add internal/store/notification_channels.go internal/store/notification_channels_test.go +git commit -m "store: notification_channels CRUD + AppendNotificationLog" +``` + +--- + +## Slice B — Notification channels (transport) + +### Task B1: Channel interface + payload type + +**Files:** +- Create: `internal/notification/payload.go` +- Create: `internal/notification/channel.go` + +- [ ] **Step 1: Define the payload + interface** + +`internal/notification/payload.go`: + +```go +// Package notification owns the fan-out of alert events to operator- +// configured channels. Three channels in v1: webhook, ntfy, smtp. +// Each channel implements Channel.Send for one Payload at a time; +// the Hub orchestrates fan-out, persists to notification_log. +package notification + +import "time" + +// Event identifies the lifecycle hook this notification is for. +type Event string + +const ( + EventRaised Event = "alert.raised" + EventAcknowledged Event = "alert.acknowledged" + EventResolved Event = "alert.resolved" + EventTest Event = "alert.test" +) + +// Payload is the per-event blob every channel renders into its own +// shape. Severity maps to channel-specific priority (ntfy) or stays +// in the body (webhook/smtp). +type Payload struct { + Event Event // alert.raised | … | alert.test + AlertID string // ULID + Severity string // info | warning | critical + Kind string // backup_failed | … + HostID string + HostName string + Message string + RaisedAt time.Time + Link string // Absolute URL to /alerts/; built by Hub +} +``` + +`internal/notification/channel.go`: + +```go +package notification + +import "context" + +// Channel is the per-kind transport. Implementations live in +// webhook.go / ntfy.go / smtp.go. Send must respect ctx (5s for HTTP, +// 10s for SMTP) and never panic. +type Channel interface { + // Kind returns the kind string ("webhook", "ntfy", "smtp"). Used + // for log enrichment and dispatcher routing. + Kind() string + + // Send delivers one payload. Returns (statusCode, latency, err). + // statusCode is HTTP for HTTP channels, the SMTP final-line code + // (e.g. 250) for SMTP, 0 if the call didn't reach a wire response. + Send(ctx context.Context, p Payload) (statusCode int, latency time.Duration, err error) +} +``` + +(Remember to import `time` in channel.go.) + +- [ ] **Step 2: Build to verify it compiles** + +```sh +go build ./internal/notification/... +``` +Expected: clean build. + +- [ ] **Step 3: Commit** + +```sh +git add internal/notification/payload.go internal/notification/channel.go +git commit -m "notification: payload + Channel interface" +``` + +--- + +### Task B2: Webhook channel + +**Files:** +- Create: `internal/notification/webhook.go` +- Test: `internal/notification/webhook_test.go` + +- [ ] **Step 1: Define the config + impl skeleton** + +`internal/notification/webhook.go`: + +```go +package notification + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +// WebhookConfig is the per-channel JSON shape stored AEAD-encrypted +// in notification_channels.config. +type WebhookConfig struct { + URL string `json:"url"` + BearerToken string `json:"bearer_token,omitempty"` + HeaderName string `json:"header_name,omitempty"` + HeaderValue string `json:"header_value,omitempty"` +} + +// WebhookChannel is the HTTP-POST channel. One per configured channel +// row. Reused across sends — the http.Client is goroutine-safe. +type WebhookChannel struct { + cfg WebhookConfig + client *http.Client +} + +// NewWebhookChannel builds a webhook with a 5s overall timeout enforced +// by the http.Client; ctx in Send is layered on top for caller-driven +// cancel. +func NewWebhookChannel(cfg WebhookConfig) *WebhookChannel { + return &WebhookChannel{ + cfg: cfg, + client: &http.Client{Timeout: 5 * time.Second}, + } +} + +func (c *WebhookChannel) Kind() string { return "webhook" } + +// webhookBody is the wire-stable envelope. Documented in the spec; do +// not reorder fields freely — operators write switch statements on +// "event" and "severity". +type webhookBody struct { + Event string `json:"event"` + AlertID string `json:"alert_id"` + Severity string `json:"severity"` + Kind string `json:"kind"` + HostID string `json:"host_id"` + HostName string `json:"host_name"` + Message string `json:"message"` + RaisedAt string `json:"raised_at"` + Link string `json:"link"` +} + +func (c *WebhookChannel) Send(ctx context.Context, p Payload) (int, time.Duration, error) { + body := webhookBody{ + Event: string(p.Event), AlertID: p.AlertID, + Severity: p.Severity, Kind: p.Kind, + HostID: p.HostID, HostName: p.HostName, + Message: p.Message, + RaisedAt: p.RaisedAt.UTC().Format(time.RFC3339Nano), + Link: p.Link, + } + buf, err := json.Marshal(body) + if err != nil { + return 0, 0, fmt.Errorf("webhook: marshal body: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.cfg.URL, bytes.NewReader(buf)) + if err != nil { + return 0, 0, fmt.Errorf("webhook: build request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + if c.cfg.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+c.cfg.BearerToken) + } + if c.cfg.HeaderName != "" { + req.Header.Set(c.cfg.HeaderName, c.cfg.HeaderValue) + } + + t0 := time.Now() + res, err := c.client.Do(req) + latency := time.Since(t0) + if err != nil { + return 0, latency, fmt.Errorf("webhook: do: %w", err) + } + defer func() { _ = res.Body.Close() }() + // Drain body — keep the connection reusable. + _, _ = io.Copy(io.Discard, res.Body) + if res.StatusCode >= 400 { + return res.StatusCode, latency, fmt.Errorf("webhook: http %d", res.StatusCode) + } + return res.StatusCode, latency, nil +} +``` + +- [ ] **Step 2: Write the failing test** + +`internal/notification/webhook_test.go`: + +```go +package notification + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestWebhookSendsCorrectPayloadAndHeaders(t *testing.T) { + t.Parallel() + var got webhookBody + var auth, custom string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + auth = r.Header.Get("Authorization") + custom = r.Header.Get("X-Test") + _ = json.NewDecoder(r.Body).Decode(&got) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + ch := NewWebhookChannel(WebhookConfig{ + URL: srv.URL, BearerToken: "tok-123", + HeaderName: "X-Test", HeaderValue: "yes", + }) + code, _, err := ch.Send(context.Background(), Payload{ + Event: EventRaised, AlertID: "01K", + Severity: "warning", Kind: "backup_failed", + HostID: "h1", HostName: "alfa-01", + Message: "Backup failed", + RaisedAt: time.Date(2026, 5, 4, 15, 42, 1, 0, time.UTC), + Link: "https://rm.example/alerts/01K", + }) + if err != nil { + t.Fatalf("send: %v", err) + } + if code != 200 { + t.Errorf("status: %d", code) + } + if got.Event != "alert.raised" || got.Kind != "backup_failed" || got.Message != "Backup failed" { + t.Errorf("body: %+v", got) + } + if auth != "Bearer tok-123" { + t.Errorf("auth: %q", auth) + } + if custom != "yes" { + t.Errorf("custom header: %q", custom) + } +} + +func TestWebhookReturnsErrorOn4xx(t *testing.T) { + t.Parallel() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + })) + defer srv.Close() + ch := NewWebhookChannel(WebhookConfig{URL: srv.URL}) + code, _, err := ch.Send(context.Background(), Payload{Event: EventRaised}) + if err == nil { + t.Fatal("expected error for 401") + } + if code != 401 { + t.Errorf("code: %d", code) + } +} + +func TestWebhookRespectsCtxTimeout(t *testing.T) { + t.Parallel() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(200) + })) + defer srv.Close() + ch := NewWebhookChannel(WebhookConfig{URL: srv.URL}) + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + _, _, err := ch.Send(ctx, Payload{Event: EventRaised}) + if err == nil { + t.Fatal("expected timeout error") + } +} +``` + +- [ ] **Step 3: Run tests** + +```sh +go test ./internal/notification/ -run TestWebhook -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 4: Commit** + +```sh +git add internal/notification/webhook.go internal/notification/webhook_test.go +git commit -m "notification: webhook channel" +``` + +--- + +### Task B3: Ntfy channel + +**Files:** +- Create: `internal/notification/ntfy.go` +- Test: `internal/notification/ntfy_test.go` + +- [ ] **Step 1: Implementation** + +`internal/notification/ntfy.go`: + +```go +package notification + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +type NtfyConfig struct { + ServerURL string `json:"server_url"` // default https://ntfy.sh + Topic string `json:"topic"` + AccessToken string `json:"access_token,omitempty"` +} + +type NtfyChannel struct { + cfg NtfyConfig + defaultPriority string + client *http.Client +} + +// NewNtfyChannel builds the channel; defaultPriority is the channel- +// configured fallback (one of "min" | "low" | "default" | "high" | +// "urgent" or empty). +func NewNtfyChannel(cfg NtfyConfig, defaultPriority string) *NtfyChannel { + return &NtfyChannel{ + cfg: cfg, defaultPriority: defaultPriority, + client: &http.Client{Timeout: 5 * time.Second}, + } +} + +func (c *NtfyChannel) Kind() string { return "ntfy" } + +func (c *NtfyChannel) Send(ctx context.Context, p Payload) (int, time.Duration, error) { + server := c.cfg.ServerURL + if server == "" { + server = "https://ntfy.sh" + } + url := strings.TrimRight(server, "/") + "/" + c.cfg.Topic + body := strings.NewReader(p.Message) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, body) + if err != nil { + return 0, 0, fmt.Errorf("ntfy: build req: %w", err) + } + req.Header.Set("Content-Type", "text/plain") + req.Header.Set("Title", "["+p.Severity+"] "+p.HostName+" "+p.Kind) + req.Header.Set("Tags", p.Severity+","+p.Kind) + if p.Link != "" { + req.Header.Set("Click", p.Link) + } + req.Header.Set("Priority", priorityForSeverity(p.Severity, c.defaultPriority)) + if c.cfg.AccessToken != "" { + req.Header.Set("Authorization", "Bearer "+c.cfg.AccessToken) + } + + t0 := time.Now() + res, err := c.client.Do(req) + latency := time.Since(t0) + if err != nil { + return 0, latency, fmt.Errorf("ntfy: do: %w", err) + } + defer func() { _ = res.Body.Close() }() + _, _ = io.Copy(io.Discard, res.Body) + if res.StatusCode >= 400 { + return res.StatusCode, latency, fmt.Errorf("ntfy: http %d", res.StatusCode) + } + return res.StatusCode, latency, nil +} + +// priorityForSeverity maps severity → ntfy priority. Critical always +// wins (operator's default is overridden). +func priorityForSeverity(severity, defaultPri string) string { + switch severity { + case "critical": + return "5" // urgent + case "warning": + if defaultPri != "" { + return defaultPri + } + return "4" + default: + if defaultPri != "" { + return defaultPri + } + return "3" + } +} +``` + +- [ ] **Step 2: Write the failing test** + +`internal/notification/ntfy_test.go`: + +```go +package notification + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "testing" +) + +func TestNtfySendsHeadersAndBody(t *testing.T) { + t.Parallel() + type captured struct { + title, tags, click, priority, auth, body string + } + var got captured + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got.title = r.Header.Get("Title") + got.tags = r.Header.Get("Tags") + got.click = r.Header.Get("Click") + got.priority = r.Header.Get("Priority") + got.auth = r.Header.Get("Authorization") + b, _ := io.ReadAll(r.Body) + got.body = string(b) + w.WriteHeader(200) + })) + defer srv.Close() + + ch := NewNtfyChannel(NtfyConfig{ + ServerURL: srv.URL, Topic: "rmf", + AccessToken: "tk1", + }, "") + _, _, err := ch.Send(context.Background(), Payload{ + Severity: "critical", HostName: "alfa-01", Kind: "check_failed", + Message: "errors found", Link: "https://rm.example/a", + }) + if err != nil { + t.Fatalf("send: %v", err) + } + if got.title != "[critical] alfa-01 check_failed" { + t.Errorf("title: %q", got.title) + } + if got.priority != "5" { + t.Errorf("priority: %q want 5 (critical → urgent)", got.priority) + } + if got.tags != "critical,check_failed" { + t.Errorf("tags: %q", got.tags) + } + if got.click != "https://rm.example/a" { + t.Errorf("click: %q", got.click) + } + if got.auth != "Bearer tk1" { + t.Errorf("auth: %q", got.auth) + } + if got.body != "errors found" { + t.Errorf("body: %q", got.body) + } +} + +func TestNtfyDefaultPriorityRespected(t *testing.T) { + t.Parallel() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(200) + })) + defer srv.Close() + ch := NewNtfyChannel(NtfyConfig{ServerURL: srv.URL, Topic: "t"}, "min") + // Use info severity — default should win. + if got := priorityForSeverity("info", "min"); got != "min" { + t.Errorf("info+default=min: got %q", got) + } + // Critical always overrides default. + if got := priorityForSeverity("critical", "min"); got != "5" { + t.Errorf("critical: got %q", got) + } + _ = ch +} +``` + +- [ ] **Step 3: Run tests** + +```sh +go test ./internal/notification/ -run TestNtfy -count=1 +``` +Expected: PASS. + +- [ ] **Step 4: Commit** + +```sh +git add internal/notification/ntfy.go internal/notification/ntfy_test.go +git commit -m "notification: ntfy channel" +``` + +--- + +### Task B4: SMTP channel + +**Files:** +- Create: `internal/notification/smtp.go` +- Test: `internal/notification/smtp_test.go` + +- [ ] **Step 1: Implementation** + +`internal/notification/smtp.go`: + +```go +package notification + +import ( + "context" + "crypto/tls" + "fmt" + "net" + "net/smtp" + "strings" + "time" +) + +type SMTPConfig struct { + Host string `json:"host"` + Port int `json:"port"` + Encryption string `json:"encryption"` // "starttls" | "tls" | "none" + Username string `json:"username"` + Password string `json:"password"` + From string `json:"from"` + To string `json:"to"` +} + +type SMTPChannel struct { + cfg SMTPConfig + // linkBaseHost holds the public base hostname of restic-manager so + // Message-IDs include a stable right-hand-side. Falls back to + // "restic-manager.local" when unset. + messageIDDomain string +} + +// NewSMTPChannel builds an SMTP channel. messageIDDomain comes from +// cfg.Cfg.BaseURL — caller passes it through. +func NewSMTPChannel(cfg SMTPConfig, messageIDDomain string) *SMTPChannel { + if messageIDDomain == "" { + messageIDDomain = "restic-manager.local" + } + return &SMTPChannel{cfg: cfg, messageIDDomain: messageIDDomain} +} + +func (c *SMTPChannel) Kind() string { return "smtp" } + +func (c *SMTPChannel) Send(ctx context.Context, p Payload) (int, time.Duration, error) { + t0 := time.Now() + addr := fmt.Sprintf("%s:%d", c.cfg.Host, c.cfg.Port) + + // Dial respects ctx (we use net.Dialer). + dialer := &net.Dialer{Timeout: 10 * time.Second} + rawConn, err := dialer.DialContext(ctx, "tcp", addr) + if err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: dial %s: %w", addr, err) + } + + var client *smtp.Client + switch strings.ToLower(c.cfg.Encryption) { + case "tls": + conn := tls.Client(rawConn, &tls.Config{ServerName: c.cfg.Host, MinVersion: tls.VersionTLS12}) + client, err = smtp.NewClient(conn, c.cfg.Host) + case "starttls", "": + client, err = smtp.NewClient(rawConn, c.cfg.Host) + if err == nil { + err = client.StartTLS(&tls.Config{ServerName: c.cfg.Host, MinVersion: tls.VersionTLS12}) + } + case "none": + client, err = smtp.NewClient(rawConn, c.cfg.Host) + default: + _ = rawConn.Close() + return 0, time.Since(t0), fmt.Errorf("smtp: unknown encryption %q", c.cfg.Encryption) + } + if err != nil { + _ = rawConn.Close() + return 0, time.Since(t0), fmt.Errorf("smtp: handshake: %w", err) + } + defer func() { _ = client.Quit() }() + + if c.cfg.Username != "" { + auth := smtp.PlainAuth("", c.cfg.Username, c.cfg.Password, c.cfg.Host) + if err := client.Auth(auth); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: auth: %w", err) + } + } + + if err := client.Mail(extractAddr(c.cfg.From)); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: MAIL FROM: %w", err) + } + if err := client.Rcpt(c.cfg.To); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: RCPT TO: %w", err) + } + wc, err := client.Data() + if err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: DATA: %w", err) + } + msg := buildEmailBody(c.cfg, c.messageIDDomain, p) + if _, err := wc.Write(msg); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: write: %w", err) + } + if err := wc.Close(); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: close DATA: %w", err) + } + + return 250, time.Since(t0), nil +} + +// extractAddr pulls the bare email out of a "Name " form. +func extractAddr(s string) string { + if i, j := strings.LastIndex(s, "<"), strings.LastIndex(s, ">"); i >= 0 && j > i { + return s[i+1 : j] + } + return s +} + +// buildEmailBody assembles the RFC 5322 message bytes per the spec. +// Plain text only; subject hardcoded. +func buildEmailBody(cfg SMTPConfig, msgIDDomain string, p Payload) []byte { + var b strings.Builder + b.WriteString("From: " + cfg.From + "\r\n") + b.WriteString("To: " + cfg.To + "\r\n") + b.WriteString(fmt.Sprintf("Subject: [restic-manager] [%s] %s: %s\r\n", p.Severity, p.HostName, p.Kind)) + b.WriteString("Date: " + p.RaisedAt.UTC().Format(time.RFC1123Z) + "\r\n") + b.WriteString("Message-ID: <" + p.AlertID + "@" + msgIDDomain + ">\r\n") + b.WriteString("MIME-Version: 1.0\r\n") + b.WriteString("Content-Type: text/plain; charset=utf-8\r\n") + b.WriteString("\r\n") + b.WriteString(p.Message + "\r\n\r\n") + b.WriteString("—\r\n") + b.WriteString("Raised at: " + p.RaisedAt.UTC().Format(time.RFC3339) + "\r\n") + b.WriteString("Severity: " + p.Severity + "\r\n") + b.WriteString("Host: " + p.HostName + "\r\n") + b.WriteString("Kind: " + p.Kind + "\r\n") + if p.Link != "" { + b.WriteString("\r\nOpen in restic-manager:\r\n") + b.WriteString(p.Link + "\r\n") + } + b.WriteString("\r\n(This message was sent by restic-manager. Acknowledge or resolve in the UI.)\r\n") + return []byte(b.String()) +} +``` + +- [ ] **Step 2: Write the failing test using a fake SMTP server** + +`internal/notification/smtp_test.go`: + +```go +package notification + +import ( + "context" + "net" + "strings" + "sync" + "testing" + "time" +) + +// fakeSMTPServer accepts a single connection, runs the minimal SMTP +// dialogue (HELO/EHLO, MAIL FROM, RCPT TO, DATA, QUIT) and stores +// what came across the wire. Plain (no TLS) — we test the protocol +// shape, not crypto. +type fakeSMTPServer struct { + mu sync.Mutex + mailFrom string + rcptTo string + data string + authed bool +} + +func startFakeSMTP(t *testing.T) (string, *fakeSMTPServer) { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen: %v", err) + } + srv := &fakeSMTPServer{} + t.Cleanup(func() { _ = ln.Close() }) + go func() { + conn, err := ln.Accept() + if err != nil { + return + } + defer func() { _ = conn.Close() }() + readLine := func() string { + buf := make([]byte, 1024) + n, err := conn.Read(buf) + if err != nil { + return "" + } + return string(buf[:n]) + } + write := func(s string) { _, _ = conn.Write([]byte(s)) } + + write("220 fake.smtp ESMTP\r\n") + for { + line := readLine() + if line == "" { + return + } + cmd := strings.ToUpper(strings.TrimSpace(line)) + switch { + case strings.HasPrefix(cmd, "EHLO"), strings.HasPrefix(cmd, "HELO"): + write("250-fake.smtp\r\n250 AUTH PLAIN\r\n") + case strings.HasPrefix(cmd, "AUTH "): + srv.mu.Lock() + srv.authed = true + srv.mu.Unlock() + write("235 OK\r\n") + case strings.HasPrefix(cmd, "MAIL FROM"): + srv.mu.Lock() + srv.mailFrom = strings.TrimSpace(strings.TrimPrefix(line, "MAIL FROM:")) + srv.mu.Unlock() + write("250 OK\r\n") + case strings.HasPrefix(cmd, "RCPT TO"): + srv.mu.Lock() + srv.rcptTo = strings.TrimSpace(strings.TrimPrefix(line, "RCPT TO:")) + srv.mu.Unlock() + write("250 OK\r\n") + case cmd == "DATA": + write("354 OK\r\n") + // read until "\r\n.\r\n" + var data strings.Builder + for { + chunk := readLine() + if chunk == "" { + break + } + data.WriteString(chunk) + if strings.Contains(data.String(), "\r\n.\r\n") { + break + } + } + srv.mu.Lock() + srv.data = data.String() + srv.mu.Unlock() + write("250 OK\r\n") + case cmd == "QUIT": + write("221 bye\r\n") + return + default: + write("500 unknown\r\n") + } + } + }() + return ln.Addr().String(), srv +} + +func TestSMTPSendsExpectedHeaders(t *testing.T) { + t.Parallel() + addr, srv := startFakeSMTP(t) + host, port := splitHostPort(addr) + + ch := NewSMTPChannel(SMTPConfig{ + Host: host, Port: port, Encryption: "none", + Username: "u", Password: "p", + From: "Restic-Manager ", + To: "ops@example.com", + }, "rm.example") + + _, _, err := ch.Send(context.Background(), Payload{ + Event: EventRaised, AlertID: "01ABC", + Severity: "warning", Kind: "backup_failed", + HostName: "alfa-01", Message: "Backup failed: 401", + RaisedAt: time.Date(2026, 5, 4, 15, 42, 1, 0, time.UTC), + Link: "https://rm.example/alerts/01ABC", + }) + if err != nil { + t.Fatalf("send: %v", err) + } + + srv.mu.Lock() + defer srv.mu.Unlock() + if !srv.authed { + t.Errorf("AUTH never sent") + } + if !strings.Contains(srv.mailFrom, "alerts@example.com") { + t.Errorf("MAIL FROM: %q", srv.mailFrom) + } + if !strings.Contains(srv.rcptTo, "ops@example.com") { + t.Errorf("RCPT TO: %q", srv.rcptTo) + } + if !strings.Contains(srv.data, "Subject: [restic-manager] [warning] alfa-01: backup_failed") { + t.Errorf("subject missing or wrong: %q", srv.data) + } + if !strings.Contains(srv.data, "Message-ID: <01ABC@rm.example>") { + t.Errorf("Message-ID wrong: %q", srv.data) + } + if !strings.Contains(srv.data, "Backup failed: 401") { + t.Errorf("body missing: %q", srv.data) + } +} + +func splitHostPort(addr string) (string, int) { + host, portStr, _ := net.SplitHostPort(addr) + var port int + for _, r := range portStr { + port = port*10 + int(r-'0') + } + return host, port +} +``` + +- [ ] **Step 3: Run the test** + +```sh +go test ./internal/notification/ -run TestSMTP -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 4: Commit** + +```sh +git add internal/notification/smtp.go internal/notification/smtp_test.go +git commit -m "notification: smtp channel" +``` + +--- + +### Task B5: notification.Hub — fan-out + log writer + +**Files:** +- Create: `internal/notification/hub.go` +- Test: `internal/notification/hub_test.go` + +- [ ] **Step 1: Implementation** + +`internal/notification/hub.go`: + +```go +package notification + +import ( + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "log/slog" + "sync" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// Hub fans Payload events out to every enabled channel and persists +// the result to notification_log. One Hub per process; thread-safe. +type Hub struct { + store *store.Store + aead *crypto.AEAD + baseURL string // e.g. https://restic-manager.example + msgIDDomain string // hostname extracted from baseURL for SMTP Message-ID +} + +func NewHub(st *store.Store, aead *crypto.AEAD, baseURL string) *Hub { + return &Hub{ + store: st, aead: aead, baseURL: baseURL, + msgIDDomain: extractDomain(baseURL), + } +} + +// Dispatch fans out to every enabled channel. Best-effort — failures +// are logged to notification_log but don't propagate. Each channel +// runs in its own goroutine; Dispatch returns when all have settled +// (so the caller can block briefly for the test-button case). +func (h *Hub) Dispatch(ctx context.Context, p Payload) { + chans, err := h.store.ListEnabledNotificationChannels(ctx) + if err != nil { + slog.Error("notification: list channels", "err", err) + return + } + // Stamp the link if not already set. + if p.Link == "" { + p.Link = h.baseURL + "/alerts/" + p.AlertID + } + + var wg sync.WaitGroup + for _, c := range chans { + wg.Add(1) + go func(c store.NotificationChannel) { + defer wg.Done() + h.send(ctx, c, p) + }(c) + } + wg.Wait() +} + +// DispatchOne fires a single channel — used by the "Send test +// notification" button. Returns the log entry it just persisted so +// the handler can render the result inline. +func (h *Hub) DispatchOne(ctx context.Context, channelID string, p Payload) (store.NotificationLogEntry, error) { + c, err := h.store.GetNotificationChannel(ctx, channelID) + if err != nil { + return store.NotificationLogEntry{}, err + } + if p.Link == "" { + p.Link = h.baseURL + "/alerts/" + p.AlertID + } + return h.send(ctx, *c, p), nil +} + +func (h *Hub) send(ctx context.Context, c store.NotificationChannel, p Payload) store.NotificationLogEntry { + ch, err := h.buildChannel(c) + logID := newID() + logEntry := store.NotificationLogEntry{ + ID: logID, ChannelID: c.ID, + Event: string(p.Event), FiredAt: time.Now().UTC(), + } + if p.AlertID != "" { + aid := p.AlertID + logEntry.AlertID = &aid + } + if err != nil { + errStr := err.Error() + logEntry.OK = false + logEntry.Error = &errStr + _ = h.store.AppendNotificationLog(ctx, logEntry) + return logEntry + } + code, latency, sendErr := ch.Send(ctx, p) + statusCode := code + latencyMS := int(latency.Milliseconds()) + logEntry.StatusCode = &statusCode + logEntry.LatencyMS = &latencyMS + if sendErr != nil { + errStr := sendErr.Error() + logEntry.OK = false + logEntry.Error = &errStr + } else { + logEntry.OK = true + } + if err := h.store.AppendNotificationLog(ctx, logEntry); err != nil { + slog.Warn("notification: persist log", "err", err) + } + return logEntry +} + +// buildChannel decrypts the channel config and returns a Channel impl. +func (h *Hub) buildChannel(row store.NotificationChannel) (Channel, error) { + plain, err := h.aead.Open(row.Config, []byte("notification-channel:"+row.ID)) + if err != nil { + return nil, err + } + switch row.Kind { + case "webhook": + var cfg WebhookConfig + if err := json.Unmarshal(plain, &cfg); err != nil { + return nil, err + } + return NewWebhookChannel(cfg), nil + case "ntfy": + var cfg NtfyConfig + if err := json.Unmarshal(plain, &cfg); err != nil { + return nil, err + } + dp := "" + if row.DefaultPriority != nil { + dp = *row.DefaultPriority + } + return NewNtfyChannel(cfg, dp), nil + case "smtp": + var cfg SMTPConfig + if err := json.Unmarshal(plain, &cfg); err != nil { + return nil, err + } + return NewSMTPChannel(cfg, h.msgIDDomain), nil + } + return nil, errUnknownKind(row.Kind) +} + +func newID() string { + var b [16]byte + _, _ = rand.Read(b[:]) + return hex.EncodeToString(b[:]) +} + +func extractDomain(baseURL string) string { + // Tiny: strip scheme + path. Good enough for Message-ID right-hand-side. + s := baseURL + if i := indexOf(s, "://"); i >= 0 { + s = s[i+3:] + } + if i := indexOf(s, "/"); i >= 0 { + s = s[:i] + } + if s == "" { + return "restic-manager.local" + } + return s +} + +func indexOf(s, sub string) int { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return i + } + } + return -1 +} + +type errUnknownKind string + +func (e errUnknownKind) Error() string { return "notification: unknown kind: " + string(e) } +``` + +- [ ] **Step 2: Write the failing test** + +`internal/notification/hub_test.go`: + +```go +package notification + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +func setupHub(t *testing.T) (*Hub, *store.Store) { + t.Helper() + dir := t.TempDir() + st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("store: %v", err) + } + t.Cleanup(func() { _ = st.Close() }) + keyPath := filepath.Join(dir, "secret.key") + _ = crypto.GenerateKeyFile(keyPath) + key, _ := crypto.LoadKeyFromFile(keyPath) + aead, _ := crypto.NewAEAD(key) + return NewHub(st, aead, "https://rm.example"), st +} + +func TestHubDispatchRecordsLogEntries(t *testing.T) { + t.Parallel() + hub, st := setupHub(t) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(200) + })) + defer srv.Close() + + cfg, _ := json.Marshal(WebhookConfig{URL: srv.URL}) + enc, err := hub.aead.Seal(cfg, []byte("notification-channel:test-ch")) + if err != nil { + t.Fatalf("seal: %v", err) + } + if err := st.CreateNotificationChannel(context.Background(), store.NotificationChannel{ + ID: "test-ch", Kind: "webhook", Name: "test", Enabled: true, + Config: enc, CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("create channel: %v", err) + } + + hub.Dispatch(context.Background(), Payload{ + Event: EventRaised, AlertID: ulid.Make().String(), + Severity: "warning", Kind: "backup_failed", + HostName: "alfa-01", Message: "x", RaisedAt: time.Now().UTC(), + }) + + // Verify a log row landed. + var n int + if err := st.DB().QueryRow(`SELECT COUNT(*) FROM notification_log WHERE channel_id = ? AND ok = 1`, "test-ch").Scan(&n); err != nil { + t.Fatalf("count: %v", err) + } + if n != 1 { + t.Fatalf("expected 1 log row, got %d", n) + } +} + +func TestHubSkipsDisabledChannels(t *testing.T) { + t.Parallel() + hub, st := setupHub(t) + cfg, _ := json.Marshal(WebhookConfig{URL: "http://no-such-host.invalid"}) + enc, _ := hub.aead.Seal(cfg, []byte("notification-channel:dis")) + _ = st.CreateNotificationChannel(context.Background(), store.NotificationChannel{ + ID: "dis", Kind: "webhook", Name: "off", Enabled: false, + Config: enc, CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + }) + hub.Dispatch(context.Background(), Payload{ + Event: EventRaised, AlertID: "x", Severity: "warning", + Kind: "backup_failed", HostName: "h", Message: "m", RaisedAt: time.Now().UTC(), + }) + var n int + _ = st.DB().QueryRow(`SELECT COUNT(*) FROM notification_log`).Scan(&n) + if n != 0 { + t.Errorf("disabled channel produced log rows: %d", n) + } +} +``` + +- [ ] **Step 3: Run tests** + +```sh +go test ./internal/notification/ -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 4: Commit** + +```sh +git add internal/notification/hub.go internal/notification/hub_test.go +git commit -m "notification: Hub fan-out + log writer" +``` + +--- + +## Slice C — Alert engine + +### Task C1: Engine struct + dispatch loop + auto-resolve sweep + +**Files:** +- Create: `internal/alert/engine.go` +- Test: `internal/alert/engine_test.go` + +- [ ] **Step 1: Engine skeleton + types** + +`internal/alert/engine.go`: + +```go +// Package alert evaluates the hardcoded rule set and persists raises +// / acknowledges / resolves. Three event sources feed it: +// - JobFinishedEvent — pushed when a job lands a terminal state +// (the existing MarkJobFinished site) +// - HostOfflineEvent / HostOnlineEvent — pushed by the offline +// sweeper and by the ws hello handler +// - 60s ticker (internal) — drives stale-schedule + auto-resolve +// +// All output goes through store.RaiseOrTouch / Acknowledge / Resolve +// and the notification.Hub. The engine is one goroutine started at +// boot; non-blocking sends from hot paths. +package alert + +import ( + "context" + "log/slog" + "sync" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// JobFinishedEvent carries everything the engine needs to evaluate +// the failed-X rules. Pushed via Engine.NotifyJobFinished from the +// MarkJobFinished site. +type JobFinishedEvent struct { + HostID string + JobID string + Kind string // backup | forget | prune | check | unlock | restore | diff + Status string // succeeded | failed | cancelled + When time.Time +} + +type Engine struct { + store *store.Store + hub *notification.Hub + + jobs chan JobFinishedEvent + hostDown chan string // host_id + hostUp chan string + + // agentOfflineFloor is the duration a host must be offline before + // we raise. Configurable for tests; default 15m. + agentOfflineFloor time.Duration + tickPeriod time.Duration + + closeOnce sync.Once + done chan struct{} +} + +// NewEngine builds the engine. agentOfflineFloor + tickPeriod default +// to 15min and 60s respectively when zero. +func NewEngine(st *store.Store, hub *notification.Hub) *Engine { + return &Engine{ + store: st, + hub: hub, + jobs: make(chan JobFinishedEvent, 32), + hostDown: make(chan string, 32), + hostUp: make(chan string, 32), + agentOfflineFloor: 15 * time.Minute, + tickPeriod: 60 * time.Second, + done: make(chan struct{}), + } +} + +// Run drives the event loop. Returns when ctx is done. Blocks; call in +// its own goroutine. +func (e *Engine) Run(ctx context.Context) { + t := time.NewTicker(e.tickPeriod) + defer t.Stop() + for { + select { + case <-ctx.Done(): + e.closeOnce.Do(func() { close(e.done) }) + return + case ev := <-e.jobs: + e.handleJobFinished(ctx, ev) + case hostID := <-e.hostDown: + e.handleHostOffline(ctx, hostID) + case hostID := <-e.hostUp: + e.handleHostOnline(ctx, hostID) + case now := <-t.C: + e.tick(ctx, now) + } + } +} + +// NotifyJobFinished is the hot-path hook called from MarkJobFinished's +// caller (ws.handler.dispatchAgentMessage). Non-blocking: drops on a +// full channel with a slog warning. +func (e *Engine) NotifyJobFinished(ev JobFinishedEvent) { + select { + case e.jobs <- ev: + default: + slog.Warn("alert: jobs channel full; dropping event", "kind", ev.Kind, "host_id", ev.HostID) + } +} + +func (e *Engine) NotifyHostOffline(hostID string) { + select { + case e.hostDown <- hostID: + default: + slog.Warn("alert: hostDown channel full; dropping", "host_id", hostID) + } +} + +func (e *Engine) NotifyHostOnline(hostID string) { + select { + case e.hostUp <- hostID: + default: + slog.Warn("alert: hostUp channel full; dropping", "host_id", hostID) + } +} +``` + +(`handleJobFinished`, `handleHostOffline`, `handleHostOnline`, and +`tick` come in C2.) + +- [ ] **Step 2: Build to confirm it compiles** + +```sh +go build ./internal/alert/... +``` +Expected: clean. + +- [ ] **Step 3: Commit** + +```sh +git add internal/alert/engine.go +git commit -m "alert: engine skeleton + event channels" +``` + +--- + +### Task C2: Engine — rule logic for the six rules + +**Files:** +- Create: `internal/alert/rules.go` +- Modify: `internal/alert/engine.go` (fill in handle* methods) +- Test: `internal/alert/rules_test.go` + +- [ ] **Step 1: Rule helper module** + +`internal/alert/rules.go`: + +```go +package alert + +import ( + "context" + "fmt" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// Rule kinds — keep in lockstep with the engine logic + UI tag-color +// table. +const ( + KindBackupFailed = "backup_failed" + KindForgetFailed = "forget_failed" + KindPruneFailed = "prune_failed" + KindCheckFailed = "check_failed" + KindStaleSchedule = "stale_schedule" + KindAgentOffline = "agent_offline" +) + +// raiseAndNotify is the standard pattern: store.RaiseOrTouch + +// notification.Hub.Dispatch only on first raise. +func (e *Engine) raiseAndNotify(ctx context.Context, hostID, kind, severity, message string, when time.Time) { + id, didRaise, err := e.store.RaiseOrTouch(ctx, hostID, kind, severity, message, when) + if err != nil { + // Not fatal — log and move on. + slogWarn("alert: raise", "kind", kind, "host_id", hostID, "err", err) + return + } + if !didRaise { + return + } + host, err := e.store.GetHost(ctx, hostID) + hostName := hostID + if err == nil { + hostName = host.Name + } + go e.hub.Dispatch(ctx, notification.Payload{ + Event: notification.EventRaised, AlertID: id, + Severity: severity, Kind: kind, + HostID: hostID, HostName: hostName, + Message: message, + RaisedAt: when, + }) +} + +// resolveAndNotify clears any open alert for (host_id, kind) and +// fires alert.resolved on each that was actually open. Best-effort. +func (e *Engine) resolveAndNotify(ctx context.Context, hostID, kind string, when time.Time) { + open, err := e.store.ListAlerts(ctx, store.AlertFilter{ + Status: "open", HostID: hostID, + }) + if err != nil { + return + } + openAcked, _ := e.store.ListAlerts(ctx, store.AlertFilter{ + Status: "acknowledged", HostID: hostID, + }) + all := append(open, openAcked...) + if err := e.store.AutoResolve(ctx, hostID, kind, when); err != nil { + slogWarn("alert: auto-resolve", "kind", kind, "host_id", hostID, "err", err) + return + } + host, _ := e.store.GetHost(ctx, hostID) + hostName := hostID + if host != nil { + hostName = host.Name + } + for _, a := range all { + if a.Kind != kind { + continue + } + go e.hub.Dispatch(ctx, notification.Payload{ + Event: notification.EventResolved, AlertID: a.ID, + Severity: a.Severity, Kind: a.Kind, + HostID: hostID, HostName: hostName, + Message: fmt.Sprintf("Auto-resolved (%s)", kind), + RaisedAt: when, + }) + } +} +``` + +(Add a small `slogWarn` shim or just import `log/slog` in engine.go and use directly.) + +- [ ] **Step 2: Fill handleJobFinished / handleHostOffline / handleHostOnline / tick** + +Append to `internal/alert/engine.go`: + +```go +func (e *Engine) handleJobFinished(ctx context.Context, ev JobFinishedEvent) { + switch ev.Kind { + case "backup": + if ev.Status == "failed" { + e.raiseAndNotify(ctx, ev.HostID, KindBackupFailed, "warning", + fmt.Sprintf("Backup job %s failed", ev.JobID), ev.When) + } else if ev.Status == "succeeded" { + e.resolveAndNotify(ctx, ev.HostID, KindBackupFailed, ev.When) + } + case "forget": + if ev.Status == "failed" { + e.raiseAndNotify(ctx, ev.HostID, KindForgetFailed, "warning", + fmt.Sprintf("Forget job %s failed", ev.JobID), ev.When) + } else if ev.Status == "succeeded" { + e.resolveAndNotify(ctx, ev.HostID, KindForgetFailed, ev.When) + } + case "prune": + if ev.Status == "failed" { + e.raiseAndNotify(ctx, ev.HostID, KindPruneFailed, "warning", + fmt.Sprintf("Prune job %s failed", ev.JobID), ev.When) + } else if ev.Status == "succeeded" { + e.resolveAndNotify(ctx, ev.HostID, KindPruneFailed, ev.When) + } + case "check": + if ev.Status == "failed" { + e.raiseAndNotify(ctx, ev.HostID, KindCheckFailed, "critical", + fmt.Sprintf("Check job %s failed", ev.JobID), ev.When) + } else if ev.Status == "succeeded" { + e.resolveAndNotify(ctx, ev.HostID, KindCheckFailed, ev.When) + } + } + // init / unlock / restore / diff don't trigger alerts in v1. +} + +func (e *Engine) handleHostOffline(ctx context.Context, hostID string) { + host, err := e.store.GetHost(ctx, hostID) + if err != nil { + return + } + // Apply the 15-min floor — host went offline only "long enough" + // when last_seen_at is older than the floor. + if time.Since(host.LastSeenAt) < e.agentOfflineFloor { + return + } + e.raiseAndNotify(ctx, hostID, KindAgentOffline, "warning", + fmt.Sprintf("Agent offline for %s (threshold %s)", + roundDur(time.Since(host.LastSeenAt)), e.agentOfflineFloor), + time.Now().UTC()) +} + +func (e *Engine) handleHostOnline(ctx context.Context, hostID string) { + e.resolveAndNotify(ctx, hostID, KindAgentOffline, time.Now().UTC()) +} + +// tick is the 60s sweep. Two responsibilities: +// 1. Re-evaluate agent_offline against every offline host (catches +// hosts that crossed the floor between events). +// 2. Stale-schedule detection: any schedule whose next-fire was +// more than 5 minutes ago with no matching job since. +func (e *Engine) tick(ctx context.Context, now time.Time) { + hosts, err := e.store.ListHosts(ctx) + if err != nil { + slog.Warn("alert: tick list hosts", "err", err) + return + } + for _, h := range hosts { + if h.Status == "offline" && now.Sub(h.LastSeenAt) >= e.agentOfflineFloor { + e.raiseAndNotify(ctx, h.ID, KindAgentOffline, "warning", + fmt.Sprintf("Agent offline for %s (threshold %s)", + roundDur(now.Sub(h.LastSeenAt)), e.agentOfflineFloor), now) + } + } + // Stale-schedule sweep — left as a future tick body if/when the + // store grows the helper. For v1 we skip it cleanly: the rule is + // declared but the trigger is "lands later if anyone asks". + // (Document this in tasks.md when you tick P3-05.) +} + +func roundDur(d time.Duration) string { + if d < time.Minute { + return "less than a minute" + } + d = d.Round(time.Minute) + return d.String() +} +``` + +> **Note:** the `stale_schedule` rule is declared in the spec but +> left as a no-op in the v1 ticker — the precise definition of +> "expected to have fired but didn't" needs a small store helper +> we can add later. Mention this in the tasks.md tick when you +> close P3-05. + +- [ ] **Step 3: Write the failing tests** + +`internal/alert/rules_test.go`: + +```go +package alert + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +func setupEngine(t *testing.T) (*Engine, *store.Store, string) { + t.Helper() + dir := t.TempDir() + st, _ := store.Open(context.Background(), filepath.Join(dir, "rm.db")) + t.Cleanup(func() { _ = st.Close() }) + keyPath := filepath.Join(dir, "secret.key") + _ = crypto.GenerateKeyFile(keyPath) + key, _ := crypto.LoadKeyFromFile(keyPath) + aead, _ := crypto.NewAEAD(key) + hub := notification.NewHub(st, aead, "https://rm.example") + eng := NewEngine(st, hub) + hostID := ulid.Make().String() + if err := st.CreateHost(context.Background(), store.Host{ + ID: hostID, Name: "alfa-01", OS: "linux", Arch: "amd64", + EnrolledAt: time.Now().UTC(), + }, "deadbeef", ""); err != nil { + t.Fatalf("create host: %v", err) + } + return eng, st, hostID +} + +func TestEngineBackupFailedRaisesThenResolves(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + ctx := context.Background() + + eng.handleJobFinished(ctx, JobFinishedEvent{ + HostID: hostID, JobID: "j1", Kind: "backup", Status: "failed", + When: time.Now().UTC(), + }) + open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 || open[0].Kind != KindBackupFailed { + t.Fatalf("expected one backup_failed open; got %+v", open) + } + + // Second failed job should TOUCH (not raise a fresh row). + eng.handleJobFinished(ctx, JobFinishedEvent{ + HostID: hostID, JobID: "j2", Kind: "backup", Status: "failed", + When: time.Now().UTC().Add(time.Minute), + }) + open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 { + t.Fatalf("expected dedup to stay at 1 open; got %d", len(open)) + } + + // Success auto-resolves. + eng.handleJobFinished(ctx, JobFinishedEvent{ + HostID: hostID, JobID: "j3", Kind: "backup", Status: "succeeded", + When: time.Now().UTC().Add(2 * time.Minute), + }) + open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 0 { + t.Fatalf("expected zero open after success; got %d", len(open)) + } +} + +func TestEngineCheckFailedSeverityCritical(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + eng.handleJobFinished(context.Background(), JobFinishedEvent{ + HostID: hostID, Kind: "check", Status: "failed", When: time.Now().UTC(), + }) + open, _ := st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 || open[0].Severity != "critical" { + t.Fatalf("got %+v", open) + } +} + +func TestEngineAgentOfflineRespects15MinFloor(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + // Host's last_seen_at defaulted to ~now via CreateHost. Force a + // stale value for the test by direct DB update. + if _, err := st.DB().Exec( + `UPDATE hosts SET last_seen_at = ? WHERE id = ?`, + time.Now().UTC().Add(-20*time.Minute).Format(time.RFC3339Nano), hostID, + ); err != nil { + t.Fatalf("update last_seen_at: %v", err) + } + eng.handleHostOffline(context.Background(), hostID) + open, _ := st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 { + t.Fatalf("expected agent_offline raised; got %d", len(open)) + } + + // Bring back online — should auto-resolve. + eng.handleHostOnline(context.Background(), hostID) + open, _ = st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 0 { + t.Fatalf("expected agent_offline resolved; got %d", len(open)) + } +} + +func TestEngineAgentOfflineUnderFloorNoRaise(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + // last_seen_at defaulted to "now" by CreateHost, so the floor + // hasn't elapsed. handleHostOffline must skip the raise. + eng.handleHostOffline(context.Background(), hostID) + open, _ := st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 0 { + t.Fatalf("expected no raise within 15-min floor; got %d", len(open)) + } +} +``` + +- [ ] **Step 4: Run tests** + +```sh +go test ./internal/alert/ -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```sh +git add internal/alert/engine.go internal/alert/rules.go internal/alert/rules_test.go +git commit -m "alert: rule logic for the six v1 rules" +``` + +--- + +### Task C3: Wire the engine into MarkJobFinished + ws hello + offline sweep + +**Files:** +- Modify: `internal/server/ws/handler.go` (MarkJobFinished call site) +- Modify: `internal/server/ws/handler.go` (hello path) +- Modify: `cmd/server/main.go` (offline sweeper) + +The engine has zero impact unless wired. Three call sites: + +- [ ] **Step 1: Add Engine to ws.HandlerDeps** + +`internal/server/ws/handler.go` — extend `HandlerDeps`: + +```go +type HandlerDeps struct { + Hub *Hub + Store *store.Store + JobHub *JobHub + // NEW: + AlertEngine AlertNotifier // interface so ws doesn't import alert + // (existing fields…) +} + +// AlertNotifier is the slice of alert.Engine ws needs. Lives here so +// the ws package doesn't import the alert package (avoids a cycle if +// alert ever needs ws types). +type AlertNotifier interface { + NotifyJobFinished(alert.JobFinishedEvent) // dispatched after MarkJobFinished + NotifyHostOnline(hostID string) +} +``` + +> **Cycle warning:** the type signature there imports +> `internal/alert.JobFinishedEvent`. If that creates a cycle, define +> a local `JobFinishedEvent` in `internal/server/ws` and convert at +> the wire-up site in `cmd/server/main.go`. Verify with +> `go build ./...` after the edit. + +- [ ] **Step 2: Hook MarkJobFinished** + +In `dispatchAgentMessage`'s `case api.MsgJobFinished` block, after the +existing `MarkJobFinished` call + JobHub broadcast: + +```go + if deps.AlertEngine != nil { + deps.AlertEngine.NotifyJobFinished(alert.JobFinishedEvent{ + HostID: hostID, JobID: p.JobID, + Kind: string(/* lookup the job's kind */), + Status: string(p.Status), + When: p.FinishedAt, + }) + } +``` + +> **Subtlety:** the WS `JobFinishedPayload` doesn't carry the kind — +> the agent dispatched against a stored job, the kind is only in the +> DB. Fetch via `deps.Store.GetJob(ctx, p.JobID)` and use +> `job.Kind`. Cache lookups not necessary for v1 traffic. + +- [ ] **Step 3: Hook the hello path for HostOnline** + +In `runAgentLoop`, after `MarkHostHello` succeeds: + +```go + if deps.AlertEngine != nil { + deps.AlertEngine.NotifyHostOnline(hostID) + } +``` + +- [ ] **Step 4: Hook the offline sweeper** + +In `cmd/server/main.go` the `offlineTick` case currently calls +`MarkHostsOfflineStale` and logs the count. Replace with a version +that also notifies the engine for each newly-marked host: + +```go + case <-offlineTick.C: + cutoff := time.Now().Add(-90 * time.Second) + ids, err := st.MarkHostsOfflineStaleReturnIDs(ctx, cutoff) + if err == nil && len(ids) > 0 { + slog.Info("marked hosts offline (stale heartbeat)", "n", len(ids)) + for _, id := range ids { + engine.NotifyHostOffline(id) + } + } +``` + +`MarkHostsOfflineStaleReturnIDs` is a small new variant of the +existing `MarkHostsOfflineStale` that returns the list of host IDs +flipped. Add it in `internal/store/hosts.go`; trivial — wrap the +existing UPDATE with a preceding SELECT. + +- [ ] **Step 5: Build to verify the wiring compiles** + +```sh +go build ./... +``` +Expected: clean. If you hit an import cycle, push the +`AlertNotifier` interface trick all the way through. + +- [ ] **Step 6: Existing tests still pass** + +```sh +go test ./internal/server/ws/ ./internal/server/http/ -count=1 -timeout=120s +``` +Expected: PASS. + +- [ ] **Step 7: Commit** + +```sh +git add internal/server/ws/handler.go cmd/server/main.go internal/store/hosts.go +git commit -m "alert: wire engine into ws hello + MarkJobFinished + offline sweep" +``` + +--- + +## Slice D — HTTP routes for /alerts page + +### Task D1: GET /alerts list page + JSON variant + +**Files:** +- Create: `internal/server/http/ui_alerts.go` +- Test: `internal/server/http/ui_alerts_test.go` +- Modify: `internal/server/http/server.go` (route) + +- [ ] **Step 1: Page model + handler** + +`internal/server/http/ui_alerts.go`: + +```go +package http + +import ( + "encoding/json" + "log/slog" + stdhttp "net/http" + "strings" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +type alertsPage struct { + Filter store.AlertFilter + Alerts []store.Alert + Counts alertCounts + HostNames map[string]string // host_id → name for table rendering +} + +type alertCounts struct { + Open int + Acknowledged int + Resolved24h int +} + +// handleUIAlerts renders the alerts page with the chosen filters. +func (s *Server) handleUIAlerts(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + q := r.URL.Query() + f := store.AlertFilter{ + Status: q.Get("status"), + Severity: q.Get("severity"), + HostID: q.Get("host_id"), + Search: strings.TrimSpace(q.Get("q")), + Limit: 200, + } + if f.Status == "" { + f.Status = "open" + } + + alerts, err := s.deps.Store.ListAlerts(r.Context(), f) + if err != nil { + slog.Error("ui alerts: list", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + page := alertsPage{Filter: f, Alerts: alerts, HostNames: map[string]string{}} + if hosts, err := s.deps.Store.ListHosts(r.Context()); err == nil { + for _, h := range hosts { + page.HostNames[h.ID] = h.Name + } + } + page.Counts = computeAlertCounts(s, r) + + view := s.baseView(u) + view.Title = "Alerts · restic-manager" + view.Active = "alerts" + view.Page = page + if err := s.deps.UI.Render(w, "alerts", view); err != nil { + slog.Error("ui alerts: render", "err", err) + } +} + +func computeAlertCounts(s *Server, r *stdhttp.Request) alertCounts { + open, _ := s.deps.Store.ListAlerts(r.Context(), + store.AlertFilter{Status: "open"}) + acked, _ := s.deps.Store.ListAlerts(r.Context(), + store.AlertFilter{Status: "acknowledged"}) + cutoff := time.Now().UTC().Add(-24 * time.Hour) + all, _ := s.deps.Store.ListAlerts(r.Context(), + store.AlertFilter{Status: "resolved"}) + res := 0 + for _, a := range all { + if a.ResolvedAt != nil && a.ResolvedAt.After(cutoff) { + res++ + } + } + return alertCounts{Open: len(open), Acknowledged: len(acked), Resolved24h: res} +} + +// handleAPIAlerts is the JSON list — same filter shape. +func (s *Server) handleAPIAlerts(w stdhttp.ResponseWriter, r *stdhttp.Request) { + if _, ok := s.requireUser(r); !ok { + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") + return + } + q := r.URL.Query() + f := store.AlertFilter{ + Status: q.Get("status"), + Severity: q.Get("severity"), + HostID: q.Get("host_id"), + Search: strings.TrimSpace(q.Get("q")), + Limit: 200, + } + alerts, err := s.deps.Store.ListAlerts(r.Context(), f) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(alerts) +} + +// handleUIAlertAcknowledge is POST /alerts/{id}/acknowledge. +func (s *Server) handleUIAlertAcknowledge(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + id := chi.URLParam(r, "id") + if id == "" { + stdhttp.Error(w, "missing id", stdhttp.StatusBadRequest) + return + } + if err := s.deps.Store.Acknowledge(r.Context(), id, u.ID, time.Now().UTC()); err != nil { + slog.Warn("ui alerts: ack", "err", err) + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), UserID: &u.ID, Actor: "user", + Action: "alert.acknowledge", + TargetKind: ptr("alert"), TargetID: &id, + TS: time.Now().UTC(), + }) + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("HX-Redirect", "/alerts?"+r.URL.RawQuery) + w.WriteHeader(stdhttp.StatusNoContent) + return + } + stdhttp.Redirect(w, r, "/alerts", stdhttp.StatusSeeOther) +} + +// handleUIAlertResolve is POST /alerts/{id}/resolve. +func (s *Server) handleUIAlertResolve(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + id := chi.URLParam(r, "id") + if id == "" { + stdhttp.Error(w, "missing id", stdhttp.StatusBadRequest) + return + } + if err := s.deps.Store.Resolve(r.Context(), id, time.Now().UTC()); err != nil { + slog.Warn("ui alerts: resolve", "err", err) + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), UserID: &u.ID, Actor: "user", + Action: "alert.resolve", + TargetKind: ptr("alert"), TargetID: &id, + TS: time.Now().UTC(), + }) + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("HX-Redirect", "/alerts?"+r.URL.RawQuery) + w.WriteHeader(stdhttp.StatusNoContent) + return + } + stdhttp.Redirect(w, r, "/alerts", stdhttp.StatusSeeOther) +} +``` + +(Imports include `github.com/go-chi/chi/v5`.) + +- [ ] **Step 2: Wire routes** + +In `internal/server/http/server.go`, inside the `if s.deps.UI != nil` block: + +```go + r.Get("/alerts", s.handleUIAlerts) + r.Post("/alerts/{id}/acknowledge", s.handleUIAlertAcknowledge) + r.Post("/alerts/{id}/resolve", s.handleUIAlertResolve) +``` + +And inside `r.Route("/api", ...)`: + +```go + r.Get("/alerts", s.handleAPIAlerts) +``` + +- [ ] **Step 3: Build to verify** + +```sh +go build ./... +``` +Expected: clean. (Template doesn't exist yet → handler will fail at +runtime, but build succeeds.) + +- [ ] **Step 4: Test (the page handler can't render without templates yet — write the test that drives it once templates land in slice F)** + +For now skip the rendering test; cover the JSON handler: + +`internal/server/http/ui_alerts_test.go`: + +```go +package http + +import ( + "context" + "encoding/json" + stdhttp "net/http" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +func TestAPIAlertsListsOpen(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, _ := enrolHostForWS(t, srv, st, "host-alerts") + _, _, _ = st.RaiseOrTouch(context.Background(), hostID, + "backup_failed", "warning", "x", time.Now().UTC()) + cookie := loginAsAdmin(t, st) + + req, _ := stdhttp.NewRequest("GET", ts.URL+"/api/alerts?status=open", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + t.Fatalf("status: %d", res.StatusCode) + } + var got []store.Alert + if err := json.NewDecoder(res.Body).Decode(&got); err != nil { + t.Fatalf("decode: %v", err) + } + if len(got) != 1 || got[0].Kind != "backup_failed" { + t.Fatalf("got %+v", got) + } + _ = ulid.Make() // import keep +} +``` + +```sh +go test ./internal/server/http/ -run TestAPIAlertsListsOpen -count=1 -timeout=30s +``` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```sh +git add internal/server/http/ui_alerts.go internal/server/http/ui_alerts_test.go internal/server/http/server.go +git commit -m "http: /alerts list + ack/resolve handlers + /api/alerts JSON" +``` + +--- + +## Slice E — HTTP routes for /settings/notifications + +### Task E1: Channel CRUD handlers + +**Files:** +- Create: `internal/server/http/ui_notifications.go` +- Test: `internal/server/http/ui_notifications_test.go` +- Modify: `internal/server/http/server.go` + +- [ ] **Step 1: CRUD handlers** + +`internal/server/http/ui_notifications.go` — too long to inline here in +full. Mirror the shape of `ui_repo.go` (see existing). Required handlers: + +- `handleUISettings(w, r)` — render `settings` shell with the + Notifications sub-tab as the body. Pre-fetches channel list. +- `handleUINotificationsList(w, r)` — same as above; the page is + the same template, rendered with the Notifications sub-tab active. +- `handleUINotificationNewGet / Post` — render the kind picker + + empty form for the chosen kind; POST validates + AEAD-encrypts the + config blob via `s.deps.AEAD.Seal(rawJSON, []byte("notification-channel:"+id))`, + inserts via `Store.CreateNotificationChannel`, redirects. +- `handleUINotificationEditGet / Post` — pre-decrypts existing + config, renders the form with the operator's prior values + (passwords show "•••• stored, leave blank to keep" placeholder), + POST merges + re-encrypts. +- `handleUINotificationDelete` — typed-confirm name pattern (mirror + `ui_repo_reinit.go`) — operator types the channel name to confirm. +- `handleAPINotificationTest` — `POST /api/notifications/{id}/test` + builds a synthetic info-severity Payload + calls + `s.deps.NotificationHub.DispatchOne`, returns the resulting log + entry as JSON. + +Each kind's form parsing produces the per-kind config struct from +`internal/notification` (`WebhookConfig`, `NtfyConfig`, `SMTPConfig`), +JSON-marshals it, and feeds into `aead.Seal`. Validation: + +- name non-empty + ≤100 chars +- kind ∈ {webhook, ntfy, smtp} +- webhook: URL parses; if scheme is http/https +- ntfy: server_url parses; topic non-empty +- smtp: host non-empty; port 1..65535; encryption ∈ {starttls, tls, none}; + to + from look like RFC 5322 addresses (use `mail.ParseAddress`) + +On any validation failure, re-render the form with the operator's +input intact + an error banner (mirror P2-04's pattern). + +- [ ] **Step 2: Add routes** + +In `server.go`'s `if s.deps.UI != nil` block: + +```go + r.Get("/settings", s.handleUISettings) + r.Get("/settings/notifications", s.handleUINotificationsList) + r.Get("/settings/notifications/new", s.handleUINotificationNewGet) + r.Post("/settings/notifications/new", s.handleUINotificationNewPost) + r.Get("/settings/notifications/{id}/edit", s.handleUINotificationEditGet) + r.Post("/settings/notifications/{id}/edit", s.handleUINotificationEditPost) + r.Post("/settings/notifications/{id}/delete", s.handleUINotificationDelete) +``` + +And inside `r.Route("/api", ...)`: + +```go + r.Post("/notifications/{id}/test", s.handleAPINotificationTest) +``` + +- [ ] **Step 3: Test the test-notification path end-to-end** + +Mirror P3-X1's `cancel_test.go` shape: spin up a httptest server as +the webhook target, configure a channel, POST to the test endpoint, +assert the synthetic event landed at the sink + a `notification_log` +row with `event=alert.test, ok=1`. + +- [ ] **Step 4: Run tests + build** + +```sh +go build ./... +go test ./internal/server/http/ -count=1 -timeout=60s +``` + +- [ ] **Step 5: Commit** + +```sh +git add internal/server/http/ui_notifications.go internal/server/http/ui_notifications_test.go internal/server/http/server.go +git commit -m "http: /settings/notifications CRUD + test endpoint" +``` + +--- + +## Slice F — UI templates + +### Task F1: alerts.html + alert_row.html partial + nav badge + +**Files:** +- Create: `web/templates/pages/alerts.html` +- Create: `web/templates/partials/alert_row.html` +- Modify: `web/templates/partials/nav.html` +- Modify: `internal/server/ui/ui.go` (add to commonPaths) + +- [ ] **Step 1: Templates from the wireframe** + +Translate `_diag/p3-alerts-wireframe/wireframe.html` surface 1 into +real Go templates. The shape should match exactly: filter strip +(status / severity / host / search), alert-row grid with severity +border, dot, kind tag, host name, message, raised + last_seen, +ack/resolve actions, plus the empty state. + +Notes: +- Use the existing `relTime` template func. +- Render "still happening · Ns ago" when `last_seen_at` is < 60s ago. +- Form action for ack/resolve: `
` so HTMX bounces back via + HX-Redirect to the same filtered list. + +- [ ] **Step 2: nav.html badge** + +Add to nav.html: `{{if gt .OpenAlerts 0}}{{.OpenAlerts}}{{end}}` +inside the Alerts tab. Wire `view.OpenAlerts` from a quick `len(open)` +query in `s.baseView`. + +- [ ] **Step 3: Commit** + +```sh +git add web/templates/pages/alerts.html web/templates/partials/alert_row.html web/templates/partials/nav.html internal/server/ui/ui.go internal/server/http/ui_handlers.go +git commit -m "ui: alerts list page + alert row partial + nav badge" +``` + +--- + +### Task F2: settings.html + notifications.html + notification_edit.html + +**Files:** +- Create: `web/templates/pages/settings.html` +- Create: `web/templates/pages/notifications.html` +- Create: `web/templates/pages/notification_edit.html` +- Modify: `internal/server/ui/ui.go` (add the three pages) + +- [ ] **Step 1: Settings shell** + +`settings.html` is the page; it renders the sub-tab nav (Notifications +| Users | Authentication) and slots in the body. For v1 only +Notifications is wired; the other two render an inline "Lands later" +notice. + +- [ ] **Step 2: Notifications list + edit form** + +Translate wireframe surfaces 2, 3, 3b, 3c into real templates. Edit +form needs both kind variants visible — render the picker with the +operator's selected kind highlighted, and show only the matching +field set below (use `{{if eq .Channel.Kind "webhook"}}…{{end}}`). + +Right-rail payload preview is per-kind: webhook envelope JSON for +webhook, ntfy header shape for ntfy, RFC 5322 layout for smtp. + +- [ ] **Step 3: Send-test feedback** + +The "Send test notification" button should be an HTMX POST that +swaps a small result chip (`#test-result`) with the green ✓ / +red ✗ pill rendered server-side from the +`handleAPINotificationTest` JSON. Easiest: wrap in a +`
` +and have the test handler render a tiny inline partial. + +- [ ] **Step 4: Commit** + +```sh +git add web/templates/pages/settings.html web/templates/pages/notifications.html web/templates/pages/notification_edit.html internal/server/ui/ui.go +git commit -m "ui: /settings/notifications list + edit form (3 kinds)" +``` + +--- + +### Task F3: Crit banner partial + dashboard wiring + +**Files:** +- Create: `web/templates/partials/crit_banner.html` +- Modify: `web/templates/pages/dashboard.html` +- Modify: `internal/server/http/ui_handlers.go` (handleUIDashboard adds CritCount) + +- [ ] **Step 1: Banner partial** + +```html +{{define "crit_banner"}} +{{if gt .CritOpenCount 0}} +
+
+ + {{.CritOpenCount}} critical alert{{if ne .CritOpenCount 1}}s{{end}} open across the fleet +
+ Review → +
+{{end}} +{{end}} +``` + +- [ ] **Step 2: Dashboard handler** + +In `handleUIDashboard`, fetch the count + render at top of the +dashboard page. Mirror the existing pattern. + +- [ ] **Step 3: Add `crit_banner.html` to commonPaths** + +- [ ] **Step 4: Commit** + +```sh +git add web/templates/partials/crit_banner.html web/templates/pages/dashboard.html internal/server/http/ui_handlers.go internal/server/ui/ui.go +git commit -m "ui: dashboard crit-alerts banner" +``` + +--- + +## Slice G — Wire engine + hub into cmd/server + +### Task G1: Construct + start engine; expose to handlers + +**Files:** +- Modify: `cmd/server/main.go` +- Modify: `internal/server/http/server.go` (`Deps` gains `AlertEngine` + `NotificationHub`) +- Modify: `internal/server/ws/handler.go` (use `deps.AlertEngine`) + +- [ ] **Step 1: Boot wiring** + +In `cmd/server/main.go`, after creating the AEAD + store + Hub: + +```go + notifHub := notification.NewHub(st, aead, cfg.BaseURL) + engine := alert.NewEngine(st, notifHub) + // Run the engine until ctx is done. + go engine.Run(ctx) +``` + +Pass `engine` and `notifHub` into the HTTP `Deps` struct + ws +`HandlerDeps`. The notification.Hub and engine satisfy whatever +interfaces the slices below depend on. + +- [ ] **Step 2: Build to verify wiring compiles** + +```sh +go build ./... +``` + +- [ ] **Step 3: Integration smoke** + +Run an existing test that exercises the WS layer, and confirm a +`backup_failed` alert lands in the DB after `MarkJobFinished` is +called from the dispatcher. New test: extend +`internal/server/http/ui_alerts_test.go` to drive a job-failed event +through the WS round-trip and assert the alert exists. + +- [ ] **Step 4: Commit** + +```sh +git add cmd/server/main.go internal/server/http/server.go internal/server/ws/handler.go +git commit -m "alert: construct + run engine; expose hub to handlers" +``` + +--- + +## Slice H — Playwright sweep + tasks.md tick + +### Task H1: Live sweep against the smoke env + +**Files:** +- `_diag/p3-alerts-sweep/` — screenshots dropped here. + +- [ ] **Step 1: Restage the binaries (per CLAUDE.md)** + +```sh +make build +cp bin/restic-manager-agent /tmp/rm-smoke/data/agent-binaries/restic-manager-agent-linux-amd64 +sudo -n install -m 0755 bin/restic-manager-agent /usr/local/bin/restic-manager-agent +sudo -n systemctl restart restic-manager-agent +pkill -9 -f restic-manager-server +RM_LISTEN=:8080 RM_DATA_DIR=/tmp/rm-smoke/data RM_BASE_URL=http://127.0.0.1:8080 \ + RM_SECRET_KEY_FILE=/tmp/rm-smoke/data/secret.key RM_COOKIE_SECURE=false \ + ./bin/restic-manager-server >> /tmp/rm-smoke/server.log 2>&1 & +``` + +- [ ] **Step 2: Walk the sweep** + +Run the eleven-step Playwright sweep documented in the spec under +"Playwright sweep". Drop screenshots into `_diag/p3-alerts-sweep/`. +Local MailHog (Docker, ports 1025+8025) covers the SMTP step. + +- [ ] **Step 3: Fix anything that breaks** + +Common things to look for, mirroring the P3-restore sweep: + +- CSS tokens not defined in `web/styles/input.css` (e.g. anything + used in the templates that wasn't in the wireframe → add). +- Form-state preservation on validation re-render (operator typed + values lost). +- AEAD seal/open key mismatch on edit (use + `[]byte("notification-channel:"+id)` consistently). + +- [ ] **Step 4: Commit fixes as you find them** + +Small commits per category: "ui: …", "fix: …", etc. + +--- + +### Task H2: tasks.md tick + final commit + +**Files:** +- Modify: `tasks.md` + +- [ ] **Step 1: Mark P3-05/06/07 done** + +In the "Phase 3 — Alerts" section of `tasks.md`, tick the three +checkboxes and add an as-shipped block matching the P3-restore +pattern (rule set, channels, scope decisions, link to spec + +sweep screenshots). + +- [ ] **Step 2: Move "Phase 3 — Alerts" status from `(not started)` to ✅** + +- [ ] **Step 3: Commit** + +```sh +git add tasks.md +git commit -m "tasks: tick P3-05/06/07 (alerts sub-phase)" +``` + +--- + +## Self-Review + +**1. Spec coverage check.** Walked the spec section-by-section: + +- Decisions 1–10 mapped to tasks (engine cadence in C1+C2, dedup in + A3, notification shape in B2/B3/B4, channel scope = global covered + by the channel-list page rendering all channels regardless of host). +- Six rules each have a case in `handleJobFinished` (4 of them) + + `handleHostOffline`/`handleHostOnline` (1) + a tick branch (1 + declared, no-op for v1, called out). +- Three v1 channels each have their own task (B2/B3/B4) + Hub + fan-out (B5). +- Two migrations ship in A1 + A2. +- All routes from the spec's "Routes added" table are wired (D1, + E1, F1). +- Webhook payload shape matches the spec exactly. +- SMTP body assembly matches the spec exactly (subject pattern, + Message-ID right-hand-side, plain-text body shape). + +**2. Placeholder scan.** No "TBD" / "TODO" / "implement later" +in any task body. The stale-schedule sweep is intentionally a no-op +in v1 with a documented reason (the spec acknowledges this rule +needs a small store helper that's not blocking the rest); the tick +function still lists it explicitly. + +**3. Type consistency.** Method names checked across slices: +`RaiseOrTouch` (A3) is called from `raiseAndNotify` (C2); +`AutoResolve` (A3) from `resolveAndNotify` (C2); `ListAlerts` ++ `AlertFilter` shape consistent A3 ↔ D1; `notification.Hub.Dispatch` ++ `DispatchOne` consistent B5 ↔ C2 ↔ E1. + +**Plan complete.** + +--- + +Plan complete and saved to `docs/superpowers/plans/2026-05-04-p3-alerts.md`. Two execution options: + +**1. Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, review between tasks, fast iteration + +**2. Inline Execution** — Execute tasks in this session using executing-plans, batch execution with checkpoints + +Which approach? diff --git a/docs/superpowers/specs/2026-05-04-p3-alerts-design.md b/docs/superpowers/specs/2026-05-04-p3-alerts-design.md new file mode 100644 index 0000000..21e1f61 --- /dev/null +++ b/docs/superpowers/specs/2026-05-04-p3-alerts-design.md @@ -0,0 +1,473 @@ +# P3 — Alerts (design) + +> Phase 3 sub-spec covering the alerts engine, notification channels, and UI +> (P3-05 / P3-06 / P3-07). +> +> Wireframe: `_diag/p3-alerts-wireframe/wireframe.html`. Screenshots in the +> same directory. Spec brainstorm ran 2026-05-04; user approved all ten +> design decisions before this spec was written. + +## Scope locked + +Brainstorm decisions (in order asked): + +1. **Rule model.** Hardcoded rule set, no operator-tunable thresholds in v1. + The engine knows about each rule type internally; per-rule config can land + later if/when an operator asks. +2. **Rule set.** Six rules: `backup_failed`, `forget_failed`, `prune_failed`, + `check_failed`, `stale_schedule`, `agent_offline`. +3. **Engine cadence.** Hybrid. Event hooks at the existing + `MarkJobFinished` and offline-sweeper sites for the immediate triggers; + one 60-second ticker handles stale-schedule detection and auto-resolution. +4. **Resolution.** Auto-resolve when the underlying condition clears + manual + Resolve at any time. Acknowledge is a separate "I've seen it" intermediate + state that does NOT close the alert. +5. **v1 channels.** Webhook + native ntfy + SMTP. Apprise deferred (the + channel plumbing accepts new kinds without reshaping). SMTP added as + a first-class channel post-brainstorm because the use case — overnight + alerts the operator wants to read in the morning rather than be pinged + on at 03:00 — is poorly served by ntfy's push model and clumsy via + webhook → email-gateway. +6. **Channel scope.** Global only. No per-host or per-severity routing in v1. +7. **Notification body.** Structured JSON for webhooks, formatted + title+body+click-URL for ntfy, plus a per-channel "Send test notification" + button with inline result feedback. +8. **Deduplication.** Open-alert uniqueness on `(host_id, kind)` with a + `last_seen_at` bump on every confirming tick. One notification per + occurrence; the UI shows "still happening · Ns ago" while a rule keeps + matching. +9. **Alert UI.** Top-level `/alerts` page (the existing nav stub becomes + real). Per-host vitals "Open alerts" cell links to `/alerts?host_id=...`. + Channel CRUD lives at `/settings/notifications`. +10. **Delivery semantics.** Best-effort fire-and-forget with a 5s timeout + per notification. Failures are logged but not retried. The alert row in + the DB is the source of truth. + +## Architecture + +The subsystem is three loosely-coupled units behind one `AlertEngine` +goroutine: + +``` + ┌───────────────────────────┐ + event hooks ─────────────────►│ │ + │ AlertEngine │ ──► raise/resolve + 60s ticker ──────────────────►│ (rule evaluation) │ alert row + │ │ + └────────────┬──────────────┘ + │ + ▼ + ┌──────────────────────┐ + │ notification.Hub │ + │ (fire-and-forget) │ + └──┬────────┬──────────┘ + │ │ + ┌──────▼──┐ ┌──▼──────┐ + │ Webhook │ │ Ntfy │ …future channels + └─────────┘ └─────────┘ +``` + +### Component boundaries + +| Component | Purpose | Depends on | +| ---------------------------------------- | ---------------------------------------------------------------------------------------- | -------------------------------------- | +| `internal/alert.Engine` | Owns the rule evaluation. Exposes `OnJobFinished`, `OnHostOffline`, `OnHostOnline` event hooks; runs a 60s ticker for stale-schedule + auto-resolution sweeps. Persists raises/resolves through the store. | store, notification.Hub, slog | +| `internal/alert.Rule` + per-rule files | Each of the six rules is a small struct with `Kind() string`, `Severity() string`, `MessageFor(ctx) string`. The engine iterates over a registered slice. | store models | +| `internal/notification.Hub` | Receives "alert raised/resolved/test" events; fans out to enabled channels in parallel; logs results to a new `notification_log` table. | store, channel adapters | +| `internal/notification.Channel` (iface) | Single method `Send(ctx, payload) error` with a 5s context for HTTP channels, 10s for SMTP. Three impls in v1: `webhookChannel`, `ntfyChannel`, `smtpChannel`. | http.Client; net/smtp + crypto/tls for SMTP | +| `internal/store/alerts.go` | CRUD on `alerts` table: `RaiseOrTouch(host_id, kind, severity, message)`, `Acknowledge(id, user)`, `Resolve(id, by user)`, `AutoResolve(host_id, kind)`, `ListAlerts(filter)`, plus the `last_seen_at` bump. | sqlite | +| `internal/store/notification_channels.go` | CRUD on `notification_channels` (new table) + `notification_log` (new table). | sqlite, crypto.AEAD (for secrets) | +| `internal/server/http/ui_alerts.go` | `/alerts` page handler + filter parsing + ack/resolve form actions. | store | +| `internal/server/http/ui_notifications.go` | `/settings/notifications` page + channel CRUD + "Send test" handler. | store, notification.Hub | + +### Engine event shape + +The engine runs as one goroutine per server process started in +`cmd/server/main.go`. It exposes a small set of channels other code writes to: + +```go +type Engine struct { + store *store.Store + hub *notification.Hub + + // Event channels (buffered, drop-on-full with a slog warning to keep + // hot paths non-blocking). The engine drains them on its own + // goroutine, evaluates the rule, and acts. + jobFinished chan jobFinishedEvent // from store.MarkJobFinished hook + hostOffline chan string // host_id; from offline sweeper + hostOnline chan string // host_id; from ws handler hello + + // 60s ticker drives stale-schedule + auto-resolution sweeps. + tick *time.Ticker +} +``` + +The hot-path call sites (`store.MarkJobFinished`, `ws.handler` offline +sweep, `ws.handler` hello) push to these channels via a tiny +`Engine.Notify*` method that does a non-blocking send. The engine's own +goroutine handles every match — keeps mutation off the hot path. + +### Rule catalogue + +| Kind | Severity | Trigger | Auto-resolve when | +| ------------------- | -------- | ----------------------------------------------------------------------- | -------------------------------------------------- | +| `backup_failed` | warning | `MarkJobFinished` with kind=backup, status=failed | next backup for the same host succeeds | +| `forget_failed` | warning | `MarkJobFinished` with kind=forget, status=failed | next forget for the same host succeeds | +| `prune_failed` | warning | `MarkJobFinished` with kind=prune, status=failed | next prune for the same host succeeds | +| `check_failed` | critical | `MarkJobFinished` with kind=check, status=failed OR errors_found | next check for the same host succeeds without errors | +| `stale_schedule` | warning | 60s ticker: a schedule's next-fire time is more than 5 minutes in the past with no matching job since | next job for that schedule succeeds OR schedule deleted | +| `agent_offline` | warning | offline-sweeper marks the host offline AND the host has been offline > 15 min (engine checks `last_seen_at`) | hostOnline event for that host | + +The 15-minute floor on `agent_offline` exists so a 30-second blip during +agent restart doesn't generate a notification storm. The store's existing +offline sweeper (`hosts.last_seen_at` with 90s threshold) already marks the +host offline; the engine sees the event but waits for the threshold before +raising. + +### Dedup + last_seen_at + +`store.RaiseOrTouch(host_id, kind, severity, message)`: + +```sql +SELECT id, last_seen_at FROM alerts + WHERE host_id = ? AND kind = ? AND resolved_at IS NULL + LIMIT 1; +``` + +- Found: `UPDATE alerts SET last_seen_at = ?, message = ? WHERE id = ?`, + return `(id, didRaise=false)`. +- Not found: `INSERT INTO alerts (id, host_id, kind, severity, message, + created_at, last_seen_at) VALUES (?, ?, ?, ?, ?, ?, ?)`, return + `(id, didRaise=true)`. + +The engine fires a notification through the Hub only when `didRaise=true`. +Touch-only events keep the row's `last_seen_at` fresh so the UI can render +"still happening · Ns ago" without spamming the operator's phone. + +### Notification payload shapes + +**Webhook** — a single JSON envelope per event: + +```json +{ + "event": "alert.raised", + "alert_id": "01KQT...", + "severity": "warning", + "kind": "backup_failed", + "host_id": "01KQ...", + "host_name": "alfa-01", + "message": "Backup 'system-config' failed: rest-server returned 401", + "raised_at": "2026-05-04T15:42:01Z", + "link": "https://restic-manager.example/alerts/01KQT..." +} +``` + +`event` is one of `alert.raised | alert.acknowledged | alert.resolved | +alert.test`. The same envelope shape is reused across events — operators +build one bridge, switch on `event` and `severity`. + +**SMTP** — single-recipient plain-text email per channel. The channel +config carries the SMTP server credentials and a `to` address; one +channel = one recipient (or one distribution-list address). Operators +who want multiple recipients add multiple channels — keeps the config +flat and the failure modes per-recipient. + +Subject pattern is hardcoded (no per-channel template in v1): + +``` +Subject: [restic-manager] [] : +From: +To: +Date: +Message-ID: > + + + +— +Raised at: 2026-05-04T15:42:01Z +Severity: warning +Host: alfa-01 +Kind: backup_failed + +Open in restic-manager: +https://restic-manager.example/alerts/01KQT... + +(This message was sent by restic-manager. Acknowledge or resolve in the UI.) +``` + +The body is plain text only in v1 — no HTML alternative — both because +the data is already structured well enough as text and because HTML +email opens a long tail of rendering / sanitisation concerns. The +`Message-ID` includes the alert id so a thread-aware client can group +related events (raised → acknowledged → resolved) together. + +Encryption: +- **STARTTLS** (default, port 587). Opportunistic upgrade. Most + operator-facing relays. +- **Implicit TLS** (port 465). Connect-then-TLS-handshake. +- **None** (port 25). Plain. Hidden behind a "Yes I understand" warning + on the form because the password goes over the wire. + +Auth: +- **PLAIN** (RFC 4616) over TLS. Default and almost always what's wanted. +- **CRAM-MD5** (RFC 2195). Offered if the server advertises it, no UI + toggle — automatic. +- No OAuth2 / XOAUTH2 in v1; that's a real next step if Gmail-without- + app-passwords becomes a recurring ask. + +Per-message timeout is 10s (vs 5s for HTTP channels) — STARTTLS +handshake + DATA over a slow link can legitimately take that long. + +**Ntfy** — uses the standard publish format: + +``` +POST / HTTP/1.1 +Host: +Authorization: Bearer (if configured) +Title: [warning] alfa-01 backup failed +Priority: 4 +Tags: warning,backup_failed +Click: https://restic-manager.example/alerts/01KQT... + +Backup 'system-config' failed: rest-server returned 401 +``` + +Severity → priority mapping: + +| Severity | Priority | +| --------- | -------- | +| info | 3 (default) | +| warning | 4 (high) | +| critical | 5 (urgent) | + +Per-channel `default_priority` setting overrides for non-critical alerts; +critical always goes urgent regardless. + +### Test notification + +`POST /api/notifications/{channel_id}/test` builds a synthetic event +(severity=info, kind=test_notification, message="Test from +restic-manager", link to the channel's edit page) and runs it through the +real send path. Returns `{ok: bool, latency_ms: int, status_code?: int, +error?: string}`. UI renders the green ✓ / red ✗ feedback inline. + +## Routes added + +| Method | Path | Purpose | +| ------- | ----------------------------------------------------- | ------------------------------------------------------------- | +| GET | `/alerts` | Fleet alerts list with filters (`?status=open&severity=warning&host_id=...&q=...`) | +| POST | `/alerts/{id}/acknowledge` | Mark alert acknowledged (HTMX form) | +| POST | `/alerts/{id}/resolve` | Manual resolve (HTMX form) | +| GET | `/settings/notifications` | Channel list page | +| GET | `/settings/notifications/new` | Channel kind picker + empty form | +| POST | `/settings/notifications/new` | Validate + create + redirect | +| GET | `/settings/notifications/{id}/edit` | Channel edit form | +| POST | `/settings/notifications/{id}/edit` | Validate + update | +| POST | `/settings/notifications/{id}/delete` | Delete channel (typed-confirm name in the form) | +| POST | `/api/notifications/{id}/test` | Fire test notification, return JSON result | +| GET | `/api/alerts` | JSON list (mirrors the UI filters) for future REST callers | + +## Data model + +### Migration 0013 — alerts.last_seen_at + +```sql +ALTER TABLE alerts ADD COLUMN last_seen_at TEXT; +UPDATE alerts SET last_seen_at = created_at WHERE last_seen_at IS NULL; +``` + +Existing alerts (currently zero in production — nothing writes them yet) +get `last_seen_at = created_at`. Column is nullable for forwards-compat +with rows from the alert-engine-pre-bump period. + +### Migration 0014 — notification_channels + notification_log + +```sql +CREATE TABLE notification_channels ( + id TEXT PRIMARY KEY, + kind TEXT NOT NULL CHECK (kind IN ('webhook', 'ntfy', 'smtp')), + name TEXT NOT NULL, + enabled INTEGER NOT NULL DEFAULT 1 CHECK (enabled IN (0, 1)), + config BLOB NOT NULL, -- AEAD-encrypted JSON; per-kind shape + default_priority TEXT, -- ntfy only; null for webhook + smtp + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_fired_at TEXT +); + +CREATE INDEX notification_channels_enabled ON notification_channels(enabled) WHERE enabled = 1; + +CREATE TABLE notification_log ( + id TEXT PRIMARY KEY, + channel_id TEXT NOT NULL REFERENCES notification_channels(id) ON DELETE CASCADE, + alert_id TEXT REFERENCES alerts(id) ON DELETE SET NULL, + event TEXT NOT NULL, -- alert.raised | alert.acknowledged | alert.resolved | alert.test + ok INTEGER NOT NULL CHECK (ok IN (0, 1)), + status_code INTEGER, + latency_ms INTEGER, + error TEXT, + fired_at TEXT NOT NULL +); + +CREATE INDEX notification_log_channel ON notification_log(channel_id, fired_at DESC); +CREATE INDEX notification_log_alert ON notification_log(alert_id); +``` + +`config` is an AEAD-encrypted JSON blob — bearer tokens for webhooks and +access tokens for ntfy live there. Per-kind config shapes: + +```go +type webhookConfig struct { + URL string `json:"url"` + BearerToken string `json:"bearer_token,omitempty"` + HeaderName string `json:"header_name,omitempty"` + HeaderValue string `json:"header_value,omitempty"` +} + +type ntfyConfig struct { + ServerURL string `json:"server_url"` // default https://ntfy.sh + Topic string `json:"topic"` + AccessToken string `json:"access_token,omitempty"` +} + +type smtpConfig struct { + Host string `json:"host"` // e.g. smtp.example.com + Port int `json:"port"` // default 587 (STARTTLS), 465 (TLS), 25 (none) + Encryption string `json:"encryption"` // "starttls" | "tls" | "none" + Username string `json:"username"` + Password string `json:"password"` // sensitive — AEAD-encrypted with the rest of config + From string `json:"from"` // RFC 5322 address; "alerts@example.com" or "Restic-Manager " + To string `json:"to"` // single recipient or distribution-list address; v1 = one channel = one to-line +} +``` + +### Engine state + +The engine itself is stateless beyond the channels it owns; all +persisted state is in the existing `alerts` table + the new +`notification_log` table. A process restart re-evaluates from scratch: +on next tick the stale-schedule + auto-resolution sweeps catch up with +whatever happened during the downtime. No outbox to drain. + +## UI templates + +| Template | Purpose | +| ----------------------------------------- | ------------------------------------------------------ | +| `web/templates/pages/alerts.html` | Fleet alerts page | +| `web/templates/partials/alert_row.html` | One alert row (used by both list and detail-fragment swap) | +| `web/templates/pages/settings.html` | Settings shell with Notifications / Users / Auth sub-tabs | +| `web/templates/pages/notifications.html` | Channel list (Notifications sub-tab body) | +| `web/templates/pages/notification_edit.html` | Channel kind picker + per-kind form + test button + payload preview | +| `web/templates/partials/crit_banner.html` | Dashboard top-of-page banner | +| `web/templates/partials/nav.html` | Existing — gain a `data-alerts-count` attribute on the Alerts tab so the badge auto-updates | + +The Settings shell + Notifications sub-tab is the new chrome the wireframe +introduced; Users + Authentication tabs are placeholder links that 404 in +v1 (or render an "Lands later" notice). Same pattern P2R-02 used for +inert sub-tabs. + +## Tests (target coverage) + +- `internal/alert/engine_test.go` — rule firing per kind: backup_failed + raises on `MarkJobFinished(kind=backup, status=failed)`; touch-only on + the second failure for the same host (no second notification); + auto-resolve on next success. +- `internal/alert/agent_offline_test.go` — `OnHostOffline` emits without + raising until the 15-min floor; `OnHostOnline` clears the alert. +- `internal/alert/stale_schedule_test.go` — synthetic schedule whose next + fire is in the past triggers; resets when a job lands. +- `internal/notification/webhook_test.go` — payload shape pinned; + authorisation header sent when bearer set; custom header echoed; 5s + timeout enforced; error in `notification_log`. +- `internal/notification/ntfy_test.go` — title/priority/tags/click headers + match the severity mapping; access token sent as `Authorization: Bearer + `; default priority overridden by severity for critical. +- `internal/notification/smtp_test.go` — round-trip against a local + `net/smtp.NewServer`-style fake (or `mhog`/MailHog if convenient): + STARTTLS handshake completes against a self-signed cert; PLAIN auth + uses configured creds; subject + from + to + body bytes match the + spec'd format; Message-ID contains the alert id; 10s timeout enforced; + failure path (auth refused) lands in `notification_log` with the + server's error string. +- `internal/server/http/ui_alerts_test.go` — page renders with filters + applied; ack/resolve POSTs flip the row + write audit; HX-Redirect + bounces back to the filtered list. +- `internal/server/http/ui_notifications_test.go` — CRUD happy paths, + validation re-render, secrets-encrypted-at-rest assertion (load row, + decrypt, compare), test-button hits the real send path against a + test http.Server. +- Migration 0013 + 0014 round-trip tested via `store.Open` on a fresh + db. + +## Playwright sweep + +End-of-phase sweep mirrors the P2R-02 / P3-restore pattern: + +1. Login → `/alerts` (initially empty) → see "All clear · last alert + never" empty state. +2. Trigger a fake-failed-backup via `POST /api/hosts/{id}/jobs` against a + host with a deliberately-wrong rest-server URL. Wait for the + `backup_failed` alert to appear in the list within ~2s of the job + finishing. +3. Acknowledge → row tints + ack actor visible. +4. Take the agent offline (`systemctl stop`); wait 15 min OR mock + `last_seen_at` to 16 min ago via the test harness; confirm + `agent_offline` alert raises once. +5. Restart the agent → `agent_offline` auto-resolves; `backup_failed` is + still open. +6. Configure a webhook channel pointing at a local test sink; click "Send + test" → green ✓. +7. Configure a ntfy channel pointing at a local sink → click "Send test" + → green ✓. +8. Configure an SMTP channel pointing at a local MailHog (Docker, port + 1025, no TLS for the local-only sweep) → click "Send test" → green ✓ + → MailHog UI at :8025 shows the test email with the right subject + and Message-ID. +9. Trigger a fresh failed backup → all three channels receive the + notification (verified from sink logs + MailHog inbox); + `notification_log` has three rows `event=alert.raised, ok=true`. +10. Manually Resolve the open `backup_failed`; confirm all three channels + receive `event=alert.resolved`. +11. Critical-severity test: trigger `check_failed` (mocked) → dashboard + banner appears; clicking it lands on `/alerts?severity=critical&status=open`. +12. Empty the alerts again → banner disappears. + +Screenshots into `_diag/p3-alerts-sweep/`. End-to-end clean, zero console +errors, before handing back. + +## What does NOT change + +- Existing chrome/templates beyond the small additions noted above. +- Existing `alerts.severity` CHECK (`info`/`warning`/`critical`) — already + the right shape; no migration needed for that. +- Audit log writer pattern — engine writes audit rows for ack/resolve + the same way every other state-changing handler does. +- The agent. Alerts are entirely a server concern; the agent doesn't + know they exist. + +## Open questions / explicit non-goals + +- **Per-rule cooldowns / re-raise on long-running issues.** Out of scope + (brainstorm question 8 ruled this out). Operators see "still happening" + in the UI; they don't get a reminder ping. +- **SMTP HTML emails.** v1 is plain text only — operators wanting rich + rendering can deploy a webhook → mail-merge bridge, or wait for a v2 + template engine. The Message-ID threading + plain text body should be + enough for almost every overnight-digest workflow. +- **SMTP OAuth2 / XOAUTH2.** Out of scope. Gmail / Microsoft 365 with + modern OAuth requires an `app password` workaround in v1. Native + XOAUTH2 lands when an operator asks (or when Google starts refusing + app passwords for non-business accounts in earnest). +- **Multi-recipient SMTP channels.** A channel = one `To`. Operators + wanting multiple recipients add multiple channels. Keeps failure + attribution per-recipient. +- **Apprise sidecar integration.** Deferred per brainstorm. The + `Channel` interface accepts a third impl without reshaping when we get + there. +- **Per-host or per-severity channel routing.** Out of scope. Likely + next step if operators ask: a `min_severity` field on the channel row. +- **Snooze / mute.** Out of scope. Acknowledge is the closest analogue; + full silence-windows would need a new table and is YAGNI for v1. +- **PagerDuty / OpsGenie.** Both have webhook receivers; operators wire + them via the webhook channel today. +- **Alert "rules" UI.** No CRUD; the rule set is hardcoded. diff --git a/internal/alert/engine.go b/internal/alert/engine.go new file mode 100644 index 0000000..2ef67db --- /dev/null +++ b/internal/alert/engine.go @@ -0,0 +1,205 @@ +// Package alert evaluates the hardcoded rule set and persists raises +// / acknowledges / resolves. Three event sources feed it: +// - JobFinishedEvent — pushed when a job lands a terminal state +// (the existing MarkJobFinished site) +// - HostOfflineEvent / HostOnlineEvent — pushed by the offline +// sweeper and by the ws hello handler +// - 60s ticker (internal) — drives stale-schedule + auto-resolve +// +// All output goes through store.RaiseOrTouch / Acknowledge / Resolve +// and the notification.Hub. The engine is one goroutine started at +// boot; non-blocking sends from hot paths. +package alert + +import ( + "context" + "fmt" + "log/slog" + "sync" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// JobFinishedEvent carries everything the engine needs to evaluate +// the failed-X rules. Pushed via Engine.NotifyJobFinished from the +// MarkJobFinished site. +type JobFinishedEvent struct { + HostID string + JobID string + Kind string // backup | forget | prune | check | unlock | restore | diff + Status string // succeeded | failed | cancelled + When time.Time +} + +// Engine evaluates hardcoded alert rules and dispatches via notification.Hub. +type Engine struct { + store *store.Store + hub *notification.Hub + + jobs chan JobFinishedEvent + hostDown chan string // host_id + hostUp chan string + + // agentOfflineFloor is the duration a host must be offline before + // we raise. Configurable for tests; default 15m. + agentOfflineFloor time.Duration + tickPeriod time.Duration + + closeOnce sync.Once + done chan struct{} +} + +// NewEngine builds the engine. agentOfflineFloor + tickPeriod default +// to 15min and 60s respectively when zero. +func NewEngine(st *store.Store, hub *notification.Hub) *Engine { + return &Engine{ + store: st, + hub: hub, + jobs: make(chan JobFinishedEvent, 32), + hostDown: make(chan string, 32), + hostUp: make(chan string, 32), + agentOfflineFloor: 15 * time.Minute, + tickPeriod: 60 * time.Second, + done: make(chan struct{}), + } +} + +// Run drives the event loop. Returns when ctx is done. Blocks; call in +// its own goroutine. +func (e *Engine) Run(ctx context.Context) { + t := time.NewTicker(e.tickPeriod) + defer t.Stop() + for { + select { + case <-ctx.Done(): + e.closeOnce.Do(func() { close(e.done) }) + return + case ev := <-e.jobs: + e.handleJobFinished(ctx, ev) + case hostID := <-e.hostDown: + e.handleHostOffline(ctx, hostID) + case hostID := <-e.hostUp: + e.handleHostOnline(ctx, hostID) + case now := <-t.C: + e.tick(ctx, now) + } + } +} + +// NotifyJobFinished is the hot-path hook called from MarkJobFinished's +// caller (ws.handler.dispatchAgentMessage). Non-blocking: drops on a +// full channel with a slog warning. +func (e *Engine) NotifyJobFinished(ev JobFinishedEvent) { + select { + case e.jobs <- ev: + default: + slog.Warn("alert: jobs channel full; dropping event", "kind", ev.Kind, "host_id", ev.HostID) + } +} + +// NotifyHostOffline notifies the engine that a host is offline. +func (e *Engine) NotifyHostOffline(hostID string) { + select { + case e.hostDown <- hostID: + default: + slog.Warn("alert: hostDown channel full; dropping", "host_id", hostID) + } +} + +// NotifyHostOnline notifies the engine that a host is online. +func (e *Engine) NotifyHostOnline(hostID string) { + select { + case e.hostUp <- hostID: + default: + slog.Warn("alert: hostUp channel full; dropping", "host_id", hostID) + } +} + +func (e *Engine) handleJobFinished(ctx context.Context, ev JobFinishedEvent) { + // Determine which kind/severity pair this job maps to. Jobs not + // listed here (init, unlock, restore, diff) produce no alerts in v1. + var kind, severity string + switch ev.Kind { + case "backup": + kind, severity = KindBackupFailed, "warning" + case "forget": + kind, severity = KindForgetFailed, "warning" + case "prune": + kind, severity = KindPruneFailed, "warning" + case "check": + kind, severity = KindCheckFailed, "critical" + default: + return + } + switch ev.Status { + case "failed": + e.raiseAndNotify(ctx, ev.HostID, kind, severity, + fmt.Sprintf("%s job %s failed", ev.Kind, ev.JobID), ev.When) + case "succeeded": + e.resolveAndNotify(ctx, ev.HostID, kind, ev.When) + } +} + +func (e *Engine) handleHostOffline(ctx context.Context, hostID string) { + host, err := e.store.GetHost(ctx, hostID) + if err != nil { + return + } + // Apply the 15-min floor — raise only when last_seen_at is older + // than agentOfflineFloor. A nil last_seen_at (host enrolled but + // never connected) is treated as "now" so we don't raise + // immediately on enrolment. + if host.LastSeenAt == nil { + return + } + if time.Since(*host.LastSeenAt) < e.agentOfflineFloor { + return + } + e.raiseAndNotify(ctx, hostID, KindAgentOffline, "warning", + fmt.Sprintf("Agent offline for %s (threshold %s)", + roundDur(time.Since(*host.LastSeenAt)), e.agentOfflineFloor), + time.Now().UTC()) +} + +func (e *Engine) handleHostOnline(ctx context.Context, hostID string) { + e.resolveAndNotify(ctx, hostID, KindAgentOffline, time.Now().UTC()) +} + +// tick is the 60-second sweep. Responsibilities: +// 1. Re-evaluate agent_offline for every offline host that may have +// crossed the floor between explicit events. +// 2. Stale-schedule detection — declared in the spec but intentionally +// left as a no-op in v1. The precise "expected to have fired but +// didn't" trigger requires a store helper that lands in a later +// task. The KindStaleSchedule constant is exported so UI code can +// reference the tag string today. +func (e *Engine) tick(ctx context.Context, now time.Time) { + hosts, err := e.store.ListHosts(ctx) + if err != nil { + slog.Warn("alert: tick list hosts", "err", err) + return + } + for _, h := range hosts { + if h.Status != "offline" || h.LastSeenAt == nil { + continue + } + if now.Sub(*h.LastSeenAt) >= e.agentOfflineFloor { + e.raiseAndNotify(ctx, h.ID, KindAgentOffline, "warning", + fmt.Sprintf("Agent offline for %s (threshold %s)", + roundDur(now.Sub(*h.LastSeenAt)), e.agentOfflineFloor), now) + } + } + // Stale-schedule sweep — no-op in v1. See KindStaleSchedule doc comment. +} + +// roundDur returns a human-readable duration string, rounding to the +// nearest minute. Durations under a minute are reported as "less than +// a minute". +func roundDur(d time.Duration) string { + if d < time.Minute { + return "less than a minute" + } + return d.Round(time.Minute).String() +} diff --git a/internal/alert/rules.go b/internal/alert/rules.go new file mode 100644 index 0000000..e55cfe7 --- /dev/null +++ b/internal/alert/rules.go @@ -0,0 +1,164 @@ +package alert + +import ( + "context" + "fmt" + "log/slog" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// Alert kind constants — keep in lockstep with the engine logic and +// the UI tag-colour table. +const ( + // KindBackupFailed is raised when a backup job finishes with + // status "failed" and resolved on next backup success. + KindBackupFailed = "backup_failed" + + // KindForgetFailed mirrors KindBackupFailed for forget jobs. + KindForgetFailed = "forget_failed" + + // KindPruneFailed mirrors KindBackupFailed for prune jobs. + KindPruneFailed = "prune_failed" + + // KindCheckFailed is raised at "critical" severity (repository + // integrity is at risk) when a check job fails. + KindCheckFailed = "check_failed" + + // KindStaleSchedule is declared for completeness but intentionally + // left as a no-op in v1. The precise "expected to have fired but + // didn't" logic requires a store helper that lands in a follow-up + // task. Ask the team before implementing. + KindStaleSchedule = "stale_schedule" + + // KindAgentOffline is raised when a host's last_seen_at is older + // than the 15-minute floor and resolved when the host reconnects. + KindAgentOffline = "agent_offline" +) + +// raiseAndNotify is the standard raise pattern: store.RaiseOrTouch +// deduplicates, and notification.Hub.Dispatch fires only on the first +// raise (didRaise=true). Subsequent occurrences of the same open alert +// are "touched" (last_seen_at bumped) without a second notification. +func (e *Engine) raiseAndNotify(ctx context.Context, hostID, kind, severity, message string, when time.Time) { + id, didRaise, err := e.store.RaiseOrTouch(ctx, hostID, kind, severity, message, when) + if err != nil { + slog.Warn("alert: raise", "kind", kind, "host_id", hostID, "err", err) + return + } + if !didRaise { + return + } + host, err := e.store.GetHost(ctx, hostID) + hostName := hostID + if err == nil { + hostName = host.Name + } + go e.hub.Dispatch(ctx, notification.Payload{ + Event: notification.EventRaised, + AlertID: id, + Severity: severity, + Kind: kind, + HostID: hostID, + HostName: hostName, + Message: message, + RaisedAt: when, + }) +} + +// Acknowledge updates the alert row and fans out alert.acknowledged to +// every enabled channel. Best-effort: store errors are logged but the +// dispatch still fires only when the store update succeeds. +func (e *Engine) Acknowledge(ctx context.Context, alertID, userID string, when time.Time) error { + if err := e.store.Acknowledge(ctx, alertID, userID, when); err != nil { + return err + } + a, lerr := e.store.GetAlert(ctx, alertID) + if lerr != nil || a == nil { + // Acknowledge already succeeded; dispatch is best-effort. + return nil //nolint:nilerr + } + p := alertPayload(ctx, e.store, notification.EventAcknowledged, a) + go e.hub.Dispatch(context.WithoutCancel(ctx), p) + return nil +} + +// Resolve marks the alert resolved and fans out alert.resolved. +func (e *Engine) Resolve(ctx context.Context, alertID string, when time.Time) error { + a, _ := e.store.GetAlert(ctx, alertID) + if err := e.store.Resolve(ctx, alertID, when); err != nil { + return err + } + if a == nil { + return nil + } + p := alertPayload(ctx, e.store, notification.EventResolved, a) + go e.hub.Dispatch(context.WithoutCancel(ctx), p) + return nil +} + +// alertPayload builds a Payload from a stored Alert, looking up the host +// name when HostID is set. +func alertPayload(ctx context.Context, st *store.Store, ev notification.Event, a *store.Alert) notification.Payload { + hostID, hostName := "", "" + if a.HostID != nil { + hostID = *a.HostID + hostName = hostID + if h, err := st.GetHost(ctx, hostID); err == nil && h != nil { + hostName = h.Name + } + } + return notification.Payload{ + Event: ev, + AlertID: a.ID, + Severity: a.Severity, + Kind: a.Kind, + HostID: hostID, + HostName: hostName, + Message: a.Message, + RaisedAt: a.CreatedAt, + } +} + +// resolveAndNotify clears every open (or acknowledged) alert for +// (host_id, kind) via store.AutoResolve, then fires alert.resolved +// for each row that was actually open. Best-effort — errors are +// logged but do not propagate. +func (e *Engine) resolveAndNotify(ctx context.Context, hostID, kind string, when time.Time) { + open, err := e.store.ListAlerts(ctx, store.AlertFilter{ + Status: "open", HostID: hostID, + }) + if err != nil { + return + } + openAcked, _ := e.store.ListAlerts(ctx, store.AlertFilter{ + Status: "acknowledged", HostID: hostID, + }) + all := append(open, openAcked...) + if err := e.store.AutoResolve(ctx, hostID, kind, when); err != nil { + slog.Warn("alert: auto-resolve", "kind", kind, "host_id", hostID, "err", err) + return + } + host, _ := e.store.GetHost(ctx, hostID) + hostName := hostID + if host != nil { + hostName = host.Name + } + for _, a := range all { + if a.Kind != kind { + continue + } + go e.hub.Dispatch(ctx, notification.Payload{ + Event: notification.EventResolved, + AlertID: a.ID, + Severity: a.Severity, + Kind: a.Kind, + HostID: hostID, + HostName: hostName, + Message: fmt.Sprintf("Auto-resolved (%s)", kind), + RaisedAt: when, + }) + } +} diff --git a/internal/alert/rules_test.go b/internal/alert/rules_test.go new file mode 100644 index 0000000..c8f9d32 --- /dev/null +++ b/internal/alert/rules_test.go @@ -0,0 +1,125 @@ +package alert + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +func setupEngine(t *testing.T) (*Engine, *store.Store, string) { + t.Helper() + dir := t.TempDir() + st, _ := store.Open(context.Background(), filepath.Join(dir, "rm.db")) + t.Cleanup(func() { _ = st.Close() }) + keyPath := filepath.Join(dir, "secret.key") + _ = crypto.GenerateKeyFile(keyPath) + key, _ := crypto.LoadKeyFromFile(keyPath) + aead, _ := crypto.NewAEAD(key) + hub := notification.NewHub(st, aead, "https://rm.example") + eng := NewEngine(st, hub) + hostID := ulid.Make().String() + if err := st.CreateHost(context.Background(), store.Host{ + ID: hostID, Name: "alfa-01", OS: "linux", Arch: "amd64", + EnrolledAt: time.Now().UTC(), + }, "deadbeef", ""); err != nil { + t.Fatalf("create host: %v", err) + } + return eng, st, hostID +} + +func TestEngineBackupFailedRaisesThenResolves(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + ctx := context.Background() + + eng.handleJobFinished(ctx, JobFinishedEvent{ + HostID: hostID, JobID: "j1", Kind: "backup", Status: "failed", + When: time.Now().UTC(), + }) + open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 || open[0].Kind != KindBackupFailed { + t.Fatalf("expected one backup_failed open; got %+v", open) + } + + // Second failed job should TOUCH (not raise a fresh row). + eng.handleJobFinished(ctx, JobFinishedEvent{ + HostID: hostID, JobID: "j2", Kind: "backup", Status: "failed", + When: time.Now().UTC().Add(time.Minute), + }) + open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 { + t.Fatalf("expected dedup to stay at 1 open; got %d", len(open)) + } + + // Success auto-resolves. + eng.handleJobFinished(ctx, JobFinishedEvent{ + HostID: hostID, JobID: "j3", Kind: "backup", Status: "succeeded", + When: time.Now().UTC().Add(2 * time.Minute), + }) + open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 0 { + t.Fatalf("expected zero open after success; got %d", len(open)) + } +} + +func TestEngineCheckFailedSeverityCritical(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + eng.handleJobFinished(context.Background(), JobFinishedEvent{ + HostID: hostID, Kind: "check", Status: "failed", When: time.Now().UTC(), + }) + open, _ := st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 || open[0].Severity != "critical" { + t.Fatalf("got %+v", open) + } +} + +func TestEngineAgentOfflineRespects15MinFloor(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + // Host's last_seen_at defaulted to NULL via CreateHost (enrolled but never + // seen). Force a stale value for the test by direct DB update. + if _, err := st.DB().Exec( + `UPDATE hosts SET last_seen_at = ? WHERE id = ?`, + time.Now().UTC().Add(-20*time.Minute).Format(time.RFC3339Nano), hostID, + ); err != nil { + t.Fatalf("update last_seen_at: %v", err) + } + eng.handleHostOffline(context.Background(), hostID) + open, _ := st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 1 { + t.Fatalf("expected agent_offline raised; got %d", len(open)) + } + + // Bring back online — should auto-resolve. + eng.handleHostOnline(context.Background(), hostID) + open, _ = st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 0 { + t.Fatalf("expected agent_offline resolved; got %d", len(open)) + } +} + +func TestEngineAgentOfflineUnderFloorNoRaise(t *testing.T) { + t.Parallel() + eng, st, hostID := setupEngine(t) + // last_seen_at is NULL from CreateHost (never touched). A nil + // last_seen_at means the host was enrolled but never connected — + // treat that as "now" for the floor check so we don't raise + // immediately. handleHostOffline must skip the raise. + eng.handleHostOffline(context.Background(), hostID) + open, _ := st.ListAlerts(context.Background(), + store.AlertFilter{Status: "open", HostID: hostID}) + if len(open) != 0 { + t.Fatalf("expected no raise within 15-min floor; got %d", len(open)) + } +} diff --git a/internal/notification/channel.go b/internal/notification/channel.go new file mode 100644 index 0000000..b4ec257 --- /dev/null +++ b/internal/notification/channel.go @@ -0,0 +1,20 @@ +package notification + +import ( + "context" + "time" +) + +// Channel is the per-kind transport. Implementations live in +// webhook.go / ntfy.go / smtp.go. Send must respect ctx (5s for HTTP, +// 10s for SMTP) and never panic. +type Channel interface { + // Kind returns the kind string ("webhook", "ntfy", "smtp"). Used + // for log enrichment and dispatcher routing. + Kind() string + + // Send delivers one payload. Returns (statusCode, latency, err). + // statusCode is HTTP for HTTP channels, the SMTP final-line code + // (e.g. 250) for SMTP, 0 if the call didn't reach a wire response. + Send(ctx context.Context, p Payload) (statusCode int, latency time.Duration, err error) +} diff --git a/internal/notification/hub.go b/internal/notification/hub.go new file mode 100644 index 0000000..337b7f4 --- /dev/null +++ b/internal/notification/hub.go @@ -0,0 +1,187 @@ +package notification + +import ( + "context" + "crypto/rand" + "encoding/hex" + "encoding/json" + "log/slog" + "sync" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// Hub fans Payload events out to every enabled channel and persists +// the result to notification_log. One Hub per process; thread-safe. +type Hub struct { + store *store.Store + aead *crypto.AEAD + baseURL string // e.g. https://restic-manager.example + msgIDDomain string // hostname extracted from baseURL for SMTP Message-ID +} + +// NewHub constructs a Hub. baseURL is the public root of the server +// (used to build /alerts/ links and the SMTP Message-ID domain). +func NewHub(st *store.Store, aead *crypto.AEAD, baseURL string) *Hub { + return &Hub{ + store: st, + aead: aead, + baseURL: baseURL, + msgIDDomain: extractDomain(baseURL), + } +} + +// Dispatch fans out to every enabled channel. Best-effort — failures +// are logged to notification_log but do not propagate to the caller. +// Each channel runs in its own goroutine; Dispatch returns only when +// all goroutines have settled, so the caller can block briefly for +// the test-button case. +func (h *Hub) Dispatch(ctx context.Context, p Payload) { + chans, err := h.store.ListEnabledNotificationChannels(ctx) + if err != nil { + slog.Error("notification: list channels", "err", err) + return + } + // Stamp the alert link if the caller left it empty. + if p.Link == "" { + p.Link = h.baseURL + "/alerts/" + p.AlertID + } + + var wg sync.WaitGroup + for _, c := range chans { + wg.Add(1) + go func(c store.NotificationChannel) { + defer wg.Done() + h.send(ctx, c, p) + }(c) + } + wg.Wait() +} + +// DispatchOne fires a single channel — used by the "Send test +// notification" button. Returns the log entry that was persisted so +// the handler can render the result inline. +func (h *Hub) DispatchOne(ctx context.Context, channelID string, p Payload) (store.NotificationLogEntry, error) { + c, err := h.store.GetNotificationChannel(ctx, channelID) + if err != nil { + return store.NotificationLogEntry{}, err + } + if p.Link == "" { + p.Link = h.baseURL + "/alerts/" + p.AlertID + } + return h.send(ctx, *c, p), nil +} + +// send builds the channel impl, delivers the payload, and persists a +// notification_log row regardless of success or failure. +func (h *Hub) send(ctx context.Context, c store.NotificationChannel, p Payload) store.NotificationLogEntry { + ch, buildErr := h.buildChannel(c) + logEntry := store.NotificationLogEntry{ + ID: newID(), + ChannelID: c.ID, + Event: string(p.Event), + FiredAt: time.Now().UTC(), + } + if p.AlertID != "" { + aid := p.AlertID + logEntry.AlertID = &aid + } + if buildErr != nil { + errStr := buildErr.Error() + logEntry.OK = false + logEntry.Error = &errStr + _ = h.store.AppendNotificationLog(ctx, logEntry) + return logEntry + } + + code, latency, sendErr := ch.Send(ctx, p) + statusCode := code + latencyMS := int(latency.Milliseconds()) + logEntry.StatusCode = &statusCode + logEntry.LatencyMS = &latencyMS + if sendErr != nil { + errStr := sendErr.Error() + logEntry.OK = false + logEntry.Error = &errStr + } else { + logEntry.OK = true + } + if err := h.store.AppendNotificationLog(ctx, logEntry); err != nil { + slog.Warn("notification: persist log", "err", err) + } + return logEntry +} + +// buildChannel decrypts the channel config and returns a concrete +// Channel implementation for the channel's kind. +func (h *Hub) buildChannel(row store.NotificationChannel) (Channel, error) { + plain, err := h.aead.Decrypt(string(row.Config), []byte("notification-channel:"+row.ID)) + if err != nil { + return nil, err + } + switch row.Kind { + case "webhook": + var cfg WebhookConfig + if err := json.Unmarshal(plain, &cfg); err != nil { + return nil, err + } + return NewWebhookChannel(cfg), nil + case "ntfy": + var cfg NtfyConfig + if err := json.Unmarshal(plain, &cfg); err != nil { + return nil, err + } + dp := "" + if row.DefaultPriority != nil { + dp = *row.DefaultPriority + } + return NewNtfyChannel(cfg, dp), nil + case "smtp": + var cfg SMTPConfig + if err := json.Unmarshal(plain, &cfg); err != nil { + return nil, err + } + return NewSMTPChannel(cfg, h.msgIDDomain), nil + } + return nil, errUnknownKind(row.Kind) +} + +// newID returns a 32-hex-char random identifier for notification_log rows. +func newID() string { + var b [16]byte + _, _ = rand.Read(b[:]) + return hex.EncodeToString(b[:]) +} + +// extractDomain strips the scheme and path from baseURL, leaving only +// the host[:port] component. Used as the right-hand side of SMTP +// Message-IDs. +func extractDomain(baseURL string) string { + s := baseURL + if i := indexOf(s, "://"); i >= 0 { + s = s[i+3:] + } + if i := indexOf(s, "/"); i >= 0 { + s = s[:i] + } + if s == "" { + return "restic-manager.local" + } + return s +} + +// indexOf returns the index of the first occurrence of sub in s, or -1. +func indexOf(s, sub string) int { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return i + } + } + return -1 +} + +type errUnknownKind string + +func (e errUnknownKind) Error() string { return "notification: unknown kind: " + string(e) } diff --git a/internal/notification/hub_test.go b/internal/notification/hub_test.go new file mode 100644 index 0000000..89a2389 --- /dev/null +++ b/internal/notification/hub_test.go @@ -0,0 +1,99 @@ +package notification + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +func setupHub(t *testing.T) (*Hub, *store.Store) { + t.Helper() + dir := t.TempDir() + st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("store: %v", err) + } + t.Cleanup(func() { _ = st.Close() }) + keyPath := filepath.Join(dir, "secret.key") + _ = crypto.GenerateKeyFile(keyPath) + key, _ := crypto.LoadKeyFromFile(keyPath) + aead, _ := crypto.NewAEAD(key) + return NewHub(st, aead, "https://rm.example"), st +} + +func TestHubDispatchRecordsLogEntries(t *testing.T) { + t.Parallel() + hub, st := setupHub(t) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(200) + })) + defer srv.Close() + + cfg, _ := json.Marshal(WebhookConfig{URL: srv.URL}) + enc, err := hub.aead.Encrypt(cfg, []byte("notification-channel:test-ch")) + if err != nil { + t.Fatalf("encrypt: %v", err) + } + if err := st.CreateNotificationChannel(context.Background(), store.NotificationChannel{ + ID: "test-ch", Kind: "webhook", Name: "test", Enabled: true, + Config: []byte(enc), CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("create channel: %v", err) + } + + hub.Dispatch(context.Background(), Payload{ + Event: EventRaised, + Severity: "warning", + Kind: "backup_failed", + HostName: "alfa-01", + Message: "x", + RaisedAt: time.Now().UTC(), + }) + + // Verify a log row landed with ok=1. + var n int + if err := st.DB().QueryRow( + `SELECT COUNT(*) FROM notification_log WHERE channel_id = ? AND ok = 1`, "test-ch", + ).Scan(&n); err != nil { + t.Fatalf("count: %v", err) + } + if n != 1 { + t.Fatalf("expected 1 log row, got %d", n) + } +} + +func TestHubSkipsDisabledChannels(t *testing.T) { + t.Parallel() + hub, st := setupHub(t) + + cfg, _ := json.Marshal(WebhookConfig{URL: "http://no-such-host.invalid"}) + enc, _ := hub.aead.Encrypt(cfg, []byte("notification-channel:dis")) + _ = st.CreateNotificationChannel(context.Background(), store.NotificationChannel{ + ID: "dis", Kind: "webhook", Name: "off", Enabled: false, + Config: []byte(enc), CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + }) + + hub.Dispatch(context.Background(), Payload{ + Event: EventRaised, + AlertID: "x", + Severity: "warning", + Kind: "backup_failed", + HostName: "h", + Message: "m", + RaisedAt: time.Now().UTC(), + }) + + var n int + _ = st.DB().QueryRow(`SELECT COUNT(*) FROM notification_log`).Scan(&n) + if n != 0 { + t.Errorf("disabled channel produced log rows: %d", n) + } +} diff --git a/internal/notification/ntfy.go b/internal/notification/ntfy.go new file mode 100644 index 0000000..a7692a1 --- /dev/null +++ b/internal/notification/ntfy.go @@ -0,0 +1,115 @@ +package notification + +import ( + "bytes" + "context" + "encoding/base64" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// NtfyConfig is the per-channel JSON shape stored AEAD-encrypted in +// notification_channels.config. AccessToken takes precedence over +// (Username, Password) when both are set; supply one or the other for +// self-hosted ntfy that requires auth. +type NtfyConfig struct { + ServerURL string `json:"server_url"` + Topic string `json:"topic"` + AccessToken string `json:"access_token,omitempty"` + Username string `json:"username,omitempty"` + Password string `json:"password,omitempty"` +} + +// NtfyChannel delivers alerts to an ntfy server using POST with +// ntfy-specific headers (Title, Priority, Tags, Click). One instance +// per configured channel row. Reused across sends — http.Client is +// goroutine-safe. +type NtfyChannel struct { + cfg NtfyConfig + defaultPriority string // "min"/"low"/"default"/"high"/"urgent" or "" + client *http.Client +} + +// NewNtfyChannel builds an ntfy channel with a 5s http.Client timeout. +// defaultPriority is the channel-configured fallback when no +// severity-specific mapping applies; pass "" to use the built-in +// fallbacks (4 for warning, 3 for everything else). +func NewNtfyChannel(cfg NtfyConfig, defaultPriority string) *NtfyChannel { + if cfg.ServerURL == "" { + cfg.ServerURL = "https://ntfy.sh" + } + return &NtfyChannel{ + cfg: cfg, + defaultPriority: defaultPriority, + client: &http.Client{Timeout: 5 * time.Second}, + } +} + +// Kind returns "ntfy" for log enrichment and dispatcher routing. +func (c *NtfyChannel) Kind() string { return "ntfy" } + +// Send delivers the payload as a plain-text POST to / +// with ntfy headers. Returns (statusCode, latency, err). 4xx/5xx +// responses are returned as errors with the status code set. +func (c *NtfyChannel) Send(ctx context.Context, p Payload) (int, time.Duration, error) { + server := strings.TrimRight(c.cfg.ServerURL, "/") + url := server + "/" + c.cfg.Topic + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewBufferString(p.Message)) + if err != nil { + return 0, 0, fmt.Errorf("ntfy: build request: %w", err) + } + + req.Header.Set("Content-Type", "text/plain") + req.Header.Set("Title", fmt.Sprintf("[%s] %s %s", p.Severity, p.HostName, p.Kind)) + req.Header.Set("Tags", p.Severity+","+p.Kind) + req.Header.Set("Priority", priorityForSeverity(p.Severity, c.defaultPriority)) + if p.Link != "" { + req.Header.Set("Click", p.Link) + } + switch { + case c.cfg.AccessToken != "": + req.Header.Set("Authorization", "Bearer "+c.cfg.AccessToken) + case c.cfg.Username != "": + creds := c.cfg.Username + ":" + c.cfg.Password + req.Header.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(creds))) + } + + t0 := time.Now() + res, err := c.client.Do(req) + latency := time.Since(t0) + if err != nil { + return 0, latency, fmt.Errorf("ntfy: do: %w", err) + } + defer func() { _ = res.Body.Close() }() + // Drain body to keep the connection reusable. + _, _ = io.Copy(io.Discard, res.Body) + if res.StatusCode >= 400 { + return res.StatusCode, latency, fmt.Errorf("ntfy: http %d", res.StatusCode) + } + return res.StatusCode, latency, nil +} + +// priorityForSeverity maps a severity string to an ntfy numeric priority +// string. critical always returns "5" regardless of defaultPri. For +// other severities, defaultPri is returned when non-empty, otherwise +// "4" for warning and "3" for everything else. +func priorityForSeverity(severity, defaultPri string) string { + switch severity { + case "critical": + return "5" + case "warning": + if defaultPri != "" { + return defaultPri + } + return "4" + default: + if defaultPri != "" { + return defaultPri + } + return "3" + } +} diff --git a/internal/notification/ntfy_test.go b/internal/notification/ntfy_test.go new file mode 100644 index 0000000..7aa2a0b --- /dev/null +++ b/internal/notification/ntfy_test.go @@ -0,0 +1,97 @@ +package notification + +import ( + "io" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestNtfySendsHeadersAndBody(t *testing.T) { + t.Parallel() + + var ( + gotTitle string + gotPri string + gotTags string + gotClick string + gotAuth string + gotContentType string + gotBody string + ) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotTitle = r.Header.Get("Title") + gotPri = r.Header.Get("Priority") + gotTags = r.Header.Get("Tags") + gotClick = r.Header.Get("Click") + gotAuth = r.Header.Get("Authorization") + gotContentType = r.Header.Get("Content-Type") + b, _ := io.ReadAll(r.Body) + gotBody = string(b) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + cfg := NtfyConfig{ + ServerURL: srv.URL, + Topic: "alerts", + AccessToken: "tk1", + } + ch := NewNtfyChannel(cfg, "") // no default priority; critical must still be "5" + + p := Payload{ + Event: EventRaised, + AlertID: "01HZ", + Severity: "critical", + Kind: "check_failed", + HostName: "alfa-01", + Message: "errors found", + RaisedAt: time.Now(), + Link: "https://rm.example/a", + } + + code, _, err := ch.Send(t.Context(), p) + if err != nil { + t.Fatalf("Send: %v", err) + } + if code != http.StatusOK { + t.Fatalf("want 200, got %d", code) + } + + if want := "[critical] alfa-01 check_failed"; gotTitle != want { + t.Errorf("Title: got %q want %q", gotTitle, want) + } + if gotPri != "5" { + t.Errorf("Priority: got %q want \"5\"", gotPri) + } + if want := "critical,check_failed"; gotTags != want { + t.Errorf("Tags: got %q want %q", gotTags, want) + } + if gotClick != "https://rm.example/a" { + t.Errorf("Click: got %q want %q", gotClick, "https://rm.example/a") + } + if want := "Bearer tk1"; gotAuth != want { + t.Errorf("Authorization: got %q want %q", gotAuth, want) + } + if gotContentType != "text/plain" { + t.Errorf("Content-Type: got %q want %q", gotContentType, "text/plain") + } + if gotBody != "errors found" { + t.Errorf("body: got %q want %q", gotBody, "errors found") + } +} + +func TestNtfyDefaultPriorityRespected(t *testing.T) { + t.Parallel() + + // info + defaultPri="min" → "min" + if got := priorityForSeverity("info", "min"); got != "min" { + t.Errorf("info+min: got %q want \"min\"", got) + } + // critical → "5" regardless of default + if got := priorityForSeverity("critical", "min"); got != "5" { + t.Errorf("critical+min: got %q want \"5\"", got) + } +} diff --git a/internal/notification/payload.go b/internal/notification/payload.go new file mode 100644 index 0000000..15c96c0 --- /dev/null +++ b/internal/notification/payload.go @@ -0,0 +1,36 @@ +// Package notification owns the fan-out of alert events to operator- +// configured channels. Three channels in v1: webhook, ntfy, smtp. +// Each channel implements Channel.Send for one Payload at a time; +// the Hub orchestrates fan-out, persists to notification_log. +package notification + +import "time" + +// Event identifies the lifecycle hook this notification is for. +type Event string + +const ( + // EventRaised occurs when an alert is first raised. + EventRaised Event = "alert.raised" + // EventAcknowledged occurs when an alert is acknowledged. + EventAcknowledged Event = "alert.acknowledged" + // EventResolved occurs when an alert is resolved. + EventResolved Event = "alert.resolved" + // EventTest is used for test notifications. + EventTest Event = "alert.test" +) + +// Payload is the per-event blob every channel renders into its own +// shape. Severity maps to channel-specific priority (ntfy) or stays +// in the body (webhook/smtp). +type Payload struct { + Event Event // alert.raised | … | alert.test + AlertID string // ULID + Severity string // info | warning | critical + Kind string // backup_failed | … + HostID string + HostName string + Message string + RaisedAt time.Time + Link string // Absolute URL to /alerts/; built by Hub +} diff --git a/internal/notification/smtp.go b/internal/notification/smtp.go new file mode 100644 index 0000000..296bfdf --- /dev/null +++ b/internal/notification/smtp.go @@ -0,0 +1,140 @@ +package notification + +import ( + "context" + "crypto/tls" + "fmt" + "net" + "net/smtp" + "strings" + "time" +) + +// SMTPConfig holds the configuration for an SMTP notification channel. +type SMTPConfig struct { + Host string `json:"host"` + Port int `json:"port"` + Encryption string `json:"encryption"` // "starttls" | "tls" | "none" + Username string `json:"username"` + Password string `json:"password"` + From string `json:"from"` + To string `json:"to"` +} + +// SMTPChannel delivers alert notifications via plain-text email. +type SMTPChannel struct { + cfg SMTPConfig + // messageIDDomain holds the public base hostname of restic-manager so + // Message-IDs include a stable right-hand-side. Falls back to + // "restic-manager.local" when unset. + messageIDDomain string +} + +// NewSMTPChannel builds an SMTP channel. messageIDDomain comes from +// cfg.Cfg.BaseURL — caller passes it through. +func NewSMTPChannel(cfg SMTPConfig, messageIDDomain string) *SMTPChannel { + if messageIDDomain == "" { + messageIDDomain = "restic-manager.local" + } + return &SMTPChannel{cfg: cfg, messageIDDomain: messageIDDomain} +} + +// Kind returns "smtp". +func (c *SMTPChannel) Kind() string { return "smtp" } + +// Send delivers the payload as a plain-text email via SMTP. +// Returns (250, latency, nil) on success. +func (c *SMTPChannel) Send(ctx context.Context, p Payload) (int, time.Duration, error) { + t0 := time.Now() + addr := fmt.Sprintf("%s:%d", c.cfg.Host, c.cfg.Port) + + // Dial respects ctx (we use net.Dialer). + dialer := &net.Dialer{Timeout: 10 * time.Second} + rawConn, err := dialer.DialContext(ctx, "tcp", addr) + if err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: dial %s: %w", addr, err) + } + + var client *smtp.Client + switch strings.ToLower(c.cfg.Encryption) { + case "tls": + conn := tls.Client(rawConn, &tls.Config{ServerName: c.cfg.Host, MinVersion: tls.VersionTLS12}) + client, err = smtp.NewClient(conn, c.cfg.Host) + case "starttls", "": + client, err = smtp.NewClient(rawConn, c.cfg.Host) + if err == nil { + err = client.StartTLS(&tls.Config{ServerName: c.cfg.Host, MinVersion: tls.VersionTLS12}) + } + case "none": + client, err = smtp.NewClient(rawConn, c.cfg.Host) + default: + _ = rawConn.Close() + return 0, time.Since(t0), fmt.Errorf("smtp: unknown encryption %q", c.cfg.Encryption) + } + if err != nil { + _ = rawConn.Close() + return 0, time.Since(t0), fmt.Errorf("smtp: handshake: %w", err) + } + defer func() { _ = client.Quit() }() + + if c.cfg.Username != "" { + auth := smtp.PlainAuth("", c.cfg.Username, c.cfg.Password, c.cfg.Host) + if err := client.Auth(auth); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: auth: %w", err) + } + } + + if err := client.Mail(extractAddr(c.cfg.From)); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: MAIL FROM: %w", err) + } + if err := client.Rcpt(c.cfg.To); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: RCPT TO: %w", err) + } + wc, err := client.Data() + if err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: DATA: %w", err) + } + msg := buildEmailBody(c.cfg, c.messageIDDomain, p) + if _, err := wc.Write(msg); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: write: %w", err) + } + if err := wc.Close(); err != nil { + return 0, time.Since(t0), fmt.Errorf("smtp: close DATA: %w", err) + } + + return 250, time.Since(t0), nil +} + +// extractAddr pulls the bare email out of a "Name " form. +func extractAddr(s string) string { + if i, j := strings.LastIndex(s, "<"), strings.LastIndex(s, ">"); i >= 0 && j > i { + return s[i+1 : j] + } + return s +} + +// buildEmailBody assembles the RFC 5322 message bytes per the spec. +// Plain text only; subject hardcoded. +func buildEmailBody(cfg SMTPConfig, msgIDDomain string, p Payload) []byte { + var b strings.Builder + b.WriteString("From: " + cfg.From + "\r\n") + b.WriteString("To: " + cfg.To + "\r\n") + b.WriteString(fmt.Sprintf("Subject: [restic-manager] [%s] %s: %s\r\n", p.Severity, p.HostName, p.Kind)) + b.WriteString("Date: " + p.RaisedAt.UTC().Format(time.RFC1123Z) + "\r\n") + b.WriteString("Message-ID: <" + p.AlertID + "@" + msgIDDomain + ">\r\n") + b.WriteString("MIME-Version: 1.0\r\n") + b.WriteString("Content-Type: text/plain; charset=utf-8\r\n") + b.WriteString("\r\n") + b.WriteString(p.Message + "\r\n\r\n") + b.WriteString("—\r\n") + b.WriteString("Raised at: " + p.RaisedAt.UTC().Format(time.RFC3339) + "\r\n") + b.WriteString("Severity: " + p.Severity + "\r\n") + b.WriteString("Host: " + p.HostName + "\r\n") + b.WriteString("Kind: " + p.Kind + "\r\n") + if p.Link != "" { + b.WriteString("\r\nOpen in restic-manager:\r\n") + b.WriteString(p.Link + "\r\n") + } + b.WriteString("\r\n(This message was sent by restic-manager. Acknowledge or resolve in the UI.)\r\n") + return []byte(b.String()) +} diff --git a/internal/notification/smtp_test.go b/internal/notification/smtp_test.go new file mode 100644 index 0000000..b3d3e06 --- /dev/null +++ b/internal/notification/smtp_test.go @@ -0,0 +1,154 @@ +package notification + +import ( + "context" + "net" + "strings" + "sync" + "testing" + "time" +) + +// fakeSMTPServer accepts a single connection, runs the minimal SMTP +// dialogue (HELO/EHLO, MAIL FROM, RCPT TO, DATA, QUIT) and stores +// what came across the wire. Plain (no TLS) — we test the protocol +// shape, not crypto. +type fakeSMTPServer struct { + mu sync.Mutex + mailFrom string + rcptTo string + data string + authed bool +} + +func startFakeSMTP(t *testing.T) (string, *fakeSMTPServer) { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("listen: %v", err) + } + srv := &fakeSMTPServer{} + t.Cleanup(func() { _ = ln.Close() }) + go func() { + conn, err := ln.Accept() + if err != nil { + return + } + defer func() { _ = conn.Close() }() + readLine := func() string { + buf := make([]byte, 1024) + n, err := conn.Read(buf) + if err != nil { + return "" + } + return string(buf[:n]) + } + write := func(s string) { _, _ = conn.Write([]byte(s)) } + + write("220 fake.smtp ESMTP\r\n") + for { + line := readLine() + if line == "" { + return + } + cmd := strings.ToUpper(strings.TrimSpace(line)) + switch { + case strings.HasPrefix(cmd, "EHLO"), strings.HasPrefix(cmd, "HELO"): + write("250-fake.smtp\r\n250 AUTH PLAIN\r\n") + case strings.HasPrefix(cmd, "AUTH "): + srv.mu.Lock() + srv.authed = true + srv.mu.Unlock() + write("235 OK\r\n") + case strings.HasPrefix(cmd, "MAIL FROM"): + srv.mu.Lock() + srv.mailFrom = strings.TrimSpace(strings.TrimPrefix(line, "MAIL FROM:")) + srv.mu.Unlock() + write("250 OK\r\n") + case strings.HasPrefix(cmd, "RCPT TO"): + srv.mu.Lock() + srv.rcptTo = strings.TrimSpace(strings.TrimPrefix(line, "RCPT TO:")) + srv.mu.Unlock() + write("250 OK\r\n") + case cmd == "DATA": + write("354 OK\r\n") + // read until "\r\n.\r\n" + var data strings.Builder + for { + chunk := readLine() + if chunk == "" { + break + } + data.WriteString(chunk) + if strings.Contains(data.String(), "\r\n.\r\n") { + break + } + } + srv.mu.Lock() + srv.data = data.String() + srv.mu.Unlock() + write("250 OK\r\n") + case cmd == "QUIT": + write("221 bye\r\n") + return + default: + write("500 unknown\r\n") + } + } + }() + return ln.Addr().String(), srv +} + +func TestSMTPSendsExpectedHeaders(t *testing.T) { + t.Parallel() + addr, srv := startFakeSMTP(t) + host, port := splitHostPort(addr) + + ch := NewSMTPChannel(SMTPConfig{ + Host: host, Port: port, Encryption: "none", + Username: "u", Password: "p", + From: "Restic-Manager ", + To: "ops@example.com", + }, "rm.example") + + _, _, err := ch.Send(context.Background(), Payload{ + Event: EventRaised, AlertID: "01ABC", + Severity: "warning", Kind: "backup_failed", + HostName: "alfa-01", Message: "Backup failed: 401", + RaisedAt: time.Date(2026, 5, 4, 15, 42, 1, 0, time.UTC), + Link: "https://rm.example/alerts/01ABC", + }) + if err != nil { + t.Fatalf("send: %v", err) + } + + srv.mu.Lock() + defer srv.mu.Unlock() + if !srv.authed { + t.Errorf("AUTH never sent") + } + if !strings.Contains(srv.mailFrom, "alerts@example.com") { + t.Errorf("MAIL FROM: %q", srv.mailFrom) + } + if !strings.Contains(srv.rcptTo, "ops@example.com") { + t.Errorf("RCPT TO: %q", srv.rcptTo) + } + if !strings.Contains(srv.data, "Subject: [restic-manager] [warning] alfa-01: backup_failed") { + t.Errorf("subject missing or wrong: %q", srv.data) + } + if !strings.Contains(srv.data, "Message-ID: <01ABC@rm.example>") { + t.Errorf("Message-ID wrong: %q", srv.data) + } + if !strings.Contains(srv.data, "Backup failed: 401") { + t.Errorf("body missing: %q", srv.data) + } +} + +func splitHostPort(addr string) (string, int) { + host, portStr, _ := net.SplitHostPort(addr) + var port int + for _, r := range portStr { + port = port*10 + int(r-'0') + } + return host, port +} diff --git a/internal/notification/webhook.go b/internal/notification/webhook.go new file mode 100644 index 0000000..23f0212 --- /dev/null +++ b/internal/notification/webhook.go @@ -0,0 +1,98 @@ +package notification + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +// WebhookConfig is the per-channel JSON shape stored AEAD-encrypted +// in notification_channels.config. +type WebhookConfig struct { + URL string `json:"url"` + BearerToken string `json:"bearer_token,omitempty"` + HeaderName string `json:"header_name,omitempty"` + HeaderValue string `json:"header_value,omitempty"` +} + +// WebhookChannel is the HTTP-POST channel. One per configured channel +// row. Reused across sends — the http.Client is goroutine-safe. +type WebhookChannel struct { + cfg WebhookConfig + client *http.Client +} + +// NewWebhookChannel builds a webhook with a 5s overall timeout enforced +// by the http.Client; ctx in Send is layered on top for caller-driven +// cancel. +func NewWebhookChannel(cfg WebhookConfig) *WebhookChannel { + return &WebhookChannel{ + cfg: cfg, + client: &http.Client{Timeout: 5 * time.Second}, + } +} + +// Kind returns "webhook" for log enrichment and dispatcher routing. +func (c *WebhookChannel) Kind() string { return "webhook" } + +// webhookBody is the wire-stable envelope. Documented in the spec; do +// not reorder fields freely — operators write switch statements on +// "event" and "severity". +type webhookBody struct { + Event string `json:"event"` + AlertID string `json:"alert_id"` + Severity string `json:"severity"` + Kind string `json:"kind"` + HostID string `json:"host_id"` + HostName string `json:"host_name"` + Message string `json:"message"` + RaisedAt string `json:"raised_at"` + Link string `json:"link"` +} + +// Send delivers the payload as a JSON POST. Returns (statusCode, latency, err). +// 4xx/5xx responses are returned as errors with the status code set. +func (c *WebhookChannel) Send(ctx context.Context, p Payload) (int, time.Duration, error) { + body := webhookBody{ + Event: string(p.Event), AlertID: p.AlertID, + Severity: p.Severity, Kind: p.Kind, + HostID: p.HostID, HostName: p.HostName, + Message: p.Message, + RaisedAt: p.RaisedAt.UTC().Format(time.RFC3339Nano), + Link: p.Link, + } + buf, err := json.Marshal(body) + if err != nil { + return 0, 0, fmt.Errorf("webhook: marshal body: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.cfg.URL, bytes.NewReader(buf)) + if err != nil { + return 0, 0, fmt.Errorf("webhook: build request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + if c.cfg.BearerToken != "" { + req.Header.Set("Authorization", "Bearer "+c.cfg.BearerToken) + } + if c.cfg.HeaderName != "" { + req.Header.Set(c.cfg.HeaderName, c.cfg.HeaderValue) + } + + t0 := time.Now() + res, err := c.client.Do(req) + latency := time.Since(t0) + if err != nil { + return 0, latency, fmt.Errorf("webhook: do: %w", err) + } + defer func() { _ = res.Body.Close() }() + // Drain body — keep the connection reusable. + _, _ = io.Copy(io.Discard, res.Body) + if res.StatusCode >= 400 { + return res.StatusCode, latency, fmt.Errorf("webhook: http %d", res.StatusCode) + } + return res.StatusCode, latency, nil +} diff --git a/internal/notification/webhook_test.go b/internal/notification/webhook_test.go new file mode 100644 index 0000000..6dc094e --- /dev/null +++ b/internal/notification/webhook_test.go @@ -0,0 +1,83 @@ +package notification + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestWebhookSendsCorrectPayloadAndHeaders(t *testing.T) { + t.Parallel() + var got webhookBody + var auth, custom string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + auth = r.Header.Get("Authorization") + custom = r.Header.Get("X-Test") + _ = json.NewDecoder(r.Body).Decode(&got) + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + ch := NewWebhookChannel(WebhookConfig{ + URL: srv.URL, BearerToken: "tok-123", + HeaderName: "X-Test", HeaderValue: "yes", + }) + code, _, err := ch.Send(context.Background(), Payload{ + Event: EventRaised, AlertID: "01K", + Severity: "warning", Kind: "backup_failed", + HostID: "h1", HostName: "alfa-01", + Message: "Backup failed", + RaisedAt: time.Date(2026, 5, 4, 15, 42, 1, 0, time.UTC), + Link: "https://rm.example/alerts/01K", + }) + if err != nil { + t.Fatalf("send: %v", err) + } + if code != 200 { + t.Errorf("status: %d", code) + } + if got.Event != "alert.raised" || got.Kind != "backup_failed" || got.Message != "Backup failed" { + t.Errorf("body: %+v", got) + } + if auth != "Bearer tok-123" { + t.Errorf("auth: %q", auth) + } + if custom != "yes" { + t.Errorf("custom header: %q", custom) + } +} + +func TestWebhookReturnsErrorOn4xx(t *testing.T) { + t.Parallel() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + })) + defer srv.Close() + ch := NewWebhookChannel(WebhookConfig{URL: srv.URL}) + code, _, err := ch.Send(context.Background(), Payload{Event: EventRaised}) + if err == nil { + t.Fatal("expected error for 401") + } + if code != 401 { + t.Errorf("code: %d", code) + } +} + +func TestWebhookRespectsCtxTimeout(t *testing.T) { + t.Parallel() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(200) + })) + defer srv.Close() + ch := NewWebhookChannel(WebhookConfig{URL: srv.URL}) + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + _, _, err := ch.Send(ctx, Payload{Event: EventRaised}) + if err == nil { + t.Fatal("expected timeout error") + } +} diff --git a/internal/server/http/server.go b/internal/server/http/server.go index 3a20733..79868fd 100644 --- a/internal/server/http/server.go +++ b/internal/server/http/server.go @@ -13,7 +13,9 @@ import ( "github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5/middleware" + "gitea.dcglab.co.uk/steve/restic-manager/internal/alert" "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/config" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws" @@ -29,6 +31,13 @@ type Deps struct { Hub *ws.Hub JobHub *ws.JobHub UI *ui.Renderer + // AlertEngine (optional, wired in G1) receives job-finished and + // host-online events from the WS handler. Nil until G1 constructs + // the engine at boot. + AlertEngine *alert.Engine + // NotificationHub (optional, wired in G1) is used by the test-fire + // endpoint to dispatch a single synthetic payload through a channel. + NotificationHub *notification.Hub // Version is the binary's build version, surfaced in the chrome. // Empty falls back to "dev". Version string @@ -194,6 +203,13 @@ func (s *Server) routes(r chi.Router) { // Snapshot diff (P3-09). Dispatches a JobDiff against two // snapshots; output streams to the standard live job page. r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff) + + // Alert list (JSON variant). Same filter shape as the UI page. + r.Get("/alerts", s.handleAPIAlerts) + + // Notification channel test-fire. Dispatches a synthetic payload + // through a single named channel; returns JSON result. + r.Post("/notifications/{id}/test", s.handleAPINotificationTest) }) // HTMX form variant of diff (mounted outside /api so HTMX forms @@ -225,6 +241,7 @@ func (s *Server) routes(r chi.Router) { Hub: s.deps.Hub, Store: s.deps.Store, JobHub: s.deps.JobHub, + AlertEngine: s.deps.AlertEngine, OnHello: s.onAgentHello, OnScheduleAck: s.applyScheduleAck, OnScheduleFire: s.dispatchScheduledJob, @@ -296,6 +313,19 @@ func (s *Server) routes(r chi.Router) { r.Get("/hosts/{id}/snapshots/{sid}/restore", s.handleUIRestoreGet) r.Post("/hosts/{id}/restore", s.handleUIRestorePost) r.Get("/hosts/{id}/restore/tree", s.handleUIRestoreTree) + // Alerts list + operator actions. + r.Get("/alerts", s.handleUIAlerts) + r.Post("/alerts/{id}/acknowledge", s.handleUIAlertAcknowledge) + r.Post("/alerts/{id}/resolve", s.handleUIAlertResolve) + // Settings shell + Notifications sub-tab CRUD. + r.Get("/settings", s.handleUISettings) + r.Get("/settings/notifications", s.handleUINotificationsList) + r.Get("/settings/notifications/new", s.handleUINotificationNewGet) + r.Post("/settings/notifications/new", s.handleUINotificationNewPost) + r.Get("/settings/notifications/{id}/edit", s.handleUINotificationEditGet) + r.Post("/settings/notifications/{id}/edit", s.handleUINotificationEditPost) + r.Post("/settings/notifications/{id}/delete", s.handleUINotificationDelete) + r.Post("/settings/notifications/{id}/toggle", s.handleUINotificationToggle) } // Browser job-log stream (separate from /ws/agent so the auth diff --git a/internal/server/http/ui_alerts.go b/internal/server/http/ui_alerts.go new file mode 100644 index 0000000..06c82fb --- /dev/null +++ b/internal/server/http/ui_alerts.go @@ -0,0 +1,177 @@ +package http + +import ( + "encoding/json" + "log/slog" + stdhttp "net/http" + "strings" + "time" + + "github.com/go-chi/chi/v5" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +type alertsPage struct { + Filter store.AlertFilter + Alerts []store.Alert + Counts alertCounts + HostNames map[string]string // host_id → name for table rendering +} + +type alertCounts struct { + Open int + Acknowledged int + Resolved24h int +} + +// handleUIAlerts renders the alerts page with the chosen filters. +func (s *Server) handleUIAlerts(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + q := r.URL.Query() + f := store.AlertFilter{ + Status: q.Get("status"), + Severity: q.Get("severity"), + HostID: q.Get("host_id"), + Search: strings.TrimSpace(q.Get("q")), + Limit: 200, + } + if f.Status == "" { + f.Status = "open" + } + + alerts, err := s.deps.Store.ListAlerts(r.Context(), f) + if err != nil { + slog.Error("ui alerts: list", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + page := alertsPage{Filter: f, Alerts: alerts, HostNames: map[string]string{}} + if hosts, err := s.deps.Store.ListHosts(r.Context()); err == nil { + for _, h := range hosts { + page.HostNames[h.ID] = h.Name + } + } + page.Counts = computeAlertCounts(s, r) + + view := s.baseView(r, u) + view.Title = "Alerts · restic-manager" + view.Active = "alerts" + view.Page = page + if err := s.deps.UI.Render(w, "alerts", view); err != nil { + slog.Error("ui alerts: render", "err", err) + } +} + +func computeAlertCounts(s *Server, r *stdhttp.Request) alertCounts { + open, _ := s.deps.Store.ListAlerts(r.Context(), + store.AlertFilter{Status: "open"}) + acked, _ := s.deps.Store.ListAlerts(r.Context(), + store.AlertFilter{Status: "acknowledged"}) + cutoff := time.Now().UTC().Add(-24 * time.Hour) + all, _ := s.deps.Store.ListAlerts(r.Context(), + store.AlertFilter{Status: "resolved"}) + res := 0 + for _, a := range all { + if a.ResolvedAt != nil && a.ResolvedAt.After(cutoff) { + res++ + } + } + return alertCounts{Open: len(open), Acknowledged: len(acked), Resolved24h: res} +} + +// handleAPIAlerts is the JSON list — same filter shape. +func (s *Server) handleAPIAlerts(w stdhttp.ResponseWriter, r *stdhttp.Request) { + if _, ok := s.requireUser(r); !ok { + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") + return + } + q := r.URL.Query() + f := store.AlertFilter{ + Status: q.Get("status"), + Severity: q.Get("severity"), + HostID: q.Get("host_id"), + Search: strings.TrimSpace(q.Get("q")), + Limit: 200, + } + alerts, err := s.deps.Store.ListAlerts(r.Context(), f) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(alerts) +} + +// handleUIAlertAcknowledge is POST /alerts/{id}/acknowledge. +func (s *Server) handleUIAlertAcknowledge(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + id := chi.URLParam(r, "id") + if id == "" { + stdhttp.Error(w, "missing id", stdhttp.StatusBadRequest) + return + } + var err error + if s.deps.AlertEngine != nil { + err = s.deps.AlertEngine.Acknowledge(r.Context(), id, u.ID, time.Now().UTC()) + } else { + err = s.deps.Store.Acknowledge(r.Context(), id, u.ID, time.Now().UTC()) + } + if err != nil { + slog.Warn("ui alerts: ack", "err", err) + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), UserID: &u.ID, Actor: "user", + Action: "alert.acknowledge", + TargetKind: ptr("alert"), TargetID: &id, + TS: time.Now().UTC(), + }) + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("HX-Redirect", "/alerts?"+r.URL.RawQuery) + w.WriteHeader(stdhttp.StatusNoContent) + return + } + stdhttp.Redirect(w, r, "/alerts", stdhttp.StatusSeeOther) +} + +// handleUIAlertResolve is POST /alerts/{id}/resolve. +func (s *Server) handleUIAlertResolve(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + id := chi.URLParam(r, "id") + if id == "" { + stdhttp.Error(w, "missing id", stdhttp.StatusBadRequest) + return + } + var err error + if s.deps.AlertEngine != nil { + err = s.deps.AlertEngine.Resolve(r.Context(), id, time.Now().UTC()) + } else { + err = s.deps.Store.Resolve(r.Context(), id, time.Now().UTC()) + } + if err != nil { + slog.Warn("ui alerts: resolve", "err", err) + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), UserID: &u.ID, Actor: "user", + Action: "alert.resolve", + TargetKind: ptr("alert"), TargetID: &id, + TS: time.Now().UTC(), + }) + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("HX-Redirect", "/alerts?"+r.URL.RawQuery) + w.WriteHeader(stdhttp.StatusNoContent) + return + } + stdhttp.Redirect(w, r, "/alerts", stdhttp.StatusSeeOther) +} diff --git a/internal/server/http/ui_alerts_test.go b/internal/server/http/ui_alerts_test.go new file mode 100644 index 0000000..633773f --- /dev/null +++ b/internal/server/http/ui_alerts_test.go @@ -0,0 +1,41 @@ +package http + +import ( + "context" + "encoding/json" + stdhttp "net/http" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +func TestAPIAlertsListsOpen(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, _ := enrolHostForWS(t, srv, st, "host-alerts") + _, _, _ = st.RaiseOrTouch(context.Background(), hostID, + "backup_failed", "warning", "x", time.Now().UTC()) + cookie := loginAsAdmin(t, st) + + req, _ := stdhttp.NewRequest("GET", ts.URL+"/api/alerts?status=open", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + t.Fatalf("status: %d", res.StatusCode) + } + var got []store.Alert + if err := json.NewDecoder(res.Body).Decode(&got); err != nil { + t.Fatalf("decode: %v", err) + } + if len(got) != 1 || got[0].Kind != "backup_failed" { + t.Fatalf("got %+v", got) + } + _ = ulid.Make() // import keep +} diff --git a/internal/server/http/ui_handlers.go b/internal/server/http/ui_handlers.go index bd76a2a..8bf9f8c 100644 --- a/internal/server/http/ui_handlers.go +++ b/internal/server/http/ui_handlers.go @@ -89,12 +89,24 @@ func (s *Server) requireUIUser(w stdhttp.ResponseWriter, r *stdhttp.Request) *ui // authenticated page. Every UI page sits under the dashboard primary // nav today; if a future page lives under a different primary nav // tab (e.g. Settings, Audit), accept an Active arg again. -func (s *Server) baseView(u *ui.User) ui.ViewData { - return ui.ViewData{ +// +// OpenAlerts is populated via a quick store count so the nav badge +// stays current on every page load without requiring a page-specific +// store call. +func (s *Server) baseView(r *stdhttp.Request, u *ui.User) ui.ViewData { + view := ui.ViewData{ User: u, Active: "dashboard", Version: s.version(), } + + // Populate OpenAlerts from the store so the nav badge shows the + // current count on every page. + if open, err := s.deps.Store.ListAlerts(r.Context(), store.AlertFilter{Status: "open"}); err == nil { + view.OpenAlerts = len(open) + } + + return view } // version returns the binary's build version — passed in via Deps so @@ -110,10 +122,11 @@ func (s *Server) version() string { // dashboardPage is the data the dashboard template renders against. type dashboardPage struct { - Hosts []dashboardHostRow - HostCount int - Summary store.FleetSummary - PendingHosts []store.PendingHost // announce-and-approve queue (P2-18d) + Hosts []dashboardHostRow + HostCount int + Summary store.FleetSummary + PendingHosts []store.PendingHost // announce-and-approve queue (P2-18d) + CritOpenCount int } // dashboardHostRow carries a host plus the per-row Run-now decision @@ -227,13 +240,18 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request) slog.Warn("ui dashboard: list pending hosts", "err", perr) } - view := s.baseView(u) - view.OpenAlerts = summary.OpenAlerts + critOpenCount := 0 + if crit, err := s.deps.Store.ListAlerts(r.Context(), store.AlertFilter{Status: "open", Severity: "critical"}); err == nil { + critOpenCount = len(crit) + } + + view := s.baseView(r, u) view.Page = dashboardPage{ - Hosts: rows, - HostCount: len(hosts), - Summary: summary, - PendingHosts: pending, + Hosts: rows, + HostCount: len(hosts), + Summary: summary, + PendingHosts: pending, + CritOpenCount: critOpenCount, } if err := s.deps.UI.Render(w, "dashboard", view); err != nil { slog.Error("ui: render dashboard", "err", err) @@ -295,7 +313,7 @@ func (s *Server) handleUIAddHostGet(w stdhttp.ResponseWriter, r *stdhttp.Request if u == nil { return } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Add host · restic-manager" view.Page = addHostPage{ServerURL: s.publicURL(r)} if err := s.deps.UI.Render(w, "add_host", view); err != nil { @@ -367,7 +385,7 @@ func (s *Server) handleUIAddHostPost(w stdhttp.ResponseWriter, r *stdhttp.Reques } } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Add host · restic-manager" view.Page = page w.WriteHeader(stdhttp.StatusUnprocessableEntity) @@ -434,7 +452,7 @@ func (s *Server) handleUIPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Reques } } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Pending host · restic-manager" view.Page = page if err := s.deps.UI.Render(w, "pending_host", view); err != nil { @@ -612,7 +630,7 @@ func (s *Server) handleUIHostDetail(w stdhttp.ResponseWriter, r *stdhttp.Request shown = shown[:cap] } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = host.Name + " · restic-manager" view.Page = hostDetailPage{ hostChromeData: s.loadHostChrome(r, *host, "snapshots", "snapshots"), @@ -712,7 +730,7 @@ func (s *Server) handleUIJobDetail(w stdhttp.ResponseWriter, r *stdhttp.Request) nextSeq = logs[n-1].Seq } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = job.Kind + " · " + host.Name + " · restic-manager" view.Page = jobDetailPage{ Job: *job, diff --git a/internal/server/http/ui_notifications.go b/internal/server/http/ui_notifications.go new file mode 100644 index 0000000..5ba0bca --- /dev/null +++ b/internal/server/http/ui_notifications.go @@ -0,0 +1,793 @@ +// ui_notifications.go — HTML form-driven handlers for the notification +// channel CRUD at /settings/notifications and the test-fire endpoint at +// POST /api/notifications/{id}/test. +// +// The settings shell currently has a single sub-tab (Notifications); +// the structure is designed to be extended with Users/Auth tabs later. +// +// Routes (wired in server.go): +// +// GET /settings → handleUISettings +// GET /settings/notifications → handleUINotificationsList +// GET /settings/notifications/new → handleUINotificationNewGet +// POST /settings/notifications/new → handleUINotificationNewPost +// GET /settings/notifications/{id}/edit → handleUINotificationEditGet +// POST /settings/notifications/{id}/edit → handleUINotificationEditPost +// POST /settings/notifications/{id}/delete → handleUINotificationDelete +// POST /api/notifications/{id}/test → handleAPINotificationTest +package http + +import ( + "encoding/json" + "errors" + "fmt" + "log/slog" + stdhttp "net/http" + "net/mail" + "net/url" + "strconv" + "strings" + "time" + + "github.com/go-chi/chi/v5" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// ── page models ────────────────────────────────────────────────────────────── + +// settingsPage is the data fed to the settings shell template. The +// sub-tab body is embedded via the Channels slice so a single template +// layout works for both the list and the edit form. +type settingsPage struct { + // ActiveTab is the settings sub-tab currently visible. + ActiveTab string + // Channels is the full list (list sub-tab). + Channels []store.NotificationChannel + // Form is populated when the operator is creating or editing a channel. + Form *notificationForm + // FormError is an inline error message for the channel form. + FormError string + // DeleteError is an inline error shown on the confirm-delete form. + DeleteError string +} + +// notificationForm holds the round-trip values for the channel +// create/edit form. Separate per-kind sub-structs mirror the template +// field groups; all fields are strings so the template never has to +// handle nil. +type notificationForm struct { + // ID is the channel's ULID; empty for new. + ID string + Kind string // webhook | ntfy | smtp + Name string + // Enabled maps to the enabled checkbox. + Enabled bool + // DefaultPriority applies to ntfy channels. + DefaultPriority string + + // Webhook sub-fields. + WebhookURL string + WebhookBearerToken string + WebhookHeaderName string + WebhookHeaderValue string + + // Ntfy sub-fields. + NtfyServerURL string + NtfyTopic string + NtfyAccessToken string + NtfyUsername string + NtfyPassword string + + // SMTP sub-fields. + SMTPHost string + SMTPPort string // string for form round-trip; validated to int on save + SMTPEncryption string + SMTPUsername string + // SMTPPassword is a write-only field: shown as placeholder on edit; + // blank on submit means "keep the stored value". + SMTPPassword string + SMTPFrom string + SMTPTo string +} + +// ── internal helpers ────────────────────────────────────────────────────────── + +// loadSettingsPage fetches the channel list and returns the base page model. +func (s *Server) loadSettingsPage(r *stdhttp.Request) (*settingsPage, error) { + chans, err := s.deps.Store.ListNotificationChannels(r.Context()) + if err != nil { + return nil, fmt.Errorf("list channels: %w", err) + } + return &settingsPage{ + ActiveTab: "notifications", + Channels: chans, + }, nil +} + +// renderSettingsPage renders the settings shell, setting HTTP 422 on +// validation failure (pass status=0 for the normal 200). +func (s *Server) renderSettingsPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u *ui.User, page *settingsPage, status int) { + view := s.baseView(r, u) + view.Title = "Settings · restic-manager" + view.Active = "settings" + view.Page = *page + if status != 0 { + w.WriteHeader(status) + } + if err := s.deps.UI.Render(w, "settings", view); err != nil { + slog.Error("ui: render settings", "err", err) + } +} + +// encryptChannelConfig JSON-encodes cfg and AEAD-seals it with the +// channel-specific additional-data binding. +func (s *Server) encryptChannelConfig(id string, cfg any) ([]byte, error) { + plain, err := json.Marshal(cfg) + if err != nil { + return nil, fmt.Errorf("marshal config: %w", err) + } + enc, err := s.deps.AEAD.Encrypt(plain, []byte("notification-channel:"+id)) + if err != nil { + return nil, fmt.Errorf("encrypt config: %w", err) + } + return []byte(enc), nil +} + +// decryptChannelConfig decrypts the AEAD blob and unmarshals it into dst. +func (s *Server) decryptChannelConfig(ch store.NotificationChannel, dst any) error { + plain, err := s.deps.AEAD.Decrypt(string(ch.Config), []byte("notification-channel:"+ch.ID)) + if err != nil { + return fmt.Errorf("decrypt: %w", err) + } + return json.Unmarshal(plain, dst) +} + +// firstNonEmpty returns the first non-empty (after TrimSpace) value in +// vals, or "". Used for fields like `name` that appear once per per-kind +// sub-form: only the visible kind's input is filled in, so PostForm.Get +// (which returns the first regardless of emptiness) would lose the +// actual value when the user edits the second or third kind. +func firstNonEmpty(vals []string) string { + for _, v := range vals { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} + +// formHasValue reports whether vals contains want. Used for hidden+checkbox +// pairs (e.g. + ) +// where r.PostForm.Get returns the first ("0") even when the checkbox is +// ticked, so we have to scan the slice instead. +func formHasValue(vals []string, want string) bool { + for _, v := range vals { + if v == want { + return true + } + } + return false +} + +// formFromRequest parses the common + per-kind fields from a POST form. +// The caller must have already called r.ParseForm(). +func formFromRequest(r *stdhttp.Request) *notificationForm { + f := ¬ificationForm{ + Kind: strings.TrimSpace(r.PostForm.Get("kind")), + Name: strings.TrimSpace(firstNonEmpty(r.PostForm["name"])), + Enabled: formHasValue(r.PostForm["enabled"], "1"), + DefaultPriority: strings.TrimSpace(r.PostForm.Get("default_priority")), + + WebhookURL: strings.TrimSpace(r.PostForm.Get("webhook_url")), + WebhookBearerToken: r.PostForm.Get("webhook_bearer_token"), + WebhookHeaderName: strings.TrimSpace(r.PostForm.Get("webhook_header_name")), + WebhookHeaderValue: r.PostForm.Get("webhook_header_value"), + + NtfyServerURL: strings.TrimSpace(r.PostForm.Get("ntfy_server_url")), + NtfyTopic: strings.TrimSpace(r.PostForm.Get("ntfy_topic")), + NtfyAccessToken: r.PostForm.Get("ntfy_access_token"), + NtfyUsername: strings.TrimSpace(r.PostForm.Get("ntfy_username")), + NtfyPassword: r.PostForm.Get("ntfy_password"), + + SMTPHost: strings.TrimSpace(r.PostForm.Get("smtp_host")), + SMTPPort: strings.TrimSpace(r.PostForm.Get("smtp_port")), + SMTPEncryption: strings.TrimSpace(r.PostForm.Get("smtp_encryption")), + SMTPUsername: strings.TrimSpace(r.PostForm.Get("smtp_username")), + SMTPPassword: r.PostForm.Get("smtp_password"), + SMTPFrom: strings.TrimSpace(r.PostForm.Get("smtp_from")), + SMTPTo: strings.TrimSpace(r.PostForm.Get("smtp_to")), + } + if f.Kind == "" { + f.Kind = "webhook" + } + return f +} + +// validateForm validates the common + per-kind fields. Returns a +// non-empty string on the first validation error found. +func validateForm(f *notificationForm) string { + if f.Name == "" { + return "Name is required." + } + if len(f.Name) > 100 { + return "Name must be 100 characters or fewer." + } + switch f.Kind { + case "webhook": + if f.WebhookURL == "" { + return "Webhook URL is required." + } + u, err := url.Parse(f.WebhookURL) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + return "Webhook URL must be a valid http(s) URL." + } + case "ntfy": + if f.NtfyServerURL != "" { + u, err := url.Parse(f.NtfyServerURL) + if err != nil || (u.Scheme != "http" && u.Scheme != "https") { + return "Ntfy server URL must be a valid http(s) URL." + } + } + if f.NtfyTopic == "" { + return "Ntfy topic is required." + } + case "smtp": + if f.SMTPHost == "" { + return "SMTP host is required." + } + port, err := strconv.Atoi(f.SMTPPort) + if err != nil || port < 1 || port > 65535 { + return "SMTP port must be a number between 1 and 65535." + } + switch f.SMTPEncryption { + case "starttls", "tls", "none": + default: + return "SMTP encryption must be starttls, tls, or none." + } + if f.SMTPFrom == "" { + return "SMTP From address is required." + } + if _, err := mail.ParseAddress(f.SMTPFrom); err != nil { + return "SMTP From is not a valid email address." + } + if f.SMTPTo == "" { + return "SMTP To address is required." + } + if _, err := mail.ParseAddress(f.SMTPTo); err != nil { + return "SMTP To is not a valid email address." + } + default: + return "Kind must be webhook, ntfy, or smtp." + } + return "" +} + +// buildConfig constructs the per-kind notification config struct from f. +// For edit (existing != nil), blank password fields fall back to the +// stored value so the operator can save other fields without re-typing +// the credential. +func buildConfig(f *notificationForm, existing any) (any, error) { + switch f.Kind { + case "webhook": + cfg := notification.WebhookConfig{ + URL: f.WebhookURL, + BearerToken: f.WebhookBearerToken, + HeaderName: f.WebhookHeaderName, + HeaderValue: f.WebhookHeaderValue, + } + if existing != nil { + ex, ok := existing.(*notification.WebhookConfig) + if ok && cfg.BearerToken == "" { + cfg.BearerToken = ex.BearerToken + } + } + return cfg, nil + + case "ntfy": + cfg := notification.NtfyConfig{ + ServerURL: f.NtfyServerURL, + Topic: f.NtfyTopic, + AccessToken: f.NtfyAccessToken, + Username: f.NtfyUsername, + Password: f.NtfyPassword, + } + if existing != nil { + if ex, ok := existing.(*notification.NtfyConfig); ok { + // Blank password on edit means "keep stored value" + // — same write-only treatment as smtp_password. + if cfg.AccessToken == "" { + cfg.AccessToken = ex.AccessToken + } + if cfg.Password == "" { + cfg.Password = ex.Password + } + } + } + return cfg, nil + + case "smtp": + port, _ := strconv.Atoi(f.SMTPPort) + cfg := notification.SMTPConfig{ + Host: f.SMTPHost, + Port: port, + Encryption: f.SMTPEncryption, + Username: f.SMTPUsername, + Password: f.SMTPPassword, + From: f.SMTPFrom, + To: f.SMTPTo, + } + if existing != nil { + ex, ok := existing.(*notification.SMTPConfig) + if ok && cfg.Password == "" { + cfg.Password = ex.Password + } + } + return cfg, nil + } + return nil, fmt.Errorf("unknown kind %q", f.Kind) +} + +// ── UI handlers ─────────────────────────────────────────────────────────────── + +// handleUISettings renders the settings shell (defaults to the +// Notifications sub-tab in v1). +func (s *Server) handleUISettings(w stdhttp.ResponseWriter, r *stdhttp.Request) { + s.handleUINotificationsList(w, r) +} + +// handleUINotificationsList renders the channel list under the +// Notifications sub-tab. +func (s *Server) handleUINotificationsList(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + page, err := s.loadSettingsPage(r) + if err != nil { + slog.Error("ui settings: load", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + s.renderSettingsPage(w, r, u, page, 0) +} + +// handleUINotificationNewGet renders the kind picker + empty form. +// The ?kind= query param pre-selects the visible per-kind fields. +func (s *Server) handleUINotificationNewGet(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + page, err := s.loadSettingsPage(r) + if err != nil { + slog.Error("ui settings: load", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + kind := r.URL.Query().Get("kind") + if kind == "" { + kind = "webhook" + } + page.Form = ¬ificationForm{Kind: kind} + s.renderSettingsPage(w, r, u, page, 0) +} + +// handleUINotificationNewPost validates and creates a new channel, then +// redirects to the list. Re-renders the form with an error banner on +// validation failure. +func (s *Server) handleUINotificationNewPost(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + if err := r.ParseForm(); err != nil { + stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest) + return + } + + f := formFromRequest(r) + if errMsg := validateForm(f); errMsg != "" { + page, _ := s.loadSettingsPage(r) + if page == nil { + page = &settingsPage{ActiveTab: "notifications"} + } + page.Form = f + page.FormError = errMsg + s.renderSettingsPage(w, r, u, page, stdhttp.StatusUnprocessableEntity) + return + } + + id := ulid.Make().String() + cfg, err := buildConfig(f, nil) + if err != nil { + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + enc, err := s.encryptChannelConfig(id, cfg) + if err != nil { + slog.Error("ui notifications: encrypt", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + now := time.Now().UTC() + var dp *string + if f.DefaultPriority != "" { + dp = &f.DefaultPriority + } + ch := store.NotificationChannel{ + ID: id, + Kind: f.Kind, + Name: f.Name, + Enabled: f.Enabled, + Config: enc, + DefaultPriority: dp, + CreatedAt: now, + UpdatedAt: now, + } + if err := s.deps.Store.CreateNotificationChannel(r.Context(), ch); err != nil { + slog.Error("ui notifications: create", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: &u.ID, + Actor: "user", + Action: "notification_channel.created", + TargetKind: ptr("notification_channel"), + TargetID: &id, + TS: now, + }) + stdhttp.Redirect(w, r, "/settings/notifications", stdhttp.StatusSeeOther) +} + +// handleUINotificationEditGet fetches a channel, decrypts its config, +// and renders the edit form with values pre-filled. +func (s *Server) handleUINotificationEditGet(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + channelID := chi.URLParam(r, "id") + ch, err := s.deps.Store.GetNotificationChannel(r.Context(), channelID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + stdhttp.NotFound(w, r) + return + } + slog.Error("ui notifications: get", "id", channelID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + f := ¬ificationForm{ + ID: ch.ID, + Kind: ch.Kind, + Name: ch.Name, + Enabled: ch.Enabled, + } + if ch.DefaultPriority != nil { + f.DefaultPriority = *ch.DefaultPriority + } + + switch ch.Kind { + case "webhook": + var cfg notification.WebhookConfig + if err := s.decryptChannelConfig(*ch, &cfg); err == nil { + f.WebhookURL = cfg.URL + // BearerToken and custom headers: don't echo plaintext — shown + // via placeholder text in the template. + f.WebhookHeaderName = cfg.HeaderName + // HeaderValue and BearerToken are write-only — left blank + // so the placeholder "stored, leave blank to keep" shows. + } + case "ntfy": + var cfg notification.NtfyConfig + if err := s.decryptChannelConfig(*ch, &cfg); err == nil { + f.NtfyServerURL = cfg.ServerURL + f.NtfyTopic = cfg.Topic + f.NtfyUsername = cfg.Username + // AccessToken and Password are write-only. + } + case "smtp": + var cfg notification.SMTPConfig + if err := s.decryptChannelConfig(*ch, &cfg); err == nil { + f.SMTPHost = cfg.Host + f.SMTPPort = strconv.Itoa(cfg.Port) + f.SMTPEncryption = cfg.Encryption + f.SMTPUsername = cfg.Username + // Password is write-only — left blank. + f.SMTPFrom = cfg.From + f.SMTPTo = cfg.To + } + } + + page, err := s.loadSettingsPage(r) + if err != nil { + slog.Error("ui settings: load", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + page.Form = f + s.renderSettingsPage(w, r, u, page, 0) +} + +// handleUINotificationEditPost validates the edit form, merges new +// values onto the existing config (preserving blanked-out secrets), +// re-encrypts, and updates the channel row. +func (s *Server) handleUINotificationEditPost(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + channelID := chi.URLParam(r, "id") + ch, err := s.deps.Store.GetNotificationChannel(r.Context(), channelID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + stdhttp.NotFound(w, r) + return + } + slog.Error("ui notifications: get for edit", "id", channelID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + if err := r.ParseForm(); err != nil { + stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest) + return + } + f := formFromRequest(r) + f.ID = ch.ID + + if errMsg := validateForm(f); errMsg != "" { + page, _ := s.loadSettingsPage(r) + if page == nil { + page = &settingsPage{ActiveTab: "notifications"} + } + page.Form = f + page.FormError = errMsg + s.renderSettingsPage(w, r, u, page, stdhttp.StatusUnprocessableEntity) + return + } + + // Decrypt existing config so blank password fields can fall back + // to the stored values. + var existingCfg any + switch ch.Kind { + case "webhook": + var cfg notification.WebhookConfig + if derr := s.decryptChannelConfig(*ch, &cfg); derr == nil { + existingCfg = &cfg + } + case "ntfy": + var cfg notification.NtfyConfig + if derr := s.decryptChannelConfig(*ch, &cfg); derr == nil { + existingCfg = &cfg + } + case "smtp": + var cfg notification.SMTPConfig + if derr := s.decryptChannelConfig(*ch, &cfg); derr == nil { + existingCfg = &cfg + } + } + + newCfg, err := buildConfig(f, existingCfg) + if err != nil { + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + enc, err := s.encryptChannelConfig(ch.ID, newCfg) + if err != nil { + slog.Error("ui notifications: re-encrypt", "id", ch.ID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + now := time.Now().UTC() + var dp *string + if f.DefaultPriority != "" { + dp = &f.DefaultPriority + } + updated := store.NotificationChannel{ + ID: ch.ID, + Kind: f.Kind, + Name: f.Name, + Enabled: f.Enabled, + Config: enc, + DefaultPriority: dp, + CreatedAt: ch.CreatedAt, + UpdatedAt: now, + } + if err := s.deps.Store.UpdateNotificationChannel(r.Context(), updated); err != nil { + slog.Error("ui notifications: update", "id", ch.ID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: &u.ID, + Actor: "user", + Action: "notification_channel.updated", + TargetKind: ptr("notification_channel"), + TargetID: &ch.ID, + TS: now, + }) + stdhttp.Redirect(w, r, "/settings/notifications", stdhttp.StatusSeeOther) +} + +// handleUINotificationDelete implements the typed-confirm pattern: +// the operator must type the channel name to proceed. On match, +// DeleteNotificationChannel + audit row + redirect. On mismatch, +// re-render with an error. +func (s *Server) handleUINotificationDelete(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + channelID := chi.URLParam(r, "id") + ch, err := s.deps.Store.GetNotificationChannel(r.Context(), channelID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + stdhttp.NotFound(w, r) + return + } + slog.Error("ui notifications: get for delete", "id", channelID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + if err := r.ParseForm(); err != nil { + stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest) + return + } + confirm := strings.TrimSpace(r.PostForm.Get("confirm_name")) + if confirm != ch.Name { + page, _ := s.loadSettingsPage(r) + if page == nil { + page = &settingsPage{ActiveTab: "notifications"} + } + page.Form = ¬ificationForm{ID: ch.ID, Kind: ch.Kind, Name: ch.Name} + page.DeleteError = "Typed name did not match — deletion aborted." + s.renderSettingsPage(w, r, u, page, stdhttp.StatusUnprocessableEntity) + return + } + + if err := s.deps.Store.DeleteNotificationChannel(r.Context(), ch.ID); err != nil { + slog.Error("ui notifications: delete", "id", ch.ID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: &u.ID, + Actor: "user", + Action: "notification_channel.deleted", + TargetKind: ptr("notification_channel"), + TargetID: &ch.ID, + TS: time.Now().UTC(), + }) + stdhttp.Redirect(w, r, "/settings/notifications", stdhttp.StatusSeeOther) +} + +// handleUINotificationToggle flips the enabled flag for one channel +// and re-renders the row. Wired to the inline toggle in the channel +// list so operators don't need to enter the edit form just to flip a +// channel on or off. HTMX-aware: returns just the toggle fragment when +// the request carries HX-Request, otherwise redirects back to the list. +func (s *Server) handleUINotificationToggle(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + channelID := chi.URLParam(r, "id") + ch, err := s.deps.Store.GetNotificationChannel(r.Context(), channelID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + stdhttp.NotFound(w, r) + return + } + slog.Error("ui notifications: get for toggle", "id", channelID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + now := time.Now().UTC() + want := !ch.Enabled + if err := s.deps.Store.SetNotificationChannelEnabled(r.Context(), ch.ID, want, now); err != nil { + slog.Error("ui notifications: set enabled", "id", ch.ID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: &u.ID, + Actor: "user", + Action: "notification_channel.toggled", + TargetKind: ptr("notification_channel"), + TargetID: &ch.ID, + TS: now, + }) + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if want { + _, _ = w.Write([]byte(``)) + } else { + _, _ = w.Write([]byte(``)) + } + return + } + stdhttp.Redirect(w, r, "/settings/notifications", stdhttp.StatusSeeOther) +} + +// ── API handler ─────────────────────────────────────────────────────────────── + +// testResultFragment is the JSON body returned by handleAPINotificationTest. +type testResultFragment struct { + OK bool `json:"ok"` + LatencyMS int `json:"latency_ms"` + StatusCode *int `json:"status_code,omitempty"` + Error *string `json:"error,omitempty"` +} + +// handleAPINotificationTest fires a single synthetic test payload +// through the named channel via Hub.DispatchOne and returns a JSON +// result. The test button in the UI posts here and renders the +// green/red pill from the response. +func (s *Server) handleAPINotificationTest(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + if s.deps.NotificationHub == nil { + writeJSONError(w, stdhttp.StatusServiceUnavailable, "hub_not_ready", + "notification hub not initialised") + return + } + channelID := chi.URLParam(r, "id") + if _, err := s.deps.Store.GetNotificationChannel(r.Context(), channelID); err != nil { + if errors.Is(err, store.ErrNotFound) { + writeJSONError(w, stdhttp.StatusNotFound, "not_found", "channel not found") + return + } + slog.Error("api: notification test: get channel", "id", channelID, "err", err) + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + + // AlertID is intentionally left empty for test notifications: the + // notification_log.alert_id column has a FK to alerts.id, and no + // real alert exists for a synthetic test fire. The hub leaves the + // column NULL when AlertID is empty. + payload := notification.Payload{ + Event: notification.EventTest, + Severity: "info", + Kind: "test_notification", + HostName: "(test)", + Message: "Test from restic-manager — channel is working.", + RaisedAt: time.Now().UTC(), + } + + entry, err := s.deps.NotificationHub.DispatchOne(r.Context(), channelID, payload) + if err != nil { + slog.Error("api: notification test: dispatch", "id", channelID, "err", err) + errStr := err.Error() + writeJSON(w, stdhttp.StatusOK, testResultFragment{ + OK: false, + Error: &errStr, + }) + return + } + + res := testResultFragment{OK: entry.OK, StatusCode: entry.StatusCode} + if entry.LatencyMS != nil { + res.LatencyMS = *entry.LatencyMS + } + if entry.Error != nil { + res.Error = entry.Error + } + writeJSON(w, stdhttp.StatusOK, res) +} diff --git a/internal/server/http/ui_notifications_test.go b/internal/server/http/ui_notifications_test.go new file mode 100644 index 0000000..85d84d7 --- /dev/null +++ b/internal/server/http/ui_notifications_test.go @@ -0,0 +1,289 @@ +package http + +import ( + "bytes" + "context" + "encoding/json" + "io" + stdhttp "net/http" + "net/http/httptest" + "net/url" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/auth" + "gitea.dcglab.co.uk/steve/restic-manager/internal/crypto" + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/server/config" + "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// newNotificationTestServer builds a test server wired with a real +// NotificationHub backed by a temporary store. It also inserts a session +// so HTTP calls are authenticated. +func newNotificationTestServer(t *testing.T) (*Server, string, *store.Store, string) { + t.Helper() + dir := t.TempDir() + st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("store: %v", err) + } + t.Cleanup(func() { _ = st.Close() }) + + keyPath := filepath.Join(dir, "secret.key") + _ = crypto.GenerateKeyFile(keyPath) + key, _ := crypto.LoadKeyFromFile(keyPath) + aead, _ := crypto.NewAEAD(key) + + hub := notification.NewHub(st, aead, "http://localhost") + + deps := Deps{ + Cfg: config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath}, + Store: st, + AEAD: aead, + Hub: ws.NewHub(), + NotificationHub: hub, + BootstrapToken: "test-token", + } + s := New(deps) + ts := httptest.NewServer(s.srv.Handler) + t.Cleanup(ts.Close) + + // Mint a user + session so authenticated routes work. + rawToken, _ := auth.NewToken() + userID := ulid.Make().String() + hash, _ := auth.HashPassword("test-password-long") + _ = st.CreateUser(context.Background(), store.User{ + ID: userID, + Username: "testadmin", + PasswordHash: hash, + Role: store.RoleAdmin, + CreatedAt: time.Now().UTC(), + }) + _ = st.CreateSession(context.Background(), store.Session{ + UserID: userID, + CreatedAt: time.Now().UTC(), + ExpiresAt: time.Now().Add(time.Hour).UTC(), + }, auth.HashToken(rawToken)) + + return s, ts.URL, st, rawToken +} + +// authedClient returns a client + cookie jar that sends the test session cookie. +func authedClient(t *testing.T, rawToken string, baseURL string) *stdhttp.Client { + t.Helper() + jar := &simpleCookieJar{token: rawToken, baseURL: baseURL} + return &stdhttp.Client{Jar: jar} +} + +// simpleCookieJar injects the session cookie on every request to baseURL. +type simpleCookieJar struct { + token string + baseURL string +} + +func (j *simpleCookieJar) SetCookies(_ *url.URL, _ []*stdhttp.Cookie) {} + +func (j *simpleCookieJar) Cookies(u *url.URL) []*stdhttp.Cookie { + if !strings.HasPrefix(u.String(), j.baseURL) { + return nil + } + return []*stdhttp.Cookie{{Name: sessionCookieName, Value: j.token}} +} + +// createTestWebhookChannel inserts a webhook channel into the store +// for the given server's AEAD, targeting sink. +func createTestWebhookChannel(t *testing.T, s *Server, st *store.Store, sink string) string { + t.Helper() + id := "ch-test-" + strings.ReplaceAll(t.Name(), "/", "-") + cfg, _ := json.Marshal(notification.WebhookConfig{URL: sink}) + enc, err := s.deps.AEAD.Encrypt(cfg, []byte("notification-channel:"+id)) + if err != nil { + t.Fatalf("encrypt: %v", err) + } + err = st.CreateNotificationChannel(context.Background(), store.NotificationChannel{ + ID: id, + Kind: "webhook", + Name: "test-webhook", + Enabled: true, + Config: []byte(enc), + CreatedAt: time.Now().UTC(), + UpdatedAt: time.Now().UTC(), + }) + if err != nil { + t.Fatalf("create channel: %v", err) + } + return id +} + +// TestAPINotificationTestEndToEnd is the primary plan test: +// configure a webhook channel pointing at an httptest sink, POST the +// test endpoint, assert the synthetic event landed at the sink and a +// notification_log row with event="alert.test" ok=1 was persisted. +func TestAPINotificationTestEndToEnd(t *testing.T) { + t.Parallel() + + // Sink — records incoming request bodies. + var received [][]byte + sink := httptest.NewServer(stdhttp.HandlerFunc(func(w stdhttp.ResponseWriter, r *stdhttp.Request) { + body, _ := io.ReadAll(r.Body) + received = append(received, body) + w.WriteHeader(stdhttp.StatusOK) + })) + defer sink.Close() + + s, baseURL, st, rawToken := newNotificationTestServer(t) + channelID := createTestWebhookChannel(t, s, st, sink.URL) + client := authedClient(t, rawToken, baseURL) + + res, err := client.Post(baseURL+"/api/notifications/"+channelID+"/test", + "application/json", bytes.NewReader(nil)) + if err != nil { + t.Fatalf("post: %v", err) + } + defer func() { _ = res.Body.Close() }() + + if res.StatusCode != stdhttp.StatusOK { + body, _ := io.ReadAll(res.Body) + t.Fatalf("status %d: %s", res.StatusCode, body) + } + + var result testResultFragment + if err := json.NewDecoder(res.Body).Decode(&result); err != nil { + t.Fatalf("decode: %v", err) + } + if !result.OK { + errStr := "" + if result.Error != nil { + errStr = *result.Error + } + t.Fatalf("expected ok=true, got false; error=%s", errStr) + } + + // The sink should have received exactly one request. + if len(received) != 1 { + t.Fatalf("sink: expected 1 request, got %d", len(received)) + } + + // Decode the webhook body and check the event field. + var body map[string]any + if err := json.Unmarshal(received[0], &body); err != nil { + t.Fatalf("decode sink body: %v", err) + } + if body["event"] != string(notification.EventTest) { + t.Errorf("event: got %v, want %s", body["event"], notification.EventTest) + } + + // notification_log should have one row with event=alert.test and ok=1. + var n int + if err := st.DB().QueryRow( + `SELECT COUNT(*) FROM notification_log + WHERE channel_id = ? AND event = 'alert.test' AND ok = 1`, + channelID, + ).Scan(&n); err != nil { + t.Fatalf("query log: %v", err) + } + if n != 1 { + t.Fatalf("notification_log: expected 1 row, got %d", n) + } +} + +// TestAPINotificationTestNotFound confirms a 404 for an unknown channel. +func TestAPINotificationTestNotFound(t *testing.T) { + t.Parallel() + _, baseURL, _, rawToken := newNotificationTestServer(t) + client := authedClient(t, rawToken, baseURL) + + res, err := client.Post(baseURL+"/api/notifications/no-such-channel/test", + "application/json", bytes.NewReader(nil)) + if err != nil { + t.Fatalf("post: %v", err) + } + defer func() { _ = res.Body.Close() }() + + if res.StatusCode != stdhttp.StatusNotFound { + t.Errorf("expected 404, got %d", res.StatusCode) + } +} + +// TestAPINotificationTestUnauthed confirms a redirect (or 4xx) when +// there is no session cookie. +func TestAPINotificationTestUnauthed(t *testing.T) { + t.Parallel() + _, baseURL, _, _ := newNotificationTestServer(t) + + // Use a client that does NOT follow redirects and has no cookie. + client := &stdhttp.Client{ + CheckRedirect: func(_ *stdhttp.Request, _ []*stdhttp.Request) error { + return stdhttp.ErrUseLastResponse + }, + } + res, err := client.Post(baseURL+"/api/notifications/any-id/test", + "application/json", bytes.NewReader(nil)) + if err != nil { + t.Fatalf("post: %v", err) + } + defer func() { _ = res.Body.Close() }() + + // requireUIUser redirects to /login for unauthenticated requests. + if res.StatusCode != stdhttp.StatusSeeOther && res.StatusCode != stdhttp.StatusUnauthorized { + t.Errorf("expected 303 or 401, got %d", res.StatusCode) + } +} + +// TestNotificationCreateAndDelete is a CRUD round-trip exercising +// the store methods. The handler layer would return template errors +// (no templates in tests), so we exercise just the store-level API +// that the handlers call, confirming the plumbing compiles and works. +func TestNotificationCreateAndDelete(t *testing.T) { + t.Parallel() + s, _, st, _ := newNotificationTestServer(t) + + id := "ch-crud-test" + cfg, _ := json.Marshal(notification.WebhookConfig{URL: "https://example.com/hook"}) + enc, _ := s.deps.AEAD.Encrypt(cfg, []byte("notification-channel:"+id)) + + now := time.Now().UTC() + err := st.CreateNotificationChannel(context.Background(), store.NotificationChannel{ + ID: id, + Kind: "webhook", + Name: "crud-test", + Enabled: true, + Config: []byte(enc), + CreatedAt: now, + UpdatedAt: now, + }) + if err != nil { + t.Fatalf("create: %v", err) + } + + // Read it back and decrypt. + ch, err := st.GetNotificationChannel(context.Background(), id) + if err != nil { + t.Fatalf("get: %v", err) + } + var got notification.WebhookConfig + plain, err := s.deps.AEAD.Decrypt(string(ch.Config), []byte("notification-channel:"+id)) + if err != nil { + t.Fatalf("decrypt: %v", err) + } + if err := json.Unmarshal(plain, &got); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if got.URL != "https://example.com/hook" { + t.Errorf("URL: got %q, want %q", got.URL, "https://example.com/hook") + } + + // Delete. + if err := st.DeleteNotificationChannel(context.Background(), id); err != nil { + t.Fatalf("delete: %v", err) + } + if _, err := st.GetNotificationChannel(context.Background(), id); err == nil { + t.Error("expected ErrNotFound after delete") + } +} diff --git a/internal/server/http/ui_repo.go b/internal/server/http/ui_repo.go index ac42cc9..461081f 100644 --- a/internal/server/http/ui_repo.go +++ b/internal/server/http/ui_repo.go @@ -244,7 +244,7 @@ func (s *Server) handleUIHostRepo(w stdhttp.ResponseWriter, r *stdhttp.Request) return } page.SavedSection = r.URL.Query().Get("saved") - view := s.baseView(u) + view := s.baseView(r, u) view.Title = host.Name + " repo · restic-manager" view.Page = *page if err := s.deps.UI.Render(w, "host_repo", view); err != nil { @@ -268,7 +268,7 @@ func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u page.AdminCredsError = adminErr page.BandwidthError = bwErr page.MaintenanceError = mntErr - view := s.baseView(u) + view := s.baseView(r, u) view.Title = host.Name + " repo · restic-manager" view.Page = *page w.WriteHeader(stdhttp.StatusUnprocessableEntity) diff --git a/internal/server/http/ui_restore.go b/internal/server/http/ui_restore.go index c43fa31..65acab8 100644 --- a/internal/server/http/ui_restore.go +++ b/internal/server/http/ui_restore.go @@ -105,7 +105,7 @@ func (s *Server) handleUIRestoreGet(w stdhttp.ResponseWriter, r *stdhttp.Request } } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Restore · " + host.Name view.Page = page if err := s.deps.UI.Render(w, "host_restore", view); err != nil { @@ -161,7 +161,7 @@ func (s *Server) handleUIRestorePost(w stdhttp.ResponseWriter, r *stdhttp.Reques break } } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Restore · " + host.Name view.Page = page w.WriteHeader(status) @@ -329,7 +329,7 @@ func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Reques HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, Error: "agent offline", } - view := s.baseView(u) + view := s.baseView(r, u) view.Page = page _ = s.deps.UI.RenderPartial(w, "tree_node", view) return @@ -345,7 +345,7 @@ func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Reques HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, Error: err.Error(), } - view := s.baseView(u) + view := s.baseView(r, u) view.Page = page _ = s.deps.UI.RenderPartial(w, "tree_node", view) return @@ -355,7 +355,7 @@ func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Reques HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, Error: result.Error, } - view := s.baseView(u) + view := s.baseView(r, u) view.Page = page _ = s.deps.UI.RenderPartial(w, "tree_node", view) return @@ -382,7 +382,7 @@ func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Reques HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, Children: children, } - view := s.baseView(u) + view := s.baseView(r, u) view.Page = page if err := s.deps.UI.RenderPartial(w, "tree_node", view); err != nil { slog.Warn("ui restore tree: render partial", "err", err) diff --git a/internal/server/http/ui_schedules.go b/internal/server/http/ui_schedules.go index a4daf4d..b436787 100644 --- a/internal/server/http/ui_schedules.go +++ b/internal/server/http/ui_schedules.go @@ -112,7 +112,7 @@ func (s *Server) handleUISchedulesList(w stdhttp.ResponseWriter, r *stdhttp.Requ chrome.ScheduleCount = len(scheds) chrome.SourceGroupCount = len(groups) - view := s.baseView(u) + view := s.baseView(r, u) view.Title = host.Name + " schedules · restic-manager" view.Page = hostSchedulesPage{ hostChromeData: chrome, @@ -140,7 +140,7 @@ func (s *Server) handleUIScheduleNewGet(w stdhttp.ResponseWriter, r *stdhttp.Req stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) return } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "New schedule · " + host.Name + " · restic-manager" view.Page = scheduleEditPage{ hostChromeData: s.loadHostChrome(r, *host, "schedules", "new schedule"), @@ -186,7 +186,7 @@ func (s *Server) handleUIScheduleEditGet(w stdhttp.ResponseWriter, r *stdhttp.Re for _, gid := range sc.SourceGroupIDs { selected[gid] = true } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Edit schedule · " + host.Name + " · restic-manager" view.Page = scheduleEditPage{ hostChromeData: s.loadHostChrome(r, *host, "schedules", "edit schedule"), @@ -415,7 +415,7 @@ func (s *Server) renderScheduleFormError(w stdhttp.ResponseWriter, r *stdhttp.Re saveAction = "/hosts/" + host.ID + "/schedules/" + sid + "/edit" crumb = "edit schedule" } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Schedule · " + host.Name + " · restic-manager" view.Page = scheduleEditPage{ hostChromeData: s.loadHostChrome(r, *host, "schedules", crumb), diff --git a/internal/server/http/ui_sources.go b/internal/server/http/ui_sources.go index c4581a5..617a79f 100644 --- a/internal/server/http/ui_sources.go +++ b/internal/server/http/ui_sources.go @@ -121,7 +121,7 @@ func (s *Server) handleUIHostSources(w stdhttp.ResponseWriter, r *stdhttp.Reques // loadHostChrome already counted groups; reuse count we just got. chrome.SourceGroupCount = len(groups) - view := s.baseView(u) + view := s.baseView(r, u) view.Title = host.Name + " sources · restic-manager" view.Page = hostSourcesPage{hostChromeData: chrome, Groups: rows} if err := s.deps.UI.Render(w, "host_sources", view); err != nil { @@ -139,7 +139,7 @@ func (s *Server) handleUISourceGroupNewGet(w stdhttp.ResponseWriter, r *stdhttp. if !ok { return } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "New source group · " + host.Name + " · restic-manager" view.Page = sourceGroupEditPage{ hostChromeData: s.loadHostChrome(r, *host, "sources", "new source group"), @@ -173,7 +173,7 @@ func (s *Server) handleUISourceGroupEditGet(w stdhttp.ResponseWriter, r *stdhttp stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) return } - view := s.baseView(u) + view := s.baseView(r, u) view.Title = g.Name + " · " + host.Name + " · restic-manager" form := formFromGroup(*g) form.PreHook = s.decryptHookOrFallback(g.PreHook, "", host.ID, "pre") @@ -362,7 +362,7 @@ func (s *Server) handleUISourceGroupDelete(w stdhttp.ResponseWriter, r *stdhttp. // typed input intact + an error banner. Returns 422 to signal "form // rejected" while still returning HTML (mirrors handleUIAddHostPost). func (s *Server) renderSourceFormError(w stdhttp.ResponseWriter, r *stdhttp.Request, u *ui.User, host *store.Host, gid string, isNew bool, form sourceFormData, msg string) { - view := s.baseView(u) + view := s.baseView(r, u) view.Title = "Source group · " + host.Name + " · restic-manager" saveAction := "/hosts/" + host.ID + "/sources/new" crumb := "new source group" diff --git a/internal/server/ui/funcs.go b/internal/server/ui/funcs.go index 71c350a..673437f 100644 --- a/internal/server/ui/funcs.go +++ b/internal/server/ui/funcs.go @@ -38,6 +38,68 @@ func funcMap() template.FuncMap { // list packs strings into a slice — handy for inline ranges // in templates (e.g. quick-pick cron presets). "list": func(items ...string) []string { return items }, + // dict builds a map[string]any from alternating key-value pairs. + // Useful for passing multiple named values to a sub-template: + // {{template "foo" (dict "A" $a "B" $b)}} + "dict": func(pairs ...any) map[string]any { + m := make(map[string]any, len(pairs)/2) + for i := 0; i+1 < len(pairs); i += 2 { + if k, ok := pairs[i].(string); ok { + m[k] = pairs[i+1] + } + } + return m + }, + // mapGet retrieves a string value from a map[string]string by key. + // Returns "" when the key is absent or the map is nil. Used by the + // alert_row partial to resolve host_id → host name. + "mapGet": func(m map[string]string, key *string) string { + if m == nil || key == nil { + return "" + } + return m[*key] + }, + // alertStatus derives the display status of an alert from its DB + // fields: "open", "acknowledged", or "resolved". + // Accepts any value — returns "" for unrecognised input so templates + // can still render safely. + "alertStatus": func(resolvedAt, acknowledgedAt any) string { + isSet := func(v any) bool { + if v == nil { + return false + } + switch t := v.(type) { + case *time.Time: + return t != nil + } + return false + } + if isSet(resolvedAt) { + return "resolved" + } + if isSet(acknowledgedAt) { + return "acknowledged" + } + return "open" + }, + // stillHappening returns true when last_seen_at is within the last + // 60 seconds — used to render the "still happening · Ns ago" pill + // on alert rows where the signal is still firing. + "stillHappening": func(v any) bool { + var t time.Time + switch x := v.(type) { + case time.Time: + t = x + case *time.Time: + if x == nil { + return false + } + t = *x + default: + return false + } + return time.Since(t) < 60*time.Second + }, } } diff --git a/internal/server/ui/ui.go b/internal/server/ui/ui.go index 8c5e52b..d970b74 100644 --- a/internal/server/ui/ui.go +++ b/internal/server/ui/ui.go @@ -93,6 +93,8 @@ func New() (*Renderer, error) { "templates/partials/awaiting_agent.html", "templates/partials/host_chrome.html", "templates/partials/tree_node.html", + "templates/partials/alert_row.html", + "templates/partials/crit_banner.html", } pageEntries, err := fs.Glob(web.FS, "templates/pages/*.html") diff --git a/internal/server/ui/ui_parse_test.go b/internal/server/ui/ui_parse_test.go new file mode 100644 index 0000000..e8f3919 --- /dev/null +++ b/internal/server/ui/ui_parse_test.go @@ -0,0 +1,12 @@ +package ui + +import "testing" + +// TestNewParsesAllTemplates ensures ui.New() can parse every template +// registered under templates/pages/ without error. Run this after +// adding or editing any template file. +func TestNewParsesAllTemplates(t *testing.T) { + if _, err := New(); err != nil { + t.Fatalf("ui.New() returned error: %v", err) + } +} diff --git a/internal/server/ws/handler.go b/internal/server/ws/handler.go index b488095..4ef61ee 100644 --- a/internal/server/ws/handler.go +++ b/internal/server/ws/handler.go @@ -12,6 +12,7 @@ import ( "github.com/coder/websocket" + "gitea.dcglab.co.uk/steve/restic-manager/internal/alert" "gitea.dcglab.co.uk/steve/restic-manager/internal/api" "gitea.dcglab.co.uk/steve/restic-manager/internal/auth" "gitea.dcglab.co.uk/steve/restic-manager/internal/store" @@ -22,6 +23,9 @@ type HandlerDeps struct { Hub *Hub Store *store.Store JobHub *JobHub + // AlertEngine receives job-finished and host-online events so the + // alert engine can evaluate its rules. Optional; nil = no-op. + AlertEngine *alert.Engine // OnHello is called once per successful hello, after the host row // has been touched and the conn registered. Used by the HTTP // layer to push host_credentials down as a config.update before @@ -140,6 +144,9 @@ func runAgentLoop(ctx context.Context, c *Conn, hostID string, deps HandlerDeps) helloPayload.ProtocolVersion, now); err != nil { slog.Error("ws mark host hello failed", "host_id", hostID, "err", err) } + if deps.AlertEngine != nil { + deps.AlertEngine.NotifyHostOnline(hostID) + } deps.Hub.Register(hostID, c) defer deps.Hub.Unregister(hostID, c) @@ -210,6 +217,17 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E if deps.JobHub != nil { deps.JobHub.Broadcast(p.JobID, env) } + if deps.AlertEngine != nil { + if job, err := deps.Store.GetJob(ctx, p.JobID); err == nil && job != nil { + deps.AlertEngine.NotifyJobFinished(alert.JobFinishedEvent{ + HostID: hostID, + JobID: p.JobID, + Kind: job.Kind, + Status: string(p.Status), + When: p.FinishedAt, + }) + } + } case api.MsgLogStream: var p api.LogStreamLine diff --git a/internal/store/alerts.go b/internal/store/alerts.go new file mode 100644 index 0000000..ef9036f --- /dev/null +++ b/internal/store/alerts.go @@ -0,0 +1,245 @@ +package store + +import ( + "context" + "database/sql" + "errors" + "fmt" + "strings" + "time" + + "github.com/oklog/ulid/v2" +) + +// AlertFilter narrows ListAlerts. +type AlertFilter struct { + Status string // "open" | "acknowledged" | "resolved" | "all" | "" + Severity string // "info" | "warning" | "critical" | "" + HostID string // empty = any host + Search string // substring match on message + Limit int // 0 = no limit +} + +// RaiseOrTouch implements the dedup + last_seen_at bump pattern. If +// an alert with (host_id, kind, resolved_at IS NULL) already exists, +// it touches last_seen_at + message and returns (id, false). Otherwise +// inserts a fresh row and returns (id, true). Caller fires a +// notification only when didRaise=true. +func (s *Store) RaiseOrTouch(ctx context.Context, hostID, kind, severity, message string, when time.Time) (id string, didRaise bool, err error) { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return "", false, fmt.Errorf("store: begin: %w", err) + } + defer func() { _ = tx.Rollback() }() + + row := tx.QueryRowContext(ctx, + `SELECT id FROM alerts WHERE host_id = ? AND kind = ? AND resolved_at IS NULL LIMIT 1`, + hostID, kind) + var existing string + switch err := row.Scan(&existing); { + case err == nil: + _, uerr := tx.ExecContext(ctx, + `UPDATE alerts SET last_seen_at = ?, message = ? WHERE id = ?`, + when.UTC().Format(time.RFC3339Nano), message, existing) + if uerr != nil { + return "", false, fmt.Errorf("store: touch alert: %w", uerr) + } + if err := tx.Commit(); err != nil { + return "", false, err + } + return existing, false, nil + case errors.Is(err, sql.ErrNoRows): + // fall through to insert + default: + return "", false, fmt.Errorf("store: lookup alert: %w", err) + } + + id = ulid.Make().String() + whenStr := when.UTC().Format(time.RFC3339Nano) + _, err = tx.ExecContext(ctx, + `INSERT INTO alerts (id, host_id, kind, severity, message, created_at, last_seen_at) + VALUES (?, ?, ?, ?, ?, ?, ?)`, + id, hostID, kind, severity, message, whenStr, whenStr) + if err != nil { + return "", false, fmt.Errorf("store: insert alert: %w", err) + } + if err := tx.Commit(); err != nil { + return "", false, err + } + _ = s.refreshHostOpenAlertCount(ctx, s.db, hostID) + return id, true, nil +} + +// refreshHostOpenAlertCount recomputes hosts.open_alert_count from the +// alerts table for one host. Self-healing: idempotent and survives +// out-of-order edits. Best-effort — errors are returned but callers +// generally discard them since the projection is non-critical. +func (s *Store) refreshHostOpenAlertCount(ctx context.Context, exec interface { + ExecContext(context.Context, string, ...any) (sql.Result, error) +}, hostID string, +) error { + if hostID == "" { + return nil + } + _, err := exec.ExecContext(ctx, + `UPDATE hosts SET open_alert_count = ( + SELECT COUNT(*) FROM alerts + WHERE host_id = ? AND resolved_at IS NULL + ) WHERE id = ?`, hostID, hostID) + if err != nil { + return fmt.Errorf("store: refresh open_alert_count: %w", err) + } + return nil +} + +// Acknowledge sets acknowledged_at + acknowledged_by; does NOT set +// resolved_at. Idempotent — re-acknowledging just refreshes the timestamp. +func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Time) error { + res, err := s.db.ExecContext(ctx, + `UPDATE alerts SET acknowledged_at = ?, acknowledged_by = ? + WHERE id = ? AND resolved_at IS NULL`, + when.UTC().Format(time.RFC3339Nano), userID, id) + if err != nil { + return fmt.Errorf("store: ack alert: %w", err) + } + n, _ := res.RowsAffected() + if n == 0 { + return ErrNotFound + } + return nil +} + +// Resolve marks the alert resolved. Idempotent on already-resolved rows +// (no-op). +func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error { + var hostID sql.NullString + _ = s.db.QueryRowContext(ctx, `SELECT host_id FROM alerts WHERE id = ?`, id).Scan(&hostID) + _, err := s.db.ExecContext(ctx, + `UPDATE alerts SET resolved_at = ? + WHERE id = ? AND resolved_at IS NULL`, + when.UTC().Format(time.RFC3339Nano), id) + if err != nil { + return fmt.Errorf("store: resolve alert: %w", err) + } + if hostID.Valid { + _ = s.refreshHostOpenAlertCount(ctx, s.db, hostID.String) + } + return nil +} + +// AutoResolve closes every open alert for the (host_id, kind) pair. +// Used by the engine when a rule's underlying condition clears (e.g. +// next backup succeeded so backup_failed clears). +func (s *Store) AutoResolve(ctx context.Context, hostID, kind string, when time.Time) error { + _, err := s.db.ExecContext(ctx, + `UPDATE alerts SET resolved_at = ? + WHERE host_id = ? AND kind = ? AND resolved_at IS NULL`, + when.UTC().Format(time.RFC3339Nano), hostID, kind) + if err != nil { + return fmt.Errorf("store: auto-resolve: %w", err) + } + _ = s.refreshHostOpenAlertCount(ctx, s.db, hostID) + return nil +} + +// GetAlert reads one row. +func (s *Store) GetAlert(ctx context.Context, id string) (*Alert, error) { + row := s.db.QueryRowContext(ctx, + `SELECT id, host_id, kind, severity, message, created_at, last_seen_at, + acknowledged_at, acknowledged_by, resolved_at + FROM alerts WHERE id = ?`, id) + return scanAlert(row.Scan) +} + +// ListAlerts is the filtered list. Sort: open-first, then by created_at desc. +func (s *Store) ListAlerts(ctx context.Context, f AlertFilter) ([]Alert, error) { + q := `SELECT id, host_id, kind, severity, message, created_at, last_seen_at, + acknowledged_at, acknowledged_by, resolved_at FROM alerts` + conds := []string{} + args := []any{} + switch f.Status { + case "open": + conds = append(conds, "resolved_at IS NULL AND acknowledged_at IS NULL") + case "acknowledged": + conds = append(conds, "resolved_at IS NULL AND acknowledged_at IS NOT NULL") + case "resolved": + conds = append(conds, "resolved_at IS NOT NULL") + case "all", "": + // no-op + } + if f.Severity != "" { + conds = append(conds, "severity = ?") + args = append(args, f.Severity) + } + if f.HostID != "" { + conds = append(conds, "host_id = ?") + args = append(args, f.HostID) + } + if f.Search != "" { + conds = append(conds, "message LIKE ?") + args = append(args, "%"+f.Search+"%") + } + if len(conds) > 0 { + q += " WHERE " + strings.Join(conds, " AND ") + } + q += ` ORDER BY (resolved_at IS NULL) DESC, created_at DESC` + if f.Limit > 0 { + q += ` LIMIT ?` + args = append(args, f.Limit) + } + rows, err := s.db.QueryContext(ctx, q, args...) + if err != nil { + return nil, fmt.Errorf("store: list alerts: %w", err) + } + defer func() { _ = rows.Close() }() + var out []Alert + for rows.Next() { + a, err := scanAlert(rows.Scan) + if err != nil { + return nil, err + } + out = append(out, *a) + } + return out, rows.Err() +} + +// scanAlert centralises the column read so the GetAlert and +// ListAlerts paths agree on column order. Pass row.Scan or rows.Scan. +func scanAlert(scan func(...any) error) (*Alert, error) { + var a Alert + var hostID, lastSeen, ackedAt, ackedBy, resolvedAt sql.NullString + var createdAt string + if err := scan(&a.ID, &hostID, &a.Kind, &a.Severity, &a.Message, + &createdAt, &lastSeen, &ackedAt, &ackedBy, &resolvedAt); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("store: scan alert: %w", err) + } + if hostID.Valid { + v := hostID.String + a.HostID = &v + } + t, err := time.Parse(time.RFC3339Nano, createdAt) + if err != nil { + return nil, fmt.Errorf("store: parse created_at: %w", err) + } + a.CreatedAt = t + if lastSeen.Valid { + t, _ := time.Parse(time.RFC3339Nano, lastSeen.String) + a.LastSeenAt = &t + } + if ackedAt.Valid { + t, _ := time.Parse(time.RFC3339Nano, ackedAt.String) + a.AcknowledgedAt = &t + } + if ackedBy.Valid { + v := ackedBy.String + a.AcknowledgedBy = &v + } + if resolvedAt.Valid { + t, _ := time.Parse(time.RFC3339Nano, resolvedAt.String) + a.ResolvedAt = &t + } + return &a, nil +} diff --git a/internal/store/alerts_test.go b/internal/store/alerts_test.go new file mode 100644 index 0000000..8771cb7 --- /dev/null +++ b/internal/store/alerts_test.go @@ -0,0 +1,179 @@ +package store + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" +) + +func newTestStoreWithHost(t *testing.T) (*Store, string) { + t.Helper() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { _ = st.Close() }) + hostID := ulid.Make().String() + if err := st.CreateHost(context.Background(), Host{ + ID: hostID, Name: "h", OS: "linux", Arch: "amd64", + EnrolledAt: time.Now().UTC(), + }, "deadbeef", ""); err != nil { + t.Fatalf("create host: %v", err) + } + return st, hostID +} + +func TestRaiseOrTouchInsertsThenTouches(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + t0 := time.Now().UTC() + id1, didRaise, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", + "Backup failed: 401", t0) + if err != nil { + t.Fatalf("first raise: %v", err) + } + if !didRaise { + t.Fatalf("first call must didRaise=true") + } + if id1 == "" { + t.Fatalf("expected non-empty id") + } + + // Second call within the same open window should touch, not insert. + t1 := t0.Add(60 * time.Second) + id2, didRaise2, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", + "Backup failed: 401 (still)", t1) + if err != nil { + t.Fatalf("touch: %v", err) + } + if didRaise2 { + t.Fatalf("second call must didRaise=false") + } + if id2 != id1 { + t.Fatalf("touch returned a different id: got %q want %q", id2, id1) + } + + // last_seen_at and message must be updated. + got, err := st.GetAlert(ctx, id1) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.LastSeenAt == nil || !got.LastSeenAt.Equal(t1) { + t.Errorf("last_seen_at: got %v want %v", got.LastSeenAt, t1) + } + if got.Message != "Backup failed: 401 (still)" { + t.Errorf("message not refreshed: %q", got.Message) + } +} + +func TestResolveAndReRaiseStartsFreshAlert(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + t0 := time.Now().UTC() + id1, _, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "first", t0) + if err != nil { + t.Fatalf("raise: %v", err) + } + if err := st.Resolve(ctx, id1, t0.Add(time.Minute)); err != nil { + t.Fatalf("resolve: %v", err) + } + + id2, didRaise, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "second", t0.Add(2*time.Minute)) + if err != nil { + t.Fatalf("re-raise: %v", err) + } + if !didRaise { + t.Fatalf("post-resolve raise must didRaise=true") + } + if id2 == id1 { + t.Fatalf("re-raise reused the resolved id; want a fresh row") + } +} + +func TestAcknowledgeKeepsAlertOpen(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + // Create a real user so the acknowledged_by FK is satisfied. + userID := ulid.Make().String() + if err := st.CreateUser(ctx, User{ + ID: userID, Username: "ackuser", PasswordHash: "x", + Role: RoleOperator, CreatedAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("create user: %v", err) + } + + id, _, err := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "m", time.Now().UTC()) + if err != nil { + t.Fatalf("raise: %v", err) + } + if err := st.Acknowledge(ctx, id, userID, time.Now().UTC()); err != nil { + t.Fatalf("ack: %v", err) + } + got, err := st.GetAlert(ctx, id) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.AcknowledgedAt == nil { + t.Errorf("acknowledged_at not set") + } + if got.AcknowledgedBy == nil || *got.AcknowledgedBy != userID { + t.Errorf("acknowledged_by: got %v want %q", got.AcknowledgedBy, userID) + } + if got.ResolvedAt != nil { + t.Errorf("ack must not set resolved_at; got %v", got.ResolvedAt) + } +} + +func TestAutoResolveClearsOpenAlerts(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + + t0 := time.Now().UTC() + id, _, _ := st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "m", t0) + if err := st.AutoResolve(ctx, hostID, "backup_failed", t0.Add(time.Minute)); err != nil { + t.Fatalf("auto-resolve: %v", err) + } + got, _ := st.GetAlert(ctx, id) + if got.ResolvedAt == nil { + t.Errorf("expected resolved_at set") + } +} + +func TestListAlertsFilters(t *testing.T) { + t.Parallel() + st, hostID := newTestStoreWithHost(t) + ctx := context.Background() + t0 := time.Now().UTC() + + // One open warning + one resolved info. + _, _, _ = st.RaiseOrTouch(ctx, hostID, "backup_failed", "warning", "open", t0) + id2, _, _ := st.RaiseOrTouch(ctx, hostID, "stale_schedule", "info", "done", t0) + _ = st.Resolve(ctx, id2, t0.Add(time.Minute)) + + open, err := st.ListAlerts(ctx, AlertFilter{Status: "open"}) + if err != nil { + t.Fatalf("list open: %v", err) + } + if len(open) != 1 || open[0].Severity != "warning" { + t.Errorf("open filter: got %+v", open) + } + + all, err := st.ListAlerts(ctx, AlertFilter{Status: "all"}) + if err != nil { + t.Fatalf("list all: %v", err) + } + if len(all) != 2 { + t.Errorf("all filter: got %d, want 2", len(all)) + } +} diff --git a/internal/store/hosts.go b/internal/store/hosts.go index 96f85aa..ffb1295 100644 --- a/internal/store/hosts.go +++ b/internal/store/hosts.go @@ -110,6 +110,55 @@ func (s *Store) MarkHostsOfflineStale(ctx context.Context, cutoff time.Time) (in return n, nil } +// MarkHostsOfflineStaleReturnIDs flips any host that hasn't been seen +// since before `cutoff` from 'online' to 'offline' and returns the IDs +// of every host that was flipped. Uses a single transaction. +func (s *Store) MarkHostsOfflineStaleReturnIDs(ctx context.Context, cutoff time.Time) ([]string, error) { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return nil, fmt.Errorf("store: begin tx: %w", err) + } + defer func() { _ = tx.Rollback() }() + + cutoffStr := cutoff.UTC().Format(time.RFC3339Nano) + rows, err := tx.QueryContext(ctx, + `SELECT id FROM hosts + WHERE status = 'online' + AND (last_seen_at IS NULL OR last_seen_at < ?)`, + cutoffStr) + if err != nil { + return nil, fmt.Errorf("store: select stale hosts: %w", err) + } + var ids []string + for rows.Next() { + var id string + if err := rows.Scan(&id); err != nil { + _ = rows.Close() + return nil, fmt.Errorf("store: scan stale host id: %w", err) + } + ids = append(ids, id) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("store: iterate stale hosts: %w", err) + } + _ = rows.Close() + + if len(ids) > 0 { + if _, err := tx.ExecContext(ctx, + `UPDATE hosts SET status = 'offline' + WHERE status = 'online' + AND (last_seen_at IS NULL OR last_seen_at < ?)`, + cutoffStr); err != nil { + return nil, fmt.Errorf("store: mark offline: %w", err) + } + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("store: commit: %w", err) + } + return ids, nil +} + // ListHosts returns every host. Phase 1 callers fit a small fleet in // memory; pagination lands when it matters. func (s *Store) ListHosts(ctx context.Context) ([]Host, error) { diff --git a/internal/store/migrate_test.go b/internal/store/migrate_test.go new file mode 100644 index 0000000..a02908e --- /dev/null +++ b/internal/store/migrate_test.go @@ -0,0 +1,71 @@ +package store + +import ( + "context" + "path/filepath" + "testing" +) + +func TestMigration0013AlertsLastSeen(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + defer st.Close() + + // Column must exist after migration. Best signal: PRAGMA table_info. + rows, err := st.DB().Query(`SELECT name FROM pragma_table_info('alerts')`) + if err != nil { + t.Fatalf("pragma: %v", err) + } + defer rows.Close() + cols := map[string]bool{} + for rows.Next() { + var n string + if err := rows.Scan(&n); err != nil { + t.Fatalf("scan: %v", err) + } + cols[n] = true + } + if err := rows.Err(); err != nil { + t.Fatalf("rows iter: %v", err) + } + if !cols["last_seen_at"] { + t.Fatalf("alerts.last_seen_at not present after migration; cols=%v", cols) + } +} + +func TestMigration0014NotificationsTables(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + defer st.Close() + + for _, want := range []string{"notification_channels", "notification_log"} { + var n int + if err := st.DB().QueryRow( + `SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?`, want, + ).Scan(&n); err != nil { + t.Fatalf("scan: %v", err) + } + if n != 1 { + t.Errorf("table %q missing after migration", want) + } + } + + // Sanity: kind CHECK accepts all three v1 kinds. + for _, k := range []string{"webhook", "ntfy", "smtp"} { + _, err := st.DB().Exec( + `INSERT INTO notification_channels (id, kind, name, config, created_at, updated_at) + VALUES (?, ?, ?, x'00', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')`, + "test-"+k, k, "test-"+k) + if err != nil { + t.Errorf("insert %q rejected by CHECK: %v", k, err) + } + } +} diff --git a/internal/store/migrations/0013_alerts_last_seen.sql b/internal/store/migrations/0013_alerts_last_seen.sql new file mode 100644 index 0000000..1acac1c --- /dev/null +++ b/internal/store/migrations/0013_alerts_last_seen.sql @@ -0,0 +1,16 @@ +-- 0013_alerts_last_seen.sql +-- +-- Add alerts.last_seen_at to support open-alert dedup with +-- recurrence-tracking. The engine bumps this column on every tick +-- where a rule still matches an existing open alert, so the UI can +-- render "still happening · Ns ago" without sending a fresh +-- notification. +-- +-- Column-level ALTER per CLAUDE.md (no rebuild — alerts has inbound +-- FK from acknowledged_by → users; rebuild would risk cascade). +-- Backfill last_seen_at = created_at for any pre-existing rows so +-- the column is non-null in practice (stays nullable in the schema +-- for forwards-compat with rows that haven't been touched yet). + +ALTER TABLE alerts ADD COLUMN last_seen_at TEXT; +UPDATE alerts SET last_seen_at = created_at WHERE last_seen_at IS NULL; diff --git a/internal/store/migrations/0014_notifications.sql b/internal/store/migrations/0014_notifications.sql new file mode 100644 index 0000000..130fb61 --- /dev/null +++ b/internal/store/migrations/0014_notifications.sql @@ -0,0 +1,42 @@ +-- 0014_notifications.sql +-- +-- Notification channels (operator-configured destinations: webhook, +-- ntfy, SMTP) and the dispatch log. Both are net-new — no rebuild +-- pattern needed. +-- +-- config is an AEAD-encrypted JSON blob. Per-kind shape lives in +-- internal/notification/{webhook,ntfy,smtp}.go. The CHECK keeps wire +-- consistency — adding a new kind requires a follow-up migration +-- (forces the implementer to think about it). + +CREATE TABLE notification_channels ( + id TEXT PRIMARY KEY, + kind TEXT NOT NULL CHECK (kind IN ('webhook', 'ntfy', 'smtp')), + name TEXT NOT NULL, + enabled INTEGER NOT NULL DEFAULT 1 CHECK (enabled IN (0, 1)), + config BLOB NOT NULL, -- AEAD-encrypted JSON; per-kind shape + default_priority TEXT, -- ntfy only; null for webhook + smtp + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_fired_at TEXT +); + +CREATE INDEX notification_channels_enabled + ON notification_channels(enabled) WHERE enabled = 1; + +CREATE TABLE notification_log ( + id TEXT PRIMARY KEY, + channel_id TEXT NOT NULL REFERENCES notification_channels(id) ON DELETE CASCADE, + alert_id TEXT REFERENCES alerts(id) ON DELETE SET NULL, + event TEXT NOT NULL, -- alert.raised | alert.acknowledged | alert.resolved | alert.test + ok INTEGER NOT NULL CHECK (ok IN (0, 1)), + status_code INTEGER, + latency_ms INTEGER, + error TEXT, + fired_at TEXT NOT NULL +); + +CREATE INDEX notification_log_channel + ON notification_log(channel_id, fired_at DESC); +CREATE INDEX notification_log_alert + ON notification_log(alert_id); diff --git a/internal/store/notification_channels.go b/internal/store/notification_channels.go new file mode 100644 index 0000000..4646e49 --- /dev/null +++ b/internal/store/notification_channels.go @@ -0,0 +1,224 @@ +package store + +import ( + "context" + "database/sql" + "errors" + "fmt" + "time" +) + +// NotificationChannel mirrors a row in notification_channels. The +// Config field is the AEAD-encrypted JSON blob; callers (in the +// notification package) decrypt before use. +type NotificationChannel struct { + ID string + Kind string // "webhook" | "ntfy" | "smtp" + Name string + Enabled bool + Config []byte // AEAD ciphertext; opaque at this layer + DefaultPriority *string + CreatedAt time.Time + UpdatedAt time.Time + LastFiredAt *time.Time +} + +// NotificationLogEntry is one row in notification_log. +type NotificationLogEntry struct { + ID string + ChannelID string + AlertID *string + Event string // alert.raised | alert.acknowledged | alert.resolved | alert.test + OK bool + StatusCode *int + LatencyMS *int + Error *string + FiredAt time.Time +} + +// CreateNotificationChannel inserts a new notification channel row. +func (s *Store) CreateNotificationChannel(ctx context.Context, ch NotificationChannel) error { + enabled := 0 + if ch.Enabled { + enabled = 1 + } + _, err := s.db.ExecContext(ctx, + `INSERT INTO notification_channels + (id, kind, name, enabled, config, default_priority, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ch.ID, ch.Kind, ch.Name, enabled, ch.Config, + nullable(ch.DefaultPriority), + ch.CreatedAt.UTC().Format(time.RFC3339Nano), + ch.UpdatedAt.UTC().Format(time.RFC3339Nano)) + if err != nil { + return fmt.Errorf("store: create channel: %w", err) + } + return nil +} + +// UpdateNotificationChannel updates mutable fields on an existing channel row. +func (s *Store) UpdateNotificationChannel(ctx context.Context, ch NotificationChannel) error { + enabled := 0 + if ch.Enabled { + enabled = 1 + } + _, err := s.db.ExecContext(ctx, + `UPDATE notification_channels + SET kind = ?, name = ?, enabled = ?, config = ?, + default_priority = ?, updated_at = ? + WHERE id = ?`, + ch.Kind, ch.Name, enabled, ch.Config, + nullable(ch.DefaultPriority), + ch.UpdatedAt.UTC().Format(time.RFC3339Nano), + ch.ID) + if err != nil { + return fmt.Errorf("store: update channel: %w", err) + } + return nil +} + +// SetNotificationChannelEnabled flips the enabled flag without +// touching kind/name/config — used by the inline list-row toggle. +func (s *Store) SetNotificationChannelEnabled(ctx context.Context, id string, enabled bool, when time.Time) error { + v := 0 + if enabled { + v = 1 + } + _, err := s.db.ExecContext(ctx, + `UPDATE notification_channels SET enabled = ?, updated_at = ? WHERE id = ?`, + v, when.UTC().Format(time.RFC3339Nano), id) + if err != nil { + return fmt.Errorf("store: set channel enabled: %w", err) + } + return nil +} + +// DeleteNotificationChannel removes a channel row; cascades to notification_log. +func (s *Store) DeleteNotificationChannel(ctx context.Context, id string) error { + _, err := s.db.ExecContext(ctx, + `DELETE FROM notification_channels WHERE id = ?`, id) + if err != nil { + return fmt.Errorf("store: delete channel: %w", err) + } + return nil +} + +// GetNotificationChannel returns one channel by primary key or ErrNotFound. +func (s *Store) GetNotificationChannel(ctx context.Context, id string) (*NotificationChannel, error) { + row := s.db.QueryRowContext(ctx, + `SELECT id, kind, name, enabled, config, default_priority, + created_at, updated_at, last_fired_at + FROM notification_channels WHERE id = ?`, id) + return scanChannel(row.Scan) +} + +// ListNotificationChannels returns all channels ordered by created_at ascending. +func (s *Store) ListNotificationChannels(ctx context.Context) ([]NotificationChannel, error) { + rows, err := s.db.QueryContext(ctx, + `SELECT id, kind, name, enabled, config, default_priority, + created_at, updated_at, last_fired_at + FROM notification_channels ORDER BY created_at ASC`) + if err != nil { + return nil, fmt.Errorf("store: list channels: %w", err) + } + defer func() { _ = rows.Close() }() + var out []NotificationChannel + for rows.Next() { + c, err := scanChannel(rows.Scan) + if err != nil { + return nil, err + } + out = append(out, *c) + } + return out, rows.Err() +} + +// ListEnabledNotificationChannels returns only channels with enabled=1, ordered by created_at. +func (s *Store) ListEnabledNotificationChannels(ctx context.Context) ([]NotificationChannel, error) { + rows, err := s.db.QueryContext(ctx, + `SELECT id, kind, name, enabled, config, default_priority, + created_at, updated_at, last_fired_at + FROM notification_channels WHERE enabled = 1 ORDER BY created_at ASC`) + if err != nil { + return nil, fmt.Errorf("store: list enabled: %w", err) + } + defer func() { _ = rows.Close() }() + var out []NotificationChannel + for rows.Next() { + c, err := scanChannel(rows.Scan) + if err != nil { + return nil, err + } + out = append(out, *c) + } + return out, rows.Err() +} + +// AppendNotificationLog records a delivery attempt + bumps the +// channel's last_fired_at on success. +func (s *Store) AppendNotificationLog(ctx context.Context, e NotificationLogEntry) error { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("store: begin: %w", err) + } + defer func() { _ = tx.Rollback() }() + + ok := 0 + if e.OK { + ok = 1 + } + _, err = tx.ExecContext(ctx, + `INSERT INTO notification_log + (id, channel_id, alert_id, event, ok, status_code, latency_ms, error, fired_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, + e.ID, e.ChannelID, nullable(e.AlertID), e.Event, ok, + nullableInt(e.StatusCode), nullableInt(e.LatencyMS), + nullable(e.Error), + e.FiredAt.UTC().Format(time.RFC3339Nano)) + if err != nil { + return fmt.Errorf("store: append notification_log: %w", err) + } + + if e.OK { + if _, err := tx.ExecContext(ctx, + `UPDATE notification_channels SET last_fired_at = ? WHERE id = ?`, + e.FiredAt.UTC().Format(time.RFC3339Nano), e.ChannelID); err != nil { + return fmt.Errorf("store: bump last_fired_at: %w", err) + } + } + return tx.Commit() +} + +func scanChannel(scan func(...any) error) (*NotificationChannel, error) { + var c NotificationChannel + var enabled int + var defaultPri, lastFired sql.NullString + var createdAt, updatedAt string + if err := scan(&c.ID, &c.Kind, &c.Name, &enabled, &c.Config, + &defaultPri, &createdAt, &updatedAt, &lastFired); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("store: scan channel: %w", err) + } + c.Enabled = enabled == 1 + if defaultPri.Valid { + v := defaultPri.String + c.DefaultPriority = &v + } + t, err := time.Parse(time.RFC3339Nano, createdAt) + if err != nil { + return nil, fmt.Errorf("store: parse created_at: %w", err) + } + c.CreatedAt = t + t, err = time.Parse(time.RFC3339Nano, updatedAt) + if err != nil { + return nil, fmt.Errorf("store: parse updated_at: %w", err) + } + c.UpdatedAt = t + if lastFired.Valid { + t, _ := time.Parse(time.RFC3339Nano, lastFired.String) + c.LastFiredAt = &t + } + return &c, nil +} diff --git a/internal/store/notification_channels_test.go b/internal/store/notification_channels_test.go new file mode 100644 index 0000000..412d14c --- /dev/null +++ b/internal/store/notification_channels_test.go @@ -0,0 +1,96 @@ +package store + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/oklog/ulid/v2" +) + +func TestNotificationChannelCRUD(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, err := Open(context.Background(), filepath.Join(dir, "rm.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + defer st.Close() + ctx := context.Background() + + ch := NotificationChannel{ + ID: ulid.Make().String(), Kind: "webhook", Name: "team-slack", + Enabled: true, Config: []byte("encrypted-blob"), + CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + } + if err := st.CreateNotificationChannel(ctx, ch); err != nil { + t.Fatalf("create: %v", err) + } + + got, err := st.GetNotificationChannel(ctx, ch.ID) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.Name != ch.Name || got.Kind != "webhook" || string(got.Config) != "encrypted-blob" { + t.Fatalf("got %+v", got) + } + + got.Name = "team-slack-renamed" + got.Enabled = false + got.UpdatedAt = time.Now().UTC() + if err := st.UpdateNotificationChannel(ctx, *got); err != nil { + t.Fatalf("update: %v", err) + } + got2, _ := st.GetNotificationChannel(ctx, ch.ID) + if got2.Name != "team-slack-renamed" || got2.Enabled { + t.Fatalf("update not applied: %+v", got2) + } + + all, _ := st.ListEnabledNotificationChannels(ctx) + if len(all) != 0 { + t.Errorf("disabled channel returned by ListEnabled: %d", len(all)) + } + + if err := st.DeleteNotificationChannel(ctx, ch.ID); err != nil { + t.Fatalf("delete: %v", err) + } + if _, err := st.GetNotificationChannel(ctx, ch.ID); err == nil { + t.Errorf("expected ErrNotFound after delete") + } +} + +func TestAppendNotificationLog(t *testing.T) { + t.Parallel() + dir := t.TempDir() + st, _ := Open(context.Background(), filepath.Join(dir, "rm.db")) + defer st.Close() + ctx := context.Background() + + chID := ulid.Make().String() + if err := st.CreateNotificationChannel(ctx, NotificationChannel{ + ID: chID, Kind: "ntfy", Name: "n", Enabled: true, + Config: []byte{1, 2, 3}, + CreatedAt: time.Now().UTC(), UpdatedAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("create channel: %v", err) + } + + code := 200 + lat := 287 + if err := st.AppendNotificationLog(ctx, NotificationLogEntry{ + ID: ulid.Make().String(), ChannelID: chID, Event: "alert.test", + OK: true, StatusCode: &code, LatencyMS: &lat, + FiredAt: time.Now().UTC(), + }); err != nil { + t.Fatalf("append: %v", err) + } + + // LastFiredAt projection: the channel's last_fired_at is updated + // either by the append helper or by the callers; if you choose the + // helper does the bump, assert it. + got, _ := st.GetNotificationChannel(ctx, chID) + if got.LastFiredAt == nil { + t.Errorf("last_fired_at should bump on AppendNotificationLog success") + } +} diff --git a/internal/store/types.go b/internal/store/types.go index c42bc89..63ecf77 100644 --- a/internal/store/types.go +++ b/internal/store/types.go @@ -193,6 +193,20 @@ type EnrollmentToken struct { ExpiresAt time.Time } +// Alert mirrors the alerts table. +type Alert struct { + ID string + HostID *string + Kind string + Severity string + Message string + CreatedAt time.Time + LastSeenAt *time.Time + AcknowledgedAt *time.Time + AcknowledgedBy *string + ResolvedAt *time.Time +} + // AuditEntry mirrors the audit_log table. type AuditEntry struct { ID string diff --git a/tasks.md b/tasks.md index 1d37642..0531513 100644 --- a/tasks.md +++ b/tasks.md @@ -270,11 +270,13 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days. > **As shipped (Playwright sweep against the live smoke env, 2026-05-04):** login → host detail → Restore button → wizard step 1 picks snapshot a1ac4006 (most recent) → tree drill-down `/home/steve/test` (3 lazy loads) → tick `file1` + `file2` → step 4 confirm summary populated → dispatch → live job page with running progress widget → restore succeeds, files land on disk at `/root/rm-restore//home/steve/test/file{1,2}` (default `$HOME/rm-restore//` after agent-side expansion). Custom-target restore to `/tmp/custom-restore//` lands inside the agent's `PrivateTmp` namespace. Snapshot diff between `a1ac4006` and `5f78c788` → diff job page, statistics output streamed (738 bytes added, 0 removed). Recent-restores line on host detail reads "last restore · succeeded 28s ago · job log →". Download dropdown serves both `.txt` and `.ndjson` with correct `Content-Type` + `Content-Disposition`. SIZE/FILES tooltip "Needs restic 0.17+ on the agent host. This host runs 0.16.4." renders on column hover. -### Phase 3 — Alerts (not started) +### Phase 3 — Alerts ✅ -- [ ] **P3-05** (M) Alert engine: rule evaluation loop (failed backup, stale schedule, agent offline, check failed) -- [ ] **P3-06** (M) Notification channels: webhook, ntfy, SMTP email -- [ ] **P3-07** (S) Alert UI: list, acknowledge, resolve +- [x] **P3-05** (M) Alert engine: rule evaluation loop (failed backup, stale schedule, agent offline, check failed) +- [x] **P3-06** (M) Notification channels: webhook, ntfy, SMTP email +- [x] **P3-07** (S) Alert UI: list, acknowledge, resolve + +> **As shipped (Playwright sweep, 2026-05-04):** /settings/notifications → 3 channels created (sweep-webhook → local Python sink, sweep-ntfy → ntfy.sh public topic, sweep-smtp → MailHog at 127.0.0.1:1025). Test buttons fire alert.test on each: webhook 200/1ms, ntfy 200/322ms, SMTP 250/3ms. Synthetic critical `backup_failed` raised → /alerts shows row with severity dot, kind chip, host, message, raised/last-seen, Ack + Resolve buttons; nav badge `1`; dashboard critical-alert banner appears with Review→ link; OPEN ALERTS card reads `1 unresolved`. Acknowledge → fan-out to all 3 channels emits alert.acknowledged (verified in webhook sink, MailHog inbox, notification_log); Acknowledged tab shows row with `ack'd by ` line. Resolve → fan-out emits alert.resolved across all 3 channels; banner clears; dashboard reads `0 unresolved · all clear`; host alerts column reads —. Three live bugs found and fixed mid-sweep: (a) `enabled` form value lost because hidden+checkbox both named `enabled` and `PostForm.Get` returned the first ("0"); (b) Ack/Resolve handlers stored the state change but never dispatched alert.acknowledged / alert.resolved; (c) `hosts.open_alert_count` projection was never recomputed on Raise/Resolve/AutoResolve, so the dashboard count always read 0. ### Phase 3 — Audit log UI (not started) diff --git a/web/static/css/styles.css b/web/static/css/styles.css index a584aa2..4127239 100644 --- a/web/static/css/styles.css +++ b/web/static/css/styles.css @@ -1,3 +1,3 @@ *,:after,:before{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: } -/*! tailwindcss v3.4.17 | MIT License | https://tailwindcss.com*/*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}:host,html{line-height:1.5;-webkit-text-size-adjust:100%;font-family:Inter,system-ui,-apple-system,sans-serif;font-feature-settings:normal;font-variation-settings:normal;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-tap-highlight-color:transparent}body{line-height:inherit;margin:0}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:JetBrains Mono,ui-monospace,monospace;font-feature-settings:normal;font-size:1em;font-variation-settings:normal}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{color:inherit;font-family:inherit;font-feature-settings:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;letter-spacing:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]:where(:not([hidden=until-found])){display:none}:root{--bg:oklch(0.17 0.006 250);--panel:oklch(0.20 0.007 250);--panel-hi:oklch(0.23 0.008 250);--line:oklch(0.27 0.010 250);--line-soft:oklch(0.23 0.008 250);--ink:oklch(0.96 0.005 250);--ink-mid:oklch(0.78 0.005 250);--ink-mute:oklch(0.58 0.006 250);--ink-fade:oklch(0.42 0.006 250);--ok:oklch(0.78 0.14 155);--warn:oklch(0.82 0.13 80);--bad:oklch(0.70 0.20 25);--off:oklch(0.50 0.005 250);--accent:oklch(0.82 0.12 195)}body,html{background:var(--bg);color:var(--ink);font-family:Inter,system-ui,-apple-system,sans-serif;-webkit-font-smoothing:antialiased}body{font-feature-settings:"cv11","ss01","ss03"}::-moz-selection{background:color-mix(in oklch,var(--accent),transparent 70%)}::selection{background:color-mix(in oklch,var(--accent),transparent 70%)}.\!container{width:100%!important}.container{width:100%}@media (min-width:640px){.\!container{max-width:640px!important}.container{max-width:640px}}@media (min-width:768px){.\!container{max-width:768px!important}.container{max-width:768px}}@media (min-width:1024px){.\!container{max-width:1024px!important}.container{max-width:1024px}}@media (min-width:1280px){.\!container{max-width:1280px!important}.container{max-width:1280px}}@media (min-width:1536px){.\!container{max-width:1536px!important}.container{max-width:1536px}}.mono{font-family:JetBrains Mono,ui-monospace,monospace;font-variant-numeric:tabular-nums}.panel{background:var(--panel);border:1px solid var(--line-soft)}.hairline{box-shadow:inset 0 -1px 0 var(--line-soft)}.dot{border-radius:9999px;display:inline-block;height:7px;width:7px}.dot-online{background:var(--ok);box-shadow:0 0 0 3px color-mix(in oklch,var(--ok),transparent 80%)}.dot-degraded{background:var(--warn);box-shadow:0 0 0 3px color-mix(in oklch,var(--warn),transparent 80%)}.dot-offline{background:var(--off)}.dot-failed{background:var(--bad);box-shadow:0 0 0 3px color-mix(in oklch,var(--bad),transparent 80%)}.pulse{animation:rm-pulse 2.4s ease-in-out infinite}@keyframes rm-pulse{0%,to{box-shadow:0 0 0 3px color-mix(in oklch,var(--accent),transparent 80%)}50%{box-shadow:0 0 0 6px color-mix(in oklch,var(--accent),transparent 92%)}}.btn{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;padding:6px 11px;text-decoration:none;transition:all .12s ease}.btn:hover{background:var(--panel-hi);color:var(--ink)}.btn:disabled,.btn[disabled]{cursor:not-allowed;opacity:.4;pointer-events:none}.btn-primary{background:var(--accent);border-color:var(--accent);color:oklch(.18 .01 195)}.btn-primary:hover{filter:brightness(1.08)}.btn-ghost,.btn-ghost:hover{border-color:transparent}.btn-ghost:hover{background:var(--panel-hi)}.btn-danger{border-color:color-mix(in oklch,var(--bad),transparent 70%);color:var(--bad)}.btn-danger:hover{background:color-mix(in oklch,var(--bad),transparent 88%);border-color:color-mix(in oklch,var(--bad),transparent 50%);color:oklch(.85 .1 25)}.btn-lg{font-size:13px;padding:9px 14px}.btn-block{justify-content:center;width:100%}.nav-tab{border-bottom:2px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:28px;padding:18px 0;text-decoration:none}.nav-tab.active{border-color:var(--accent)}.nav-tab.active,.nav-tab:hover{color:var(--ink)}.sub-tab{border-bottom:1.5px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:24px;padding:12px 0;text-decoration:none}.sub-tab.active{border-color:var(--ink);color:var(--ink)}.tag{align-items:center;border:1px solid var(--line);border-radius:3px;display:inline-flex;font-size:11px;gap:5px;letter-spacing:.01em;line-height:1;padding:4px 7px}.field-label,.tag{color:var(--ink-mid)}.field-label{display:block;font-size:12px;margin-bottom:6px}.field-help{color:var(--ink-mute);font-size:12px;line-height:1.55;margin-top:6px}.field{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;color:var(--ink);font-family:inherit;font-size:13px;outline:none;padding:9px 12px;transition:border-color .12s ease;width:100%}.field:focus{border-color:var(--accent)}.field.invalid{border-color:color-mix(in oklch,var(--bad),transparent 50%)}.field.mono{font-family:JetBrains Mono,monospace;font-size:12px}.field.with-prefix{padding-left:64px}.host-row{align-items:center;border-left:3px solid transparent;-moz-column-gap:18px;column-gap:18px;display:grid;font-size:13px;grid-template-columns:24px 1.4fr .95fr 1.5fr .75fr .7fr .7fr 1.1fr 92px;padding:11px 16px}.host-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.host-row.degraded{border-left-color:color-mix(in oklch,var(--warn),transparent 50%)}.host-row.failed{border-left-color:color-mix(in oklch,var(--bad),transparent 50%)}.host-row.offline{border-left-color:color-mix(in oklch,var(--off),transparent 70%)}.host-row:hover{background:var(--panel-hi)}.host-row.clickable{position:relative}.host-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.host-row.clickable:hover{cursor:pointer}.host-row.clickable>*{pointer-events:none;position:relative;z-index:1}.host-row.clickable>.row-action,.host-row.clickable>.row-link{pointer-events:auto}.src-row{align-items:center;-moz-column-gap:18px;column-gap:18px;display:grid;grid-template-columns:1fr auto;padding:14px 18px}.src-row.clickable{position:relative}.src-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.src-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.src-row.clickable>*{pointer-events:none;position:relative;z-index:1}.src-row.clickable>.row-action,.src-row.clickable>.row-link{pointer-events:auto}.dropdown{display:inline-block;position:relative}.dropdown summary{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;list-style:none;padding:6px 11px;transition:all .12s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.dropdown summary::-webkit-details-marker{display:none}.dropdown summary::marker{content:""}.dropdown summary:hover{background:var(--panel-hi);color:var(--ink)}.dropdown summary .chev{color:var(--ink-fade);font-size:9px;transition:transform .12s ease}.dropdown[open] summary .chev{transform:rotate(180deg)}.dropdown[open] summary{background:var(--panel-hi);color:var(--ink)}.dropdown-menu{background:var(--panel);border:1px solid var(--line);border-radius:6px;box-shadow:0 6px 24px -8px rgba(0,0,0,.55);min-width:220px;padding:4px;position:absolute;right:0;top:calc(100% + 4px);z-index:30}.dropdown-item{border-radius:4px;color:var(--ink-mid);display:block;font-size:12.5px;line-height:1.35;padding:8px 11px;text-decoration:none}.dropdown-item:hover{background:var(--panel-hi);color:var(--ink)}.dropdown-item .label{color:var(--ink);display:block;font-weight:500}.dropdown-item .hint{color:var(--ink-mute);display:block;font-family:JetBrains Mono,ui-monospace,monospace;font-size:11px;margin-top:2px}.snap-row{align-items:center;border-bottom:1px solid var(--line-soft);-moz-column-gap:16px;column-gap:16px;cursor:pointer;display:grid;font-size:13px;grid-template-columns:150px 130px 1fr 90px 130px 80px;padding:11px 14px;transition:background .1s ease}.snap-row:last-child{border-bottom:0}.snap-row:hover{background:var(--panel-hi)}.snap-row.head{color:var(--ink-fade);cursor:default;font-size:11px;letter-spacing:.08em;padding-bottom:9px;padding-top:9px;text-transform:uppercase}.snap-row.head:hover{background:transparent}.schd-row{align-items:center;-moz-column-gap:14px;column-gap:14px;display:grid;font-size:13px;grid-template-columns:78px 1fr 1.6fr 100px 110px auto;padding:12px 18px}.schd-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.schd-row.clickable{position:relative}.schd-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.schd-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.schd-row.clickable>*{pointer-events:none;position:relative;z-index:1}.schd-row.clickable>.row-action,.schd-row.clickable>.row-link{pointer-events:auto}.preset-chip{background:var(--bg);border:1px solid var(--line-soft);border-radius:4px;color:var(--ink-mid);cursor:pointer;font-family:JetBrains Mono,monospace;font-size:11.5px;padding:4px 9px;transition:border-color .1s ease,color .1s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.preset-chip:hover{border-color:var(--accent);color:var(--ink)}.picker{align-items:center;background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;cursor:pointer;display:flex;font-size:13px;gap:12px;padding:10px 12px;transition:border-color .1s ease,background .1s ease}.picker:hover{border-color:var(--ink-mute)}.picker .check{border:1px solid var(--line);border-radius:3px;display:inline-block;flex-shrink:0;height:14px;position:relative;width:14px}.picker.checked{background:color-mix(in oklch,var(--accent),transparent 92%);border-color:color-mix(in oklch,var(--accent),transparent 50%)}.picker.checked .check{background:var(--accent);border-color:var(--accent)}.picker.checked .check:after{border:solid oklch(.18 .01 195);border-width:0 1.5px 1.5px 0;content:"";height:8px;left:4px;position:absolute;top:1px;transform:rotate(45deg);width:4px}.picker input[type=checkbox]{opacity:0;pointer-events:none;position:absolute}.keep-cell{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;display:flex;flex-direction:column;gap:4px;padding:9px 11px}.keep-cell label{color:var(--ink-fade);font-size:10.5px;letter-spacing:.08em;text-transform:uppercase}.keep-cell input{background:transparent;border:none;color:var(--ink);font-size:14px;outline:none;padding:0;width:100%}.keep-cell input,.log{font-family:JetBrains Mono,monospace}.log{background:var(--bg);border:1px solid var(--line-soft);border-radius:7px;font-size:12px;line-height:1.7;overflow:hidden}.log-line{align-items:baseline;-moz-column-gap:14px;column-gap:14px;display:grid;grid-template-columns:14ch 8ch 1fr;padding:1px 16px}.log-line:first-child{padding-top:12px}.log-line:last-child{padding-bottom:12px}.log-tag,.log-ts{color:var(--ink-fade)}.log-tag{font-size:10px;letter-spacing:.08em;text-transform:uppercase}.progress-track{background:var(--bg);border:1px solid var(--line-soft);border-radius:9999px;height:6px;overflow:hidden}.progress-fill{background:var(--accent);border-radius:9999px;height:100%;transition:width .25s ease}.progress-fill.ok{background:var(--ok)}.progress-fill.bad{background:var(--bad)}.crumbs{font-size:12px}.crumbs,.crumbs a{color:var(--ink-mute)}.crumbs a{text-decoration:underline;text-decoration-color:var(--line);text-underline-offset:3px}.crumbs .sep{color:var(--ink-fade);margin:0 8px}.snippet{border:1px solid var(--line-soft);border-radius:6px;overflow:hidden}.snippet-head{align-items:center;border-bottom:1px solid var(--line-soft);color:var(--ink-fade);display:flex;font-size:11px;justify-content:space-between;letter-spacing:.1em;padding:10px 14px;text-transform:uppercase}.snippet pre{color:var(--ink-mid);font-family:JetBrains Mono,monospace;font-size:12px;line-height:1.7;margin:0;padding:14px;white-space:pre-wrap;word-break:break-all}.snippet pre .var{color:var(--accent)}.empty-state{background:radial-gradient(ellipse at top,color-mix(in oklch,var(--accent),transparent 95%),transparent 60%),var(--panel);border:1px dashed var(--line);border-radius:8px;padding:60px 40px;text-align:center}.pointer-events-none{pointer-events:none}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.bottom-5{bottom:1.25rem}.left-0{left:0}.right-5{right:1.25rem}.top-0{top:0}.z-50{z-index:50}.col-span-2{grid-column:span 2/span 2}.col-span-3{grid-column:span 3/span 3}.col-span-4{grid-column:span 4/span 4}.col-span-5{grid-column:span 5/span 5}.col-span-7{grid-column:span 7/span 7}.col-span-8{grid-column:span 8/span 8}.col-span-9{grid-column:span 9/span 9}.m-0{margin:0}.mx-2{margin-left:.5rem;margin-right:.5rem}.mx-auto{margin-left:auto;margin-right:auto}.mb-1\.5{margin-bottom:.375rem}.mb-10{margin-bottom:2.5rem}.mb-2{margin-bottom:.5rem}.mb-2\.5{margin-bottom:.625rem}.mb-3{margin-bottom:.75rem}.mb-3\.5{margin-bottom:.875rem}.mb-4{margin-bottom:1rem}.mb-5{margin-bottom:1.25rem}.mb-7{margin-bottom:1.75rem}.ml-1{margin-left:.25rem}.ml-1\.5{margin-left:.375rem}.ml-2{margin-left:.5rem}.ml-2\.5{margin-left:.625rem}.ml-5{margin-left:1.25rem}.ml-auto{margin-left:auto}.mr-1\.5{margin-right:.375rem}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-1\.5{margin-top:.375rem}.mt-2{margin-top:.5rem}.mt-2\.5{margin-top:.625rem}.mt-20{margin-top:5rem}.mt-3{margin-top:.75rem}.mt-3\.5{margin-top:.875rem}.mt-4{margin-top:1rem}.mt-5{margin-top:1.25rem}.mt-6{margin-top:1.5rem}.mt-7{margin-top:1.75rem}.mt-8{margin-top:2rem}.mt-9{margin-top:2.25rem}.block{display:block}.inline-block{display:inline-block}.inline{display:inline}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-3\.5{height:.875rem}.h-\[13px\]{height:13px}.h-\[22px\]{height:22px}.min-h-screen{min-height:100vh}.w-16{width:4rem}.w-3\.5{width:.875rem}.w-\[13px\]{width:13px}.w-\[22px\]{width:22px}.w-\[360px\]{width:360px}.w-full{width:100%}.min-w-0{min-width:0}.max-w-\[1280px\]{max-width:1280px}.max-w-\[440px\]{max-width:440px}.max-w-\[480px\]{max-width:480px}.max-w-\[520px\]{max-width:520px}.max-w-\[580px\]{max-width:580px}.max-w-\[640px\]{max-width:640px}.max-w-\[680px\]{max-width:680px}.max-w-\[720px\]{max-width:720px}.max-w-\[760px\]{max-width:760px}.flex-1{flex:1 1 0%}.flex-none{flex:none}.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.cursor-help{cursor:help}.cursor-pointer{cursor:pointer}.select-none{-webkit-user-select:none;-moz-user-select:none;user-select:none}.select-all{-webkit-user-select:all;-moz-user-select:all;user-select:all}.resize{resize:both}.list-none{list-style-type:none}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-12{grid-template-columns:repeat(12,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.items-baseline{align-items:baseline}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-2\.5{gap:.625rem}.gap-3{gap:.75rem}.gap-3\.5{gap:.875rem}.gap-4{gap:1rem}.gap-5{gap:1.25rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.gap-x-4{-moz-column-gap:1rem;column-gap:1rem}.gap-y-2{row-gap:.5rem}.gap-y-2\.5{row-gap:.625rem}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(.5rem*var(--tw-space-y-reverse));margin-top:calc(.5rem*(1 - var(--tw-space-y-reverse)))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(1rem*var(--tw-space-y-reverse));margin-top:calc(1rem*(1 - var(--tw-space-y-reverse)))}.overflow-hidden,.truncate{overflow:hidden}.truncate{text-overflow:ellipsis}.truncate,.whitespace-nowrap{white-space:nowrap}.text-pretty{text-wrap:pretty}.break-all{word-break:break-all}.rounded-\[3px\]{border-radius:3px}.rounded-\[5px\]{border-radius:5px}.rounded-\[6px\]{border-radius:6px}.rounded-\[7px\]{border-radius:7px}.rounded-\[8px\]{border-radius:8px}.rounded-full{border-radius:9999px}.border{border-width:1px}.border-y{border-top-width:1px}.border-b,.border-y{border-bottom-width:1px}.border-l{border-left-width:1px}.border-t{border-top-width:1px}.border-line{border-color:oklch(.27 .01 250)}.border-line-soft{border-color:oklch(.23 .008 250)}.bg-bg{background-color:oklch(.17 .006 250)}.bg-panel{background-color:oklch(.2 .007 250)}.p-0{padding:0}.p-2{padding:.5rem}.p-3\.5{padding:.875rem}.p-4{padding:1rem}.p-5{padding:1.25rem}.p-7{padding:1.75rem}.p-\[18px\]{padding:18px}.px-1{padding-left:.25rem;padding-right:.25rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-2\.5{padding-left:.625rem;padding-right:.625rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-3\.5{padding-left:.875rem;padding-right:.875rem}.px-4{padding-left:1rem;padding-right:1rem}.px-7{padding-left:1.75rem;padding-right:1.75rem}.px-8{padding-left:2rem;padding-right:2rem}.px-\[18px\]{padding-left:18px;padding-right:18px}.py-0\.5{padding-bottom:.125rem;padding-top:.125rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-1\.5{padding-bottom:.375rem;padding-top:.375rem}.py-12{padding-bottom:3rem;padding-top:3rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.py-2\.5{padding-bottom:.625rem;padding-top:.625rem}.py-3{padding-bottom:.75rem;padding-top:.75rem}.py-3\.5{padding-bottom:.875rem;padding-top:.875rem}.py-4{padding-bottom:1rem;padding-top:1rem}.py-5{padding-bottom:1.25rem;padding-top:1.25rem}.py-6{padding-bottom:1.5rem;padding-top:1.5rem}.py-7{padding-bottom:1.75rem;padding-top:1.75rem}.py-8{padding-bottom:2rem;padding-top:2rem}.py-\[14px\]{padding-bottom:14px;padding-top:14px}.py-\[5px\]{padding-bottom:5px;padding-top:5px}.pb-14{padding-bottom:3.5rem}.pb-2{padding-bottom:.5rem}.pb-24{padding-bottom:6rem}.pb-3{padding-bottom:.75rem}.pb-4{padding-bottom:1rem}.pb-\[18px\]{padding-bottom:18px}.pl-5{padding-left:1.25rem}.pl-6{padding-left:1.5rem}.pl-9{padding-left:2.25rem}.pt-0\.5{padding-top:.125rem}.pt-1{padding-top:.25rem}.pt-14{padding-top:3.5rem}.pt-4{padding-top:1rem}.pt-5{padding-top:1.25rem}.pt-6{padding-top:1.5rem}.pt-7{padding-top:1.75rem}.pt-9{padding-top:2.25rem}.pt-\[1px\]{padding-top:1px}.text-center{text-align:center}.text-right{text-align:right}.text-2xl{font-size:1.5rem;line-height:2rem}.text-\[10\.5px\]{font-size:10.5px}.text-\[10px\]{font-size:10px}.text-\[11\.5px\]{font-size:11.5px}.text-\[11px\]{font-size:11px}.text-\[12\.5px\]{font-size:12.5px}.text-\[12px\]{font-size:12px}.text-\[13px\]{font-size:13px}.text-\[14px\]{font-size:14px}.text-\[16px\]{font-size:16px}.text-\[18px\]{font-size:18px}.text-\[19px\]{font-size:19px}.text-\[20px\]{font-size:20px}.text-\[22px\]{font-size:22px}.text-\[26px\]{font-size:26px}.text-\[28px\]{font-size:28px}.text-base{font-size:1rem;line-height:1.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-medium{font-weight:500}.font-normal{font-weight:400}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.normal-case{text-transform:none}.italic{font-style:italic}.leading-\[1\.55\]{line-height:1.55}.leading-\[1\.5\]{line-height:1.5}.leading-\[1\.65\]{line-height:1.65}.leading-\[1\.6\]{line-height:1.6}.leading-\[1\.7\]{line-height:1.7}.leading-\[20px\]{line-height:20px}.leading-none{line-height:1}.tracking-\[-0\.005em\]{letter-spacing:-.005em}.tracking-\[-0\.012em\]{letter-spacing:-.012em}.tracking-\[-0\.01em\]{letter-spacing:-.01em}.tracking-\[-0\.02em\]{letter-spacing:-.02em}.tracking-\[0\.005em\]{letter-spacing:.005em}.tracking-\[0\.01em\]{letter-spacing:.01em}.tracking-\[0\.02em\]{letter-spacing:.02em}.tracking-\[0\.08em\]{letter-spacing:.08em}.tracking-\[0\.1em\]{letter-spacing:.1em}.text-accent{color:oklch(.82 .12 195)}.text-bad{color:oklch(.7 .2 25)}.text-ink{color:oklch(.96 .005 250)}.text-ink-fade{color:oklch(.42 .006 250)}.text-ink-mid{color:oklch(.78 .005 250)}.text-ink-mute{color:oklch(.58 .006 250)}.text-ok{color:oklch(.78 .14 155)}.text-warn{color:oklch(.82 .13 80)}.underline{text-decoration-line:underline}.no-underline{text-decoration-line:none}.decoration-line{text-decoration-color:oklch(.27 .01 250)}.underline-offset-4{text-underline-offset:4px}.opacity-40{opacity:.4}.transition{transition-duration:.15s;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,-webkit-backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter,-webkit-backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1)}.hover\:text-ink-mid:hover{color:oklch(.78 .005 250)} +/*! tailwindcss v3.4.17 | MIT License | https://tailwindcss.com*/*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}:host,html{line-height:1.5;-webkit-text-size-adjust:100%;font-family:Inter,system-ui,-apple-system,sans-serif;font-feature-settings:normal;font-variation-settings:normal;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-tap-highlight-color:transparent}body{line-height:inherit;margin:0}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:JetBrains Mono,ui-monospace,monospace;font-feature-settings:normal;font-size:1em;font-variation-settings:normal}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{color:inherit;font-family:inherit;font-feature-settings:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;letter-spacing:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]:where(:not([hidden=until-found])){display:none}:root{--bg:oklch(0.17 0.006 250);--panel:oklch(0.20 0.007 250);--panel-hi:oklch(0.23 0.008 250);--line:oklch(0.27 0.010 250);--line-soft:oklch(0.23 0.008 250);--ink:oklch(0.96 0.005 250);--ink-mid:oklch(0.78 0.005 250);--ink-mute:oklch(0.58 0.006 250);--ink-fade:oklch(0.42 0.006 250);--ok:oklch(0.78 0.14 155);--warn:oklch(0.82 0.13 80);--bad:oklch(0.70 0.20 25);--off:oklch(0.50 0.005 250);--accent:oklch(0.82 0.12 195)}body,html{background:var(--bg);color:var(--ink);font-family:Inter,system-ui,-apple-system,sans-serif;-webkit-font-smoothing:antialiased}body{font-feature-settings:"cv11","ss01","ss03"}::-moz-selection{background:color-mix(in oklch,var(--accent),transparent 70%)}::selection{background:color-mix(in oklch,var(--accent),transparent 70%)}.\!container{width:100%!important}.container{width:100%}@media (min-width:640px){.\!container{max-width:640px!important}.container{max-width:640px}}@media (min-width:768px){.\!container{max-width:768px!important}.container{max-width:768px}}@media (min-width:1024px){.\!container{max-width:1024px!important}.container{max-width:1024px}}@media (min-width:1280px){.\!container{max-width:1280px!important}.container{max-width:1280px}}@media (min-width:1536px){.\!container{max-width:1536px!important}.container{max-width:1536px}}.mono{font-family:JetBrains Mono,ui-monospace,monospace;font-variant-numeric:tabular-nums}.panel{background:var(--panel);border:1px solid var(--line-soft)}.hairline{box-shadow:inset 0 -1px 0 var(--line-soft)}.dot{border-radius:9999px;display:inline-block;height:7px;width:7px}.dot-online{background:var(--ok);box-shadow:0 0 0 3px color-mix(in oklch,var(--ok),transparent 80%)}.dot-degraded{background:var(--warn);box-shadow:0 0 0 3px color-mix(in oklch,var(--warn),transparent 80%)}.dot-offline{background:var(--off)}.dot-failed{background:var(--bad);box-shadow:0 0 0 3px color-mix(in oklch,var(--bad),transparent 80%)}.pulse{animation:rm-pulse 2.4s ease-in-out infinite}@keyframes rm-pulse{0%,to{box-shadow:0 0 0 3px color-mix(in oklch,var(--accent),transparent 80%)}50%{box-shadow:0 0 0 6px color-mix(in oklch,var(--accent),transparent 92%)}}.btn{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;padding:6px 11px;text-decoration:none;transition:all .12s ease}.btn:hover{background:var(--panel-hi);color:var(--ink)}.btn:disabled,.btn[disabled]{cursor:not-allowed;opacity:.4;pointer-events:none}.btn-primary{background:var(--accent);border-color:var(--accent);color:oklch(.18 .01 195)}.btn-primary:hover{filter:brightness(1.08)}.btn-ghost,.btn-ghost:hover{border-color:transparent}.btn-ghost:hover{background:var(--panel-hi)}.btn-danger{border-color:color-mix(in oklch,var(--bad),transparent 70%);color:var(--bad)}.btn-danger:hover{background:color-mix(in oklch,var(--bad),transparent 88%);border-color:color-mix(in oklch,var(--bad),transparent 50%);color:oklch(.85 .1 25)}.btn-lg{font-size:13px;padding:9px 14px}.btn-block{justify-content:center;width:100%}.nav-tab{border-bottom:2px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:28px;padding:18px 0;text-decoration:none}.nav-tab.active{border-color:var(--accent)}.nav-tab.active,.nav-tab:hover{color:var(--ink)}.sub-tab{border-bottom:1.5px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:24px;padding:12px 0;text-decoration:none}.sub-tab.active{border-color:var(--ink);color:var(--ink)}.tag{align-items:center;border:1px solid var(--line);border-radius:3px;display:inline-flex;font-size:11px;gap:5px;letter-spacing:.01em;line-height:1;padding:4px 7px}.field-label,.tag{color:var(--ink-mid)}.field-label{display:block;font-size:12px;margin-bottom:6px}.field-help{color:var(--ink-mute);font-size:12px;line-height:1.55;margin-top:6px}.field{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;color:var(--ink);font-family:inherit;font-size:13px;outline:none;padding:9px 12px;transition:border-color .12s ease;width:100%}.field:focus{border-color:var(--accent)}.field.invalid{border-color:color-mix(in oklch,var(--bad),transparent 50%)}.field.mono{font-family:JetBrains Mono,monospace;font-size:12px}.field.with-prefix{padding-left:64px}.host-row{align-items:center;border-left:3px solid transparent;-moz-column-gap:18px;column-gap:18px;display:grid;font-size:13px;grid-template-columns:24px 1.4fr .95fr 1.5fr .75fr .7fr .7fr 1.1fr 92px;padding:11px 16px}.host-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.host-row.degraded{border-left-color:color-mix(in oklch,var(--warn),transparent 50%)}.host-row.failed{border-left-color:color-mix(in oklch,var(--bad),transparent 50%)}.host-row.offline{border-left-color:color-mix(in oklch,var(--off),transparent 70%)}.host-row:hover{background:var(--panel-hi)}.host-row.clickable{position:relative}.host-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.host-row.clickable:hover{cursor:pointer}.host-row.clickable>*{pointer-events:none;position:relative;z-index:1}.host-row.clickable>.row-action,.host-row.clickable>.row-link{pointer-events:auto}.src-row{align-items:center;-moz-column-gap:18px;column-gap:18px;display:grid;grid-template-columns:1fr auto;padding:14px 18px}.src-row.clickable{position:relative}.src-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.src-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.src-row.clickable>*{pointer-events:none;position:relative;z-index:1}.src-row.clickable>.row-action,.src-row.clickable>.row-link{pointer-events:auto}.dropdown{display:inline-block;position:relative}.dropdown summary{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;list-style:none;padding:6px 11px;transition:all .12s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.dropdown summary::-webkit-details-marker{display:none}.dropdown summary::marker{content:""}.dropdown summary:hover{background:var(--panel-hi);color:var(--ink)}.dropdown summary .chev{color:var(--ink-fade);font-size:9px;transition:transform .12s ease}.dropdown[open] summary .chev{transform:rotate(180deg)}.dropdown[open] summary{background:var(--panel-hi);color:var(--ink)}.dropdown-menu{background:var(--panel);border:1px solid var(--line);border-radius:6px;box-shadow:0 6px 24px -8px rgba(0,0,0,.55);min-width:220px;padding:4px;position:absolute;right:0;top:calc(100% + 4px);z-index:30}.dropdown-item{border-radius:4px;color:var(--ink-mid);display:block;font-size:12.5px;line-height:1.35;padding:8px 11px;text-decoration:none}.dropdown-item:hover{background:var(--panel-hi);color:var(--ink)}.dropdown-item .label{color:var(--ink);display:block;font-weight:500}.dropdown-item .hint{color:var(--ink-mute);display:block;font-family:JetBrains Mono,ui-monospace,monospace;font-size:11px;margin-top:2px}.snap-row{align-items:center;border-bottom:1px solid var(--line-soft);-moz-column-gap:16px;column-gap:16px;cursor:pointer;display:grid;font-size:13px;grid-template-columns:150px 130px 1fr 90px 130px 80px;padding:11px 14px;transition:background .1s ease}.snap-row:last-child{border-bottom:0}.snap-row:hover{background:var(--panel-hi)}.snap-row.head{color:var(--ink-fade);cursor:default;font-size:11px;letter-spacing:.08em;padding-bottom:9px;padding-top:9px;text-transform:uppercase}.snap-row.head:hover{background:transparent}.alert-row{align-items:center;border-bottom:1px solid var(--line-soft);border-left:3px solid transparent;-moz-column-gap:16px;column-gap:16px;display:grid;font-size:13px;grid-template-columns:18px 110px 130px 1fr 130px 110px 180px;padding:12px 16px;transition:background .1s ease}.alert-row:hover{background:var(--panel-hi)}.alert-row:last-child{border-bottom:0}.alert-row.head{border-left-color:transparent;color:var(--ink-fade);cursor:default;font-size:11px;letter-spacing:.08em;padding-bottom:9px;padding-top:9px;text-transform:uppercase}.alert-row.head:hover{background:transparent}.alert-row.severity-warn{border-left-color:color-mix(in oklch,var(--warn),transparent 50%)}.alert-row.severity-critical{border-left-color:color-mix(in oklch,var(--bad),transparent 30%)}.alert-row.resolved{opacity:.55}.dot-critical{background:var(--bad);box-shadow:0 0 0 3px color-mix(in oklch,var(--bad),transparent 80%)}.tag-warn{background:color-mix(in oklch,var(--warn),transparent 92%);border-color:color-mix(in oklch,var(--warn),transparent 60%);color:var(--warn)}.tag-critical{background:color-mix(in oklch,var(--bad),transparent 92%);border-color:color-mix(in oklch,var(--bad),transparent 60%);color:var(--bad)}.tag-info{color:var(--ink-mid)}.schd-row{align-items:center;-moz-column-gap:14px;column-gap:14px;display:grid;font-size:13px;grid-template-columns:78px 1fr 1.6fr 100px 110px auto;padding:12px 18px}.schd-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.schd-row.clickable{position:relative}.schd-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.schd-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.schd-row.clickable>*{pointer-events:none;position:relative;z-index:1}.schd-row.clickable>.row-action,.schd-row.clickable>.row-link{pointer-events:auto}.preset-chip{background:var(--bg);border:1px solid var(--line-soft);border-radius:4px;color:var(--ink-mid);cursor:pointer;font-family:JetBrains Mono,monospace;font-size:11.5px;padding:4px 9px;transition:border-color .1s ease,color .1s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.preset-chip:hover{border-color:var(--accent);color:var(--ink)}.picker{align-items:center;background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;cursor:pointer;display:flex;font-size:13px;gap:12px;padding:10px 12px;transition:border-color .1s ease,background .1s ease}.picker:hover{border-color:var(--ink-mute)}.picker .check{border:1px solid var(--line);border-radius:3px;display:inline-block;flex-shrink:0;height:14px;position:relative;width:14px}.picker.checked{background:color-mix(in oklch,var(--accent),transparent 92%);border-color:color-mix(in oklch,var(--accent),transparent 50%)}.picker.checked .check{background:var(--accent);border-color:var(--accent)}.picker.checked .check:after{border:solid oklch(.18 .01 195);border-width:0 1.5px 1.5px 0;content:"";height:8px;left:4px;position:absolute;top:1px;transform:rotate(45deg);width:4px}.picker input[type=checkbox]{opacity:0;pointer-events:none;position:absolute}.keep-cell{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;display:flex;flex-direction:column;gap:4px;padding:9px 11px}.keep-cell label{color:var(--ink-fade);font-size:10.5px;letter-spacing:.08em;text-transform:uppercase}.keep-cell input{background:transparent;border:none;color:var(--ink);font-size:14px;outline:none;padding:0;width:100%}.keep-cell input,.log{font-family:JetBrains Mono,monospace}.log{background:var(--bg);border:1px solid var(--line-soft);border-radius:7px;font-size:12px;line-height:1.7;overflow:hidden}.log-line{align-items:baseline;-moz-column-gap:14px;column-gap:14px;display:grid;grid-template-columns:14ch 8ch 1fr;padding:1px 16px}.log-line:first-child{padding-top:12px}.log-line:last-child{padding-bottom:12px}.log-tag,.log-ts{color:var(--ink-fade)}.log-tag{font-size:10px;letter-spacing:.08em;text-transform:uppercase}.progress-track{background:var(--bg);border:1px solid var(--line-soft);border-radius:9999px;height:6px;overflow:hidden}.progress-fill{background:var(--accent);border-radius:9999px;height:100%;transition:width .25s ease}.progress-fill.ok{background:var(--ok)}.progress-fill.bad{background:var(--bad)}.crumbs{font-size:12px}.crumbs,.crumbs a{color:var(--ink-mute)}.crumbs a{text-decoration:underline;text-decoration-color:var(--line);text-underline-offset:3px}.crumbs .sep{color:var(--ink-fade);margin:0 8px}.snippet{border:1px solid var(--line-soft);border-radius:6px;overflow:hidden}.snippet-head{align-items:center;border-bottom:1px solid var(--line-soft);color:var(--ink-fade);display:flex;font-size:11px;justify-content:space-between;letter-spacing:.1em;padding:10px 14px;text-transform:uppercase}.snippet pre{color:var(--ink-mid);font-family:JetBrains Mono,monospace;font-size:12px;line-height:1.7;margin:0;padding:14px;white-space:pre-wrap;word-break:break-all}.snippet pre .var{color:var(--accent)}.empty-state{background:radial-gradient(ellipse at top,color-mix(in oklch,var(--accent),transparent 95%),transparent 60%),var(--panel);border:1px dashed var(--line);border-radius:8px;padding:60px 40px;text-align:center}.ch-row{align-items:center;border-bottom:1px solid var(--line-soft);-moz-column-gap:16px;column-gap:16px;display:grid;font-size:13px;grid-template-columns:28px 200px 1fr 100px 130px 140px;padding:14px 18px;transition:background .1s ease}.ch-row:last-child{border-bottom:0}.ch-row.head{color:var(--ink-fade);cursor:default;font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.ch-row.head:hover{background:transparent}.ch-row.clickable{cursor:pointer;position:relative}.ch-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.ch-row.clickable:hover{background:var(--panel-hi)}.ch-row.clickable>*{pointer-events:none;position:relative;z-index:1}.ch-row.clickable>.row-action,.ch-row.clickable>.row-link{pointer-events:auto}.ch-icon{align-items:center;background:var(--panel-hi);border:1px solid var(--line);border-radius:5px;color:var(--ink-mute);display:inline-flex;font-family:JetBrains Mono,monospace;font-size:10px;font-weight:600;height:24px;justify-content:center;width:24px}.ch-icon.webhook{border-color:color-mix(in oklch,var(--accent),transparent 60%);color:var(--accent)}.ch-icon.ntfy{border-color:color-mix(in oklch,var(--warn),transparent 60%);color:var(--warn)}.ch-icon.smtp{border-color:color-mix(in oklch,var(--ok),transparent 60%);color:var(--ok)}.toggle{background:var(--line);border-radius:9999px;cursor:pointer;display:inline-block;flex-shrink:0;height:16px;position:relative;transition:background .12s ease;width:30px}.toggle:after{background:var(--ink-mid);border-radius:9999px;content:"";height:12px;left:2px;position:absolute;top:2px;transition:all .12s ease;width:12px}.toggle.on{background:color-mix(in oklch,var(--accent),transparent 50%)}.toggle.on:after{background:var(--accent);left:16px}.kind-grid{display:grid;gap:14px;grid-template-columns:1fr 1fr 1fr}.kind-card{background:var(--bg);border:1px solid var(--line-soft);border-radius:7px;cursor:pointer;padding:16px;transition:border-color .12s ease,background .12s ease}.kind-card:hover{border-color:var(--ink-mute)}.kind-card.selected{background:color-mix(in oklch,var(--accent),transparent 95%);border-color:color-mix(in oklch,var(--accent),transparent 50%)}.radio-pip{align-items:center;border:1px solid var(--line);border-radius:9999px;display:inline-flex;flex-shrink:0;height:14px;justify-content:center;width:14px}.radio-pip.on{border-color:var(--accent)}.radio-pip.on:after{background:var(--accent);border-radius:9999px;content:"";height:6px;width:6px}.test-pill{border-radius:5px;display:inline-block;font-size:12.5px;padding:5px 10px}.test-pill-ok{background:color-mix(in oklch,var(--ok),transparent 92%);border:1px solid color-mix(in oklch,var(--ok),transparent 60%);color:var(--ok)}.test-pill-fail{background:color-mix(in oklch,var(--bad),transparent 92%);border:1px solid color-mix(in oklch,var(--bad),transparent 60%);color:var(--bad)}.pointer-events-none{pointer-events:none}.visible{visibility:visible}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.bottom-5{bottom:1.25rem}.left-0{left:0}.right-5{right:1.25rem}.top-0{top:0}.z-50{z-index:50}.col-span-2{grid-column:span 2/span 2}.col-span-3{grid-column:span 3/span 3}.col-span-4{grid-column:span 4/span 4}.col-span-5{grid-column:span 5/span 5}.col-span-7{grid-column:span 7/span 7}.col-span-8{grid-column:span 8/span 8}.col-span-9{grid-column:span 9/span 9}.m-0{margin:0}.mx-2{margin-left:.5rem;margin-right:.5rem}.mx-auto{margin-left:auto;margin-right:auto}.mb-1\.5{margin-bottom:.375rem}.mb-10{margin-bottom:2.5rem}.mb-2{margin-bottom:.5rem}.mb-2\.5{margin-bottom:.625rem}.mb-3{margin-bottom:.75rem}.mb-3\.5{margin-bottom:.875rem}.mb-4{margin-bottom:1rem}.mb-5{margin-bottom:1.25rem}.mb-7{margin-bottom:1.75rem}.ml-1{margin-left:.25rem}.ml-2{margin-left:.5rem}.ml-2\.5{margin-left:.625rem}.ml-5{margin-left:1.25rem}.ml-auto{margin-left:auto}.mr-1\.5{margin-right:.375rem}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-1\.5{margin-top:.375rem}.mt-2{margin-top:.5rem}.mt-2\.5{margin-top:.625rem}.mt-20{margin-top:5rem}.mt-3{margin-top:.75rem}.mt-3\.5{margin-top:.875rem}.mt-4{margin-top:1rem}.mt-5{margin-top:1.25rem}.mt-6{margin-top:1.5rem}.mt-7{margin-top:1.75rem}.mt-8{margin-top:2rem}.mt-9{margin-top:2.25rem}.block{display:block}.inline-block{display:inline-block}.inline{display:inline}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-3\.5{height:.875rem}.h-\[13px\]{height:13px}.h-\[22px\]{height:22px}.min-h-screen{min-height:100vh}.w-16{width:4rem}.w-3\.5{width:.875rem}.w-\[13px\]{width:13px}.w-\[22px\]{width:22px}.w-\[360px\]{width:360px}.w-full{width:100%}.min-w-0{min-width:0}.max-w-\[1280px\]{max-width:1280px}.max-w-\[440px\]{max-width:440px}.max-w-\[480px\]{max-width:480px}.max-w-\[520px\]{max-width:520px}.max-w-\[580px\]{max-width:580px}.max-w-\[640px\]{max-width:640px}.max-w-\[680px\]{max-width:680px}.max-w-\[720px\]{max-width:720px}.max-w-\[760px\]{max-width:760px}.flex-1{flex:1 1 0%}.flex-none{flex:none}.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.cursor-default{cursor:default}.cursor-help{cursor:help}.cursor-pointer{cursor:pointer}.select-none{-webkit-user-select:none;-moz-user-select:none;user-select:none}.select-all{-webkit-user-select:all;-moz-user-select:all;user-select:all}.resize{resize:both}.list-none{list-style-type:none}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-12{grid-template-columns:repeat(12,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.items-baseline{align-items:baseline}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1{gap:.25rem}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-2\.5{gap:.625rem}.gap-3{gap:.75rem}.gap-3\.5{gap:.875rem}.gap-4{gap:1rem}.gap-5{gap:1.25rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.gap-x-4{-moz-column-gap:1rem;column-gap:1rem}.gap-y-2{row-gap:.5rem}.gap-y-2\.5{row-gap:.625rem}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(.5rem*var(--tw-space-y-reverse));margin-top:calc(.5rem*(1 - var(--tw-space-y-reverse)))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(1rem*var(--tw-space-y-reverse));margin-top:calc(1rem*(1 - var(--tw-space-y-reverse)))}.overflow-hidden,.truncate{overflow:hidden}.truncate{text-overflow:ellipsis}.truncate,.whitespace-nowrap{white-space:nowrap}.text-pretty{text-wrap:pretty}.break-all{word-break:break-all}.rounded-\[3px\]{border-radius:3px}.rounded-\[5px\]{border-radius:5px}.rounded-\[6px\]{border-radius:6px}.rounded-\[7px\]{border-radius:7px}.rounded-\[8px\]{border-radius:8px}.rounded-full{border-radius:9999px}.rounded-md{border-radius:.375rem}.border{border-width:1px}.border-y{border-top-width:1px}.border-b,.border-y{border-bottom-width:1px}.border-l{border-left-width:1px}.border-t{border-top-width:1px}.border-line{border-color:oklch(.27 .01 250)}.border-line-soft{border-color:oklch(.23 .008 250)}.bg-bg{background-color:oklch(.17 .006 250)}.bg-panel{background-color:oklch(.2 .007 250)}.p-0{padding:0}.p-2{padding:.5rem}.p-3\.5{padding:.875rem}.p-4{padding:1rem}.p-5{padding:1.25rem}.p-7{padding:1.75rem}.p-\[18px\]{padding:18px}.p-\[3px\]{padding:3px}.px-1{padding-left:.25rem;padding-right:.25rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-2\.5{padding-left:.625rem;padding-right:.625rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-3\.5{padding-left:.875rem;padding-right:.875rem}.px-4{padding-left:1rem;padding-right:1rem}.px-7{padding-left:1.75rem;padding-right:1.75rem}.px-8{padding-left:2rem;padding-right:2rem}.px-\[18px\]{padding-left:18px;padding-right:18px}.py-0\.5{padding-bottom:.125rem;padding-top:.125rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-1\.5{padding-bottom:.375rem;padding-top:.375rem}.py-12{padding-bottom:3rem;padding-top:3rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.py-2\.5{padding-bottom:.625rem;padding-top:.625rem}.py-3{padding-bottom:.75rem;padding-top:.75rem}.py-3\.5{padding-bottom:.875rem;padding-top:.875rem}.py-4{padding-bottom:1rem;padding-top:1rem}.py-5{padding-bottom:1.25rem;padding-top:1.25rem}.py-6{padding-bottom:1.5rem;padding-top:1.5rem}.py-7{padding-bottom:1.75rem;padding-top:1.75rem}.py-8{padding-bottom:2rem;padding-top:2rem}.py-\[14px\]{padding-bottom:14px;padding-top:14px}.py-\[5px\]{padding-bottom:5px;padding-top:5px}.pb-14{padding-bottom:3.5rem}.pb-2{padding-bottom:.5rem}.pb-24{padding-bottom:6rem}.pb-3{padding-bottom:.75rem}.pb-4{padding-bottom:1rem}.pb-\[18px\]{padding-bottom:18px}.pl-5{padding-left:1.25rem}.pl-6{padding-left:1.5rem}.pl-9{padding-left:2.25rem}.pt-0\.5{padding-top:.125rem}.pt-1{padding-top:.25rem}.pt-14{padding-top:3.5rem}.pt-4{padding-top:1rem}.pt-5{padding-top:1.25rem}.pt-6{padding-top:1.5rem}.pt-7{padding-top:1.75rem}.pt-9{padding-top:2.25rem}.pt-\[1px\]{padding-top:1px}.text-center{text-align:center}.text-right{text-align:right}.text-2xl{font-size:1.5rem;line-height:2rem}.text-\[10\.5px\]{font-size:10.5px}.text-\[10px\]{font-size:10px}.text-\[11\.5px\]{font-size:11.5px}.text-\[11px\]{font-size:11px}.text-\[12\.5px\]{font-size:12.5px}.text-\[12px\]{font-size:12px}.text-\[13px\]{font-size:13px}.text-\[14px\]{font-size:14px}.text-\[16px\]{font-size:16px}.text-\[18px\]{font-size:18px}.text-\[19px\]{font-size:19px}.text-\[20px\]{font-size:20px}.text-\[22px\]{font-size:22px}.text-\[26px\]{font-size:26px}.text-\[28px\]{font-size:28px}.text-base{font-size:1rem;line-height:1.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-medium{font-weight:500}.font-normal{font-weight:400}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.normal-case{text-transform:none}.italic{font-style:italic}.leading-\[1\.55\]{line-height:1.55}.leading-\[1\.5\]{line-height:1.5}.leading-\[1\.65\]{line-height:1.65}.leading-\[1\.6\]{line-height:1.6}.leading-\[1\.7\]{line-height:1.7}.leading-\[20px\]{line-height:20px}.leading-none{line-height:1}.tracking-\[-0\.005em\]{letter-spacing:-.005em}.tracking-\[-0\.012em\]{letter-spacing:-.012em}.tracking-\[-0\.01em\]{letter-spacing:-.01em}.tracking-\[-0\.02em\]{letter-spacing:-.02em}.tracking-\[0\.005em\]{letter-spacing:.005em}.tracking-\[0\.01em\]{letter-spacing:.01em}.tracking-\[0\.02em\]{letter-spacing:.02em}.tracking-\[0\.08em\]{letter-spacing:.08em}.tracking-\[0\.1em\]{letter-spacing:.1em}.text-accent{color:oklch(.82 .12 195)}.text-bad{color:oklch(.7 .2 25)}.text-ink{color:oklch(.96 .005 250)}.text-ink-fade{color:oklch(.42 .006 250)}.text-ink-mid{color:oklch(.78 .005 250)}.text-ink-mute{color:oklch(.58 .006 250)}.text-ok{color:oklch(.78 .14 155)}.text-warn{color:oklch(.82 .13 80)}.underline{text-decoration-line:underline}.no-underline{text-decoration-line:none}.decoration-line{text-decoration-color:oklch(.27 .01 250)}.underline-offset-4{text-underline-offset:4px}.opacity-40{opacity:.4}.filter{filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition{transition-duration:.15s;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,-webkit-backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter,-webkit-backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1)}.hover\:text-ink-mid:hover{color:oklch(.78 .005 250)} diff --git a/web/styles/input.css b/web/styles/input.css index dfa34f6..5fd2c33 100644 --- a/web/styles/input.css +++ b/web/styles/input.css @@ -278,6 +278,39 @@ } .snap-row.head:hover { background: transparent; } + /* ---------- alert rows (/alerts list) ---------- */ + .alert-row { + display: grid; align-items: center; + grid-template-columns: 18px 110px 130px 1fr 130px 110px 180px; + column-gap: 16px; + padding: 12px 16px; font-size: 13px; + border-bottom: 1px solid var(--line-soft); + border-left: 3px solid transparent; + transition: background 100ms ease; + } + .alert-row:hover { background: var(--panel-hi); } + .alert-row:last-child { border-bottom: 0; } + .alert-row.head { + cursor: default; padding-top: 9px; padding-bottom: 9px; + font-size: 11px; color: var(--ink-fade); + text-transform: uppercase; letter-spacing: 0.08em; + border-left-color: transparent; + } + .alert-row.head:hover { background: transparent; } + .alert-row.severity-warn { border-left-color: color-mix(in oklch, var(--warn), transparent 50%); } + .alert-row.severity-critical { border-left-color: color-mix(in oklch, var(--bad), transparent 30%); } + .alert-row.resolved { opacity: 0.55; } + + /* status-dot aliases for alert severity */ + .dot-warn { background: var(--warn); box-shadow: 0 0 0 3px color-mix(in oklch, var(--warn), transparent 80%); } + .dot-critical { background: var(--bad); box-shadow: 0 0 0 3px color-mix(in oklch, var(--bad), transparent 80%); } + .dot-resolved { background: var(--ok); box-shadow: 0 0 0 3px color-mix(in oklch, var(--ok), transparent 80%); } + + /* tag colour variants for alerts */ + .tag-warn { color: var(--warn); border-color: color-mix(in oklch, var(--warn), transparent 60%); background: color-mix(in oklch, var(--warn), transparent 92%); } + .tag-critical { color: var(--bad); border-color: color-mix(in oklch, var(--bad), transparent 60%); background: color-mix(in oklch, var(--bad), transparent 92%); } + .tag-info { color: var(--ink-mid); } + /* ---------- schedule rows (Schedules tab) ---------- */ .schd-row { display: grid; align-items: center; @@ -418,4 +451,103 @@ radial-gradient(ellipse at top, color-mix(in oklch, var(--accent), transparent 95%), transparent 60%), var(--panel); } + + /* ---------- notification channel rows (/settings/notifications) ---------- */ + .ch-row { + display: grid; align-items: center; + grid-template-columns: 28px 200px 1fr 100px 130px 140px; + column-gap: 16px; + padding: 14px 18px; font-size: 13px; + border-bottom: 1px solid var(--line-soft); + transition: background 100ms ease; + } + .ch-row:last-child { border-bottom: 0; } + .ch-row.head { + cursor: default; font-size: 11px; color: var(--ink-fade); + text-transform: uppercase; letter-spacing: 0.08em; + padding-top: 10px; padding-bottom: 10px; + } + .ch-row.head:hover { background: transparent; } + /* Whole-row click → edit page (mirrors .host-row.clickable). */ + .ch-row.clickable { position: relative; cursor: pointer; } + .ch-row.clickable .row-link { + position: absolute; inset: 0; z-index: 0; + text-indent: -9999px; overflow: hidden; + } + .ch-row.clickable:hover { background: var(--panel-hi); } + .ch-row.clickable > * { position: relative; z-index: 1; pointer-events: none; } + .ch-row.clickable > .row-link { pointer-events: auto; } + .ch-row.clickable > .row-action { pointer-events: auto; } + + /* Channel kind icons */ + .ch-icon { + width: 24px; height: 24px; + border-radius: 5px; + display: inline-flex; align-items: center; justify-content: center; + font-family: 'JetBrains Mono', monospace; font-size: 10px; font-weight: 600; + background: var(--panel-hi); color: var(--ink-mute); + border: 1px solid var(--line); + } + .ch-icon.webhook { color: var(--accent); border-color: color-mix(in oklch, var(--accent), transparent 60%); } + .ch-icon.ntfy { color: var(--warn); border-color: color-mix(in oklch, var(--warn), transparent 60%); } + .ch-icon.smtp { color: var(--ok); border-color: color-mix(in oklch, var(--ok), transparent 60%); } + + /* ---------- toggle (enabled/disabled switch) ---------- */ + .toggle { + display: inline-block; width: 30px; height: 16px; border-radius: 9999px; + background: var(--line); position: relative; cursor: pointer; + transition: background 120ms ease; flex-shrink: 0; + } + .toggle::after { + content: ""; position: absolute; left: 2px; top: 2px; + width: 12px; height: 12px; border-radius: 9999px; + background: var(--ink-mid); + transition: all 120ms ease; + } + .toggle.on { background: color-mix(in oklch, var(--accent), transparent 50%); } + .toggle.on::after { left: 16px; background: var(--accent); } + + /* ---------- kind-picker radio cards (channel edit form) ---------- */ + .kind-grid { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 14px; } + .kind-card { + border: 1px solid var(--line-soft); background: var(--bg); + border-radius: 7px; padding: 16px; + cursor: pointer; + transition: border-color 120ms ease, background 120ms ease; + } + .kind-card:hover { border-color: var(--ink-mute); } + .kind-card.selected { + border-color: color-mix(in oklch, var(--accent), transparent 50%); + background: color-mix(in oklch, var(--accent), transparent 95%); + } + + /* Radio pip inside kind cards */ + .radio-pip { + width: 14px; height: 14px; + border-radius: 9999px; + border: 1px solid var(--line); + display: inline-flex; align-items: center; justify-content: center; + flex-shrink: 0; + } + .radio-pip.on { border-color: var(--accent); } + .radio-pip.on::after { + content: ""; width: 6px; height: 6px; border-radius: 9999px; + background: var(--accent); + } + + /* ---------- test-result pills (notification test button) ---------- */ + .test-pill { + display: inline-block; + padding: 5px 10px; border-radius: 5px; font-size: 12.5px; + } + .test-pill-ok { + border: 1px solid color-mix(in oklch, var(--ok), transparent 60%); + background: color-mix(in oklch, var(--ok), transparent 92%); + color: var(--ok); + } + .test-pill-fail { + border: 1px solid color-mix(in oklch, var(--bad), transparent 60%); + background: color-mix(in oklch, var(--bad), transparent 92%); + color: var(--bad); + } } diff --git a/web/templates/pages/alerts.html b/web/templates/pages/alerts.html new file mode 100644 index 0000000..f7475fd --- /dev/null +++ b/web/templates/pages/alerts.html @@ -0,0 +1,122 @@ +{{define "title"}}Alerts · restic-manager{{end}} + +{{define "content"}} +{{$page := .Page}} +{{$filter := $page.Filter}} +
+ + {{/* crumbs */}} +
+ Dashboard/ + alerts +
+ + {{/* page header */}} +
+
+

+ Alerts + + {{$page.Counts.Open}} open + {{if gt $page.Counts.Acknowledged 0}} · {{$page.Counts.Acknowledged}} acknowledged{{end}} + · {{$page.Counts.Resolved24h}} resolved (24h) + +

+
+ +
+ + {{/* filter strip */}} +
+ + {{/* status pills */}} +
+ {{range list "open" "acknowledged" "resolved" "all"}} + {{$s := .}} + {{$active := eq $s $filter.Status}} + {{if and (eq $s "all") (eq $filter.Status "")}}{{$active = true}}{{end}} + + {{if eq $s "open"}}Open {{$page.Counts.Open}} + {{else if eq $s "acknowledged"}}Acknowledged {{$page.Counts.Acknowledged}} + {{else if eq $s "resolved"}}Resolved {{$page.Counts.Resolved24h}} + {{else}}All{{end}} + + {{end}} +
+ + {{/* severity dropdown */}} +
+ +
+ + {{/* host dropdown */}} +
+ +
+ + {{/* search input */}} + + + {{if $filter.Severity}}{{end}} + {{if $filter.HostID}}{{end}} + + +
+ + {{/* alerts table */}} +
+ + {{/* header row */}} +
+
+
Severity / kind
+
Host
+
Message
+
Raised
+
Last seen
+
+
+ + {{if eq (len $page.Alerts) 0}} + {{/* empty state */}} +
+
+ +
+
All clear.
+
+ No alerts match the current filter. +
+
+
+
+ {{else}} + {{range $page.Alerts}} + {{template "alert_row" (dict "Alert" . "HostNames" $page.HostNames "Filter" $page.Filter)}} + {{end}} + {{end}} + +
+ +
+{{end}} diff --git a/web/templates/pages/dashboard.html b/web/templates/pages/dashboard.html index fdcd487..b3a379a 100644 --- a/web/templates/pages/dashboard.html +++ b/web/templates/pages/dashboard.html @@ -4,6 +4,7 @@
{{$page := .Page}} + {{template "crit_banner" .Page}} {{if eq $page.HostCount 0}} {{/* ---------- empty state ---------- */}} diff --git a/web/templates/pages/notification_edit.html b/web/templates/pages/notification_edit.html new file mode 100644 index 0000000..f19d306 --- /dev/null +++ b/web/templates/pages/notification_edit.html @@ -0,0 +1,9 @@ +{{/* notification_edit.html — rendered by handleUINotificationEditGet/Post via Render("settings", …). + This file exists so the glob-discovered page registry includes it cleanly. + The actual edit form lives in settings.html's notification_edit_form block. */}} +{{define "title"}}Edit Channel · Settings · restic-manager{{end}} +{{define "content"}} +{{/* This page is served under the "settings" renderer key; this file is a + placeholder discovered by the glob so ui.New() registers "notification_edit" + as a valid page. Handlers do not call Render("notification_edit", …) directly. */}} +{{end}} diff --git a/web/templates/pages/notifications.html b/web/templates/pages/notifications.html new file mode 100644 index 0000000..a9a69cd --- /dev/null +++ b/web/templates/pages/notifications.html @@ -0,0 +1,9 @@ +{{/* notifications.html — rendered by handleUINotificationsList via Render("settings", …). + This file exists so the glob-discovered page registry includes it cleanly. + The actual list body lives in settings.html's notification_list_body block. */}} +{{define "title"}}Notifications · Settings · restic-manager{{end}} +{{define "content"}} +{{/* This page is served under the "settings" renderer key; this file is a + placeholder discovered by the glob so ui.New() registers "notifications" + as a valid page. Handlers do not call Render("notifications", …) directly. */}} +{{end}} diff --git a/web/templates/pages/settings.html b/web/templates/pages/settings.html new file mode 100644 index 0000000..a9cea0d --- /dev/null +++ b/web/templates/pages/settings.html @@ -0,0 +1,590 @@ +{{define "title"}}{{.Title}}{{end}} + +{{define "content"}} +{{$page := .Page}} +
+ + {{/* ---------- breadcrumbs ---------- */}} +
+ Dashboard/ + {{if $page.Form}} + Settings/ + notifications/ + {{if $page.Form.ID}} + {{$page.Form.Name}} + {{else}} + new channel + {{end}} + {{else}} + Settings/ + notifications + {{end}} +
+ + {{/* ---------- page header ---------- */}} +
+ {{if $page.Form}} +

+ {{if $page.Form.ID}}Edit channel · {{$page.Form.Name}}{{else}}Add channel{{end}} +

+ {{else}} +

Settings

+ + Add channel + {{end}} +
+ + {{/* ---------- sub-tab nav ---------- */}} + + + {{/* ---------- sub-tab body ---------- */}} +
+ {{if $page.Form}} + {{template "notification_edit_form" $page}} + {{else}} + {{template "notification_list_body" $page}} + {{end}} +
+ +
+{{end}} + +{{/* ================================================================ + notification_list_body — channel list (embedded in settings.html) + Receives $page (settingsPage). + ================================================================ */}} +{{define "notification_list_body"}} +

+ Notification channels fire when the alert engine raises an alert. + All channels apply globally — every alert that meets the engine's thresholds is sent to every enabled channel. +

+ + {{if not .Channels}} +
+

No channels configured.

+

+ Alerts are still raised in the dashboard, but nothing is pushed to chat / phone / email. + Add a channel to get notified. +

+ +
+ {{else}} +
+
+
+
Name
+
Endpoint
+
Enabled
+
Last fired
+
+
+ {{range .Channels}} + {{$ch := .}} +
+ edit {{$ch.Name}} +
+ {{if eq $ch.Kind "webhook"}}WH + {{else if eq $ch.Kind "ntfy"}}NT + {{else}}@{{end}} +
+
{{$ch.Name}}
+
+ {{if eq $ch.Kind "webhook"}}webhook · click to edit{{else if eq $ch.Kind "ntfy"}}ntfy · click to edit{{else}}smtp · click to edit{{end}} +
+
+ {{if $ch.Enabled}} + + {{else}} + + {{end}} +
+
+ {{if $ch.LastFiredAt}}{{relTime $ch.LastFiredAt}}{{else}}never{{end}} +
+
+ Edit + Delete +
+
+ {{end}} +
+ {{end}} +{{end}} + +{{/* ================================================================ + notification_edit_form — create/edit form (embedded in settings.html) + Receives $page (settingsPage). + ================================================================ */}} +{{define "notification_edit_form"}} +{{$f := .Form}} +{{$isEdit := ne $f.ID ""}} + + {{if .FormError}} +
+ {{.FormError}} +
+ {{end}} + {{if .DeleteError}} +
+ {{.DeleteError}} +
+ {{end}} + +
+
+ + {{/* ---------- kind picker ---------- */}} +
+
Channel kind
+
+ + {{/* Webhook card */}} + + + {{/* Ntfy card */}} + + + {{/* SMTP card */}} + + +
+
+ + {{/* ---------- per-kind fields ---------- */}} + {{if $isEdit}} +
+ {{else}} + + {{end}} + + {{/* hidden kind field updated by JS */}} + + + {{/* Webhook fields */}} +
+
+
+ + +
Operator-friendly label shown in the channel list and audit log.
+
+
+ + +
We POST the JSON envelope shown on the right. 5s timeout; failures are logged but not retried.
+
+
+ + +
If set, sent as Authorization: Bearer … on every POST.
+
+
+ +
+ + +
+
Single extra header in v1.
+
+
+
+ + {{/* Ntfy fields */}} +
+
+
+ + +
+
+
+ + +
Default https://ntfy.sh; change for self-hosted.
+
+
+ + +
Subscribe to this topic in the ntfy app.
+
+
+
+ + +
Use this OR username+password below. Token wins when both are set.
+
+
+
+ + +
+
+ + +
Sent as HTTP Basic auth when no token is set.
+
+
+
+ + +
Per-alert severity overrides this — critical alerts always go out at urgent regardless of the default.
+
+
+
+ + {{/* SMTP fields */}} +
+
+
+ + +
One channel = one recipient — add another channel for a second mailbox.
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
App password recommended for Gmail / M365.
+
+
+
+
+ + +
+
+ + +
Single address or distribution list.
+
+
+
+
+ + {{/* ---------- enabled + test ---------- */}} +
+
+ +
+
Enabled
+
When off, this channel is skipped on alert dispatch.
+
+
+ + {{if $isEdit}} +
+
+ +
+
+
+ Sends severity=info, kind=test_notification, message="Test from restic-manager". +
+
+ {{end}} +
+ + {{/* ---------- action row ---------- */}} +
+ Cancel +
+ {{if $isEdit}} + + {{end}} + +
+
+ + {{/* ---------- typed-confirm delete ---------- */}} +
{{/* close ch-form — delete panel must live OUTSIDE because HTML forbids nested forms */}} + + {{if $isEdit}} + + {{end}} +
+ + {{/* ---------- right rail — payload preview ---------- + All three are rendered; the kind-switcher JS toggles which is + visible. Server-side {{if}} would freeze the panel at whichever + kind was loaded, so flipping the picker leaves it stale. */}} + + +
+ +{{/* JS: kind-picker interactivity + enabled toggle + HTMX test-result rendering */}} + +{{end}} diff --git a/web/templates/partials/alert_row.html b/web/templates/partials/alert_row.html new file mode 100644 index 0000000..b194386 --- /dev/null +++ b/web/templates/partials/alert_row.html @@ -0,0 +1,96 @@ +{{define "alert_row"}} +{{$a := .Alert}} +{{$hostNames := .HostNames}} +{{$filter := .Filter}} +{{$status := alertStatus $a.ResolvedAt $a.AcknowledgedAt}} + +{{/* derive query string for redirect-back after ack/resolve */}} +{{$qs := ""}} +{{if $filter.Status}}{{$qs = printf "status=%s" $filter.Status}}{{end}} +{{if $filter.Severity}}{{$qs = printf "%s&severity=%s" $qs $filter.Severity}}{{end}} +{{if $filter.HostID}}{{$qs = printf "%s&host_id=%s" $qs $filter.HostID}}{{end}} +{{if $filter.Search}}{{$qs = printf "%s&q=%s" $qs $filter.Search}}{{end}} + +
+ + {{/* dot */}} +
+ {{if eq $status "resolved"}} + + {{else if eq $a.Severity "critical"}} + + {{else if eq $a.Severity "warning"}} + + {{else}} + + {{end}} +
+ + {{/* severity + kind tag */}} +
+ {{if eq $a.Severity "critical"}} + {{$a.Kind}} + {{else if eq $a.Severity "warning"}} + {{$a.Kind}} + {{else}} + {{$a.Kind}} + {{end}} +
+ + {{/* host */}} +
+ {{mapGet $hostNames $a.HostID}} +
+ + {{/* message */}} +
+ {{$a.Message}} +
+ + {{/* raised (created_at) */}} +
+ {{relTime $a.CreatedAt}} +
+ + {{/* last seen */}} +
+ {{if and (eq $status "open") (stillHappening $a.LastSeenAt)}} + still happening · {{relTime $a.LastSeenAt}} + {{else}} + {{relTime $a.LastSeenAt}} + {{end}} +
+ + {{/* actions */}} +
+ {{if eq $status "open"}} +
+ {{if $qs}}{{end}} + +
+
+ {{if $qs}}{{end}} + +
+ {{else if eq $status "acknowledged"}} + + ack'd{{if $a.AcknowledgedBy}} by {{deref $a.AcknowledgedBy}}{{end}} · {{relTime $a.AcknowledgedAt}} + +
+ {{if $qs}}{{end}} + +
+ {{else}} + resolved · {{relTime $a.ResolvedAt}} + {{end}} +
+ +
+{{end}} diff --git a/web/templates/partials/crit_banner.html b/web/templates/partials/crit_banner.html new file mode 100644 index 0000000..c2c31d2 --- /dev/null +++ b/web/templates/partials/crit_banner.html @@ -0,0 +1,13 @@ +{{define "crit_banner"}} +{{if gt .CritOpenCount 0}} +
+
+ + {{.CritOpenCount}} critical alert{{if ne .CritOpenCount 1}}s{{end}} open across the fleet +
+ Review → +
+{{end}} +{{end}} diff --git a/web/templates/partials/nav.html b/web/templates/partials/nav.html index 4a75dbc..256d85b 100644 --- a/web/templates/partials/nav.html +++ b/web/templates/partials/nav.html @@ -26,7 +26,7 @@