// Package alert evaluates the hardcoded rule set and persists raises // / acknowledges / resolves. Three event sources feed it: // - JobFinishedEvent — pushed when a job lands a terminal state // (the existing MarkJobFinished site) // - HostOfflineEvent / HostOnlineEvent — pushed by the offline // sweeper and by the ws hello handler // - 60s ticker (internal) — drives stale-schedule + auto-resolve // // All output goes through store.RaiseOrTouch / Acknowledge / Resolve // and the notification.Hub. The engine is one goroutine started at // boot; non-blocking sends from hot paths. package alert import ( "context" "log/slog" "sync" "time" "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" "gitea.dcglab.co.uk/steve/restic-manager/internal/store" ) // JobFinishedEvent carries everything the engine needs to evaluate // the failed-X rules. Pushed via Engine.NotifyJobFinished from the // MarkJobFinished site. type JobFinishedEvent struct { HostID string JobID string Kind string // backup | forget | prune | check | unlock | restore | diff Status string // succeeded | failed | cancelled When time.Time } // Engine evaluates hardcoded alert rules and dispatches via notification.Hub. type Engine struct { store *store.Store hub *notification.Hub jobs chan JobFinishedEvent hostDown chan string // host_id hostUp chan string // agentOfflineFloor is the duration a host must be offline before // we raise. Configurable for tests; default 15m. agentOfflineFloor time.Duration tickPeriod time.Duration closeOnce sync.Once done chan struct{} } // NewEngine builds the engine. agentOfflineFloor + tickPeriod default // to 15min and 60s respectively when zero. 
func NewEngine(st *store.Store, hub *notification.Hub) *Engine {
	return &Engine{
		store:             st,
		hub:               hub,
		jobs:              make(chan JobFinishedEvent, 32),
		hostDown:          make(chan string, 32),
		hostUp:            make(chan string, 32),
		agentOfflineFloor: 15 * time.Minute,
		tickPeriod:        60 * time.Second,
		done:              make(chan struct{}),
	}
}

// Run drives the event loop. Returns when ctx is done, closing e.done
// exactly once on the way out. Blocks; call in its own goroutine.
//
// A non-positive tickPeriod falls back to 60s, because time.NewTicker
// panics when handed a duration that is not greater than zero.
func (e *Engine) Run(ctx context.Context) {
	period := e.tickPeriod
	if period <= 0 {
		period = 60 * time.Second
	}
	t := time.NewTicker(period)
	defer t.Stop()

	for {
		select {
		case <-ctx.Done():
			e.closeOnce.Do(func() { close(e.done) })
			return
		case ev := <-e.jobs:
			e.handleJobFinished(ctx, ev)
		case hostID := <-e.hostDown:
			e.handleHostOffline(ctx, hostID)
		case hostID := <-e.hostUp:
			e.handleHostOnline(ctx, hostID)
		case now := <-t.C:
			e.tick(ctx, now)
		}
	}
}

// NotifyJobFinished is the hot-path hook called from MarkJobFinished's
// caller (ws.handler.dispatchAgentMessage). Non-blocking: drops on a
// full channel with a slog warning.
func (e *Engine) NotifyJobFinished(ev JobFinishedEvent) {
	select {
	case e.jobs <- ev:
	default:
		slog.Warn("alert: jobs channel full; dropping event", "kind", ev.Kind, "host_id", ev.HostID)
	}
}

// NotifyHostOffline notifies the engine that a host is offline.
// Non-blocking: drops on a full channel with a slog warning.
func (e *Engine) NotifyHostOffline(hostID string) {
	select {
	case e.hostDown <- hostID:
	default:
		slog.Warn("alert: hostDown channel full; dropping", "host_id", hostID)
	}
}

// NotifyHostOnline notifies the engine that a host is online.
// Non-blocking: drops on a full channel with a slog warning.
func (e *Engine) NotifyHostOnline(hostID string) {
	select {
	case e.hostUp <- hostID:
	default:
		slog.Warn("alert: hostUp channel full; dropping", "host_id", hostID)
	}
}

// Placeholder method stubs for C2 implementation. Each one is currently
// a no-op; they exist so that Run's dispatch has something to call.

// handleJobFinished is invoked by Run for every event received on the
// jobs channel. No-op until C2.
func (e *Engine) handleJobFinished(ctx context.Context, ev JobFinishedEvent) {
	// Implemented in C2
}

// handleHostOffline is invoked by Run for every host ID received on the
// hostDown channel. No-op until C2.
func (e *Engine) handleHostOffline(ctx context.Context, hostID string) {
	// Implemented in C2
}

// handleHostOnline is invoked by Run for every host ID received on the
// hostUp channel. No-op until C2.
func (e *Engine) handleHostOnline(ctx context.Context, hostID string) {
	// Implemented in C2
}

// tick is invoked by Run each time the internal ticker fires; now is the
// time delivered by the ticker. No-op until C2.
func (e *Engine) tick(ctx context.Context, now time.Time) {
	// Implemented in C2
}