From 9abe817aae35f68f698c8fc4451ff8851c3da8a0 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Mon, 4 May 2026 19:47:09 +0100 Subject: [PATCH] alert: engine skeleton + event channels --- internal/alert/engine.go | 134 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 internal/alert/engine.go diff --git a/internal/alert/engine.go b/internal/alert/engine.go new file mode 100644 index 0000000..56c8427 --- /dev/null +++ b/internal/alert/engine.go @@ -0,0 +1,134 @@ +// Package alert evaluates the hardcoded rule set and persists raises +// / acknowledges / resolves. Three event sources feed it: +// - JobFinishedEvent — pushed when a job lands a terminal state +// (the existing MarkJobFinished site) +// - HostOfflineEvent / HostOnlineEvent — pushed by the offline +// sweeper and by the ws hello handler +// - 60s ticker (internal) — drives stale-schedule + auto-resolve +// +// All output goes through store.RaiseOrTouch / Acknowledge / Resolve +// and the notification.Hub. The engine is one goroutine started at +// boot; non-blocking sends from hot paths. +package alert + +import ( + "context" + "log/slog" + "sync" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/notification" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// JobFinishedEvent carries everything the engine needs to evaluate +// the failed-X rules. Pushed via Engine.NotifyJobFinished from the +// MarkJobFinished site. +type JobFinishedEvent struct { + HostID string + JobID string + Kind string // backup | forget | prune | check | unlock | restore | diff + Status string // succeeded | failed | cancelled + When time.Time +} + +// Engine evaluates hardcoded alert rules and dispatches via notification.Hub. +type Engine struct { + store *store.Store + hub *notification.Hub + + jobs chan JobFinishedEvent + hostDown chan string // host_id + hostUp chan string + + // agentOfflineFloor is the duration a host must be offline before + // we raise. Configurable for tests; default 15m. + agentOfflineFloor time.Duration + tickPeriod time.Duration + + closeOnce sync.Once + done chan struct{} +} + +// NewEngine builds the engine. agentOfflineFloor + tickPeriod default +// to 15min and 60s respectively when zero. +func NewEngine(st *store.Store, hub *notification.Hub) *Engine { + return &Engine{ + store: st, + hub: hub, + jobs: make(chan JobFinishedEvent, 32), + hostDown: make(chan string, 32), + hostUp: make(chan string, 32), + agentOfflineFloor: 15 * time.Minute, + tickPeriod: 60 * time.Second, + done: make(chan struct{}), + } +} + +// Run drives the event loop. Returns when ctx is done. Blocks; call in +// its own goroutine. +func (e *Engine) Run(ctx context.Context) { + t := time.NewTicker(e.tickPeriod) + defer t.Stop() + for { + select { + case <-ctx.Done(): + e.closeOnce.Do(func() { close(e.done) }) + return + case ev := <-e.jobs: + e.handleJobFinished(ctx, ev) + case hostID := <-e.hostDown: + e.handleHostOffline(ctx, hostID) + case hostID := <-e.hostUp: + e.handleHostOnline(ctx, hostID) + case now := <-t.C: + e.tick(ctx, now) + } + } +} + +// NotifyJobFinished is the hot-path hook called from MarkJobFinished's +// caller (ws.handler.dispatchAgentMessage). Non-blocking: drops on a +// full channel with a slog warning. +func (e *Engine) NotifyJobFinished(ev JobFinishedEvent) { + select { + case e.jobs <- ev: + default: + slog.Warn("alert: jobs channel full; dropping event", "kind", ev.Kind, "host_id", ev.HostID) + } +} + +// NotifyHostOffline notifies the engine that a host is offline. +func (e *Engine) NotifyHostOffline(hostID string) { + select { + case e.hostDown <- hostID: + default: + slog.Warn("alert: hostDown channel full; dropping", "host_id", hostID) + } +} + +// NotifyHostOnline notifies the engine that a host is online. +func (e *Engine) NotifyHostOnline(hostID string) { + select { + case e.hostUp <- hostID: + default: + slog.Warn("alert: hostUp channel full; dropping", "host_id", hostID) + } +} + +// Placeholder method stubs for C2 implementation +func (e *Engine) handleJobFinished(ctx context.Context, ev JobFinishedEvent) { + // Implemented in C2 +} + +func (e *Engine) handleHostOffline(ctx context.Context, hostID string) { + // Implemented in C2 +} + +func (e *Engine) handleHostOnline(ctx context.Context, hostID string) { + // Implemented in C2 +} + +func (e *Engine) tick(ctx context.Context, now time.Time) { + // Implemented in C2 +}