refactor(alert): refresh stale_schedule docs; log tick schedule errors; add mode-change + never-backed-up tests
This commit is contained in:
@@ -196,11 +196,9 @@ func (e *Engine) handleHostOnline(ctx context.Context, hostID string) {
|
||||
// tick is the 60-second sweep. Responsibilities:
|
||||
// 1. Re-evaluate agent_offline for every offline host that may have
|
||||
// crossed the floor between explicit events.
|
||||
-// 2. Stale-schedule detection — declared in the spec but intentionally
|
||||
-// left as a no-op in v1. The precise "expected to have fired but
|
||||
-// didn't" trigger requires a store helper that lands in a later
|
||||
-// task. The KindStaleSchedule constant is exported so UI code can
|
||||
-// reference the tag string today.
|
||||
+// 2. Stale-schedule detection for intermittent hosts — raises
|
||||
+// stale_schedule when LastBackupAt is older than 7 days and the
|
||||
+// host has an enabled schedule. Always-on hosts are excluded.
|
||||
func (e *Engine) tick(ctx context.Context, now time.Time) {
|
||||
// User-management cleanup piggy-backed here for now. Setup tokens
|
||||
// have a 1h expiry; the alert engine tick is the cheapest existing
|
||||
@@ -232,7 +230,11 @@ func (e *Engine) tick(ctx context.Context, now time.Time) {
|
||||
continue
|
||||
}
|
||||
hasEnabled, err := e.hostHasEnabledSchedule(ctx, h.ID)
|
||||
-if err != nil || !hasEnabled {
|
||||
+if err != nil {
|
||||
+slog.Warn("alert: tick list schedules", "host_id", h.ID, "err", err)
|
||||
+continue
|
||||
+}
|
||||
+if !hasEnabled {
|
||||
continue
|
||||
}
|
||||
e.raiseAndNotify(ctx, h.ID, KindStaleSchedule, "", "warning",
|
||||
|
||||
Reference in New Issue
Block a user