package alert

import (
	"context"
	"testing"
	"time"

	"github.com/oklog/ulid/v2"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)

// TestIntermittentHostSuppressesOfflineAlert checks that handleHostOffline
// does NOT raise agent_offline for a host with AlwaysOn=false.
func TestIntermittentHostSuppressesOfflineAlert(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// Make the host intermittent.
	if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
		t.Fatalf("SetHostAlwaysOn: %v", err)
	}

	// Give it a stale last_seen_at well past the floor.
	if _, err := st.DB().Exec(
		`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
		time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano), "offline", hostID,
	); err != nil {
		t.Fatalf("update last_seen_at: %v", err)
	}

	eng.handleHostOffline(ctx, hostID)

	// The query error must be checked: this test expects an empty list, so a
	// failed query that returns no rows would otherwise pass vacuously.
	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected 0 open alerts for intermittent host; got %d: %+v", len(open), open)
	}
}

// TestAlwaysOnHostStillRaisesOfflineAlert checks that always-on hosts still
// get an agent_offline alert when offline past the floor.
func TestAlwaysOnHostStillRaisesOfflineAlert(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// always_on=true is the default, but be explicit.
	if err := st.SetHostAlwaysOn(ctx, hostID, true); err != nil {
		t.Fatalf("SetHostAlwaysOn: %v", err)
	}

	// Give it a stale last_seen_at well past the 15m floor.
	if _, err := st.DB().Exec(
		`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
		time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano), "offline", hostID,
	); err != nil {
		t.Fatalf("update last_seen_at: %v", err)
	}

	eng.handleHostOffline(ctx, hostID)

	// Report a query failure as such instead of as a misleading
	// "expected 1 alert; got 0" message.
	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts: %v", err)
	}
	if len(open) != 1 || open[0].Kind != KindAgentOffline {
		t.Fatalf("expected 1 agent_offline alert; got %d: %+v", len(open), open)
	}
}

// TestStalenessAlertForIntermittentHost checks that tick raises stale_schedule
// for an intermittent host whose last backup is older than 7 days AND has an
// enabled schedule. Also verifies that a succeeded backup clears the alert.
func TestStalenessAlertForIntermittentHost(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// Make intermittent.
	if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
		t.Fatalf("SetHostAlwaysOn: %v", err)
	}

	// Create a source group to attach the schedule to.
	sgID := ulid.Make().String()
	if err := st.CreateSourceGroup(ctx, &store.SourceGroup{
		ID:       sgID,
		HostID:   hostID,
		Name:     "default",
		Includes: []string{"/home"},
	}); err != nil {
		t.Fatalf("CreateSourceGroup: %v", err)
	}

	// Create an enabled schedule pointing at the source group.
	schedID := ulid.Make().String()
	if err := st.CreateSchedule(ctx, &store.Schedule{
		ID:             schedID,
		HostID:         hostID,
		CronExpr:       "0 2 * * *",
		Enabled:        true,
		SourceGroupIDs: []string{sgID},
	}); err != nil {
		t.Fatalf("CreateSchedule: %v", err)
	}

	// Set last_backup_at to 8 days ago.
	eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
	if err := st.SetHostLastBackup(ctx, hostID, "succeeded", eightDaysAgo); err != nil {
		t.Fatalf("SetHostLastBackup: %v", err)
	}

	eng.tick(ctx, time.Now().UTC())

	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts after tick: %v", err)
	}
	var staleCount int
	for _, a := range open {
		if a.Kind == KindStaleSchedule {
			staleCount++
		}
	}
	if staleCount != 1 {
		t.Fatalf("expected 1 stale_schedule alert after tick; got %d (all open: %+v)", staleCount, open)
	}

	// A succeeded backup should clear the stale_schedule alert.
	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID:        hostID,
		JobID:         ulid.Make().String(),
		Kind:          "backup",
		Status:        "succeeded",
		SourceGroupID: sgID,
		When:          time.Now().UTC(),
	})

	// The loop below only fails when it finds an alert, so an unchecked query
	// error (empty result) would look like a successful resolution.
	open, err = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts after backup: %v", err)
	}
	for _, a := range open {
		if a.Kind == KindStaleSchedule {
			t.Fatalf("expected stale_schedule to be resolved after backup succeeded; still open: %+v", a)
		}
	}
}

// TestNoStalenessWithoutEnabledSchedule checks that no stale_schedule is
// raised for an intermittent host with a stale backup but no enabled schedule.
func TestNoStalenessWithoutEnabledSchedule(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// Make intermittent.
	if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
		t.Fatalf("SetHostAlwaysOn: %v", err)
	}

	// Set last_backup_at to 8 days ago — stale — but no schedule.
	eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
	if err := st.SetHostLastBackup(ctx, hostID, "succeeded", eightDaysAgo); err != nil {
		t.Fatalf("SetHostLastBackup: %v", err)
	}

	eng.tick(ctx, time.Now().UTC())

	// A negative assertion is only meaningful if the query itself succeeded.
	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts: %v", err)
	}
	for _, a := range open {
		if a.Kind == KindStaleSchedule {
			t.Fatalf("expected no stale_schedule without an enabled schedule; got: %+v", a)
		}
	}
}

// TestResolveOnModeChangeClearsOfflineAlert checks that ResolveOnModeChange
// clears an open agent_offline alert when a host's mode is toggled.
func TestResolveOnModeChangeClearsOfflineAlert(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// Make always-on and set it offline with a stale last_seen_at.
	if err := st.SetHostAlwaysOn(ctx, hostID, true); err != nil {
		t.Fatalf("SetHostAlwaysOn: %v", err)
	}
	if _, err := st.DB().Exec(
		`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
		time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano), "offline", hostID,
	); err != nil {
		t.Fatalf("update last_seen_at: %v", err)
	}

	// Raise the offline alert.
	eng.handleHostOffline(ctx, hostID)

	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts before mode change: %v", err)
	}
	if len(open) != 1 || open[0].Kind != KindAgentOffline {
		t.Fatalf("expected 1 agent_offline alert before mode change; got %d: %+v", len(open), open)
	}

	// Toggle mode — should clear the alert.
	eng.ResolveOnModeChange(ctx, hostID, time.Now().UTC())

	// Check the error so a failed query cannot masquerade as "alert resolved".
	open, err = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("ListAlerts after mode change: %v", err)
	}
	for _, a := range open {
		if a.Kind == KindAgentOffline {
			t.Fatalf("expected agent_offline to be resolved after mode change; still open: %+v", a)
		}
	}
}

// TestNoStalenessWhenNeverBackedUp checks that no stale_schedule alert is
// raised for an intermittent host that has never backed up (nil LastBackupAt).
func TestNoStalenessWhenNeverBackedUp(t *testing.T) { t.Parallel() eng, st, hostID := setupEngine(t) ctx := context.Background() // Make intermittent. if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil { t.Fatalf("SetHostAlwaysOn: %v", err) } // Create a source group and an enabled schedule — but do NOT set LastBackupAt. sgID := ulid.Make().String() if err := st.CreateSourceGroup(ctx, &store.SourceGroup{ ID: sgID, HostID: hostID, Name: "default", Includes: []string{"/home"}, }); err != nil { t.Fatalf("CreateSourceGroup: %v", err) } schedID := ulid.Make().String() if err := st.CreateSchedule(ctx, &store.Schedule{ ID: schedID, HostID: hostID, CronExpr: "0 2 * * *", Enabled: true, SourceGroupIDs: []string{sgID}, }); err != nil { t.Fatalf("CreateSchedule: %v", err) } eng.tick(ctx, time.Now().UTC()) open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID}) for _, a := range open { if a.Kind == KindStaleSchedule { t.Fatalf("expected no stale_schedule when never backed up; got: %+v", a) } } }