256 lines
7.7 KiB
Go
256 lines
7.7 KiB
Go
package alert
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/oklog/ulid/v2"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// TestIntermittentHostSuppressesOfflineAlert checks that handleHostOffline
|
|
// does NOT raise agent_offline for a host with AlwaysOn=false.
|
|
func TestIntermittentHostSuppressesOfflineAlert(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
// Make the host intermittent.
|
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
|
}
|
|
|
|
// Give it a stale last_seen_at well past the floor.
|
|
if _, err := st.DB().Exec(
|
|
`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
|
|
time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano),
|
|
"offline",
|
|
hostID,
|
|
); err != nil {
|
|
t.Fatalf("update last_seen_at: %v", err)
|
|
}
|
|
|
|
eng.handleHostOffline(ctx, hostID)
|
|
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 0 {
|
|
t.Fatalf("expected 0 open alerts for intermittent host; got %d: %+v", len(open), open)
|
|
}
|
|
}
|
|
|
|
// TestAlwaysOnHostStillRaisesOfflineAlert checks that always-on hosts still
|
|
// get an agent_offline alert when offline past the floor.
|
|
func TestAlwaysOnHostStillRaisesOfflineAlert(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
// always_on=true is the default, but be explicit.
|
|
if err := st.SetHostAlwaysOn(ctx, hostID, true); err != nil {
|
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
|
}
|
|
|
|
// Give it a stale last_seen_at well past the 15m floor.
|
|
if _, err := st.DB().Exec(
|
|
`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
|
|
time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano),
|
|
"offline",
|
|
hostID,
|
|
); err != nil {
|
|
t.Fatalf("update last_seen_at: %v", err)
|
|
}
|
|
|
|
eng.handleHostOffline(ctx, hostID)
|
|
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 1 || open[0].Kind != KindAgentOffline {
|
|
t.Fatalf("expected 1 agent_offline alert; got %d: %+v", len(open), open)
|
|
}
|
|
}
|
|
|
|
// TestStalenessAlertForIntermittentHost checks that tick raises stale_schedule
|
|
// for an intermittent host whose last backup is older than 7 days AND has an
|
|
// enabled schedule. Also verifies that a succeeded backup clears the alert.
|
|
func TestStalenessAlertForIntermittentHost(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
// Make intermittent.
|
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
|
}
|
|
|
|
// Create a source group to attach the schedule to.
|
|
sgID := ulid.Make().String()
|
|
if err := st.CreateSourceGroup(ctx, &store.SourceGroup{
|
|
ID: sgID,
|
|
HostID: hostID,
|
|
Name: "default",
|
|
Includes: []string{"/home"},
|
|
}); err != nil {
|
|
t.Fatalf("CreateSourceGroup: %v", err)
|
|
}
|
|
|
|
// Create an enabled schedule pointing at the source group.
|
|
schedID := ulid.Make().String()
|
|
if err := st.CreateSchedule(ctx, &store.Schedule{
|
|
ID: schedID,
|
|
HostID: hostID,
|
|
CronExpr: "0 2 * * *",
|
|
Enabled: true,
|
|
SourceGroupIDs: []string{sgID},
|
|
}); err != nil {
|
|
t.Fatalf("CreateSchedule: %v", err)
|
|
}
|
|
|
|
// Set last_backup_at to 8 days ago.
|
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
|
if err := st.SetHostLastBackup(ctx, hostID, "succeeded", eightDaysAgo); err != nil {
|
|
t.Fatalf("SetHostLastBackup: %v", err)
|
|
}
|
|
|
|
eng.tick(ctx, time.Now().UTC())
|
|
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
var staleCount int
|
|
for _, a := range open {
|
|
if a.Kind == KindStaleSchedule {
|
|
staleCount++
|
|
}
|
|
}
|
|
if staleCount != 1 {
|
|
t.Fatalf("expected 1 stale_schedule alert after tick; got %d (all open: %+v)", staleCount, open)
|
|
}
|
|
|
|
// A succeeded backup should clear the stale_schedule alert.
|
|
eng.handleJobFinished(ctx, JobFinishedEvent{
|
|
HostID: hostID,
|
|
JobID: ulid.Make().String(),
|
|
Kind: "backup",
|
|
Status: "succeeded",
|
|
SourceGroupID: sgID,
|
|
When: time.Now().UTC(),
|
|
})
|
|
|
|
open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
for _, a := range open {
|
|
if a.Kind == KindStaleSchedule {
|
|
t.Fatalf("expected stale_schedule to be resolved after backup succeeded; still open: %+v", a)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestNoStalenessWithoutEnabledSchedule checks that no stale_schedule is
|
|
// raised for an intermittent host with a stale backup but no enabled schedule.
|
|
func TestNoStalenessWithoutEnabledSchedule(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
// Make intermittent.
|
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
|
}
|
|
|
|
// Set last_backup_at to 8 days ago — stale — but no schedule.
|
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
|
if err := st.SetHostLastBackup(ctx, hostID, "succeeded", eightDaysAgo); err != nil {
|
|
t.Fatalf("SetHostLastBackup: %v", err)
|
|
}
|
|
|
|
eng.tick(ctx, time.Now().UTC())
|
|
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
for _, a := range open {
|
|
if a.Kind == KindStaleSchedule {
|
|
t.Fatalf("expected no stale_schedule without an enabled schedule; got: %+v", a)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestResolveOnModeChangeClearsOfflineAlert checks that ResolveOnModeChange
|
|
// clears an open agent_offline alert when a host's mode is toggled.
|
|
func TestResolveOnModeChangeClearsOfflineAlert(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
// Make always-on and set it offline with a stale last_seen_at.
|
|
if err := st.SetHostAlwaysOn(ctx, hostID, true); err != nil {
|
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
|
}
|
|
if _, err := st.DB().Exec(
|
|
`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
|
|
time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano),
|
|
"offline",
|
|
hostID,
|
|
); err != nil {
|
|
t.Fatalf("update last_seen_at: %v", err)
|
|
}
|
|
|
|
// Raise the offline alert.
|
|
eng.handleHostOffline(ctx, hostID)
|
|
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 1 || open[0].Kind != KindAgentOffline {
|
|
t.Fatalf("expected 1 agent_offline alert before mode change; got %d: %+v", len(open), open)
|
|
}
|
|
|
|
// Toggle mode — should clear the alert.
|
|
eng.ResolveOnModeChange(ctx, hostID, time.Now().UTC())
|
|
|
|
open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
for _, a := range open {
|
|
if a.Kind == KindAgentOffline {
|
|
t.Fatalf("expected agent_offline to be resolved after mode change; still open: %+v", a)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestNoStalenessWhenNeverBackedUp checks that no stale_schedule alert is
|
|
// raised for an intermittent host that has never backed up (nil LastBackupAt).
|
|
func TestNoStalenessWhenNeverBackedUp(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
// Make intermittent.
|
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
|
}
|
|
|
|
// Create a source group and an enabled schedule — but do NOT set LastBackupAt.
|
|
sgID := ulid.Make().String()
|
|
if err := st.CreateSourceGroup(ctx, &store.SourceGroup{
|
|
ID: sgID,
|
|
HostID: hostID,
|
|
Name: "default",
|
|
Includes: []string{"/home"},
|
|
}); err != nil {
|
|
t.Fatalf("CreateSourceGroup: %v", err)
|
|
}
|
|
|
|
schedID := ulid.Make().String()
|
|
if err := st.CreateSchedule(ctx, &store.Schedule{
|
|
ID: schedID,
|
|
HostID: hostID,
|
|
CronExpr: "0 2 * * *",
|
|
Enabled: true,
|
|
SourceGroupIDs: []string{sgID},
|
|
}); err != nil {
|
|
t.Fatalf("CreateSchedule: %v", err)
|
|
}
|
|
|
|
eng.tick(ctx, time.Now().UTC())
|
|
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
for _, a := range open {
|
|
if a.Kind == KindStaleSchedule {
|
|
t.Fatalf("expected no stale_schedule when never backed up; got: %+v", a)
|
|
}
|
|
}
|
|
}
|