package alert

import (
	"context"
	"path/filepath"
	"testing"
	"time"

	"github.com/oklog/ulid/v2"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/notification"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)

// setupEngine builds an Engine for a single test. It opens a fresh store in
// the test's temp directory, generates and loads a key file to construct the
// AEAD handed to the notification hub, and creates one host record.
//
// It returns the engine, the backing store, and the ID of the created host.
// Any setup failure aborts the test immediately so that later assertions never
// run against a nil store or a half-built engine.
func setupEngine(t *testing.T) (*Engine, *store.Store, string) {
	t.Helper()
	ctx := context.Background()
	dir := t.TempDir()

	st, err := store.Open(ctx, filepath.Join(dir, "rm.db"))
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	// Close is best-effort: the temp dir is removed by the testing package.
	t.Cleanup(func() { _ = st.Close() })

	keyPath := filepath.Join(dir, "secret.key")
	if err := crypto.GenerateKeyFile(keyPath); err != nil {
		t.Fatalf("generate key file: %v", err)
	}
	key, err := crypto.LoadKeyFromFile(keyPath)
	if err != nil {
		t.Fatalf("load key file: %v", err)
	}
	aead, err := crypto.NewAEAD(key)
	if err != nil {
		t.Fatalf("new aead: %v", err)
	}

	hub := notification.NewHub(st, aead, "https://rm.example")
	eng := NewEngine(st, hub)

	hostID := ulid.Make().String()
	if err := st.CreateHost(ctx, store.Host{
		ID:         hostID,
		Name:       "alfa-01",
		OS:         "linux",
		Arch:       "amd64",
		EnrolledAt: time.Now().UTC(),
	}, "deadbeef", ""); err != nil {
		t.Fatalf("create host: %v", err)
	}
	return eng, st, hostID
}

// TestEngineBackupFailedRaisesThenResolves checks the backup_failed lifecycle:
// a failed backup opens one alert, a second failure does not open another, and
// a succeeded backup leaves no open alerts for the host.
func TestEngineBackupFailedRaisesThenResolves(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()
	filter := store.AlertFilter{Status: "open", HostID: hostID}

	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID,
		JobID:  "j1",
		Kind:   "backup",
		Status: "failed",
		When:   time.Now().UTC(),
	})
	open, err := st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 1 || open[0].Kind != KindBackupFailed {
		t.Fatalf("expected one backup_failed open; got %+v", open)
	}

	// Second failed job should TOUCH (not raise a fresh row).
	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID,
		JobID:  "j2",
		Kind:   "backup",
		Status: "failed",
		When:   time.Now().UTC().Add(time.Minute),
	})
	open, err = st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 1 {
		t.Fatalf("expected dedup to stay at 1 open; got %d", len(open))
	}

	// Success auto-resolves.
	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID,
		JobID:  "j3",
		Kind:   "backup",
		Status: "succeeded",
		When:   time.Now().UTC().Add(2 * time.Minute),
	})
	open, err = st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected zero open after success; got %d", len(open))
	}
}

// TestEngineCheckFailedSeverityCritical checks that a failed "check" job opens
// exactly one alert and that its severity is "critical".
func TestEngineCheckFailedSeverityCritical(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID,
		Kind:   "check",
		Status: "failed",
		When:   time.Now().UTC(),
	})
	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 1 || open[0].Severity != "critical" {
		t.Fatalf("got %+v", open)
	}
}

// TestEngineAgentOfflineRespects15MinFloor checks that a host last seen 20
// minutes ago gets an open alert from handleHostOffline, and that
// handleHostOnline leaves no open alerts for that host.
func TestEngineAgentOfflineRespects15MinFloor(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()
	filter := store.AlertFilter{Status: "open", HostID: hostID}

	// Host's last_seen_at defaulted to NULL via CreateHost (enrolled but never
	// seen). Force a stale value for the test by direct DB update.
	if _, err := st.DB().Exec(
		`UPDATE hosts SET last_seen_at = ? WHERE id = ?`,
		time.Now().UTC().Add(-20*time.Minute).Format(time.RFC3339Nano), hostID,
	); err != nil {
		t.Fatalf("update last_seen_at: %v", err)
	}

	eng.handleHostOffline(ctx, hostID)
	open, err := st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 1 {
		t.Fatalf("expected agent_offline raised; got %d", len(open))
	}

	// Bring back online — should auto-resolve.
	eng.handleHostOnline(ctx, hostID)
	open, err = st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected agent_offline resolved; got %d", len(open))
	}
}

// TestEngineAgentOfflineUnderFloorNoRaise checks that handleHostOffline opens
// no alert for a freshly created host whose last_seen_at was never set.
func TestEngineAgentOfflineUnderFloorNoRaise(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// last_seen_at is NULL from CreateHost (never touched). A nil
	// last_seen_at means the host was enrolled but never connected —
	// treat that as "now" for the floor check so we don't raise
	// immediately. handleHostOffline must skip the raise.
	eng.handleHostOffline(ctx, hostID)
	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected no raise within 15-min floor; got %d", len(open))
	}
}