Files
restic-manager/internal/alert/rules_test.go
T

126 lines
4.2 KiB
Go

package alert
import (
"context"
"path/filepath"
"testing"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/notification"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// setupEngine builds the fixture shared by the tests in this file: a store
// opened at rm.db under t.TempDir(), a notification hub using an AEAD whose
// key is generated and loaded from a key file in the same directory, an
// Engine wired to both, and one host named "alfa-01" with a fresh ULID.
//
// It returns the engine, the store, and the ID of the created host. The store
// is closed through t.Cleanup. Any setup step that fails aborts the calling
// test immediately with a message naming the step, instead of letting a
// half-built fixture cause a confusing failure later on.
func setupEngine(t *testing.T) (*Engine, *store.Store, string) {
	t.Helper()
	ctx := context.Background()
	dir := t.TempDir()

	st, err := store.Open(ctx, filepath.Join(dir, "rm.db"))
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	// Best-effort close: an error at teardown cannot change the test outcome.
	t.Cleanup(func() { _ = st.Close() })

	keyPath := filepath.Join(dir, "secret.key")
	if err := crypto.GenerateKeyFile(keyPath); err != nil {
		t.Fatalf("generate key file: %v", err)
	}
	key, err := crypto.LoadKeyFromFile(keyPath)
	if err != nil {
		t.Fatalf("load key file: %v", err)
	}
	aead, err := crypto.NewAEAD(key)
	if err != nil {
		t.Fatalf("new aead: %v", err)
	}

	hub := notification.NewHub(st, aead, "https://rm.example")
	eng := NewEngine(st, hub)

	hostID := ulid.Make().String()
	if err := st.CreateHost(ctx, store.Host{
		ID: hostID, Name: "alfa-01", OS: "linux", Arch: "amd64",
		EnrolledAt: time.Now().UTC(),
	}, "deadbeef", ""); err != nil {
		t.Fatalf("create host: %v", err)
	}
	return eng, st, hostID
}
// TestEngineBackupFailedRaisesThenResolves walks one host through three
// backup job events and checks the open alerts after each:
//
//  1. a failed backup leaves exactly one open alert of KindBackupFailed;
//  2. a second failed backup still leaves exactly one open alert;
//  3. a succeeded backup leaves no open alerts.
//
// Every ListAlerts error is checked so that a failing query cannot make the
// "zero open" assertion pass vacuously.
func TestEngineBackupFailedRaisesThenResolves(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()
	filter := store.AlertFilter{Status: "open", HostID: hostID}

	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID, JobID: "j1", Kind: "backup", Status: "failed",
		When: time.Now().UTC(),
	})
	open, err := st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts after first failure: %v", err)
	}
	if len(open) != 1 || open[0].Kind != KindBackupFailed {
		t.Fatalf("expected one backup_failed open; got %+v", open)
	}

	// Second failed job should TOUCH (not raise a fresh row).
	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID, JobID: "j2", Kind: "backup", Status: "failed",
		When: time.Now().UTC().Add(time.Minute),
	})
	open, err = st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts after second failure: %v", err)
	}
	if len(open) != 1 {
		t.Fatalf("expected dedup to stay at 1 open; got %d", len(open))
	}

	// Success auto-resolves.
	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID, JobID: "j3", Kind: "backup", Status: "succeeded",
		When: time.Now().UTC().Add(2 * time.Minute),
	})
	open, err = st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts after success: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected zero open after success; got %d", len(open))
	}
}
// TestEngineCheckFailedSeverityCritical checks that a failed "check" job
// leaves exactly one open alert for the host and that its Severity is
// "critical".
func TestEngineCheckFailedSeverityCritical(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	eng.handleJobFinished(ctx, JobFinishedEvent{
		HostID: hostID, Kind: "check", Status: "failed", When: time.Now().UTC(),
	})

	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 1 || open[0].Severity != "critical" {
		t.Fatalf("expected one open alert with critical severity; got %+v", open)
	}
}
// TestEngineAgentOfflineRespects15MinFloor sets the host's last_seen_at to
// 20 minutes in the past, then checks that handleHostOffline leaves exactly
// one open alert and that a following handleHostOnline leaves none.
//
// ListAlerts errors are checked so that a failing query cannot make the
// "resolved" assertion pass vacuously.
func TestEngineAgentOfflineRespects15MinFloor(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()
	filter := store.AlertFilter{Status: "open", HostID: hostID}

	// Host's last_seen_at defaulted to NULL via CreateHost (enrolled but never
	// seen). Force a stale value for the test by direct DB update.
	if _, err := st.DB().Exec(
		`UPDATE hosts SET last_seen_at = ? WHERE id = ?`,
		time.Now().UTC().Add(-20*time.Minute).Format(time.RFC3339Nano), hostID,
	); err != nil {
		t.Fatalf("update last_seen_at: %v", err)
	}

	eng.handleHostOffline(ctx, hostID)
	open, err := st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts after offline: %v", err)
	}
	if len(open) != 1 {
		t.Fatalf("expected agent_offline raised; got %d", len(open))
	}

	// Bring back online — should auto-resolve.
	eng.handleHostOnline(ctx, hostID)
	open, err = st.ListAlerts(ctx, filter)
	if err != nil {
		t.Fatalf("list alerts after online: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected agent_offline resolved; got %d", len(open))
	}
}
// TestEngineAgentOfflineUnderFloorNoRaise checks that handleHostOffline
// leaves no open alert for a host whose last_seen_at was never set.
//
// The only assertion is "zero open alerts", so the ListAlerts error must be
// checked: otherwise a failing query would make this test pass vacuously.
func TestEngineAgentOfflineUnderFloorNoRaise(t *testing.T) {
	t.Parallel()
	eng, st, hostID := setupEngine(t)
	ctx := context.Background()

	// last_seen_at is NULL from CreateHost (never touched). A nil
	// last_seen_at means the host was enrolled but never connected —
	// treat that as "now" for the floor check so we don't raise
	// immediately. handleHostOffline must skip the raise.
	eng.handleHostOffline(ctx, hostID)

	open, err := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
	if err != nil {
		t.Fatalf("list alerts: %v", err)
	}
	if len(open) != 0 {
		t.Fatalf("expected no raise within 15-min floor; got %d", len(open))
	}
}