126 lines
4.2 KiB
Go
126 lines
4.2 KiB
Go
package alert
|
|
|
|
import (
|
|
"context"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/oklog/ulid/v2"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/notification"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
func setupEngine(t *testing.T) (*Engine, *store.Store, string) {
|
|
t.Helper()
|
|
dir := t.TempDir()
|
|
st, _ := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
|
|
t.Cleanup(func() { _ = st.Close() })
|
|
keyPath := filepath.Join(dir, "secret.key")
|
|
_ = crypto.GenerateKeyFile(keyPath)
|
|
key, _ := crypto.LoadKeyFromFile(keyPath)
|
|
aead, _ := crypto.NewAEAD(key)
|
|
hub := notification.NewHub(st, aead, "https://rm.example")
|
|
eng := NewEngine(st, hub)
|
|
hostID := ulid.Make().String()
|
|
if err := st.CreateHost(context.Background(), store.Host{
|
|
ID: hostID, Name: "alfa-01", OS: "linux", Arch: "amd64",
|
|
EnrolledAt: time.Now().UTC(),
|
|
}, "deadbeef", ""); err != nil {
|
|
t.Fatalf("create host: %v", err)
|
|
}
|
|
return eng, st, hostID
|
|
}
|
|
|
|
func TestEngineBackupFailedRaisesThenResolves(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
ctx := context.Background()
|
|
|
|
eng.handleJobFinished(ctx, JobFinishedEvent{
|
|
HostID: hostID, JobID: "j1", Kind: "backup", Status: "failed",
|
|
When: time.Now().UTC(),
|
|
})
|
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 1 || open[0].Kind != KindBackupFailed {
|
|
t.Fatalf("expected one backup_failed open; got %+v", open)
|
|
}
|
|
|
|
// Second failed job should TOUCH (not raise a fresh row).
|
|
eng.handleJobFinished(ctx, JobFinishedEvent{
|
|
HostID: hostID, JobID: "j2", Kind: "backup", Status: "failed",
|
|
When: time.Now().UTC().Add(time.Minute),
|
|
})
|
|
open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 1 {
|
|
t.Fatalf("expected dedup to stay at 1 open; got %d", len(open))
|
|
}
|
|
|
|
// Success auto-resolves.
|
|
eng.handleJobFinished(ctx, JobFinishedEvent{
|
|
HostID: hostID, JobID: "j3", Kind: "backup", Status: "succeeded",
|
|
When: time.Now().UTC().Add(2 * time.Minute),
|
|
})
|
|
open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 0 {
|
|
t.Fatalf("expected zero open after success; got %d", len(open))
|
|
}
|
|
}
|
|
|
|
func TestEngineCheckFailedSeverityCritical(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
eng.handleJobFinished(context.Background(), JobFinishedEvent{
|
|
HostID: hostID, Kind: "check", Status: "failed", When: time.Now().UTC(),
|
|
})
|
|
open, _ := st.ListAlerts(context.Background(),
|
|
store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 1 || open[0].Severity != "critical" {
|
|
t.Fatalf("got %+v", open)
|
|
}
|
|
}
|
|
|
|
func TestEngineAgentOfflineRespects15MinFloor(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
// Host's last_seen_at defaulted to NULL via CreateHost (enrolled but never
|
|
// seen). Force a stale value for the test by direct DB update.
|
|
if _, err := st.DB().Exec(
|
|
`UPDATE hosts SET last_seen_at = ? WHERE id = ?`,
|
|
time.Now().UTC().Add(-20*time.Minute).Format(time.RFC3339Nano), hostID,
|
|
); err != nil {
|
|
t.Fatalf("update last_seen_at: %v", err)
|
|
}
|
|
eng.handleHostOffline(context.Background(), hostID)
|
|
open, _ := st.ListAlerts(context.Background(),
|
|
store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 1 {
|
|
t.Fatalf("expected agent_offline raised; got %d", len(open))
|
|
}
|
|
|
|
// Bring back online — should auto-resolve.
|
|
eng.handleHostOnline(context.Background(), hostID)
|
|
open, _ = st.ListAlerts(context.Background(),
|
|
store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 0 {
|
|
t.Fatalf("expected agent_offline resolved; got %d", len(open))
|
|
}
|
|
}
|
|
|
|
func TestEngineAgentOfflineUnderFloorNoRaise(t *testing.T) {
|
|
t.Parallel()
|
|
eng, st, hostID := setupEngine(t)
|
|
// last_seen_at is NULL from CreateHost (never touched). A nil
|
|
// last_seen_at means the host was enrolled but never connected —
|
|
// treat that as "now" for the floor check so we don't raise
|
|
// immediately. handleHostOffline must skip the raise.
|
|
eng.handleHostOffline(context.Background(), hostID)
|
|
open, _ := st.ListAlerts(context.Background(),
|
|
store.AlertFilter{Status: "open", HostID: hostID})
|
|
if len(open) != 0 {
|
|
t.Fatalf("expected no raise within 15-min floor; got %d", len(open))
|
|
}
|
|
}
|