fix: refresh hosts.open_alert_count on Raise/Resolve/AutoResolve

The denormalised projection was never written by the alerts code
path, so the dashboard's OPEN ALERTS card and the per-host alerts
column always read 0 regardless of how many alerts were open.
fleet.GetStats sums hosts.open_alert_count; if it never moves, the
card is decoration.

Add refreshHostOpenAlertCount, which recomputes the count from the
alerts table (self-healing — no +/- bookkeeping to drift). Call it
after the commit in RaiseOrTouch when a row was inserted, after
Resolve, and after AutoResolve.

Caught during the live sweep: a synthetic critical raised the count
to 1, but resolving it left the dashboard reading '1 unresolved'
indefinitely.
This commit is contained in:
2026-05-04 21:01:17 +01:00
parent 24eecc1673
commit cbdaa4daeb
+29
View File
@@ -66,9 +66,32 @@ func (s *Store) RaiseOrTouch(ctx context.Context, hostID, kind, severity, messag
if err := tx.Commit(); err != nil { if err := tx.Commit(); err != nil {
return "", false, err return "", false, err
} }
_ = s.refreshHostOpenAlertCount(ctx, s.db, hostID)
return id, true, nil return id, true, nil
} }
// refreshHostOpenAlertCount rewrites hosts.open_alert_count for a single
// host by counting that host's rows in alerts whose resolved_at is NULL.
// The value is recomputed from scratch on every call rather than adjusted
// up or down, so calling it repeatedly is harmless.
//
// exec is any value exposing ExecContext; the statement is run through it
// with hostID bound to both placeholders. An empty hostID is a no-op that
// returns nil. A failure from exec is wrapped and returned; it is up to the
// caller whether to act on it or drop it.
func (s *Store) refreshHostOpenAlertCount(ctx context.Context, exec interface {
	ExecContext(context.Context, string, ...any) (sql.Result, error)
}, hostID string,
) error {
	// Single statement: the subquery counts open alerts, the outer UPDATE
	// stores that count on the matching hosts row.
	const recount = `UPDATE hosts SET open_alert_count = (
SELECT COUNT(*) FROM alerts
WHERE host_id = ? AND resolved_at IS NULL
) WHERE id = ?`

	if hostID == "" {
		// Nothing to refresh without a host to target.
		return nil
	}
	if _, execErr := exec.ExecContext(ctx, recount, hostID, hostID); execErr != nil {
		return fmt.Errorf("store: refresh open_alert_count: %w", execErr)
	}
	return nil
}
// Acknowledge sets acknowledged_at + acknowledged_by; does NOT set // Acknowledge sets acknowledged_at + acknowledged_by; does NOT set
// resolved_at. Idempotent — re-acknowledging just refreshes the timestamp. // resolved_at. Idempotent — re-acknowledging just refreshes the timestamp.
func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Time) error { func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Time) error {
@@ -89,6 +112,8 @@ func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Ti
// Resolve marks the alert resolved. Idempotent on already-resolved rows // Resolve marks the alert resolved. Idempotent on already-resolved rows
// (no-op). // (no-op).
func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error { func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error {
var hostID sql.NullString
_ = s.db.QueryRowContext(ctx, `SELECT host_id FROM alerts WHERE id = ?`, id).Scan(&hostID)
_, err := s.db.ExecContext(ctx, _, err := s.db.ExecContext(ctx,
`UPDATE alerts SET resolved_at = ? `UPDATE alerts SET resolved_at = ?
WHERE id = ? AND resolved_at IS NULL`, WHERE id = ? AND resolved_at IS NULL`,
@@ -96,6 +121,9 @@ func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error {
if err != nil { if err != nil {
return fmt.Errorf("store: resolve alert: %w", err) return fmt.Errorf("store: resolve alert: %w", err)
} }
if hostID.Valid {
_ = s.refreshHostOpenAlertCount(ctx, s.db, hostID.String)
}
return nil return nil
} }
@@ -110,6 +138,7 @@ func (s *Store) AutoResolve(ctx context.Context, hostID, kind string, when time.
if err != nil { if err != nil {
return fmt.Errorf("store: auto-resolve: %w", err) return fmt.Errorf("store: auto-resolve: %w", err)
} }
_ = s.refreshHostOpenAlertCount(ctx, s.db, hostID)
return nil return nil
} }