fix: refresh hosts.open_alert_count on Raise/Resolve/AutoResolve
The denormalised projection was never written by the alerts code path, so the dashboard's OPEN ALERTS card and the per-host alerts column always read 0 regardless of how many alerts were open. fleet.GetStats sums hosts.open_alert_count; if it never moves, the card is decoration. Add refreshHostOpenAlertCount that recomputes from the alerts table (self-healing — no +/- bookkeeping to drift). Call it after the commit in RaiseOrTouch when a row was inserted, after Resolve, and after AutoResolve. Caught during the live sweep: a synthetic critical raised the count to 1, but resolving it left the dashboard reading '1 unresolved' indefinitely.
This commit is contained in:
@@ -66,9 +66,32 @@ func (s *Store) RaiseOrTouch(ctx context.Context, hostID, kind, severity, messag
|
||||
if err := tx.Commit(); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
_ = s.refreshHostOpenAlertCount(ctx, s.db, hostID)
|
||||
return id, true, nil
|
||||
}
|
||||
|
||||
// refreshHostOpenAlertCount recomputes hosts.open_alert_count from the
|
||||
// alerts table for one host. Self-healing: idempotent and survives
|
||||
// out-of-order edits. Best-effort — errors are returned but callers
|
||||
// generally discard them since the projection is non-critical.
|
||||
func (s *Store) refreshHostOpenAlertCount(ctx context.Context, exec interface {
|
||||
ExecContext(context.Context, string, ...any) (sql.Result, error)
|
||||
}, hostID string,
|
||||
) error {
|
||||
if hostID == "" {
|
||||
return nil
|
||||
}
|
||||
_, err := exec.ExecContext(ctx,
|
||||
`UPDATE hosts SET open_alert_count = (
|
||||
SELECT COUNT(*) FROM alerts
|
||||
WHERE host_id = ? AND resolved_at IS NULL
|
||||
) WHERE id = ?`, hostID, hostID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: refresh open_alert_count: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Acknowledge sets acknowledged_at + acknowledged_by; does NOT set
|
||||
// resolved_at. Idempotent — re-acknowledging just refreshes the timestamp.
|
||||
func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Time) error {
|
||||
@@ -89,6 +112,8 @@ func (s *Store) Acknowledge(ctx context.Context, id, userID string, when time.Ti
|
||||
// Resolve marks the alert resolved. Idempotent on already-resolved rows
|
||||
// (no-op).
|
||||
func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error {
|
||||
var hostID sql.NullString
|
||||
_ = s.db.QueryRowContext(ctx, `SELECT host_id FROM alerts WHERE id = ?`, id).Scan(&hostID)
|
||||
_, err := s.db.ExecContext(ctx,
|
||||
`UPDATE alerts SET resolved_at = ?
|
||||
WHERE id = ? AND resolved_at IS NULL`,
|
||||
@@ -96,6 +121,9 @@ func (s *Store) Resolve(ctx context.Context, id string, when time.Time) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: resolve alert: %w", err)
|
||||
}
|
||||
if hostID.Valid {
|
||||
_ = s.refreshHostOpenAlertCount(ctx, s.db, hostID.String)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -110,6 +138,7 @@ func (s *Store) AutoResolve(ctx context.Context, hostID, kind string, when time.
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: auto-resolve: %w", err)
|
||||
}
|
||||
_ = s.refreshHostOpenAlertCount(ctx, s.db, hostID)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user