// restic-manager/internal/alert/update_alerts.go

package alert

import (
	"context"
	"fmt"
	"log/slog"
	"time"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/notification"
)

// Alert-kind constants for P6 self-update flows. Each value is passed
// as the alert kind by the Raise*/Resolve* methods in this file.
const (
	// KindUpdateFailed is raised when an agent fails to come back with
	// the expected version after a command.update dispatch (timeout or
	// version-mismatch). Resolved by a subsequent matching hello.
	// Used by RaiseUpdateFailed and ResolveUpdateFailed, which both
	// pass the host ID as the dedup key.
	KindUpdateFailed = "update_failed"

	// KindFleetUpdateHalted is raised when the fleet-update worker
	// stops mid-run because a host failed to update or went offline.
	// Host-less alert (system-scoped). Manually resolved by an admin.
	// Used by RaiseFleetUpdateHalted, which passes the fleet-update ID
	// as the dedup key; this file defines no matching resolve helper.
	KindFleetUpdateHalted = "fleet_update_halted"
)

// RaiseUpdateFailed raises (or touches) the update_failed alert for a
// single host. The host ID doubles as the dedup key, so dispatching
// another update to the same host reuses the open alert instead of
// creating a second one. jobID and reason are only used to build the
// human-readable message; when is forwarded unchanged as the event time.
func (e *Engine) RaiseUpdateFailed(ctx context.Context, hostID, jobID, reason string, when time.Time) {
	const severity = "warning"

	// One alert per host: key on the host itself, not on the job.
	dedupKey := hostID

	e.raiseAndNotify(
		ctx,
		hostID,
		KindUpdateFailed,
		dedupKey,
		severity,
		fmt.Sprintf("Agent update failed (job %s): %s", jobID, reason),
		when,
	)
}

// ResolveUpdateFailed resolves the update_failed alert keyed on hostID,
// if one is open. It is meant to be invoked from the WS hello path once
// the agent has reconnected reporting the target version. when is
// forwarded unchanged as the resolution time.
func (e *Engine) ResolveUpdateFailed(ctx context.Context, hostID string, when time.Time) {
	// Must match the key RaiseUpdateFailed uses: the host ID itself.
	dedupKey := hostID

	e.resolveAndNotify(ctx, hostID, KindUpdateFailed, dedupKey, when)
}

// RaiseFleetUpdateHalted records that fleet update fleetUpdateID stopped
// before completing. A fleet update is a system-level concept, so the
// alert is host-less: it goes through the store's RaiseOrTouchSystem
// path (intended to leave the alerts table's host_id column NULL) and
// the notification payload carries empty host fields. fleetUpdateID is
// used as the dedup key.
//
// A store error is logged and swallowed. A notification is dispatched,
// asynchronously, only when the store reports that it raised the alert.
func (e *Engine) RaiseFleetUpdateHalted(ctx context.Context, fleetUpdateID, reason string, when time.Time) {
	// Single source for the severity so the stored alert and the
	// outgoing notification cannot drift apart.
	const severity = "warning"

	text := fmt.Sprintf("Fleet update %s halted: %s", fleetUpdateID, reason)

	alertID, raised, err := e.store.RaiseOrTouchSystem(ctx, KindFleetUpdateHalted, fleetUpdateID, severity, text, when)
	switch {
	case err != nil:
		slog.Warn("alert: raise fleet_update_halted", "fu_id", fleetUpdateID, "err", err)
		return
	case !raised:
		// The store did not report a fresh raise; nothing to announce.
		return
	}

	payload := notification.Payload{
		Event:    notification.EventRaised,
		AlertID:  alertID,
		Severity: severity,
		Kind:     KindFleetUpdateHalted,
		HostID:   "", // host-less alert
		HostName: "",
		Message:  text,
		RaisedAt: when,
	}

	// NOTE(review): the goroutine inherits the caller's ctx; if that
	// ctx is cancelled when the caller returns, the dispatch may be cut
	// short — confirm against hub.Dispatch and the fleet-update worker.
	go e.hub.Dispatch(ctx, payload)
}