Files
steve e6cfb1cd9f ui: fleet update page + endpoints
- POST /api/fleet/update, POST /api/fleet-updates/{id}/cancel,
  GET /api/fleet-updates/{id} (admin-only).
- GET /settings/fleet-update + /partial for htmx polling.
- Renders idle / running / terminal states with per-host progress.
- Tests cover happy path, derive-host-ids, conflict, cancel, get,
  and RBAC.
2026-05-06 22:20:03 +01:00

380 lines
12 KiB
Go

// fleet_update.go — admin-only fleet rolling-update endpoints + page.
//
// Surface:
// - POST /api/fleet/update → starts a fleet update (JSON)
// - POST /api/fleet-updates/{id}/cancel
// - GET /api/fleet-updates/{id} → JSON parent + per-host array
// - GET /settings/fleet-update → admin UI page
// - GET /settings/fleet-update/partial → htmx polling fragment
//
// All routes are mounted in the admin band (see routes()).
package http
import (
"context"
"encoding/json"
"errors"
"log/slog"
stdhttp "net/http"
"time"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
)
// fleetUpdateStartReq is the request payload accepted by
// POST /api/fleet/update. Neither field is required; the start
// handler fills in defaults for whatever the caller leaves out.
type fleetUpdateStartReq struct {
	// TargetVersion is the version to roll hosts to. When empty, the
	// handler substitutes the server's own version.
	TargetVersion string `json:"target_version,omitempty"`

	// HostIDs is the explicit set of hosts to update. When empty, the
	// handler derives the connected hosts whose agent version differs
	// from the target.
	HostIDs []string `json:"host_ids,omitempty"`
}
// fleetUpdateHostView is a single per-host row in the JSON returned by
// GET /api/fleet-updates/{id}; the same shape is reused for the rows of
// the fleet-update UI page. The host name is filled in from the store
// so that clients do not have to look each host up separately.
type fleetUpdateHostView struct {
	// HostID identifies the host this row belongs to.
	HostID string `json:"host_id"`

	// HostName is the display name looked up by HostID; omitted from
	// the JSON when no name was found.
	HostName string `json:"host_name,omitempty"`

	// Position is copied verbatim from the stored per-host row.
	Position int `json:"position"`

	// Status is copied verbatim from the stored per-host row.
	Status string `json:"status"`

	// JobID is copied from the stored per-host row; omitted when empty.
	JobID string `json:"job_id,omitempty"`

	// FailedReason is copied from the stored per-host row; omitted
	// when empty.
	FailedReason string `json:"failed_reason,omitempty"`
}
// fleetUpdateView is the JSON shape returned by
// GET /api/fleet-updates/{id}: the parent fleet-update record plus one
// entry per participating host.
type fleetUpdateView struct {
	ID              string `json:"id"`
	StartedByUserID string `json:"started_by_user_id"`
	TargetVersion   string `json:"target_version"`
	Status          string `json:"status"`

	// StartedAt is rendered by the GET handler as a UTC RFC 3339
	// timestamp with nanosecond precision.
	StartedAt string `json:"started_at"`

	// CompletedAt uses the same format as StartedAt. It stays nil (and
	// is omitted from the JSON) while the stored record has no
	// completion time.
	CompletedAt *string `json:"completed_at,omitempty"`

	// CurrentHostID and HaltedReason are copied from the stored record
	// and omitted from the JSON when empty.
	CurrentHostID string `json:"current_host_id,omitempty"`
	HaltedReason  string `json:"halted_reason,omitempty"`

	// Hosts holds the per-host rows. The GET handler always allocates
	// it, so it serialises as [] rather than null when there are none.
	Hosts []fleetUpdateHostView `json:"hosts"`
}
// fleetUpdatePage is the template data behind both the full
// /settings/fleet-update page and the fragment that htmx polls.
// Templates should treat a non-nil Active as "render the progress
// view" and a nil Active as "render the idle view".
type fleetUpdatePage struct {
	// --- Idle view ---

	// OutOfDateHosts lists hosts whose status is "online" and whose
	// agent version is known (non-empty) but differs from TargetVersion.
	OutOfDateHosts []store.Host
	// TargetVersion is the version hosts are compared against; the
	// page builder sets it to the server's own version.
	TargetVersion string

	// --- Progress view ---

	// Active is the fleet update being displayed: the running one if
	// there is one, otherwise the most recently started one. It is nil
	// when no fleet update could be loaded (for example, none has ever
	// run).
	Active *store.FleetUpdate
	// ActiveRows holds the per-host rows belonging to Active.
	ActiveRows []fleetUpdateHostView

	// --- Shared ---

	// HostNames maps host ID to host name for every known host.
	HostNames map[string]string
	// PollURL is the partial endpoint that htmx polls every few seconds.
	PollURL string
}
// handleAPIFleetUpdateStart is POST /api/fleet/update.
//
// It starts a fleet update through the fleet worker and answers
// 202 Accepted with {"fleet_update_id": ...}. The JSON body is
// optional: a missing target_version defaults to the server's own
// version, and missing host_ids are derived from the connected hosts
// whose agent version differs from the target.
//
// Error responses:
//   - 401 unauthorised             no authenticated user
//   - 503 fleet_worker_unavailable no fleet worker is configured
//   - 400 invalid_json             the body could not be decoded
//   - 409 no_hosts_eligible        nothing to update
//   - 409 fleet_update_in_progress another fleet update is running
//   - 500 internal                 any other failure
//
// NOTE(review): the body is only skipped when Content-Length is exactly
// 0. A request with an unknown length (e.g. chunked) and an empty body
// still reaches the decoder and is answered with invalid_json.
func (s *Server) handleAPIFleetUpdateStart(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}
	if s.deps.FleetWorker == nil {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "fleet_worker_unavailable", "")
		return
	}

	var body fleetUpdateStartReq
	// Empty body is fine — both fields are optional.
	if r.ContentLength != 0 {
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
			return
		}
	}

	target := body.TargetVersion
	if target == "" {
		target = version.Version
	}

	hostIDs := body.HostIDs
	if len(hostIDs) == 0 {
		derived, err := s.deriveOutOfDateOnlineHostIDs(r.Context(), target)
		if err != nil {
			writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
			return
		}
		hostIDs = derived
	}
	if len(hostIDs) == 0 {
		writeJSONError(w, stdhttp.StatusConflict, "no_hosts_eligible",
			"no online hosts are out of date")
		return
	}

	fuID, err := s.deps.FleetWorker.Start(r.Context(), user.ID, target, hostIDs)
	if err != nil {
		if errors.Is(err, store.ErrFleetUpdateRunning) {
			writeJSONError(w, stdhttp.StatusConflict, "fleet_update_in_progress", err.Error())
			return
		}
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	// Marshalling a map of strings and an int cannot fail, so the error
	// is deliberately ignored.
	auditPayload, _ := json.Marshal(map[string]any{
		"fleet_update_id": fuID,
		"target_version":  target,
		"host_count":      len(hostIDs),
	})

	// The fleet update has already been started at this point, so the
	// audit record must not be lost just because the client went away:
	// detach the write from request cancellation. Auditing stays
	// best-effort (the request still succeeds), but a failure is logged
	// instead of being dropped silently.
	auditCtx := context.WithoutCancel(r.Context())
	if err := s.deps.Store.AppendAudit(auditCtx, store.AuditEntry{
		ID: ulid.Make().String(), UserID: &user.ID, Actor: "user",
		Action:     "fleet.update_started",
		TargetKind: ptr("fleet_update"), TargetID: &fuID,
		TS:      time.Now().UTC(),
		Payload: auditPayload,
	}); err != nil {
		slog.Error("api fleet update start: append audit",
			"fleet_update_id", fuID, "err", err)
	}

	writeJSON(w, stdhttp.StatusAccepted, map[string]string{"fleet_update_id": fuID})
}
// handleAPIFleetUpdateCancel is POST /api/fleet-updates/{id}/cancel.
//
// It asks the fleet worker to cancel the given fleet update and
// answers 204 No Content on success.
//
// Error responses:
//   - 401 unauthorised             no authenticated user
//   - 503 fleet_worker_unavailable no fleet worker is configured
//   - 400 missing_id               the {id} URL parameter is empty
//   - 404 fleet_update_not_found   no such fleet update
//   - 409 fleet_update_not_running the update is not in the "running" state
//   - 500 internal                 any other failure
func (s *Server) handleAPIFleetUpdateCancel(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}
	if s.deps.FleetWorker == nil {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "fleet_worker_unavailable", "")
		return
	}

	fuID := chi.URLParam(r, "id")
	if fuID == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
		return
	}

	fu, _, err := s.deps.Store.GetFleetUpdate(r.Context(), fuID)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			writeJSONError(w, stdhttp.StatusNotFound, "fleet_update_not_found", "")
			return
		}
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}
	// Only a running update can be cancelled; anything else is a conflict.
	if fu.Status != "running" {
		writeJSONError(w, stdhttp.StatusConflict, "fleet_update_not_running",
			"fleet update is not in the running state")
		return
	}

	if err := s.deps.FleetWorker.Cancel(r.Context(), fuID); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	// The cancellation has already been issued, so the audit record
	// must not be lost just because the client went away: detach the
	// write from request cancellation. Auditing stays best-effort (the
	// request still succeeds), but a failure is logged instead of being
	// dropped silently.
	auditCtx := context.WithoutCancel(r.Context())
	if err := s.deps.Store.AppendAudit(auditCtx, store.AuditEntry{
		ID: ulid.Make().String(), UserID: &user.ID, Actor: "user",
		Action:     "fleet.update_cancelled",
		TargetKind: ptr("fleet_update"), TargetID: &fuID,
		TS: time.Now().UTC(),
	}); err != nil {
		slog.Error("api fleet update cancel: append audit",
			"fleet_update_id", fuID, "err", err)
	}

	w.WriteHeader(stdhttp.StatusNoContent)
}
// handleAPIFleetUpdateGet is GET /api/fleet-updates/{id}.
//
// It loads the fleet update named by the {id} URL parameter together
// with its per-host rows and writes them as a fleetUpdateView with
// 200 OK. Host names are filled in from the host-name map, and
// timestamps are rendered in UTC as RFC 3339 with nanoseconds.
//
// Error responses: 401 unauthorised, 404 fleet_update_not_found,
// 500 internal.
func (s *Server) handleAPIFleetUpdateGet(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if _, authed := s.requireUser(r); !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	parent, children, err := s.deps.Store.GetFleetUpdate(r.Context(), chi.URLParam(r, "id"))
	switch {
	case err == nil:
		// Found; fall through to building the response.
	case errors.Is(err, store.ErrNotFound):
		writeJSONError(w, stdhttp.StatusNotFound, "fleet_update_not_found", "")
		return
	default:
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	hostNames := s.hostNameMap(r)

	// One output row per stored child row, in the order the store
	// returned them.
	rows := make([]fleetUpdateHostView, len(children))
	for i, child := range children {
		rows[i] = fleetUpdateHostView{
			HostID:       child.HostID,
			HostName:     hostNames[child.HostID],
			Position:     child.Position,
			Status:       child.Status,
			JobID:        child.JobID,
			FailedReason: child.FailedReason,
		}
	}

	resp := fleetUpdateView{
		ID:              parent.ID,
		StartedAt:       parent.StartedAt.UTC().Format(time.RFC3339Nano),
		StartedByUserID: parent.StartedByUserID,
		TargetVersion:   parent.TargetVersion,
		Status:          parent.Status,
		CurrentHostID:   parent.CurrentHostID,
		HaltedReason:    parent.HaltedReason,
		Hosts:           rows,
	}
	// completed_at is only present once the update has a completion time.
	if done := parent.CompletedAt; done != nil {
		stamp := done.UTC().Format(time.RFC3339Nano)
		resp.CompletedAt = &stamp
	}

	writeJSON(w, stdhttp.StatusOK, resp)
}
// handleUIFleetUpdate serves the full /settings/fleet-update page.
//
// It builds the shared fleet-update page data, wraps it in the base
// view (title set, "settings" marked as the active section) and
// renders the "fleet_update" template. A failure to build the data is
// logged and answered with 500; a failure while rendering is only
// logged.
func (s *Server) handleUIFleetUpdate(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user := s.requireUIUser(w, r)
	if user == nil {
		// No user to render for — presumably requireUIUser has already
		// written the response; verify against its definition.
		return
	}

	data, buildErr := s.buildFleetUpdatePage(r)
	if buildErr != nil {
		slog.Error("ui fleet update: build page", "err", buildErr)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	vm := s.baseView(r, user)
	vm.Title = "Fleet update · restic-manager"
	vm.Active = "settings"
	vm.Page = data

	renderErr := s.deps.UI.Render(w, "fleet_update", vm)
	if renderErr != nil {
		slog.Error("ui fleet update: render", "err", renderErr)
	}
}
// handleUIFleetUpdatePartial serves the fragment that htmx polls to
// refresh the fleet-update panel. It uses the same page data as the
// full page but renders only the "fleet_update_inner" partial, without
// the surrounding chrome. A failure to build the data is logged and
// answered with 500; a failure while rendering is only logged.
func (s *Server) handleUIFleetUpdatePartial(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user := s.requireUIUser(w, r)
	if user == nil {
		// No user to render for — presumably requireUIUser has already
		// written the response; verify against its definition.
		return
	}

	data, buildErr := s.buildFleetUpdatePage(r)
	if buildErr != nil {
		slog.Error("ui fleet update partial: build page", "err", buildErr)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	vm := s.baseView(r, user)
	vm.Page = data

	renderErr := s.deps.UI.RenderPartial(w, "fleet_update_inner", vm)
	if renderErr != nil {
		slog.Error("ui fleet update partial: render", "err", renderErr)
	}
}
// buildFleetUpdatePage assembles the data that both
// /settings/fleet-update and its polled partial render against.
//
// The fleet update shown is the running one if there is one, otherwise
// the most recently started one (completed, cancelled or halted), so
// the page keeps showing the last roll's result instead of dropping
// back to "idle" the instant a roll finishes.
//
// An error is returned only when listing hosts or looking up the
// running fleet update fails. Failing to load the update that should
// be displayed is logged and degrades to the idle view (Active == nil).
func (s *Server) buildFleetUpdatePage(r *stdhttp.Request) (fleetUpdatePage, error) {
	ctx := r.Context()
	page := fleetUpdatePage{
		TargetVersion: version.Version,
		HostNames:     map[string]string{},
		PollURL:       "/settings/fleet-update/partial",
	}

	hosts, err := s.deps.Store.ListHosts(ctx)
	if err != nil {
		return page, err
	}
	for _, h := range hosts {
		page.HostNames[h.ID] = h.Name
	}

	active, err := s.deps.Store.ActiveFleetUpdate(ctx)
	if err != nil {
		return page, err
	}

	// Work out which fleet update to display before loading it, so the
	// parent and its rows come from a single GetFleetUpdate call.
	var (
		showID string
		haveID bool
	)
	if active != nil {
		showID, haveID = active.ID, true
	} else {
		// Fall back to the most recent row so the page can show
		// "completed" / "halted" / "cancelled" once the worker
		// finishes. One small bespoke query.
		//
		// A scan error leaves the page idle. The expected cause is that
		// no fleet update exists yet; NOTE(review): other query errors
		// are indistinguishable here and also end up as "idle".
		qerr := s.deps.Store.DB().QueryRowContext(ctx,
			`SELECT id FROM fleet_updates ORDER BY started_at DESC LIMIT 1`).
			Scan(&showID)
		haveID = qerr == nil
	}

	if haveID {
		fu, rows, gerr := s.deps.Store.GetFleetUpdate(ctx, showID)
		if gerr != nil {
			// Best-effort: render the idle view, but leave a trace so
			// a page that unexpectedly shows "idle" can be diagnosed.
			slog.Warn("ui fleet update: load fleet update",
				"fleet_update_id", showID, "err", gerr)
		} else {
			// Prefer the record returned by ActiveFleetUpdate when
			// there is one; otherwise use the freshly loaded record.
			shown := active
			if shown == nil {
				shown = fu
			}
			page.Active = shown
			page.ActiveRows = make([]fleetUpdateHostView, 0, len(rows))
			for _, hr := range rows {
				page.ActiveRows = append(page.ActiveRows, fleetUpdateHostView{
					HostID:       hr.HostID,
					HostName:     page.HostNames[hr.HostID],
					Position:     hr.Position,
					Status:       hr.Status,
					JobID:        hr.JobID,
					FailedReason: hr.FailedReason,
				})
			}
		}
	}

	// Idle list (or "still out of date" reference even when an active
	// roll is running — cheap to compute, harmless to attach): hosts
	// with status "online" whose known agent version differs from the
	// target.
	for _, h := range hosts {
		if h.Status != "online" {
			continue
		}
		if h.AgentVersion == "" || h.AgentVersion == page.TargetVersion {
			continue
		}
		page.OutOfDateHosts = append(page.OutOfDateHosts, h)
	}
	return page, nil
}
// deriveOutOfDateOnlineHostIDs picks the default host set for the
// start endpoint when the caller sends no host_ids. A host qualifies
// when its agent version is known (non-empty), differs from target,
// and the hub reports the host as connected. The result is never nil;
// it is empty when no host qualifies. A failure to list hosts is
// returned unchanged.
func (s *Server) deriveOutOfDateOnlineHostIDs(ctx context.Context, target string) ([]string, error) {
	all, listErr := s.deps.Store.ListHosts(ctx)
	if listErr != nil {
		return nil, listErr
	}

	ids := make([]string, 0)
	for _, host := range all {
		// The version test comes first so the hub is only consulted for
		// hosts that are actually out of date.
		outdated := host.AgentVersion != "" && host.AgentVersion != target
		if outdated && s.deps.Hub.Connected(host.ID) {
			ids = append(ids, host.ID)
		}
	}
	return ids, nil
}
// hostNameMap returns hostID → name for every host in the store; it is
// used to hydrate fleet-update JSON responses.
//
// The lookup is best-effort: if the hosts cannot be listed, the
// failure is logged and an empty (non-nil) map is returned, so callers
// simply end up with blank host names.
func (s *Server) hostNameMap(r *stdhttp.Request) map[string]string {
	hosts, err := s.deps.Store.ListHosts(r.Context())
	if err != nil {
		slog.Warn("fleet update: list hosts for name lookup", "err", err)
		return map[string]string{}
	}
	names := make(map[string]string, len(hosts))
	for _, h := range hosts {
		names[h.ID] = h.Name
	}
	return names
}