ui: fleet update page + endpoints
- POST /api/fleet/update, POST /api/fleet-updates/{id}/cancel,
GET /api/fleet-updates/{id} (admin-only).
- GET /settings/fleet-update + /partial for htmx polling.
- Renders idle / running / terminal states with per-host progress.
- Tests cover happy path, derive-host-ids, conflict, cancel, get,
and RBAC.
This commit is contained in:
@@ -0,0 +1,379 @@
|
||||
// fleet_update.go — admin-only fleet rolling-update endpoints + page.
|
||||
//
|
||||
// Surface:
|
||||
// - POST /api/fleet/update → starts a fleet update (JSON)
|
||||
// - POST /api/fleet-updates/{id}/cancel
|
||||
// - GET /api/fleet-updates/{id} → JSON parent + per-host array
|
||||
// - GET /settings/fleet-update → admin UI page
|
||||
// - GET /settings/fleet-update/partial → htmx polling fragment
|
||||
//
|
||||
// All routes are mounted in the admin band (see routes()).
|
||||
package http
|
||||
|
||||
import (
	"context"
	"database/sql"
	"encoding/json"
	"errors"
	"io"
	"log/slog"
	stdhttp "net/http"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/oklog/ulid/v2"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
)
|
||||
|
||||
// fleetUpdateStartReq is the decoded JSON body of POST /api/fleet/update.
//
// Either field may be left out. The start handler substitutes the
// server's own version for an empty target_version, and derives the
// out-of-date online hosts when host_ids is empty.
type fleetUpdateStartReq struct {
	// TargetVersion is the agent version the fleet should be rolled to.
	TargetVersion string `json:"target_version,omitempty"`
	// HostIDs optionally restricts the roll to an explicit set of hosts.
	HostIDs []string `json:"host_ids,omitempty"`
}
|
||||
|
||||
// fleetUpdateHostView is a single per-host entry in the response of
// GET /api/fleet-updates/{id}; the settings page reuses it for its
// progress rows. HostName is looked up server-side from the store's
// host list, which spares clients an extra request per host.
//
// HostName, JobID and FailedReason are dropped from the JSON when empty.
type fleetUpdateHostView struct {
	HostID       string `json:"host_id"`
	HostName     string `json:"host_name,omitempty"`
	Position     int    `json:"position"`
	Status       string `json:"status"`
	JobID        string `json:"job_id,omitempty"`
	FailedReason string `json:"failed_reason,omitempty"`
}
|
||||
|
||||
// fleetUpdateView is the JSON projection of the parent + children.
|
||||
type fleetUpdateView struct {
|
||||
ID string `json:"id"`
|
||||
StartedAt string `json:"started_at"`
|
||||
StartedByUserID string `json:"started_by_user_id"`
|
||||
TargetVersion string `json:"target_version"`
|
||||
Status string `json:"status"`
|
||||
CurrentHostID string `json:"current_host_id,omitempty"`
|
||||
HaltedReason string `json:"halted_reason,omitempty"`
|
||||
CompletedAt *string `json:"completed_at,omitempty"`
|
||||
Hosts []fleetUpdateHostView `json:"hosts"`
|
||||
}
|
||||
|
||||
// fleetUpdatePage backs both the full /settings/fleet-update page
|
||||
// and the partial polled fragment. Idle / Active are mutually
|
||||
// exclusive: if Active is non-nil, render the progress view.
|
||||
type fleetUpdatePage struct {
|
||||
// Idle-state fields.
|
||||
OutOfDateHosts []store.Host // online hosts whose version != target
|
||||
TargetVersion string
|
||||
|
||||
// Active-state fields. Nil when no fleet update has ever run.
|
||||
Active *store.FleetUpdate
|
||||
ActiveRows []fleetUpdateHostView
|
||||
|
||||
// Common.
|
||||
HostNames map[string]string
|
||||
// PollURL is the partial endpoint htmx polls every few seconds.
|
||||
PollURL string
|
||||
}
|
||||
|
||||
// handleAPIFleetUpdateStart is POST /api/fleet/update.
|
||||
func (s *Server) handleAPIFleetUpdateStart(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
if s.deps.FleetWorker == nil {
|
||||
writeJSONError(w, stdhttp.StatusServiceUnavailable, "fleet_worker_unavailable", "")
|
||||
return
|
||||
}
|
||||
var body fleetUpdateStartReq
|
||||
// Empty body is fine — both fields are optional.
|
||||
if r.ContentLength != 0 {
|
||||
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
target := body.TargetVersion
|
||||
if target == "" {
|
||||
target = version.Version
|
||||
}
|
||||
hostIDs := body.HostIDs
|
||||
if len(hostIDs) == 0 {
|
||||
derived, err := s.deriveOutOfDateOnlineHostIDs(r.Context(), target)
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
hostIDs = derived
|
||||
}
|
||||
if len(hostIDs) == 0 {
|
||||
writeJSONError(w, stdhttp.StatusConflict, "no_hosts_eligible",
|
||||
"no online hosts are out of date")
|
||||
return
|
||||
}
|
||||
|
||||
fuID, err := s.deps.FleetWorker.Start(r.Context(), user.ID, target, hostIDs)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrFleetUpdateRunning) {
|
||||
writeJSONError(w, stdhttp.StatusConflict, "fleet_update_in_progress", err.Error())
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
auditPayload, _ := json.Marshal(map[string]any{
|
||||
"fleet_update_id": fuID,
|
||||
"target_version": target,
|
||||
"host_count": len(hostIDs),
|
||||
})
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(), UserID: &user.ID, Actor: "user",
|
||||
Action: "fleet.update_started",
|
||||
TargetKind: ptr("fleet_update"), TargetID: &fuID,
|
||||
TS: time.Now().UTC(),
|
||||
Payload: auditPayload,
|
||||
})
|
||||
|
||||
writeJSON(w, stdhttp.StatusAccepted, map[string]string{"fleet_update_id": fuID})
|
||||
}
|
||||
|
||||
// handleAPIFleetUpdateCancel is POST /api/fleet-updates/{id}/cancel.
|
||||
func (s *Server) handleAPIFleetUpdateCancel(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
if s.deps.FleetWorker == nil {
|
||||
writeJSONError(w, stdhttp.StatusServiceUnavailable, "fleet_worker_unavailable", "")
|
||||
return
|
||||
}
|
||||
fuID := chi.URLParam(r, "id")
|
||||
if fuID == "" {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
|
||||
return
|
||||
}
|
||||
fu, _, err := s.deps.Store.GetFleetUpdate(r.Context(), fuID)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
writeJSONError(w, stdhttp.StatusNotFound, "fleet_update_not_found", "")
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
if fu.Status != "running" {
|
||||
writeJSONError(w, stdhttp.StatusConflict, "fleet_update_not_running",
|
||||
"fleet update is not in the running state")
|
||||
return
|
||||
}
|
||||
if err := s.deps.FleetWorker.Cancel(r.Context(), fuID); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(), UserID: &user.ID, Actor: "user",
|
||||
Action: "fleet.update_cancelled",
|
||||
TargetKind: ptr("fleet_update"), TargetID: &fuID,
|
||||
TS: time.Now().UTC(),
|
||||
})
|
||||
w.WriteHeader(stdhttp.StatusNoContent)
|
||||
}
|
||||
|
||||
// handleAPIFleetUpdateGet is GET /api/fleet-updates/{id}.
|
||||
func (s *Server) handleAPIFleetUpdateGet(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if _, ok := s.requireUser(r); !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
fuID := chi.URLParam(r, "id")
|
||||
fu, hosts, err := s.deps.Store.GetFleetUpdate(r.Context(), fuID)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
writeJSONError(w, stdhttp.StatusNotFound, "fleet_update_not_found", "")
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
names := s.hostNameMap(r)
|
||||
view := fleetUpdateView{
|
||||
ID: fu.ID,
|
||||
StartedAt: fu.StartedAt.UTC().Format(time.RFC3339Nano),
|
||||
StartedByUserID: fu.StartedByUserID,
|
||||
TargetVersion: fu.TargetVersion,
|
||||
Status: fu.Status,
|
||||
CurrentHostID: fu.CurrentHostID,
|
||||
HaltedReason: fu.HaltedReason,
|
||||
Hosts: make([]fleetUpdateHostView, 0, len(hosts)),
|
||||
}
|
||||
if fu.CompletedAt != nil {
|
||||
s := fu.CompletedAt.UTC().Format(time.RFC3339Nano)
|
||||
view.CompletedAt = &s
|
||||
}
|
||||
for _, h := range hosts {
|
||||
view.Hosts = append(view.Hosts, fleetUpdateHostView{
|
||||
HostID: h.HostID,
|
||||
HostName: names[h.HostID],
|
||||
Position: h.Position,
|
||||
Status: h.Status,
|
||||
JobID: h.JobID,
|
||||
FailedReason: h.FailedReason,
|
||||
})
|
||||
}
|
||||
writeJSON(w, stdhttp.StatusOK, view)
|
||||
}
|
||||
|
||||
// handleUIFleetUpdate renders /settings/fleet-update.
|
||||
func (s *Server) handleUIFleetUpdate(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
u := s.requireUIUser(w, r)
|
||||
if u == nil {
|
||||
return
|
||||
}
|
||||
page, err := s.buildFleetUpdatePage(r)
|
||||
if err != nil {
|
||||
slog.Error("ui fleet update: build page", "err", err)
|
||||
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
view := s.baseView(r, u)
|
||||
view.Title = "Fleet update · restic-manager"
|
||||
view.Active = "settings"
|
||||
view.Page = page
|
||||
if err := s.deps.UI.Render(w, "fleet_update", view); err != nil {
|
||||
slog.Error("ui fleet update: render", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// handleUIFleetUpdatePartial renders just the inner panel for htmx
|
||||
// auto-refresh polling — same data, no chrome.
|
||||
func (s *Server) handleUIFleetUpdatePartial(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
u := s.requireUIUser(w, r)
|
||||
if u == nil {
|
||||
return
|
||||
}
|
||||
page, err := s.buildFleetUpdatePage(r)
|
||||
if err != nil {
|
||||
slog.Error("ui fleet update partial: build page", "err", err)
|
||||
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
view := s.baseView(r, u)
|
||||
view.Page = page
|
||||
if err := s.deps.UI.RenderPartial(w, "fleet_update_inner", view); err != nil {
|
||||
slog.Error("ui fleet update partial: render", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// buildFleetUpdatePage assembles the data both /settings/fleet-update
|
||||
// and its partial render against. Resolves the most-recent fleet
|
||||
// update (active OR completed/cancelled/halted) so the page can show
|
||||
// the last roll's result instead of disappearing into "idle" the
|
||||
// instant a roll finishes.
|
||||
func (s *Server) buildFleetUpdatePage(r *stdhttp.Request) (fleetUpdatePage, error) {
|
||||
page := fleetUpdatePage{
|
||||
TargetVersion: version.Version,
|
||||
HostNames: map[string]string{},
|
||||
PollURL: "/settings/fleet-update/partial",
|
||||
}
|
||||
hosts, err := s.deps.Store.ListHosts(r.Context())
|
||||
if err != nil {
|
||||
return page, err
|
||||
}
|
||||
for _, h := range hosts {
|
||||
page.HostNames[h.ID] = h.Name
|
||||
}
|
||||
|
||||
active, err := s.deps.Store.ActiveFleetUpdate(r.Context())
|
||||
if err != nil {
|
||||
return page, err
|
||||
}
|
||||
mostRecent := active
|
||||
if mostRecent == nil {
|
||||
// Fall back to the most recent terminal row so the page can
|
||||
// show "completed" / "halted" / "cancelled" once the worker
|
||||
// finishes. One small bespoke query — keeps the page from
|
||||
// flashing back to "idle" the instant a roll wraps up.
|
||||
var id string
|
||||
err := s.deps.Store.DB().QueryRowContext(r.Context(),
|
||||
`SELECT id FROM fleet_updates ORDER BY started_at DESC LIMIT 1`).
|
||||
Scan(&id)
|
||||
if err == nil {
|
||||
fu, _, gerr := s.deps.Store.GetFleetUpdate(r.Context(), id)
|
||||
if gerr == nil {
|
||||
mostRecent = fu
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if mostRecent != nil {
|
||||
_, rows, gerr := s.deps.Store.GetFleetUpdate(r.Context(), mostRecent.ID)
|
||||
if gerr == nil {
|
||||
page.Active = mostRecent
|
||||
page.ActiveRows = make([]fleetUpdateHostView, 0, len(rows))
|
||||
for _, hr := range rows {
|
||||
page.ActiveRows = append(page.ActiveRows, fleetUpdateHostView{
|
||||
HostID: hr.HostID,
|
||||
HostName: page.HostNames[hr.HostID],
|
||||
Position: hr.Position,
|
||||
Status: hr.Status,
|
||||
JobID: hr.JobID,
|
||||
FailedReason: hr.FailedReason,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Idle list (or "still out of date" reference even when an active
|
||||
// roll is running — cheap to compute, harmless to attach).
|
||||
for _, h := range hosts {
|
||||
if h.Status != "online" {
|
||||
continue
|
||||
}
|
||||
if h.AgentVersion == "" || h.AgentVersion == page.TargetVersion {
|
||||
continue
|
||||
}
|
||||
page.OutOfDateHosts = append(page.OutOfDateHosts, h)
|
||||
}
|
||||
return page, nil
|
||||
}
|
||||
|
||||
// deriveOutOfDateOnlineHostIDs returns the list of host IDs that
|
||||
// (a) are online (Hub.Connected) and (b) have an agent_version that's
|
||||
// non-empty AND != target. Used by the start endpoint when the caller
|
||||
// omits host_ids.
|
||||
func (s *Server) deriveOutOfDateOnlineHostIDs(ctx context.Context, target string) ([]string, error) {
|
||||
hosts, err := s.deps.Store.ListHosts(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := []string{}
|
||||
for _, h := range hosts {
|
||||
if h.AgentVersion == "" || h.AgentVersion == target {
|
||||
continue
|
||||
}
|
||||
if !s.deps.Hub.Connected(h.ID) {
|
||||
continue
|
||||
}
|
||||
out = append(out, h.ID)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// hostNameMap returns hostID → name; used to hydrate fleet-update
|
||||
// JSON responses.
|
||||
func (s *Server) hostNameMap(r *stdhttp.Request) map[string]string {
|
||||
out := map[string]string{}
|
||||
hosts, err := s.deps.Store.ListHosts(r.Context())
|
||||
if err != nil {
|
||||
return out
|
||||
}
|
||||
for _, h := range hosts {
|
||||
out[h.ID] = h.Name
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user