3fa7be51a5
- POST /api/fleet/update, POST /api/fleet-updates/{id}/cancel,
GET /api/fleet-updates/{id} (admin-only).
- GET /settings/fleet-update + /partial for htmx polling.
- Renders idle / running / terminal states with per-host progress.
- Tests cover happy path, derive-host-ids, conflict, cancel, get,
and RBAC.
380 lines
12 KiB
Go
380 lines
12 KiB
Go
// fleet_update.go — admin-only fleet rolling-update endpoints + page.
|
|
//
|
|
// Surface:
|
|
// - POST /api/fleet/update → starts a fleet update (JSON)
|
|
// - POST /api/fleet-updates/{id}/cancel
|
|
// - GET /api/fleet-updates/{id} → JSON parent + per-host array
|
|
// - GET /settings/fleet-update → admin UI page
|
|
// - GET /settings/fleet-update/partial → htmx polling fragment
|
|
//
|
|
// All routes are mounted in the admin band (see routes()).
|
|
package http
|
|
|
|
import (
	"context"
	"encoding/json"
	"errors"
	"io"
	"log/slog"
	stdhttp "net/http"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/oklog/ulid/v2"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
)
|
|
|
|
// fleetUpdateStartReq models the optional JSON payload accepted by
// POST /api/fleet/update. Either field may be left out:
//
//   - target_version: when blank, the start handler substitutes the
//     server's own version.
//   - host_ids: when empty, the start handler derives the online
//     hosts whose agent version differs from the target.
type fleetUpdateStartReq struct {
	TargetVersion string   `json:"target_version,omitempty"`
	HostIDs       []string `json:"host_ids,omitempty"`
}
|
|
|
|
// fleetUpdateHostView describes a single host's slot in a fleet
// update. It is the element type of the "hosts" array returned by
// GET /api/fleet-updates/{id} and of the rows shown on the settings
// page. HostName is filled in server-side from the host list so a
// client can label rows without looking each host up separately;
// it, JobID and FailedReason are dropped from the JSON when blank.
type fleetUpdateHostView struct {
	HostID       string `json:"host_id"`
	HostName     string `json:"host_name,omitempty"`
	Position     int    `json:"position"`
	Status       string `json:"status"`
	JobID        string `json:"job_id,omitempty"`
	FailedReason string `json:"failed_reason,omitempty"`
}
|
|
|
|
// fleetUpdateView is the JSON projection of the parent + children.
|
|
type fleetUpdateView struct {
|
|
ID string `json:"id"`
|
|
StartedAt string `json:"started_at"`
|
|
StartedByUserID string `json:"started_by_user_id"`
|
|
TargetVersion string `json:"target_version"`
|
|
Status string `json:"status"`
|
|
CurrentHostID string `json:"current_host_id,omitempty"`
|
|
HaltedReason string `json:"halted_reason,omitempty"`
|
|
CompletedAt *string `json:"completed_at,omitempty"`
|
|
Hosts []fleetUpdateHostView `json:"hosts"`
|
|
}
|
|
|
|
// fleetUpdatePage backs both the full /settings/fleet-update page
|
|
// and the partial polled fragment. Idle / Active are mutually
|
|
// exclusive: if Active is non-nil, render the progress view.
|
|
type fleetUpdatePage struct {
|
|
// Idle-state fields.
|
|
OutOfDateHosts []store.Host // online hosts whose version != target
|
|
TargetVersion string
|
|
|
|
// Active-state fields. Nil when no fleet update has ever run.
|
|
Active *store.FleetUpdate
|
|
ActiveRows []fleetUpdateHostView
|
|
|
|
// Common.
|
|
HostNames map[string]string
|
|
// PollURL is the partial endpoint htmx polls every few seconds.
|
|
PollURL string
|
|
}
|
|
|
|
// handleAPIFleetUpdateStart is POST /api/fleet/update.
|
|
func (s *Server) handleAPIFleetUpdateStart(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
user, ok := s.requireUser(r)
|
|
if !ok {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
|
return
|
|
}
|
|
if s.deps.FleetWorker == nil {
|
|
writeJSONError(w, stdhttp.StatusServiceUnavailable, "fleet_worker_unavailable", "")
|
|
return
|
|
}
|
|
var body fleetUpdateStartReq
|
|
// Empty body is fine — both fields are optional.
|
|
if r.ContentLength != 0 {
|
|
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
|
return
|
|
}
|
|
}
|
|
target := body.TargetVersion
|
|
if target == "" {
|
|
target = version.Version
|
|
}
|
|
hostIDs := body.HostIDs
|
|
if len(hostIDs) == 0 {
|
|
derived, err := s.deriveOutOfDateOnlineHostIDs(r.Context(), target)
|
|
if err != nil {
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
|
return
|
|
}
|
|
hostIDs = derived
|
|
}
|
|
if len(hostIDs) == 0 {
|
|
writeJSONError(w, stdhttp.StatusConflict, "no_hosts_eligible",
|
|
"no online hosts are out of date")
|
|
return
|
|
}
|
|
|
|
fuID, err := s.deps.FleetWorker.Start(r.Context(), user.ID, target, hostIDs)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrFleetUpdateRunning) {
|
|
writeJSONError(w, stdhttp.StatusConflict, "fleet_update_in_progress", err.Error())
|
|
return
|
|
}
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
|
return
|
|
}
|
|
|
|
auditPayload, _ := json.Marshal(map[string]any{
|
|
"fleet_update_id": fuID,
|
|
"target_version": target,
|
|
"host_count": len(hostIDs),
|
|
})
|
|
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
|
ID: ulid.Make().String(), UserID: &user.ID, Actor: "user",
|
|
Action: "fleet.update_started",
|
|
TargetKind: ptr("fleet_update"), TargetID: &fuID,
|
|
TS: time.Now().UTC(),
|
|
Payload: auditPayload,
|
|
})
|
|
|
|
writeJSON(w, stdhttp.StatusAccepted, map[string]string{"fleet_update_id": fuID})
|
|
}
|
|
|
|
// handleAPIFleetUpdateCancel is POST /api/fleet-updates/{id}/cancel.
|
|
func (s *Server) handleAPIFleetUpdateCancel(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
user, ok := s.requireUser(r)
|
|
if !ok {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
|
return
|
|
}
|
|
if s.deps.FleetWorker == nil {
|
|
writeJSONError(w, stdhttp.StatusServiceUnavailable, "fleet_worker_unavailable", "")
|
|
return
|
|
}
|
|
fuID := chi.URLParam(r, "id")
|
|
if fuID == "" {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
|
|
return
|
|
}
|
|
fu, _, err := s.deps.Store.GetFleetUpdate(r.Context(), fuID)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
writeJSONError(w, stdhttp.StatusNotFound, "fleet_update_not_found", "")
|
|
return
|
|
}
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
|
return
|
|
}
|
|
if fu.Status != "running" {
|
|
writeJSONError(w, stdhttp.StatusConflict, "fleet_update_not_running",
|
|
"fleet update is not in the running state")
|
|
return
|
|
}
|
|
if err := s.deps.FleetWorker.Cancel(r.Context(), fuID); err != nil {
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
|
return
|
|
}
|
|
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
|
ID: ulid.Make().String(), UserID: &user.ID, Actor: "user",
|
|
Action: "fleet.update_cancelled",
|
|
TargetKind: ptr("fleet_update"), TargetID: &fuID,
|
|
TS: time.Now().UTC(),
|
|
})
|
|
w.WriteHeader(stdhttp.StatusNoContent)
|
|
}
|
|
|
|
// handleAPIFleetUpdateGet is GET /api/fleet-updates/{id}.
|
|
func (s *Server) handleAPIFleetUpdateGet(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
if _, ok := s.requireUser(r); !ok {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
|
return
|
|
}
|
|
fuID := chi.URLParam(r, "id")
|
|
fu, hosts, err := s.deps.Store.GetFleetUpdate(r.Context(), fuID)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
writeJSONError(w, stdhttp.StatusNotFound, "fleet_update_not_found", "")
|
|
return
|
|
}
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
|
return
|
|
}
|
|
names := s.hostNameMap(r)
|
|
view := fleetUpdateView{
|
|
ID: fu.ID,
|
|
StartedAt: fu.StartedAt.UTC().Format(time.RFC3339Nano),
|
|
StartedByUserID: fu.StartedByUserID,
|
|
TargetVersion: fu.TargetVersion,
|
|
Status: fu.Status,
|
|
CurrentHostID: fu.CurrentHostID,
|
|
HaltedReason: fu.HaltedReason,
|
|
Hosts: make([]fleetUpdateHostView, 0, len(hosts)),
|
|
}
|
|
if fu.CompletedAt != nil {
|
|
s := fu.CompletedAt.UTC().Format(time.RFC3339Nano)
|
|
view.CompletedAt = &s
|
|
}
|
|
for _, h := range hosts {
|
|
view.Hosts = append(view.Hosts, fleetUpdateHostView{
|
|
HostID: h.HostID,
|
|
HostName: names[h.HostID],
|
|
Position: h.Position,
|
|
Status: h.Status,
|
|
JobID: h.JobID,
|
|
FailedReason: h.FailedReason,
|
|
})
|
|
}
|
|
writeJSON(w, stdhttp.StatusOK, view)
|
|
}
|
|
|
|
// handleUIFleetUpdate renders /settings/fleet-update.
|
|
func (s *Server) handleUIFleetUpdate(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
u := s.requireUIUser(w, r)
|
|
if u == nil {
|
|
return
|
|
}
|
|
page, err := s.buildFleetUpdatePage(r)
|
|
if err != nil {
|
|
slog.Error("ui fleet update: build page", "err", err)
|
|
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
|
return
|
|
}
|
|
view := s.baseView(r, u)
|
|
view.Title = "Fleet update · restic-manager"
|
|
view.Active = "settings"
|
|
view.Page = page
|
|
if err := s.deps.UI.Render(w, "fleet_update", view); err != nil {
|
|
slog.Error("ui fleet update: render", "err", err)
|
|
}
|
|
}
|
|
|
|
// handleUIFleetUpdatePartial renders just the inner panel for htmx
|
|
// auto-refresh polling — same data, no chrome.
|
|
func (s *Server) handleUIFleetUpdatePartial(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
u := s.requireUIUser(w, r)
|
|
if u == nil {
|
|
return
|
|
}
|
|
page, err := s.buildFleetUpdatePage(r)
|
|
if err != nil {
|
|
slog.Error("ui fleet update partial: build page", "err", err)
|
|
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
|
return
|
|
}
|
|
view := s.baseView(r, u)
|
|
view.Page = page
|
|
if err := s.deps.UI.RenderPartial(w, "fleet_update_inner", view); err != nil {
|
|
slog.Error("ui fleet update partial: render", "err", err)
|
|
}
|
|
}
|
|
|
|
// buildFleetUpdatePage assembles the data both /settings/fleet-update
|
|
// and its partial render against. Resolves the most-recent fleet
|
|
// update (active OR completed/cancelled/halted) so the page can show
|
|
// the last roll's result instead of disappearing into "idle" the
|
|
// instant a roll finishes.
|
|
func (s *Server) buildFleetUpdatePage(r *stdhttp.Request) (fleetUpdatePage, error) {
|
|
page := fleetUpdatePage{
|
|
TargetVersion: version.Version,
|
|
HostNames: map[string]string{},
|
|
PollURL: "/settings/fleet-update/partial",
|
|
}
|
|
hosts, err := s.deps.Store.ListHosts(r.Context())
|
|
if err != nil {
|
|
return page, err
|
|
}
|
|
for _, h := range hosts {
|
|
page.HostNames[h.ID] = h.Name
|
|
}
|
|
|
|
active, err := s.deps.Store.ActiveFleetUpdate(r.Context())
|
|
if err != nil {
|
|
return page, err
|
|
}
|
|
mostRecent := active
|
|
if mostRecent == nil {
|
|
// Fall back to the most recent terminal row so the page can
|
|
// show "completed" / "halted" / "cancelled" once the worker
|
|
// finishes. One small bespoke query — keeps the page from
|
|
// flashing back to "idle" the instant a roll wraps up.
|
|
var id string
|
|
err := s.deps.Store.DB().QueryRowContext(r.Context(),
|
|
`SELECT id FROM fleet_updates ORDER BY started_at DESC LIMIT 1`).
|
|
Scan(&id)
|
|
if err == nil {
|
|
fu, _, gerr := s.deps.Store.GetFleetUpdate(r.Context(), id)
|
|
if gerr == nil {
|
|
mostRecent = fu
|
|
}
|
|
}
|
|
}
|
|
|
|
if mostRecent != nil {
|
|
_, rows, gerr := s.deps.Store.GetFleetUpdate(r.Context(), mostRecent.ID)
|
|
if gerr == nil {
|
|
page.Active = mostRecent
|
|
page.ActiveRows = make([]fleetUpdateHostView, 0, len(rows))
|
|
for _, hr := range rows {
|
|
page.ActiveRows = append(page.ActiveRows, fleetUpdateHostView{
|
|
HostID: hr.HostID,
|
|
HostName: page.HostNames[hr.HostID],
|
|
Position: hr.Position,
|
|
Status: hr.Status,
|
|
JobID: hr.JobID,
|
|
FailedReason: hr.FailedReason,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Idle list (or "still out of date" reference even when an active
|
|
// roll is running — cheap to compute, harmless to attach).
|
|
for _, h := range hosts {
|
|
if h.Status != "online" {
|
|
continue
|
|
}
|
|
if h.AgentVersion == "" || h.AgentVersion == page.TargetVersion {
|
|
continue
|
|
}
|
|
page.OutOfDateHosts = append(page.OutOfDateHosts, h)
|
|
}
|
|
return page, nil
|
|
}
|
|
|
|
// deriveOutOfDateOnlineHostIDs returns the list of host IDs that
|
|
// (a) are online (Hub.Connected) and (b) have an agent_version that's
|
|
// non-empty AND != target. Used by the start endpoint when the caller
|
|
// omits host_ids.
|
|
func (s *Server) deriveOutOfDateOnlineHostIDs(ctx context.Context, target string) ([]string, error) {
|
|
hosts, err := s.deps.Store.ListHosts(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out := []string{}
|
|
for _, h := range hosts {
|
|
if h.AgentVersion == "" || h.AgentVersion == target {
|
|
continue
|
|
}
|
|
if !s.deps.Hub.Connected(h.ID) {
|
|
continue
|
|
}
|
|
out = append(out, h.ID)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// hostNameMap returns hostID → name; used to hydrate fleet-update
|
|
// JSON responses.
|
|
func (s *Server) hostNameMap(r *stdhttp.Request) map[string]string {
|
|
out := map[string]string{}
|
|
hosts, err := s.deps.Store.ListHosts(r.Context())
|
|
if err != nil {
|
|
return out
|
|
}
|
|
for _, h := range hosts {
|
|
out[h.ID] = h.Name
|
|
}
|
|
return out
|
|
}
|