testing: bootstrap UI, agent reliability, NS-01..04 + alert username

Smooths out the rough edges that came up while exercising a live deployment.

First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.

Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Server unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.

Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).

NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.

NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.

NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.

NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.

Alerts page: ack'd-by line resolves user_id ULID to username.

Compose.yaml ignored — host-specific.
This commit is contained in:
2026-05-05 22:03:15 +01:00
parent ddb46e16b6
commit 02e4ef7544
40 changed files with 2135 additions and 109 deletions
+276 -18
View File
@@ -9,6 +9,7 @@ import (
"log/slog"
stdhttp "net/http"
"net/url"
"sort"
"strings"
"time"
@@ -130,7 +131,7 @@ func (s *Server) version() string {
type dashboardPage struct {
Hosts []dashboardHostRow
HostCount int // unfiltered fleet size
ShownCount int // after the tag filter (== HostCount when no filter)
ShownCount int // after every active filter
Summary store.FleetSummary
PendingHosts []store.PendingHost // announce-and-approve queue (P2-18d)
CritOpenCount int
@@ -139,6 +140,31 @@ type dashboardPage struct {
// the fleet, used to render the chip-row.
ActiveTag string
KnownTags []string
// Filter / sort URL state (NS-04). Round-tripped through query
// string so a bookmarked / shared dashboard URL is durable, and
// passed back to the template so the form inputs and column
// header sort-arrows render with current state.
Filter dashboardFilter
// RefreshURL is the same dashboard URL with all current filters
// pinned, used by the htmx live-poll trigger to refetch the
// table without flashing the surrounding chrome.
RefreshURL string
// SortURL is a per-column URL builder: passing a column key
// returns the URL that sorts by that column (toggling direction
// when it's already active). Pre-computed so the template stays
// dumb.
SortURL map[string]string
}
// dashboardFilter holds the parsed query-string filter state for the
// dashboard. It is produced by parseDashboardFilter, serialised back
// into a query string by encode, and applied to the host list by
// filterAndSortDashboardHosts. An empty string in any of the filter
// fields (Search, Status, RepoStatus, Tag) means "no filter on this
// dimension".
type dashboardFilter struct {
Search string // ?q= — case-insensitive substring match against the host name; whitespace-trimmed on parse
Status string // ?status= — "" | "online" | "offline" | "never_seen" (never_seen matches hosts whose LastSeenAt is nil)
RepoStatus string // ?repo_status= — "" | "unknown" | "ready" | "init_failed"; a host with an empty repo status is treated as "unknown"
Tag string // ?tag= — exact tag match; mirrors ActiveTag for round-trip on links
Sort string // ?sort= — column key (see sortDashboardHosts); parse defaults an empty value to "name"
Dir string // ?dir= — "asc" | "desc"; parse normalises anything else to "asc"
}
// dashboardHostRow carries a host plus the per-row Run-now decision
@@ -211,21 +237,10 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
return
}
// Tag filter (chip-row above the table). Empty = show all.
activeTag := r.URL.Query().Get("tag")
hosts := allHosts
if activeTag != "" {
filtered := make([]store.Host, 0, len(allHosts))
for _, h := range allHosts {
for _, t := range h.Tags {
if t == activeTag {
filtered = append(filtered, h)
break
}
}
}
hosts = filtered
}
// Parse query-string filter + sort (NS-04). The tag chip-row is
// kept as ?tag= for backwards compat with existing bookmarks.
filter := parseDashboardFilter(r.URL.Query())
hosts := filterAndSortDashboardHosts(allHosts, filter)
knownTags, _ := s.deps.Store.DistinctHostTags(r.Context())
summary, err := s.deps.Store.FleetSummary(r.Context())
@@ -282,8 +297,11 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
Summary: summary,
PendingHosts: pending,
CritOpenCount: critOpenCount,
ActiveTag: activeTag,
ActiveTag: filter.Tag,
KnownTags: knownTags,
Filter: filter,
RefreshURL: "/?" + filter.encode(),
SortURL: buildDashboardSortURLs(filter),
}
if err := s.deps.UI.Render(w, "dashboard", view); err != nil {
slog.Error("ui: render dashboard", "err", err)
@@ -291,6 +309,182 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
}
}
// parseDashboardFilter turns the dashboard's query string into a
// dashboardFilter. The search term is whitespace-trimmed; a missing
// sort key becomes "name" and any direction other than "desc" becomes
// "asc", so downstream code never sees an empty Sort or Dir.
func parseDashboardFilter(q url.Values) dashboardFilter {
	sortKey := q.Get("sort")
	if sortKey == "" {
		sortKey = "name"
	}

	// Only an explicit "desc" flips the direction; everything else
	// (including garbage) is ascending.
	direction := "asc"
	if q.Get("dir") == "desc" {
		direction = "desc"
	}

	return dashboardFilter{
		Search:     strings.TrimSpace(q.Get("q")),
		Status:     q.Get("status"),
		RepoStatus: q.Get("repo_status"),
		Tag:        q.Get("tag"),
		Sort:       sortKey,
		Dir:        direction,
	}
}
// encode serialises the filter back into a URL-escaped query string
// (no leading "?"). Empty fields are omitted, and so are the default
// sort ("name") and direction ("asc"), so an all-default filter
// encodes to the empty string. Used for the live-refresh URL and for
// the column-sort links.
func (f dashboardFilter) encode() string {
	// Each entry pairs a query key with its current value and the
	// default value that is left implicit in the URL.
	params := []struct {
		key, value, implicit string
	}{
		{"q", f.Search, ""},
		{"status", f.Status, ""},
		{"repo_status", f.RepoStatus, ""},
		{"tag", f.Tag, ""},
		{"sort", f.Sort, "name"},
		{"dir", f.Dir, "asc"},
	}

	query := url.Values{}
	for _, p := range params {
		if p.value == "" || p.value == p.implicit {
			continue
		}
		query.Set(p.key, p.value)
	}
	return query.Encode()
}
// filterAndSortDashboardHosts returns a new slice holding the hosts
// that satisfy every active filter dimension (search ∧ status ∧
// repo_status ∧ tag), ordered by the filter's sort column and
// direction. The input slice is not modified; sorting happens on the
// filtered copy.
func filterAndSortDashboardHosts(hosts []store.Host, f dashboardFilter) []store.Host {
	needle := strings.ToLower(f.Search)
	kept := make([]store.Host, 0, len(hosts))

	for _, host := range hosts {
		// Search: case-insensitive substring of the host name.
		if needle != "" && !strings.Contains(strings.ToLower(host.Name), needle) {
			continue
		}

		// Status: an empty or unrecognised value filters nothing.
		switch f.Status {
		case "online", "offline":
			if host.Status != f.Status {
				continue
			}
		case "never_seen":
			if host.LastSeenAt != nil {
				continue
			}
		}

		if f.RepoStatus != "" {
			// Backward compatibility: a row carrying an empty repo
			// status (loaded before the NS-03 column existed) is
			// treated as "unknown".
			repoState := host.RepoStatus
			if repoState == "" {
				repoState = "unknown"
			}
			if repoState != f.RepoStatus {
				continue
			}
		}

		if f.Tag != "" {
			tagged := false
			for i := 0; i < len(host.Tags) && !tagged; i++ {
				tagged = host.Tags[i] == f.Tag
			}
			if !tagged {
				continue
			}
		}

		kept = append(kept, host)
	}

	sortDashboardHosts(kept, f.Sort, f.Dir)
	return kept
}
// sortDashboardHosts sorts hosts in place by the given column key and
// direction.
//
// col selects the primary key: "os", "status", "repo_status",
// "restic", "snapshot_count", "repo_size" or "last_backup". Any other
// value (including "name" and malformed bookmarked URLs) orders by
// host name alone, so an unknown key never yields an empty or
// unsorted table. Ties on the primary key are broken by host name.
//
// dir == "desc" reverses the whole ordering (tie-break included);
// every other value sorts ascending.
//
// The sort is stable: hosts that compare equal on every key keep
// their incoming order, so the 5s live poll does not shuffle rows
// between refreshes.
func sortDashboardHosts(hosts []store.Host, col, dir string) {
	less := func(i, j int) bool {
		// Pointers avoid copying two whole Host structs per comparison.
		a, b := &hosts[i], &hosts[j]
		switch col {
		case "os":
			if a.OS != b.OS {
				return a.OS < b.OS
			}
		case "status":
			if a.Status != b.Status {
				return a.Status < b.Status
			}
		case "repo_status":
			if a.RepoStatus != b.RepoStatus {
				return a.RepoStatus < b.RepoStatus
			}
		case "restic":
			if a.ResticVersion != b.ResticVersion {
				return a.ResticVersion < b.ResticVersion
			}
		case "snapshot_count":
			if a.SnapshotCount != b.SnapshotCount {
				return a.SnapshotCount < b.SnapshotCount
			}
		case "repo_size":
			if a.RepoSizeBytes != b.RepoSizeBytes {
				return a.RepoSizeBytes < b.RepoSizeBytes
			}
		case "last_backup":
			// A host with no backup yet compares as the zero time,
			// i.e. it sorts before every host that has one.
			var at, bt time.Time
			if a.LastBackupAt != nil {
				at = *a.LastBackupAt
			}
			if b.LastBackupAt != nil {
				bt = *b.LastBackupAt
			}
			if !at.Equal(bt) {
				return at.Before(bt)
			}
		}
		// Secondary key: name.
		return a.Name < b.Name
	}

	if dir == "desc" {
		sort.SliceStable(hosts, func(i, j int) bool { return less(j, i) })
		return
	}
	sort.SliceStable(hosts, less)
}
// buildDashboardSortURLs returns, for every sortable column key, the
// dashboard URL a click on that column header should navigate to. All
// non-sort filters from active are carried over. Clicking the column
// that is already sorted ascending yields a descending link; every
// other case yields an ascending link. A filter that encodes to
// nothing maps to the bare "/".
func buildDashboardSortURLs(active dashboardFilter) map[string]string {
	columns := [...]string{
		"name", "os", "status", "repo_status",
		"restic", "snapshot_count", "repo_size", "last_backup",
	}

	urls := make(map[string]string, len(columns))
	for _, column := range columns {
		next := active
		next.Sort = column
		next.Dir = "asc"
		if column == active.Sort && active.Dir == "asc" {
			next.Dir = "desc"
		}

		target := "/"
		if query := next.encode(); query != "" {
			target = "/?" + query
		}
		urls[column] = target
	}
	return urls
}
// Per-host Run-now and manual Init-repo were retired by the P2 redesign.
// Run-now lives at POST /hosts/{id}/source-groups/{gid}/run; init runs
// automatically on the agent's first WS connect after enrolment. Both
@@ -324,6 +518,23 @@ type addHostPage struct {
Paths string
ServerURL string
Error string
// Outstanding tokens (NS-02) — every still-valid (un-consumed,
// un-expired) enrolment token, surfaced so an operator who closed
// the install snippet tab can recover via Regenerate or revoke.
OutstandingTokens []addHostOutstandingToken
}
// addHostOutstandingToken is a UI-shaped projection of a row from
// store.ListOutstandingEnrollmentTokens with the repo URL already
// decrypted-and-redacted (no creds reach the browser). Values are
// built by loadOutstandingTokensForUI.
type addHostOutstandingToken struct {
TokenHash string // full hex hash; opaque path param for actions
ShortHash string // first 12 chars of TokenHash for display (the whole hash when it is shorter)
CreatedAt time.Time // copied from the store row
ExpiresAt time.Time // copied from the store row
// RepoURL is empty when the row has no stored creds, and a
// "(decrypt failed …)" placeholder when the creds cannot be decrypted.
RepoURL string // redacted (no embedded creds)
InitialPaths []string // copied from the store row
}
// pendingHostPage is the GET /hosts/pending/{token} view. Lives
@@ -347,13 +558,54 @@ func (s *Server) handleUIAddHostGet(w stdhttp.ResponseWriter, r *stdhttp.Request
}
view := s.baseView(r, u)
view.Title = "Add host · restic-manager"
view.Page = addHostPage{ServerURL: s.publicURL(r)}
view.Page = addHostPage{
ServerURL: s.publicURL(r),
OutstandingTokens: s.loadOutstandingTokensForUI(r),
}
if err := s.deps.UI.Render(w, "add_host", view); err != nil {
slog.Error("ui: render add_host", "err", err)
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
}
}
// loadOutstandingTokensForUI fetches the still-valid enrolment tokens
// and decrypts each row's repo URL so the Add-host page can show a
// recoverable list.
//
// The page is best-effort: a store error is logged and yields nil (the
// panel simply renders empty). A decryption failure (rotated key etc.)
// is logged and surfaced as a "(decrypt failed …)" placeholder rather
// than crashing the page. A creds blob that decrypts but does not
// parse is logged too; the row is still shown with whatever URL was
// decoded, passed through restic.RedactURL. Only the 12-char short
// hash is logged, never the decrypted payload.
func (s *Server) loadOutstandingTokensForUI(r *stdhttp.Request) []addHostOutstandingToken {
	rows, err := s.deps.Store.ListOutstandingEnrollmentTokens(r.Context())
	if err != nil {
		slog.Warn("ui add_host: list outstanding tokens", "err", err)
		return nil
	}
	out := make([]addHostOutstandingToken, 0, len(rows))
	for _, row := range rows {
		short := row.TokenHash
		if len(short) > 12 {
			short = short[:12]
		}
		entry := addHostOutstandingToken{
			TokenHash:    row.TokenHash,
			ShortHash:    short,
			CreatedAt:    row.CreatedAt,
			ExpiresAt:    row.ExpiresAt,
			InitialPaths: row.InitialPaths,
		}
		if row.EncRepoCreds != "" {
			// The token hash is bound in as associated data, so a blob
			// copied onto another token row will not decrypt.
			plain, derr := s.deps.AEAD.Decrypt(row.EncRepoCreds, []byte("token:"+row.TokenHash))
			if derr != nil {
				slog.Warn("ui add_host: decrypt outstanding token creds", "token", short, "err", derr)
				entry.RepoURL = "(decrypt failed — key rotation?)"
			} else {
				var blob repoCredsBlob
				if uerr := json.Unmarshal(plain, &blob); uerr != nil {
					// Keep rendering the row; just make the corruption visible in logs.
					slog.Warn("ui add_host: decode outstanding token creds", "token", short, "err", uerr)
				}
				entry.RepoURL = restic.RedactURL(blob.RepoURL)
			}
		}
		out = append(out, entry)
	}
	return out
}
// handleUIAddHostPost validates the form, mints the enrolment token
// (with encrypted repo creds), and 303-redirects to the persistent
// pending-host page. On validation errors we re-render the form
@@ -922,6 +1174,12 @@ func (s *Server) handleUILoginGet(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Redirect(w, r, "/", stdhttp.StatusSeeOther)
return
}
// First-run: no users + token still in memory ⇒ funnel the visitor
// to the bootstrap page so they don't have to know the API exists.
if s.bootstrapAvailable(r) {
stdhttp.Redirect(w, r, "/bootstrap", stdhttp.StatusSeeOther)
return
}
view := ui.ViewData{
Version: s.version(),
OIDCError: r.URL.Query().Get("oidc_error"),