P1 polish: agent-as-root, init-repo flow, rest creds passthrough, UX fixes

Cohesive batch from a smoke-test session against a real rest-server.
Themed bullets:

* Agent runs as root, sandboxed via systemd. CapabilityBoundingSet
  drops to CAP_DAC_READ_SEARCH + restore caps; ProtectSystem=strict
  with ReadWritePaths confined to /etc + /var/lib/restic-manager;
  NoNewPrivileges blocks escalation. Install script no longer
  creates a service user. spec.md §4.2 / §14.1 / §14.3 explain the
  rationale (matches UrBackup / Veeam / Bareos defaults; trying to
  back up "everything" as an unprivileged user creates silent skips
  on /home, /root, /var/lib/* with no upside vs the threat model
  the agent already implies).

* Init-repo end-to-end. New JobKind="init" wired through agent
  runner, restic.Env.RunInit, server dispatcher, and a UI button
  (red "Initialise repo" in the run-now panel). hosts.repo_initialised_at
  flips on init success, on backup success, or on a non-empty
  snapshots.report. The "Run now" / "Init" / "Retry" branching now
  drives both the dashboard host row and the host-detail panel.
  Migrations 0004 (column), 0005 (jobs.kind CHECK widened — using
  the safe create-new-then-rename pattern; first version corrupted
  job_logs.job_id FK), 0006 (cleans up job_logs FK on already-
  affected DBs).

* rest-server creds embedded at exec time only. restic.Env gains
  RepoUsername; mergeRestCreds() builds the user:pass@-prefixed URL
  inside envSlice() and never assigns it back to the struct, so
  nothing slog-able ever sees the cleartext form. RedactURL helper
  for any future surface that needs to log a URL safely. Both
  helpers tested.

* Add-host UX. Repo password is now optional — server mints a
  24-byte URL-safe random one and surfaces it once, alongside an
  htpasswd snippet ("echo PASS | htpasswd -B -i ... USERNAME") so
  the operator pastes one command on the rest-server host and one
  on the endpoint. Result page also links the install snippet at
  /install/install.sh (was /install.sh — 404'd before) and pipes
  to bash (not sh — script uses set -o pipefail and other
  bashisms; on Debian/Ubuntu sh is dash).

* Late-subscriber race in JobHub. A fast-failing job could finish
  (DB write + Broadcast) before the browser's HX-Redirect → page
  load → WS-connect path completed, so the JS sat forever waiting
  on a job.finished that already passed. JobHub split into
  Register + Send + Run; handleJobStream now subscribes first,
  re-fetches the job, and sends a synthetic job.finished if the
  state is already terminal.

* HTMX error visibility. New toast partial listens to
  htmx:responseError and surfaces the response body as a
  bottom-right toast — every server-side validation error now
  becomes visible without per-handler JS wiring. Also handles
  custom rm:toast events for future server-pushed notifications
  via the HX-Trigger header. Themed via existing CSS vars.

* Dashboard rows are now whole-row clickable to host detail
  (CSS card-link pattern: absolute-positioned anchor + .row-action
  z-index restoration so the action button stays clickable).
  "View →" on a running job links to /jobs/<id> rather than
  /hosts/<id> since the row click already covers the host page.

* "Run first" / "Run first backup" → "Run now" everywhere for
  consistency.

* runbook (docs/e2e-smoke.md) updated — live-log streaming step
  now reflects P1-26; mentions the browser-driven Run-now flow.

* _diag/dump-creds — moved out of cmd/ so go build doesn't pick
  it up; .gitignore now excludes /_diag/ entirely.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-02 11:02:12 +01:00
parent 8aa635f0c1
commit c8ead66f08
29 changed files with 885 additions and 129 deletions
+133 -3
View File
@@ -1,6 +1,8 @@
package http
import (
"crypto/rand"
"encoding/base64"
"errors"
"io/fs"
"log/slog"
@@ -178,6 +180,12 @@ func (s *Server) handleUIRunBackup(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.StatusBadRequest)
return
}
if host.RepoInitialisedAt == nil {
stdhttp.Error(w,
"this host's repo hasn't been initialised yet — click Initialise repo first",
stdhttp.StatusBadRequest)
return
}
res, status, code, msg := s.dispatchJob(r.Context(), storeUser, hostID, api.JobBackup, host.DefaultPaths)
if code != "" {
stdhttp.Error(w, msg, status)
@@ -197,6 +205,47 @@ func (s *Server) handleUIRunBackup(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Redirect(w, r, target, stdhttp.StatusSeeOther)
}
// handleUIInitRepo queues a one-shot `restic init` job for the host
// named by the "id" URL parameter. The run-now panel exposes it as
// the red "Initialise repo" button while host.repo_initialised_at
// IS NULL. When the dispatch succeeds the caller is sent to the
// job's live log page (/jobs/<id>), the same way Run-now does:
// via an HX-Redirect header for HTMX requests, or a 303 otherwise.
//
// Failure responses:
//   - 400 when the host id is missing from the URL
//   - 404 when the store reports store.ErrNotFound for the host
//   - 500 ("internal") when the user or host lookup fails otherwise
//   - whatever status/message dispatchJob reports when it returns a
//     non-empty error code
func (s *Server) handleUIInitRepo(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	sessionUser := s.requireUIUser(w, r)
	if sessionUser == nil {
		return
	}

	hostID := chi.URLParam(r, "id")
	if hostID == "" {
		stdhttp.Error(w, "missing host id", stdhttp.StatusBadRequest)
		return
	}

	storeUser, _, err := s.userByID(r, sessionUser.ID)
	if err != nil {
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	// Only the existence of the host matters here; the record itself
	// is not needed to dispatch an init job.
	ctx := r.Context()
	_, err = s.deps.Store.GetHost(ctx, hostID)
	switch {
	case err == nil:
		// Host exists — carry on.
	case errors.Is(err, store.ErrNotFound):
		stdhttp.NotFound(w, r)
		return
	default:
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	// Init takes no paths, hence the nil final argument.
	dispatched, httpStatus, errCode, errMsg := s.dispatchJob(ctx, storeUser, hostID, api.JobInit, nil)
	if errCode != "" {
		stdhttp.Error(w, errMsg, httpStatus)
		return
	}

	jobURL := "/jobs/" + dispatched.JobID
	if r.Header.Get("HX-Request") != "true" {
		// Plain form post: ordinary See Other redirect.
		stdhttp.Redirect(w, r, jobURL, stdhttp.StatusSeeOther)
		return
	}
	// HTMX request: hand the target back in the HX-Redirect header
	// with a 200 instead of issuing a 3xx.
	w.Header().Set("HX-Redirect", jobURL)
	w.WriteHeader(stdhttp.StatusOK)
}
// addHostPage carries the form state into the Add host template.
// In State A (form), Token is empty. In State B (result), Token is
// populated and the template renders the install command.
@@ -223,6 +272,16 @@ type addHostPage struct {
// install command panel instead of the form.
Token string
ExpiresAt time.Time
// RepoPassword is the password the agent will use against the
// rest-server. When the operator left the password field blank
// we generate one server-side; PasswordGenerated tracks which
// path produced it so the result page can label it appropriately.
// Either way it's surfaced on the result page exactly once,
// inside the htpasswd snippet — same one-time-view rule as the
// enrolment token. Reload = gone.
RepoPassword string
PasswordGenerated bool
}
// handleUIAddHostGet renders the empty Add host form.
@@ -264,8 +323,22 @@ func (s *Server) handleUIAddHostPost(w stdhttp.ResponseWriter, r *stdhttp.Reques
if page.Hostname == "" {
page.Error = "Hostname is required."
} else if page.RepoURL == "" || repoPassword == "" {
page.Error = "Repo URL and password are both required so the agent can back up the moment it comes online."
} else if page.RepoURL == "" {
page.Error = "Repo URL is required so the agent can back up the moment it comes online."
}
// If the operator didn't type a password, mint one. We surface it
// once on the result page (inside the htpasswd snippet) so they
// can paste it into the rest-server's htpasswd file.
if page.Error == "" && repoPassword == "" {
gen, err := generateRepoPassword()
if err != nil {
slog.Error("ui add_host: generate repo password", "err", err)
page.Error = "Couldnt generate a password — see the server log for details."
} else {
repoPassword = gen
page.PasswordGenerated = true
}
}
defaultPaths := splitPaths(page.Paths)
@@ -276,6 +349,7 @@ func (s *Server) handleUIAddHostPost(w stdhttp.ResponseWriter, r *stdhttp.Reques
case nil:
page.Token = token
page.ExpiresAt = expires
page.RepoPassword = repoPassword
case errMissingRepoCreds:
page.Error = "Repo URL and password are both required."
default:
@@ -355,6 +429,18 @@ func (s *Server) handleUIHostDetail(w stdhttp.ResponseWriter, r *stdhttp.Request
}
}
// generateRepoPassword mints a per-host rest-server password: 24
// bytes from crypto/rand, encoded as unpadded URL-safe base64 (so
// the returned string is 32 characters drawn from A-Z, a-z, 0-9,
// '-' and '_'). That alphabet contains nothing that needs escaping
// inside single quotes, which matters because the operator pastes
// the value into an `htpasswd -i` invocation on the rest-server.
//
// A non-nil error means the random source could not be read; the
// returned string is empty in that case.
func generateRepoPassword() (string, error) {
	raw := make([]byte, 24)
	if _, err := rand.Read(raw); err != nil {
		return "", err
	}
	encoded := base64.RawURLEncoding.EncodeToString(raw)
	return encoded, nil
}
// splitPaths parses the textarea content into a clean []string —
// one path per line, leading/trailing whitespace trimmed, blanks
// dropped.
@@ -479,7 +565,51 @@ func (s *Server) handleJobStream(w stdhttp.ResponseWriter, r *stdhttp.Request) {
// Wrap so we get the same Send semantics as the agent path.
c := ws.NewConn("browser-"+jobID, conn)
s.deps.JobHub.Subscribe(r.Context(), jobID, c)
// Register first so future broadcasts reach us, then re-fetch the
// job to close the late-subscriber race: a fast-failing job can
// finish (DB write + Broadcast) before the browser's WS hop
// completes, leaving the JS waiting forever for a job.finished
// that already passed. If the job is already terminal here, prime
// the subscriber with a synthetic job.finished so the JS reloads.
sub := s.deps.JobHub.Register(jobID)
if cur, gerr := s.deps.Store.GetJob(r.Context(), jobID); gerr == nil && isTerminalJobStatus(cur.Status) {
if env, ferr := buildSyntheticJobFinished(cur); ferr == nil {
sub.Send(env)
}
}
sub.Run(r.Context(), c)
}
// isTerminalJobStatus reports whether the raw status string s names
// one of the end states api.JobSucceeded, api.JobFailed or
// api.JobCancelled. Any other value yields false.
func isTerminalJobStatus(s string) bool {
	status := api.JobStatus(s)
	return status == api.JobSucceeded ||
		status == api.JobFailed ||
		status == api.JobCancelled
}
// buildSyntheticJobFinished marshals an api.MsgJobFinished envelope
// from a stored job row, so a subscriber can be handed the finish
// event for a job whose state is already recorded in the store.
//
// The job's nullable fields (FinishedAt, ExitCode, Error) fall back
// to the zero time, 0 and "" respectively when they are nil. The
// envelope is marshalled with an empty second argument to
// api.Marshal; the error returned is whatever api.Marshal reports.
func buildSyntheticJobFinished(job *store.Job) (api.Envelope, error) {
	// Start from the fields that are always present, with explicit
	// zero defaults for the nullable ones.
	payload := api.JobFinishedPayload{
		JobID:      job.ID,
		Status:     api.JobStatus(job.Status),
		ExitCode:   0,
		FinishedAt: time.Time{},
		Stats:      job.Stats,
		Error:      "",
	}

	// Overlay whichever nullable values the row actually carries.
	if ts := job.FinishedAt; ts != nil {
		payload.FinishedAt = *ts
	}
	if code := job.ExitCode; code != nil {
		payload.ExitCode = *code
	}
	if reason := job.Error; reason != nil {
		payload.Error = *reason
	}

	return api.Marshal(api.MsgJobFinished, "", payload)
}
// userByID fetches the full store.User the UI session represents.