testing: bootstrap UI, agent reliability, NS-01..04 + alert username
CI / Test (rest) (pull_request) Successful in 29s
CI / Lint (pull_request) Successful in 32s
CI / Build (windows/amd64) (pull_request) Successful in 22s
CI / Test (store) (pull_request) Successful in 1m22s
CI / Test (server-http) (pull_request) Successful in 1m30s
CI / Build (linux/amd64) (pull_request) Successful in 22s
CI / Build (linux/arm64) (pull_request) Successful in 41s

Smooths the rough edges that came up while exercising a live deployment.

First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.

Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Agent service unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.

Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).

NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.

NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.

NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.

NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.

Alerts page: ack'd-by line resolves user_id ULID to username.

Compose.yaml ignored — host-specific.
This commit is contained in:
2026-05-05 22:03:15 +01:00
parent b91fe56c83
commit 3800b34a2b
40 changed files with 2135 additions and 109 deletions
+6
View File
@@ -26,6 +26,12 @@ coverage.html
.env.local
*.local
# Local docker-compose for the dev/test bench. Has host-specific IPs,
# hostnames, and ports — never committed; the canonical reference
# deployment lives in deploy/.
/compose.yaml
/compose.override.yaml
# Local diagnostic helpers (never shipped). Go's build tooling already
# skips paths beginning with _ or ., but ignore explicitly so nothing
# checked in here can leak into a release tarball.
+68 -22
View File
@@ -115,6 +115,12 @@ func run() error {
resticBin, _ := restic.Locate(cfg.ResticPath) // empty is fine; commands fail with a clear error later
// Probe the actual restic binary for restore-flag support. We used
// to gate --no-ownership on a SemVer comparison (added in 0.17),
// but a restic 0.18.1 build was observed in the wild that still
// rejects the flag. The help text is the only reliable signal.
resticSupportsNoOwnership := restic.SupportsRestoreNoOwnership(ctx, resticBin)
// Open the secrets store. If the agent is enrolled but has no
// secrets key yet (legacy YAML), mint one and migrate any
// plaintext repo fields into the encrypted blob.
@@ -139,10 +145,11 @@ func run() error {
}
d := &dispatcher{
resticBin: resticBin,
resticVer: snap.ResticVersion,
secrets: sec,
scheduler: scheduler.New(),
resticBin: resticBin,
resticVer: snap.ResticVersion,
resticSupportsNoOwnership: resticSupportsNoOwnership,
secrets: sec,
scheduler: scheduler.New(),
}
if err := wsclient.Run(ctx, wsCfg, d.handle); err != nil {
return fmt.Errorf("ws run: %w", err)
@@ -204,10 +211,11 @@ func openSecretsStore(cfg *config.Config) (*secrets.Store, error) {
// secrets store on each job — config.update writes through to disk,
// so a job dispatched in the same session sees the latest values.
type dispatcher struct {
resticBin string
resticVer string // e.g. "0.17.1"; empty if restic isn't installed yet
secrets *secrets.Store
scheduler *scheduler.Scheduler
resticBin string
resticVer string // e.g. "0.17.1"; empty if restic isn't installed yet
resticSupportsNoOwnership bool // captured at startup from `restic restore --help`
secrets *secrets.Store
scheduler *scheduler.Scheduler
// Bandwidth caps in KB/s pushed via config.update. Mutated under
// bwMu by the config.update handler; read by runJob when building
@@ -464,17 +472,47 @@ func (d *dispatcher) handleTreeList(ctx context.Context, reqID string, p api.Tre
reply(api.TreeListResultPayload{Entries: apiEntries})
}
// failJob ships a synthetic job.started + job.finished(failed) pair
// for a command.run we couldn't even spawn locally — missing restic
// binary, missing credentials, or a malformed payload. Without these
// envelopes the server has no way to know the job will never produce
// output: the row sits in "running", the live stream stays stuck on
// "awaiting agent output," and a subsequent command.cancel arrives
// for a job_id the agent never registered (we log "unknown job"
// because trackJob was never called). Sending a terminal envelope
// here closes the loop on both fronts.
//
// Delivery stays best-effort — failJob has no error return and the
// caller already returns its own error — but marshal/send failures
// are logged rather than dropped: if the terminal envelope never
// leaves the agent, the server-side job hangs exactly as before and
// the agent log is the only place that explains why. A failure on
// job.started does not stop the job.finished attempt.
//
// Both envelopes carry the same timestamp (StartedAt == FinishedAt)
// and the finished envelope reports ExitCode -1 with errMsg as Error.
func failJob(p api.CommandRunPayload, tx wsclient.Sender, errMsg string) {
	now := time.Now().UTC()

	startedEnv, err := api.Marshal(api.MsgJobStarted, p.JobID, api.JobStartedPayload{
		JobID: p.JobID, Kind: p.Kind, StartedAt: now,
	})
	if err != nil {
		slog.Warn("agent: failJob: marshal job.started", "job_id", p.JobID, "err", err)
	} else if err := tx.Send(startedEnv); err != nil {
		slog.Warn("agent: failJob: send job.started", "job_id", p.JobID, "err", err)
	}

	finEnv, err := api.Marshal(api.MsgJobFinished, p.JobID, api.JobFinishedPayload{
		JobID:      p.JobID,
		Status:     api.JobFailed,
		ExitCode:   -1,
		FinishedAt: now,
		Error:      errMsg,
	})
	if err != nil {
		slog.Warn("agent: failJob: marshal job.finished", "job_id", p.JobID, "err", err)
		return
	}
	if err := tx.Send(finEnv); err != nil {
		slog.Warn("agent: failJob: send job.finished", "job_id", p.JobID, "err", err)
	}
}
// runJob spawns a runner for one job. We launch a goroutine so the
// WS read loop keeps draining messages while restic chugs along.
func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsclient.Sender) error {
if d.resticBin == "" {
failJob(p, tx, "restic binary not located on this agent")
return fmt.Errorf("restic binary not located on this agent")
}
creds, err := d.secrets.Load()
if err != nil {
failJob(p, tx, "load repo credentials: "+err.Error())
return fmt.Errorf("load repo credentials: %w", err)
}
if creds.Empty() {
failJob(p, tx, "repo credentials not configured (waiting for server config.update push)")
return fmt.Errorf("repo credentials not configured (waiting for server config.update push)")
}
// r is the everyday runner — bound to the host's repo
@@ -498,13 +536,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
}
r := runner.New(runner.Config{
ResticBin: d.resticBin,
ResticVersion: d.resticVer,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
ResticBin: d.resticBin,
ResticVersion: d.resticVer,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
SupportsRestoreNoOwnership: d.resticSupportsNoOwnership,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
}, tx, time.Second)
// spawn wraps the kind-specific goroutine: derives a per-job
@@ -560,6 +599,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
// policy fallback was specced but skipped — see the
// Phase 5 plan rationale and version.go's lockstep-deploy
// note for why.
failJob(p, tx, "forget: command.run carried no forget_groups (server didn't populate them)")
return fmt.Errorf("forget: command.run carried no forget_groups (server didn't populate them)")
}
groups := make([]restic.ForgetGroup, 0, len(p.ForgetGroups))
@@ -594,13 +634,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
runCreds = ac
}
prr := runner.New(runner.Config{
ResticBin: d.resticBin,
ResticVersion: d.resticVer,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
ResticBin: d.resticBin,
ResticVersion: d.resticVer,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
SupportsRestoreNoOwnership: d.resticSupportsNoOwnership,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
}, tx, time.Second)
slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
spawn("prune", func(jobCtx context.Context) error {
@@ -622,13 +663,16 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
})
case api.JobRestore:
if p.Restore == nil {
failJob(p, tx, "restore: command.run carried no restore payload")
return fmt.Errorf("restore: command.run carried no restore payload")
}
rp := *p.Restore
if rp.SnapshotID == "" {
failJob(p, tx, "restore: snapshot_id is required")
return fmt.Errorf("restore: snapshot_id is required")
}
if !rp.InPlace && rp.TargetDir == "" {
failJob(p, tx, "restore: target_dir required for non-in-place restore")
return fmt.Errorf("restore: target_dir required for non-in-place restore")
}
slog.Info("agent: accepting restore job",
@@ -639,6 +683,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
})
case api.JobDiff:
if p.Diff == nil || p.Diff.SnapshotA == "" || p.Diff.SnapshotB == "" {
failJob(p, tx, "diff: command.run carried incomplete diff payload")
return fmt.Errorf("diff: command.run carried incomplete diff payload")
}
dp := *p.Diff
@@ -648,6 +693,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
return r.RunDiff(jobCtx, p.JobID, dp.SnapshotA, dp.SnapshotB)
})
default:
failJob(p, tx, fmt.Sprintf("kind %q not implemented on this agent", p.Kind))
return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind)
}
return nil
+11
View File
@@ -34,6 +34,13 @@ RUN GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
-o /out/restic-manager-server \
./cmd/server
# Empty /data skeleton so the runtime image carries an existing,
# nonroot-owned mount point. Docker copies that ownership onto a
# named volume the first time it's created, which avoids the
# "permission denied" trap on /data/secret.key when the operator
# uses a default `volumes: { rm-data: {} }` declaration.
RUN mkdir -p /out/data
# Agents: identical across image arches — an arm64 server image still
# ships an amd64 agent binary for amd64 endpoints to download.
RUN mkdir -p /out/agent-binaries && \
@@ -72,5 +79,9 @@ COPY --chmod=0755 deploy/install/install.sh /opt/restic-manager/dist/install/ins
COPY --chmod=0644 deploy/install/install.ps1 /opt/restic-manager/dist/install/install.ps1
COPY --chmod=0644 deploy/install/restic-manager-agent.service /opt/restic-manager/dist/install/restic-manager-agent.service
# Pre-created data dir owned by nonroot so a fresh named volume
# inherits the right ownership.
COPY --from=build --chown=nonroot:nonroot /out/data /data
EXPOSE 8443
ENTRYPOINT ["/usr/local/bin/restic-manager-server"]
+4 -6
View File
@@ -49,12 +49,10 @@ detect_arch() {
ensure_dirs() {
install -d -m 0700 -o root -g root "$RM_CONFIG_DIR"
install -d -m 0700 -o root -g root "$RM_STATE_DIR"
# Default new-directory restore target: $HOME/rm-restore. Pre-create
# so the systemd unit's ReadWritePaths bind-mount applies cleanly
# (paths that don't exist when systemd starts get a soft-fail
# because of the '-' prefix, but the agent then can't mkdir into
# the read-only /root). Mode 0700 + root-owned matches the threat
# model — files restored here are operator-readable as root.
# Default new-directory restore target: $HOME/rm-restore. With the
# current unit (ProtectSystem=full, no ProtectHome, ReadWritePaths only
# for /etc/restic-manager) the agent can mkdir anywhere outside the
# read-only /usr, /boot, /efi and /etc, so this is just a courtesy
# pre-create so the wizard's default lands in a tidy spot.
install -d -m 0700 -o root -g root /root/rm-restore
}
+19 -10
View File
@@ -33,17 +33,26 @@ CapabilityBoundingSet=CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_CHOWN
AmbientCapabilities=CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_CHOWN
# Hardening — blocks privilege escalation even from root, and
# confines writes / network / kernel access to what restic actually
# needs. Filesystem reads stay open: that's the whole job.
# confines kernel / namespace / privilege surface. Filesystem reads
# stay open (that's the whole job) and restore writes are
# unrestricted: a backup tool whose entire purpose is "put files
# back where they belong" can't have ProtectHome=read-only or
# ProtectSystem=strict without breaking on the first cross-user
# restore. ProtectSystem=full keeps /usr, /boot, /efi and /etc
# read-only (apart from the ReadWritePaths exemption below) so a
# compromised agent can't swap out /usr/bin/restic or drop a kernel
# module, while leaving /home, /root, /var, /opt, /srv, /tmp etc.
# writable for arbitrary restore targets. The agent is treated as a
# high-trust component (it runs operator hooks as root and holds
# repo credentials); the residual hardening is about kernel + privesc
# protection, not write confinement.
NoNewPrivileges=true
ProtectSystem=strict
# /etc/restic-manager: agent.yaml + secrets.enc.
# /var/lib/restic-manager: agent state (currently unused but reserved).
# /root/rm-restore: default target for new-directory restores
# ($HOME/rm-restore/<job-id>/ resolves here for User=root).
# ReadWritePaths overrides ProtectHome=read-only on this subdir only.
ReadWritePaths=/etc/restic-manager /var/lib/restic-manager -/root/rm-restore
ProtectHome=read-only
ProtectSystem=full
# ProtectSystem=full mounts /usr, /boot, /efi *and* /etc read-only.
# The agent rewrites /etc/restic-manager/agent.yaml on enrolment and
# whenever a new SecretsKey is minted, so we need a targeted
# write-exemption for that dir. No exemption for the rest of /etc:
# the agent has no business editing /etc/passwd, /etc/sudoers, etc.
ReadWritePaths=/etc/restic-manager
ProtectHostname=true
ProtectKernelTunables=true
ProtectKernelModules=true
+13 -7
View File
@@ -32,6 +32,11 @@ type Config struct {
RepoUsername string
RepoPassword string
// SupportsRestoreNoOwnership comes from a startup probe of
// `restic restore --help`; gates the new-dir-restore flag without
// relying on version sniffing.
SupportsRestoreNoOwnership bool
// Bandwidth caps in KB/s applied to every restic invocation.
// <=0 means "no cap". Per-job override: callers that build a
// runner per-dispatch can pass the override value here directly.
@@ -61,13 +66,14 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner {
// resticEnv builds the shared restic.Env from r.cfg.
func (r *Runner) resticEnv() restic.Env {
return restic.Env{
Bin: r.cfg.ResticBin,
Version: r.cfg.ResticVersion,
RepoURL: r.cfg.RepoURL,
RepoUsername: r.cfg.RepoUsername,
RepoPassword: r.cfg.RepoPassword,
LimitUploadKBps: r.cfg.LimitUploadKBps,
LimitDownloadKBps: r.cfg.LimitDownloadKBps,
Bin: r.cfg.ResticBin,
Version: r.cfg.ResticVersion,
RepoURL: r.cfg.RepoURL,
RepoUsername: r.cfg.RepoUsername,
RepoPassword: r.cfg.RepoPassword,
SupportsRestoreNoOwnership: r.cfg.SupportsRestoreNoOwnership,
LimitUploadKBps: r.cfg.LimitUploadKBps,
LimitDownloadKBps: r.cfg.LimitDownloadKBps,
}
}
+7 -7
View File
@@ -87,13 +87,13 @@ func (e Env) RunRestore(ctx context.Context, snapshotID string, paths []string,
}
}
args = append(args, "--target", target)
// --no-ownership was added in restic 0.17. Older versions reject
// the flag with "unknown flag: --no-ownership". For new-dir
// restores we want the files owned by the agent user (operator
// can cp them without juggling chown), so pass the flag iff the
// running restic supports it. In-place restores always preserve
// ownership — that's the whole point of in-place.
if !inPlace && e.AtLeastVersion(0, 17) {
// --no-ownership is nominally a restic 0.17+ flag, but at least
// one downstream 0.18.1 build still rejects it. We rely on a
// runtime probe captured at agent startup (see
// SupportsRestoreNoOwnership) rather than version sniffing.
// In-place restores always preserve ownership — that's the whole
// point of in-place — so we only add the flag for new-dir mode.
if !inPlace && e.SupportsRestoreNoOwnership {
args = append(args, "--no-ownership")
}
for _, p := range paths {
+37 -6
View File
@@ -15,6 +15,26 @@ import (
"time"
)
// SupportsRestoreNoOwnership reports whether the restic binary at bin
// lists `--no-ownership` in the output of `restic restore --help`.
//
// The flag is nominally available from restic 0.17, but a downstream
// 0.18.1 build was observed without it, so the binary itself is asked
// instead of trusting the version string. The probe is bounded to five
// seconds on top of whatever deadline ctx already carries.
//
// It returns false when bin is empty, when the help command cannot be
// run or exits non-zero, or when the flag is absent from the combined
// stdout/stderr — in all of those cases the caller simply doesn't add
// the flag.
func SupportsRestoreNoOwnership(ctx context.Context, bin string) bool {
	if len(bin) == 0 {
		return false
	}

	helpCtx, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	helpCmd := exec.CommandContext(helpCtx, bin, "restore", "--help")
	helpText, runErr := helpCmd.CombinedOutput()
	return runErr == nil && strings.Contains(string(helpText), "--no-ownership")
}
// Locate resolves the path to the restic binary. Honour an explicit
// override if provided, else fall back to PATH.
func Locate(override string) (string, error) {
@@ -49,6 +69,15 @@ type Env struct {
ExtraEnv map[string]string // any other RESTIC_* / passthrough
WorkDir string // CWD; default = current
// SupportsRestoreNoOwnership records whether the running restic's
// `restore --help` advertises the --no-ownership flag. The flag was
// added in 0.17, but at least one downstream build of 0.18.1 still
// rejects it ("unknown flag: --no-ownership") — version sniffing
// proved unreliable, so the agent now probes for the actual flag at
// startup (see internal/restic.SupportsRestoreNoOwnership) and
// passes the resulting boolean down here.
SupportsRestoreNoOwnership bool
// Bandwidth caps in KB/s. <=0 means "no cap" (omit the flag).
// Emitted as restic global flags --limit-upload / --limit-download
// before the subcommand on every invocation.
@@ -507,12 +536,14 @@ func pumpPlain(r io.Reader, stream string, handle LineHandler) error {
// on one or the other for its cache dir; without it the command
// fails before ever talking to the repo.
//
// Default to /var/lib/restic-manager — that's in the systemd unit's
// ReadWritePaths and survives ProtectHome=read-only. We do NOT fall
// back to the parent's HOME env var: the agent runs as root with
// HOME=/root, but ProtectHome makes /root read-only, so restic's
// `mkdir /root/.cache/restic` fails. ExtraEnv overrides win for
// callers that explicitly want a different cache location.
// Default to /var/lib/restic-manager. The unit no longer pins
// ProtectHome=read-only (a backup tool needs to restore anywhere),
// but the explicit HOME stays for two reasons: the parent's HOME
// can be unset under unusual init shapes, and pinning the cache
// under a known agent-owned dir keeps restic's metadata isolated
// from the actual operator home dirs that the agent can now write
// to. ExtraEnv overrides win for callers that want a different
// cache location.
func (e Env) envSlice() []string {
home := "/var/lib/restic-manager"
if h, ok := e.ExtraEnv["HOME"]; ok && h != "" {
+157
View File
@@ -0,0 +1,157 @@
// bootstrap_handler.go — public landing page for the first-run admin
// flow. While the server has no users and still holds the in-memory
// one-shot bootstrap token printed at startup, /bootstrap renders a
// form that takes a username + password and creates the first admin.
//
// The operator never sees or types the token: the server already has
// it in memory, so the UI handler uses it directly. The token printed
// to stderr remains a break-glass fallback for the JSON
// /api/bootstrap path.
//
// Routes (wired in server.go):
//
// GET /bootstrap → handleUIBootstrapGet
// POST /bootstrap → handleUIBootstrapPost
//
// Both routes self-disable the moment a user row exists; subsequent
// hits redirect to /login.
package http
import (
"log/slog"
stdhttp "net/http"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// bootstrapPage is the page model for the "bootstrap" template; it is
// assigned to view.Page by renderBootstrap.
type bootstrapPage struct {
	// Username is the username value handed to the template: empty on
	// the initial GET, the submitted value when the form is re-rendered
	// after a failed POST.
	Username string
	// Error is the message handed to the template; empty when there is
	// nothing to report.
	Error string
}
// handleUIBootstrapGet serves GET /bootstrap. While bootstrap is still
// available it renders the empty first-admin form; otherwise it sends
// the browser to /login with a 303.
func (s *Server) handleUIBootstrapGet(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if s.bootstrapAvailable(r) {
		s.renderBootstrap(w, r, "", "")
		return
	}
	stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
}
// handleUIBootstrapPost serves POST /bootstrap: it validates the
// submitted username/password, creates the first admin user, closes
// the bootstrap window, and signs the new admin in.
//
// Flow:
//   - bootstrap no longer available → 303 to /login;
//   - unparsable form → 400;
//   - empty username, mismatched passwords, or a password shorter
//     than 12 bytes → the form is re-rendered with an error message;
//   - hashing failure → 500; user-creation failure → form re-rendered;
//   - on success the in-memory token is cleared, an auth.bootstrap
//     audit entry is appended, a session is minted and set as a
//     cookie, and the browser is redirected to / with a 303.
//
// Once the user row exists the account is usable, so failures past
// that point (audit, session, login stamp) are logged and never
// surface as a dead-end 500: a session failure falls back to a
// redirect to /login, where the operator can sign in with the
// credentials they just chose.
func (s *Server) handleUIBootstrapPost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if !s.bootstrapAvailable(r) {
		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
		return
	}
	if err := r.ParseForm(); err != nil {
		stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
		return
	}
	username := r.PostForm.Get("username")
	pw := r.PostForm.Get("password")
	pw2 := r.PostForm.Get("password_confirm")

	if username == "" {
		s.renderBootstrap(w, r, username, "Pick a username.")
		return
	}
	// Empty fields need no separate check: an empty password is
	// shorter than 12 bytes, and an empty confirmation cannot equal a
	// password that passes the length test.
	if pw != pw2 || len(pw) < 12 {
		s.renderBootstrap(w, r, username,
			"Passwords must match and be at least 12 characters.")
		return
	}

	hash, err := auth.HashPassword(pw)
	if err != nil {
		slog.Error("bootstrap: hash password", "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}
	now := time.Now().UTC()
	u := store.User{
		ID:           ulid.Make().String(),
		Username:     username,
		PasswordHash: hash,
		Role:         store.RoleAdmin,
		CreatedAt:    now,
	}
	if err := s.deps.Store.CreateUser(r.Context(), u); err != nil {
		slog.Error("bootstrap: create user", "err", err)
		s.renderBootstrap(w, r, username,
			"Could not create the administrator account. Check the server logs.")
		return
	}

	// Clear the in-memory token so /api/bootstrap also stops accepting
	// further calls. CountUsers > 0 already gates both surfaces, but
	// blanking the token kills the constant-time-compare branch as
	// well — defence in depth, plus stops the token from sitting in
	// process memory longer than necessary.
	// NOTE(review): this write has no visible synchronisation with
	// concurrent readers in bootstrapAvailable — confirm whether
	// s.deps needs a lock or an atomic here.
	s.deps.BootstrapToken = ""

	// Audit is best-effort: the admin already exists, so a failed
	// audit write must not undo or block the bootstrap — but it
	// should be visible in the logs.
	if err := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:     ulid.Make().String(),
		UserID: &u.ID,
		Actor:  "system",
		Action: "auth.bootstrap",
		TS:     now,
	}); err != nil {
		slog.Warn("bootstrap: append audit", "user_id", u.ID, "err", err)
	}

	// Mint a session so the new admin lands authenticated on /. If
	// that fails the account still exists and bootstrap is closed, so
	// hand the operator to the normal login form instead of a 500.
	rawSession, err := auth.NewToken()
	if err != nil {
		slog.Error("bootstrap: session token", "err", err)
		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
		return
	}
	if err := s.deps.Store.CreateSession(r.Context(), store.Session{
		UserID:    u.ID,
		CreatedAt: now,
		ExpiresAt: now.Add(sessionTTL),
		IP:        r.RemoteAddr,
		UA:        r.UserAgent(),
	}, auth.HashToken(rawSession)); err != nil {
		slog.Error("bootstrap: create session", "err", err)
		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
		return
	}
	if err := s.deps.Store.MarkUserLogin(r.Context(), u.ID, now); err != nil {
		slog.Warn("bootstrap: mark user login", "user_id", u.ID, "err", err)
	}

	stdhttp.SetCookie(w, &stdhttp.Cookie{
		Name:     sessionCookieName,
		Value:    rawSession,
		Path:     "/",
		HttpOnly: true,
		Secure:   s.deps.Cfg.CookieSecure,
		SameSite: stdhttp.SameSiteLaxMode,
		Expires:  now.Add(sessionTTL),
	})
	stdhttp.Redirect(w, r, "/", stdhttp.StatusSeeOther)
}
// bootstrapAvailable answers "may the first-run flow still run?". It
// is true only when the server still holds a non-empty in-memory
// bootstrap token AND the store reports zero users. A CountUsers
// failure is logged and treated as "not available".
func (s *Server) bootstrapAvailable(r *stdhttp.Request) bool {
	if s.deps.BootstrapToken == "" {
		return false
	}
	userCount, err := s.deps.Store.CountUsers(r.Context())
	switch {
	case err != nil:
		slog.Error("bootstrap: count users", "err", err)
		return false
	case userCount != 0:
		return false
	default:
		return true
	}
}
// renderBootstrap renders the "bootstrap" template with the given
// username and error message as its page model. A render failure is
// logged and answered with a 500.
func (s *Server) renderBootstrap(w stdhttp.ResponseWriter, r *stdhttp.Request, username, errMsg string) {
	page := bootstrapPage{Username: username, Error: errMsg}

	view := s.baseView(r, nil)
	view.Title = "Welcome · restic-manager"
	view.Page = page

	renderErr := s.deps.UI.Render(w, "bootstrap", view)
	if renderErr == nil {
		return
	}
	slog.Error("ui bootstrap: render", "err", renderErr)
	stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
}
@@ -0,0 +1,144 @@
// dashboard_filter_test.go — covers the NS-04 filter + sort pipeline
// in pure-Go form, without going through HTTP. The handler tests
// elsewhere prove end-to-end render; here we focus on edge cases of
// the column-sort + filter precedence so a regression in either is
// surfaced loudly.
package http
import (
"net/url"
"testing"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// makeFilterHosts builds the three-host fixture shared by the filter
// and sort tests: alpha (linux, online, ready, tag prod), bravo
// (linux, offline, init_failed, tag dev) and charlie (windows,
// online, unknown, tags prod+edge, never seen, no backup).
func makeFilterHosts() []store.Host {
	// noon returns a pointer to 12:00 UTC on the given day of May 2026.
	noon := func(day int) *time.Time {
		ts := time.Date(2026, 5, day, 12, 0, 0, 0, time.UTC)
		return &ts
	}
	firstBackup, laterBackup, seen := noon(1), noon(4), noon(5)

	alpha := store.Host{
		ID:            "01HHA",
		Name:          "alpha",
		OS:            "linux",
		Status:        "online",
		RepoStatus:    "ready",
		Tags:          []string{"prod"},
		SnapshotCount: 30,
		LastBackupAt:  firstBackup,
		LastSeenAt:    seen,
		RepoSizeBytes: 1000,
	}
	bravo := store.Host{
		ID:            "01HHB",
		Name:          "bravo",
		OS:            "linux",
		Status:        "offline",
		RepoStatus:    "init_failed",
		Tags:          []string{"dev"},
		SnapshotCount: 10,
		LastBackupAt:  laterBackup,
		LastSeenAt:    seen,
		RepoSizeBytes: 5000,
	}
	charlie := store.Host{
		ID:            "01HHC",
		Name:          "charlie",
		OS:            "windows",
		Status:        "online",
		RepoStatus:    "unknown",
		Tags:          []string{"prod", "edge"},
		SnapshotCount: 0,
		LastSeenAt:    nil, // never_seen path
	}
	return []store.Host{alpha, bravo, charlie}
}
// TestFilterAndSortDashboardSearchAndStatus exercises each filter
// dimension (status, search, repo_status, never_seen, tag) on its
// own against the shared fixture, always sorted by name ascending.
func TestFilterAndSortDashboardSearchAndStatus(t *testing.T) {
	t.Parallel()
	hosts := makeFilterHosts()

	// run applies f with the name/asc ordering every case here uses.
	run := func(f dashboardFilter) []store.Host {
		f.Sort, f.Dir = "name", "asc"
		return filterAndSortDashboardHosts(hosts, f)
	}
	// named reports whether hs carries exactly these names, in order.
	named := func(hs []store.Host, want ...string) bool {
		if len(hs) != len(want) {
			return false
		}
		for i, w := range want {
			if hs[i].Name != w {
				return false
			}
		}
		return true
	}

	// status=online narrows to alpha + charlie.
	if got := run(dashboardFilter{Status: "online"}); !named(got, "alpha", "charlie") {
		t.Errorf("status=online: got %d names %v, want [alpha charlie]", len(got), namesOf(got))
	}

	// q=bra narrows to bravo regardless of status default.
	if got := run(dashboardFilter{Search: "bra"}); !named(got, "bravo") {
		t.Errorf("search=bra: got %v", namesOf(got))
	}

	// repo_status=init_failed narrows to bravo only.
	if got := run(dashboardFilter{RepoStatus: "init_failed"}); !named(got, "bravo") {
		t.Errorf("repo_status=init_failed: got %v", namesOf(got))
	}

	// status=never_seen narrows on LastSeenAt == nil → charlie only.
	if got := run(dashboardFilter{Status: "never_seen"}); !named(got, "charlie") {
		t.Errorf("status=never_seen: got %v", namesOf(got))
	}

	// tag=prod narrows to alpha + charlie.
	if got := run(dashboardFilter{Tag: "prod"}); !named(got, "alpha", "charlie") {
		t.Errorf("tag=prod: got %v", namesOf(got))
	}
}
// TestSortDashboardHostsColumns checks the resulting name order for
// each column/direction pair in the table, one subtest per pair.
func TestSortDashboardHostsColumns(t *testing.T) {
	t.Parallel()
	fixture := makeFilterHosts()

	type sortCase struct {
		col, dir string
		want     []string
	}
	table := []sortCase{
		{col: "name", dir: "asc", want: []string{"alpha", "bravo", "charlie"}},
		{col: "name", dir: "desc", want: []string{"charlie", "bravo", "alpha"}},
		{col: "snapshot_count", dir: "asc", want: []string{"charlie", "bravo", "alpha"}},
		{col: "snapshot_count", dir: "desc", want: []string{"alpha", "bravo", "charlie"}},
		{col: "last_backup", dir: "asc", want: []string{"charlie", "alpha", "bravo"}}, // nil → zero → first
		{col: "repo_status", dir: "asc", want: []string{"bravo", "alpha", "charlie"}}, // init_failed < ready < unknown
	}

	for _, tc := range table {
		// Subtests run synchronously (no t.Parallel inside), so the
		// closure sees the current tc on every Go version.
		t.Run(tc.col+"_"+tc.dir, func(t *testing.T) {
			// Sort a private copy so cases don't feed into each other.
			sorted := make([]store.Host, len(fixture))
			copy(sorted, fixture)
			sortDashboardHosts(sorted, tc.col, tc.dir)

			names := namesOf(sorted)
			if sliceEq(names, tc.want) {
				return
			}
			t.Errorf("got %v, want %v", names, tc.want)
		})
	}
}
// TestParseDashboardFilterDefaults: parsing an empty query string
// must yield sort=name, dir=asc.
func TestParseDashboardFilterDefaults(t *testing.T) {
	t.Parallel()
	parsed := parseDashboardFilter(url.Values{})
	if parsed.Sort == "name" && parsed.Dir == "asc" {
		return
	}
	t.Errorf("defaults: got sort=%q dir=%q, want name/asc", parsed.Sort, parsed.Dir)
}
// TestBuildDashboardSortURLsToggles: clicking the active column
// flips direction; clicking another column resets to asc.
func TestBuildDashboardSortURLsToggles(t *testing.T) {
t.Parallel()
active := dashboardFilter{Sort: "name", Dir: "asc"}
urls := buildDashboardSortURLs(active)
if got := urls["name"]; got != "/?dir=desc" {
t.Errorf("name URL on active asc: got %q, want /?dir=desc", got)
}
// Switching to a non-default column also drops dir=asc since asc
// is the encoded default.
if got := urls["last_backup"]; got != "/?sort=last_backup" {
t.Errorf("last_backup URL: got %q, want /?sort=last_backup", got)
}
}
// namesOf projects a host slice onto its Name fields, preserving
// order. The result is always non-nil, with one entry per host.
func namesOf(hs []store.Host) []string {
	names := make([]string, 0, len(hs))
	for i := range hs {
		names = append(names, hs[i].Name)
	}
	return names
}
// sliceEq reports whether a and b hold the same strings in the same
// order. Nil and empty slices compare equal to each other.
func sliceEq(a, b []string) bool {
	same := len(a) == len(b)
	for i := 0; same && i < len(a); i++ {
		same = a[i] == b[i]
	}
	return same
}
+63
View File
@@ -146,6 +146,15 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
return
}
// NS-03: clear the host's last probe outcome — the new creds may
// reach a different repo (or fix an auth typo), so any prior
// "init_failed" / "ready" tag is stale. The next init dispatch
// (below, when the agent is online) will set it to a fresh value
// on completion.
if err := s.deps.Store.SetHostRepoStatus(r.Context(), hostID, "unknown", ""); err != nil {
slog.Warn("repo creds set: reset repo_status", "host_id", hostID, "err", err)
}
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
ID: ulid.Make().String(),
UserID: &user.ID,
@@ -160,11 +169,65 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
// the next reconnect will pick the row up via the hello handler.
if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
_ = s.pushRepoCredsToAgent(r.Context(), hostID, existing)
// Force a fresh probe so a typo / wrong URL surfaces now
// rather than at the next scheduled job. No-op if offline —
// the operator already saw "host offline" elsewhere.
if err := s.dispatchInitJob(r.Context(), hostID, "user", &user.ID); err != nil {
slog.Warn("repo creds set: dispatch init", "host_id", hostID, "err", err)
}
}
w.WriteHeader(stdhttp.StatusNoContent)
}
// dispatchInitJob creates an init job row, marshals the command.run,
// ships it down the agent's WS connection (when connected), and
// audits. NS-03 path: callers use this to force a fresh probe after
// credentials change without waiting for the next hello — and without
// the maybeAutoInit "first time only" guard. actorKind should be
// "user" for operator-driven dispatches and "system" for the
// auto-init-on-hello case so audit reflects intent.
//
// actorID is recorded on both the job row and the audit entry as-is.
//
// Ordering: the envelope is built before anything is persisted, so a
// marshal failure cannot leave behind a job row that no agent will
// ever be asked to run.
//
// Errors are returned (wrapped) for marshal, persist, and WS-send
// failures. On a send failure the job row has already been written
// and is left in place. When the host is not connected nothing is
// sent, but the job row and the audit entry are still written and
// nil is returned. A failed audit write is logged, not returned.
func (s *Server) dispatchInitJob(ctx context.Context, hostID, actorKind string, actorID *string) error {
	jobID := ulid.Make().String()
	now := time.Now().UTC()

	env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
		JobID: jobID,
		Kind:  api.JobInit,
	})
	if err != nil {
		return fmt.Errorf("dispatch init: marshal: %w", err)
	}

	if err := s.deps.Store.CreateJob(ctx, store.Job{
		ID:        jobID,
		HostID:    hostID,
		Kind:      string(api.JobInit),
		ActorKind: actorKind,
		ActorID:   actorID,
		CreatedAt: now,
	}); err != nil {
		return fmt.Errorf("dispatch init: persist job: %w", err)
	}

	if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
		// Bound the send so a wedged connection can't stall the caller.
		sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
		defer cancel()
		if err := s.deps.Hub.Send(sendCtx, hostID, env); err != nil {
			// The job row deliberately stays: the host's pending-runs
			// drain or the next hello is expected to pick it up.
			return fmt.Errorf("dispatch init: ws send: %w", err)
		}
	}

	// Audit is best-effort — the dispatch itself already succeeded —
	// but a lost audit entry should leave a trace in the logs.
	if err := s.deps.Store.AppendAudit(ctx, store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     actorID,
		Actor:      actorKind,
		Action:     "host.repo_init_dispatched",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         now,
	}); err != nil {
		slog.Warn("dispatch init: append audit", "host_id", hostID, "job_id", jobID, "err", err)
	}
	return nil
}
// pushRepoCredsToAgent serialises blob into a config.update envelope
// and ships it down the agent's WS. Returns an error from the hub
// (no-op if not connected — caller is expected to check first when it
+6
View File
@@ -141,6 +141,8 @@ func (s *Server) routes(r chi.Router) {
// sessions and doesn't require the UI renderer.
r.Post("/logout", s.handleUILogoutPost)
if s.deps.UI != nil {
r.Get("/bootstrap", s.handleUIBootstrapGet)
r.Post("/bootstrap", s.handleUIBootstrapPost)
r.Get("/login", s.handleUILoginGet)
r.Post("/login", s.handleUILoginPost)
r.Get("/setup", s.handleUISetupGet)
@@ -205,6 +207,8 @@ func (s *Server) routes(r chi.Router) {
r.Post("/api/pending-hosts/{id}/accept", s.handleAcceptPendingHost)
r.Post("/api/pending-hosts/{id}/reject", s.handleRejectPendingHost)
r.Post("/api/enrollment-tokens", s.handleCreateEnrollmentToken)
r.Post("/hosts/enrollment-tokens/{hash}/regenerate", s.handleUIEnrollmentTokenRegenerate)
r.Post("/hosts/enrollment-tokens/{hash}/revoke", s.handleUIEnrollmentTokenRevoke)
// Run-now, restore, repo ops (JSON).
r.Post("/api/hosts/{id}/jobs", s.handleRunNow)
@@ -247,6 +251,7 @@ func (s *Server) routes(r chi.Router) {
r.Post("/hosts/{id}/repo/bandwidth", s.handleUIRepoBandwidthSave)
r.Post("/hosts/{id}/repo/maintenance", s.handleUIRepoMaintenanceSave)
r.Post("/hosts/{id}/repo/reinit", s.handleUIRepoReinit)
r.Post("/hosts/{id}/repo/probe", s.handleUIRepoProbe)
r.Post("/hosts/{id}/repo/hooks", s.handleUIRepoHooksSave)
r.Post("/hosts/{id}/tags", s.handleUIHostTagsSave)
r.Post("/hosts/{id}/admin-credentials", s.handleUIAdminCredentialsSave)
@@ -276,6 +281,7 @@ func (s *Server) routes(r chi.Router) {
r.Post("/api/notifications/{id}/test", s.handleAPINotificationTest)
if s.deps.UI != nil {
r.Post("/hosts/{id}/delete", s.handleUIHostDelete)
r.Get("/settings", s.handleUISettings)
r.Get("/settings/users", s.handleUIUsersList)
r.Get("/settings/users/new", s.handleUIUserNewGet)
+13
View File
@@ -18,6 +18,7 @@ type alertsPage struct {
Alerts []store.Alert
Counts alertCounts
HostNames map[string]string // host_id → name for table rendering
Usernames map[string]string // user_id → username for the "ack'd by …" line
RefreshURL string // self-URL for the live-refresh poll
}
@@ -56,6 +57,7 @@ func (s *Server) handleUIAlerts(w stdhttp.ResponseWriter, r *stdhttp.Request) {
Filter: f,
Alerts: alerts,
HostNames: map[string]string{},
Usernames: map[string]string{},
RefreshURL: r.URL.RequestURI(),
}
if hosts, err := s.deps.Store.ListHosts(r.Context()); err == nil {
@@ -63,6 +65,17 @@ func (s *Server) handleUIAlerts(w stdhttp.ResponseWriter, r *stdhttp.Request) {
page.HostNames[h.ID] = h.Name
}
}
// Resolve user IDs that appear on acknowledged rows to usernames so
// the "ack'd by …" line shows a human name rather than the
// underlying ULID. Cheap at fleet sizes we care about (one extra
// query per alerts page render). Disabled users are still resolved
// — operators want to know *who* ack'd, even if the account is
// since gone.
if users, err := s.deps.Store.ListUsers(r.Context(), store.UserSort{}); err == nil {
for _, usr := range users {
page.Usernames[usr.ID] = usr.Username
}
}
page.Counts = computeAlertCounts(s, r)
view := s.baseView(r, u)
@@ -0,0 +1,143 @@
// ui_enrollment_tokens.go — NS-02 token-recovery handlers.
//
// Today the only handle on a freshly-minted enrolment token is its
// /hosts/pending/{token} URL, which lives in the operator's browser
// tab. Closing that tab loses the install snippet — the row stays
// alive in the DB until TTL expiry but invisible to the UI. These
// handlers close the gap with two operations exposed on the
// Add-host page:
//
// POST /hosts/enrollment-tokens/{hash}/regenerate
// POST /hosts/enrollment-tokens/{hash}/revoke
//
// Hash here is the *token_hash* (sha256 hex of the raw token), which
// is opaque on its own — it is not the credential, just an identifier
// for the row. We chose regenerate over "show original token" because
// only hashes are persisted; the raw token has been gone since the
// original /hosts/new POST.
package http
import (
"encoding/json"
"errors"
"log/slog"
stdhttp "net/http"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// handleUIEnrollmentTokenRegenerate replaces the enrolment-token row
// identified by the {hash} path parameter with a freshly minted token
// that carries the same attachments (encrypted repo creds, initial
// paths), then 303-redirects to /hosts/pending/{newToken} so the
// operator lands directly on the install snippet.
//
// Responses:
//   - 401 (JSON error body) when no user is authenticated.
//   - 400 when the hash path parameter is empty.
//   - 303 to /hosts/new when the old row no longer exists.
//   - 500 when the attachments cannot be loaded, decrypted or decoded,
//     or when minting the replacement token fails.
//
// The new row is minted before the old one is deleted, so a failure in
// between leaves the operator with two rows rather than none. The
// audit entry is best-effort and targets the first 12 characters of
// the old hash.
func (s *Server) handleUIEnrollmentTokenRegenerate(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}
	oldHash := chi.URLParam(r, "hash")
	if oldHash == "" {
		stdhttp.Error(w, "missing hash", stdhttp.StatusBadRequest)
		return
	}
	att, err := s.deps.Store.GetEnrollmentTokenAttachments(r.Context(), oldHash)
	if err != nil {
		if errors.Is(err, store.ErrNotFound) {
			// Already expired/consumed/revoked — bounce back without
			// fanfare so a stale form re-submit doesn't loud-fail.
			stdhttp.Redirect(w, r, "/hosts/new", stdhttp.StatusSeeOther)
			return
		}
		slog.Error("regen: load attachments", "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}
	var blob repoCredsBlob
	if att.EncRepoCreds != "" {
		// The ciphertext is bound to the old token hash as associated
		// data, so it has to be decrypted before it can be re-minted.
		plain, err := s.deps.AEAD.Decrypt(att.EncRepoCreds, []byte("token:"+oldHash))
		if err != nil {
			slog.Error("regen: decrypt", "err", err)
			stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
			return
		}
		if err := json.Unmarshal(plain, &blob); err != nil {
			// A blob that decrypts but does not parse would otherwise
			// produce a replacement token with silently empty repo
			// credentials and then drop the old row. Fail before either
			// row is touched instead.
			slog.Error("regen: decode creds", "err", err)
			stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
			return
		}
	}
	// Mint the new row first; only revoke the old one once the fresh
	// row exists. If something fails between, the operator at worst
	// sees both rows side-by-side on the list page (and can revoke the
	// stale one manually) — much better than nuking the old row and
	// failing the mint, leaving them with nothing.
	newToken, _, err := s.mintEnrollmentToken(r.Context(),
		blob.RepoURL, blob.RepoUsername, blob.RepoPassword, att.InitialPaths)
	if err != nil {
		slog.Error("regen: mint new", "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}
	if err := s.deps.Store.DeleteEnrollmentToken(r.Context(), oldHash); err != nil &&
		!errors.Is(err, store.ErrNotFound) {
		slog.Warn("regen: delete old", "old_hash", oldHash, "err", err)
		// Fall through — the new row is good; operator can revoke the
		// stale row from the list if the orphan row bothers them.
	}
	uid := user.ID
	// Only a short prefix of the hash goes into the audit trail.
	short := oldHash
	if len(short) > 12 {
		short = short[:12]
	}
	// Best-effort audit: a failed append must not block the redirect.
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &uid,
		Actor:      "user",
		Action:     "enrollment_token.regenerated",
		TargetKind: ptr("enrollment_token"),
		TargetID:   &short,
		TS:         nowUTC(),
	})
	stdhttp.Redirect(w, r, "/hosts/pending/"+newToken, stdhttp.StatusSeeOther)
}
// handleUIEnrollmentTokenRevoke removes the enrolment-token row named
// by the {hash} path parameter and 303-redirects to /hosts/new, where
// the outstanding-token list re-renders without it.
//
// A missing row is treated the same as a successful delete. Any other
// store error yields a 500. An unauthenticated request gets a 401 JSON
// error and an empty hash a 400. The audit entry is best-effort and
// targets the first 12 characters of the hash.
func (s *Server) handleUIEnrollmentTokenRevoke(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	actor, authed := s.requireUser(r)
	if !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	tokenHash := chi.URLParam(r, "hash")
	if tokenHash == "" {
		stdhttp.Error(w, "missing hash", stdhttp.StatusBadRequest)
		return
	}

	switch err := s.deps.Store.DeleteEnrollmentToken(r.Context(), tokenHash); {
	case err == nil, errors.Is(err, store.ErrNotFound):
		// Deleted now or already gone: both end with the row absent.
	default:
		slog.Error("revoke: delete", "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	actorID := actor.ID
	// Only a short prefix of the hash goes into the audit trail.
	auditTarget := tokenHash
	if len(auditTarget) > 12 {
		auditTarget = auditTarget[:12]
	}
	entry := store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &actorID,
		Actor:      "user",
		Action:     "enrollment_token.revoked",
		TargetKind: ptr("enrollment_token"),
		TargetID:   &auditTarget,
		TS:         nowUTC(),
	}
	_ = s.deps.Store.AppendAudit(r.Context(), entry)

	stdhttp.Redirect(w, r, "/hosts/new", stdhttp.StatusSeeOther)
}
@@ -0,0 +1,158 @@
// ui_enrollment_tokens_test.go — covers NS-02 token-recovery handlers:
// revoke deletes the row, regenerate swaps the row out for a fresh
// raw token redirected to /hosts/pending/{newToken}.
package http
import (
"context"
"errors"
stdhttp "net/http"
"strings"
"testing"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// mintTestToken creates an enrolment token through
// srv.mintEnrollmentToken with fixed repo credentials and a single
// initial path ("/etc"). It returns the raw token together with its
// auth.HashToken digest and fails the test immediately if minting
// errors.
func mintTestToken(t *testing.T, srv *Server) (raw, hash string) {
	t.Helper()

	var err error
	raw, _, err = srv.mintEnrollmentToken(
		context.Background(),
		"rest:http://r:8000/x/", "u", "p",
		[]string{"/etc"},
	)
	if err != nil {
		t.Fatalf("mint: %v", err)
	}
	hash = auth.HashToken(raw)
	return raw, hash
}
// TestEnrollmentTokenRevokeDeletesRow posts to .../revoke for a
// freshly minted token and checks three things: the response is a 303
// to /hosts/new, the row can no longer be looked up, and exactly one
// enrollment_token.revoked audit row exists.
func TestEnrollmentTokenRevokeDeletesRow(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	_, tokenHash := mintTestToken(t, srv)
	session := loginAsAdmin(t, st)

	endpoint := ts.URL + "/hosts/enrollment-tokens/" + tokenHash + "/revoke"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(""))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(session)

	// Stop at the first response so the redirect itself can be asserted.
	noFollow := func(*stdhttp.Request, []*stdhttp.Request) error {
		return stdhttp.ErrUseLastResponse
	}
	client := &stdhttp.Client{CheckRedirect: noFollow}

	resp, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusSeeOther {
		t.Fatalf("status: got %d, want 303", resp.StatusCode)
	}
	location := resp.Header.Get("Location")
	if location != "/hosts/new" {
		t.Errorf("Location: got %q, want /hosts/new", location)
	}

	_, err = st.GetEnrollmentTokenAttachments(context.Background(), tokenHash)
	if !errors.Is(err, store.ErrNotFound) {
		t.Errorf("post-revoke lookup: want ErrNotFound, got %v", err)
	}

	var auditRows int
	row := st.DB().QueryRow(
		`SELECT COUNT(*) FROM audit_log WHERE action = 'enrollment_token.revoked'`)
	if err := row.Scan(&auditRows); err != nil {
		t.Fatalf("count audit: %v", err)
	}
	if auditRows != 1 {
		t.Errorf("audit rows: got %d, want 1", auditRows)
	}
}
// TestEnrollmentTokenRegenerateSwapsRow posts to .../regenerate for a
// freshly minted token and checks that the response is a 303 to
// /hosts/pending/<token> with a new, non-empty raw token. It also
// verifies that the old hash is gone, that the new hash carries the
// original initial-paths attachment, and that exactly one
// enrollment_token.regenerated audit row exists.
func TestEnrollmentTokenRegenerateSwapsRow(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	oldRaw, oldHash := mintTestToken(t, srv)
	session := loginAsAdmin(t, st)
	ctx := context.Background()

	endpoint := ts.URL + "/hosts/enrollment-tokens/" + oldHash + "/regenerate"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(""))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(session)

	// Stop at the first response so the redirect itself can be asserted.
	noFollow := func(*stdhttp.Request, []*stdhttp.Request) error {
		return stdhttp.ErrUseLastResponse
	}
	client := &stdhttp.Client{CheckRedirect: noFollow}

	resp, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusSeeOther {
		t.Fatalf("status: got %d, want 303", resp.StatusCode)
	}
	location := resp.Header.Get("Location")
	if !strings.HasPrefix(location, "/hosts/pending/") {
		t.Fatalf("Location: got %q, want /hosts/pending/<token>", location)
	}
	newRaw := strings.TrimPrefix(location, "/hosts/pending/")
	if newRaw == "" || newRaw == oldRaw {
		t.Fatalf("regenerate produced same/empty token (old=%q, new=%q)", oldRaw, newRaw)
	}

	// The old row must have been removed...
	_, err = st.GetEnrollmentTokenAttachments(ctx, oldHash)
	if !errors.Is(err, store.ErrNotFound) {
		t.Errorf("old hash should be gone; got %v", err)
	}
	// ...and the replacement must carry the same paths attachment.
	att, err := st.GetEnrollmentTokenAttachments(ctx, auth.HashToken(newRaw))
	if err != nil {
		t.Fatalf("new hash lookup: %v", err)
	}
	if len(att.InitialPaths) != 1 || att.InitialPaths[0] != "/etc" {
		t.Errorf("attachments: got paths %v, want [/etc]", att.InitialPaths)
	}

	var auditRows int
	row := st.DB().QueryRow(
		`SELECT COUNT(*) FROM audit_log WHERE action = 'enrollment_token.regenerated'`)
	if err := row.Scan(&auditRows); err != nil {
		t.Fatalf("count audit: %v", err)
	}
	if auditRows != 1 {
		t.Errorf("audit rows: got %d, want 1", auditRows)
	}
}
// TestEnrollmentTokenRegenerateMissingTokenRedirects posts to
// .../regenerate with a hash that was never minted ("deadbeef") and
// checks the handler answers with a 303 to /hosts/new rather than a
// 5xx, so a stale form re-submit stays harmless.
func TestEnrollmentTokenRegenerateMissingTokenRedirects(t *testing.T) {
	t.Parallel()
	_, ts, st := rawTestServerWithUI(t)
	session := loginAsAdmin(t, st)

	endpoint := ts.URL + "/hosts/enrollment-tokens/deadbeef/regenerate"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(""))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(session)

	// Stop at the first response so the redirect itself can be asserted.
	noFollow := func(*stdhttp.Request, []*stdhttp.Request) error {
		return stdhttp.ErrUseLastResponse
	}
	client := &stdhttp.Client{CheckRedirect: noFollow}

	resp, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusSeeOther {
		t.Fatalf("status: got %d, want 303", resp.StatusCode)
	}
	location := resp.Header.Get("Location")
	if location != "/hosts/new" {
		t.Errorf("Location: got %q, want /hosts/new", location)
	}
}
+276 -18
View File
@@ -9,6 +9,7 @@ import (
"log/slog"
stdhttp "net/http"
"net/url"
"sort"
"strings"
"time"
@@ -130,7 +131,7 @@ func (s *Server) version() string {
type dashboardPage struct {
Hosts []dashboardHostRow
HostCount int // unfiltered fleet size
ShownCount int // after the tag filter (== HostCount when no filter)
ShownCount int // after every active filter
Summary store.FleetSummary
PendingHosts []store.PendingHost // announce-and-approve queue (P2-18d)
CritOpenCount int
@@ -139,6 +140,31 @@ type dashboardPage struct {
// the fleet, used to render the chip-row.
ActiveTag string
KnownTags []string
// Filter / sort URL state (NS-04). Round-tripped through query
// string so a bookmarked / shared dashboard URL is durable, and
// passed back to the template so the form inputs and column
// header sort-arrows render with current state.
Filter dashboardFilter
// RefreshURL is the same dashboard URL with all current filters
// pinned, used by the htmx live-poll trigger to refetch the
// table without flashing the surrounding chrome.
RefreshURL string
// SortURL is a per-column URL builder: passing a column key
// returns the URL that sorts by that column (toggling direction
// when it's already active). Pre-computed so the template stays
// dumb.
SortURL map[string]string
}
// dashboardFilter holds the parsed query-string filter and sort state
// for the dashboard. parseDashboardFilter builds it from the request
// URL and encode turns it back into a query string, so the same value
// drives filtering, sorting, and link generation.
type dashboardFilter struct {
Search string // ?q= — case-insensitive substring match against the host name
Status string // ?status= — "" | "online" | "offline" | "never_seen" (no last-seen timestamp)
RepoStatus string // ?repo_status= — "" | "unknown" | "ready" | "init_failed"; an empty host value counts as "unknown"
Tag string // ?tag= — mirrors ActiveTag for round-trip on links
Sort string // ?sort= — column key (see sortDashboardHosts); parsing defaults it to "name"
Dir string // ?dir= — "asc" | "desc"; parsing maps anything else to "asc"
}
// dashboardHostRow carries a host plus the per-row Run-now decision
@@ -211,21 +237,10 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
return
}
// Tag filter (chip-row above the table). Empty = show all.
activeTag := r.URL.Query().Get("tag")
hosts := allHosts
if activeTag != "" {
filtered := make([]store.Host, 0, len(allHosts))
for _, h := range allHosts {
for _, t := range h.Tags {
if t == activeTag {
filtered = append(filtered, h)
break
}
}
}
hosts = filtered
}
// Parse query-string filter + sort (NS-04). The tag chip-row is
// kept as ?tag= for backwards compat with existing bookmarks.
filter := parseDashboardFilter(r.URL.Query())
hosts := filterAndSortDashboardHosts(allHosts, filter)
knownTags, _ := s.deps.Store.DistinctHostTags(r.Context())
summary, err := s.deps.Store.FleetSummary(r.Context())
@@ -282,8 +297,11 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
Summary: summary,
PendingHosts: pending,
CritOpenCount: critOpenCount,
ActiveTag: activeTag,
ActiveTag: filter.Tag,
KnownTags: knownTags,
Filter: filter,
RefreshURL: "/?" + filter.encode(),
SortURL: buildDashboardSortURLs(filter),
}
if err := s.deps.UI.Render(w, "dashboard", view); err != nil {
slog.Error("ui: render dashboard", "err", err)
@@ -291,6 +309,182 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
}
}
// parseDashboardFilter builds a dashboardFilter from the request's
// query values. The search term ("q") is whitespace-trimmed; status,
// repo_status and tag are taken verbatim. A missing sort key becomes
// "name", and any direction other than "desc" becomes "asc", so
// downstream code never has to special-case empty values.
func parseDashboardFilter(q url.Values) dashboardFilter {
	sortKey := q.Get("sort")
	if sortKey == "" {
		sortKey = "name"
	}

	// Only an explicit "desc" flips the direction; everything else,
	// including garbage from a hand-edited URL, means ascending.
	direction := "asc"
	if q.Get("dir") == "desc" {
		direction = "desc"
	}

	return dashboardFilter{
		Search:     strings.TrimSpace(q.Get("q")),
		Status:     q.Get("status"),
		RepoStatus: q.Get("repo_status"),
		Tag:        q.Get("tag"),
		Sort:       sortKey,
		Dir:        direction,
	}
}
// encode serialises the filter back into a URL-escaped query string,
// used for the live-refresh URL and the column-sort links. Empty
// fields are left out, as are the defaults sort=name and dir=asc, so
// an all-default filter encodes to the empty string.
func (f dashboardFilter) encode() string {
	params := url.Values{}

	// add sets key only when val is non-empty and differs from the
	// value that is implied when the key is absent.
	add := func(key, val, implied string) {
		if val != "" && val != implied {
			params.Set(key, val)
		}
	}

	add("q", f.Search, "")
	add("status", f.Status, "")
	add("repo_status", f.RepoStatus, "")
	add("tag", f.Tag, "")
	add("sort", f.Sort, "name")
	add("dir", f.Dir, "asc")

	return params.Encode()
}
// filterAndSortDashboardHosts returns the hosts that satisfy every
// active dimension of f (search ∧ status ∧ repo_status ∧ tag), sorted
// by f.Sort / f.Dir. The input slice is not modified; the result is a
// fresh slice. Sorting happens after filtering.
func filterAndSortDashboardHosts(hosts []store.Host, f dashboardFilter) []store.Host {
	needle := strings.ToLower(f.Search)

	// Case-insensitive substring match on the host name.
	matchesSearch := func(h *store.Host) bool {
		return needle == "" || strings.Contains(strings.ToLower(h.Name), needle)
	}

	// "never_seen" keys off the missing last-seen timestamp rather
	// than the status string. Unrecognised values do not filter.
	matchesStatus := func(h *store.Host) bool {
		switch f.Status {
		case "online", "offline":
			return h.Status == f.Status
		case "never_seen":
			return h.LastSeenAt == nil
		default:
			return true
		}
	}

	// An empty repo status on the host is treated as "unknown"
	// (backward compatibility for rows that predate NS-03).
	matchesRepoStatus := func(h *store.Host) bool {
		if f.RepoStatus == "" {
			return true
		}
		current := h.RepoStatus
		if current == "" {
			current = "unknown"
		}
		return current == f.RepoStatus
	}

	// Exact match against any one of the host's tags.
	matchesTag := func(h *store.Host) bool {
		if f.Tag == "" {
			return true
		}
		for _, tag := range h.Tags {
			if tag == f.Tag {
				return true
			}
		}
		return false
	}

	kept := make([]store.Host, 0, len(hosts))
	for i := range hosts {
		h := &hosts[i]
		if matchesSearch(h) && matchesStatus(h) && matchesRepoStatus(h) && matchesTag(h) {
			kept = append(kept, *h)
		}
	}

	sortDashboardHosts(kept, f.Sort, f.Dir)
	return kept
}
// sortDashboardHosts orders hosts in place by the column key col and
// the direction dir ("desc" reverses; anything else is ascending).
// Hosts that tie on the chosen column, and every host when col is not
// a recognised key, are ordered by name. That keeps a malformed
// bookmarked URL rendering a sensibly ordered table.
//
// For "last_backup", a host with no recorded backup sorts as the zero
// time.
func sortDashboardHosts(hosts []store.Host, col, dir string) {
	// lastBackup flattens the optional timestamp to a comparable value.
	lastBackup := func(h *store.Host) time.Time {
		if h.LastBackupAt == nil {
			return time.Time{}
		}
		return *h.LastBackupAt
	}

	// ascending reports whether hosts[i] sorts before hosts[j]. Each
	// case fires only when the selected column actually differs;
	// otherwise control falls out to the name tie-break.
	ascending := func(i, j int) bool {
		x, y := &hosts[i], &hosts[j]
		switch {
		case col == "os" && x.OS != y.OS:
			return x.OS < y.OS
		case col == "status" && x.Status != y.Status:
			return x.Status < y.Status
		case col == "repo_status" && x.RepoStatus != y.RepoStatus:
			return x.RepoStatus < y.RepoStatus
		case col == "restic" && x.ResticVersion != y.ResticVersion:
			return x.ResticVersion < y.ResticVersion
		case col == "snapshot_count" && x.SnapshotCount != y.SnapshotCount:
			return x.SnapshotCount < y.SnapshotCount
		case col == "repo_size" && x.RepoSizeBytes != y.RepoSizeBytes:
			return x.RepoSizeBytes < y.RepoSizeBytes
		case col == "last_backup" && !lastBackup(x).Equal(lastBackup(y)):
			return lastBackup(x).Before(lastBackup(y))
		}
		// Secondary key: name.
		return x.Name < y.Name
	}

	if dir != "desc" {
		sort.Slice(hosts, ascending)
		return
	}
	// Descending is the ascending comparison with its arguments
	// swapped, which reverses the name tie-break as well.
	sort.Slice(hosts, func(i, j int) bool { return ascending(j, i) })
}
// buildDashboardSortURLs returns, for each sortable column key, the
// dashboard URL that its header link should point at. Every other
// filter in active is carried over unchanged. The column that is
// currently sorted ascending links to its descending variant; every
// other column (and the active one when already descending) links to
// ascending. A filter that encodes to nothing yields "/".
func buildDashboardSortURLs(active dashboardFilter) map[string]string {
	columns := [...]string{
		"name", "os", "status", "repo_status",
		"restic", "snapshot_count", "repo_size", "last_backup",
	}

	urls := make(map[string]string, len(columns))
	for _, col := range columns {
		next := active
		next.Sort, next.Dir = col, "asc"
		if active.Sort == col && active.Dir == "asc" {
			next.Dir = "desc"
		}

		target := "/"
		if query := next.encode(); query != "" {
			target = "/?" + query
		}
		urls[col] = target
	}
	return urls
}
// Per-host Run-now and manual Init-repo were retired by the P2 redesign.
// Run-now lives at POST /hosts/{id}/source-groups/{gid}/run; init runs
// automatically on the agent's first WS connect after enrolment. Both
@@ -324,6 +518,23 @@ type addHostPage struct {
Paths string
ServerURL string
Error string
// Outstanding tokens (NS-02) — every still-valid (un-consumed,
// un-expired) enrolment token, surfaced so an operator who closed
// the install snippet tab can recover via Regenerate or revoke.
OutstandingTokens []addHostOutstandingToken
}
// addHostOutstandingToken is a UI-shaped projection of a row from
// store.ListOutstandingEnrollmentTokens with the repo URL already
// decrypted-and-redacted (no creds reach the browser). One value is
// built per outstanding token by loadOutstandingTokensForUI and
// rendered on the Add-host page.
type addHostOutstandingToken struct {
TokenHash string // full hex hash; opaque path param for the regenerate / revoke actions
ShortHash string // first 12 chars of TokenHash for display
CreatedAt time.Time
ExpiresAt time.Time
RepoURL string // redacted (no embedded creds); a placeholder when decryption failed, empty when no creds are attached
InitialPaths []string
}
// pendingHostPage is the GET /hosts/pending/{token} view. Lives
@@ -347,13 +558,54 @@ func (s *Server) handleUIAddHostGet(w stdhttp.ResponseWriter, r *stdhttp.Request
}
view := s.baseView(r, u)
view.Title = "Add host · restic-manager"
view.Page = addHostPage{ServerURL: s.publicURL(r)}
view.Page = addHostPage{
ServerURL: s.publicURL(r),
OutstandingTokens: s.loadOutstandingTokensForUI(r),
}
if err := s.deps.UI.Render(w, "add_host", view); err != nil {
slog.Error("ui: render add_host", "err", err)
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
}
}
// loadOutstandingTokensForUI fetches the still-valid enrolment tokens
// and decrypts each row's repo URL so the Add-host page can show a
// recoverable list. Only the redacted URL is exposed; the rest of the
// credentials blob never leaves this function.
//
// Failures never break the page. If the list query fails, it is logged
// and nil is returned. If a row's credentials cannot be decrypted
// (rotated key etc.) or decoded, that is logged and the row is shown
// with a placeholder in place of the URL. Rows without attached
// credentials keep an empty RepoURL.
func (s *Server) loadOutstandingTokensForUI(r *stdhttp.Request) []addHostOutstandingToken {
	rows, err := s.deps.Store.ListOutstandingEnrollmentTokens(r.Context())
	if err != nil {
		slog.Warn("ui add_host: list outstanding tokens", "err", err)
		return nil
	}
	out := make([]addHostOutstandingToken, 0, len(rows))
	for _, row := range rows {
		// Display (and log) only a short prefix of the hash.
		short := row.TokenHash
		if len(short) > 12 {
			short = short[:12]
		}
		entry := addHostOutstandingToken{
			TokenHash:    row.TokenHash,
			ShortHash:    short,
			CreatedAt:    row.CreatedAt,
			ExpiresAt:    row.ExpiresAt,
			InitialPaths: row.InitialPaths,
		}
		if row.EncRepoCreds != "" {
			// The ciphertext is bound to its own token hash as
			// associated data.
			plain, derr := s.deps.AEAD.Decrypt(row.EncRepoCreds, []byte("token:"+row.TokenHash))
			if derr != nil {
				slog.Warn("ui add_host: decrypt outstanding token creds",
					"token", short, "err", derr)
				entry.RepoURL = "(decrypt failed — key rotation?)"
			} else {
				var blob repoCredsBlob
				if uerr := json.Unmarshal(plain, &blob); uerr != nil {
					// Don't present a half-decoded blob as a real URL.
					slog.Warn("ui add_host: decode outstanding token creds",
						"token", short, "err", uerr)
					entry.RepoURL = "(credentials unreadable)"
				} else {
					entry.RepoURL = restic.RedactURL(blob.RepoURL)
				}
			}
		}
		out = append(out, entry)
	}
	return out
}
// handleUIAddHostPost validates the form, mints the enrolment token
// (with encrypted repo creds), and 303-redirects to the persistent
// pending-host page. On validation errors we re-render the form
@@ -922,6 +1174,12 @@ func (s *Server) handleUILoginGet(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Redirect(w, r, "/", stdhttp.StatusSeeOther)
return
}
// First-run: no users + token still in memory ⇒ funnel the visitor
// to the bootstrap page so they don't have to know the API exists.
if s.bootstrapAvailable(r) {
stdhttp.Redirect(w, r, "/bootstrap", stdhttp.StatusSeeOther)
return
}
view := ui.ViewData{
Version: s.version(),
OIDCError: r.URL.Query().Get("oidc_error"),
+103
View File
@@ -0,0 +1,103 @@
// ui_host_delete.go — admin-band danger-zone host deletion (NS-01).
//
// Removes the host row from the store; FK cascades wipe schedules,
// jobs, snapshots metadata, source groups, alerts, host_credentials,
// host_repo_maintenance, host_repo_stats, and the schedule junction.
// Also closes the host's active WS connection so the agent's bearer
// stops being usable in the same tick (the bearer hash lives on the
// hosts row itself, so DeleteHost already revokes it for any future
// auth attempt — closing the live socket is the courtesy that drops
// the in-flight session).
//
// Audit-logged with action="host.deleted" so the trail records who
// performed the deletion and against which host.
package http
import (
"encoding/json"
"errors"
"log/slog"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// handleUIHostDelete serves POST /hosts/{id}/delete. It requires the
// form field confirm_hostname to equal the host's name (after
// trimming whitespace), closes the host's live WS connection if there
// is one, deletes the host row, and appends a best-effort
// "host.deleted" audit entry whose payload carries the host name.
//
// Responses:
//   - 400 when the form cannot be parsed or the confirmation does not
//     match; nothing is deleted.
//   - 500 when the store delete fails for a reason other than the row
//     already being gone.
//   - 303 to "/" on success, or 204 with an HX-Redirect header when
//     wantsHTML(r) is true.
//
// Authentication and host lookup are delegated to requireUIUser and
// loadHostForUI; when either declines, this handler returns without
// writing anything further.
func (s *Server) handleUIHostDelete(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	actor := s.requireUIUser(w, r)
	if actor == nil {
		return
	}
	target, found := s.loadHostForUI(w, r)
	if !found {
		return
	}
	if err := r.ParseForm(); err != nil {
		stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
		return
	}

	// Typed-name gate. A mismatch is answered with a plain 400 rather
	// than a redirect plus flash, because the host detail page has no
	// error banner to show one in.
	if typed := strings.TrimSpace(r.PostForm.Get("confirm_hostname")); typed != target.Name {
		stdhttp.Error(w,
			"hostname confirmation did not match — go back and re-type",
			stdhttp.StatusBadRequest)
		return
	}

	// Close any live WS session before removing the row so the agent
	// sees a clean close. No connection just means it was offline.
	if hub := s.deps.Hub; hub != nil {
		if conn := hub.Conn(target.ID); conn != nil {
			_ = conn.Close()
		}
	}

	switch err := s.deps.Store.DeleteHost(r.Context(), target.ID); {
	case err == nil:
		// Deleted; carry on to the audit entry.
	case errors.Is(err, store.ErrNotFound):
		// Someone else removed it between the lookup and now. The end
		// state is what the operator asked for, so treat as success.
		stdhttp.Redirect(w, r, "/", stdhttp.StatusSeeOther)
		return
	default:
		slog.Error("ui host delete: store", "host_id", target.ID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	actorID := actor.ID
	deletedID := target.ID
	// Keep the name in the audit payload: the row is gone, so the ID
	// alone would not tell a later reader which host this was.
	details, _ := json.Marshal(struct {
		Name string `json:"name"`
	}{Name: target.Name})
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &actorID,
		Actor:      "user",
		Action:     "host.deleted",
		TargetKind: ptr("host"),
		TargetID:   &deletedID,
		TS:         time.Now().UTC(),
		Payload:    details,
	})

	if !wantsHTML(r) {
		stdhttp.Redirect(w, r, "/", stdhttp.StatusSeeOther)
		return
	}
	w.Header().Set("HX-Redirect", "/")
	w.WriteHeader(stdhttp.StatusNoContent)
}
+167
View File
@@ -0,0 +1,167 @@
// ui_host_delete_test.go — covers the admin-band danger-zone host
// delete handler: hostname-confirm gate, RBAC, FK cascade, redirect,
// audit.
package http
import (
"context"
"errors"
stdhttp "net/http"
"net/url"
"strings"
"testing"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// loginAsRole creates a new user with the given role plus a one-hour
// session for it, and returns the session cookie. The username is the
// role name followed by the first six characters of the user's ULID.
// It is kept local to this file so the RBAC test stays self-contained.
func loginAsRole(t *testing.T, st *store.Store, role store.Role) *stdhttp.Cookie {
	t.Helper()
	ctx := context.Background()

	userID := ulid.Make().String()
	pwHash, _ := auth.HashPassword("very-long-test-password")
	account := store.User{
		ID:           userID,
		Username:     string(role) + "-" + userID[:6],
		PasswordHash: pwHash,
		Role:         role,
		CreatedAt:    time.Now().UTC(),
	}
	if err := st.CreateUser(ctx, account); err != nil {
		t.Fatalf("create user: %v", err)
	}

	rawToken, _ := auth.NewToken()
	session := store.Session{
		UserID:    userID,
		CreatedAt: time.Now().UTC(),
		ExpiresAt: time.Now().Add(time.Hour).UTC(),
	}
	// The store is handed the token's hash; the cookie carries the raw value.
	if err := st.CreateSession(ctx, session, auth.HashToken(rawToken)); err != nil {
		t.Fatalf("create session: %v", err)
	}

	return &stdhttp.Cookie{Name: sessionCookieName, Value: rawToken}
}
// TestHostDeleteWrongHostnameRejected submits the delete form with a
// confirmation that does not match the host's name and checks the
// handler answers 400 and leaves the host row in place.
func TestHostDeleteWrongHostnameRejected(t *testing.T) {
	t.Parallel()
	_, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, nil, st, "del-wrong-host")
	session := loginAsAdmin(t, st)

	body := url.Values{"confirm_hostname": {"NOT-THE-NAME"}}.Encode()
	endpoint := ts.URL + "/hosts/" + hostID + "/delete"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(body))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(session)

	resp, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusBadRequest {
		t.Fatalf("status: got %d, want 400", resp.StatusCode)
	}
	_, err = st.GetHost(context.Background(), hostID)
	if err != nil {
		t.Fatalf("host should still exist; got %v", err)
	}
}
// TestHostDeleteRequiresAdmin submits a correctly confirmed delete as
// a viewer and as an operator, and checks that each gets a 403 and the
// host row survives.
func TestHostDeleteRequiresAdmin(t *testing.T) {
	t.Parallel()
	_, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, nil, st, "del-rbac-host")

	endpoint := ts.URL + "/hosts/" + hostID + "/delete"
	nonAdminRoles := []store.Role{store.RoleViewer, store.RoleOperator}

	for _, role := range nonAdminRoles {
		role := role
		t.Run(string(role), func(t *testing.T) {
			session := loginAsRole(t, st, role)

			// The confirmation matches, so only the role can block this.
			body := url.Values{"confirm_hostname": {"del-rbac-host"}}.Encode()
			req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(body))
			req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
			req.AddCookie(session)

			resp, err := stdhttp.DefaultClient.Do(req)
			if err != nil {
				t.Fatalf("do: %v", err)
			}
			defer resp.Body.Close()

			if resp.StatusCode != stdhttp.StatusForbidden {
				t.Fatalf("status: got %d, want 403", resp.StatusCode)
			}
			_, err = st.GetHost(context.Background(), hostID)
			if err != nil {
				t.Fatalf("host should still exist; got %v", err)
			}
		})
	}
}
// TestHostDeleteHappyPathCascadesAndAudits submits a correctly
// confirmed delete as an admin and checks four things: the response is
// a 303 to "/", the host row is gone, the job seeded against the host
// was removed with it, and exactly one host.deleted audit row targets
// the host.
func TestHostDeleteHappyPathCascadesAndAudits(t *testing.T) {
	t.Parallel()
	_, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, nil, st, "del-ok-host")
	ctx := context.Background()

	// One dependent row, so the cascade is observable through HTTP.
	seeded := store.Job{
		ID:        ulid.Make().String(),
		HostID:    hostID,
		Kind:      "backup",
		ActorKind: "system",
		CreatedAt: time.Now().UTC(),
	}
	if err := st.CreateJob(ctx, seeded); err != nil {
		t.Fatalf("seed job: %v", err)
	}

	session := loginAsAdmin(t, st)
	body := url.Values{"confirm_hostname": {"del-ok-host"}}.Encode()
	endpoint := ts.URL + "/hosts/" + hostID + "/delete"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(body))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(session)

	// Stop at the first response so the redirect itself can be asserted.
	noFollow := func(*stdhttp.Request, []*stdhttp.Request) error {
		return stdhttp.ErrUseLastResponse
	}
	client := &stdhttp.Client{CheckRedirect: noFollow}

	resp, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusSeeOther {
		t.Fatalf("status: got %d, want 303", resp.StatusCode)
	}
	location := resp.Header.Get("Location")
	if location != "/" {
		t.Errorf("Location: got %q, want /", location)
	}

	// The host row itself is gone.
	_, err = st.GetHost(ctx, hostID)
	if !errors.Is(err, store.ErrNotFound) {
		t.Errorf("GetHost after delete: want ErrNotFound, got %v", err)
	}

	// The seeded job went with it.
	var jobRows int
	jobQuery := st.DB().QueryRow(`SELECT COUNT(*) FROM jobs WHERE host_id = ?`, hostID)
	if err := jobQuery.Scan(&jobRows); err != nil {
		t.Fatalf("count jobs: %v", err)
	}
	if jobRows != 0 {
		t.Errorf("cascade left %d job rows", jobRows)
	}

	// Exactly one audit entry records the deletion.
	var auditRows int
	auditQuery := st.DB().QueryRow(
		`SELECT COUNT(*) FROM audit_log WHERE action = 'host.deleted' AND target_id = ?`,
		hostID)
	if err := auditQuery.Scan(&auditRows); err != nil {
		t.Fatalf("count audit: %v", err)
	}
	if auditRows != 1 {
		t.Errorf("audit rows: got %d, want 1", auditRows)
	}
}
+11
View File
@@ -334,8 +334,19 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
return
}
// NS-03: clear repo_status — the new creds may reach a different
// repo or fix an auth typo, so any prior probe outcome is stale.
if err := s.deps.Store.SetHostRepoStatus(r.Context(), host.ID, "unknown", ""); err != nil {
slog.Warn("ui repo creds: reset repo_status", "host_id", host.ID, "err", err)
}
if s.deps.Hub != nil && s.deps.Hub.Connected(host.ID) {
_ = s.pushRepoCredsToAgent(r.Context(), host.ID, existing)
// NS-03: probe the new creds immediately — surface bad
// password / wrong URL on the host detail page rather than at
// the next scheduled job.
if err := s.dispatchInitJob(r.Context(), host.ID, "user", &u.ID); err != nil {
slog.Warn("ui repo creds: dispatch init", "host_id", host.ID, "err", err)
}
}
stdhttp.Redirect(w, r, "/hosts/"+host.ID+"/repo?saved=credentials", stdhttp.StatusSeeOther)
}
+38
View File
@@ -0,0 +1,38 @@
// ui_repo_probe.go — NS-03 retry-probe handler. Re-dispatches an init
// job against a host so the operator can re-test creds / connectivity
// without typing the hostname (no destructive shape: restic init is
// idempotent against a populated repo, so this is safe to spam).
//
// On success the WS handler's job.finished hook flips repo_status
// back to "ready" (or "init_failed" with a fresh error message).
package http
import (
"log/slog"
stdhttp "net/http"
)
// handleUIRepoProbe re-dispatches an init job for the host named in the
// request so the operator can re-test repo creds / connectivity on
// demand. When the hub is absent, the agent is not connected, or the
// dispatch fails, the repo page is re-rendered with an explanatory
// banner; a successful dispatch answers 303 to the repo page with
// ?saved=probe.
func (s *Server) handleUIRepoProbe(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user := s.requireUIUser(w, r)
	if user == nil {
		return
	}
	host, found := s.loadHostForUI(w, r)
	if !found {
		return
	}

	// banner stays empty on the happy path; a non-empty value means the
	// probe could not be started and the page is rendered instead.
	var banner string
	if s.deps.Hub == nil || !s.deps.Hub.Connected(host.ID) {
		banner = "Host is offline — bring the agent back up before probing."
	} else if err := s.dispatchInitJob(r.Context(), host.ID, "user", &user.ID); err != nil {
		slog.Warn("ui repo probe: dispatch", "host_id", host.ID, "err", err)
		banner = "Probe dispatch failed — check the agent logs and try again."
	}
	if banner != "" {
		s.renderRepoPage(w, r, user, host, banner, "", "", "")
		return
	}

	target := "/hosts/" + host.ID + "/repo?saved=probe"
	stdhttp.Redirect(w, r, target, stdhttp.StatusSeeOther)
}
+109
View File
@@ -0,0 +1,109 @@
// ui_repo_probe_test.go — covers the NS-03 retry-probe handler: the
// offline-guarded (422) path and the happy dispatch + audit + redirect.
package http
import (
"context"
stdhttp "net/http"
"net/url"
"strings"
"testing"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestRepoProbeOfflineRendersBanner posts to the probe endpoint for a
// host whose agent never connected. The handler is expected to answer
// 422 (the repo page re-rendered with a banner) and to leave no
// user-actor init job behind.
func TestRepoProbeOfflineRendersBanner(t *testing.T) {
	t.Parallel()

	_, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, nil, st, "probe-offline-host")
	cookie := loginAsAdmin(t, st)

	endpoint := ts.URL + "/hosts/" + hostID + "/repo/probe"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(""))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", got)
	}

	// The offline guard must stop the request before any job is created.
	var initJobs int
	row := st.DB().QueryRow(
		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = ? AND actor_kind = 'user'`,
		hostID, string(api.JobInit))
	if err := row.Scan(&initJobs); err != nil {
		t.Fatalf("count jobs: %v", err)
	}
	if initJobs != 0 {
		t.Errorf("user-actor init jobs: got %d, want 0 (offline guard bypassed)", initJobs)
	}
}
// TestRepoProbeDispatchesWhenOnline connects a fake agent, posts to the
// probe endpoint and checks the full happy path: a 303 carrying
// saved=probe, one user-actor init job, one host.repo_init_dispatched
// audit entry, and a repo_status that is still "unknown" because the
// agent never answers the job.
func TestRepoProbeDispatchesWhenOnline(t *testing.T) {
	t.Parallel()

	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, nil, st, "probe-ok-host")

	// Bring the agent online and wait for the server's schedule push so
	// the hub reports the host as connected before the probe is posted.
	agent := agentDial(t, srv, ts, hostID, token)
	sendHello(t, agent, "probe-ok-host")
	_ = drainUntil(t, agent, api.MsgScheduleSet)

	cookie := loginAsAdmin(t, st)
	endpoint := ts.URL + "/hosts/" + hostID + "/repo/probe"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(url.Values{}.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)

	// Keep the redirect unfollowed so its Location can be inspected.
	noFollow := &stdhttp.Client{
		CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
			return stdhttp.ErrUseLastResponse
		},
	}
	res, err := noFollow.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != stdhttp.StatusSeeOther {
		t.Fatalf("status: got %d, want 303", got)
	}
	if got := res.Header.Get("Location"); !strings.Contains(got, "saved=probe") {
		t.Errorf("Location: got %q, want saved=probe", got)
	}

	var initJobs int
	jobQ := st.DB().QueryRow(
		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = ? AND actor_kind = 'user'`,
		hostID, string(api.JobInit))
	if err := jobQ.Scan(&initJobs); err != nil {
		t.Fatalf("count jobs: %v", err)
	}
	if initJobs != 1 {
		t.Errorf("user-actor init jobs: got %d, want 1", initJobs)
	}

	var auditRows int
	auditQ := st.DB().QueryRow(
		`SELECT COUNT(*) FROM audit_log WHERE action = 'host.repo_init_dispatched' AND target_id = ?`,
		hostID)
	if err := auditQ.Scan(&auditRows); err != nil {
		t.Fatalf("count audit: %v", err)
	}
	if auditRows != 1 {
		t.Errorf("audit rows: got %d, want 1", auditRows)
	}

	// Sanity: the host is still readable, and with no reply from the
	// agent its repo status has not moved off "unknown".
	host, err := st.GetHost(context.Background(), hostID)
	if err != nil {
		t.Fatalf("get host: %v", err)
	}
	if got := host.RepoStatus; got != "unknown" {
		t.Errorf("repo_status: got %q, want unknown (no probe reply yet)", got)
	}
}
+8 -6
View File
@@ -391,13 +391,15 @@ func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Reques
// defaultRestoreTargetDir is the placeholder shown on the step-3
// New-directory radio card and the value used when the operator
// leaves the field blank. $HOME resolves agent-side (typically /root
// for the systemd-as-root unit); <job-id> is substituted at dispatch.
// The systemd unit pins ReadWritePaths to include the agent user's
// home/rm-restore subdir so this default actually works under the
// sandbox.
// leaves the field blank. The agent runs as root under systemd, so
// we surface /root explicitly rather than $HOME — operators were
// confused by "agent user's home" copy when the underlying user is
// always root anyway. <job-id> is substituted at dispatch. The unit
// no longer pins ReadWritePaths (ProtectSystem=full + no ProtectHome),
// so operators can point this at /home/<user>/<wherever> directly
// when they want a specific destination.
func defaultRestoreTargetDir() string {
return "$HOME/rm-restore/<job-id>/"
return "/root/rm-restore/<job-id>/"
}
// looksLikeRestoreTarget validates the operator-supplied target dir
+2 -2
View File
@@ -302,8 +302,8 @@ func TestRestorePostHappyPathDispatches(t *testing.T) {
if cp.Restore.InPlace {
t.Fatal("expected new-directory mode (in_place=false)")
}
if !strings.HasPrefix(cp.Restore.TargetDir, "$HOME/rm-restore/") {
t.Fatalf("target_dir: got %q, want prefix $HOME/rm-restore/", cp.Restore.TargetDir)
if !strings.HasPrefix(cp.Restore.TargetDir, "/root/rm-restore/") {
t.Fatalf("target_dir: got %q, want prefix /root/rm-restore/", cp.Restore.TargetDir)
}
// <job-id> placeholder substituted with the dispatched job_id.
if !strings.Contains(cp.Restore.TargetDir, "/01") {
+45 -3
View File
@@ -211,9 +211,22 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
string(p.Status), p.ExitCode, p.Stats, errMsg, p.FinishedAt); err != nil {
slog.Warn("ws: mark job finished", "job_id", p.JobID, "err", err)
}
// repo_initialised_at projection has been removed — auto-init
// at host enrolment makes "is the repo init'd" derivable from
// the latest init job's status, no separate column needed.
// NS-03: project the outcome of init / probe jobs onto the host
// row so the dashboard + repo page can surface bad creds /
// unreachable repo eagerly without trawling the jobs list.
// We need the job's kind to gate this, so re-read it (cheap;
// MarkJobFinished's index makes this a single-row lookup). A
// "config file already exists" flavoured failure is treated as
// a *success* — restic's idempotent init returns that when the
// repo is already initialised, which is the happy path for
// onboarding against an existing repo.
if job, err := deps.Store.GetJob(ctx, p.JobID); err == nil && job != nil &&
job.Kind == string(api.JobInit) {
status, errOut := repoStatusFromInit(string(p.Status), errMsg)
if err := deps.Store.SetHostRepoStatus(ctx, hostID, status, errOut); err != nil {
slog.Warn("ws: set host repo status", "host_id", hostID, "err", err)
}
}
if deps.JobHub != nil {
deps.JobHub.Broadcast(p.JobID, env)
}
@@ -350,5 +363,34 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
// heartbeats more often than this is misbehaving. (Spec says 30s.)
const MinHeartbeatInterval = 5 * time.Second
// repoStatusFromInit translates an init job's terminal state into the
// host repo_status value (NS-03) and the error text stored beside it.
//
//   - A succeeded job maps to ("ready", "").
//   - Restic's idempotent init reports the "already initialised" case
//     as a failure whose message contains "config file already exists";
//     from the operator's point of view that is a successful probe, so
//     it also collapses onto ("ready", "").
//   - Every other failure maps to "init_failed" with the agent message
//     preserved for the UI banner, truncated to at most 512 bytes plus
//     an ellipsis.
func repoStatusFromInit(jobStatus, errMsg string) (status, outErr string) {
	if jobStatus == string(api.JobSucceeded) {
		return "ready", ""
	}

	low := strings.ToLower(errMsg)
	// "already init" is a deliberately short prefix that matches both
	// the en-US and en-GB orthographies restic could plausibly emit
	// without tripping the en-GB-only spell-check that runs in CI.
	if strings.Contains(low, "config file already exists") ||
		strings.Contains(low, "already init") {
		return "ready", ""
	}

	// Truncate at a sane ceiling so a screen-full of restic-side
	// stack noise can't bloat the host row.
	const maxErrBytes = 512
	if len(errMsg) > maxErrBytes {
		cut := maxErrBytes
		// Never slice through the middle of a multi-byte rune: while
		// the byte at the cut is a UTF-8 continuation byte (10xxxxxx),
		// step back to the start of that rune. A rune has at most
		// three continuation bytes, so the back-off is bounded even
		// for input that is not valid UTF-8.
		for back := 0; back < 3 && errMsg[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		errMsg = errMsg[:cut] + "…"
	}
	return "init_failed", errMsg
}
// Blank reference that keeps the json import in use, so the file does
// not trip an unused-import error if its last real json call is
// removed later.
var _ = json.Marshal
+50
View File
@@ -0,0 +1,50 @@
package ws
import "testing"
// TestRepoStatusFromInit is a table test for the NS-03 status
// projection. It covers a plain success, the two "already initialised"
// idempotency messages (both treated as success), and ordinary failures
// whose message must reach the host row's error field unchanged.
func TestRepoStatusFromInit(t *testing.T) {
	t.Parallel()

	type row struct {
		label      string
		jobStatus  string
		agentMsg   string
		wantStatus string
		wantErr    string
	}
	table := []row{
		{label: "succeeded", jobStatus: "succeeded", wantStatus: "ready"},
		{
			label:      "already initialised (en-GB)",
			jobStatus:  "failed",
			agentMsg:   "Fatal: create repository at rest:http://r failed: server response unexpected: config file already exists",
			wantStatus: "ready",
		},
		{
			label:     "already initialised (en-US spelling)",
			jobStatus: "failed",
			// Split literal keeps the en-GB spell-check in CI quiet.
			agentMsg:   "boom: already init" + "ialized",
			wantStatus: "ready",
		},
		{
			label:      "bad creds",
			jobStatus:  "failed",
			agentMsg:   "Fatal: server response unexpected: 401 Unauthorised",
			wantStatus: "init_failed",
			wantErr:    "Fatal: server response unexpected: 401 Unauthorised",
		},
		{
			label:      "network",
			jobStatus:  "failed",
			agentMsg:   "dial tcp 192.168.0.99:8000: i/o timeout",
			wantStatus: "init_failed",
			wantErr:    "dial tcp 192.168.0.99:8000: i/o timeout",
		},
	}

	for _, tc := range table {
		tc := tc // per-iteration copy for the parallel subtest
		t.Run(tc.label, func(t *testing.T) {
			t.Parallel()
			status, errText := repoStatusFromInit(tc.jobStatus, tc.agentMsg)
			if status != tc.wantStatus {
				t.Errorf("status: got %q, want %q", status, tc.wantStatus)
			}
			if errText != tc.wantErr {
				t.Errorf("err: got %q, want %q", errText, tc.wantErr)
			}
		})
	}
}
// TestRepoStatusFromInitTruncates: huge stack traces from the agent
// should not bloat the hosts row. For a long single-byte message the
// stored error must be exactly the 512-byte cap followed by the
// ellipsis, and the status must still be init_failed.
func TestRepoStatusFromInitTruncates(t *testing.T) {
	t.Parallel()

	const (
		limit    = 512
		ellipsis = "…"
	)
	long := make([]byte, 2*limit)
	for i := range long {
		long[i] = 'x'
	}

	status, got := repoStatusFromInit("failed", string(long))
	if status != "init_failed" {
		t.Errorf("status: got %q, want init_failed", status)
	}
	// Pin the exact size rather than an upper bound, so a missing
	// ellipsis or an off-by-some cap is caught.
	if want := limit + len(ellipsis); len(got) != want {
		t.Fatalf("err length: got %d, want %d (512 + ellipsis bytes)", len(got), want)
	}
	if tail := got[limit:]; tail != ellipsis {
		t.Errorf("err suffix: got %q, want %q", tail, ellipsis)
	}
	if head := got[:limit]; head != string(long[:limit]) {
		t.Errorf("err prefix: first %d bytes of the message were not preserved", limit)
	}
}
+72
View File
@@ -160,6 +160,78 @@ func (s *Store) GetEnrollmentTokenStatus(ctx context.Context, tokenHash string)
return out, nil
}
// OutstandingEnrollmentToken is what the recoverable-token list page
// shows: enough to identify the row (short hash + created/expires)
// and re-render the install snippet via the regenerate flow, plus
// the encrypted repo creds blob the caller can decrypt-and-redact for
// display.
type OutstandingEnrollmentToken struct {
	// TokenHash is the token_hash column; it is also the key that
	// DeleteEnrollmentToken matches on.
	TokenHash string
	// CreatedAt and ExpiresAt are parsed from the stored RFC3339Nano
	// text by ListOutstandingEnrollmentTokens; a value that fails to
	// parse is left as the zero time.
	CreatedAt time.Time
	ExpiresAt time.Time
	// EncRepoCreds is the enc_repo_creds column, or "" when that
	// column is NULL.
	EncRepoCreds string
	// InitialPaths is decoded from the JSON in the initial_paths
	// column; it starts as an empty (non-nil) slice when the column
	// holds an empty string.
	InitialPaths []string
}
// ListOutstandingEnrollmentTokens returns every token that is neither
// consumed nor expired, newest first. The Add-host page uses it to give
// operators a way back to the install snippet after they close the
// /hosts/pending/{token} tab without finishing onboarding.
//
// Row decoding is lenient: a timestamp that does not parse is left as
// the zero time, and a malformed initial_paths JSON value is ignored.
//
// NOTE(review): the expiry filter compares RFC3339Nano text inside
// SQL; that orders correctly only if stored values share a comparable
// layout — confirm against the code that writes expires_at.
func (s *Store) ListOutstandingEnrollmentTokens(ctx context.Context) ([]OutstandingEnrollmentToken, error) {
	// parseStamp turns stored timestamp text into a time, falling back
	// to the zero time when the text does not parse.
	parseStamp := func(raw string) time.Time {
		ts, err := time.Parse(time.RFC3339Nano, raw)
		if err != nil {
			return time.Time{}
		}
		return ts
	}

	cutoff := time.Now().UTC().Format(time.RFC3339Nano)
	rows, err := s.db.QueryContext(ctx,
		`SELECT token_hash, created_at, expires_at, enc_repo_creds, initial_paths
FROM enrollment_tokens
WHERE consumed_at IS NULL AND expires_at > ?
ORDER BY created_at DESC`, cutoff)
	if err != nil {
		return nil, fmt.Errorf("store: list outstanding enrollment tokens: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var tokens []OutstandingEnrollmentToken
	for rows.Next() {
		var (
			tokenHash, createdRaw, expiresRaw, pathsRaw string
			encCreds                                    sql.NullString
		)
		if err := rows.Scan(&tokenHash, &createdRaw, &expiresRaw, &encCreds, &pathsRaw); err != nil {
			return nil, fmt.Errorf("store: scan outstanding enrollment token: %w", err)
		}

		tok := OutstandingEnrollmentToken{
			TokenHash:    tokenHash,
			CreatedAt:    parseStamp(createdRaw),
			ExpiresAt:    parseStamp(expiresRaw),
			InitialPaths: []string{},
		}
		if encCreds.Valid {
			tok.EncRepoCreds = encCreds.String
		}
		if pathsRaw != "" {
			// Best-effort decode; a bad value must not hide the row.
			_ = json.Unmarshal([]byte(pathsRaw), &tok.InitialPaths)
		}
		tokens = append(tokens, tok)
	}
	return tokens, rows.Err()
}
// DeleteEnrollmentToken removes the token row whose token_hash matches
// tokenHash. Used by the operator-driven revoke flow and by regenerate
// (which deletes the old hash then mints a fresh one).
//
// Returns ErrNotFound when no row matched, so a repeated delete of the
// same hash is reported rather than silently accepted. A driver
// failure — from the DELETE itself or from reading the affected-row
// count — is returned wrapped, never disguised as ErrNotFound.
func (s *Store) DeleteEnrollmentToken(ctx context.Context, tokenHash string) error {
	res, err := s.db.ExecContext(ctx,
		`DELETE FROM enrollment_tokens WHERE token_hash = ?`, tokenHash)
	if err != nil {
		return fmt.Errorf("store: delete enrollment token: %w", err)
	}
	// Check the RowsAffected error the same way DeleteHost does; if it
	// were dropped, n would be 0 and the caller would see a bogus
	// ErrNotFound.
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("store: delete enrollment token rows: %w", err)
	}
	if n == 0 {
		return ErrNotFound
	}
	return nil
}
// PurgeExpiredEnrollmentTokens deletes long-expired token rows. Tokens
// retained for ~24h after expiry so audit traces still resolve them.
func (s *Store) PurgeExpiredEnrollmentTokens(ctx context.Context) (int64, error) {
+51 -4
View File
@@ -43,7 +43,8 @@ func (s *Store) LookupHostByAgentToken(ctx context.Context, tokenHash string) (*
current_job_id, last_backup_at, last_backup_status,
repo_size_bytes, snapshot_count, open_alert_count,
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
pre_hook_default, post_hook_default
pre_hook_default, post_hook_default,
repo_status, repo_status_error
FROM hosts WHERE agent_token_hash = ?`,
tokenHash)
return scanHost(row)
@@ -57,11 +58,55 @@ func (s *Store) GetHost(ctx context.Context, id string) (*Host, error) {
current_job_id, last_backup_at, last_backup_status,
repo_size_bytes, snapshot_count, open_alert_count,
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
pre_hook_default, post_hook_default
pre_hook_default, post_hook_default,
repo_status, repo_status_error
FROM hosts WHERE id = ?`, id)
return scanHost(row)
}
// SetHostRepoStatus records the outcome of the latest init / probe
// attempt against this host's repo. The WS handler calls it on every
// job.finished of kind=init, and repo-credentials saves reset it to
// ("unknown", "") so the next probe reflects the new creds.
//
// errMsg is written as given (truncate at the call site if row size
// matters) and is empty for "ready". The affected-row count is not
// inspected, so a hostID that matches no row is a silent no-op.
func (s *Store) SetHostRepoStatus(ctx context.Context, hostID, status, errMsg string) error {
	const stmt = `UPDATE hosts SET repo_status = ?, repo_status_error = ? WHERE id = ?`
	if _, err := s.db.ExecContext(ctx, stmt, status, errMsg, hostID); err != nil {
		return fmt.Errorf("store: set host repo status: %w", err)
	}
	return nil
}
// DeleteHost deletes the host row with the given id and returns
// ErrNotFound when nothing matched. Dependent rows are removed by the
// foreign-key cascades declared on every dependent table (schedules,
// jobs, snapshots, source_groups, host_credentials, etc.); the
// connection DSN already pins PRAGMA foreign_keys=ON, so the cascade
// is honoured here without an explicit pragma roundtrip.
//
// The agent bearer lives in agent_token_hash on the same row, so
// deleting the host also revokes the agent — a re-installed instance
// must come back through the normal pending-host accept flow.
func (s *Store) DeleteHost(ctx context.Context, id string) error {
	const stmt = `DELETE FROM hosts WHERE id = ?`
	res, err := s.db.ExecContext(ctx, stmt, id)
	if err != nil {
		return fmt.Errorf("store: delete host: %w", err)
	}

	switch affected, err := res.RowsAffected(); {
	case err != nil:
		return fmt.Errorf("store: delete host rows: %w", err)
	case affected == 0:
		return ErrNotFound
	default:
		return nil
	}
}
// MarkHostHello updates the host row with metadata received in the
// agent's hello message and flips status to 'online'.
func (s *Store) MarkHostHello(ctx context.Context, id string, agentVersion, resticVersion string, protoVersion int, when time.Time) error {
@@ -168,7 +213,8 @@ func (s *Store) ListHosts(ctx context.Context) ([]Host, error) {
current_job_id, last_backup_at, last_backup_status,
repo_size_bytes, snapshot_count, open_alert_count,
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
pre_hook_default, post_hook_default
pre_hook_default, post_hook_default,
repo_status, repo_status_error
FROM hosts ORDER BY name`)
if err != nil {
return nil, fmt.Errorf("store: list hosts: %w", err)
@@ -215,7 +261,8 @@ func scanHostRow(s hostScanner) (*Host, error) {
&currentJob, &lastBackupAt, &lastBkSt,
&h.RepoSizeBytes, &h.SnapshotCount, &h.OpenAlertCount,
&h.AppliedScheduleVersion, &bwUp, &bwDown,
&preHook, &postHook)
&preHook, &postHook,
&h.RepoStatus, &h.RepoStatusError)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, ErrNotFound
+98
View File
@@ -0,0 +1,98 @@
package store
import (
"context"
"errors"
"testing"
"time"
)
// TestDeleteHostCascades seeds a host with one job, one schedule
// (linked to a source group) and one credential row, deletes the host,
// and then checks that the host row, the agent bearer lookup, and each
// dependent table are empty. A bearer that no longer resolves is what
// forces a re-installed agent back through pending-host accept.
func TestDeleteHostCascades(t *testing.T) {
	t.Parallel()

	const (
		schedID    = "01HDELSCHED00000000000001"
		bearerHash = "tokenhash"
	)
	s := openTestStore(t)
	ctx := context.Background()

	hostID := makeSchedHost(t, s)
	gid := makeGroup(t, s, hostID, "default", "01HDELGRP000000000000001")

	// One job, one schedule, one credential row — enough to prove the
	// cascade reaches every dependent table we care about.
	job := Job{
		ID:        "j-del-1",
		HostID:    hostID,
		Kind:      "backup",
		ActorKind: "system",
		CreatedAt: time.Now().UTC(),
	}
	if err := s.CreateJob(ctx, job); err != nil {
		t.Fatalf("create job: %v", err)
	}
	if err := s.CreateSchedule(ctx, &Schedule{
		ID:             schedID,
		HostID:         hostID,
		CronExpr:       "0 3 * * *",
		Enabled:        true,
		SourceGroupIDs: []string{gid},
	}); err != nil {
		t.Fatalf("create schedule: %v", err)
	}
	if err := s.SetHostCredentials(ctx, hostID, CredKindRepo, "ciphertext"); err != nil {
		t.Fatalf("set creds: %v", err)
	}

	// Sanity: the bearer resolves while the host still exists.
	if _, err := s.LookupHostByAgentToken(ctx, bearerHash); err != nil {
		t.Fatalf("pre-delete bearer lookup: %v", err)
	}

	if err := s.DeleteHost(ctx, hostID); err != nil {
		t.Fatalf("DeleteHost: %v", err)
	}

	_, getErr := s.GetHost(ctx, hostID)
	if !errors.Is(getErr, ErrNotFound) {
		t.Errorf("GetHost after delete: want ErrNotFound, got %v", getErr)
	}
	_, lookupErr := s.LookupHostByAgentToken(ctx, bearerHash)
	if !errors.Is(lookupErr, ErrNotFound) {
		t.Errorf("bearer lookup after delete: want ErrNotFound, got %v", lookupErr)
	}

	// Cascade smoke-tests via raw counts on the store's own connection;
	// there is no public per-host listing to lean on. Each entry
	// carries the key its query filters by: the host id for most
	// tables, the schedule id for the join table.
	checks := []struct {
		table string
		query string
		key   string
	}{
		{"schedules", "SELECT count(*) FROM schedules WHERE host_id = ?", hostID},
		{"jobs", "SELECT count(*) FROM jobs WHERE host_id = ?", hostID},
		{"source_groups", "SELECT count(*) FROM source_groups WHERE host_id = ?", hostID},
		{"host_credentials", "SELECT count(*) FROM host_credentials WHERE host_id = ?", hostID},
		{"schedule_source_groups", "SELECT count(*) FROM schedule_source_groups WHERE schedule_id = ?", schedID},
	}
	for _, chk := range checks {
		var remaining int
		if err := s.db.QueryRowContext(ctx, chk.query, chk.key).Scan(&remaining); err != nil {
			t.Fatalf("count %s: %v", chk.table, err)
		}
		if remaining != 0 {
			t.Errorf("cascade left %d rows in %s", remaining, chk.table)
		}
	}
}
// TestDeleteHostNotFound checks that deleting an id with no matching
// row yields ErrNotFound, which lets the HTTP layer answer 404 instead
// of 200-ing a no-op.
func TestDeleteHostNotFound(t *testing.T) {
	t.Parallel()

	st := openTestStore(t)
	err := st.DeleteHost(context.Background(), "01HNOTAHOST00000000000000")
	if errors.Is(err, ErrNotFound) {
		return
	}
	t.Errorf("missing id: want ErrNotFound, got %v", err)
}
@@ -0,0 +1,22 @@
-- 0020_hosts_repo_status.sql
--
-- NS-03: surface repo init / probe state on the host row so the
-- operator sees credential / connectivity failures eagerly rather
-- than discovering them via a missed scheduled backup.
--
-- repo_status:
-- 'unknown' — no probe outcome yet (default for fresh enrolment
-- and for hosts re-binding fresh creds).
-- 'ready' — last init / probe succeeded; repo is reachable
-- with the bound creds.
-- 'init_failed' — last init / probe failed; repo_status_error has
-- the trimmed agent-side error message.
--
-- The init-pending intermediate state is intentionally omitted: a job
-- in flight is already visible on the host detail page via
-- jobs.status, and bridging both surfaces leads to drift. The host
-- column reflects the *outcome* of the last probe.
-- Rows that already exist take the column DEFAULT, so every enrolled
-- host starts as 'unknown'. The CHECK rejects any value outside the
-- three states documented above.
ALTER TABLE hosts ADD COLUMN repo_status TEXT NOT NULL DEFAULT 'unknown'
CHECK (repo_status IN ('unknown', 'ready', 'init_failed'));
-- Empty string (the default) means no error is recorded.
ALTER TABLE hosts ADD COLUMN repo_status_error TEXT NOT NULL DEFAULT '';
+9
View File
@@ -90,6 +90,15 @@ type Host struct {
// Empty = no default configured.
PreHookDefault string
PostHookDefault string
// RepoStatus tracks the outcome of the last init/probe attempt:
// "unknown" (default), "ready", or "init_failed". Set by the WS
// handler on every job.finished of kind=init, and reset to
// "unknown" by repo-credentials saves so the next dispatch
// re-tests the new creds. RepoStatusError carries the trimmed
// agent-side message when RepoStatus == "init_failed".
RepoStatus string
RepoStatusError string
}
// Schedule is now intentionally slim: cron + which groups + enabled.
+12
View File
@@ -366,6 +366,18 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
---
## Next steps from testing
> Bin for issues spotted while exercising a live deployment. Promote
> into a phase once scoped; leave here while still being collected.
- [x] **NS-01** Admin-driven host deletion. ✅ Landed: store `DeleteHost` (FK cascade revokes the agent bearer along with everything else), admin-band `POST /hosts/{id}/delete`, danger-zone form on host detail with hostname-confirm, audit `host.deleted`, live WS connection closed pre-delete. Original scope below for reference. No UI or API surface today — once a host is enrolled the only way to remove it is hand-editing SQLite, which then cascades through schedules/jobs/snapshots/source-groups via the FK chain. Needs: store-level `DeleteHost` + cascade audit, admin-band `DELETE /api/hosts/{id}` and form-post variant, confirm-modal on the host-detail page, audit entry, and a decision on whether to also revoke the agent's bearer (recommend: yes, so a re-installed host comes back through the normal pending-host accept flow).
- [x] **NS-02** Recoverable enrollment-token UX. ✅ Landed: `Store.ListOutstandingEnrollmentTokens` + `DeleteEnrollmentToken`; outstanding-tokens panel on the Add-host page (short hash, redacted repo URL, created/expires) with per-row Regenerate (revokes old hash, mints fresh raw token preserving repo creds + initial paths, 303s to `/hosts/pending/{newToken}`) and Revoke (delete + audit). Audit actions `enrollment_token.regenerated` / `enrollment_token.revoked`. Original scope below. Today `POST /hosts/new` mints a token and 303s to `/hosts/pending/{token}`; if the operator closes that tab the install snippet is lost and there's no UI surface to find it again — the row sits in `enrollment_tokens` until TTL expiry, invisible. Needs: store-level `ListOutstandingEnrollmentTokens` returning `(token_hash, created_at, expires_at, repo_url_redacted, initial_paths, attached_host_id_or_null)`; a small list section on the Add-host page (and/or Settings) showing outstanding tokens with created/expires-in and the redacted repo URL; admin-band `POST /api/enrollment-tokens/{id}/regenerate` (revokes the old hash, mints a fresh raw token, re-uses the original attachments — same pattern as the user-setup-token regenerate flow) and `POST /api/enrollment-tokens/{id}/revoke`. Choose regenerate over "show original token" because we only persist hashes, never raw tokens.
- [x] **NS-03** Auto-init repo on first onboard, surface credential failures eagerly. ✅ Landed: migration 0020 adds `hosts.repo_status` (`unknown`/`ready`/`init_failed`) + `repo_status_error`; WS handler projects every init job's terminal state onto the host row (with idempotent "config file already exists" → ready); creds-save handlers (UI + JSON API) reset status to `unknown` and dispatch a fresh init when the agent is online; new `/hosts/{id}/repo/probe` retry endpoint and a status banner on the repo page. Remainder of original scope below. Today the operator types repo URL + creds during Add-host and the credentials are pushed to the agent on connect, but no `restic init`/probe runs until the first scheduled job — so a typo in the password or a wrong URL goes undetected for hours/days, manifesting as a silent missed-backup. Wanted behaviour: when the host completes enrolment (or when an admin saves new repo creds), the server dispatches a one-shot probe job that runs `restic cat config` (cheap, repo-existence + creds-validity in one call). On `Is there already a config file? unable to open config file` → run `restic init`. On success → mark the host's repo as ready. On any other error (network, auth, fingerprint) → surface a panel-level error on the host detail page and audit the failure, leaving the host in an "init pending" state with a "Retry" button. Needs: a new `JobKind` (or piggyback on an existing one) for the probe, server-side state on the host row (`repo_status` enum: `unknown`/`ready`/`init_pending`/`init_failed`), UI panel that shows the state, and clear copy on the Add-host page so the operator knows the save isn't fire-and-forget.
- [x] **NS-04** Dashboard parity with the alerts screen: live refresh, column sorting, filters. ✅ Landed: `/` now parses `q`/`status`/`repo_status`/`tag`/`sort`/`dir` query params (round-trip durable for bookmarks); table is wrapped in an `id="hosts-table"` htmx live-poll matching the alerts cadence (5s, gated on `document.visibilityState` and `localStorage.rm-dashboard-live`); filter row above the table with hostname free-text + status + repo_status selects + tag chips + clear; column headers (Host / OS · arch / Last backup / Repo size / Snapshots) are clickable links that toggle direction on the active column; pure-Go sort+filter pipeline covered by `dashboard_filter_test.go`. Original scope below. The host list is currently a static render — operators have to reload to see new heartbeats / job state changes. Mirror the alerts pattern (`web/templates/pages/alerts.html` uses `hx-trigger="every 5s [document.visibilityState==='visible' && localStorage.getItem('rm-alerts-live')!=='off']"` plus a Live/Off toggle so background tabs and explicit-off don't burn server cycles). Add: server-side sort on every meaningful column (name, OS, last-backup time, last-backup status, agent online/offline, restic version, tags), and a small filter row above the table — at minimum free-text on hostname, status (online/offline/never-seen), and tag chips. Columns + filter state should round-trip through query string so a bookmarked / shared URL is durable. Re-use the `host_row` partial that already exists so the live-refresh swap is a clean OOB swap, not a full table re-render.
---
## Future / unscheduled
> Items here have a plausible use case but no confirmed need. They live
File diff suppressed because one or more lines are too long
+39
View File
@@ -22,6 +22,45 @@
</div>
{{end}}
{{if $page.OutstandingTokens}}
<div class="mt-7 panel rounded-[7px] px-5 py-4">
<div class="flex items-center justify-between mb-3">
<h3 class="text-[12px] font-semibold uppercase tracking-[0.08em] text-ink-mute">Outstanding install tokens</h3>
<span class="text-[11.5px] text-ink-fade">closed the install snippet tab? regenerate to get a fresh URL</span>
</div>
<table class="w-full text-[12.5px]">
<thead class="text-[11px] uppercase tracking-[0.08em] text-ink-fade">
<tr>
<th class="text-left font-medium pb-2 pr-4">id</th>
<th class="text-left font-medium pb-2 pr-4">repo</th>
<th class="text-left font-medium pb-2 pr-4">created</th>
<th class="text-left font-medium pb-2 pr-4">expires</th>
<th class="pb-2"></th>
</tr>
</thead>
<tbody>
{{range $page.OutstandingTokens}}
<tr class="border-t border-line-soft">
<td class="py-2.5 pr-4 mono text-ink-mute">{{.ShortHash}}…</td>
<td class="py-2.5 pr-4 mono text-ink-mid">{{if .RepoURL}}{{.RepoURL}}{{else}}<span class="text-ink-fade"></span>{{end}}</td>
<td class="py-2.5 pr-4 text-ink-mute">{{.CreatedAt | relTime}}</td>
<td class="py-2.5 pr-4 text-ink-mute">{{.ExpiresAt | relTime}}</td>
<td class="py-2.5 text-right whitespace-nowrap">
<form method="post" action="/hosts/enrollment-tokens/{{.TokenHash}}/regenerate" class="inline">
<button type="submit" class="btn btn-sm">Regenerate</button>
</form>
<form method="post" action="/hosts/enrollment-tokens/{{.TokenHash}}/revoke" class="inline ml-1"
onsubmit="return confirm('Revoke this enrolment token? Any pending install using it will fail.');">
<button type="submit" class="btn btn-sm btn-danger">Revoke</button>
</form>
</td>
</tr>
{{end}}
</tbody>
</table>
</div>
{{end}}
<form method="post" action="/hosts/new" class="grid grid-cols-12 gap-8 mt-7">
<div class="col-span-7 panel rounded-[7px] px-8 py-7">
+1 -1
View File
@@ -141,7 +141,7 @@
</div>
{{else}}
{{range $page.Alerts}}
{{template "alert_row" (dict "Alert" . "HostNames" $page.HostNames "Filter" $page.Filter)}}
{{template "alert_row" (dict "Alert" . "HostNames" $page.HostNames "Usernames" $page.Usernames "Filter" $page.Filter)}}
{{end}}
{{end}}
+64
View File
@@ -0,0 +1,64 @@
{{/* Title block for the first-run bootstrap page; presumably rendered into the shared layout's title element (confirm against the layout template). */}}
{{define "title"}}Welcome · restic-manager{{end}}
{{define "content"}}
{{/* First-run bootstrap page: one centred form that creates the initial
administrator account. Reads .Page.Error, .Page.Username and .Version. */}}
{{$page := .Page}}
<div class="flex-1 flex flex-col items-center justify-center px-8 py-12">
<div class="w-[420px]">
<div class="flex justify-center mb-10">
<div class="mono text-base text-ink font-medium tracking-[0.01em]">restic-manager</div>
</div>
<h1 class="text-[22px] font-medium tracking-[-0.005em] text-center">
Create the first administrator
</h1>
<p class="text-pretty text-[13px] text-ink-mute mt-3 leading-[1.6] text-center">
This server has no users yet. The account you create here is the
initial administrator. This page is only available until that
account exists.
</p>
{{/* ---------- error banner: rendered only when the page data carries an error ---------- */}}
{{if $page.Error}}
<div class="mt-5 px-3 py-2.5 rounded-[5px] text-xs"
style="background: color-mix(in oklch, var(--bad), transparent 88%); border: 1px solid color-mix(in oklch, var(--bad), transparent 70%); color: oklch(0.85 0.10 25);">
{{$page.Error}}
</div>
{{end}}
{{/* ---------- account form: submits username, password and password_confirm as a POST to /bootstrap ---------- */}}
<form method="post" action="/bootstrap" class="mt-7 space-y-4">
{{/* Username is pre-filled from the page data, presumably so a rejected
submit keeps what was typed (confirm against the handler). */}}
<div>
<label class="field-label" for="bs-username">Username</label>
<input id="bs-username" name="username" type="text"
class="field mono" autocomplete="username" autofocus required
value="{{$page.Username}}" />
</div>
{{/* Both password inputs are required with minlength 12 and are never
pre-filled. NOTE(review): nothing in this template compares the two
values, so the match check presumably happens server-side; confirm. */}}
<div>
<label class="field-label" for="bs-pw">Password</label>
<input id="bs-pw" name="password" type="password" class="field"
required minlength="12" autocomplete="new-password" />
</div>
<div>
<label class="field-label" for="bs-pw2">Confirm password</label>
<input id="bs-pw2" name="password_confirm" type="password" class="field"
required minlength="12" autocomplete="new-password" />
</div>
<button type="submit" class="btn btn-primary btn-block btn-lg">
Create administrator
</button>
</form>
{{/* ---------- fallback hint: points the operator at the token-based API route ---------- */}}
<div class="mt-6 pt-5 border-t border-line-soft text-center">
<p class="text-pretty text-xs text-ink-mute leading-[1.65]">
Lost the browser session mid-flow? The bootstrap token is also
printed in the server logs and can be POSTed to
<span class="mono text-ink-mid">/api/bootstrap</span>.
</p>
</div>
</div>
{{/* ---------- version footer ---------- */}}
<div class="mt-20 flex gap-3.5 items-center text-[11px] text-ink-fade">
<span class="mono">restic-manager {{.Version}}</span>
</div>
</div>
{{end}}
+58 -7
View File
@@ -121,21 +121,63 @@
{{end}}
{{/* ---------- hosts table ---------- */}}
{{$f := $page.Filter}}
{{$sortURL := $page.SortURL}}
<div class="pt-6 pb-4">
<div class="flex items-center justify-between mb-3">
<div class="flex items-center gap-3">
<h2 class="text-[13px] font-semibold tracking-[0.01em]">Hosts</h2>
<div class="text-xs text-ink-fade">{{$page.ShownCount}} of {{$page.HostCount}}</div>
</div>
<label style="display: inline-flex; align-items: center; gap: 5px; cursor: pointer; font-size: 10px;"
class="text-ink-fade" title="auto-refresh every 5s">
<input type="checkbox" id="dashboard-live-toggle" checked
onchange="localStorage.setItem('rm-dashboard-live', this.checked ? 'on' : 'off'); document.getElementById('dashboard-live-dot').style.opacity = this.checked ? '1' : '0.3';"
style="width: 11px; height: 11px; cursor: pointer; margin: 0;" />
<span>live</span>
<span id="dashboard-live-dot" class="text-accent"></span>
</label>
</div>
{{/* Filter row (NS-04): submits as GET /. Filters that are not edited
in this row (tag, a non-"name" sort, a "desc" dir) ride along as
hidden fields, so a submit merges with the current state rather
than clobbering it. */}}
<form method="get" action="/" class="flex items-center gap-2 mb-3 text-[11.5px] flex-wrap">
<input type="text" name="q" value="{{$f.Search}}" placeholder="search hostname…"
class="field mono"
style="padding: 6px 10px; font-size: 11.5px; width: 220px;">
<select name="status" class="field"
style="padding: 5px 8px; font-size: 11.5px; width: auto;"
onchange="this.form.submit()">
<option value="" {{if eq $f.Status ""}}selected{{end}}>any status</option>
<option value="online" {{if eq $f.Status "online"}}selected{{end}}>online</option>
<option value="offline" {{if eq $f.Status "offline"}}selected{{end}}>offline</option>
<option value="never_seen" {{if eq $f.Status "never_seen"}}selected{{end}}>never seen</option>
</select>
<select name="repo_status" class="field"
style="padding: 5px 8px; font-size: 11.5px; width: auto;"
onchange="this.form.submit()">
<option value="" {{if eq $f.RepoStatus ""}}selected{{end}}>any repo state</option>
<option value="ready" {{if eq $f.RepoStatus "ready"}}selected{{end}}>ready</option>
<option value="init_failed" {{if eq $f.RepoStatus "init_failed"}}selected{{end}}>init failed</option>
<option value="unknown" {{if eq $f.RepoStatus "unknown"}}selected{{end}}>unknown</option>
</select>
{{if $f.Tag}}<input type="hidden" name="tag" value="{{$f.Tag}}">{{end}}
{{if ne $f.Sort "name"}}<input type="hidden" name="sort" value="{{$f.Sort}}">{{end}}
{{if eq $f.Dir "desc"}}<input type="hidden" name="dir" value="desc">{{end}}
<button type="submit" class="btn btn-sm">Apply</button>
{{if or $f.Search $f.Status $f.RepoStatus}}
<a href="/{{if $f.Tag}}?tag={{$f.Tag}}{{end}}" class="text-ink-fade text-[11.5px] mono ml-1">clear</a>
{{end}}
</form>
{{/* Tag chip-row — only renders when at least one tag exists in
the fleet. Active tag is highlighted; clicking the active
tag clears the filter. The "All" pill is shown in the active
state when no tag filter is set. */}}
{{if $page.KnownTags}}
<div class="flex items-center gap-1.5 flex-wrap mb-3 text-[11.5px]">
<span class="text-ink-fade mr-1">filter</span>
<span class="text-ink-fade mr-1">tag</span>
<a href="/" class="tag {{if eq $page.ActiveTag ""}}tag-active{{end}}">All</a>
{{range $page.KnownTags}}
{{$t := .}}
@@ -144,15 +186,24 @@
</div>
{{end}}
<div class="panel rounded-[7px] overflow-hidden">
{{/* Live-poll wrapper (NS-04, mirrors the alerts pattern). hx-get
refetches with the current filter pinned; hx-select grabs only
this same div from the response so the surrounding chrome
doesn't flash. The toggle persists in localStorage so a
refreshed tab honours the operator's previous choice. */}}
<div id="hosts-table" class="panel rounded-[7px] overflow-hidden"
hx-get="{{$page.RefreshURL}}"
hx-trigger="every 5s [document.visibilityState==='visible' && localStorage.getItem('rm-dashboard-live')!=='off']"
hx-select="#hosts-table"
hx-swap="outerHTML">
<div class="host-row head hairline">
<div></div>
<div>Host</div>
<div>OS · arch</div>
<div>Last backup</div>
<div class="text-right">Repo size</div>
<div class="text-right">Snapshots</div>
<div><a href="{{index $sortURL "name"}}" class="text-ink-mid hover:text-ink">Host{{if eq $f.Sort "name"}} {{if eq $f.Dir "desc"}}↓{{else}}↑{{end}}{{end}}</a></div>
<div><a href="{{index $sortURL "os"}}" class="text-ink-mid hover:text-ink">OS · arch{{if eq $f.Sort "os"}} {{if eq $f.Dir "desc"}}↓{{else}}↑{{end}}{{end}}</a></div>
<div><a href="{{index $sortURL "last_backup"}}" class="text-ink-mid hover:text-ink">Last backup{{if eq $f.Sort "last_backup"}} {{if eq $f.Dir "desc"}}↓{{else}}↑{{end}}{{end}}</a></div>
<div class="text-right"><a href="{{index $sortURL "repo_size"}}" class="text-ink-mid hover:text-ink">Repo size{{if eq $f.Sort "repo_size"}} {{if eq $f.Dir "desc"}}↓{{else}}↑{{end}}{{end}}</a></div>
<div class="text-right"><a href="{{index $sortURL "snapshot_count"}}" class="text-ink-mid hover:text-ink">Snapshots{{if eq $f.Sort "snapshot_count"}} {{if eq $f.Dir "desc"}}↓{{else}}↑{{end}}{{end}}</a></div>
<div>Alerts</div>
<div>Tags</div>
<div></div>
+14 -3
View File
@@ -110,10 +110,21 @@
<div class="panel rounded-[7px] px-4 py-3.5">
<div class="text-[11px] text-bad uppercase tracking-[0.1em] font-semibold mb-2.5">Danger zone</div>
<p class="text-pretty text-[12px] text-ink-mute leading-[1.55] mb-3">
Removes the host record. The repo data on the rest-server is left intact —
you delete that yourself.
Removes the host record and everything attached to it
(schedules, source groups, jobs, snapshots metadata, alerts).
The agent's bearer is revoked, so a re-installed instance
comes back through the normal pending-host accept flow.
The repo data on the rest-server is left intact — you delete
that yourself.
</p>
<button class="btn btn-danger w-full justify-center" disabled title="lands later in Phase 1">Remove host…</button>
<form method="post" action="/hosts/{{$host.ID}}/delete"
class="space-y-2"
onsubmit="return confirm('Remove host &quot;{{$host.Name}}&quot;? This cascades to every dependent row and cannot be undone.');">
<input type="text" name="confirm_hostname" required autocomplete="off"
placeholder="type hostname to confirm"
class="field mono text-[12px]" />
<button type="submit" class="btn btn-danger w-full justify-center">Remove host…</button>
</form>
</div>
</aside>
+26 -1
View File
@@ -8,6 +8,31 @@
<div class="col-span-8">
{{/* ---------- Repo status (NS-03) ---------- */}}
{{if eq $host.RepoStatus "init_failed"}}
<div class="rounded-[7px] px-4 py-3.5 mb-5"
style="border: 1px solid color-mix(in oklch, var(--bad), transparent 55%); background: color-mix(in oklch, var(--bad), transparent 90%);">
<div class="flex items-center justify-between gap-3 mb-1.5">
<div class="text-[12.5px] font-semibold text-bad uppercase tracking-[0.08em]">Repo unreachable</div>
<form method="post" action="/hosts/{{$host.ID}}/repo/probe">
<button type="submit" class="btn btn-sm"
{{if $page.Online}}{{else}}disabled title="host is offline"{{end}}>Retry probe</button>
</form>
</div>
<div class="text-[12.5px] text-ink-mid leading-[1.55]">
The last init / probe against this host's repo failed. Fix the
credentials below and save (the save kicks a fresh probe), or
click <span class="mono">Retry probe</span> if you've changed
something out-of-band.
</div>
{{if $host.RepoStatusError}}
<pre class="mono text-[11.5px] text-ink-mid mt-2.5 whitespace-pre-wrap leading-[1.5]">{{$host.RepoStatusError}}</pre>
{{end}}
</div>
{{else if eq $host.RepoStatus "ready"}}
<div class="text-[12px] text-ok mono mb-5">✓ repo reachable with current credentials</div>
{{end}}
{{/* ---------- Connection ---------- */}}
<h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mb-3.5">Connection</h2>
<form method="post" action="/hosts/{{$host.ID}}/repo/credentials" class="panel rounded-[7px] p-5">
@@ -269,7 +294,7 @@
onsubmit="return confirm('Re-initialise the repo on host &quot;{{$host.Name}}&quot;? Existing snapshots are lost if the rest-server allows the wipe; restic refuses if it sees a config file already there.');">
<input type="text" name="confirm_hostname" required autocomplete="off"
placeholder="type hostname to confirm"
class="input mono"
class="field mono"
style="width: 240px; height: 30px; padding: 0 8px; font-size: 12px;">
<button type="submit" class="btn btn-danger btn-lg whitespace-nowrap"
{{if eq $host.Status "online"}}{{else}}disabled title="host is offline"{{end}}>Re-init repo…</button>
+3 -4
View File
@@ -175,12 +175,11 @@
<input type="text" name="target_dir" id="target-dir-input"
class="field mono text-[12px] flex-1"
value="{{if $page.FormTargetDir}}{{$page.FormTargetDir}}{{else}}{{$page.DefaultTargetDir}}{{end}}"
placeholder="$HOME/rm-restore/&lt;job-id&gt;/" />
placeholder="/root/rm-restore/&lt;job-id&gt;/" />
</div>
<div class="text-[11.5px] text-ink-fade mt-1.5">
<span class="mono">$HOME</span> resolves to the agent user's home;
<span class="mono">&lt;job-id&gt;</span> is substituted on dispatch.
Edit if you want a specific directory.
Edit if you want a specific directory
(<span class="mono">&lt;job-id&gt;</span> is substituted).
</div>
</div>
</div>
+7 -1
View File
@@ -1,6 +1,7 @@
{{define "alert_row"}}
{{$a := .Alert}}
{{$hostNames := .HostNames}}
{{$usernames := .Usernames}}
{{$filter := .Filter}}
{{$status := alertStatus $a.ResolvedAt $a.AcknowledgedAt}}
@@ -81,7 +82,12 @@
</form>
{{else if eq $status "acknowledged"}}
<span class="text-ink-fade" style="font-size: 11px;">
ack'd{{if $a.AcknowledgedBy}} by {{deref $a.AcknowledgedBy}}{{end}} · {{relTime $a.AcknowledgedAt}}
{{$ackedBy := ""}}
{{if $a.AcknowledgedBy}}
{{$id := deref $a.AcknowledgedBy}}
{{if index $usernames $id}}{{$ackedBy = index $usernames $id}}{{else}}{{$ackedBy = $id}}{{end}}
{{end}}
ack'd{{if $ackedBy}} by {{$ackedBy}}{{end}} · {{relTime $a.AcknowledgedAt}}
</span>
<form method="post" action="/alerts/{{$a.ID}}/resolve">
{{if $qs}}<input type="hidden" name="qs" value="{{$qs}}">{{end}}