test: write-then-rename script-bin helpers (avoid ETXTBSY under -race)

CI run #48 failed with: "--- FAIL: TestRunInitShipsStartedAndFinished RunInit: ... fork/exec /tmp/.../restic: text file busy". setupScript and setupScriptBin used os.WriteFile to write a shell script directly at the final path, then exec'd it. Under -race + many t.Parallel tests, a fork-from-another-goroutine could inherit the still-open writable fd from one of those WriteFile calls; the kernel returns ETXTBSY when the freshly-execed binary still has a writable fd anywhere on the system. Fix: write to "<path>.tmp", then os.Rename into place. The rename is a pure dirent op; by the time the final path exists, no process has a writable fd on its inode and exec is safe. With this change, -race + -count=5 on both runner packages now passes consistently.
api+agent: document protocol-version stability and forget back-compat decisions
2026-05-04 10:19:15 +01:00 · 2026-05-04 10:19:15 +01:00 · 2026-05-04 10:19:15 +01:00 · 2026-05-04 10:19:15 +01:00 · 2026-05-04 10:19:15 +01:00 · 2026-05-04 10:19:15 +01:00
54 changed files with 7910 additions and 659 deletions
@@ -3,10 +3,11 @@
 # Notes for anyone editing this file:
 #
 # Self-hosted runner expectations
-#   The Gitea runners are provisioned via scripts/provision-gitea-runner.sh.
+#   The Gitea runners are provisioned out-of-band (the infra team owns
-#   That script bind-mounts persistent host volumes for /root/go/pkg/mod
+#   the script). Each runner host bind-mounts persistent volumes for
-#   (GOMODCACHE), /root/.cache/go-build (GOCACHE), and /root/.cache/act
+#   /root/go/pkg/mod (GOMODCACHE), /root/.cache/go-build (GOCACHE), and
-#   (action clones) into every job container. As a result:
+#   /root/.cache/act (action clones) into every job container. As a
 #   result:
 #     * `cache: true` on actions/setup-go is intentionally OMITTED — the
 #       action would otherwise tar/untar GOMODCACHE+GOCACHE through the
 #       Gitea cache backend on every job, undoing the host-volume cache
@@ -2,13 +2,13 @@ package main
 import (
 	"context"
 	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
 	"log/slog"
 	"os"
 	"os/signal"
 	"strconv"
 	"syscall"
 	"time"
@@ -199,32 +199,68 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
 	case api.MsgConfigUpdate:
 		var p api.ConfigUpdatePayload
 		_ = env.UnmarshalPayload(&p)
-		// Merge with whatever's already in secrets.enc — empty fields
+		slot := p.Slot
-		// in the push mean "leave alone." Atomic write underneath.
+		if slot == "" {
-		cur, err := d.secrets.Load()
+			slot = "repo"
 		if err != nil {
 			slog.Error("ws agent: load secrets for merge", "err", err)
 			return nil
 		}
-		changed := false
+		switch slot {
-		if p.RepoURL != "" && p.RepoURL != cur.URL {
+		case "repo":
-			cur.URL = p.RepoURL
+			// Merge with whatever's already in secrets.enc — empty fields
-			changed = true
+			// in the push mean "leave alone." Atomic write underneath.
-		}
+			cur, err := d.secrets.Load()
-		if p.RepoUsername != "" && p.RepoUsername != cur.Username {
+			if err != nil {
-			cur.Username = p.RepoUsername
+				slog.Error("ws agent: load secrets for merge", "err", err)
 			changed = true
 		}
 		if p.RepoPassword != "" && p.RepoPassword != cur.Password {
 			cur.Password = p.RepoPassword
 			changed = true
 		}
 		if changed {
 			if err := d.secrets.Save(cur); err != nil {
 				slog.Error("ws agent: persist secrets", "err", err)
 				return nil
 			}
-			slog.Info("ws agent: repo credentials updated via config.update")
+			changed := false
 			if p.RepoURL != "" && p.RepoURL != cur.URL {
 				cur.URL = p.RepoURL
 				changed = true
 			}
 			if p.RepoUsername != "" && p.RepoUsername != cur.Username {
 				cur.Username = p.RepoUsername
 				changed = true
 			}
 			if p.RepoPassword != "" && p.RepoPassword != cur.Password {
 				cur.Password = p.RepoPassword
 				changed = true
 			}
 			if changed {
 				if err := d.secrets.Save(cur); err != nil {
 					slog.Error("ws agent: persist secrets", "err", err)
 					return nil
 				}
 				slog.Info("ws agent: repo credentials updated via config.update")
 			}
 		case "admin":
 			cur, err := d.secrets.LoadAdmin()
 			if err != nil && !errors.Is(err, secrets.ErrNoAdmin) {
 				slog.Error("ws agent: load admin secrets", "err", err)
 				return nil
 			}
 			// ErrNoAdmin is not an error here — we are creating the slot.
 			changed := false
 			if p.RepoURL != "" && p.RepoURL != cur.URL {
 				cur.URL = p.RepoURL
 				changed = true
 			}
 			if p.RepoUsername != "" && p.RepoUsername != cur.Username {
 				cur.Username = p.RepoUsername
 				changed = true
 			}
 			if p.RepoPassword != "" && p.RepoPassword != cur.Password {
 				cur.Password = p.RepoPassword
 				changed = true
 			}
 			if changed {
 				if err := d.secrets.SaveAdmin(cur); err != nil {
 					slog.Error("ws agent: persist admin secrets", "err", err)
 					return nil
 				}
 				slog.Info("ws agent: admin credentials updated via config.update")
 			}
 		default:
 			slog.Warn("ws agent: unknown config.update slot, ignoring", "slot", p.Slot)
 		}
 	case api.MsgAgentUpdateAvail:
@@ -251,6 +287,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
 	if creds.Empty() {
 		return fmt.Errorf("repo credentials not configured (waiting for server config.update push)")
 	}
 	// r is the everyday runner — bound to the host's repo
 	// (append-only) credentials. Reused by every kind except
 	// JobPrune, which builds its own runner against the
 	// admin-credentials slot when p.RequiresAdminCreds is set
 	// (admin creds are not loaded for any other kind, so they're
 	// not on r). If you find yourself adding a new JobKind that
 	// needs delete authority, mirror the JobPrune pattern below
 	// — don't try to overload r.
 	r := runner.New(runner.Config{
 		ResticBin:    d.resticBin,
 		RepoURL:      creds.URL,
@@ -291,33 +335,81 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
 			slog.Info("agent: init job complete", "job_id", p.JobID)
 		}()
 	case api.JobForget:
-		var policy restic.ForgetPolicy
+		if len(p.ForgetGroups) == 0 {
-		if len(p.RetentionPolicy) > 0 {
+			// Hard-error rather than fall back to a single-policy form:
-			var raw struct {
+			// the server-side dispatch path (maintenance ticker) is the
-				KeepLast    *int `json:"keep_last,omitempty"`
+			// only writer of forget command.run today, and it always
-				KeepHourly  *int `json:"keep_hourly,omitempty"`
+			// populates ForgetGroups. A backwards-compatible single-
-				KeepDaily   *int `json:"keep_daily,omitempty"`
+			// policy fallback was specced but skipped — see the
-				KeepWeekly  *int `json:"keep_weekly,omitempty"`
+			// Phase 5 plan rationale and version.go's lockstep-deploy
-				KeepMonthly *int `json:"keep_monthly,omitempty"`
+			// note for why.
-				KeepYearly  *int `json:"keep_yearly,omitempty"`
+			return fmt.Errorf("forget: command.run carried no forget_groups (server didn't populate them)")
 			}
 			if err := json.Unmarshal(p.RetentionPolicy, &raw); err != nil {
 				return fmt.Errorf("forget: decode retention_policy: %w", err)
 			}
 			policy = restic.ForgetPolicy{
 				KeepLast: raw.KeepLast, KeepHourly: raw.KeepHourly,
 				KeepDaily: raw.KeepDaily, KeepWeekly: raw.KeepWeekly,
 				KeepMonthly: raw.KeepMonthly, KeepYearly: raw.KeepYearly,
 			}
 		}
-		slog.Info("agent: accepting forget job", "job_id", p.JobID, "policy", p.RetentionPolicy)
+		groups := make([]restic.ForgetGroup, 0, len(p.ForgetGroups))
 		for _, g := range p.ForgetGroups {
 			groups = append(groups, restic.ForgetGroup{
 				Tag: g.Tag,
 				Policy: restic.ForgetPolicy{
 					KeepLast:    g.Policy.KeepLast,
 					KeepHourly:  g.Policy.KeepHourly,
 					KeepDaily:   g.Policy.KeepDaily,
 					KeepWeekly:  g.Policy.KeepWeekly,
 					KeepMonthly: g.Policy.KeepMonthly,
 					KeepYearly:  g.Policy.KeepYearly,
 				},
 			})
 		}
 		slog.Info("agent: accepting forget job", "job_id", p.JobID, "groups", len(groups))
 		go func() {
-			if err := r.RunForget(ctx, p.JobID, policy); err != nil {
+			if err := r.RunForget(ctx, p.JobID, groups); err != nil {
 				slog.Warn("agent: forget job failed", "job_id", p.JobID, "err", err)
 				return
 			}
 			slog.Info("agent: forget job complete", "job_id", p.JobID)
 		}()
 	case api.JobPrune:
 		// Prune may require admin creds (delete authority on rest-server).
 		runCreds := creds
 		if p.RequiresAdminCreds {
 			ac, err := d.secrets.LoadAdmin()
 			if err != nil {
 				return fmt.Errorf("prune: admin creds not configured (server didn't push them): %w", err)
 			}
 			if ac.Empty() {
 				return fmt.Errorf("prune: admin creds incomplete")
 			}
 			runCreds = ac
 		}
 		prr := runner.New(runner.Config{
 			ResticBin:    d.resticBin,
 			RepoURL:      runCreds.URL,
 			RepoUsername: runCreds.Username,
 			RepoPassword: runCreds.Password,
 		}, tx, time.Second)
 		slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
 		go func() {
 			if err := prr.RunPrune(ctx, p.JobID); err != nil {
 				slog.Warn("agent: prune job failed", "job_id", p.JobID, "err", err)
 			}
 		}()
 	case api.JobCheck:
 		subset := 0
 		if len(p.Args) > 0 {
 			subset, _ = strconv.Atoi(p.Args[0])
 		}
 		slog.Info("agent: accepting check job", "job_id", p.JobID, "subset_pct", subset)
 		go func() {
 			if err := r.RunCheck(ctx, p.JobID, subset); err != nil {
 				slog.Warn("agent: check job failed", "job_id", p.JobID, "err", err)
 			}
 		}()
 	case api.JobUnlock:
 		slog.Info("agent: accepting unlock job", "job_id", p.JobID)
 		go func() {
 			if err := r.RunUnlock(ctx, p.JobID); err != nil {
 				slog.Warn("agent: unlock job failed", "job_id", p.JobID, "err", err)
 			}
 		}()
 	default:
 		return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind)
 	}
@@ -16,6 +16,7 @@ import (
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
 	rmhttp "gitea.dcglab.co.uk/steve/restic-manager/internal/server/http"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
@@ -139,6 +140,23 @@ func run() error {
 	defer purgeTick.Stop()
 	offlineTick := time.NewTicker(30 * time.Second)
 	defer offlineTick.Stop()
 	// Maintenance ticker: drives forget/prune/check on the cadences
 	// operators set per-host. Independent of the agent's local cron
 	// (which only handles backup schedules). 60s cadence — the cron
 	// expressions are minute-grained, so anything finer is wasted
 	// work.
 	maintenanceTick := time.NewTicker(60 * time.Second)
 	defer maintenanceTick.Stop()
 	// Pending-runs drain ticker: 30s cadence sweeps every host with
 	// pending_runs rows whose next_attempt_at <= now (rows accumulate
 	// when a schedule.fire's command.run send fails because the agent
 	// dropped offline mid-flight). The on-reconnect path in
 	// onAgentHello handles the common case; this ticker is the
 	// safety-net for hosts that come back without a fresh hello (they
 	// shouldn't, but the queue exists either way).
 	pendingDrainTick := time.NewTicker(30 * time.Second)
 	defer pendingDrainTick.Stop()
 	mt := maintenance.New(st)
 	go func() {
 		for {
 			select {
@@ -156,6 +174,18 @@ func run() error {
 				if n, err := st.MarkHostsOfflineStale(ctx, cutoff); err == nil && n > 0 {
 					slog.Info("marked hosts offline (stale heartbeat)", "n", n)
 				}
 			case <-pendingDrainTick.C:
 				srv.DrainAllDue(ctx)
 			case <-maintenanceTick.C:
 				decisions, err := mt.Decide(ctx, time.Now().UTC())
 				if err != nil {
 					slog.Warn("maintenance ticker: decide", "err", err)
 					continue
 				}
 				if len(decisions) > 0 {
 					slog.Info("maintenance ticker: dispatching", "n", len(decisions))
 					srv.DispatchMaintenance(ctx, decisions)
 				}
 			}
 		}
 	}()
@@ -51,24 +51,70 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner {
 	return &Runner{cfg: cfg, tx: tx, progressMinPeriod: progressMinPeriod}
 }
-// RunBackup executes a backup job and reports back via the sender.
+// resticEnv builds the shared restic.Env from r.cfg.
-// Returns nil on a clean (or "incomplete-but-snapshot-created") finish.
+func (r *Runner) resticEnv() restic.Env {
-func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, tags []string) error {
+	return restic.Env{
 	startedAt := time.Now().UTC()
 	startEnv, _ := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
 		JobID: jobID, Kind: api.JobBackup, StartedAt: startedAt,
 	})
 	if err := r.tx.Send(startEnv); err != nil {
 		slog.Warn("runner: send job.started", "err", err)
 	}
 	env := restic.Env{
 		Bin:          r.cfg.ResticBin,
 		RepoURL:      r.cfg.RepoURL,
 		RepoUsername: r.cfg.RepoUsername,
 		RepoPassword: r.cfg.RepoPassword,
 	}
 }
 // sendStarted ships a job.started envelope for jobID carrying the job
 // kind and start time. Delivery is best-effort: a marshal or send
 // failure is logged at warn level and otherwise ignored, so the job
 // itself still runs.
 func (r *Runner) sendStarted(jobID string, kind api.JobKind, startedAt time.Time) {
 	env, err := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
 		JobID: jobID, Kind: kind, StartedAt: startedAt,
 	})
 	if err != nil {
 		// Never put an envelope on the wire that failed to encode.
 		slog.Warn("runner: marshal job.started", "job_id", jobID, "kind", kind, "err", err)
 		return
 	}
 	if err := r.tx.Send(env); err != nil {
 		slog.Warn("runner: send job.started", "job_id", jobID, "kind", kind, "err", err)
 	}
 }
 // streamHandler builds the restic.LineHandler used by the job runners:
 // every line it receives is wrapped in a log.stream envelope tagged
 // with jobID, the next value of seq, and the current UTC time. The
 // third handler argument is ignored. Marshal and send errors are
 // deliberately dropped here (per-line best effort).
 func (r *Runner) streamHandler(jobID string, seq *atomic.Int64) restic.LineHandler {
 	emit := func(stream string, line string, _ any) {
 		// Timestamp first, then claim the sequence number.
 		ts := time.Now().UTC()
 		entry := api.LogStreamLine{
 			JobID:   jobID,
 			Seq:     seq.Add(1),
 			TS:      ts,
 			Stream:  api.LogStream(stream),
 			Payload: line,
 		}
 		logEnv, _ := api.Marshal(api.MsgLogStream, "", entry)
 		_ = r.tx.Send(logEnv)
 	}
 	return emit
 }
 // sendFinished ships a job.finished envelope for jobID.
 //
 // err == nil reports api.JobSucceeded with exit code 0; any other err
 // reports api.JobFailed with exit code -1 and err.Error() as the error
 // text. statsBlob is forwarded verbatim as JobFinishedPayload.Stats
 // (nil is fine).
 //
 // Delivery is best-effort, but failures are logged: a job.finished
 // that never reaches the server would otherwise vanish without any
 // trace on the agent side.
 func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) {
 	status := api.JobSucceeded
 	exit := 0
 	errMsg := ""
 	if err != nil {
 		status = api.JobFailed
 		exit = -1
 		errMsg = err.Error()
 	}
 	finEnv, merr := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
 		JobID:      jobID,
 		Status:     status,
 		ExitCode:   exit,
 		FinishedAt: finishedAt,
 		Stats:      statsBlob,
 		Error:      errMsg,
 	})
 	if merr != nil {
 		// Never put an envelope on the wire that failed to encode.
 		slog.Warn("runner: marshal job.finished", "job_id", jobID, "err", merr)
 		return
 	}
 	if serr := r.tx.Send(finEnv); serr != nil {
 		slog.Warn("runner: send job.finished", "job_id", jobID, "err", serr)
 	}
 }
 // RunBackup executes a backup job and reports back via the sender.
 // Returns nil on a clean (or "incomplete-but-snapshot-created") finish.
 func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, tags []string) error {
 	startedAt := time.Now().UTC()
 	r.sendStarted(jobID, api.JobBackup, startedAt)
 	env := r.resticEnv()
 	var seq atomic.Int64
 	lastProgress := time.Now()
@@ -115,27 +161,11 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
 	summary, err := env.RunBackup(ctx, paths, excludes, tags, handle)
 	finishedAt := time.Now().UTC()
 	status := api.JobSucceeded
 	exit := 0
 	errMsg := ""
 	if err != nil {
 		status = api.JobFailed
 		exit = -1
 		errMsg = err.Error()
 	}
 	var statsBlob json.RawMessage
 	if summary != nil {
 		statsBlob, _ = json.Marshal(summary)
 	}
-	finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
+	r.sendFinished(jobID, finishedAt, err, statsBlob)
 		JobID:      jobID,
 		Status:     status,
 		ExitCode:   exit,
 		FinishedAt: finishedAt,
 		Stats:      statsBlob,
 		Error:      errMsg,
 	})
 	_ = r.tx.Send(finEnv)
 	// On a successful backup, refresh the server's snapshot projection.
 	// We do this *after* job.finished so the UI sees the job land first;
@@ -147,6 +177,9 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
 		if rerr := r.reportSnapshots(ctx, env); rerr != nil {
 			slog.Warn("runner: snapshots.report failed", "job_id", jobID, "err", rerr)
 		}
 		if rerr := r.reportStats(ctx, env, api.RepoStatsPayload{}); rerr != nil {
 			slog.Warn("runner: stats.report after backup failed", "job_id", jobID, "err", rerr)
 		}
 	}
 	if err != nil {
@@ -160,111 +193,35 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
 // browser-side log viewer just works.
 func (r *Runner) RunInit(ctx context.Context, jobID string) error {
 	startedAt := time.Now().UTC()
-	startEnv, _ := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
+	r.sendStarted(jobID, api.JobInit, startedAt)
 		JobID: jobID, Kind: api.JobInit, StartedAt: startedAt,
 	})
 	if err := r.tx.Send(startEnv); err != nil {
 		slog.Warn("runner: send job.started (init)", "err", err)
 	}
 	env := restic.Env{
 		Bin:          r.cfg.ResticBin,
 		RepoURL:      r.cfg.RepoURL,
 		RepoUsername: r.cfg.RepoUsername,
 		RepoPassword: r.cfg.RepoPassword,
 	}
 	env := r.resticEnv()
 	var seq atomic.Int64
-	handle := func(stream string, line string, _ any) {
+	err := env.RunInit(ctx, r.streamHandler(jobID, &seq))
 		now := time.Now().UTC()
 		logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
 			JobID:   jobID,
 			Seq:     seq.Add(1),
 			TS:      now,
 			Stream:  api.LogStream(stream),
 			Payload: line,
 		})
 		_ = r.tx.Send(logEnv)
 	}
 	err := env.RunInit(ctx, handle)
 	finishedAt := time.Now().UTC()
-
+	r.sendFinished(jobID, finishedAt, err, nil)
 	status := api.JobSucceeded
 	exit := 0
 	errMsg := ""
 	if err != nil {
 		status = api.JobFailed
 		exit = -1
 		errMsg = err.Error()
 	}
 	finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
 		JobID:      jobID,
 		Status:     status,
 		ExitCode:   exit,
 		FinishedAt: finishedAt,
 		Error:      errMsg,
 	})
 	_ = r.tx.Send(finEnv)
 	if err != nil {
 		return fmt.Errorf("runner init: %w", err)
 	}
 	return nil
 }
-// RunForget executes a forget job against the configured repo with
+// RunForget executes a forget job against the configured repo by
-// the given retention policy. Same envelope shape as RunBackup so
+// invoking `restic forget --tag <Tag> --keep-* …` once per group.
-// the live log viewer + job lifecycle work without special-casing.
+// Same envelope shape as RunBackup so the live log viewer + job
-// On success refreshes the snapshot projection (forget rewrites the
+// lifecycle work without special-casing. On success refreshes the
-// snapshot index — the host's snapshot list shrinks).
+// snapshot projection (forget rewrites the snapshot index — the
-func (r *Runner) RunForget(ctx context.Context, jobID string, policy restic.ForgetPolicy) error {
+// host's snapshot list shrinks). Snapshot refresh runs once after
 // every group completes, not per-group.
 func (r *Runner) RunForget(ctx context.Context, jobID string, groups []restic.ForgetGroup) error {
 	startedAt := time.Now().UTC()
-	startEnv, _ := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
+	r.sendStarted(jobID, api.JobForget, startedAt)
 		JobID: jobID, Kind: api.JobForget, StartedAt: startedAt,
 	})
 	if err := r.tx.Send(startEnv); err != nil {
 		slog.Warn("runner: send job.started (forget)", "err", err)
 	}
 	env := restic.Env{
 		Bin:          r.cfg.ResticBin,
 		RepoURL:      r.cfg.RepoURL,
 		RepoUsername: r.cfg.RepoUsername,
 		RepoPassword: r.cfg.RepoPassword,
 	}
 	env := r.resticEnv()
 	var seq atomic.Int64
-	handle := func(stream string, line string, _ any) {
+	err := env.RunForget(ctx, groups, r.streamHandler(jobID, &seq))
 		now := time.Now().UTC()
 		logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
 			JobID:   jobID,
 			Seq:     seq.Add(1),
 			TS:      now,
 			Stream:  api.LogStream(stream),
 			Payload: line,
 		})
 		_ = r.tx.Send(logEnv)
 	}
 	err := env.RunForget(ctx, policy, handle)
 	finishedAt := time.Now().UTC()
-
+	r.sendFinished(jobID, finishedAt, err, nil)
 	status := api.JobSucceeded
 	exit := 0
 	errMsg := ""
 	if err != nil {
 		status = api.JobFailed
 		exit = -1
 		errMsg = err.Error()
 	}
 	finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
 		JobID:      jobID,
 		Status:     status,
 		ExitCode:   exit,
 		FinishedAt: finishedAt,
 		Error:      errMsg,
 	})
 	_ = r.tx.Send(finEnv)
 	// Refresh the server's snapshot projection — forget rewrites the
 	// index so the host's snapshot list almost certainly shrunk.
@@ -281,6 +238,129 @@ func (r *Runner) RunForget(ctx context.Context, jobID string, policy restic.Forg
 	return nil
 }
 // RunPrune runs a prune job against the configured repo and reports
 // its lifecycle through the sender: job.started, the streamed log
 // lines, and job.finished. When prune succeeds, a repo.stats envelope
 // with LastPruneAt set to the finish time is shipped via reportStats
 // before job.finished; when prune fails, no stats refresh is attempted.
 // A failure is returned wrapped as "runner prune: ...".
 func (r *Runner) RunPrune(ctx context.Context, jobID string) error {
 	r.sendStarted(jobID, api.JobPrune, time.Now().UTC())
 	env := r.resticEnv()
 	var seq atomic.Int64
 	runErr := env.RunPrune(ctx, r.streamHandler(jobID, &seq))
 	finishedAt := time.Now().UTC()
 	if runErr != nil {
 		r.sendFinished(jobID, finishedAt, runErr, nil)
 		return fmt.Errorf("runner prune: %w", runErr)
 	}
 	// Copy so the payload does not point at finishedAt itself.
 	prunedAt := finishedAt
 	patch := api.RepoStatsPayload{LastPruneAt: &prunedAt}
 	if rerr := r.reportStats(ctx, env, patch); rerr != nil {
 		slog.Warn("runner: stats.report after prune failed", "job_id", jobID, "err", rerr)
 	}
 	r.sendFinished(jobID, finishedAt, nil, nil)
 	return nil
 }
 // RunCheck runs a `restic check` job with the given subset percentage.
 // Whether or not the check fails, a repo.stats envelope carrying
 // LastCheckAt, LastCheckStatus ("ok", "errors_found" or "failed") and
 // LockPresent is shipped via reportStats before job.finished. A
 // non-nil error from the check is returned wrapped as
 // "runner check: ...".
 func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) error {
 	r.sendStarted(jobID, api.JobCheck, time.Now().UTC())
 	env := r.resticEnv()
 	var seq atomic.Int64
 	res, err := env.RunCheck(ctx, subsetPct, r.streamHandler(jobID, &seq))
 	finishedAt := time.Now().UTC()
 	// Map the outcome onto the wire status string.
 	checkStatus := "ok"
 	switch {
 	case err != nil:
 		checkStatus = "failed"
 	case res.ErrorsFound:
 		checkStatus = "errors_found"
 	}
 	// NOTE(review): res.LockPresent is read even when err != nil; this
 	// assumes env.RunCheck still returns a usable result on failure —
 	// confirm against restic.Env.RunCheck.
 	locked := res.LockPresent
 	checkedAt := finishedAt
 	patch := api.RepoStatsPayload{
 		LastCheckAt:     &checkedAt,
 		LastCheckStatus: checkStatus,
 		LockPresent:     &locked,
 	}
 	if rerr := r.reportStats(ctx, env, patch); rerr != nil {
 		slog.Warn("runner: stats.report after check failed", "job_id", jobID, "err", rerr)
 	}
 	r.sendFinished(jobID, finishedAt, err, nil)
 	if err == nil {
 		return nil
 	}
 	return fmt.Errorf("runner check: %w", err)
 }
 // RunUnlock runs a `restic unlock` job. After a successful unlock a
 // repo.stats envelope with LockPresent=false is shipped via
 // reportStats (so the UI banner clears) ahead of job.finished; after a
 // failed unlock only job.finished is sent and the error is returned
 // wrapped as "runner unlock: ...".
 func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {
 	r.sendStarted(jobID, api.JobUnlock, time.Now().UTC())
 	env := r.resticEnv()
 	var seq atomic.Int64
 	runErr := env.RunUnlock(ctx, r.streamHandler(jobID, &seq))
 	finishedAt := time.Now().UTC()
 	if runErr != nil {
 		r.sendFinished(jobID, finishedAt, runErr, nil)
 		return fmt.Errorf("runner unlock: %w", runErr)
 	}
 	unlocked := false
 	if rerr := r.reportStats(ctx, env, api.RepoStatsPayload{LockPresent: &unlocked}); rerr != nil {
 		slog.Warn("runner: stats.report after unlock failed", "job_id", jobID, "err", rerr)
 	}
 	r.sendFinished(jobID, finishedAt, nil, nil)
 	return nil
 }
 // reportStats ships patch as a repo.stats envelope. When
 // patch.TotalSizeBytes is nil, the four size/count fields are first
 // filled in from env.RunStats, which is bounded by a 60-second
 // timeout derived from ctx. A RunStats failure is logged at debug
 // level and is non-fatal: the patch is still sent with whatever the
 // caller populated. The returned error comes from marshalling or
 // sending the envelope.
 func (r *Runner) reportStats(ctx context.Context, env restic.Env, patch api.RepoStatsPayload) error {
 	statsCtx, stop := context.WithTimeout(ctx, 60*time.Second)
 	defer stop()
 	if patch.TotalSizeBytes == nil {
 		s, statsErr := env.RunStats(statsCtx, nil)
 		if statsErr != nil {
 			slog.Debug("runner: stats refresh failed (non-fatal)", "err", statsErr)
 		} else {
 			// Copy each field so the payload pointers do not alias s.
 			totalSize, rawSize := s.TotalSize, s.TotalUncompressed
 			fileCount, snapCount := s.TotalFileCount, s.SnapshotsCount
 			patch.TotalSizeBytes = &totalSize
 			patch.RawSizeBytes = &rawSize
 			patch.UniqueFiles = &fileCount
 			patch.SnapshotCount = &snapCount
 		}
 	}
 	out, err := api.Marshal(api.MsgRepoStats, "", patch)
 	if err != nil {
 		return err
 	}
 	return r.tx.Send(out)
 }
 // reportSnapshots calls `restic snapshots --json`, translates the
 // payload into the wire shape, and ships it as a snapshots.report
 // envelope. Bounded by a separate timeout so a sluggish repo doesn't
@@ -0,0 +1,357 @@
 package runner
 import (
 	"context"
 	"os"
 	"path/filepath"
 	"syscall"
 	"testing"
 
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
 )
 // fakeSender is a test double for the runner's sender: it records
 // every envelope passed to Send, in call order, so tests can assert on
 // them afterwards.
 // NOTE(review): envs is appended to without locking — confirm Send is
 // never invoked from concurrent goroutines during a run.
 type fakeSender struct {
 	envs []api.Envelope
 }
 // Send appends env to the recorded list and always reports success.
 func (f *fakeSender) Send(env api.Envelope) error {
 	f.envs = append(f.envs, env)
 	return nil
 }
 // setupScript writes body as a /bin/sh script named "restic" inside a
 // fresh t.TempDir(), marks it executable, and returns its path. Pass
 // body WITHOUT a shebang: "#!/bin/sh\n" is prepended and a trailing
 // newline appended here. Any I/O failure aborts the test via t.Fatalf.
 //
 // ETXTBSY ("text file busy") avoidance: exec of a file fails while any
 // process still holds a writable fd on its inode. Under -race + many
 // t.Parallel tests, a fork started from another goroutine can inherit
 // the writable fd opened by os.WriteFile and keep it until the child
 // execs. Two measures are combined:
 //
 //   - The write happens under syscall.ForkLock.RLock(). The Go runtime
 //     takes ForkLock for writing around fork, so no child of this
 //     process can be forked while the writable fd exists.
 //   - The script is written to "<path>.tmp" and renamed into place, so
 //     the final path never names a partially written file. The rename
 //     alone does not close the race: it keeps the same inode, so an fd
 //     inherited before the rename would still count as a writer.
 func setupScript(t *testing.T, body string) string {
 	t.Helper()
 	final := filepath.Join(t.TempDir(), "restic")
 	tmp := final + ".tmp"
 	// Closure so the read lock is released via defer on every path,
 	// before t.Fatalf can unwind the goroutine.
 	writeErr := func() error {
 		syscall.ForkLock.RLock()
 		defer syscall.ForkLock.RUnlock()
 		return os.WriteFile(tmp, []byte("#!/bin/sh\n"+body+"\n"), 0o755)
 	}()
 	if writeErr != nil {
 		t.Fatalf("setupScript: write tmp: %v", writeErr)
 	}
 	if err := os.Rename(tmp, final); err != nil {
 		t.Fatalf("setupScript: rename: %v", err)
 	}
 	return final
 }
 // firstEnvOfType scans envs in order and returns the earliest envelope
 // whose Type equals mt. If there is none, the test is failed via
 // t.Fatalf.
 func firstEnvOfType(t *testing.T, envs []api.Envelope, mt api.MessageType) api.Envelope {
 	t.Helper()
 	for i := range envs {
 		if envs[i].Type != mt {
 			continue
 		}
 		return envs[i]
 	}
 	t.Fatalf("no envelope of type %q found in %d envelopes", mt, len(envs))
 	// Not reached (t.Fatalf stops the test); keeps the compiler happy.
 	return api.Envelope{}
 }
 // envelopeOrder projects envs onto their message types, preserving
 // order; the result has one entry per envelope.
 func envelopeOrder(envs []api.Envelope) []api.MessageType {
 	types := make([]api.MessageType, 0, len(envs))
 	for i := range envs {
 		types = append(types, envs[i].Type)
 	}
 	return types
 }
 // TestRunPruneShipsExpectedEnvelopes runs RunPrune against a fake
 // binary that echoes "prune" (which becomes a log.stream envelope) and
 // answers the stats call with valid JSON so reportStats can fill in
 // the size fields. The first occurrences of the landmark envelopes
 // must appear in this order:
 // job.started → log.stream → repo.stats → job.finished.
 func TestRunPruneShipsExpectedEnvelopes(t *testing.T) {
 	t.Parallel()
 	// One fake "restic" serves both the "prune" and the "stats --json" call.
 	statsJSON := `{"total_size":1000,"total_uncompressed_size":2000,"snapshots_count":3,"total_file_count":10}`
 	bin := setupScript(t, `
 case "$1" in
  prune)  echo "prune" ;;
  stats)  echo '`+statsJSON+`' ;;
  *)      echo "unknown: $*" ;;
 esac
 `)
 	sender := &fakeSender{}
 	rn := New(Config{ResticBin: bin}, sender, 0)
 	if err := rn.RunPrune(context.Background(), "job-1"); err != nil {
 		t.Fatalf("RunPrune: %v", err)
 	}
 	order := envelopeOrder(sender.envs)
 	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgLogStream, api.MsgRepoStats, api.MsgJobFinished}
 	// Index of the first occurrence of each type: walking backwards
 	// lets the earliest position overwrite any later one.
 	positions := make(map[api.MessageType]int, len(order))
 	for i := len(order) - 1; i >= 0; i-- {
 		positions[order[i]] = i
 	}
 	// Each adjacent pair of landmarks must be present and in order.
 	for i, a := range wantTypes[:len(wantTypes)-1] {
 		b := wantTypes[i+1]
 		pa, aOK := positions[a]
 		pb, bOK := positions[b]
 		switch {
 		case !aOK:
 			t.Errorf("envelope type %q not found in output %v", a, order)
 		case !bOK:
 			t.Errorf("envelope type %q not found in output %v", b, order)
 		case pa >= pb:
 			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
 		}
 	}
 	// repo.stats must carry LastPruneAt.
 	var stats api.RepoStatsPayload
 	if err := firstEnvOfType(t, sender.envs, api.MsgRepoStats).UnmarshalPayload(&stats); err != nil {
 		t.Fatalf("unmarshal repo.stats payload: %v", err)
 	}
 	if stats.LastPruneAt == nil {
 		t.Error("expected LastPruneAt to be set in repo.stats after prune")
 	}
 	// job.finished must report success.
 	var fin api.JobFinishedPayload
 	if err := firstEnvOfType(t, sender.envs, api.MsgJobFinished).UnmarshalPayload(&fin); err != nil {
 		t.Fatalf("unmarshal job.finished payload: %v", err)
 	}
 	if got := fin.Status; got != api.JobSucceeded {
 		t.Errorf("expected job.finished status=%q, got %q", api.JobSucceeded, got)
 	}
 }
 // TestRunCheckShipsCheckStatus covers a check run that prints a
 // stale-lock line on stderr yet exits 0: the repo.stats envelope must
 // report LastCheckStatus="ok", LockPresent=true and a non-nil
 // LastCheckAt.
 func TestRunCheckShipsCheckStatus(t *testing.T) {
 	t.Parallel()
 	statsJSON := `{"total_size":500,"total_uncompressed_size":600,"snapshots_count":1,"total_file_count":5}`
 	bin := setupScript(t, `
 case "$1" in
  check) echo "Found stale lock" >&2; exit 0 ;;
  stats) echo '`+statsJSON+`' ;;
  *)     exit 0 ;;
 esac
 `)
 	sender := &fakeSender{}
 	rn := New(Config{ResticBin: bin}, sender, 0)
 	if err := rn.RunCheck(context.Background(), "job-2", 0); err != nil {
 		t.Fatalf("RunCheck: %v", err)
 	}
 	// Landmark order: job.started → log.stream → repo.stats → job.finished.
 	order := envelopeOrder(sender.envs)
 	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgLogStream, api.MsgRepoStats, api.MsgJobFinished}
 	// Index of the first occurrence of each type: walking backwards
 	// lets the earliest position overwrite any later one.
 	positions := make(map[api.MessageType]int, len(order))
 	for i := len(order) - 1; i >= 0; i-- {
 		positions[order[i]] = i
 	}
 	for i, a := range wantTypes[:len(wantTypes)-1] {
 		b := wantTypes[i+1]
 		pa, aOK := positions[a]
 		pb, bOK := positions[b]
 		switch {
 		case !aOK:
 			t.Errorf("envelope type %q not found in output %v", a, order)
 		case !bOK:
 			t.Errorf("envelope type %q not found in output %v", b, order)
 		case pa >= pb:
 			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
 		}
 	}
 	var stats api.RepoStatsPayload
 	if err := firstEnvOfType(t, sender.envs, api.MsgRepoStats).UnmarshalPayload(&stats); err != nil {
 		t.Fatalf("unmarshal: %v", err)
 	}
 	if got := stats.LastCheckStatus; got != "ok" {
 		t.Errorf("LastCheckStatus: got %q, want %q", got, "ok")
 	}
 	if locked := stats.LockPresent; locked == nil || !*locked {
 		t.Errorf("expected LockPresent=true, got %v", stats.LockPresent)
 	}
 	if stats.LastCheckAt == nil {
 		t.Error("expected LastCheckAt to be set")
 	}
 }
 // TestRunCheckErrorsFoundShipsErrorsStatus covers a check run whose
 // fake binary exits 1 (errors found): RunCheck itself must return nil
 // and the repo.stats envelope must report
 // LastCheckStatus="errors_found".
 func TestRunCheckErrorsFoundShipsErrorsStatus(t *testing.T) {
 	t.Parallel()
 	statsJSON := `{"total_size":500,"total_uncompressed_size":600,"snapshots_count":1,"total_file_count":5}`
 	bin := setupScript(t, `
 case "$1" in
  check) exit 1 ;;
  stats) echo '`+statsJSON+`' ;;
  *)     exit 0 ;;
 esac
 `)
 	sender := &fakeSender{}
 	rn := New(Config{ResticBin: bin}, sender, 0)
 	// Exit 1 means "errors found", which is not a wrapper failure, so
 	// RunCheck is expected to return nil.
 	if err := rn.RunCheck(context.Background(), "job-3", 0); err != nil {
 		t.Fatalf("RunCheck: %v", err)
 	}
 	// Landmark order: job.started → repo.stats → job.finished. No
 	// log.stream is expected because the fake script prints nothing
 	// before exiting 1 (a real restic check would emit log lines
 	// before exiting non-zero).
 	order := envelopeOrder(sender.envs)
 	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgRepoStats, api.MsgJobFinished}
 	// Index of the first occurrence of each type: walking backwards
 	// lets the earliest position overwrite any later one.
 	positions := make(map[api.MessageType]int, len(order))
 	for i := len(order) - 1; i >= 0; i-- {
 		positions[order[i]] = i
 	}
 	for i, a := range wantTypes[:len(wantTypes)-1] {
 		b := wantTypes[i+1]
 		pa, aOK := positions[a]
 		pb, bOK := positions[b]
 		switch {
 		case !aOK:
 			t.Errorf("envelope type %q not found in output %v", a, order)
 		case !bOK:
 			t.Errorf("envelope type %q not found in output %v", b, order)
 		case pa >= pb:
 			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
 		}
 	}
 	var stats api.RepoStatsPayload
 	if err := firstEnvOfType(t, sender.envs, api.MsgRepoStats).UnmarshalPayload(&stats); err != nil {
 		t.Fatalf("unmarshal: %v", err)
 	}
 	if got := stats.LastCheckStatus; got != "errors_found" {
 		t.Errorf("LastCheckStatus: got %q, want %q", got, "errors_found")
 	}
 }
 // TestRunUnlockClearsLock runs RunUnlock against a fake binary whose
 // unlock subcommand succeeds, then checks that the shipped repo.stats
 // envelope reports LockPresent=false.
 func TestRunUnlockClearsLock(t *testing.T) {
 	t.Parallel()
 	statsJSON := `{"total_size":100,"total_uncompressed_size":150,"snapshots_count":2,"total_file_count":8}`
 	// Stand-in binary: unlock prints one line, stats prints the canned
 	// JSON, any other subcommand exits 0.
 	fakeRestic := setupScript(t, `
 case "$1" in
  unlock) echo "removed 1 locks" ;;
  stats)  echo '`+statsJSON+`' ;;
  *)      exit 0 ;;
 esac
 `)
 	sender := &fakeSender{}
 	rn := New(Config{ResticBin: fakeRestic}, sender, 0)
 	err := rn.RunUnlock(context.Background(), "job-4")
 	if err != nil {
 		t.Fatalf("RunUnlock: %v", err)
 	}
 	// Expected envelope ordering:
 	// job.started → log.stream → repo.stats → job.finished.
 	order := envelopeOrder(sender.envs)
 	sequence := []api.MessageType{api.MsgJobStarted, api.MsgLogStream, api.MsgRepoStats, api.MsgJobFinished}
 	// firstSeen remembers the index at which each type first appears.
 	firstSeen := map[api.MessageType]int{}
 	for idx, kind := range order {
 		if _, dup := firstSeen[kind]; dup {
 			continue
 		}
 		firstSeen[kind] = idx
 	}
 	// Each adjacent pair of the sequence must be present and in order.
 	for k := 1; k < len(sequence); k++ {
 		prev, next := sequence[k-1], sequence[k]
 		prevIdx, havePrev := firstSeen[prev]
 		nextIdx, haveNext := firstSeen[next]
 		switch {
 		case !havePrev:
 			t.Errorf("envelope type %q not found in output %v", prev, order)
 		case !haveNext:
 			t.Errorf("envelope type %q not found in output %v", next, order)
 		case prevIdx >= nextIdx:
 			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", prev, next, prevIdx, nextIdx, order)
 		}
 	}
 	payloadEnv := firstEnvOfType(t, sender.envs, api.MsgRepoStats)
 	var stats api.RepoStatsPayload
 	if err := payloadEnv.UnmarshalPayload(&stats); err != nil {
 		t.Fatalf("unmarshal: %v", err)
 	}
 	// A nil pointer would mean "no update", so it must be set and false.
 	switch {
 	case stats.LockPresent == nil:
 		t.Fatal("expected LockPresent to be set (non-nil)")
 	case *stats.LockPresent:
 		t.Errorf("expected LockPresent=false after unlock, got true")
 	}
 }
 // TestRunInitShipsStartedAndFinished checks that RunInit, driven by a
 // fake binary that prints one line and exits 0, ships both a
 // job.started and a job.finished envelope.
 func TestRunInitShipsStartedAndFinished(t *testing.T) {
 	t.Parallel()
 	fakeRestic := setupScript(t, `echo "initialized repository"`)
 	sender := &fakeSender{}
 	rn := New(Config{ResticBin: fakeRestic}, sender, 0)
 	err := rn.RunInit(context.Background(), "job-init")
 	if err != nil {
 		t.Fatalf("RunInit: %v", err)
 	}
 	// Only presence matters here, so the returned envelopes are not inspected.
 	firstEnvOfType(t, sender.envs, api.MsgJobStarted)
 	firstEnvOfType(t, sender.envs, api.MsgJobFinished)
 }
 // TestRunForgetShipsStartedAndFinished checks that RunForget, given a
 // single tagged group with a keep-last policy, ships both a
 // job.started and a job.finished envelope.
 func TestRunForgetShipsStartedAndFinished(t *testing.T) {
 	t.Parallel()
 	// The fake binary answers both the "forget --json ..." and the
 	// "snapshots --json" invocations with an empty JSON array.
 	fakeRestic := setupScript(t, `
 case "$1" in
  forget)    echo "[]" ;;
  snapshots) echo "[]" ;;
  *)         exit 0 ;;
 esac
 `)
 	sender := &fakeSender{}
 	rn := New(Config{ResticBin: fakeRestic}, sender, 0)
 	one := 1
 	err := rn.RunForget(context.Background(), "job-forget", []restic.ForgetGroup{
 		{Tag: "documents", Policy: restic.ForgetPolicy{KeepLast: &one}},
 	})
 	if err != nil {
 		t.Fatalf("RunForget: %v", err)
 	}
 	// Only presence matters here, so the returned envelopes are not inspected.
 	firstEnvOfType(t, sender.envs, api.MsgJobStarted)
 	firstEnvOfType(t, sender.envs, api.MsgJobFinished)
 }
@@ -9,6 +9,7 @@
 package secrets
 import (
 	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -24,6 +25,11 @@ import (
 // depth — the key is per-host today, but cheap to be careful.)
 const additionalData = "rm-agent-repo-creds-v1"
 // ErrNoAdmin is the sentinel returned by LoadAdmin when the secrets
 // bundle has no admin slot. It is not a hard failure: callers should
 // test for it with errors.Is and treat it as "the agent has not yet
 // received an admin config.update push".
 var ErrNoAdmin = errors.New("secrets: admin slot not configured")
 // Repo is the plaintext shape persisted inside the AEAD blob.
 type Repo struct {
 	URL      string `json:"repo_url,omitempty"`
@@ -35,6 +41,15 @@ type Repo struct {
 // minimum (URL + password) needed to run a backup.
 func (r Repo) Empty() bool {
 	// A missing URL alone is enough to make the repo unusable.
 	if r.URL == "" {
 		return true
 	}
 	// Otherwise usability hinges on the password being present.
 	return r.Password == ""
 }
 // bundle is the on-disk JSON shape as of secrets v2. It holds the
 // everyday repo slot and an optional admin slot (prune / unlock).
 // Legacy files (pre-v2) contain a flat Repo object; loadBundle
 // transparently upgrades those on the next Save.
 type bundle struct {
 	// Repo is the everyday repo slot, stored by value.
 	// NOTE(review): encoding/json never treats a struct value as
 	// empty, so omitempty has no effect here and the "repo" key is
 	// always written.
 	Repo  Repo  `json:"repo,omitempty"`
 	// Admin is the optional admin slot; nil means it has never been
 	// set, in which case the "admin" key is omitted from the JSON.
 	Admin *Repo `json:"admin,omitempty"`
 }
 // Store reads and writes the encrypted secrets file at Path, sealed
 // under the 32-byte key Key.
 type Store struct {
@@ -55,32 +70,47 @@ func New(path string, key []byte) (*Store, error) {
 	return &Store{path: path, a: a}, nil
 }
-// Load returns the persisted Repo, or a zero-value Repo (with no
+// loadBundle reads and decrypts the on-disk blob, returning a bundle.
-// error) if the file does not exist yet — first-run agents have
+// It handles back-compat decode: legacy flat Repo blobs are detected
-// nothing on disk until the server pushes a config.update.
+// by the presence of a top-level "repo_url" key and re-wrapped into
-func (s *Store) Load() (Repo, error) {
+// the bundle shape transparently. Returns an empty bundle when the
 // file does not exist yet.
 func (s *Store) loadBundle() (bundle, error) {
 	body, err := os.ReadFile(s.path)
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
-			return Repo{}, nil
+			return bundle{}, nil
 		}
-		return Repo{}, fmt.Errorf("secrets: read %q: %w", s.path, err)
+		return bundle{}, fmt.Errorf("secrets: read %q: %w", s.path, err)
 	}
 	plain, err := s.a.Decrypt(string(body), []byte(additionalData))
 	if err != nil {
-		return Repo{}, fmt.Errorf("secrets: decrypt %q: %w", s.path, err)
+		return bundle{}, fmt.Errorf("secrets: decrypt %q: %w", s.path, err)
 	}
-	var r Repo
+
-	if err := json.Unmarshal(plain, &r); err != nil {
+	// Try the new bundle shape first.
-		return Repo{}, fmt.Errorf("secrets: parse %q: %w", s.path, err)
+	var b bundle
 	if err := json.Unmarshal(plain, &b); err != nil {
 		return bundle{}, fmt.Errorf("secrets: parse %q: %w", s.path, err)
 	}
-	return r, nil
+
 	// If the bundle has an empty Repo slot but the raw JSON contains
 	// a top-level "repo_url" key, this is a legacy flat blob —
 	// re-unmarshal it as a Repo and slot it in.
 	if b.Repo == (Repo{}) && bytes.Contains(plain, []byte(`"repo_url"`)) {
 		var legacy Repo
 		if err := json.Unmarshal(plain, &legacy); err == nil {
 			b.Repo = legacy
 		}
 	}
 	return b, nil
 }
-// Save replaces the on-disk blob atomically. Mode is 0600. Parent
+// saveBundle marshals b, encrypts it and writes it atomically at
-// directory must already exist (the install script lays it down).
+// mode 0600. Parent directory must already exist.
-func (s *Store) Save(r Repo) error {
+func (s *Store) saveBundle(b bundle) error {
-	body, err := json.Marshal(r)
+	body, err := json.Marshal(b)
 	if err != nil {
 		return fmt.Errorf("secrets: marshal: %w", err)
 	}
@@ -115,3 +145,50 @@ func (s *Store) Save(r Repo) error {
 	}
 	return nil
 }
 // Load reads the bundle from disk and hands back its everyday repo
 // slot. When loadBundle fails, the error is returned unchanged
 // together with a zero-value Repo. A file that does not exist yet is
 // not an error: first-run agents have nothing on disk until the
 // server pushes a config.update, so they get a zero-value Repo and
 // a nil error.
 func (s *Store) Load() (Repo, error) {
 	var everyday Repo
 	stored, err := s.loadBundle()
 	if err == nil {
 		everyday = stored.Repo
 	}
 	return everyday, err
 }
 // Save writes r into the repo slot while carrying the existing admin
 // slot over untouched. It is a read-modify-write: the current bundle
 // is loaded first, and a load failure aborts the save before anything
 // is written. The write itself is delegated to saveBundle (atomic,
 // mode 0600); the parent directory must already exist.
 func (s *Store) Save(r Repo) error {
 	current, loadErr := s.loadBundle()
 	if loadErr != nil {
 		return fmt.Errorf("secrets: load before save: %w", loadErr)
 	}
 	// Rebuild the bundle with the new repo slot and the old admin slot.
 	return s.saveBundle(bundle{Repo: r, Admin: current.Admin})
 }
 // LoadAdmin hands back a copy of the admin slot. If the bundle has no
 // admin slot it returns (Repo{}, ErrNoAdmin); any error from
 // loadBundle is returned unchanged and is a hard failure.
 func (s *Store) LoadAdmin() (Repo, error) {
 	stored, err := s.loadBundle()
 	switch {
 	case err != nil:
 		return Repo{}, err
 	case stored.Admin == nil:
 		return Repo{}, ErrNoAdmin
 	}
 	return *stored.Admin, nil
 }
 // SaveAdmin writes r into the admin slot while carrying the existing
 // repo slot over untouched. Like Save it loads the current bundle
 // first and aborts, without writing, if that load fails. The write
 // itself is delegated to saveBundle (atomic, mode 0600).
 func (s *Store) SaveAdmin(r Repo) error {
 	current, loadErr := s.loadBundle()
 	if loadErr != nil {
 		return fmt.Errorf("secrets: load before save: %w", loadErr)
 	}
 	// Rebuild the bundle with the old repo slot and the new admin slot.
 	return s.saveBundle(bundle{Repo: current.Repo, Admin: &r})
 }
@@ -2,6 +2,8 @@ package secrets
 import (
 	"crypto/rand"
 	"encoding/json"
 	"errors"
 	"io"
 	"os"
 	"path/filepath"
@@ -97,3 +99,211 @@ func TestSaveIsAtomic(t *testing.T) {
 		t.Errorf("dir should hold one file post-save, got %v", names)
 	}
 }
 // TestSecretsLoadAdminEmpty checks that LoadAdmin on a store whose
 // file was never written reports ErrNoAdmin rather than a hard error.
 func TestSecretsLoadAdminEmpty(t *testing.T) {
 	t.Parallel()
 	// The path lives in a fresh temp dir and nothing is ever saved to it.
 	secretStore, err := New(filepath.Join(t.TempDir(), "secrets.enc"), freshKey(t))
 	if err != nil {
 		t.Fatalf("new: %v", err)
 	}
 	if _, loadErr := secretStore.LoadAdmin(); !errors.Is(loadErr, ErrNoAdmin) {
 		t.Errorf("expected ErrNoAdmin, got %v", loadErr)
 	}
 }
 // TestSecretsAdminSlotIndependent saves a repo slot and an admin
 // slot, then replaces the admin slot, checking after each step that
 // both slots read back with the expected values and that rewriting
 // the admin slot leaves the repo slot alone.
 func TestSecretsAdminSlotIndependent(t *testing.T) {
 	t.Parallel()
 	secretStore, err := New(filepath.Join(t.TempDir(), "secrets.enc"), freshKey(t))
 	if err != nil {
 		t.Fatalf("new: %v", err)
 	}
 	var (
 		everyday   = Repo{URL: "rest:https://repo/host", Username: "user", Password: "pw"}
 		firstAdmin = Repo{URL: "rest:https://repo/host", Username: "admin", Password: "adminpw"}
 		nextAdmin  = Repo{URL: "rest:https://repo/host", Username: "admin2", Password: "pw2"}
 	)
 	if err = secretStore.Save(everyday); err != nil {
 		t.Fatalf("save repo: %v", err)
 	}
 	if err = secretStore.SaveAdmin(firstAdmin); err != nil {
 		t.Fatalf("save admin: %v", err)
 	}
 	// The repo slot must read back exactly as saved.
 	if got, loadErr := secretStore.Load(); loadErr != nil {
 		t.Fatalf("load: %v", loadErr)
 	} else if got != everyday {
 		t.Errorf("repo slot mismatch: got %+v want %+v", got, everyday)
 	}
 	// The admin slot must read back exactly as saved.
 	if got, loadErr := secretStore.LoadAdmin(); loadErr != nil {
 		t.Fatalf("load admin: %v", loadErr)
 	} else if got != firstAdmin {
 		t.Errorf("admin slot mismatch: got %+v want %+v", got, firstAdmin)
 	}
 	// A second SaveAdmin replaces the admin slot only.
 	if err = secretStore.SaveAdmin(nextAdmin); err != nil {
 		t.Fatalf("save admin2: %v", err)
 	}
 	if got, loadErr := secretStore.Load(); loadErr != nil {
 		t.Fatalf("load after admin2 save: %v", loadErr)
 	} else if got != everyday {
 		t.Errorf("repo slot changed unexpectedly: got %+v want %+v", got, everyday)
 	}
 	if got, loadErr := secretStore.LoadAdmin(); loadErr != nil {
 		t.Fatalf("load admin2: %v", loadErr)
 	} else if got != nextAdmin {
 		t.Errorf("admin2 slot mismatch: got %+v want %+v", got, nextAdmin)
 	}
 }
 // TestSecretsSaveRefusesCorruptFile overwrites a valid secrets file
 // with undecryptable bytes and checks that Save then fails and leaves
 // those bytes on disk untouched.
 func TestSecretsSaveRefusesCorruptFile(t *testing.T) {
 	t.Parallel()
 	path := filepath.Join(t.TempDir(), "secrets.enc")
 	secretStore, err := New(path, freshKey(t))
 	if err != nil {
 		t.Fatalf("new: %v", err)
 	}
 	// Start from a valid file.
 	if err = secretStore.Save(Repo{URL: "rest:https://r/host", Password: "pw"}); err != nil {
 		t.Fatalf("initial save: %v", err)
 	}
 	// Replace it with bytes that cannot be decrypted.
 	const garbage = "not encrypted"
 	if err = os.WriteFile(path, []byte(garbage), 0o600); err != nil {
 		t.Fatalf("write garbage: %v", err)
 	}
 	// Save loads before it writes, so the failed decrypt must surface here.
 	if err = secretStore.Save(Repo{URL: "rest:https://r/host", Password: "new"}); err == nil {
 		t.Fatal("Save over corrupt file must return an error; got nil")
 	}
 	// The garbage bytes must still be what is on disk.
 	onDisk, err := os.ReadFile(path)
 	if err != nil {
 		t.Fatalf("re-read: %v", err)
 	}
 	if string(onDisk) != garbage {
 		t.Errorf("corrupt file was overwritten; file size now %d (was %d)", len(onDisk), len(garbage))
 	}
 }
 // TestSecretsSaveAdminRefusesCorruptFile overwrites a valid secrets
 // file with undecryptable bytes and checks that SaveAdmin then fails
 // and leaves those bytes on disk untouched.
 func TestSecretsSaveAdminRefusesCorruptFile(t *testing.T) {
 	t.Parallel()
 	path := filepath.Join(t.TempDir(), "secrets.enc")
 	secretStore, err := New(path, freshKey(t))
 	if err != nil {
 		t.Fatalf("new: %v", err)
 	}
 	// Start from a valid file that holds an admin slot.
 	if err = secretStore.SaveAdmin(Repo{URL: "rest:https://r/host", Password: "adminpw"}); err != nil {
 		t.Fatalf("initial save admin: %v", err)
 	}
 	// Replace it with bytes that cannot be decrypted.
 	const garbage = "not encrypted admin"
 	if err = os.WriteFile(path, []byte(garbage), 0o600); err != nil {
 		t.Fatalf("write garbage: %v", err)
 	}
 	// SaveAdmin loads before it writes, so the failed decrypt must surface here.
 	if err = secretStore.SaveAdmin(Repo{URL: "rest:https://r/host", Password: "new"}); err == nil {
 		t.Fatal("SaveAdmin over corrupt file must return an error; got nil")
 	}
 	// The garbage bytes must still be what is on disk.
 	onDisk, err := os.ReadFile(path)
 	if err != nil {
 		t.Fatalf("re-read: %v", err)
 	}
 	if string(onDisk) != garbage {
 		t.Errorf("corrupt file was overwritten; file size now %d (was %d)", len(onDisk), len(garbage))
 	}
 }
 // TestSecretsLegacyFlatBlobMigrates writes a pre-bundle file (a flat
 // Repo encrypted directly), then checks that Load decodes it, and
 // that after a SaveAdmin a freshly opened store sees both the legacy
 // repo slot and the new admin slot.
 func TestSecretsLegacyFlatBlobMigrates(t *testing.T) {
 	t.Parallel()
 	path := filepath.Join(t.TempDir(), "secrets.enc")
 	key := freshKey(t)
 	// Build the legacy file by hand, bypassing the bundle wrapping.
 	oldRepo := Repo{URL: "rest:https://legacy/host", Username: "legacyuser", Password: "legacypw"}
 	flatJSON, err := json.Marshal(oldRepo)
 	if err != nil {
 		t.Fatalf("marshal legacy: %v", err)
 	}
 	sealer, err := crypto.NewAEAD(key)
 	if err != nil {
 		t.Fatalf("aead: %v", err)
 	}
 	sealed, err := sealer.Encrypt(flatJSON, []byte(additionalData))
 	if err != nil {
 		t.Fatalf("encrypt legacy: %v", err)
 	}
 	if err = os.WriteFile(path, []byte(sealed), 0o600); err != nil {
 		t.Fatalf("write legacy file: %v", err)
 	}
 	// Loading through a regular store must surface the legacy Repo.
 	first, err := New(path, key)
 	if err != nil {
 		t.Fatalf("new: %v", err)
 	}
 	if got, loadErr := first.Load(); loadErr != nil {
 		t.Fatalf("load legacy: %v", loadErr)
 	} else if got != oldRepo {
 		t.Errorf("legacy decode mismatch: got %+v want %+v", got, oldRepo)
 	}
 	// SaveAdmin rewrites the file with both slots populated.
 	adminCreds := Repo{URL: "rest:https://legacy/host", Username: "admin", Password: "adminpw"}
 	if err = first.SaveAdmin(adminCreds); err != nil {
 		t.Fatalf("save admin after legacy: %v", err)
 	}
 	// A second store on the same path must see both slots.
 	second, err := New(path, key)
 	if err != nil {
 		t.Fatalf("reopen: %v", err)
 	}
 	if got, loadErr := second.Load(); loadErr != nil {
 		t.Fatalf("load repo after migration: %v", loadErr)
 	} else if got != oldRepo {
 		t.Errorf("repo after migration: got %+v want %+v", got, oldRepo)
 	}
 	if got, loadErr := second.LoadAdmin(); loadErr != nil {
 		t.Fatalf("load admin after migration: %v", loadErr)
 	} else if got != adminCreds {
 		t.Errorf("admin after migration: got %+v want %+v", got, adminCreds)
 	}
 }
@@ -77,6 +77,30 @@ const (
 	JobCancelled JobStatus = "cancelled" //nolint:misspell // wire format
 )
 // ForgetPolicyJSON is the wire shape of a per-group retention policy
 // shipped with a forget command.run. Mirrors store.RetentionPolicy
 // JSON tags exactly so a future caller could json-roundtrip between
 // the two without reshaping. All fields nullable; an empty struct is
 // rejected by the agent (restic refuses to forget without --keep-*).
 //
 // Each field is a pointer with omitempty, so a nil field is left out
 // of the JSON entirely and a zero-valued policy marshals to {}.
 type ForgetPolicyJSON struct {
 	KeepLast    *int `json:"keep_last,omitempty"`
 	KeepHourly  *int `json:"keep_hourly,omitempty"`
 	KeepDaily   *int `json:"keep_daily,omitempty"`
 	KeepWeekly  *int `json:"keep_weekly,omitempty"`
 	KeepMonthly *int `json:"keep_monthly,omitempty"`
 	KeepYearly  *int `json:"keep_yearly,omitempty"`
 }
 // ForgetGroup is one (tag, retention) pair shipped to the agent in a
 // forget command.run. The agent invokes
 // `restic forget --tag <Tag> --keep-* …` once per group, with each
 // group's own policy. The Tag is the source-group name (which is
 // also the snapshot tag carried at backup time).
 //
 // Neither field uses omitempty, so both keys are always present on
 // the wire.
 type ForgetGroup struct {
 	// Tag selects the snapshots this group's policy applies to.
 	Tag    string           `json:"tag"`
 	// Policy is the keep-* set applied to snapshots carrying Tag.
 	Policy ForgetPolicyJSON `json:"policy"`
 }
 // CommandRunPayload is the server → agent dispatch for a run-now job.
 //
 // For kind=backup, Includes/Excludes/Tag are populated from the source
@@ -85,19 +109,27 @@ const (
 // the source group's name) so retention can target it later via
 // `restic forget --tag`.
 //
-// For kind=forget, RetentionPolicy is the typed keep-* set as raw JSON
+// For kind=forget, ForgetGroups carries one entry per source-group on
-// (the agent doesn't share the store package's typed struct).
+// the host that has a non-empty retention policy. The agent walks the
 // list and runs `restic forget --tag <Tag> --keep-* …` per group.
 //
 // Args is preserved as a generic free-form slice for kinds that don't
-// fit the structured fields (e.g. unlock takes none; init takes none).
+// fit the structured fields (e.g. unlock takes none; init takes none;
 // check carries the subset% as Args[0]).
 //
 // RequiresAdminCreds tells the agent to load the admin slot of its
 // secrets store rather than the everyday repo slot. Set by the server
 // only for prune (the only kind that needs delete authority on a
 // rest-server repo today).
 type CommandRunPayload struct {
-	JobID           string          `json:"job_id"`
+	JobID              string        `json:"job_id"`
-	Kind            JobKind         `json:"kind"`
+	Kind               JobKind       `json:"kind"`
-	Args            []string        `json:"args,omitempty"`
+	Args               []string      `json:"args,omitempty"`
-	Includes        []string        `json:"includes,omitempty"`
+	Includes           []string      `json:"includes,omitempty"`
-	Excludes        []string        `json:"excludes,omitempty"`
+	Excludes           []string      `json:"excludes,omitempty"`
-	Tag             string          `json:"tag,omitempty"`
+	Tag                string        `json:"tag,omitempty"`
-	RetentionPolicy json.RawMessage `json:"retention_policy,omitempty"`
+	ForgetGroups       []ForgetGroup `json:"forget_groups,omitempty"`
 	RequiresAdminCreds bool          `json:"requires_admin_creds,omitempty"`
 }
 // CommandCancelPayload is the server → agent cancel signal.
@@ -186,15 +218,24 @@ type Snapshot struct {
 	FileCount int64     `json:"file_count,omitempty"`
 }
-// RepoStatsPayload — agent reports periodic repo health facts derived
+// RepoStatsPayload carries a partial-update snapshot of repo health
-// from `restic stats` and lock-file inspection.
+// facts, shipped by the agent after prune/check/unlock or a periodic
 // stats refresh. Pointer fields follow omitempty semantics: a nil
 // pointer means "no update for this field" and is omitted on the
 // wire; the server merges only the non-nil fields into its
 // host_repo_stats row (matching UpsertHostRepoStats partial-update
 // semantics). Non-pointer fields (LastCheckStatus) use the empty
 // string as the "no update" sentinel.
 type RepoStatsPayload struct {
-	SizeBytes       int64     `json:"size_bytes"`
+	TotalSizeBytes      *int64     `json:"total_size_bytes,omitempty"`
-	SnapshotCount   int       `json:"snapshot_count"`
+	RawSizeBytes        *int64     `json:"raw_size_bytes,omitempty"`
-	DedupRatio      float64   `json:"dedup_ratio"`
+	UniqueFiles         *int64     `json:"unique_files,omitempty"`
-	LastCheckAt     time.Time `json:"last_check_at,omitempty"`
+	SnapshotCount       *int64     `json:"snapshot_count,omitempty"`
-	LastCheckStatus string    `json:"last_check_status,omitempty"`
+	LastCheckAt         *time.Time `json:"last_check_at,omitempty"`
-	LockState       string    `json:"lock_state"` // locked|unlocked
+	LastCheckStatus     string     `json:"last_check_status,omitempty"`
 	LockPresent         *bool      `json:"lock_present,omitempty"`
 	LastPruneAt         *time.Time `json:"last_prune_at,omitempty"`
 	LastPruneFreedBytes *int64     `json:"last_prune_freed_bytes,omitempty"`
 }
 // Schedule is the agent-facing view of a slim Schedule row plus its
@@ -252,12 +293,19 @@ type ScheduleFirePayload struct {
 // ConfigUpdatePayload — server pushes per-host config (currently just
 // repo connection details). Empty fields mean "leave existing alone";
 // to clear something, send an explicit zero value.
 // NOTE(review): every field is a string tagged omitempty, so an empty
 // string is dropped from the JSON and cannot be told apart from
 // "field not sent" — confirm how a clear is meant to be expressed.
 //
 // Slot picks which secrets-store slot the agent writes the creds to.
 // Empty / "repo" = everyday repo creds (default). "admin" = the
 // prune-capable admin user (separate slot — not loaded for backups).
 // Forwards-compatible: an agent that ignores Slot simply writes to the
 // repo slot and admin pushes become no-ops.
 // NOTE(review): the sentence above reads as self-contradictory — an
 // agent that ignores Slot and writes an admin push to the repo slot
 // would overwrite the everyday creds rather than no-op; verify the
 // intended behavior against the agent's handler.
 type ConfigUpdatePayload struct {
 	RepoURL        string `json:"repo_url,omitempty"`
 	RepoPassword   string `json:"repo_password,omitempty"` // sensitive
 	RepoUsername   string `json:"repo_username,omitempty"`
 	RepoCredential string `json:"repo_credential,omitempty"` // sensitive (for rest server basic auth)
 	HookShell      string `json:"hook_shell,omitempty"`
 	Slot           string `json:"slot,omitempty"` // "" or "repo" = everyday slot, "admin" = admin slot
 }
 // AgentUpdateAvailablePayload — informational only; the agent does
@@ -12,3 +12,15 @@ const CurrentProtocolVersion = 1
 // server accepts in a hello. Agents below this are disconnected with
 // a structured error pointing at the upgrade docs.
 const MinAgentProtocolVersion = 1
 // Phase 5 (P2R-03..P2R-08, branch p2r-phase5-maintenance, 2026-05) reshaped
 // CommandRunPayload (RetentionPolicy removed, ForgetGroups added, RequiresAdminCreds added),
 // ConfigUpdatePayload (Slot added), and RepoStatsPayload (full reshape).
 // The protocol version was deliberately NOT bumped because:
 //  1. This project deploys agent and server in lockstep from the same release.
 //  2. There is no supported "rolling upgrade" path with mixed agent/server versions.
 //  3. The smoke env restage block in CLAUDE.md restages the agent binary on
 //     every server build for exactly this reason.
 //
 // If a multi-version protocol path is ever introduced, every Phase 5 wire
 // change is a breaking change and the version must bump to 2 at that time.
@@ -138,6 +138,85 @@ func TestJobProgressShapeStable(t *testing.T) {
 	}
 }
 // TestRepoStatsPayloadRoundTrip pins the wire behavior of
 // RepoStatsPayload in three steps: a zero value marshals to {}, a
 // fully populated value survives a marshal/unmarshal round trip field
 // by field, and a payload with only LockPresent set marshals to just
 // that one key (with false preserved rather than omitted).
 func TestRepoStatsPayloadRoundTrip(t *testing.T) {
 	t.Parallel()
 	// Nil pointer fields must be omitted from JSON output.
 	empty := RepoStatsPayload{}
 	raw, err := json.Marshal(empty)
 	if err != nil {
 		t.Fatalf("marshal empty: %v", err)
 	}
 	if string(raw) != "{}" {
 		t.Errorf("empty payload should marshal to {}, got %s", raw)
 	}
 	// Populated fields must survive a round trip.
 	total := int64(123456)
 	rawSize := int64(200000)
 	files := int64(42)
 	snaps := int64(7)
 	lockPresent := true
 	now := time.Date(2026, 1, 2, 3, 4, 5, 0, time.UTC)
 	pruneAt := time.Date(2026, 1, 3, 0, 0, 0, 0, time.UTC)
 	freed := int64(8192)
 	p := RepoStatsPayload{
 		TotalSizeBytes:      &total,
 		RawSizeBytes:        &rawSize,
 		UniqueFiles:         &files,
 		SnapshotCount:       &snaps,
 		LastCheckAt:         &now,
 		LastCheckStatus:     "ok",
 		LockPresent:         &lockPresent,
 		LastPruneAt:         &pruneAt,
 		LastPruneFreedBytes: &freed,
 	}
 	raw2, err := json.Marshal(p)
 	if err != nil {
 		t.Fatalf("marshal full: %v", err)
 	}
 	var got RepoStatsPayload
 	if err := json.Unmarshal(raw2, &got); err != nil {
 		t.Fatalf("unmarshal: %v", err)
 	}
 	if got.TotalSizeBytes == nil || *got.TotalSizeBytes != total {
 		t.Errorf("TotalSizeBytes: got %v, want %d", got.TotalSizeBytes, total)
 	}
 	if got.RawSizeBytes == nil || *got.RawSizeBytes != rawSize {
 		t.Errorf("RawSizeBytes: got %v, want %d", got.RawSizeBytes, rawSize)
 	}
 	if got.UniqueFiles == nil || *got.UniqueFiles != files {
 		t.Errorf("UniqueFiles: got %v, want %d", got.UniqueFiles, files)
 	}
 	if got.SnapshotCount == nil || *got.SnapshotCount != snaps {
 		t.Errorf("SnapshotCount: got %v, want %d", got.SnapshotCount, snaps)
 	}
 	// Times are compared with Equal rather than == so the check does
 	// not depend on how the decoded value is represented internally.
 	if got.LastCheckAt == nil || !got.LastCheckAt.Equal(now) {
 		t.Errorf("LastCheckAt: got %v, want %v", got.LastCheckAt, now)
 	}
 	if got.LastCheckStatus != "ok" {
 		t.Errorf("LastCheckStatus: got %q, want %q", got.LastCheckStatus, "ok")
 	}
 	if got.LockPresent == nil || *got.LockPresent != lockPresent {
 		t.Errorf("LockPresent: got %v, want %v", got.LockPresent, lockPresent)
 	}
 	if got.LastPruneAt == nil || !got.LastPruneAt.Equal(pruneAt) {
 		t.Errorf("LastPruneAt: got %v, want %v", got.LastPruneAt, pruneAt)
 	}
 	if got.LastPruneFreedBytes == nil || *got.LastPruneFreedBytes != freed {
 		t.Errorf("LastPruneFreedBytes: got %v, want %d", got.LastPruneFreedBytes, freed)
 	}
 	// Partial update: only set LockPresent. A non-nil pointer to false
 	// must still be written, since only nil means "no update".
 	lockFalse := false
 	partial := RepoStatsPayload{LockPresent: &lockFalse}
 	rawPartial, err := json.Marshal(partial)
 	if err != nil {
 		// Previously discarded; a marshal failure here would otherwise
 		// show up only as a confusing mismatch on the line below.
 		t.Fatalf("marshal partial: %v", err)
 	}
 	if string(rawPartial) != `{"lock_present":false}` {
 		t.Errorf("partial marshal: got %s, want {\"lock_present\":false}", rawPartial)
 	}
 }
 // Reference time.Now so the time import stays in use by this file
 // as its tests grow and change over time.
 var _ = time.Now
@@ -151,8 +151,7 @@ func (e Env) RunBackup(ctx context.Context, paths, excludes, tags []string, hand
 }
 // ForgetPolicy mirrors restic forget's --keep-* flags. All optional;
-// nil/zero means "don't pass that flag." Caller passes whatever the
+// nil/zero means "don't pass that flag."
 // schedule's RetentionPolicy carries.
 type ForgetPolicy struct {
 	KeepLast    *int
 	KeepHourly  *int
@@ -181,53 +180,47 @@ func (p ForgetPolicy) args() []string {
 	return out
 }
-// Empty reports whether no retention dimensions are set. restic
+// Empty reports whether no retention dimensions are set.
 // forget refuses to run without at least one keep-* flag (it would
 // delete every snapshot), so the agent rejects empty policies before
 // invoking restic.
 func (p ForgetPolicy) Empty() bool {
 	switch {
 	case p.KeepLast != nil, p.KeepHourly != nil, p.KeepDaily != nil,
 		p.KeepWeekly != nil, p.KeepMonthly != nil, p.KeepYearly != nil:
 		// At least one keep-* dimension is set.
 		return false
 	}
 	return true
 }
-// RunForget executes `restic forget --keep-* … --json` against the
+// ForgetGroup is one (tag, retention-policy) pair fed to RunForget.
-// configured repo. Does NOT pass --prune — pruning lives behind a
+// The wrapper invokes `restic forget --tag <Tag> --keep-* …` per
-// separate, admin-only credential (see spec §4.3 / P2-06). Restic
+// group so retention can be targeted at a single source-group's
-// just rewrites the snapshot index; the actual data deletion waits
+// snapshots without disturbing snapshots tagged for other groups.
-// for the next prune. Returns nil on a clean exit.
+type ForgetGroup struct {
-func (e Env) RunForget(ctx context.Context, policy ForgetPolicy, handle LineHandler) error {
+	Tag    string
-	if policy.Empty() {
+	Policy ForgetPolicy
-		return fmt.Errorf("restic forget: refusing to run with empty retention policy (would delete every snapshot)")
+}
 	}
 	args := append([]string{"forget", "--json"}, policy.args()...)
 	cmd := exec.CommandContext(ctx, e.Bin, args...)
 	cmd.Env = e.envSlice()
 	cmd.Dir = e.WorkDir
-	stdout, err := cmd.StdoutPipe()
+// RunForget executes one `restic forget --tag <Tag> --keep-* …`
-	if err != nil {
+// invocation per group. Does NOT pass --prune — pruning lives behind
-		return fmt.Errorf("restic forget: stdout pipe: %w", err)
+// a separate admin-only credential (see spec §4.3 / P2-06). Restic
 // rewrites the snapshot index; the actual data deletion waits for
 // the next prune. Empty groups slice is rejected (would be a no-op);
 // any group with an empty policy is rejected (restic forget without
 // any keep-* would delete every snapshot in the tagged set).
 // Returns the first error encountered, or nil when every group runs
 // to a clean exit.
 func (e Env) RunForget(ctx context.Context, groups []ForgetGroup, handle LineHandler) error {
 	if len(groups) == 0 {
 		return fmt.Errorf("restic forget: refusing to run with no groups (would be a no-op)")
 	}
-	stderr, err := cmd.StderrPipe()
+	for _, g := range groups {
-	if err != nil {
+		if g.Policy.Empty() {
-		return fmt.Errorf("restic forget: stderr pipe: %w", err)
+			return fmt.Errorf("restic forget: group %q has empty retention policy (would delete every snapshot)", g.Tag)
-	}
+		}
-
+		args := []string{"forget", "--json", "--tag", g.Tag}
-	if err := cmd.Start(); err != nil {
+		args = append(args, g.Policy.args()...)
-		return fmt.Errorf("restic forget: start: %w", err)
+		cmd := exec.CommandContext(ctx, e.Bin, args...)
-	}
+		cmd.Env = e.envSlice()
-
+		cmd.Dir = e.WorkDir
-	done := make(chan error, 2)
+		if err := runWithPump(cmd, handle); err != nil {
-	go func() { done <- pumpPlain(stdout, "stdout", handle) }()
+			return err
 	go func() { done <- pumpPlain(stderr, "stderr", handle) }()
 	for i := 0; i < 2; i++ {
 		if err := <-done; err != nil && handle != nil {
 			handle("event", fmt.Sprintf("pump error: %v", err), nil)
 		}
 	}
 	if werr := cmd.Wait(); werr != nil {
 		return fmt.Errorf("restic forget: %w", werr)
 	}
 	return nil
 }
@@ -243,19 +236,6 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
 	cmd.Env = e.envSlice()
 	cmd.Dir = e.WorkDir
 	stdout, err := cmd.StdoutPipe()
 	if err != nil {
 		return fmt.Errorf("restic init: stdout pipe: %w", err)
 	}
 	stderr, err := cmd.StderrPipe()
 	if err != nil {
 		return fmt.Errorf("restic init: stderr pipe: %w", err)
 	}
 	if err := cmd.Start(); err != nil {
 		return fmt.Errorf("restic init: start: %w", err)
 	}
 	// Sniff for "config file already exists" on stderr; if we see it
 	// we'll treat the non-zero exit as a soft success — running init
 	// against an already-initialized repo is a no-op semantically,
@@ -271,26 +251,166 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
 		}
 	}
-	done := make(chan error, 2)
+	if err := runWithPump(cmd, sniff); err != nil {
 	go func() { done <- pumpPlain(stdout, "stdout", sniff) }()
 	go func() { done <- pumpPlain(stderr, "stderr", sniff) }()
 	for i := 0; i < 2; i++ {
 		if err := <-done; err != nil && handle != nil {
 			handle("event", fmt.Sprintf("pump error: %v", err), nil)
 		}
 	}
 	if werr := cmd.Wait(); werr != nil {
 		if alreadyInited {
 			if handle != nil {
 				handle("event", "repo already initialized — treating as success", nil)
 			}
 			return nil
 		}
-		return fmt.Errorf("restic init: %w", werr)
+		return err
 	}
 	return nil
 }
 // RunPrune runs `restic prune` against the repo described by e.
 //
 // Pruning needs delete access on the rest-server repo, so the caller
 // must have loaded the *admin* credential pair into Env.RepoUsername
 // and Env.RepoPassword before calling this.
 //
 // Prune has no --json output that is useful to us; its plain-text
 // stdout/stderr lines are all forwarded to handle, which serves as the
 // operator's live progress view. Any failure from runWithPump is
 // returned unchanged.
 func (e Env) RunPrune(ctx context.Context, handle LineHandler) error {
 	prune := exec.CommandContext(ctx, e.Bin, "prune")
 	prune.Dir = e.WorkDir
 	prune.Env = e.envSlice()
 	return runWithPump(prune, handle)
 }
 // runWithPump runs an already-configured cmd to completion while
 // streaming its output.
 //
 // stdout and stderr are each piped into pumpPlain on their own
 // goroutine, with handle as the sink. A pump error does not fail the
 // run: it is reported to handle as an "event" line, or dropped when
 // handle is nil. Pipe-setup, start and exit errors are returned
 // wrapped with a "restic <verb>" prefix, where the verb is cmd.Args[1]
 // when present (plain "restic" otherwise).
 func runWithPump(cmd *exec.Cmd, handle LineHandler) error {
 	verb := "restic"
 	if len(cmd.Args) >= 2 {
 		verb = "restic " + cmd.Args[1]
 	}
 	outPipe, err := cmd.StdoutPipe()
 	if err != nil {
 		return fmt.Errorf("%s: stdout pipe: %w", verb, err)
 	}
 	errPipe, err := cmd.StderrPipe()
 	if err != nil {
 		return fmt.Errorf("%s: stderr pipe: %w", verb, err)
 	}
 	if err = cmd.Start(); err != nil {
 		return fmt.Errorf("%s: start: %w", verb, err)
 	}
 	// Capacity 2: one slot per pump, so neither goroutine blocks on send.
 	results := make(chan error, 2)
 	go func() { results <- pumpPlain(outPipe, "stdout", handle) }()
 	go func() { results <- pumpPlain(errPipe, "stderr", handle) }()
 	// Collect both pump results before calling Wait, so all pipe reads
 	// have finished by the time the command is reaped.
 	for pending := 2; pending > 0; pending-- {
 		pumpErr := <-results
 		if pumpErr == nil || handle == nil {
 			continue
 		}
 		handle("event", fmt.Sprintf("pump error: %v", pumpErr), nil)
 	}
 	if err = cmd.Wait(); err != nil {
 		return fmt.Errorf("%s: %w", verb, err)
 	}
 	return nil
 }
 // RunUnlock runs `restic unlock` for the repo described by e, streaming
 // every output line to handle. It returns nil when the command exits
 // cleanly and the runWithPump error otherwise.
 func (e Env) RunUnlock(ctx context.Context, handle LineHandler) error {
 	unlock := exec.CommandContext(ctx, e.Bin, "unlock")
 	unlock.Dir = e.WorkDir
 	unlock.Env = e.envSlice()
 	return runWithPump(unlock, handle)
 }
 // RepoStats mirrors `restic stats --json --mode raw-data` output.
 // Each field is decoded from the JSON key named in its struct tag;
 // RunStats populates it from a JSON object line on restic's stdout.
 // NOTE(review): sizes are presumably byte counts — confirm against
 // the restic documentation before displaying units.
 type RepoStats struct {
 	// TotalSize is restic's "total_size" value.
 	TotalSize         int64 `json:"total_size"`
 	// TotalUncompressed is restic's "total_uncompressed_size" value.
 	TotalUncompressed int64 `json:"total_uncompressed_size"`
 	// SnapshotsCount is restic's "snapshots_count" value.
 	SnapshotsCount    int64 `json:"snapshots_count"`
 	// TotalFileCount is restic's "total_file_count" value.
 	TotalFileCount    int64 `json:"total_file_count"`
 	// TotalBlobCount is restic's "total_blob_count" value.
 	TotalBlobCount    int64 `json:"total_blob_count"`
 }
 // RunStats runs `restic stats --json --mode raw-data` and decodes the
 // JSON object restic prints on stdout.
 //
 // Every output line is still forwarded to handle (when non-nil) so the
 // caller can log the raw stream. A stdout line is treated as a
 // candidate when it starts with "{"; if several candidates decode
 // successfully the last one wins. Returns the runWithPump error if the
 // command fails, or a "no JSON in output" error if the command
 // succeeded but no candidate line decoded.
 func (e Env) RunStats(ctx context.Context, handle LineHandler) (*RepoStats, error) {
 	cmd := exec.CommandContext(ctx, e.Bin, "stats", "--json", "--mode", "raw-data")
 	cmd.Dir = e.WorkDir
 	cmd.Env = e.envSlice()
 	var parsed *RepoStats
 	tee := func(stream, line string, ev any) {
 		if candidate := stream == "stdout" && strings.HasPrefix(line, "{"); candidate {
 			// Decode into a fresh value so a failed decode never
 			// disturbs an earlier successful result.
 			decoded := new(RepoStats)
 			if err := json.Unmarshal([]byte(line), decoded); err == nil {
 				parsed = decoded
 			}
 		}
 		if handle == nil {
 			return
 		}
 		handle(stream, line, ev)
 	}
 	if err := runWithPump(cmd, tee); err != nil {
 		return nil, err
 	}
 	if parsed != nil {
 		return parsed, nil
 	}
 	return nil, fmt.Errorf("restic stats: no JSON in output")
 }
 // CheckResult summarizes a `restic check` invocation. LockPresent is
 // true if the stderr stream contained a stale-lock signal (caller is
 // expected to surface this in the UI so the operator can run unlock).
 // ErrorsFound is true if check exited with a non-zero status (errors
 // detected in repo metadata).
 type CheckResult struct {
 	// LockPresent is set by RunCheck when any stderr line contains
 	// "stale lock" or "already locked".
 	LockPresent bool
 	// ErrorsFound is set by RunCheck when the command's failure is an
 	// *exec.ExitError, i.e. the process ran and exited unsuccessfully.
 	ErrorsFound bool
 }
 // RunCheck executes `restic check`, optionally with --read-data-subset.
 //
 // subsetPct <= 0 omits the flag; a positive value passes
 // `--read-data-subset N%`. Each output line is sniffed (stderr only)
 // for lock signals and then forwarded to handle when handle is non-nil.
 //
 // Returns:
 //   - (res, nil) on a clean exit.
 //   - (res with ErrorsFound=true, nil) when restic ran and exited
 //     non-zero on its own — that is a check result, not a wrapper
 //     failure, so the caller can persist last_check_status.
 //   - (res, err) when the invocation itself broke (pipe/start failure,
 //     binary missing, ...) or when ctx was cancelled or timed out.
 //
 // res carries whatever was sniffed from stderr in every case.
 func (e Env) RunCheck(ctx context.Context, subsetPct int, handle LineHandler) (CheckResult, error) {
 	args := []string{"check"}
 	if subsetPct > 0 {
 		args = append(args, "--read-data-subset", fmt.Sprintf("%d%%", subsetPct))
 	}
 	cmd := exec.CommandContext(ctx, e.Bin, args...)
 	cmd.Env = e.envSlice()
 	cmd.Dir = e.WorkDir
 	var res CheckResult
 	sniff := func(stream, line string, ev any) {
 		if stream == "stderr" {
 			if strings.Contains(line, "stale lock") || strings.Contains(line, "already locked") {
 				res.LockPresent = true
 			}
 		}
 		if handle != nil {
 			handle(stream, line, ev)
 		}
 	}
 	err := runWithPump(cmd, sniff)
 	if err == nil {
 		return res, nil
 	}
 	// When ctx is cancelled or expires, exec.CommandContext kills the
 	// process, which also surfaces as an *exec.ExitError. That is an
 	// interrupted check, not evidence of repo corruption, so it must
 	// not be absorbed into ErrorsFound below.
 	if ctx.Err() != nil {
 		return res, err
 	}
 	// restic check exits non-zero when it finds problems; report that
 	// through the result and reserve the error return for invocations
 	// that are actually broken (binary missing, etc).
 	var ee *exec.ExitError
 	if errors.As(err, &ee) {
 		res.ErrorsFound = true
 		return res, nil
 	}
 	return res, err
 }
 func pumpPlain(r io.Reader, stream string, handle LineHandler) error {
 	scanner := bufio.NewScanner(r)
 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
@@ -0,0 +1,193 @@
 package restic
 import (
 	"context"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 )
 // setupScriptBin creates an executable fake `restic` shell script in a
 // fresh temp directory and returns its path. scriptBody is the script
 // content without the shebang; "#!/bin/sh" is prepended and a trailing
 // newline appended automatically.
 //
 // The script is written to "<path>.tmp" and then renamed into place
 // rather than written at its final path — see the matching helper in
 // internal/agent/runner/runner_test.go for the ETXTBSY race rationale.
 // The same fix is applied here so this helper doesn't lose the race
 // the next time CI gets unlucky.
 func setupScriptBin(t *testing.T, scriptBody string) string {
 	t.Helper()
 	target := filepath.Join(t.TempDir(), "restic")
 	staging := target + ".tmp"
 	script := []byte("#!/bin/sh\n" + scriptBody + "\n")
 	err := os.WriteFile(staging, script, 0o755)
 	if err != nil {
 		t.Fatalf("setupScriptBin: write tmp: %v", err)
 	}
 	if err = os.Rename(staging, target); err != nil {
 		t.Fatalf("setupScriptBin: rename: %v", err)
 	}
 	return target
 }
 // captureLines builds a LineHandler that records every call as a
 // "stream:line" string and returns it together with a pointer to the
 // recording slice. There is no locking, so it is only safe while a
 // single goroutine invokes the handler at a time.
 func captureLines() (*[]string, LineHandler) {
 	captured := new([]string)
 	record := func(stream, line string, _ any) {
 		*captured = append(*captured, stream+":"+line)
 	}
 	return captured, record
 }
 // --- B1: RunPrune + B2: RunCheck ---
 func TestRunPruneInvokesPrune(t *testing.T) {
 	// Shell script that echoes its args; "prune" should appear in output.
 	bin := setupScriptBin(t, `echo "$@"`)
 	env := Env{Bin: bin}
 	lines, h := captureLines()
 	if err := env.RunPrune(context.Background(), h); err != nil {
 		t.Fatalf("RunPrune returned error: %v", err)
 	}
 	for _, l := range *lines {
 		if strings.Contains(l, "prune") {
 			return
 		}
 	}
 	t.Fatalf("expected 'prune' in captured output; got: %v", *lines)
 }
 // --- B2: RunCheck ---
 func TestRunCheckLockSniff(t *testing.T) {
 	cases := []struct {
 		name       string
 		stderrLine string
 		wantLocked bool
 	}{
 		{"stale lock", "Found stale lock from PID 1234", true},
 		{"already locked", "repository is already locked exclusively", true},
 		{"benign mention", "subdir/locked-file ok", false},
 		{"empty", "", false},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			// Script emits the line on stderr, then exits 0.
 			script := fmt.Sprintf(`printf '%%s\n' %q >&2`, c.stderrLine)
 			bin := setupScriptBin(t, script)
 			env := Env{Bin: bin}
 			res, err := env.RunCheck(context.Background(), 0, nil)
 			if err != nil {
 				t.Fatalf("RunCheck returned unexpected error: %v", err)
 			}
 			if res.LockPresent != c.wantLocked {
 				t.Fatalf("LockPresent: got %v, want %v (line: %q)", res.LockPresent, c.wantLocked, c.stderrLine)
 			}
 			if res.ErrorsFound {
 				t.Fatal("expected ErrorsFound=false")
 			}
 		})
 	}
 }
 func TestRunCheckErrorsFoundOnExit1(t *testing.T) {
 	bin := setupScriptBin(t, `exit 1`)
 	env := Env{Bin: bin}
 	res, err := env.RunCheck(context.Background(), 0, nil)
 	if err != nil {
 		t.Fatalf("RunCheck returned unexpected error (should have absorbed exit 1): %v", err)
 	}
 	if !res.ErrorsFound {
 		t.Fatal("expected ErrorsFound=true for exit 1")
 	}
 }
 func TestRunCheckSubsetArg(t *testing.T) {
 	bin := setupScriptBin(t, `echo "$@"`)
 	env := Env{Bin: bin}
 	lines, h := captureLines()
 	if _, err := env.RunCheck(context.Background(), 25, h); err != nil {
 		t.Fatalf("RunCheck: %v", err)
 	}
 	want := "--read-data-subset 25%"
 	for _, l := range *lines {
 		if strings.Contains(l, want) {
 			return
 		}
 	}
 	t.Fatalf("expected %q in captured output; got: %v", want, *lines)
 }
 // --- B3: RunUnlock + RunStats ---
 func TestRunUnlockInvokesUnlock(t *testing.T) {
 	bin := setupScriptBin(t, `echo "$@"`)
 	env := Env{Bin: bin}
 	lines, h := captureLines()
 	if err := env.RunUnlock(context.Background(), h); err != nil {
 		t.Fatalf("RunUnlock: %v", err)
 	}
 	for _, l := range *lines {
 		if strings.Contains(l, "unlock") {
 			return
 		}
 	}
 	t.Fatalf("expected 'unlock' in captured output; got: %v", *lines)
 }
 func TestRunStatsParsesJSON(t *testing.T) {
 	bin := setupScriptBin(t, `echo '{"total_size":1234,"total_uncompressed_size":5678,"snapshots_count":3,"total_file_count":100,"total_blob_count":50}'`)
 	env := Env{Bin: bin}
 	stats, err := env.RunStats(context.Background(), nil)
 	if err != nil {
 		t.Fatalf("RunStats: %v", err)
 	}
 	if stats.TotalSize != 1234 {
 		t.Fatalf("TotalSize: got %d, want 1234", stats.TotalSize)
 	}
 	if stats.TotalUncompressed != 5678 {
 		t.Fatalf("TotalUncompressed: got %d, want 5678", stats.TotalUncompressed)
 	}
 	if stats.SnapshotsCount != 3 {
 		t.Fatalf("SnapshotsCount: got %d, want 3", stats.SnapshotsCount)
 	}
 	if stats.TotalFileCount != 100 {
 		t.Fatalf("TotalFileCount: got %d, want 100", stats.TotalFileCount)
 	}
 	if stats.TotalBlobCount != 50 {
 		t.Fatalf("TotalBlobCount: got %d, want 50", stats.TotalBlobCount)
 	}
 }
 func TestRunStatsErrorsWithoutJSON(t *testing.T) {
 	bin := setupScriptBin(t, `echo "no json here"`)
 	env := Env{Bin: bin}
 	_, err := env.RunStats(context.Background(), nil)
 	if err == nil {
 		t.Fatal("expected error when no JSON in output")
 	}
 	if !strings.Contains(err.Error(), "no JSON in output") {
 		t.Fatalf("unexpected error: %v", err)
 	}
 }
 func TestRunStatsZeroSnapshots(t *testing.T) {
 	// Confirms RunStats succeeds and returns a valid *RepoStats when the
 	// repo has no snapshots (snapshots_count=0). A regression that
 	// re-added a "SnapshotsCount > 0" guard would return an error here.
 	bin := setupScriptBin(t, `echo '{"total_size":0,"total_uncompressed_size":0,"snapshots_count":0,"total_file_count":0,"total_blob_count":0}'`)
 	env := Env{Bin: bin}
 	stats, err := env.RunStats(context.Background(), nil)
 	if err != nil {
 		t.Fatalf("RunStats with zero snapshots returned unexpected error: %v", err)
 	}
 	if stats == nil {
 		t.Fatal("expected non-nil *RepoStats, got nil")
 	}
 	if stats.SnapshotsCount != 0 {
 		t.Fatalf("SnapshotsCount: got %d, want 0", stats.SnapshotsCount)
 	}
 }
@@ -167,7 +167,7 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
 	// /api/hosts/{id}/repo-credentials. Failing the whole enrolment
 	// here would leave a half-burned token + an orphan host.
 	if encForHost != "" {
-		if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, encForHost); err != nil {
+		if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindRepo, encForHost); err != nil {
 			slog.Error("enrollment: set host credentials failed",
 				"host_id", hostID, "err", err)
 		}
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"log/slog"
 	stdhttp "net/http"
 	"time"
@@ -39,7 +40,7 @@ func (s *Server) handleGetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
 		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
-	enc, err := s.deps.Store.GetHostCredentials(r.Context(), hostID)
+	enc, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindRepo)
 	if err != nil {
 		if errors.Is(err, store.ErrNotFound) {
 			writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
@@ -85,7 +86,8 @@ type hostRepoCredsRequest struct {
 // preserved. Re-encrypts under host_id and pushes a config.update
 // over the WS if the agent is connected.
 func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
-	if !s.authedUser(r) {
+	user, ok := s.requireUser(r)
 	if !ok {
 		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 		return
 	}
@@ -107,7 +109,7 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
 	// Merge with the existing row, if any.
 	existing := repoCredsBlob{}
-	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID); err == nil {
+	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindRepo); err == nil {
 		plain, err := s.deps.AEAD.Decrypt(cur, []byte("host:"+hostID))
 		if err != nil {
 			writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
@@ -139,13 +141,14 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
-	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, enc); err != nil {
+	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindRepo, enc); err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
 		ID:         ulid.Make().String(),
 		UserID:     &user.ID,
 		Actor:      "user",
 		Action:     "host.repo_credentials_set",
 		TargetKind: ptr("host"),
@@ -184,6 +187,209 @@ func (s *Server) pushRepoCredsToAgent(ctx context.Context, hostID string, blob r
 	return nil
 }
 // handleGetAdminCredentials serves a redacted view of a host's admin
 // credentials so the UI can pre-fill the edit form: repo URL and
 // username in clear, the password reduced to a has_password flag.
 //
 // Responses: 401 when the request is not authenticated, 400 when the
 // {id} URL param is empty, 404 "not_set" when no admin row exists,
 // 500 "decrypt_failed" when the stored blob does not decrypt under the
 // admin AAD ("host:<id>:admin"), 500 "internal" on any other store or
 // decode failure, 200 otherwise.
 func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	if !s.authedUser(r) {
 		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 		return
 	}
 	hostID := chi.URLParam(r, "id")
 	if hostID == "" {
 		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
 	sealed, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindAdmin)
 	switch {
 	case err == nil:
 		// Row found; carry on.
 	case errors.Is(err, store.ErrNotFound):
 		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
 		return
 	default:
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	adminAAD := []byte("host:" + hostID + ":admin")
 	plain, err := s.deps.AEAD.Decrypt(sealed, adminAAD)
 	if err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
 		return
 	}
 	var creds repoCredsBlob
 	if err = json.Unmarshal(plain, &creds); err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	view := hostRepoCredsView{
 		RepoURL:      creds.RepoURL,
 		RepoUsername: creds.RepoUsername,
 		HasPassword:  creds.RepoPassword != "",
 	}
 	writeJSON(w, stdhttp.StatusOK, view)
 }
 // handleSetAdminCredentials updates a host's admin credentials (the
 // prune-capable slot, store.CredKindAdmin).
 //
 // The request is a partial patch: fields present in the body overwrite
 // the stored values, absent fields keep what is already on file. After
 // merging, repo_url and repo_password must both be non-empty or the
 // request is rejected with 400 "missing_field". The merged blob is
 // encrypted under the admin AAD ("host:<id>:admin"), stored, and an
 // audit row "host.admin_credentials_set" is appended for the acting
 // user. If the agent is connected, a config.update is pushed over the
 // WS as well; a failed push is ignored. Replies 204 on success.
 func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	user, ok := s.requireUser(r)
 	if !ok {
 		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 		return
 	}
 	ctx := r.Context()
 	hostID := chi.URLParam(r, "id")
 	if hostID == "" {
 		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
 	if _, err := s.deps.Store.GetHost(ctx, hostID); err != nil {
 		writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
 		return
 	}
 	var patch hostRepoCredsRequest
 	if err := json.NewDecoder(r.Body).Decode(&patch); err != nil {
 		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
 		return
 	}
 	// Start from the stored admin row (if any) and layer the patch on top.
 	adminAAD := []byte("host:" + hostID + ":admin")
 	var merged repoCredsBlob
 	stored, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
 	switch {
 	case err == nil:
 		plain, decErr := s.deps.AEAD.Decrypt(stored, adminAAD)
 		if decErr != nil {
 			writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
 			return
 		}
 		// Decode error deliberately ignored: an unparseable blob just
 		// leaves merged at whatever was decoded so far.
 		_ = json.Unmarshal(plain, &merged)
 	case errors.Is(err, store.ErrNotFound):
 		// No admin row yet — the patch alone must supply everything.
 	default:
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	if v := patch.RepoURL; v != nil {
 		merged.RepoURL = *v
 	}
 	if v := patch.RepoUsername; v != nil {
 		merged.RepoUsername = *v
 	}
 	if v := patch.RepoPassword; v != nil {
 		merged.RepoPassword = *v
 	}
 	if merged.RepoURL == "" || merged.RepoPassword == "" {
 		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
 			"repo_url and repo_password must end up non-empty")
 		return
 	}
 	sealed, err := s.encryptRepoCreds(merged, adminAAD)
 	if err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	if err = s.deps.Store.SetHostCredentials(ctx, hostID, store.CredKindAdmin, sealed); err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	audit := store.AuditEntry{
 		ID:         ulid.Make().String(),
 		UserID:     &user.ID,
 		Actor:      "user",
 		Action:     "host.admin_credentials_set",
 		TargetKind: ptr("host"),
 		TargetID:   &hostID,
 		TS:         nowUTC(),
 	}
 	_ = s.deps.Store.AppendAudit(ctx, audit)
 	// Push to the agent if it's connected. Non-fatal: the next
 	// handleRunRepoPrune call will push on-demand.
 	if hub := s.deps.Hub; hub != nil && hub.Connected(hostID) {
 		_ = s.pushAdminCredsToAgent(ctx, hostID)
 	}
 	w.WriteHeader(stdhttp.StatusNoContent)
 }
 // handleDeleteAdminCredentials deletes the host's admin credentials
 // row. Replies 204 on success and 404 "not_set" when there was no row
 // to delete; an audit row "host.admin_credentials_deleted" is appended
 // for the acting user on success.
 //
 // Nothing is pushed to the agent: its local admin slot stays as-is
 // until the next deployment/reinstall.
 func (s *Server) handleDeleteAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	user, ok := s.requireUser(r)
 	if !ok {
 		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 		return
 	}
 	ctx := r.Context()
 	hostID := chi.URLParam(r, "id")
 	if hostID == "" {
 		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
 	// Probe for the row first so a missing one yields a clean 404.
 	_, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
 	switch {
 	case err == nil:
 		// Row exists; go on to delete it.
 	case errors.Is(err, store.ErrNotFound):
 		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
 		return
 	default:
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	if err = s.deps.Store.DeleteHostCredentials(ctx, hostID, store.CredKindAdmin); err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	audit := store.AuditEntry{
 		ID:         ulid.Make().String(),
 		UserID:     &user.ID,
 		Actor:      "user",
 		Action:     "host.admin_credentials_deleted",
 		TargetKind: ptr("host"),
 		TargetID:   &hostID,
 		TS:         nowUTC(),
 	}
 	_ = s.deps.Store.AppendAudit(ctx, audit)
 	w.WriteHeader(stdhttp.StatusNoContent)
 }
 // pushAdminCredsToAgent loads the host's admin credentials, decrypts
 // them under the admin AAD ("host:<id>:admin") and sends them to the
 // agent as a config.update with Slot "admin", bounded by a 5s send
 // timeout. Callers:
 //   - handleSetAdminCredentials (immediate push when operator saves).
 //   - handleRunRepoPrune (on-demand push right before a prune dispatch).
 //
 // The store error is returned unwrapped, so store.ErrNotFound reaches
 // the caller when no admin row exists (the prune endpoint uses this to
 // refuse with a clear message). s.deps.Hub is used without a nil check,
 // so callers must ensure a hub is configured.
 func (s *Server) pushAdminCredsToAgent(ctx context.Context, hostID string) error {
 	sealed, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
 	if err != nil {
 		// Deliberately unwrapped so ErrNotFound bubbles as-is.
 		return err
 	}
 	adminAAD := []byte("host:" + hostID + ":admin")
 	plain, err := s.deps.AEAD.Decrypt(sealed, adminAAD)
 	if err != nil {
 		return fmt.Errorf("push admin creds: decrypt: %w", err)
 	}
 	var creds repoCredsBlob
 	if err = json.Unmarshal(plain, &creds); err != nil {
 		return fmt.Errorf("push admin creds: parse: %w", err)
 	}
 	payload := api.ConfigUpdatePayload{
 		Slot:         "admin",
 		RepoURL:      creds.RepoURL,
 		RepoUsername: creds.RepoUsername,
 		RepoPassword: creds.RepoPassword,
 	}
 	env, err := api.Marshal(api.MsgConfigUpdate, "", payload)
 	if err != nil {
 		return err
 	}
 	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
 	defer cancel()
 	return s.deps.Hub.Send(sendCtx, hostID, env)
 }
 // onAgentHello runs synchronously inside the WS handler immediately
 // after a successful hello. It loads the host's encrypted creds (if
 // any), decrypts, and ships them down the conn as a config.update so
@@ -205,6 +411,11 @@ func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn)
 	// just no-ops. Skipped silently when the host has no creds yet —
 	// the next hello after the operator binds creds will dispatch.
 	s.maybeAutoInit(ctx, hostID, conn)
 	// Drain any pending runs that accumulated while this host was
 	// offline. Use a fresh context — the hello-bound ctx is short-lived,
 	// and the drain may take seconds across many rows. A non-blocking
 	// goroutine keeps the hello path snappy.
 	go s.DrainPending(context.Background(), hostID)
 }
 // maybeAutoInit dispatches a `restic init` job iff the host has no
@@ -212,7 +423,7 @@ func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn)
 // them the runner can't talk to the repo). We rely on Restic's
 // idempotent init for re-runs.
 func (s *Server) maybeAutoInit(ctx context.Context, hostID string, conn *ws.Conn) {
-	if _, err := s.deps.Store.GetHostCredentials(ctx, hostID); err != nil {
+	if _, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindRepo); err != nil {
 		// No creds bound yet — operator hasn't supplied them. The next
 		// hello after creds land will pick this up.
 		return
@@ -266,7 +477,7 @@ func (s *Server) maybeAutoInit(ctx context.Context, hostID string, conn *ws.Conn
 // credentials. Silent no-op when the host has nothing on file
 // (the operator hasn't bound creds to it yet).
 func (s *Server) pushRepoCredsOnHello(ctx context.Context, hostID string, conn *ws.Conn) {
-	enc, err := s.deps.Store.GetHostCredentials(ctx, hostID)
+	enc, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindRepo)
 	if err != nil {
 		if !errors.Is(err, store.ErrNotFound) {
 			slog.Warn("on-hello: load host creds", "host_id", hostID, "err", err)
@@ -5,6 +5,9 @@ import (
 	"encoding/json"
 	"testing"
 	"time"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // TestEnrollmentTransfersRepoCreds verifies the round-trip:
@@ -57,12 +60,12 @@ func TestEnrollmentTransfersRepoCreds(t *testing.T) {
 		hostID, "host42", "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
 		t.Fatalf("insert host: %v", err)
 	}
-	if err := st.SetHostCredentials(ctx, hostID, encForHost); err != nil {
+	if err := st.SetHostCredentials(ctx, hostID, store.CredKindRepo, encForHost); err != nil {
 		t.Fatalf("set host credentials: %v", err)
 	}
 	// host_credentials row should now hold the host-bound ciphertext.
-	got, err := st.GetHostCredentials(ctx, hostID)
+	got, err := st.GetHostCredentials(ctx, hostID, store.CredKindRepo)
 	if err != nil {
 		t.Fatalf("get host creds: %v", err)
 	}
@@ -105,3 +108,263 @@ func TestEnrollmentTokenWithoutCreds(t *testing.T) {
 		t.Errorf("token without creds should return empty blob; got %q", att.EncRepoCreds)
 	}
 }
 // ----- admin credentials tests ----------------------------------------
 // TestAdminCredentialsRoundTrip walks the admin-credentials endpoint
 // through its full lifecycle: GET (404) → PUT (204) → GET (200,
 // redacted view) → DELETE (204) → GET (404).
 func TestAdminCredentialsRoundTrip(t *testing.T) {
 	t.Parallel()
 	_, url, st := newTestServerWithHub(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "admin-creds-host")
 	endpoint := "/api/hosts/" + hostID + "/admin-credentials"
 	// Mark init done so auto-init doesn't interfere.
 	initJob := store.Job{
 		ID:        "init-" + hostID,
 		HostID:    hostID,
 		Kind:      string(api.JobInit),
 		ActorKind: "system",
 		CreatedAt: time.Now().UTC(),
 	}
 	_ = st.CreateJob(context.Background(), initJob)
 	// Nothing stored yet → 404.
 	status, body := doJSON(t, url, "GET", endpoint, nil, cookie)
 	if status != 404 {
 		t.Fatalf("before set: want 404, got %d body=%+v", status, body)
 	}
 	// Store admin creds.
 	newCreds := map[string]any{
 		"repo_url":      "rest:http://admin.example/host",
 		"repo_username": "admin",
 		"repo_password": "s3cur3",
 	}
 	status, body = doJSON(t, url, "PUT", endpoint, newCreds, cookie)
 	if status != 204 {
 		t.Fatalf("set: want 204, got %d body=%+v", status, body)
 	}
 	// Read back: URL and username in clear, password only as a flag.
 	status, body = doJSON(t, url, "GET", endpoint, nil, cookie)
 	if status != 200 {
 		t.Fatalf("get after set: want 200, got %d body=%+v", status, body)
 	}
 	if body["repo_url"] != "rest:http://admin.example/host" {
 		t.Errorf("repo_url: %+v", body)
 	}
 	if body["repo_username"] != "admin" {
 		t.Errorf("repo_username: %+v", body)
 	}
 	if body["has_password"] != true {
 		t.Errorf("has_password: %+v", body)
 	}
 	// Remove the row.
 	status, _ = doJSON(t, url, "DELETE", endpoint, nil, cookie)
 	if status != 204 {
 		t.Fatalf("delete: want 204, got %d", status)
 	}
 	// Gone again → 404.
 	status, _ = doJSON(t, url, "GET", endpoint, nil, cookie)
 	if status != 404 {
 		t.Fatalf("after delete: want 404, got %d", status)
 	}
 }
 // TestAdminCredsAADIsolatedFromRepo proves the AAD scoping is real: a
 // blob sealed under the repo AAD ("host:<id>") is planted in the admin
 // kind slot, and a GET on the admin endpoint — which decrypts with the
 // admin AAD — must fail with 500 decrypt_failed.
 func TestAdminCredsAADIsolatedFromRepo(t *testing.T) {
 	t.Parallel()
 	srv, url, st := newTestServerWithHub(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "aad-isolation-host")
 	// Seal with the REPO AAD, which is the wrong one for the admin slot.
 	repoAAD := []byte("host:" + hostID)
 	planted := repoCredsBlob{
 		RepoURL:      "rest:http://r/x",
 		RepoPassword: "p",
 	}
 	sealed, err := srv.encryptRepoCreds(planted, repoAAD)
 	if err != nil {
 		t.Fatalf("encrypt: %v", err)
 	}
 	// Bypass the handler and write straight into the admin kind slot.
 	err = st.SetHostCredentials(context.Background(), hostID, store.CredKindAdmin, sealed)
 	if err != nil {
 		t.Fatalf("set host credentials: %v", err)
 	}
 	// The handler decrypts with the admin AAD, so this must be a 500.
 	status, body := doJSON(t, url, "GET", "/api/hosts/"+hostID+"/admin-credentials", nil, cookie)
 	if status != 500 {
 		t.Fatalf("want 500 (decrypt_failed), got %d body=%+v", status, body)
 	}
 	code, _ := body["code"].(string)
 	if code != "decrypt_failed" {
 		t.Errorf("want code=decrypt_failed, got %+v", body)
 	}
 }
 // TestAdminCredsPushOnSet checks the immediate push: with a fake agent
 // connected over WS, a PUT of admin creds must result in a
 // config.update carrying Slot "admin" and the new repo URL.
 func TestAdminCredsPushOnSet(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "admin-push-host")
 	cookie := loginAsAdmin(t, st)
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "admin-push-host")
 	// Swallow the on-hello burst (config.update for repo + schedule.set
 	// + possibly command.run(init)) before triggering the push.
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	// Saving admin creds should push them to the agent right away.
 	const wantURL = "rest:http://admin.example/h"
 	payload := map[string]any{
 		"repo_url":      wantURL,
 		"repo_username": "admin",
 		"repo_password": "prune-pass",
 	}
 	status, body := doJSON(t, ts.URL, "PUT", "/api/hosts/"+hostID+"/admin-credentials", payload, cookie)
 	if status != 204 {
 		t.Fatalf("set admin creds: want 204, got %d body=%+v", status, body)
 	}
 	// Read envelopes until the admin-slot config.update turns up or
 	// the deadline passes.
 	sawAdmin := false
 	deadline := time.Now().Add(3 * time.Second)
 	for time.Now().Before(deadline) {
 		env := readEnvelope(t, c)
 		if env.Type != api.MsgConfigUpdate {
 			continue
 		}
 		var p api.ConfigUpdatePayload
 		if err := env.UnmarshalPayload(&p); err != nil {
 			t.Fatalf("unmarshal config.update: %v", err)
 		}
 		if p.Slot != "admin" {
 			continue
 		}
 		sawAdmin = true
 		if p.RepoURL != wantURL {
 			t.Errorf("admin push: wrong URL %q", p.RepoURL)
 		}
 		break
 	}
 	if !sawAdmin {
 		t.Fatal("timed out waiting for config.update(slot=admin)")
 	}
 }
 // TestDeleteAdminCredentialsAuditLogged verifies that DELETE on the
 // admin-credentials endpoint writes an audit_log row with
 // action='host.admin_credentials_deleted' whose user_id is the acting
 // user's ID.
 func TestDeleteAdminCredentialsAuditLogged(t *testing.T) {
 	t.Parallel()
 	_, url, st := newTestServerWithHub(t)
 	cookie, userID := loginAsAdminWithID(t, st)
 	hostID := makeHost(t, st, "audit-del-host")
 	endpoint := "/api/hosts/" + hostID + "/admin-credentials"
 	// Seed admin creds so the delete has a row to act on.
 	seed := map[string]any{
 		"repo_url":      "rest:http://x/h",
 		"repo_password": "p",
 	}
 	status, body := doJSON(t, url, "PUT", endpoint, seed, cookie)
 	if status != 204 {
 		t.Fatalf("set: want 204, got %d body=%+v", status, body)
 	}
 	status, _ = doJSON(t, url, "DELETE", endpoint, nil, cookie)
 	if status != 204 {
 		t.Fatalf("delete: want 204, got %d", status)
 	}
 	// Look up the delete row in audit_log and check who it names.
 	rows, err := st.DB().QueryContext(context.Background(),
 		`SELECT action, user_id FROM audit_log WHERE target_id = ? AND target_kind = 'host' AND action = 'host.admin_credentials_deleted'`,
 		hostID)
 	if err != nil {
 		t.Fatalf("query audit: %v", err)
 	}
 	defer rows.Close()
 	matched := 0
 	for rows.Next() {
 		var (
 			action string
 			actor  *string
 		)
 		if err := rows.Scan(&action, &actor); err != nil {
 			t.Fatalf("scan: %v", err)
 		}
 		matched++
 		switch {
 		case actor == nil:
 			t.Error("audit row: user_id is NULL, want non-nil")
 		case *actor != userID:
 			t.Errorf("audit row: user_id=%q, want %q", *actor, userID)
 		}
 	}
 	if err := rows.Err(); err != nil {
 		t.Fatalf("rows: %v", err)
 	}
 	if matched == 0 {
 		t.Error("audit row with action='host.admin_credentials_deleted' not found")
 	}
 }
 // TestSetAdminCredentialsAuditCarriesUserID verifies that PUT
 // /api/hosts/{id}/admin-credentials writes an audit_log row with
 // action='host.admin_credentials_set' whose user_id is non-NULL and
 // matches the acting session's user.
 func TestSetAdminCredentialsAuditCarriesUserID(t *testing.T) {
 	t.Parallel()
 	_, url, st := newTestServerWithHub(t)
 	cookie, userID := loginAsAdminWithID(t, st)
 	hostID := makeHost(t, st, "audit-set-admin-host")
 	creds := map[string]any{
 		"repo_url":      "rest:http://admin.example/h",
 		"repo_password": "s3cr3t",
 	}
 	status, body := doJSON(t, url, "PUT", "/api/hosts/"+hostID+"/admin-credentials", creds, cookie)
 	if status != 204 {
 		t.Fatalf("set: want 204, got %d body=%+v", status, body)
 	}
 	// Look up the set row in audit_log and check who it names.
 	rows, err := st.DB().QueryContext(context.Background(),
 		`SELECT action, user_id FROM audit_log WHERE target_id = ? AND target_kind = 'host' AND action = 'host.admin_credentials_set'`,
 		hostID)
 	if err != nil {
 		t.Fatalf("query audit: %v", err)
 	}
 	defer rows.Close()
 	matched := 0
 	for rows.Next() {
 		var (
 			action string
 			actor  *string
 		)
 		if err := rows.Scan(&action, &actor); err != nil {
 			t.Fatalf("scan: %v", err)
 		}
 		matched++
 		switch {
 		case actor == nil:
 			t.Error("audit row: user_id is NULL, want non-nil")
 		case *actor != userID:
 			t.Errorf("audit row: user_id=%q, want %q", *actor, userID)
 		}
 	}
 	if err := rows.Err(); err != nil {
 		t.Fatalf("rows: %v", err)
 	}
 	if matched == 0 {
 		t.Error("audit row with action='host.admin_credentials_set' not found")
 	}
 }
@@ -72,7 +72,7 @@ func (s *Server) dispatchJob(ctx context.Context, user *store.User,
 }
 // dispatchJobWithPayload is dispatchJob's variant that lets callers
-// fill in structured fields (Includes/Excludes/Tag/RetentionPolicy)
+// fill in structured fields (Includes/Excludes/Tag/ForgetGroups/RequiresAdminCreds)
 // — used by the per-source-group Run-now path. JobID is filled in
 // here; callers leave it zero on the input payload.
 func (s *Server) dispatchJobWithPayload(ctx context.Context, user *store.User,
@@ -0,0 +1,132 @@
 // maintenance_dispatch.go bridges the pure-logic maintenance.Ticker
 // (internal/server/maintenance) to the side-effecting world: checks
 // online state, builds the per-kind command.run payload, and calls
 // dispatchJobWithPayload — the same path operator-triggered Run-now
 // uses. Cadence-driven jobs are persisted with actor_kind="system"
 // (dispatchJobWithPayload tags it that way when user==nil).
 //
 // Maintenance fires deliberately do NOT queue to pending_runs when
 // the host is offline — five missed prunes on a laptop returning
 // from a week away is not what the operator wants. Skip + log; the
 // next 60s tick will re-evaluate.
 package http
 import (
 	"context"
 	"errors"
 	"log/slog"
 	"strconv"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // DispatchMaintenance walks the ticker's Decisions in order and hands
 // each one to dispatchJobWithPayload with a nil user. It returns
 // nothing; every skip or failure is reported through slog only:
 //   - host not connected to the Hub: skipped, whatever the kind;
 //   - "forget": skipped when buildForgetPayloadForHost reports no
 //     source group with a retention policy;
 //   - "prune": skipped when the admin credentials are absent or fail
 //     to load, or when pushing them to the agent fails; otherwise
 //     dispatched with RequiresAdminCreds set;
 //   - "check": dispatched with the decision's SubsetPct as the only
 //     argument;
 //   - anything else: logged as an unknown kind.
 //
 // A skipped decision is not queued here, so it is only retried if the
 // caller presents it again.
 func (s *Server) DispatchMaintenance(ctx context.Context, decisions []maintenance.Decision) {
 	for i := range decisions {
 		d := decisions[i]
 		if online := s.deps.Hub.Connected(d.HostID); !online {
 			slog.Info("maintenance: host offline, skipping",
 				"host_id", d.HostID, "kind", d.Kind)
 			continue
 		}
 		switch d.Kind {
 		case "forget":
 			payload, haveGroups := s.buildForgetPayloadForHost(ctx, d.HostID)
 			if !haveGroups {
 				slog.Info("maintenance: forget skipped — no source groups with retention",
 					"host_id", d.HostID)
 				continue
 			}
 			if _, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobForget, payload); code != "" {
 				slog.Warn("maintenance: forget dispatch failed",
 					"host_id", d.HostID, "code", code, "msg", msg)
 			}
 		case "prune":
 			// Prune needs the admin credential slot: confirm it exists
 			// server-side, then push it to the agent before the job.
 			_, credErr := s.deps.Store.GetHostCredentials(ctx, d.HostID, store.CredKindAdmin)
 			switch {
 			case credErr == nil:
 				// credentials present — carry on below
 			case errors.Is(credErr, store.ErrNotFound):
 				slog.Info("maintenance: prune skipped — no admin creds",
 					"host_id", d.HostID)
 				continue
 			default:
 				slog.Warn("maintenance: prune skipped — admin creds error",
 					"host_id", d.HostID, "err", credErr)
 				continue
 			}
 			if pushErr := s.pushAdminCredsToAgent(ctx, d.HostID); pushErr != nil {
 				slog.Warn("maintenance: prune push admin creds failed",
 					"host_id", d.HostID, "err", pushErr)
 				continue
 			}
 			if _, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobPrune,
 				api.CommandRunPayload{RequiresAdminCreds: true}); code != "" {
 				slog.Warn("maintenance: prune dispatch failed",
 					"host_id", d.HostID, "code", code, "msg", msg)
 			}
 		case "check":
 			subset := strconv.Itoa(d.SubsetPct)
 			if _, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobCheck,
 				api.CommandRunPayload{Args: []string{subset}}); code != "" {
 				slog.Warn("maintenance: check dispatch failed",
 					"host_id", d.HostID, "code", code, "msg", msg)
 			}
 		default:
 			slog.Warn("maintenance: unknown decision kind",
 				"host_id", d.HostID, "kind", d.Kind)
 		}
 	}
 }
 // buildForgetPayloadForHost lists the host's source groups and emits
 // one ForgetGroup (tagged with the group name) for each group whose
 // retention policy has at least one keep-* value set. The boolean is
 // false when listing fails (the error is logged here) or when no
 // group qualifies; the payload is the zero value in both cases.
 func (s *Server) buildForgetPayloadForHost(ctx context.Context, hostID string) (api.CommandRunPayload, bool) {
 	var none api.CommandRunPayload
 	groups, err := s.deps.Store.ListSourceGroupsByHost(ctx, hostID)
 	if err != nil {
 		slog.Warn("maintenance: list source groups failed", "host_id", hostID, "err", err)
 		return none, false
 	}
 	selected := make([]api.ForgetGroup, 0, len(groups))
 	for _, grp := range groups {
 		if !isEmptyRetention(grp.RetentionPolicy) {
 			selected = append(selected, api.ForgetGroup{
 				Tag:    grp.Name,
 				Policy: forgetPolicyJSONFromStore(grp.RetentionPolicy),
 			})
 		}
 	}
 	if len(selected) > 0 {
 		return api.CommandRunPayload{ForgetGroups: selected}, true
 	}
 	return none, false
 }
 // isEmptyRetention reports whether none of the policy's keep-* fields
 // (last, hourly, daily, weekly, monthly, yearly) is set.
 func isEmptyRetention(p store.RetentionPolicy) bool {
 	anySet := p.KeepLast != nil || p.KeepHourly != nil ||
 		p.KeepDaily != nil || p.KeepWeekly != nil ||
 		p.KeepMonthly != nil || p.KeepYearly != nil
 	return !anySet
 }
 // forgetPolicyJSONFromStore converts the store's retention policy
 // into its wire counterpart by copying each keep-* field across
 // unchanged. The conversion lives in this package so that
 // internal/api does not have to import store.
 func forgetPolicyJSONFromStore(p store.RetentionPolicy) api.ForgetPolicyJSON {
 	var wire api.ForgetPolicyJSON
 	wire.KeepLast = p.KeepLast
 	wire.KeepHourly = p.KeepHourly
 	wire.KeepDaily = p.KeepDaily
 	wire.KeepWeekly = p.KeepWeekly
 	wire.KeepMonthly = p.KeepMonthly
 	wire.KeepYearly = p.KeepYearly
 	return wire
 }
@@ -0,0 +1,304 @@
 // maintenance_dispatch_test.go — exercises Server.DispatchMaintenance
 // directly (one Decision at a time). Reuses the same fake-agent
 // harness as p2r01_ws_test / repo_ops_test: a real Server with a
 // real Hub, plus a websocket connected as the host. We then push
 // Decisions through DispatchMaintenance and assert the envelopes
 // the agent receives + the job rows that land.
 package http
 import (
 	"context"
 	"encoding/json"
 	"testing"
 	"time"
 	"github.com/coder/websocket"
 	"github.com/oklog/ulid/v2"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // readNextCommandRun reads from c until it decodes a command.run
 // envelope, and returns that envelope's payload. Non-text frames,
 // envelopes of other types and anything that fails to decode are
 // skipped. It returns nil once deadline has passed, and also as soon
 // as a single Read fails — each Read is bounded by its own 600ms
 // timeout, so a quiet socket yields nil well before a long deadline.
 func readNextCommandRun(t *testing.T, c *websocket.Conn, deadline time.Time) *api.CommandRunPayload {
 	t.Helper()
 	for {
 		if !time.Now().Before(deadline) {
 			return nil
 		}
 		readCtx, stop := context.WithTimeout(context.Background(), 600*time.Millisecond)
 		frameType, data, err := c.Read(readCtx)
 		stop()
 		if err != nil {
 			return nil
 		}
 		if frameType != websocket.MessageText {
 			continue
 		}
 		var env api.Envelope
 		if json.Unmarshal(data, &env) != nil || env.Type != api.MsgCommandRun {
 			continue
 		}
 		payload := new(api.CommandRunPayload)
 		if env.UnmarshalPayload(payload) != nil {
 			continue
 		}
 		return payload
 	}
 }
 // TestDispatchMaintenanceSkipsOfflineHosts: the host is enrolled but
 // never dials in, so a check Decision for it must leave the jobs
 // table without any row for that host.
 func TestDispatchMaintenanceSkipsOfflineHosts(t *testing.T) {
 	t.Parallel()
 	srv, _, st := rawTestServer(t)
 	hostID, _ := enrolHostForWS(t, srv, st, "offline-host")
 	decisions := []maintenance.Decision{{HostID: hostID, Kind: "check", SubsetPct: 10}}
 	srv.DispatchMaintenance(context.Background(), decisions)
 	var jobRows int
 	row := st.DB().QueryRow(`SELECT COUNT(*) FROM jobs WHERE host_id = ?`, hostID)
 	if err := row.Scan(&jobRows); err != nil {
 		t.Fatalf("count: %v", err)
 	}
 	if jobRows != 0 {
 		t.Errorf("offline host produced %d job rows; want 0", jobRows)
 	}
 }
 // TestDispatchMaintenanceForgetShipsForgetGroups: the connected host
 // owns two source groups, "documents" (KeepLast=7) and "ephemeral"
 // (no retention). A forget Decision must produce a command.run whose
 // ForgetGroups holds only "documents" with its policy intact, and
 // the forget job row must be stored with actor_kind=system.
 func TestDispatchMaintenanceForgetShipsForgetGroups(t *testing.T) {
 	t.Parallel()
 	bg := context.Background()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "forget-host")
 	seedInitJob(t, st, hostID)
 	keepLast := 7
 	withRetention := &store.SourceGroup{
 		ID: ulid.Make().String(), HostID: hostID, Name: "documents",
 		Includes:        []string{"/home/documents"},
 		RetentionPolicy: store.RetentionPolicy{KeepLast: &keepLast},
 	}
 	if err := st.CreateSourceGroup(bg, withRetention); err != nil {
 		t.Fatalf("group docs: %v", err)
 	}
 	withoutRetention := &store.SourceGroup{
 		ID: ulid.Make().String(), HostID: hostID, Name: "ephemeral",
 		Includes: []string{"/tmp"},
 	}
 	if err := st.CreateSourceGroup(bg, withoutRetention); err != nil {
 		t.Fatalf("group eph: %v", err)
 	}
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "forget-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	srv.DispatchMaintenance(bg, []maintenance.Decision{
 		{HostID: hostID, Kind: "forget"},
 	})
 	got := readNextCommandRun(t, c, time.Now().Add(2*time.Second))
 	if got == nil {
 		t.Fatal("no command.run received")
 	}
 	if got.Kind != api.JobForget {
 		t.Errorf("kind: got %q, want %q", got.Kind, api.JobForget)
 	}
 	if n := len(got.ForgetGroups); n != 1 {
 		t.Fatalf("ForgetGroups: got %d entries (%+v), want 1", n, got.ForgetGroups)
 	}
 	only := got.ForgetGroups[0]
 	if only.Tag != "documents" {
 		t.Errorf("forget group tag: got %q, want %q", only.Tag, "documents")
 	}
 	if kl := only.Policy.KeepLast; kl == nil || *kl != 7 {
 		t.Errorf("forget group policy: got %+v", only.Policy)
 	}
 	// The persisted job row has to carry actor_kind=system.
 	var actorKind string
 	row := st.DB().QueryRow(
 		`SELECT actor_kind FROM jobs WHERE host_id = ? AND kind = 'forget'`, hostID)
 	if err := row.Scan(&actorKind); err != nil {
 		t.Fatalf("query actor_kind: %v", err)
 	}
 	if actorKind != "system" {
 		t.Errorf("actor_kind: got %q, want system", actorKind)
 	}
 }
 // TestDispatchMaintenanceForgetSkipsHostWithNoRetention: the host is
 // connected but its only source group has no retention policy, so a
 // forget Decision must send no command.run and store no forget job.
 func TestDispatchMaintenanceForgetSkipsHostWithNoRetention(t *testing.T) {
 	t.Parallel()
 	bg := context.Background()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "no-ret-host")
 	seedInitJob(t, st, hostID)
 	group := &store.SourceGroup{
 		ID: ulid.Make().String(), HostID: hostID, Name: "ephemeral",
 		Includes: []string{"/tmp"},
 	}
 	if err := st.CreateSourceGroup(bg, group); err != nil {
 		t.Fatalf("group: %v", err)
 	}
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "no-ret-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	srv.DispatchMaintenance(bg, []maintenance.Decision{
 		{HostID: hostID, Kind: "forget"},
 	})
 	unexpected := readNextCommandRun(t, c, time.Now().Add(800*time.Millisecond))
 	if unexpected != nil {
 		t.Errorf("unexpected command.run: %+v", unexpected)
 	}
 	var forgetJobs int
 	row := st.DB().QueryRow(`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'forget'`, hostID)
 	if err := row.Scan(&forgetJobs); err != nil {
 		t.Fatalf("count: %v", err)
 	}
 	if forgetJobs != 0 {
 		t.Errorf("forget job rows: got %d, want 0", forgetJobs)
 	}
 }
 // TestDispatchMaintenancePruneSkipsWithoutAdminCreds: the host is
 // connected but no admin credentials were ever stored for it, so a
 // prune Decision must send no command.run and store no prune job.
 func TestDispatchMaintenancePruneSkipsWithoutAdminCreds(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "no-admin-host")
 	seedInitJob(t, st, hostID)
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "no-admin-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	decisions := []maintenance.Decision{{HostID: hostID, Kind: "prune"}}
 	srv.DispatchMaintenance(context.Background(), decisions)
 	unexpected := readNextCommandRun(t, c, time.Now().Add(800*time.Millisecond))
 	if unexpected != nil {
 		t.Errorf("unexpected command.run: %+v", unexpected)
 	}
 	var pruneJobs int
 	row := st.DB().QueryRow(`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'prune'`, hostID)
 	if err := row.Scan(&pruneJobs); err != nil {
 		t.Fatalf("count: %v", err)
 	}
 	if pruneJobs != 0 {
 		t.Errorf("prune job rows: got %d, want 0", pruneJobs)
 	}
 }
 // TestDispatchMaintenancePruneShipsConfigUpdateThenCommandRun: with
 // admin credentials stored, a prune Decision must put a
 // config.update for slot "admin" on the wire ahead of the prune
 // command.run, the command.run must set RequiresAdminCreds, and the
 // stored job row must have actor_kind=system.
 func TestDispatchMaintenancePruneShipsConfigUpdateThenCommandRun(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "prune-mt-host")
 	setAdminCreds(t, srv, st, hostID)
 	seedInitJob(t, st, hostID)
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "prune-mt-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
 		{HostID: hostID, Kind: "prune"},
 	})
 	// Consume frames until the prune command.run shows up. Reading
 	// stops there, so an admin push seen by then arrived before it.
 	var (
 		adminPushSeen bool
 		pruneRun      *api.CommandRunPayload
 	)
 	giveUpAt := time.Now().Add(3 * time.Second)
 	for pruneRun == nil {
 		if !time.Now().Before(giveUpAt) {
 			break
 		}
 		readCtx, stop := context.WithTimeout(context.Background(), 600*time.Millisecond)
 		frameType, data, err := c.Read(readCtx)
 		stop()
 		if err != nil {
 			break
 		}
 		if frameType != websocket.MessageText {
 			continue
 		}
 		var env api.Envelope
 		if json.Unmarshal(data, &env) != nil {
 			continue
 		}
 		if env.Type == api.MsgConfigUpdate {
 			var update api.ConfigUpdatePayload
 			if env.UnmarshalPayload(&update) == nil && update.Slot == "admin" {
 				adminPushSeen = true
 			}
 		} else if env.Type == api.MsgCommandRun {
 			var run api.CommandRunPayload
 			if env.UnmarshalPayload(&run) == nil && run.Kind == api.JobPrune {
 				pruneRun = &run
 			}
 		}
 	}
 	if !adminPushSeen {
 		t.Error("expected config.update(slot=admin) before prune dispatch")
 	}
 	if pruneRun == nil {
 		t.Fatal("timed out waiting for command.run(prune)")
 	}
 	if !pruneRun.RequiresAdminCreds {
 		t.Error("prune command.run must have RequiresAdminCreds=true")
 	}
 	// The persisted job row has to carry actor_kind=system.
 	var actorKind string
 	row := st.DB().QueryRow(
 		`SELECT actor_kind FROM jobs WHERE host_id = ? AND kind = 'prune'`, hostID)
 	if err := row.Scan(&actorKind); err != nil {
 		t.Fatalf("query actor_kind: %v", err)
 	}
 	if actorKind != "system" {
 		t.Errorf("actor_kind: got %q, want system", actorKind)
 	}
 }
 // TestDispatchMaintenanceCheckCarriesSubset: a check Decision with
 // SubsetPct=15 must reach the agent as a check command.run whose
 // Args is exactly ["15"], and the stored job row must have
 // actor_kind=system.
 func TestDispatchMaintenanceCheckCarriesSubset(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "check-mt-host")
 	seedInitJob(t, st, hostID)
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "check-mt-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	decisions := []maintenance.Decision{{HostID: hostID, Kind: "check", SubsetPct: 15}}
 	srv.DispatchMaintenance(context.Background(), decisions)
 	run := readNextCommandRun(t, c, time.Now().Add(2*time.Second))
 	if run == nil {
 		t.Fatal("no command.run received")
 	}
 	if run.Kind != api.JobCheck {
 		t.Errorf("kind: got %q, want %q", run.Kind, api.JobCheck)
 	}
 	if argsOK := len(run.Args) == 1 && run.Args[0] == "15"; !argsOK {
 		t.Errorf("Args: got %+v, want [15]", run.Args)
 	}
 	var actorKind string
 	row := st.DB().QueryRow(
 		`SELECT actor_kind FROM jobs WHERE host_id = ? AND kind = 'check'`, hostID)
 	if err := row.Scan(&actorKind); err != nil {
 		t.Fatalf("query actor_kind: %v", err)
 	}
 	if actorKind != "system" {
 		t.Errorf("actor_kind: got %q, want system", actorKind)
 	}
 }
@@ -47,6 +47,32 @@ func loginAsAdmin(t *testing.T, st *store.Store) *stdhttp.Cookie {
 	return &stdhttp.Cookie{Name: sessionCookieName, Value: tok}
 }
 // loginAsAdminWithID creates a fresh admin user and a one-hour
 // session for it directly in the store, and returns the session
 // cookie together with the new user's ID. Use it when a test needs
 // to assert that the acting user's ID was recorded (e.g. on audit
 // entries). Any setup failure aborts the test via t.Fatalf.
 func loginAsAdminWithID(t *testing.T, st *store.Store) (*stdhttp.Cookie, string) {
 	t.Helper()
 	ctx := context.Background()
 	now := time.Now().UTC()
 	uid := ulid.Make().String()
 	hash, err := auth.HashPassword("very-long-test-password")
 	if err != nil {
 		t.Fatalf("hash password: %v", err)
 	}
 	// Build the username from the ULID's trailing characters. Those
 	// come from its random component; the leading characters encode
 	// the timestamp and are identical for IDs minted close together,
 	// so they would not keep usernames distinct.
 	username := "tester-" + uid[len(uid)-6:]
 	if err := st.CreateUser(ctx, store.User{
 		ID: uid, Username: username,
 		PasswordHash: hash, Role: store.RoleAdmin,
 		CreatedAt: now,
 	}); err != nil {
 		t.Fatalf("create user: %v", err)
 	}
 	tok, err := auth.NewToken()
 	if err != nil {
 		t.Fatalf("new token: %v", err)
 	}
 	// The store receives only the token's hash; the cookie carries the
 	// raw token.
 	if err := st.CreateSession(ctx, store.Session{
 		UserID:    uid,
 		CreatedAt: now,
 		ExpiresAt: now.Add(time.Hour),
 	}, auth.HashToken(tok)); err != nil {
 		t.Fatalf("create session: %v", err)
 	}
 	return &stdhttp.Cookie{Name: sessionCookieName, Value: tok}, uid
 }
 // makeHost inserts a minimal Host row directly via the store. Used by
 // HTTP-level tests that don't want to go through the full enrollment
 // path. Returns the host id.
@@ -99,7 +99,7 @@ func enrolHostForWS(t *testing.T, srv *Server, st *store.Store, name string) (ho
 	if err != nil {
 		t.Fatalf("encrypt: %v", err)
 	}
-	if err := st.SetHostCredentials(context.Background(), hostID, enc); err != nil {
+	if err := st.SetHostCredentials(context.Background(), hostID, store.CredKindRepo, enc); err != nil {
 		t.Fatalf("set creds: %v", err)
 	}
 	return hostID, token
@@ -0,0 +1,209 @@
 // pending_drain.go — drains pending_runs rows that are due (or, on
 // agent reconnect, every row for that host).
 //
 // Two trigger paths:
 //  1. The 30s tick in cmd/server (DrainAllDue) — sweeps every host
 //     with rows whose next_attempt_at <= now.
 //  2. onAgentHello (DrainPending(hostID)) — when a host comes back,
 //     walk all of its pending rows synchronously so the operator
 //     sees the queue drain promptly.
 package http
 import (
 	"context"
 	"errors"
 	"log/slog"
 	"sync"
 	"time"
 	"github.com/oklog/ulid/v2"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // Tunables for the pending_runs drain.
 const (
 	// pendingDrainBatchLimit is the row limit DrainAllDue passes to
 	// Store.DuePendingRuns on each sweep.
 	pendingDrainBatchLimit = 100
 	// pendingDrainBackoffMax is the ceiling drainOne applies while
 	// doubling the retry backoff of a row whose dispatch failed.
 	pendingDrainBackoffMax = 30 * time.Minute
 )
 // DrainPending processes every pending_runs row of hostID, handing
 // each one to drainOne together with the host's current Hub
 // connection. The caller is expected to have checked that the host
 // is connected (typically onAgentHello or DrainAllDue); if the Hub
 // has no connection by the time the rows are loaded, nothing is
 // processed and the rows wait for the next tick or reconnect.
 //
 // Drains are serialised per host: the host's mutex from
 // hostDrainMutex is acquired with TryLock, and if another drain
 // already holds it this call returns at once without touching any
 // row.
 func (s *Server) DrainPending(ctx context.Context, hostID string) {
 	lock := s.hostDrainMutex(hostID)
 	if acquired := lock.TryLock(); !acquired {
 		return
 	}
 	defer lock.Unlock()
 	pending, err := s.deps.Store.ListPendingRunsForHost(ctx, hostID)
 	switch {
 	case err != nil:
 		slog.Warn("drain pending: list", "host_id", hostID, "err", err)
 		return
 	case len(pending) == 0:
 		return
 	}
 	// The host may have dropped since the caller checked; in that case
 	// leave the rows for the next tick or reconnect.
 	conn := s.deps.Hub.Conn(hostID)
 	if conn == nil {
 		return
 	}
 	for i := range pending {
 		s.drainOne(ctx, conn, pending[i])
 	}
 }
 // drainOne handles a single pending row against an already-resolved
 // agent connection. In order:
 //   - schedule missing or disabled → the row is abandoned;
 //   - source group missing → the row is abandoned;
 //   - RetryMax > 0 and Attempt >= RetryMax → the row is abandoned;
 //   - otherwise the backup is dispatched. An empty job ID counts as
 //     failure: the attempt is bumped and next_attempt_at is pushed
 //     out by pendingDrainBackoff. A non-empty job ID counts as
 //     success and the row is deleted.
 //
 // Load errors other than store.ErrNotFound are logged and the row is
 // left untouched for a later drain.
 func (s *Server) drainOne(ctx context.Context, conn *ws.Conn, p store.PendingRun) {
 	sc, err := s.deps.Store.GetSchedule(ctx, p.HostID, p.ScheduleID)
 	if err != nil {
 		if errors.Is(err, store.ErrNotFound) {
 			s.abandonPending(ctx, p, "schedule gone")
 			return
 		}
 		slog.Warn("drain pending: load schedule",
 			"host_id", p.HostID, "schedule_id", p.ScheduleID, "err", err)
 		return
 	}
 	if !sc.Enabled {
 		s.abandonPending(ctx, p, "schedule disabled")
 		return
 	}
 	g, err := s.deps.Store.GetSourceGroup(ctx, p.HostID, p.SourceGroupID)
 	if err != nil {
 		if errors.Is(err, store.ErrNotFound) {
 			s.abandonPending(ctx, p, "source group gone")
 			return
 		}
 		slog.Warn("drain pending: load source group",
 			"host_id", p.HostID, "group_id", p.SourceGroupID, "err", err)
 		return
 	}
 	if g.RetryMax > 0 && p.Attempt >= g.RetryMax {
 		s.abandonPending(ctx, p, "retry_max exceeded")
 		return
 	}
 	// Calls dispatchBackupForGroupCore (not dispatchBackupForGroup) so a
 	// failed Send doesn't double-enqueue: dispatchBackupForGroup's
 	// enqueue-on-failure path would create a NEW pending_runs row while
 	// this function already bumps the EXISTING row via
 	// BumpPendingRunAttempt, producing geometric duplicates on repeated
 	// failures.
 	jobID, _ := s.dispatchBackupForGroupCore(ctx, conn, p.HostID, p.ScheduleID, g, p.ScheduledAt)
 	if jobID == "" {
 		// Send failed again: bump the attempt and reschedule.
 		base := time.Duration(g.RetryBackoffSeconds) * time.Second
 		next := time.Now().UTC().Add(pendingDrainBackoff(base, p.Attempt))
 		if err := s.deps.Store.BumpPendingRunAttempt(ctx, p.ID, next, "drain dispatch failed"); err != nil {
 			slog.Warn("drain pending: bump", "host_id", p.HostID, "id", p.ID, "err", err)
 		}
 		return
 	}
 	// Success — drop the pending row.
 	if err := s.deps.Store.DeletePendingRun(ctx, p.ID); err != nil {
 		slog.Warn("drain pending: delete after dispatch", "host_id", p.HostID, "id", p.ID, "err", err)
 	}
 	slog.Info("drain pending: dispatched",
 		"host_id", p.HostID, "schedule_id", p.ScheduleID, "group", g.Name,
 		"attempt", p.Attempt, "job_id", jobID)
 }
 // pendingDrainBackoff returns the delay before the next drain attempt:
 // base doubled once per attempt already made, never more than
 // pendingDrainBackoffMax. A non-positive base falls back to 60s. With
 // a 60s base the delays for attempts 1, 2, 3 are 120s, 240s, 480s.
 // The ceiling is applied after the loop as well, so a base that is
 // itself above the cap is clamped even when attempt is zero.
 func pendingDrainBackoff(base time.Duration, attempt int) time.Duration {
 	if base <= 0 {
 		base = 60 * time.Second
 	}
 	backoff := base
 	// Stop doubling once the cap is reached; this also keeps the
 	// multiplication far away from overflow for large attempt counts.
 	for i := 0; i < attempt && backoff < pendingDrainBackoffMax; i++ {
 		backoff *= 2
 	}
 	if backoff > pendingDrainBackoffMax {
 		backoff = pendingDrainBackoffMax
 	}
 	return backoff
 }
 // abandonPending gives up on a pending row: it logs the reason,
 // appends a system-actor "pending_run.abandoned" audit entry
 // targeting the row's schedule, and then deletes the row. The reason
 // string goes to the log line only; the audit entry built here does
 // not carry it. A failure of the audit write or of the delete is
 // logged and does not stop the remaining steps.
 func (s *Server) abandonPending(ctx context.Context, p store.PendingRun, reason string) {
 	slog.Info("drain pending: abandoning",
 		"host_id", p.HostID, "schedule_id", p.ScheduleID,
 		"attempt", p.Attempt, "reason", reason)
 	// Copy the ID so the entry points at a local, not at a field of p.
 	targetID := p.ScheduleID
 	entry := store.AuditEntry{
 		ID:         ulid.Make().String(),
 		Actor:      "system",
 		Action:     "pending_run.abandoned",
 		TargetKind: ptr("schedule"),
 		TargetID:   &targetID,
 		TS:         time.Now().UTC(),
 	}
 	if auditErr := s.deps.Store.AppendAudit(ctx, entry); auditErr != nil {
 		slog.Warn("drain pending: audit on abandon", "id", p.ID, "err", auditErr)
 	}
 	if delErr := s.deps.Store.DeletePendingRun(ctx, p.ID); delErr != nil {
 		slog.Warn("drain pending: delete on abandon", "id", p.ID, "err", delErr)
 	}
 }
 // hostDrainMutex hands out the mutex that serialises DrainPending for
 // one host, allocating the registry map and the mutex lazily. Access
 // to the registry is guarded by drainLocksMu. Entries are never
 // removed, so the map holds one mutex per host ID ever requested.
 func (s *Server) hostDrainMutex(hostID string) *sync.Mutex {
 	s.drainLocksMu.Lock()
 	defer s.drainLocksMu.Unlock()
 	// Indexing a nil map is a plain miss, so look up before allocating.
 	if existing, found := s.drainLocks[hostID]; found {
 		return existing
 	}
 	if s.drainLocks == nil {
 		s.drainLocks = make(map[string]*sync.Mutex)
 	}
 	fresh := new(sync.Mutex)
 	s.drainLocks[hostID] = fresh
 	return fresh
 }
 // DrainAllDue is the periodic sweep. It fetches up to
 // pendingDrainBatchLimit rows whose next attempt is due, visits each
 // distinct host among them once, and calls DrainPending for the
 // hosts the Hub reports as connected. DrainPending then reloads that
 // host's rows itself. It is a no-op when the server has no Hub.
 func (s *Server) DrainAllDue(ctx context.Context) {
 	if s.deps.Hub == nil {
 		return
 	}
 	due, err := s.deps.Store.DuePendingRuns(ctx, time.Now().UTC(), pendingDrainBatchLimit)
 	if err != nil {
 		slog.Warn("drain all due: list", "err", err)
 		return
 	}
 	if len(due) == 0 {
 		return
 	}
 	visited := make(map[string]struct{}, len(due))
 	for i := range due {
 		host := due[i].HostID
 		if _, dup := visited[host]; dup {
 			continue
 		}
 		// Mark before the connectivity check so an offline host is
 		// asked about only once per sweep.
 		visited[host] = struct{}{}
 		if s.deps.Hub.Connected(host) {
 			s.DrainPending(ctx, host)
 		}
 	}
 }
@@ -0,0 +1,567 @@
 // pending_drain_test.go — covers DrainPending / DrainAllDue and the
 // onAgentHello goroutine spawn that drains a freshly-reconnected
 // host's queue.
 package http
 import (
 	"context"
 	"encoding/json"
 	"sync"
 	"testing"
 	"time"
 	"github.com/coder/websocket"
 	"github.com/oklog/ulid/v2"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // seedSchedAndGroup gives hostID one source group ("default",
 // including /etc, with the supplied retryMax and a 60s retry
 // backoff), one enabled schedule referencing that group, and an init
 // job row. Returns (groupID, scheduleID).
 func seedSchedAndGroup(t *testing.T, st *store.Store, hostID string, retryMax int) (string, string) {
 	t.Helper()
 	bg := context.Background()
 	groupID := ulid.Make().String()
 	group := &store.SourceGroup{
 		ID: groupID, HostID: hostID, Name: "default",
 		Includes: []string{"/etc"},
 		RetryMax: retryMax, RetryBackoffSeconds: 60,
 	}
 	if err := st.CreateSourceGroup(bg, group); err != nil {
 		t.Fatalf("create group: %v", err)
 	}
 	scheduleID := ulid.Make().String()
 	schedule := &store.Schedule{
 		ID: scheduleID, HostID: hostID,
 		CronExpr: "0 3 * * *", Enabled: true,
 		SourceGroupIDs: []string{groupID},
 	}
 	if err := st.CreateSchedule(bg, schedule); err != nil {
 		t.Fatalf("create schedule: %v", err)
 	}
 	// Seed an init job so auto-init doesn't pollute what the tests read.
 	initJob := store.Job{
 		ID: ulid.Make().String(), HostID: hostID, Kind: "init",
 		ActorKind: "system", CreatedAt: time.Now().UTC(),
 	}
 	if err := st.CreateJob(bg, initJob); err != nil {
 		t.Fatalf("seed init: %v", err)
 	}
 	return groupID, scheduleID
 }
 // countPendingForHost reports how many pending_runs rows belong to
 // hostID, failing the test if the query errors.
 func countPendingForHost(t *testing.T, st *store.Store, hostID string) int {
 	t.Helper()
 	const q = `SELECT COUNT(*) FROM pending_runs WHERE host_id = ?`
 	count := 0
 	row := st.DB().QueryRow(q, hostID)
 	if err := row.Scan(&count); err != nil {
 		t.Fatalf("count pending: %v", err)
 	}
 	return count
 }
 // waitForPendingCount polls the pending_runs count for hostID every
 // 20ms until it equals wantN or timeout has elapsed, and reports a
 // test error if the last observed count still differs. Use this
 // instead of calling DrainPending synchronously when the test relies
 // on the on-hello goroutine (which holds the per-host drain mutex) to
 // process rows.
 //
 // The count is always read at least once, and the value judged is
 // the same one printed in the failure message, so a count that
 // settles right at the deadline is not reported as a mismatch.
 func waitForPendingCount(t *testing.T, st *store.Store, hostID string, wantN int, timeout time.Duration) {
 	t.Helper()
 	deadline := time.Now().Add(timeout)
 	got := countPendingForHost(t, st, hostID)
 	for got != wantN && time.Now().Before(deadline) {
 		time.Sleep(20 * time.Millisecond)
 		got = countPendingForHost(t, st, hostID)
 	}
 	if got != wantN {
 		t.Errorf("pending count for host %s: want %d after %v, got %d",
 			hostID, wantN, timeout, got)
 	}
 }
 // countAuditAction reports how many audit_log rows carry the given
 // action, failing the test if the query errors.
 func countAuditAction(t *testing.T, st *store.Store, action string) int {
 	t.Helper()
 	const q = `SELECT COUNT(*) FROM audit_log WHERE action = ?`
 	count := 0
 	row := st.DB().QueryRow(q, action)
 	if err := row.Scan(&count); err != nil {
 		t.Fatalf("count audit: %v", err)
 	}
 	return count
 }
 // TestDrainPendingDispatchesOnReconnect: a due pending_runs row exists
 // before the agent connects. After hello, the on-hello drain must
 // send a backup command.run built from the row's source group, remove
 // the pending row, and leave exactly one schedule-actor backup job.
 func TestDrainPendingDispatchesOnReconnect(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "drain-host")
 	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
 	// Pre-insert a pending row that's already due. The on-hello
 	// goroutine should drain it after we connect.
 	pendingID := ulid.Make().String()
 	now := time.Now().UTC()
 	if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
 		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
 		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
 		ScheduledAt: now.Add(-time.Minute),
 	}); err != nil {
 		t.Fatalf("enqueue: %v", err)
 	}
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "drain-host")
 	// Walk envelopes looking for a backup command.run carrying the
 	// group's includes.
 	var got *api.CommandRunPayload
 	deadline := time.Now().Add(3 * time.Second)
 	for time.Now().Before(deadline) {
 		ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
 		mt, raw, err := c.Read(ctx)
 		cancel()
 		if err != nil {
 			break
 		}
 		if mt != websocket.MessageText {
 			continue
 		}
 		var env api.Envelope
 		if err := json.Unmarshal(raw, &env); err != nil {
 			continue
 		}
 		if env.Type != api.MsgCommandRun {
 			continue
 		}
 		var p api.CommandRunPayload
 		if err := env.UnmarshalPayload(&p); err != nil {
 			// A payload that doesn't decode can't be the one we want.
 			continue
 		}
 		if p.Kind == api.JobBackup {
 			got = &p
 			break
 		}
 	}
 	if got == nil {
 		t.Fatalf("no backup command.run dispatched after reconnect drain")
 	}
 	if !equalStrings(got.Includes, []string{"/etc"}) {
 		t.Errorf("backup includes: %v", got.Includes)
 	}
 	if got.Tag != "default" {
 		t.Errorf("backup tag: %q", got.Tag)
 	}
 	// Pending row should be gone. drainOne deletes it only after the
 	// dispatch call has returned, so the envelope can reach this test
 	// before the delete lands — poll rather than assert on one read.
 	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
 	// One backup job row landed (in addition to the seeded init).
 	var n int
 	if err := st.DB().QueryRow(
 		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup' AND actor_kind = 'schedule'`,
 		hostID).Scan(&n); err != nil {
 		t.Fatalf("count backup jobs: %v", err)
 	}
 	if n != 1 {
 		t.Errorf("backup job rows: got %d, want 1", n)
 	}
 }
 // TestDrainPendingAbandonsOnRetryMax: the pending row's attempt count
 // already equals the group's retry_max (2). The on-hello drain must
 // delete the row, record exactly one pending_run.abandoned audit
 // entry, send no backup command.run and create no backup job.
 func TestDrainPendingAbandonsOnRetryMax(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "abandon-retry-host")
 	gid, sid := seedSchedAndGroup(t, st, hostID, 2)
 	pendingID := ulid.Make().String()
 	now := time.Now().UTC()
 	if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
 		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
 		Attempt: 2, NextAttemptAt: now.Add(-time.Second),
 		ScheduledAt: now.Add(-time.Minute),
 	}); err != nil {
 		t.Fatalf("enqueue: %v", err)
 	}
 	auditBefore := countAuditAction(t, st, "pending_run.abandoned")
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "abandon-retry-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	// The on-hello goroutine processes the row (retry_max exceeded → abandon).
 	// Wait for it to finish rather than calling DrainPending directly, which
 	// would be a no-op while the goroutine holds the per-host drain mutex.
 	// waitForPendingCount reports the failure itself if the row survives.
 	_ = connFromHub(t, srv, hostID) // ensure hub registration
 	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
 	if d := countAuditAction(t, st, "pending_run.abandoned") - auditBefore; d != 1 {
 		t.Errorf("audit pending_run.abandoned delta: got %d, want 1", d)
 	}
 	// No backup command.run should have been sent.
 	deadline := time.Now().Add(400 * time.Millisecond)
 	for time.Now().Before(deadline) {
 		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
 		mt, raw, err := c.Read(ctx)
 		cancel()
 		if err != nil {
 			break
 		}
 		if mt != websocket.MessageText {
 			continue
 		}
 		var env api.Envelope
 		if err := json.Unmarshal(raw, &env); err != nil {
 			continue
 		}
 		if env.Type != api.MsgCommandRun {
 			continue
 		}
 		var p api.CommandRunPayload
 		if err := env.UnmarshalPayload(&p); err != nil {
 			continue
 		}
 		if p.Kind == api.JobBackup {
 			t.Fatalf("abandoned row still dispatched a backup: %+v", p)
 		}
 	}
 	// No backup job row. The Scan error must be checked: a failed query
 	// would leave n at zero and make this assertion pass vacuously.
 	var n int
 	if err := st.DB().QueryRow(
 		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup'`,
 		hostID).Scan(&n); err != nil {
 		t.Fatalf("count backup jobs: %v", err)
 	}
 	if n != 0 {
 		t.Errorf("abandon path created a backup job: %d rows", n)
 	}
 }
 // TestDrainPendingBumpsOnSendFailure closes the agent socket, enqueues one
 // due pending row and runs drainOne against the captured server-side conn.
 // It expects the existing row to be bumped to attempt 2 with a recorded
 // last_error, and no second row to appear.
 func TestDrainPendingBumpsOnSendFailure(t *testing.T) {
 	t.Parallel()
 	bg := context.Background()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "bump-host")
 	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
 	client := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, client, "bump-host")
 	_ = drainUntil(t, client, api.MsgScheduleSet)
 	// Grab the server-side conn while the socket is still open. Per the
 	// original author's note, the pointer stays usable after the client
 	// closes; later Sends on it simply fail.
 	serverConn := connFromHub(t, srv, hostID)
 	if serverConn == nil {
 		t.Fatal("conn never registered")
 	}
 	// The row is inserted only after the on-hello drain has had its
 	// (empty) pass and the socket is closed, so that drain cannot race us
 	// by dispatching the row over a live connection.
 	rowID := ulid.Make().String()
 	now := time.Now().UTC()
 	if err := client.Close(websocket.StatusNormalClosure, "test"); err != nil {
 		t.Fatalf("close: %v", err)
 	}
 	// Give the server's read loop a moment to observe the close.
 	time.Sleep(150 * time.Millisecond)
 	enqueueErr := st.EnqueuePendingRun(bg, &store.PendingRun{
 		ID: rowID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
 		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
 		ScheduledAt: now.Add(-time.Minute),
 	})
 	if enqueueErr != nil {
 		t.Fatalf("enqueue: %v", enqueueErr)
 	}
 	// Call drainOne directly with the captured conn instead of going
 	// through DrainPending: the hub may already have unregistered the
 	// host, and we want the "Send fails" branch, not "host gone".
 	srv.drainOne(bg, serverConn, store.PendingRun{
 		ID: rowID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
 		Attempt: 1, NextAttemptAt: now.Add(-time.Second), ScheduledAt: now.Add(-time.Minute),
 	})
 	// Exactly one row must remain: the drain path bumps the existing row
 	// rather than enqueueing a duplicate.
 	if got := countPendingForHost(t, st, hostID); got != 1 {
 		t.Errorf("pending rows after send failure: got %d, want 1 (no duplicate enqueue)", got)
 	}
 	var (
 		attempt int
 		lastErr string
 	)
 	row := st.DB().QueryRow(
 		`SELECT attempt, COALESCE(last_error,'') FROM pending_runs WHERE id = ?`,
 		rowID)
 	if err := row.Scan(&attempt, &lastErr); err != nil {
 		t.Fatalf("scan original row: %v", err)
 	}
 	if attempt != 2 {
 		t.Errorf("attempt after bump: got %d, want 2", attempt)
 	}
 	if lastErr == "" {
 		t.Errorf("last_error empty after bump")
 	}
 }
 // TestDrainPendingDropsRowsForGoneSchedule enqueues a due pending row,
 // disables its schedule, then connects the agent. The on-hello drain is
 // expected to abandon the row: no pending rows left, one
 // pending_run.abandoned audit entry, and no backup command.run on the wire.
 func TestDrainPendingDropsRowsForGoneSchedule(t *testing.T) {
 	t.Parallel()
 	bg := context.Background()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "gone-sched-host")
 	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
 	rowID := ulid.Make().String()
 	now := time.Now().UTC()
 	enqueueErr := st.EnqueuePendingRun(bg, &store.PendingRun{
 		ID: rowID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
 		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
 		ScheduledAt: now.Add(-time.Minute),
 	})
 	if enqueueErr != nil {
 		t.Fatalf("enqueue: %v", enqueueErr)
 	}
 	// The schedule is disabled rather than deleted: per the original
 	// author's note a delete would FK-cascade the pending row away before
 	// the drainer sees it, whereas a disabled schedule reaches the same
 	// abandon code path.
 	_, disableErr := st.DB().Exec(
 		`UPDATE schedules SET enabled = 0 WHERE id = ?`, sid)
 	if disableErr != nil {
 		t.Fatalf("disable schedule: %v", disableErr)
 	}
 	abandonedBefore := countAuditAction(t, st, "pending_run.abandoned")
 	client := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, client, "gone-sched-host")
 	_ = drainUntil(t, client, api.MsgScheduleSet)
 	// Wait for the on-hello drain to finish instead of calling
 	// DrainPending ourselves; the latter would return at once while the
 	// goroutine holds the per-host drain mutex.
 	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
 	if left := countPendingForHost(t, st, hostID); left != 0 {
 		t.Errorf("pending rows after schedule-gone abandon: got %d, want 0", left)
 	}
 	if delta := countAuditAction(t, st, "pending_run.abandoned") - abandonedBefore; delta != 1 {
 		t.Errorf("audit delta: got %d, want 1", delta)
 	}
 	// Watch the socket briefly: a backup command.run must not show up.
 	quietUntil := time.Now().Add(400 * time.Millisecond)
 	for time.Now().Before(quietUntil) {
 		readCtx, stop := context.WithTimeout(bg, 200*time.Millisecond)
 		msgType, raw, readErr := client.Read(readCtx)
 		stop()
 		if readErr != nil {
 			break
 		}
 		if msgType != websocket.MessageText {
 			continue
 		}
 		var env api.Envelope
 		_ = json.Unmarshal(raw, &env)
 		if env.Type != api.MsgCommandRun {
 			continue
 		}
 		var run api.CommandRunPayload
 		_ = env.UnmarshalPayload(&run)
 		if run.Kind == api.JobBackup {
 			t.Fatalf("gone-schedule abandon still dispatched: %+v", run)
 		}
 	}
 }
 // TestDrainPendingDropsRowsForGoneSourceGroup plants a pending row that
 // points at a source group which was never created, connects the agent,
 // and expects the on-hello drain to abandon the row and write one
 // pending_run.abandoned audit entry. Transient store failures
 // (SQLITE_BUSY, context cancellation) are out of scope here: per the
 // original note, the real *Store offers no fault-injection seam, so that
 // path is left to code review.
 func TestDrainPendingDropsRowsForGoneSourceGroup(t *testing.T) {
 	t.Parallel()
 	bg := context.Background()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "gone-group-host")
 	_, sid := seedSchedAndGroup(t, st, hostID, 5)
 	// pending_runs has an FK to source_groups, so inserting a dangling
 	// group id needs FK enforcement switched off. The pragma is
 	// per-connection and DB().Exec may pick any pooled connection, hence
 	// the dedicated *sql.Conn for the three statements below.
 	missingGroupID := ulid.Make().String()
 	rowID := ulid.Make().String()
 	now := time.Now().UTC()
 	pinned, err := st.DB().Conn(bg)
 	if err != nil {
 		t.Fatalf("db conn: %v", err)
 	}
 	defer pinned.Close()
 	if _, err := pinned.ExecContext(bg, `PRAGMA foreign_keys = OFF`); err != nil {
 		t.Fatalf("fk off: %v", err)
 	}
 	if _, err := pinned.ExecContext(bg,
 		`INSERT INTO pending_runs (id, schedule_id, source_group_id, host_id, attempt, next_attempt_at, scheduled_at)
 		 VALUES (?, ?, ?, ?, 1, ?, ?)`,
 		rowID, sid, missingGroupID, hostID,
 		now.Add(-time.Second), now.Add(-time.Minute),
 	); err != nil {
 		t.Fatalf("insert pending: %v", err)
 	}
 	if _, err := pinned.ExecContext(bg, `PRAGMA foreign_keys = ON`); err != nil {
 		t.Fatalf("fk on: %v", err)
 	}
 	abandonedBefore := countAuditAction(t, st, "pending_run.abandoned")
 	client := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, client, "gone-group-host")
 	_ = drainUntil(t, client, api.MsgScheduleSet)
 	// Wait for the on-hello drain rather than invoking DrainPending,
 	// which would return at once while the goroutine holds the per-host
 	// drain mutex.
 	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
 	if left := countPendingForHost(t, st, hostID); left != 0 {
 		t.Errorf("pending rows after source-group-gone abandon: got %d, want 0", left)
 	}
 	if delta := countAuditAction(t, st, "pending_run.abandoned") - abandonedBefore; delta != 1 {
 		t.Errorf("audit delta: got %d, want 1", delta)
 	}
 }
 // TestDrainAllDueSkipsOfflineHosts enqueues a due pending row for a host
 // that is enrolled but never connects, runs DrainAllDue, and expects the
 // row to remain and no pending_run.abandoned audit entry to be added.
 func TestDrainAllDueSkipsOfflineHosts(t *testing.T) {
 	t.Parallel()
 	bg := context.Background()
 	srv, _, st := rawTestServer(t)
 	// Enrol only; the agent is deliberately never dialled.
 	hostID, _ := enrolHostForWS(t, srv, st, "offline-host")
 	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
 	rowID := ulid.Make().String()
 	now := time.Now().UTC()
 	enqueueErr := st.EnqueuePendingRun(bg, &store.PendingRun{
 		ID: rowID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
 		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
 		ScheduledAt: now.Add(-time.Minute),
 	})
 	if enqueueErr != nil {
 		t.Fatalf("enqueue: %v", enqueueErr)
 	}
 	abandonedBefore := countAuditAction(t, st, "pending_run.abandoned")
 	srv.DrainAllDue(bg)
 	// The row must survive: the drainer is expected to skip offline hosts.
 	if left := countPendingForHost(t, st, hostID); left != 1 {
 		t.Errorf("pending rows after DrainAllDue against offline host: got %d, want 1", left)
 	}
 	if delta := countAuditAction(t, st, "pending_run.abandoned") - abandonedBefore; delta != 0 {
 		t.Errorf("audit unexpectedly changed: delta %d", delta)
 	}
 }
 // TestEnqueueOnDispatchFailure closes the agent socket and then calls
 // dispatchScheduledJob against the captured server-side conn. The failed
 // dispatch is expected to leave exactly one pending row for the host with
 // attempt=1, the original scheduled_at, and a non-empty last_error.
 func TestEnqueueOnDispatchFailure(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "enqueue-host")
 	_, sid := seedSchedAndGroup(t, st, hostID, 5)
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "enqueue-host")
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	// Capture the server-side conn before closing the client. Fail fast
 	// if it is missing so a nil conn is not handed to dispatchScheduledJob
 	// (same guard as TestDrainPendingBumpsOnSendFailure).
 	conn := connFromHub(t, srv, hostID)
 	if conn == nil {
 		t.Fatal("conn never registered")
 	}
 	// Close the client side so the server's next Send errors.
 	if err := c.Close(websocket.StatusNormalClosure, "test"); err != nil {
 		t.Fatalf("close: %v", err)
 	}
 	// Brief settle so the close is observed by the server side.
 	time.Sleep(100 * time.Millisecond)
 	scheduledAt := time.Now().UTC().Add(-30 * time.Second)
 	srv.dispatchScheduledJob(context.Background(), hostID, conn, sid, scheduledAt)
 	// One pending row should have been enqueued (attempt=1) with the
 	// scheduled_at preserved.
 	rows, err := st.ListPendingRunsForHost(context.Background(), hostID)
 	if err != nil {
 		t.Fatalf("list: %v", err)
 	}
 	if len(rows) != 1 {
 		t.Fatalf("pending rows: got %d, want 1", len(rows))
 	}
 	if rows[0].Attempt != 1 {
 		t.Errorf("attempt: got %d, want 1", rows[0].Attempt)
 	}
 	// scheduled_at preserved (within RFC3339Nano round-trip tolerance).
 	if rows[0].ScheduledAt.Sub(scheduledAt).Abs() > time.Microsecond {
 		t.Errorf("scheduled_at drift: %v vs %v", rows[0].ScheduledAt, scheduledAt)
 	}
 	if rows[0].LastError == "" {
 		t.Errorf("last_error empty")
 	}
 }
 // TestDrainPendingSerializesPerHost verifies that concurrent DrainPending
 // calls for the same host do not double-dispatch pending rows. The per-host
 // mutex (TryLock semantics) means exactly one drain processes each row.
 //
 // The test enqueues 5 due rows, fires 10 concurrent DrainPending calls,
 // and then asserts that no pending rows remain and that exactly 5
 // schedule-driven backup job rows were created.
 func TestDrainPendingSerializesPerHost(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "serialize-host")
 	gid, sid := seedSchedAndGroup(t, st, hostID, 10)
 	// Connect the agent so DrainPending can dispatch.
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "serialize-host")
 	// Drain the on-hello goroutine's pass first (no pending rows yet),
 	// then wait for the schedule.set so the connection is fully settled.
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	// Insert 5 pending rows now that the on-hello drain has already run.
 	now := time.Now().UTC()
 	for i := range 5 {
 		pid := ulid.Make().String()
 		if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
 			ID:            pid,
 			ScheduleID:    sid,
 			SourceGroupID: gid,
 			HostID:        hostID,
 			Attempt:       1,
 			NextAttemptAt: now.Add(-time.Second),
 			ScheduledAt:   now.Add(-time.Duration(i+1) * time.Minute),
 		}); err != nil {
 			t.Fatalf("enqueue row %d: %v", i, err)
 		}
 	}
 	// Spawn 10 goroutines all calling DrainPending concurrently.
 	var wg sync.WaitGroup
 	for range 10 {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()
 			srv.DrainPending(context.Background(), hostID)
 		}()
 	}
 	wg.Wait()
 	// Drain any envelopes the agent received so we don't block below.
 	// We read with short timeouts and stop when the connection goes quiet.
 	drainDeadline := time.Now().Add(500 * time.Millisecond)
 	for time.Now().Before(drainDeadline) {
 		ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
 		_, _, err := c.Read(ctx)
 		cancel()
 		if err != nil {
 			break
 		}
 	}
 	// All 5 pending rows must be gone.
 	if n := countPendingForHost(t, st, hostID); n != 0 {
 		t.Errorf("pending rows after concurrent drain: got %d, want 0", n)
 	}
 	// Exactly 5 backup job rows (one per pending row), not 10+ from a race.
 	// A failed query is reported as such instead of being mistaken for a
 	// zero count.
 	var n int
 	if err := st.DB().QueryRow(
 		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup' AND actor_kind = 'schedule'`,
 		hostID).Scan(&n); err != nil {
 		t.Fatalf("count backup jobs: %v", err)
 	}
 	if n != 5 {
 		t.Errorf("backup job rows: got %d, want 5 (per-host mutex must prevent double-dispatch)", n)
 	}
 }
@@ -0,0 +1,165 @@
 // repo_ops.go — operator-triggered Run-now for repo-level operations:
 // prune, check, unlock. Backed by the same dispatchJobWithPayload
 // pipeline as backup, with an extra step for prune: push admin creds
 // first if they're set, refuse loudly if they aren't.
 package http
 import (
 	"errors"
 	"log/slog"
 	stdhttp "net/http"
 	"strconv"
 	"github.com/go-chi/chi/v5"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // handleRunRepoPrune serves POST /api/hosts/{id}/repo/prune (and, per the
 // original note, an HTMX twin outside /api). It first pushes the host's
 // admin credentials to the agent and then dispatches a prune command.run
 // with RequiresAdminCreds=true.
 //
 // Responses: unauthenticated → 303 to /login (HTML) or 401 JSON; empty id
 // → 400 missing_id; no admin creds stored → 400 admin_creds_required; any
 // other push failure → 503 host_offline; dispatch errors are relayed
 // as-is; success goes through runOpRedirect.
 func (s *Server) handleRunRepoPrune(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	user, authed := s.requireUser(r)
 	if !authed {
 		if !wantsHTML(r) {
 			writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 			return
 		}
 		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
 		return
 	}
 	hostID := chi.URLParam(r, "id")
 	if hostID == "" {
 		s.runOpError(w, r, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
 	// Credentials go down the wire before the command itself.
 	switch pushErr := s.pushAdminCredsToAgent(r.Context(), hostID); {
 	case pushErr == nil:
 		// pushed; carry on to dispatch
 	case errors.Is(pushErr, store.ErrNotFound):
 		// The operator has not configured admin credentials yet.
 		s.runOpError(w, r, stdhttp.StatusBadRequest, "admin_creds_required",
 			"set admin credentials on the Repo page before running prune")
 		return
 	default:
 		// Send failure (agent offline) or decrypt failure: log the real
 		// cause, show the operator a generic "offline, retry later".
 		slog.Warn("prune: push admin creds failed", "host_id", hostID, "err", pushErr)
 		s.runOpError(w, r, stdhttp.StatusServiceUnavailable, "host_offline",
 			"agent is not currently connected; try again when it reconnects")
 		return
 	}
 	payload := api.CommandRunPayload{RequiresAdminCreds: true}
 	res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobPrune, payload)
 	if code == "" {
 		s.runOpRedirect(w, r, res)
 		return
 	}
 	s.runOpError(w, r, status, code, msg)
 }
 // handleRunRepoCheck — POST /api/hosts/{id}/repo/check. Pulls
 // check_subset_pct from host_repo_maintenance for the host (operator
 // can override via ?subset=N query param, clamped 0..100). Dispatches
 // with the chosen subset in CommandRunPayload.Args[0].
 //
 // Responses: unauthenticated → 303 to /login (HTML) or 401 JSON; empty id
 // → 400 missing_id; missing maintenance row → 500 no_maintenance_row; any
 // other store error → 500 internal (logged); dispatch errors are relayed
 // as-is; success goes through runOpRedirect.
 func (s *Server) handleRunRepoCheck(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	user, ok := s.requireUser(r)
 	if !ok {
 		if wantsHTML(r) {
 			stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
 			return
 		}
 		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 		return
 	}
 	hostID := chi.URLParam(r, "id")
 	if hostID == "" {
 		s.runOpError(w, r, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
 	m, err := s.deps.Store.GetRepoMaintenance(r.Context(), hostID)
 	if err != nil {
 		if errors.Is(err, store.ErrNotFound) {
 			// Maintenance row should auto-seed at enrollment. If it's
 			// missing, surface a clear error rather than guessing 0%.
 			s.runOpError(w, r, stdhttp.StatusInternalServerError, "no_maintenance_row",
 				"host has no repo-maintenance config; was the host fully enrolled?")
 			return
 		}
 		// The client only sees an opaque "internal"; log the cause so
 		// the failure can be diagnosed server-side.
 		slog.Warn("check: load repo maintenance failed", "host_id", hostID, "err", err)
 		s.runOpError(w, r, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
 	subset := m.CheckSubsetPct
 	if q := r.URL.Query().Get("subset"); q != "" {
 		// Non-numeric ?subset silently falls back to DB value.
 		if n, convErr := strconv.Atoi(q); convErr == nil {
 			subset = min(max(n, 0), 100)
 		}
 	}
 	res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobCheck,
 		api.CommandRunPayload{Args: []string{strconv.Itoa(subset)}})
 	if code != "" {
 		s.runOpError(w, r, status, code, msg)
 		return
 	}
 	s.runOpRedirect(w, r, res)
 }
 // handleRunRepoUnlock serves POST /api/hosts/{id}/repo/unlock. It
 // dispatches an unlock command.run with an empty payload; no admin
 // credentials are pushed because, per the original note, restic unlock
 // works with the everyday user.
 func (s *Server) handleRunRepoUnlock(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	user, authed := s.requireUser(r)
 	if !authed {
 		if !wantsHTML(r) {
 			writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 			return
 		}
 		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
 		return
 	}
 	hostID := chi.URLParam(r, "id")
 	if hostID == "" {
 		s.runOpError(w, r, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
 	var payload api.CommandRunPayload
 	res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobUnlock, payload)
 	if code == "" {
 		s.runOpRedirect(w, r, res)
 		return
 	}
 	s.runOpError(w, r, status, code, msg)
 }
 // runOpRedirect writes the success response for a run-now operation.
 // JSON callers get 202 with res as the body; HTML (HTMX) callers get an
 // empty 204 carrying an HX-Redirect header to /jobs/{JobID}. The original
 // note says this mirrors the tail of handleRunSourceGroup.
 func (s *Server) runOpRedirect(w stdhttp.ResponseWriter, r *stdhttp.Request, res runNowResponse) {
 	if !wantsHTML(r) {
 		writeJSON(w, stdhttp.StatusAccepted, res)
 		return
 	}
 	target := "/jobs/" + res.JobID
 	w.Header().Set("HX-Redirect", target)
 	w.WriteHeader(stdhttp.StatusNoContent)
 }
 // runOpError writes the failure response for a run-now operation. JSON
 // callers get the standard error envelope (status, code, msg); HTML
 // (HTMX) callers get msg as a plain-text body with the same status. The
 // original note says this mirrors runGroupError.
 func (s *Server) runOpError(w stdhttp.ResponseWriter, r *stdhttp.Request, status int, code, msg string) {
 	if !wantsHTML(r) {
 		writeJSONError(w, status, code, msg)
 		return
 	}
 	stdhttp.Error(w, msg, status)
 }
@@ -0,0 +1,362 @@
 // repo_ops_test.go — integration tests for the repo run-now endpoints:
 // prune, check, unlock.
 package http
 import (
 	"context"
 	"encoding/json"
 	stdhttp "net/http"
 	"strconv"
 	"testing"
 	"time"
 	"github.com/coder/websocket"
 	"github.com/oklog/ulid/v2"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // ----- helpers -------------------------------------------------------
 // seedInitJob inserts a system-owned job row of kind "init" for the host.
 // Per the original note, this keeps the auto-init path from firing and
 // adding envelopes to the sequence a test is measuring.
 func seedInitJob(t *testing.T, st *store.Store, hostID string) {
 	t.Helper()
 	initJob := store.Job{
 		ID:        ulid.Make().String(),
 		HostID:    hostID,
 		Kind:      "init",
 		ActorKind: "system",
 		CreatedAt: time.Now().UTC(),
 	}
 	err := st.CreateJob(context.Background(), initJob)
 	if err != nil {
 		t.Fatalf("seed init job: %v", err)
 	}
 }
 // setAdminCreds encrypts a fixed set of admin repo credentials with the
 // server's encryptRepoCreds and stores them for the host under
 // store.CredKindAdmin, bypassing the HTTP layer.
 func setAdminCreds(t *testing.T, srv *Server, st *store.Store, hostID string) {
 	t.Helper()
 	creds := repoCredsBlob{
 		RepoURL:      "rest:http://admin.example/h",
 		RepoUsername: "admin",
 		RepoPassword: "prune-pass",
 	}
 	// Second argument to encryptRepoCreds: a per-host, per-slot byte label.
 	label := []byte("host:" + hostID + ":admin")
 	sealed, err := srv.encryptRepoCreds(creds, label)
 	if err != nil {
 		t.Fatalf("encrypt admin creds: %v", err)
 	}
 	err = st.SetHostCredentials(context.Background(), hostID, store.CredKindAdmin, sealed)
 	if err != nil {
 		t.Fatalf("set admin creds: %v", err)
 	}
 }
 // setMaintenanceSubset makes sure the host has a repo-maintenance row,
 // then overwrites its CheckSubsetPct with pct through the store.
 func setMaintenanceSubset(t *testing.T, st *store.Store, hostID string, pct int) {
 	t.Helper()
 	bg := context.Background()
 	// Seed the default row so the read-modify-write below has a target.
 	if err := st.CreateDefaultRepoMaintenance(bg, hostID); err != nil {
 		t.Fatalf("seed maintenance: %v", err)
 	}
 	maint, err := st.GetRepoMaintenance(bg, hostID)
 	if err != nil {
 		t.Fatalf("get maintenance: %v", err)
 	}
 	maint.CheckSubsetPct = pct
 	err = st.UpdateRepoMaintenance(bg, maint)
 	if err != nil {
 		t.Fatalf("update maintenance: %v", err)
 	}
 }
 // drainCommandRun waits (via drainUntil) for the next command.run
 // envelope on c and returns its decoded payload. A payload that fails to
 // decode is fatal to the test.
 func drainCommandRun(t *testing.T, c *websocket.Conn) api.CommandRunPayload {
 	t.Helper()
 	var payload api.CommandRunPayload
 	envelope := drainUntil(t, c, api.MsgCommandRun)
 	decodeErr := envelope.UnmarshalPayload(&payload)
 	if decodeErr != nil {
 		t.Fatalf("unmarshal command.run: %v", decodeErr)
 	}
 	return payload
 }
 // ----- prune tests ---------------------------------------------------
 // TestRunPruneRefusesWithoutAdminCreds posts a prune for a connected host
 // that has no admin credentials stored. Expected: HTTP 400 with code
 // admin_creds_required, and no prune job row in the jobs table.
 func TestRunPruneRefusesWithoutAdminCreds(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "prune-no-admin")
 	cookie := loginAsAdmin(t, st)
 	seedInitJob(t, st, hostID)
 	agent := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, agent, "prune-no-admin")
 	_ = drainUntil(t, agent, api.MsgScheduleSet)
 	endpoint := "/api/hosts/" + hostID + "/repo/prune"
 	status, body := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
 	if status != stdhttp.StatusBadRequest {
 		t.Fatalf("want 400, got %d body=%+v", status, body)
 	}
 	code, _ := body["code"].(string)
 	if code != "admin_creds_required" {
 		t.Errorf("want code=admin_creds_required, got %+v", body)
 	}
 	// The refusal must happen before any prune job row is written.
 	var pruneJobs int
 	row := st.DB().QueryRow(
 		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'prune'`, hostID)
 	if err := row.Scan(&pruneJobs); err != nil {
 		t.Fatalf("count: %v", err)
 	}
 	if pruneJobs != 0 {
 		t.Errorf("unexpected prune job rows: %d", pruneJobs)
 	}
 }
 // TestRunPruneShipsConfigUpdateThenCommandRun: set admin creds, connect
 // host, POST prune. Assert envelope sequence: config.update(slot=admin)
 // → command.run(prune, RequiresAdminCreds=true). Assert job row persisted.
 //
 // Ordering is enforced by the read loop: it stops at the first prune
 // command.run, so sawAdminPush can only be true if the admin
 // config.update arrived before it.
 func TestRunPruneShipsConfigUpdateThenCommandRun(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "prune-happy")
 	cookie := loginAsAdmin(t, st)
 	setAdminCreds(t, srv, st, hostID)
 	seedInitJob(t, st, hostID)
 	c := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, c, "prune-happy")
 	// Drain on-hello burst (repo config.update + schedule.set).
 	_ = drainUntil(t, c, api.MsgScheduleSet)
 	status, body := doJSON(t, ts.URL, "POST", "/api/hosts/"+hostID+"/repo/prune", nil, cookie)
 	if status != stdhttp.StatusAccepted {
 		t.Fatalf("want 202, got %d body=%+v", status, body)
 	}
 	jobID, _ := body["job_id"].(string)
 	if jobID == "" {
 		t.Fatalf("no job_id in response: %+v", body)
 	}
 	// Read envelopes until the prune command.run shows up (or we time
 	// out), noting whether config.update(slot=admin) was seen on the way.
 	deadline := time.Now().Add(3 * time.Second)
 	var sawAdminPush bool
 	var prunePayload *api.CommandRunPayload
 	for (prunePayload == nil) && time.Now().Before(deadline) {
 		ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
 		mt, raw, err := c.Read(ctx)
 		cancel()
 		if err != nil {
 			break
 		}
 		if mt != websocket.MessageText {
 			continue
 		}
 		var env api.Envelope
 		if err := json.Unmarshal(raw, &env); err != nil {
 			continue
 		}
 		switch env.Type {
 		case api.MsgConfigUpdate:
 			var p api.ConfigUpdatePayload
 			if err := env.UnmarshalPayload(&p); err == nil && p.Slot == "admin" {
 				sawAdminPush = true
 			}
 		case api.MsgCommandRun:
 			// p is declared fresh on every pass through this case, so
 			// its address can be kept without an extra copy (the
 			// original copy was also named after the builtin `copy`).
 			var p api.CommandRunPayload
 			if err := env.UnmarshalPayload(&p); err == nil && p.Kind == api.JobPrune {
 				prunePayload = &p
 			}
 		}
 	}
 	if !sawAdminPush {
 		t.Error("expected config.update(slot=admin) before prune dispatch")
 	}
 	if prunePayload == nil {
 		t.Fatal("timed out waiting for command.run(prune)")
 	}
 	if !prunePayload.RequiresAdminCreds {
 		t.Error("prune command.run must have RequiresAdminCreds=true")
 	}
 	if prunePayload.JobID != jobID {
 		t.Errorf("job_id mismatch: dispatch=%s run=%s", jobID, prunePayload.JobID)
 	}
 	// Job row must be persisted.
 	var n int
 	if err := st.DB().QueryRow(
 		`SELECT COUNT(*) FROM jobs WHERE id = ? AND host_id = ? AND kind = 'prune'`,
 		jobID, hostID).Scan(&n); err != nil {
 		t.Fatalf("count: %v", err)
 	}
 	if n != 1 {
 		t.Errorf("prune job row count: want 1, got %d", n)
 	}
 }
 // ----- check tests ---------------------------------------------------
 // TestRunCheckUsesMaintenanceSubset stores check_subset_pct=25 for the
 // host, posts a check with no query override, and expects the dispatched
 // command.run to be of kind check with Args == ["25"].
 func TestRunCheckUsesMaintenanceSubset(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "check-subset")
 	cookie := loginAsAdmin(t, st)
 	setMaintenanceSubset(t, st, hostID, 25)
 	seedInitJob(t, st, hostID)
 	agent := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, agent, "check-subset")
 	_ = drainUntil(t, agent, api.MsgScheduleSet)
 	endpoint := "/api/hosts/" + hostID + "/repo/check"
 	status, body := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
 	if status != stdhttp.StatusAccepted {
 		t.Fatalf("want 202, got %d body=%+v", status, body)
 	}
 	run := drainCommandRun(t, agent)
 	if run.Kind != api.JobCheck {
 		t.Fatalf("kind: want check, got %s", run.Kind)
 	}
 	argsOK := len(run.Args) == 1 && run.Args[0] == "25"
 	if !argsOK {
 		t.Errorf("args: want [25], got %v", run.Args)
 	}
 }
 // TestRunCheckHonorsSubsetOverride stores check_subset_pct=25 and posts a
 // check with ?subset=10. The query value must win: Args == ["10"].
 func TestRunCheckHonorsSubsetOverride(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "check-override")
 	cookie := loginAsAdmin(t, st)
 	setMaintenanceSubset(t, st, hostID, 25)
 	seedInitJob(t, st, hostID)
 	agent := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, agent, "check-override")
 	_ = drainUntil(t, agent, api.MsgScheduleSet)
 	endpoint := "/api/hosts/" + hostID + "/repo/check?subset=10"
 	status, body := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
 	if status != stdhttp.StatusAccepted {
 		t.Fatalf("want 202, got %d body=%+v", status, body)
 	}
 	run := drainCommandRun(t, agent)
 	argsOK := len(run.Args) == 1 && run.Args[0] == "10"
 	if !argsOK {
 		t.Errorf("args: want [10], got %v", run.Args)
 	}
 }
 // TestRunCheckRejectsBadSubsetGracefully posts a check with a non-numeric
 // ?subset=abc. The request must still be accepted (202) and the stored
 // value (30) used, i.e. the parse failure is ignored rather than
 // reported.
 func TestRunCheckRejectsBadSubsetGracefully(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "check-badsubset")
 	cookie := loginAsAdmin(t, st)
 	setMaintenanceSubset(t, st, hostID, 30)
 	seedInitJob(t, st, hostID)
 	agent := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, agent, "check-badsubset")
 	_ = drainUntil(t, agent, api.MsgScheduleSet)
 	endpoint := "/api/hosts/" + hostID + "/repo/check?subset=abc"
 	status, body := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
 	if status != stdhttp.StatusAccepted {
 		t.Fatalf("want 202 (bad subset falls back), got %d body=%+v", status, body)
 	}
 	run := drainCommandRun(t, agent)
 	wantArg := strconv.Itoa(30)
 	argsOK := len(run.Args) == 1 && run.Args[0] == wantArg
 	if !argsOK {
 		t.Errorf("args: want [30], got %v", run.Args)
 	}
 }
 // ----- unlock tests --------------------------------------------------
 // TestRunUnlockNeedsNoAdminCreds posts an unlock for a host with no admin
 // credentials stored. Expected: 202, a command.run of kind unlock, and
 // RequiresAdminCreds left false.
 func TestRunUnlockNeedsNoAdminCreds(t *testing.T) {
 	t.Parallel()
 	srv, ts, st := rawTestServer(t)
 	hostID, token := enrolHostForWS(t, srv, st, "unlock-no-admin")
 	cookie := loginAsAdmin(t, st)
 	seedInitJob(t, st, hostID)
 	agent := agentDial(t, srv, ts, hostID, token)
 	sendHello(t, agent, "unlock-no-admin")
 	_ = drainUntil(t, agent, api.MsgScheduleSet)
 	endpoint := "/api/hosts/" + hostID + "/repo/unlock"
 	status, body := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
 	if status != stdhttp.StatusAccepted {
 		t.Fatalf("want 202, got %d body=%+v", status, body)
 	}
 	run := drainCommandRun(t, agent)
 	if run.Kind != api.JobUnlock {
 		t.Fatalf("kind: want unlock, got %s", run.Kind)
 	}
 	// Unlock runs with the everyday credentials, so the flag stays off.
 	if run.RequiresAdminCreds {
 		t.Error("unlock must not set RequiresAdminCreds")
 	}
 }
 // ----- auth tests ----------------------------------------------------
 // TestRunOpsRequireAuth: unauthenticated POST to each endpoint → 401 on
 // the JSON (/api) routes, and → 303 to /login on the HTMX routes when the
 // request carries HX-Request: true.
 func TestRunOpsRequireAuth(t *testing.T) {
 	t.Parallel()
 	_, url, st := newTestServerWithHub(t)
 	hostID := makeHost(t, st, "auth-host")
 	// No per-iteration `path := path` copies below: the package already
 	// relies on Go 1.22 syntax (range-over-int), and from that version
 	// each loop iteration has its own variable.
 	for _, path := range []string{
 		"/api/hosts/" + hostID + "/repo/prune",
 		"/api/hosts/" + hostID + "/repo/check",
 		"/api/hosts/" + hostID + "/repo/unlock",
 	} {
 		t.Run(path, func(t *testing.T) {
 			t.Parallel()
 			req, err := stdhttp.NewRequest("POST", url+path, nil)
 			if err != nil {
 				t.Fatalf("new request: %v", err)
 			}
 			res, err := stdhttp.DefaultClient.Do(req)
 			if err != nil {
 				t.Fatalf("do: %v", err)
 			}
 			defer res.Body.Close()
 			if res.StatusCode != stdhttp.StatusUnauthorized {
 				t.Errorf("want 401, got %d", res.StatusCode)
 			}
 		})
 	}
 	// HTMX path: unauthenticated POST with HX-Request: true → 303 to /login.
 	// Auth check fires before host lookup so the host ID doesn't need to exist.
 	for _, path := range []string{
 		"/hosts/" + hostID + "/repo/prune",
 		"/hosts/" + hostID + "/repo/check",
 		"/hosts/" + hostID + "/repo/unlock",
 	} {
 		t.Run("htmx"+path, func(t *testing.T) {
 			t.Parallel()
 			// Don't follow the redirect; the 303 itself is under test.
 			client := &stdhttp.Client{
 				CheckRedirect: func(_ *stdhttp.Request, _ []*stdhttp.Request) error {
 					return stdhttp.ErrUseLastResponse
 				},
 			}
 			req, err := stdhttp.NewRequest("POST", url+path, nil)
 			if err != nil {
 				t.Fatalf("new request: %v", err)
 			}
 			req.Header.Set("HX-Request", "true")
 			res, err := client.Do(req)
 			if err != nil {
 				t.Fatalf("do: %v", err)
 			}
 			defer res.Body.Close()
 			if res.StatusCode != stdhttp.StatusSeeOther {
 				t.Errorf("want 303, got %d", res.StatusCode)
 			}
 			if loc := res.Header.Get("Location"); loc != "/login" {
 				t.Errorf("want Location=/login, got %q", loc)
 			}
 		})
 	}
 }
@@ -164,15 +164,19 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
 	}
 }
-// dispatchBackupForGroup builds and sends a single backup command.run
+// dispatchBackupForGroupCore persists a backup job row, marshals and
-// envelope on conn for the given group. Persists the job row first so
+// sends the command.run envelope, and audit-logs the dispatch. It does
-// the live log viewer can subscribe to it.
+// NOT enqueue a PendingRun on failure — that responsibility belongs to
-// dispatchBackupForGroup persists a backup job row, sends the
+// the caller when appropriate.
-// command.run envelope to the agent, and audit-logs the dispatch.
+//
-// Returns the persisted job ID on success, or "" on any failure
+// Returns (jobID, nil) on success. Returns ("", err) on any failure;
-// (failures are slog.Warn-ed). Callers may use the returned ID to,
+// the error is also slog.Warn-ed inside this function so callers don't
-// e.g., redirect the UI to the live job log.
+// need to log it again.
-func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, hostID, scheduleID string, g *store.SourceGroup, scheduledAt time.Time) string {
+//
 // Used by both dispatchBackupForGroup (schedule.fire path, which adds
 // enqueue-on-failure) and drainOne (which handles failure via
 // BumpPendingRunAttempt on the existing row, avoiding double-enqueue).
 func (s *Server) dispatchBackupForGroupCore(ctx context.Context, conn *ws.Conn, hostID, scheduleID string, g *store.SourceGroup, scheduledAt time.Time) (string, error) {
 	jobID := ulid.Make().String()
 	now := time.Now().UTC()
 	scheduleRef := scheduleID
@@ -186,7 +190,7 @@ func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, host
 	}); err != nil {
 		slog.Warn("schedule.fire: persist job", "host_id", hostID,
 			"schedule_id", scheduleID, "group", g.Name, "err", err)
-		return ""
+		return "", err
 	}
 	// Backup ignores RetentionPolicy — the forget cadence lives on
 	// host_repo_maintenance and is driven by the server-side ticker
@@ -201,14 +205,17 @@ func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, host
 	if err != nil {
 		slog.Warn("schedule.fire: marshal command.run",
 			"host_id", hostID, "schedule_id", scheduleID, "err", err)
-		return ""
+		return "", err
 	}
 	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
 	defer cancel()
 	if err := conn.Send(sendCtx, env); err != nil {
-		slog.Warn("schedule.fire: send command.run",
+		slog.Warn("schedule.fire: send command.run failed",
-			"host_id", hostID, "schedule_id", scheduleID, "err", err)
+			"host_id", hostID, "schedule_id", scheduleID, "group", g.Name, "err", err)
-		return ""
+		// The job row was already persisted — leave it in `queued` status.
 		// The drainer will re-dispatch (creating a new job row) and the
 		// orphaned queued row stays for forensic visibility.
 		return "", err
 	}
 	_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
 		ID:         ulid.Make().String(),
@@ -221,5 +228,37 @@ func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, host
 	slog.Info("schedule.fire: dispatched backup",
 		"host_id", hostID, "schedule_id", scheduleID,
 		"group", g.Name, "job_id", jobID, "scheduled_at", scheduledAt)
-	return jobID
+	return jobID, nil
 }
 // dispatchBackupForGroup is the schedule.fire entry point. It wraps
 // dispatchBackupForGroupCore with enqueue-on-failure: when the core
 // returns an error (from any of its steps, not only the Send), a fresh
 // PendingRun is queued for the drainer to retry later.
 //
 // The retry row is written with a context that is detached from ctx's
 // cancellation and bounded by its own short timeout. The core derives
 // its send timeout from ctx, so a dispatch can fail because ctx itself
 // was cancelled or hit its deadline; reusing that dead context would
 // make the enqueue fail as well and the run would be dropped.
 //
 // Returns the persisted job ID on success, or "" on any failure.
 func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, hostID, scheduleID string, g *store.SourceGroup, scheduledAt time.Time) string {
 	jobID, err := s.dispatchBackupForGroupCore(ctx, conn, hostID, scheduleID, g, scheduledAt)
 	if err == nil {
 		return jobID
 	}
 	// err was already logged inside the core; only the retry row remains.
 	backoff := time.Duration(g.RetryBackoffSeconds) * time.Second
 	if backoff <= 0 {
 		// Fallback when the group carries no positive backoff.
 		backoff = 60 * time.Second
 	}
 	// Keep ctx's values but not its cancellation, so the retry row can
 	// still be written after the dispatch context has died.
 	enqueueCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second)
 	defer cancel()
 	if enqueueErr := s.deps.Store.EnqueuePendingRun(enqueueCtx, &store.PendingRun{
 		ID:            ulid.Make().String(),
 		ScheduleID:    scheduleID,
 		SourceGroupID: g.ID,
 		HostID:        hostID,
 		Attempt:       1,
 		NextAttemptAt: time.Now().UTC().Add(backoff),
 		ScheduledAt:   scheduledAt,
 		LastError:     err.Error(),
 	}); enqueueErr != nil {
 		slog.Warn("schedule.fire: enqueue pending run failed",
 			"host_id", hostID, "schedule_id", scheduleID, "group", g.Name, "err", enqueueErr)
 	}
 	return ""
 }
@@ -7,6 +7,7 @@ import (
 	"context"
 	"errors"
 	stdhttp "net/http"
 	"sync"
 	"time"
 	"github.com/go-chi/chi/v5"
@@ -41,6 +42,13 @@ type Deps struct {
 type Server struct {
 	// srv is the underlying net/http server; deps holds the injected
 	// dependencies (see Deps).
 	srv  *stdhttp.Server
 	deps Deps
 	// drainLocks serializes DrainPending per host. The on-hello
 	// goroutine and the 30s ticker can otherwise race for the same
 	// host, double-dispatching every pending row. Map of hostID →
 	// *sync.Mutex; the map itself is guarded by drainLocksMu so the
 	// per-host mutex is looked up (checked-and-locked) atomically.
 	drainLocksMu sync.Mutex
 	drainLocks   map[string]*sync.Mutex
 }
 // New builds a configured but not-yet-started server.
@@ -59,7 +67,7 @@ func New(deps Deps) *Server {
 		w.WriteHeader(stdhttp.StatusNoContent)
 	})
-	s := &Server{deps: deps}
+	s := &Server{deps: deps, drainLocks: make(map[string]*sync.Mutex)}
 	s.routes(r)
 	s.srv = &stdhttp.Server{
@@ -105,6 +113,13 @@ func (s *Server) routes(r chi.Router) {
 		r.Get("/hosts/{id}/repo-credentials", s.handleGetHostCredentials)
 		r.Put("/hosts/{id}/repo-credentials", s.handleSetHostCredentials)
 		// Admin credentials — the prune-capable slot (separate from the
 		// everyday repo creds). Optional: hosts that don't prune against
 		// a rest-server repo with a separate admin user never need this.
 		r.Get("/hosts/{id}/admin-credentials", s.handleGetAdminCredentials)
 		r.Put("/hosts/{id}/admin-credentials", s.handleSetAdminCredentials)
 		r.Delete("/hosts/{id}/admin-credentials", s.handleDeleteAdminCredentials)
 		// Per-host schedule CRUD. Mutations bump host_schedule_version
 		// and async-push to a connected agent (see schedule_push.go).
 		r.Get("/hosts/{id}/schedules", s.handleListSchedules)
@@ -134,12 +149,23 @@ func (s *Server) routes(r chi.Router) {
 		// mounted at the equivalent path outside /api below — both
 		// resolve to the same handler, which sniffs HX-Request.
 		r.Post("/hosts/{id}/source-groups/{gid}/run", s.handleRunSourceGroup)
 		// Repo-level run-now: prune (needs admin creds), check, unlock.
 		// HTMX forms are also mounted outside /api below.
 		r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune)
 		r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck)
 		r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock)
 	})
 	// Per-source-group Run-now (HTMX form action). Available even
 	// when the server is started without UI templates so REST callers
 	// against the non-/api path also work.
 	r.Post("/hosts/{id}/source-groups/{gid}/run", s.handleRunSourceGroup)
 	// Repo-level run-now (HTMX form actions). Same handlers as the /api
 	// variants — wantsHTML sniff distinguishes JSON vs HTMX response.
 	r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune)
 	r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck)
 	r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock)
 	// Retired routes — see ui_handlers.go for the messages. Mounted
 	// outside the UI gate so cached browser tabs get a clear 410
 	// even if the server runs without templates.
@@ -202,6 +228,9 @@ func (s *Server) routes(r chi.Router) {
 		r.Post("/hosts/{id}/repo/credentials", s.handleUIRepoCredentialsSave)
 		r.Post("/hosts/{id}/repo/bandwidth", s.handleUIRepoBandwidthSave)
 		r.Post("/hosts/{id}/repo/maintenance", s.handleUIRepoMaintenanceSave)
 		// Admin credentials form (separate slot for prune-capable user).
 		r.Post("/hosts/{id}/admin-credentials", s.handleUIAdminCredentialsSave)
 		r.Post("/hosts/{id}/admin-credentials/delete", s.handleUIAdminCredentialsDelete)
 		// Schedules tab + create/edit/delete forms.
 		r.Get("/hosts/{id}/schedules", s.handleUISchedulesList)
 		r.Get("/hosts/{id}/schedules/new", s.handleUIScheduleNewGet)
@@ -7,6 +7,9 @@ import (
 	stdhttp "net/http"
 	"strconv"
 	"strings"
 	"time"
 	"github.com/oklog/ulid/v2"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
@@ -17,10 +20,31 @@ import (
 // the page into three independent forms so saving one section
 // doesn't disturb the others.
 //
-//   GET  /hosts/{id}/repo                       — render
+//   GET  /hosts/{id}/repo                            — render
-//   POST /hosts/{id}/repo/credentials           — connection
+//   POST /hosts/{id}/repo/credentials                — connection
-//   POST /hosts/{id}/repo/bandwidth             — host-wide bw caps
+//   POST /hosts/{id}/repo/bandwidth                  — host-wide bw caps
-//   POST /hosts/{id}/repo/maintenance           — forget/prune/check cadences
+//   POST /hosts/{id}/repo/maintenance                — forget/prune/check cadences
 //   POST /hosts/{id}/admin-credentials               — admin (prune) creds
 //   POST /hosts/{id}/admin-credentials/delete        — clear admin creds
 // repoStatsView is a flat, pre-dereferenced projection of
 // store.HostRepoStats for use in templates. Nil pointer fields are
 // collapsed to zero/false and accompanied by a Has* sentinel so the
 // template can distinguish "zero" from "not yet known."
 //
 // Field notes (as populated by loadHostRepoPage):
 //   - HasTotalSize, HasRawSize, HasLastCheck and HasLastPrune are set
 //     only when the corresponding pointer on the stats row was non-nil;
 //     the paired value field is meaningful only in that case.
 //   - LastCheckAgo and LastPruneAgo are relTimeAgo renderings of
 //     LastCheckAt and LastPruneAt, computed when the page is loaded.
 //   - LastCheckStatus is copied through from the stats row unchanged.
 //   - LockPresent has no Has* sentinel: an unknown (nil) lock state
 //     reads as false.
 type repoStatsView struct {
 	HasTotalSize    bool
 	TotalSizeBytes  int64
 	HasRawSize      bool
 	RawSizeBytes    int64
 	HasLastCheck    bool
 	LastCheckAt     time.Time
 	LastCheckAgo    string
 	LastCheckStatus string
 	LockPresent     bool
 	HasLastPrune    bool
 	LastPruneAt     time.Time
 	LastPruneAgo    string
 }
 type hostRepoPage struct {
 	hostChromeData
@@ -30,6 +54,11 @@ type hostRepoPage struct {
 	RepoUsername string
 	HasPassword  bool
 	// Admin credentials (optional, prune-only — separate slot).
 	AdminURL         string
 	AdminUsername    string
 	HasAdminPassword bool
 	// Bandwidth (form values, blank means "no cap")
 	BandwidthUp   string
 	BandwidthDown string
@@ -37,6 +66,14 @@ type hostRepoPage struct {
 	// Maintenance row
 	Maintenance store.HostRepoMaintenance
 	// Online mirrors Hub.Connected so Run-now button disabled state is
 	// accurate at render time.
 	Online bool
 	// StatsView is a pre-dereferenced projection of host_repo_stats.
 	// Nil when no row exists yet (fresh hosts).
 	StatsView *repoStatsView
 	// Snapshots-by-tag — map[group_name]count, plus an "untagged" row.
 	SnapshotsByTag    map[string]int
 	UntaggedSnapshots int
@@ -44,6 +81,7 @@ type hostRepoPage struct {
 	// Inline form-error banners. Empty when no error for that section.
 	CredentialsError string
 	AdminCredsError  string
 	BandwidthError   string
 	MaintenanceError string
@@ -61,7 +99,7 @@ func (s *Server) loadHostRepoPage(r *stdhttp.Request, host store.Host) (*hostRep
 	}
 	// Credentials (redacted).
-	enc, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID)
+	enc, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo)
 	switch {
 	case err == nil:
 		plain, derr := s.deps.AEAD.Decrypt(enc, []byte("host:"+host.ID))
@@ -79,6 +117,60 @@ func (s *Server) loadHostRepoPage(r *stdhttp.Request, host store.Host) (*hostRep
 		return nil, err
 	}
 	// Admin credentials (optional — prune-only slot).
 	adminEnc, aerr := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindAdmin)
 	switch {
 	case aerr == nil:
 		plain, derr := s.deps.AEAD.Decrypt(adminEnc, []byte("host:"+host.ID+":admin"))
 		if derr == nil {
 			var blob repoCredsBlob
 			if jerr := json.Unmarshal(plain, &blob); jerr == nil {
 				p.AdminURL = blob.RepoURL
 				p.AdminUsername = blob.RepoUsername
 				p.HasAdminPassword = blob.RepoPassword != ""
 			}
 		}
 	case errors.Is(aerr, store.ErrNotFound):
 		// admin slot not configured — fine
 	default:
 		return nil, aerr
 	}
 	// Online status.
 	if s.deps.Hub != nil {
 		p.Online = s.deps.Hub.Connected(host.ID)
 	}
 	// Repo stats (tolerate ErrNotFound — fresh hosts have no row yet).
 	if stats, serr := s.deps.Store.GetHostRepoStats(r.Context(), host.ID); serr == nil {
 		sv := &repoStatsView{}
 		if stats.TotalSizeBytes != nil {
 			sv.HasTotalSize = true
 			sv.TotalSizeBytes = *stats.TotalSizeBytes
 		}
 		if stats.RawSizeBytes != nil {
 			sv.HasRawSize = true
 			sv.RawSizeBytes = *stats.RawSizeBytes
 		}
 		if stats.LastCheckAt != nil {
 			sv.HasLastCheck = true
 			sv.LastCheckAt = *stats.LastCheckAt
 			sv.LastCheckAgo = relTimeAgo(*stats.LastCheckAt)
 		}
 		sv.LastCheckStatus = stats.LastCheckStatus
 		if stats.LockPresent != nil {
 			sv.LockPresent = *stats.LockPresent
 		}
 		if stats.LastPruneAt != nil {
 			sv.HasLastPrune = true
 			sv.LastPruneAt = *stats.LastPruneAt
 			sv.LastPruneAgo = relTimeAgo(*stats.LastPruneAt)
 		}
 		p.StatsView = sv
 	} else if !errors.Is(serr, store.ErrNotFound) {
 		return nil, serr
 	}
 	// Bandwidth.
 	if host.BandwidthUpKBps != nil {
 		p.BandwidthUp = strconv.Itoa(*host.BandwidthUpKBps)
@@ -152,11 +244,11 @@ func (s *Server) handleUIHostRepo(w stdhttp.ResponseWriter, r *stdhttp.Request)
 	}
 }
-// renderRepoFormError loads the page state, overlays the section's
+// renderRepoPage loads the page state, overlays section error banners,
-// error banner, and renders with a 422. Save-success goes through a
+// and renders with a 422. Save-success goes through a 303 redirect
-// 303 redirect with `?saved=<section>` instead, so this path is for
+// with `?saved=<section>` instead, so this path is for validation
-// validation failures only.
+// failures only.
-func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u *ui.User, host *store.Host, credErr, bwErr, mntErr string) {
+func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u *ui.User, host *store.Host, credErr, adminErr, bwErr, mntErr string) {
 	page, err := s.loadHostRepoPage(r, *host)
 	if err != nil {
 		slog.Error("ui repo: reload after save", "host_id", host.ID, "err", err)
@@ -164,6 +256,7 @@ func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u
 		return
 	}
 	page.CredentialsError = credErr
 	page.AdminCredsError = adminErr
 	page.BandwidthError = bwErr
 	page.MaintenanceError = mntErr
 	view := s.baseView(u)
@@ -198,13 +291,13 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
 	repoPass := r.PostForm.Get("repo_password") // do NOT trim — operators may use trailing space deliberately
 	if repoURL == "" {
-		s.renderRepoPage(w, r, u, host, "Repo URL is required.", "", "")
+		s.renderRepoPage(w, r, u, host, "Repo URL is required.", "", "", "")
 		return
 	}
 	// Merge with existing blob — same semantics as the JSON PUT.
 	existing := repoCredsBlob{}
-	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID); err == nil {
+	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo); err == nil {
 		if plain, derr := s.deps.AEAD.Decrypt(cur, []byte("host:"+host.ID)); derr == nil {
 			_ = json.Unmarshal(plain, &existing)
 		}
@@ -217,7 +310,7 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
 	if existing.RepoPassword == "" {
 		s.renderRepoPage(w, r, u, host,
 			"No password on file yet — set one before saving the URL/username.",
-			"", "")
+			"", "", "")
 		return
 	}
@@ -227,7 +320,7 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
 	}
-	if err := s.deps.Store.SetHostCredentials(r.Context(), host.ID, enc); err != nil {
+	if err := s.deps.Store.SetHostCredentials(r.Context(), host.ID, store.CredKindRepo, enc); err != nil {
 		slog.Error("ui repo creds: persist", "err", err)
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
@@ -256,7 +349,7 @@ func (s *Server) handleUIRepoBandwidthSave(w stdhttp.ResponseWriter, r *stdhttp.
 	up, upErr := parseOptionalNonNegInt(r.PostForm.Get("bandwidth_up"))
 	down, downErr := parseOptionalNonNegInt(r.PostForm.Get("bandwidth_down"))
 	if upErr != nil || downErr != nil {
-		s.renderRepoPage(w, r, u, host, "",
+		s.renderRepoPage(w, r, u, host, "", "",
 			"Bandwidth caps must be non-negative whole numbers (or blank for no cap).",
 			"")
 		return
@@ -294,19 +387,19 @@ func (s *Server) handleUIRepoMaintenanceSave(w stdhttp.ResponseWriter, r *stdhtt
 		"forget": forgetCron, "prune": pruneCron, "check": checkCron,
 	} {
 		if expr == "" {
-			s.renderRepoPage(w, r, u, host, "", "",
+			s.renderRepoPage(w, r, u, host, "", "", "",
 				label+" cadence is required.")
 			return
 		}
 		if _, err := cronParser.Parse(expr); err != nil {
-			s.renderRepoPage(w, r, u, host, "", "",
+			s.renderRepoPage(w, r, u, host, "", "", "",
 				label+" cadence didn't parse: "+err.Error())
 			return
 		}
 	}
 	subset, err := strconv.Atoi(subsetStr)
 	if err != nil || subset < 0 || subset > 100 {
-		s.renderRepoPage(w, r, u, host, "", "",
+		s.renderRepoPage(w, r, u, host, "", "", "",
 			"check subset % must be between 0 and 100.")
 		return
 	}
@@ -348,3 +441,143 @@ func parseOptionalNonNegInt(s string) (*int, error) {
 	}
 	return &n, nil
 }
 // relTimeAgo renders how long ago t was as a compact label for stats
 // panels: "just now" under a minute, then whole minutes ("5m ago"),
 // whole hours ("3h ago") or whole days ("2d ago"). Anything 30 days or
 // older falls back to the calendar date (YYYY-MM-DD). A timestamp in
 // the future is treated as "just now". It is a plain Go helper rather
 // than a template func so handlers can call it directly.
 func relTimeAgo(t time.Time) string {
 	const day = 24 * time.Hour
 	elapsed := time.Since(t)
 	if elapsed < 0 {
 		// Clock skew or a future timestamp: clamp instead of going negative.
 		elapsed = 0
 	}
 	if elapsed < time.Minute {
 		return "just now"
 	}
 	if elapsed < time.Hour {
 		return strconv.Itoa(int(elapsed.Minutes())) + "m ago"
 	}
 	if elapsed < day {
 		return strconv.Itoa(int(elapsed.Hours())) + "h ago"
 	}
 	if elapsed < 30*day {
 		return strconv.Itoa(int(elapsed.Hours()/24)) + "d ago"
 	}
 	return t.Format("2006-01-02")
 }
 // handleUIAdminCredentialsSave serves the HTML form POST to
 // /hosts/{id}/admin-credentials. It is the admin-slot twin of
 // handleUIRepoCredentialsSave: the blob is stored under
 // store.CredKindAdmin and sealed with AAD "host:<id>:admin".
 //
 // Flow: an entirely blank form is a no-op redirect. Otherwise the
 // submitted URL and username overwrite whatever is stored, while a
 // blank password keeps the stored one. Validation failures re-render
 // the repo page with an inline admin-section error; success persists,
 // audits, pushes to the agent when it is connected (a push failure is
 // only logged), and redirects with ?saved=admin_credentials.
 func (s *Server) handleUIAdminCredentialsSave(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	u := s.requireUIUser(w, r)
 	if u == nil {
 		return
 	}
 	host, ok := s.loadHostForUI(w, r)
 	if !ok {
 		return
 	}
 	if err := r.ParseForm(); err != nil {
 		stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
 		return
 	}
 	form := r.PostForm
 	urlIn := strings.TrimSpace(form.Get("repo_url"))
 	userIn := strings.TrimSpace(form.Get("repo_username"))
 	passIn := form.Get("repo_password") // not trimmed, same as the repo-credentials form
 	repoPagePath := "/hosts/" + host.ID + "/repo"
 	// Nothing entered at all: the operator hit Save on an empty form.
 	// Harmless, so bounce back without a banner and without validating.
 	nothingEntered := urlIn == "" && userIn == "" && passIn == ""
 	if nothingEntered {
 		stdhttp.Redirect(w, r, repoPagePath, stdhttp.StatusSeeOther)
 		return
 	}
 	aad := []byte("host:" + host.ID + ":admin")
 	// Start from the stored admin blob when one exists and decodes; any
 	// failure along the way simply leaves the zero blob.
 	var merged repoCredsBlob
 	cur, gerr := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindAdmin)
 	if gerr == nil {
 		plain, derr := s.deps.AEAD.Decrypt(cur, aad)
 		if derr == nil {
 			_ = json.Unmarshal(plain, &merged)
 		}
 	}
 	merged.RepoURL = urlIn
 	merged.RepoUsername = userIn
 	if passIn != "" {
 		merged.RepoPassword = passIn
 	}
 	switch {
 	case merged.RepoURL == "":
 		s.renderRepoPage(w, r, u, host, "", "Repo URL is required.", "", "")
 		return
 	case merged.RepoPassword == "":
 		s.renderRepoPage(w, r, u, host, "",
 			"No password on file yet — set one before saving the URL/username.",
 			"", "")
 		return
 	}
 	sealed, err := s.encryptRepoCreds(merged, aad)
 	if err != nil {
 		slog.Error("ui admin creds: encrypt", "err", err)
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
 	}
 	if err := s.deps.Store.SetHostCredentials(r.Context(), host.ID, store.CredKindAdmin, sealed); err != nil {
 		slog.Error("ui admin creds: persist", "err", err)
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
 	}
 	// Audit is best-effort: a failed append must not undo the save.
 	auditRow := store.AuditEntry{
 		ID:         ulid.Make().String(),
 		UserID:     &u.ID,
 		Actor:      "user",
 		Action:     "host.admin_credentials_set",
 		TargetKind: ptr("host"),
 		TargetID:   &host.ID,
 		TS:         nowUTC(),
 	}
 	_ = s.deps.Store.AppendAudit(r.Context(), auditRow)
 	if hub := s.deps.Hub; hub != nil && hub.Connected(host.ID) {
 		if perr := s.pushAdminCredsToAgent(r.Context(), host.ID); perr != nil {
 			slog.Warn("ui admin creds: push to agent", "host_id", host.ID, "err", perr)
 		}
 	}
 	stdhttp.Redirect(w, r, repoPagePath+"?saved=admin_credentials", stdhttp.StatusSeeOther)
 }
 // handleUIAdminCredentialsDelete serves the HTML form POST to
 // /hosts/{id}/admin-credentials/delete. It clears the admin slot and
 // redirects back to the repo page. A missing row counts as success, so
 // the delete is idempotent from the operator's point of view; an audit
 // row is written only when something was actually deleted.
 func (s *Server) handleUIAdminCredentialsDelete(w stdhttp.ResponseWriter, r *stdhttp.Request) {
 	u := s.requireUIUser(w, r)
 	if u == nil {
 		return
 	}
 	host, ok := s.loadHostForUI(w, r)
 	if !ok {
 		return
 	}
 	delErr := s.deps.Store.DeleteHostCredentials(r.Context(), host.ID, store.CredKindAdmin)
 	switch {
 	case delErr == nil:
 		// A row was removed: record it (best-effort).
 		_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
 			ID:         ulid.Make().String(),
 			UserID:     &u.ID,
 			Actor:      "user",
 			Action:     "host.admin_credentials_deleted",
 			TargetKind: ptr("host"),
 			TargetID:   &host.ID,
 			TS:         nowUTC(),
 		})
 	case errors.Is(delErr, store.ErrNotFound):
 		// Nothing was stored: fall through to the redirect.
 	default:
 		slog.Error("ui admin creds: delete", "host_id", host.ID, "err", delErr)
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
 	}
 	stdhttp.Redirect(w, r, "/hosts/"+host.ID+"/repo?saved=admin_credentials", stdhttp.StatusSeeOther)
 }
@@ -0,0 +1,400 @@
 // ui_repo_test.go — integration tests for the Repo page HTML UI.
 // Covers: admin-creds form rendering, stats panel, lock banner,
 // run-now button disabled states, admin-creds form save/delete.
 package http
 import (
 	"context"
 	"io"
 	stdhttp "net/http"
 	"net/http/httptest"
 	"net/url"
 	"path/filepath"
 	"strings"
 	"testing"
 	"time"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // newTestServerWithUI creates a server that includes the UI renderer so
 // HTML page tests can render and inspect the full template output.
 //
 // It opens a fresh store and generates a secret key inside t.TempDir(),
 // builds the AEAD and the UI renderer, and serves the handler through
 // httptest. Store and HTTP server are closed via t.Cleanup. Any setup
 // failure aborts the test immediately, so a broken fixture never
 // surfaces later as an unrelated nil-AEAD or decrypt failure.
 //
 // Returns the Server, the httptest base URL, and the backing store.
 func newTestServerWithUI(t *testing.T) (*Server, string, *store.Store) {
 	t.Helper()
 	dir := t.TempDir()
 	st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
 	if err != nil {
 		t.Fatalf("store: %v", err)
 	}
 	t.Cleanup(func() { _ = st.Close() })
 	keyPath := filepath.Join(dir, "secret.key")
 	if err := crypto.GenerateKeyFile(keyPath); err != nil {
 		t.Fatalf("generate key file: %v", err)
 	}
 	key, err := crypto.LoadKeyFromFile(keyPath)
 	if err != nil {
 		t.Fatalf("load key file: %v", err)
 	}
 	aead, err := crypto.NewAEAD(key)
 	if err != nil {
 		t.Fatalf("new AEAD: %v", err)
 	}
 	renderer, err := ui.New()
 	if err != nil {
 		t.Fatalf("ui.New: %v", err)
 	}
 	deps := Deps{
 		Cfg:   config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath},
 		Store: st,
 		AEAD:  aead,
 		Hub:   ws.NewHub(),
 		UI:    renderer,
 	}
 	s := New(deps)
 	ts := httptest.NewServer(s.srv.Handler)
 	t.Cleanup(ts.Close)
 	return s, ts.URL, st
 }
 // getRepoPage fetches /hosts/{id}/repo without following redirects and
 // returns the response body as a string. The test fails immediately if
 // the request cannot be built or sent, if the status is not 200, or if
 // the body cannot be read. cookie is optional: nil sends the request
 // without a cookie (same convention as postForm) instead of panicking
 // inside AddCookie.
 func getRepoPage(t *testing.T, baseURL, hostID string, cookie *stdhttp.Cookie) string {
 	t.Helper()
 	client := &stdhttp.Client{
 		// Surface redirects to the caller rather than following them.
 		CheckRedirect: func(_ *stdhttp.Request, _ []*stdhttp.Request) error {
 			return stdhttp.ErrUseLastResponse
 		},
 	}
 	req, err := stdhttp.NewRequest("GET", baseURL+"/hosts/"+hostID+"/repo", nil)
 	if err != nil {
 		t.Fatalf("new request: %v", err)
 	}
 	if cookie != nil {
 		req.AddCookie(cookie)
 	}
 	res, err := client.Do(req)
 	if err != nil {
 		t.Fatalf("GET /hosts/%s/repo: %v", hostID, err)
 	}
 	defer res.Body.Close()
 	if res.StatusCode != stdhttp.StatusOK {
 		t.Fatalf("GET /hosts/%s/repo: want 200, got %d", hostID, res.StatusCode)
 	}
 	raw, err := io.ReadAll(res.Body)
 	if err != nil {
 		// A truncated body would otherwise masquerade as missing markup.
 		t.Fatalf("GET /hosts/%s/repo: read body: %v", hostID, err)
 	}
 	return string(raw)
 }
 // postForm sends data as an application/x-www-form-urlencoded POST to
 // baseURL+path and reports the response status code together with its
 // Location header. Redirects are never followed, so a 303 is returned
 // to the caller as-is. cookie may be nil for an unauthenticated request.
 func postForm(t *testing.T, baseURL, path string, data url.Values, cookie *stdhttp.Cookie) (int, string) {
 	t.Helper()
 	noFollow := func(*stdhttp.Request, []*stdhttp.Request) error {
 		return stdhttp.ErrUseLastResponse
 	}
 	payload := strings.NewReader(data.Encode())
 	req, err := stdhttp.NewRequest("POST", baseURL+path, payload)
 	if err != nil {
 		t.Fatalf("new request: %v", err)
 	}
 	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
 	if cookie != nil {
 		req.AddCookie(cookie)
 	}
 	httpClient := &stdhttp.Client{CheckRedirect: noFollow}
 	resp, err := httpClient.Do(req)
 	if err != nil {
 		t.Fatalf("POST %s: %v", path, err)
 	}
 	defer resp.Body.Close()
 	location := resp.Header.Get("Location")
 	return resp.StatusCode, location
 }
 // ----- rendering tests ------------------------------------------------
 // TestUIRepoPageRendersAdminCredsForm — render /hosts/{id}/repo for a
 // host that has no admin creds and check that both the admin-creds
 // section heading and the "not yet set" password placeholder are in
 // the markup.
 func TestUIRepoPageRendersAdminCredsForm(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "repo-page-admin-form")
 	page := getRepoPage(t, baseURL, hostID, cookie)
 	expectations := []struct {
 		needle  string
 		failure string
 	}{
 		{"Admin credentials", "page missing 'Admin credentials' heading"},
 		{"— not yet set —", "page missing '— not yet set —' placeholder for admin password"},
 	}
 	for _, e := range expectations {
 		if !strings.Contains(page, e.needle) {
 			t.Error(e.failure)
 		}
 	}
 }
 // TestUIRepoPageRendersStatsPanel — store a host_repo_stats row (total
 // size, last check time and status), render the repo page, and expect
 // the "Repo health" panel plus the seeded values in the output.
 //
 // NOTE(review): "5.0" and "ok" are very short needles and could match
 // unrelated markup; consider anchoring them to the panel.
 func TestUIRepoPageRendersStatsPanel(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "repo-page-stats")
 	sizeBytes := int64(5_000_000_000) // 5 GB
 	lastCheck := time.Now().Add(-2 * time.Hour).UTC()
 	seed := store.HostRepoStats{
 		TotalSizeBytes:  &sizeBytes,
 		LastCheckAt:     &lastCheck,
 		LastCheckStatus: "ok",
 	}
 	if err := st.UpsertHostRepoStats(context.Background(), hostID, seed); err != nil {
 		t.Fatalf("upsert stats: %v", err)
 	}
 	page := getRepoPage(t, baseURL, hostID, cookie)
 	// The bytes helper renders 5 GB as "5.0 GB" (with a <span> unit suffix),
 	// hence the bare "5.0" needle.
 	checks := [][2]string{
 		{"Repo health", "page missing 'Repo health' heading"},
 		{"5.0", "page missing '5.0' (total size formatted bytes)"},
 		{"ok", "page missing 'ok' check status"},
 	}
 	for _, c := range checks {
 		if !strings.Contains(page, c[0]) {
 			t.Error(c[1])
 		}
 	}
 }
 // TestUIRepoPageRendersLockBanner — store a stats row whose LockPresent
 // is true, render the repo page, and expect the stale-lock warning.
 func TestUIRepoPageRendersLockBanner(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "repo-page-lock")
 	locked := true
 	seed := store.HostRepoStats{LockPresent: &locked}
 	if err := st.UpsertHostRepoStats(context.Background(), hostID, seed); err != nil {
 		t.Fatalf("upsert stats: %v", err)
 	}
 	page := getRepoPage(t, baseURL, hostID, cookie)
 	if hasBanner := strings.Contains(page, "Stale lock detected"); !hasBanner {
 		t.Error("page missing stale lock warning")
 	}
 }
 // TestUIRepoRunNowButtonsDisabledWhenOffline — host not in the Hub
 // (not connected), render, assert all three buttons carry disabled.
 //
 // The Run-now section is isolated as the markup between the
 // "Run now · one-time" heading and the "Danger zone" heading, and the
 // word "disabled" is counted inside it. The test fails (rather than
 // panicking on an inverted slice range) if the headings are missing or
 // appear in the opposite order.
 func TestUIRepoRunNowButtonsDisabledWhenOffline(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "repo-page-offline")
 	// No WS connection → Hub.Connected returns false.
 	body := getRepoPage(t, baseURL, hostID, cookie)
 	runNowIdx := strings.Index(body, "Run now · one-time")
 	dangerIdx := strings.Index(body, "Danger zone")
 	if runNowIdx < 0 {
 		t.Fatal("page missing 'Run now · one-time' section")
 	}
 	if dangerIdx < 0 {
 		t.Fatal("page missing 'Danger zone' section")
 	}
 	if dangerIdx < runNowIdx {
 		// body[runNowIdx:dangerIdx] would panic with an inverted range.
 		t.Fatalf("'Danger zone' (offset %d) precedes 'Run now · one-time' (offset %d); cannot isolate the Run-now section", dangerIdx, runNowIdx)
 	}
 	runNowSection := body[runNowIdx:dangerIdx]
 	// One disabled attribute is expected per button (three buttons).
 	disabledCount := strings.Count(runNowSection, "disabled")
 	if disabledCount < 3 {
 		t.Errorf("expected at least 3 disabled attributes in Run-now section (one per button), got %d", disabledCount)
 	}
 }
 // TestUIRepoPruneButtonDisabledWithoutAdminCreds — host has no admin
 // creds set. Assert the page mentions "set admin credentials first"
 // for the prune button.
 //
 // The host is NOT connected to the Hub here: faking a WS connection is
 // not practical in this test, so the online-but-no-admin-creds case is
 // not exercised directly. The test relies on the hint being rendered
 // even while the host is offline (the original author's expectation is
 // that the admin-creds check takes precedence in the template).
 func TestUIRepoPruneButtonDisabledWithoutAdminCreds(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "repo-page-prune-no-admin")
 	body := getRepoPage(t, baseURL, hostID, cookie)
 	if !strings.Contains(body, "set admin credentials first") {
 		t.Error("page missing 'set admin credentials first' on prune button")
 	}
 }
 // ----- admin-creds form save/delete tests ----------------------------
 // TestUIAdminCredentialsSaveRoundTrip — submit a complete admin-creds
 // form and expect a 303 whose Location carries saved=admin_credentials.
 // Then re-fetch the repo page and expect the "stored, leave blank to
 // keep" placeholder. Finally confirm that a host.admin_credentials_set
 // audit row exists and is attributed to the logged-in user.
 func TestUIAdminCredentialsSaveRoundTrip(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie, userID := loginAsAdminWithID(t, st)
 	hostID := makeHost(t, st, "admin-save-roundtrip")
 	// Submit the admin credentials form.
 	form := url.Values{}
 	form.Set("repo_url", "rest:http://admin.example/h")
 	form.Set("repo_username", "admin-user")
 	form.Set("repo_password", "s3cr3t-admin")
 	status, loc := postForm(t, baseURL, "/hosts/"+hostID+"/admin-credentials", form, cookie)
 	if status != stdhttp.StatusSeeOther {
 		t.Fatalf("save: want 303, got %d", status)
 	}
 	if !strings.Contains(loc, "saved=admin_credentials") {
 		t.Errorf("redirect location should contain saved=admin_credentials, got %q", loc)
 	}
 	// Load the page the redirect points at.
 	page := getRepoPage(t, baseURL, hostID, cookie)
 	if !strings.Contains(page, "stored, leave blank to keep") {
 		t.Error("after save: page missing 'stored, leave blank to keep' placeholder for admin password")
 	}
 	// The save must have left an audit trail.
 	const auditQuery = `SELECT action, user_id FROM audit_log WHERE target_id = ? AND action = 'host.admin_credentials_set'`
 	rows, err := st.DB().QueryContext(context.Background(), auditQuery, hostID)
 	if err != nil {
 		t.Fatalf("query audit: %v", err)
 	}
 	defer rows.Close()
 	matched := 0
 	for rows.Next() {
 		var (
 			action string
 			gotUID *string
 		)
 		if err := rows.Scan(&action, &gotUID); err != nil {
 			t.Fatalf("scan: %v", err)
 		}
 		matched++
 		if gotUID == nil || *gotUID != userID {
 			t.Errorf("audit row user_id: want %q, got %v", userID, gotUID)
 		}
 	}
 	if err := rows.Err(); err != nil {
 		t.Fatalf("rows.Err: %v", err)
 	}
 	if matched == 0 {
 		t.Error("audit row with action='host.admin_credentials_set' not found")
 	}
 }
 // TestUIAdminCredentialsDelete — seed an admin-creds row directly in
 // the store, POST to the delete route, then verify the 303 redirect,
 // that the admin row can no longer be read, and that a
 // host.admin_credentials_deleted audit row attributed to the logged-in
 // user was written.
 func TestUIAdminCredentialsDelete(t *testing.T) {
 	t.Parallel()
 	srv, baseURL, st := newTestServerWithUI(t)
 	cookie, userID := loginAsAdminWithID(t, st)
 	hostID := makeHost(t, st, "admin-delete")
 	ctx := context.Background()
 	// Seed the admin slot, sealed with the admin AAD.
 	seedBlob := repoCredsBlob{
 		RepoURL:      "rest:http://admin.example/h",
 		RepoPassword: "pw",
 	}
 	sealed, err := srv.encryptRepoCreds(seedBlob, []byte("host:"+hostID+":admin"))
 	if err != nil {
 		t.Fatalf("encrypt: %v", err)
 	}
 	if err := st.SetHostCredentials(ctx, hostID, store.CredKindAdmin, sealed); err != nil {
 		t.Fatalf("set admin creds: %v", err)
 	}
 	// Hit the delete route with an empty form.
 	status, loc := postForm(t, baseURL, "/hosts/"+hostID+"/admin-credentials/delete", url.Values{}, cookie)
 	if status != stdhttp.StatusSeeOther {
 		t.Fatalf("delete: want 303, got %d", status)
 	}
 	if !strings.Contains(loc, "saved=admin_credentials") {
 		t.Errorf("redirect location: want saved=admin_credentials, got %q", loc)
 	}
 	// Reading the admin slot must now fail.
 	_, getErr := st.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
 	if getErr == nil {
 		t.Error("admin creds row still present after delete")
 	}
 	// The delete must have left an audit trail.
 	const auditQuery = `SELECT action, user_id FROM audit_log WHERE target_id = ? AND action = 'host.admin_credentials_deleted'`
 	rows, err := st.DB().QueryContext(ctx, auditQuery, hostID)
 	if err != nil {
 		t.Fatalf("query audit: %v", err)
 	}
 	defer rows.Close()
 	matched := 0
 	for rows.Next() {
 		var (
 			action string
 			gotUID *string
 		)
 		if err := rows.Scan(&action, &gotUID); err != nil {
 			t.Fatalf("scan: %v", err)
 		}
 		matched++
 		if gotUID == nil || *gotUID != userID {
 			t.Errorf("audit row user_id: want %q, got %v", userID, gotUID)
 		}
 	}
 	if err := rows.Err(); err != nil {
 		t.Fatalf("rows.Err: %v", err)
 	}
 	if matched == 0 {
 		t.Error("audit row with action='host.admin_credentials_deleted' not found")
 	}
 }
 // TestUIAdminCredentialsDeleteIdempotent — deleting when no admin creds
 // were ever stored must still answer with a 303 redirect, not a 404 or
 // a 500.
 func TestUIAdminCredentialsDeleteIdempotent(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "admin-delete-noop")
 	deletePath := "/hosts/" + hostID + "/admin-credentials/delete"
 	got, _ := postForm(t, baseURL, deletePath, url.Values{}, cookie)
 	if got != stdhttp.StatusSeeOther {
 		t.Fatalf("delete (noop): want 303, got %d", got)
 	}
 }
 // TestUIAdminCredentialsSaveAllBlankIsNoop — posting the admin-creds
 // form with every field empty must redirect (303) without a ?saved=
 // banner and must not create an admin credentials row.
 func TestUIAdminCredentialsSaveAllBlankIsNoop(t *testing.T) {
 	t.Parallel()
 	_, baseURL, st := newTestServerWithUI(t)
 	cookie := loginAsAdmin(t, st)
 	hostID := makeHost(t, st, "admin-save-blank")
 	blank := url.Values{}
 	for _, field := range []string{"repo_url", "repo_username", "repo_password"} {
 		blank.Set(field, "")
 	}
 	status, loc := postForm(t, baseURL, "/hosts/"+hostID+"/admin-credentials", blank, cookie)
 	if status != stdhttp.StatusSeeOther {
 		t.Fatalf("blank save: want 303, got %d", status)
 	}
 	// A no-op save must not show the "saved" banner.
 	if hasBanner := strings.Contains(loc, "?saved="); hasBanner {
 		t.Errorf("blank save: redirect Location %q must not contain ?saved=", loc)
 	}
 	// The admin slot must still be unreadable (never created).
 	_, getErr := st.GetHostCredentials(context.Background(), hostID, store.CredKindAdmin)
 	if getErr == nil {
 		t.Error("admin creds row created unexpectedly for blank save")
 	}
 }
@@ -0,0 +1,116 @@
 // Package maintenance owns the server-side scheduler that fires
 // forget/prune/check on the cadences operators set on
 // host_repo_maintenance rows. Independent of the agent's local cron
 // (which now only handles backup schedules).
 //
 // The ticker is intentionally side-effect-free at the package
 // boundary: it asks an injected Backend for current state and emits
 // a list of Decisions for the caller to act on. Easy to unit-test
 // without a running server.
 package maintenance
 import (
 	"context"
 	"errors"
 	"time"
 	"github.com/robfig/cron/v3"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // Decision is one cadence-driven dispatch the ticker recommends.
 // SubsetPct is populated only when Kind == "check"; ignored for
 // "forget" and "prune".
 //
 // A Decision is only a recommendation: the ticker does not dispatch
 // anything itself, acting on it is left to the caller of Decide.
 type Decision struct {
 	// HostID is the HostID of the maintenance row that produced this
 	// decision.
 	HostID    string
 	Kind      string // "forget" | "prune" | "check"
 	// SubsetPct is copied from the row's CheckSubsetPct for "check"
 	// decisions and is 0 for the other kinds.
 	SubsetPct int
 }
 // Backend is the subset of *store.Store the ticker depends on.
 // Constrained interface so tests can pass a fake.
 type Backend interface {
 	// ListAllMaintenance returns the maintenance rows the ticker
 	// evaluates on each Decide call. An error aborts the whole Decide.
 	ListAllMaintenance(ctx context.Context) ([]store.HostRepoMaintenance, error)
 	// LatestJobByKind returns the job whose CreatedAt anchors the cron
 	// calculation for (hostID, kind). The ticker treats
 	// store.ErrNotFound as "no prior job" and any other error as a
 	// reason to skip that kind for that host on the current tick.
 	LatestJobByKind(ctx context.Context, hostID, kind string) (*store.Job, error)
 }
 // Ticker decides which cadence-driven jobs are due to fire at a
 // given instant. Stateless — the only state lives in the Backend.
 // Build one with New so the cron parser is configured.
 type Ticker struct {
 	// backend supplies the maintenance rows and latest-job lookups.
 	backend Backend
 	// parser turns the per-kind cron expressions into schedules; New
 	// configures it for minute, hour, day-of-month, month and
 	// day-of-week fields.
 	parser  cron.Parser
 }
 // New returns a Ticker that reads its state from b and parses cadences
 // as five-field cron expressions: minute, hour, day-of-month, month and
 // day-of-week.
 func New(b Backend) *Ticker {
 	const fields = cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow
 	tk := new(Ticker)
 	tk.backend = b
 	tk.parser = cron.NewParser(fields)
 	return tk
 }
 // Decide reports which maintenance jobs are due at `now`.
 //
 // Every maintenance row is evaluated for "forget", "prune" and "check",
 // in that order, and each kind that is due contributes one Decision.
 // Only "check" decisions carry the row's CheckSubsetPct. Decide itself
 // dispatches nothing: checking host online state, persisting the job
 // row and shipping command.run are the caller's responsibility.
 //
 // An error from ListAllMaintenance is returned as-is. An empty
 // maintenance table yields a nil slice and a nil error — a fresh
 // install is the most common case.
 func (t *Ticker) Decide(ctx context.Context, now time.Time) ([]Decision, error) {
 	rows, err := t.backend.ListAllMaintenance(ctx)
 	if err != nil {
 		return nil, err
 	}
 	var due []Decision
 	for _, row := range rows {
 		// One entry per kind, in the order decisions are emitted.
 		cadences := [...]struct {
 			kind    string
 			expr    string
 			enabled bool
 			subset  int
 		}{
 			{"forget", row.ForgetCron, row.ForgetEnabled, 0},
 			{"prune", row.PruneCron, row.PruneEnabled, 0},
 			{"check", row.CheckCron, row.CheckEnabled, row.CheckSubsetPct},
 		}
 		for _, c := range cadences {
 			d, ok := t.dueFor(ctx, now, row.HostID, c.kind, c.expr, c.enabled, c.subset)
 			if !ok {
 				continue
 			}
 			due = append(due, d)
 		}
 	}
 	return due, nil
 }
 // dueFor reports whether the cron expression `expr` has a fire instant
 // that is strictly after the anchor and at or before `now`. When it
 // does, the returned Decision carries hostID, kind and subset.
 //
 // The anchor depends on what LatestJobByKind returns:
 //   - a job with a nil error: the job's CreatedAt;
 //   - store.ErrNotFound: now minus 24h. This is the first-run case; the
 //     lookback is capped so a brand-new host does not fire for
 //     instants missed long before it existed;
 //   - anything else (including a nil job with a nil error): the kind
 //     is skipped for this host on this tick.
 //
 // A disabled kind or an empty expression is skipped silently, and so
 // is an expression that fails to parse — cron is validated when it is
 // written, so the parse check is purely defensive.
 //
 // NOTE(review): the schedule is evaluated relative to the anchor's
 // time.Time; presumably that means the anchor's location — confirm
 // against the cron library if job timestamps and `now` can be in
 // different zones.
 func (t *Ticker) dueFor(ctx context.Context, now time.Time, hostID, kind, expr string, enabled bool, subset int) (Decision, bool) {
 	var none Decision
 	if expr == "" || !enabled {
 		return none, false
 	}
 	sched, parseErr := t.parser.Parse(expr)
 	if parseErr != nil {
 		return none, false
 	}
 	var anchor time.Time
 	latest, err := t.backend.LatestJobByKind(ctx, hostID, kind)
 	if err == nil && latest != nil {
 		anchor = latest.CreatedAt
 	} else if errors.Is(err, store.ErrNotFound) {
 		anchor = now.Add(-24 * time.Hour)
 	} else {
 		// Hard error — skip this kind on this tick.
 		return none, false
 	}
 	fire := sched.Next(anchor)
 	if due := !fire.IsZero() && !fire.After(now); !due {
 		return none, false
 	}
 	return Decision{HostID: hostID, Kind: kind, SubsetPct: subset}, true
 }
@@ -0,0 +1,315 @@
 package maintenance
 import (
 	"context"
 	"errors"
 	"testing"
 	"time"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // fakeBackend implements Backend with table-driven canned responses.
 // For LatestJobByKind, a hardErr entry takes precedence over a jobs
 // entry; when neither matches, store.ErrNotFound is returned.
 type fakeBackend struct {
 	// rows is what ListAllMaintenance returns when listErr is nil.
 	rows []store.HostRepoMaintenance
 	// jobs[hostID][kind] -> job (if present, returned). If absent,
 	// fakeBackend returns ErrNotFound by default.
 	jobs map[string]map[string]*store.Job
 	// hardErr forces a non-ErrNotFound failure for a given (host, kind).
 	hardErr map[string]map[string]error
 	// listErr forces ListAllMaintenance to fail.
 	listErr error
 }
 // ListAllMaintenance returns the canned rows, or listErr when one has
 // been configured.
 func (f *fakeBackend) ListAllMaintenance(_ context.Context) ([]store.HostRepoMaintenance, error) {
 	if err := f.listErr; err != nil {
 		return nil, err
 	}
 	return f.rows, nil
 }
 // LatestJobByKind answers from the canned tables: a configured hardErr
 // for (hostID, kind) wins, then a canned job, and otherwise
 // store.ErrNotFound is returned.
 func (f *fakeBackend) LatestJobByKind(_ context.Context, hostID, kind string) (*store.Job, error) {
 	// Indexing a nil or missing inner map yields the zero value, so the
 	// lookups need no explicit presence checks.
 	if err := f.hardErr[hostID][kind]; err != nil {
 		return nil, err
 	}
 	if job := f.jobs[hostID][kind]; job != nil {
 		return job, nil
 	}
 	return nil, store.ErrNotFound
 }
 // mustTime converts an RFC3339 timestamp into a time.Time and fails the
 // calling test immediately when the string does not parse.
 func mustTime(t *testing.T, s string) time.Time {
 	t.Helper()
 	parsed, err := time.Parse(time.RFC3339, s)
 	if err == nil {
 		return parsed
 	}
 	t.Fatalf("parse %q: %v", s, err)
 	return time.Time{} // not reached: Fatalf stops the test
 }
 func TestTickerSkipsDisabled(t *testing.T) {
 	t.Parallel()
 	be := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "0 3 * * *",
 			ForgetEnabled: false,
 			PruneCron:     "0 4 * * *",
 			PruneEnabled:  false,
 			CheckCron:     "0 5 * * *",
 			CheckEnabled:  false,
 		}},
 	}
 	tk := New(be)
 	now := mustTime(t, "2026-05-04T04:00:00Z")
 	got, err := tk.Decide(context.Background(), now)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(got) != 0 {
 		t.Errorf("expected no decisions, got %+v", got)
 	}
 }
 func TestTickerSkipsEmptyCron(t *testing.T) {
 	t.Parallel()
 	be := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "",
 			ForgetEnabled: true,
 			PruneCron:     "",
 			PruneEnabled:  true,
 			CheckCron:     "",
 			CheckEnabled:  true,
 		}},
 	}
 	tk := New(be)
 	got, err := tk.Decide(context.Background(), mustTime(t, "2026-05-04T04:00:00Z"))
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(got) != 0 {
 		t.Errorf("expected no decisions, got %+v", got)
 	}
 }
 // TestTickerFiresWhenOverdue: the latest forget job is 25h old and the
 // daily 03:00 cron has fired since then, so exactly one forget decision
 // is expected for the host.
 func TestTickerFiresWhenOverdue(t *testing.T) {
 	t.Parallel()
 	at := mustTime(t, "2026-05-04T04:00:00Z")
 	previous := &store.Job{
 		ID:        "j1",
 		HostID:    "h1",
 		Kind:      "forget",
 		CreatedAt: at.Add(-25 * time.Hour), // latest forget job 25h ago
 	}
 	backend := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "0 3 * * *",
 			ForgetEnabled: true,
 		}},
 		jobs: map[string]map[string]*store.Job{
 			"h1": {"forget": previous},
 		},
 	}
 	decisions, err := New(backend).Decide(context.Background(), at)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(decisions) != 1 || decisions[0].Kind != "forget" || decisions[0].HostID != "h1" {
 		t.Errorf("expected one forget decision, got %+v", decisions)
 	}
 }
 // TestTickerSuppressesWhenRecent: a forget job created at 03:30, after
 // the 03:00 fire, means nothing is due at 04:00.
 func TestTickerSuppressesWhenRecent(t *testing.T) {
 	t.Parallel()
 	at := mustTime(t, "2026-05-04T04:00:00Z")
 	previous := &store.Job{
 		ID:        "j1",
 		HostID:    "h1",
 		Kind:      "forget",
 		CreatedAt: mustTime(t, "2026-05-04T03:30:00Z"),
 	}
 	backend := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "0 3 * * *",
 			ForgetEnabled: true,
 		}},
 		jobs: map[string]map[string]*store.Job{
 			"h1": {"forget": previous},
 		},
 	}
 	decisions, err := New(backend).Decide(context.Background(), at)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(decisions) > 0 {
 		t.Errorf("expected no decisions, got %+v", decisions)
 	}
 }
 func TestTickerFirstRunAnchorBoundedAt24h(t *testing.T) {
 	t.Parallel()
 	be := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "0 3 * * *",
 			ForgetEnabled: true,
 		}},
 	}
 	tk := New(be)
 	// Case 1: now=04:00. Anchor=04:00 - 24h = previous-day 04:00. Next
 	// fire after that is today 03:00 — within window → fire.
 	now1 := mustTime(t, "2026-05-04T04:00:00Z")
 	got, err := tk.Decide(context.Background(), now1)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(got) != 1 {
 		t.Errorf("case1: expected 1 decision, got %+v", got)
 	}
 	// Case 2: a cron firing less often than once per 24h with a
 	// no-prior-job anchor must not fire when the most recent fire is
 	// outside the 24h lookback window. Use a weekly cron (Mondays at
 	// 03:00) and `now` on a Tuesday: anchor=now-24h lands on Monday,
 	// so cron.Next(Monday) = next-week Monday → after now → no fire.
 	// 2026-05-04 is a Monday, 2026-05-05 a Tuesday.
 	be2 := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h2",
 			ForgetCron:    "0 3 * * 1",
 			ForgetEnabled: true,
 		}},
 	}
 	tk2 := New(be2)
 	now2 := mustTime(t, "2026-05-05T03:00:00Z")
 	got2, err := tk2.Decide(context.Background(), now2)
 	if err != nil {
 		t.Fatalf("Decide case2: %v", err)
 	}
 	if len(got2) != 0 {
 		t.Errorf("case2: expected no decisions (cron fires < once/24h, prior fire was Monday 03:00 which is exactly 24h ago and anchor=now-24h means next-after is next Monday), got %+v", got2)
 	}
 }
 // TestTickerCheckDecisionCarriesSubset: an overdue check must surface
 // the row's CheckSubsetPct on the resulting decision.
 func TestTickerCheckDecisionCarriesSubset(t *testing.T) {
 	t.Parallel()
 	at := mustTime(t, "2026-05-04T04:00:00Z")
 	previous := &store.Job{
 		ID:        "j1",
 		HostID:    "h1",
 		Kind:      "check",
 		CreatedAt: at.Add(-30 * 24 * time.Hour),
 	}
 	backend := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:         "h1",
 			CheckCron:      "0 3 * * *",
 			CheckEnabled:   true,
 			CheckSubsetPct: 25,
 		}},
 		jobs: map[string]map[string]*store.Job{
 			"h1": {"check": previous},
 		},
 	}
 	decisions, err := New(backend).Decide(context.Background(), at)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(decisions) != 1 || decisions[0].Kind != "check" || decisions[0].SubsetPct != 25 {
 		t.Errorf("expected check decision with SubsetPct=25, got %+v", decisions)
 	}
 }
 // TestTickerHardJobErrorSkipsKind: a non-ErrNotFound failure looking up
 // the latest forget job must drop only that kind; the check on the same
 // host still fires.
 func TestTickerHardJobErrorSkipsKind(t *testing.T) {
 	t.Parallel()
 	at := mustTime(t, "2026-05-04T04:00:00Z")
 	dbErr := errors.New("synthetic db error")
 	// check has an ordinary latest job 25h old, so it is overdue.
 	lastCheck := &store.Job{
 		ID:        "jc",
 		HostID:    "h1",
 		Kind:      "check",
 		CreatedAt: at.Add(-25 * time.Hour),
 	}
 	backend := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "0 3 * * *",
 			ForgetEnabled: true,
 			CheckCron:     "0 3 * * *",
 			CheckEnabled:  true,
 		}},
 		jobs: map[string]map[string]*store.Job{
 			"h1": {"check": lastCheck},
 		},
 		hardErr: map[string]map[string]error{
 			"h1": {"forget": dbErr},
 		},
 	}
 	decisions, err := New(backend).Decide(context.Background(), at)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	// forget is skipped, leaving the check decision on its own.
 	if len(decisions) != 1 || decisions[0].Kind != "check" {
 		t.Errorf("expected only check decision, got %+v", decisions)
 	}
 }
 // TestTickerHandlesMultipleHosts: two hosts, each with one overdue
 // kind, yield one decision per host; the disabled prune on the second
 // host stays silent.
 func TestTickerHandlesMultipleHosts(t *testing.T) {
 	t.Parallel()
 	at := mustTime(t, "2026-05-04T04:00:00Z")
 	stale := at.Add(-25 * time.Hour)
 	hostA := store.HostRepoMaintenance{
 		HostID:        "ha",
 		ForgetCron:    "0 3 * * *",
 		ForgetEnabled: true,
 	}
 	hostB := store.HostRepoMaintenance{
 		HostID:       "hb",
 		CheckCron:    "0 3 * * *",
 		CheckEnabled: true,
 		PruneCron:    "0 4 * * *",
 		PruneEnabled: false, // disabled — should not fire
 	}
 	backend := &fakeBackend{
 		rows: []store.HostRepoMaintenance{hostA, hostB},
 		jobs: map[string]map[string]*store.Job{
 			"ha": {"forget": &store.Job{ID: "j1", HostID: "ha", Kind: "forget", CreatedAt: stale}},
 			"hb": {"check": &store.Job{ID: "j2", HostID: "hb", Kind: "check", CreatedAt: stale}},
 		},
 	}
 	decisions, err := New(backend).Decide(context.Background(), at)
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(decisions) != 2 {
 		t.Fatalf("expected 2 decisions, got %d: %+v", len(decisions), decisions)
 	}
 	// Index by host so the assertions do not depend on slice order.
 	kindByHost := make(map[string]string, len(decisions))
 	for i := range decisions {
 		kindByHost[decisions[i].HostID] = decisions[i].Kind
 	}
 	if kindByHost["ha"] != "forget" {
 		t.Errorf("ha: expected forget, got %q", kindByHost["ha"])
 	}
 	if kindByHost["hb"] != "check" {
 		t.Errorf("hb: expected check, got %q", kindByHost["hb"])
 	}
 }
 func TestTickerInvalidCronSkipsSilently(t *testing.T) {
 	t.Parallel()
 	be := &fakeBackend{
 		rows: []store.HostRepoMaintenance{{
 			HostID:        "h1",
 			ForgetCron:    "not a cron",
 			ForgetEnabled: true,
 		}},
 	}
 	tk := New(be)
 	got, err := tk.Decide(context.Background(), mustTime(t, "2026-05-04T04:00:00Z"))
 	if err != nil {
 		t.Fatalf("Decide: %v", err)
 	}
 	if len(got) != 0 {
 		t.Errorf("expected no decisions for invalid cron, got %+v", got)
 	}
 }
@@ -267,8 +267,34 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
 			deps.OnScheduleFire(ctx, hostID, c, p.ScheduleID, p.ScheduledAt)
 		}
-	case api.MsgRepoStats, api.MsgCommandResult:
+	case api.MsgRepoStats:
-		// TODO(P2): persist these projections.
+		var p api.RepoStatsPayload
 		if err := env.UnmarshalPayload(&p); err != nil {
 			slog.Warn("ws: bad repo.stats payload", "host_id", hostID, "err", err)
 			break
 		}
 		patch := store.HostRepoStats{
 			HostID:              hostID,
 			TotalSizeBytes:      p.TotalSizeBytes,
 			RawSizeBytes:        p.RawSizeBytes,
 			UniqueFiles:         p.UniqueFiles,
 			SnapshotCount:       p.SnapshotCount,
 			LastCheckAt:         p.LastCheckAt,
 			LastCheckStatus:     p.LastCheckStatus,
 			LockPresent:         p.LockPresent,
 			LastPruneAt:         p.LastPruneAt,
 			LastPruneFreedBytes: p.LastPruneFreedBytes,
 		}
 		if err := deps.Store.UpsertHostRepoStats(ctx, hostID, patch); err != nil {
 			slog.Warn("ws: upsert host repo stats", "host_id", hostID, "err", err)
 		} else {
 			slog.Info("ws: repo stats refreshed", "host_id", hostID)
 		}
 	case api.MsgCommandResult:
 		// TODO(P2): persist command.result acks for "did the agent
 		// accept the dispatch?" forensics. Currently the job lifecycle
 		// (job.started → job.finished) is sufficient signal.
 		slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID)
 	case api.MsgError:
@@ -0,0 +1,135 @@
 package ws
 import (
 	"context"
 	"path/filepath"
 	"testing"
 	"time"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )
 // openWSTestStore opens a store backed by a fresh "rm.db" file inside
 // the test's temp directory and registers a cleanup that closes it.
 func openWSTestStore(t *testing.T) *store.Store {
 	t.Helper()
 	dbPath := filepath.Join(t.TempDir(), "rm.db")
 	st, err := store.Open(context.Background(), dbPath)
 	if err != nil {
 		t.Fatalf("open: %v", err)
 	}
 	t.Cleanup(func() {
 		_ = st.Close() // best-effort: the test is already finishing
 	})
 	return st
 }
 // seedHostWS writes a minimal hosts row straight through the store's DB
 // handle, using hostID for both id and name, and fails the test if the
 // insert is rejected.
 func seedHostWS(t *testing.T, s *store.Store, hostID string) {
 	t.Helper()
 	const insert = `INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`
 	if _, err := s.DB().Exec(insert, hostID, hostID, "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
 		t.Fatalf("seed host %q: %v", hostID, err)
 	}
 }
 // int64ptrWS returns a pointer to a fresh copy of v.
 func int64ptrWS(v int64) *int64 {
 	p := new(int64)
 	*p = v
 	return p
 }

 // boolptrWS returns a pointer to a fresh copy of v.
 func boolptrWS(v bool) *bool {
 	p := new(bool)
 	*p = v
 	return p
 }
 // TestRepoStatsReportPersisted feeds a fully populated repo.stats
 // envelope through dispatchAgentMessage and checks that every field can
 // be read back from the store.
 func TestRepoStatsReportPersisted(t *testing.T) {
 	t.Parallel()
 	const hostID = "h-stats-ws"
 	ctx := context.Background()
 	st := openWSTestStore(t)
 	seedHostWS(t, st, hostID)

 	// Whole seconds keep the timestamps comparable after the round trip.
 	checkedAt := time.Now().UTC().Truncate(time.Second)
 	prunedAt := checkedAt.Add(-2 * time.Hour)
 	env, err := api.Marshal(api.MsgRepoStats, "", api.RepoStatsPayload{
 		TotalSizeBytes:      int64ptrWS(1024),
 		RawSizeBytes:        int64ptrWS(2048),
 		UniqueFiles:         int64ptrWS(42),
 		SnapshotCount:       int64ptrWS(7),
 		LastCheckAt:         &checkedAt,
 		LastCheckStatus:     "ok",
 		LockPresent:         boolptrWS(false),
 		LastPruneAt:         &prunedAt,
 		LastPruneFreedBytes: int64ptrWS(512),
 	})
 	if err != nil {
 		t.Fatalf("marshal: %v", err)
 	}

 	// No connection is passed (nil); only the store dependency is set.
 	dispatchAgentMessage(ctx, nil, hostID, env, HandlerDeps{Store: st})

 	stored, err := st.GetHostRepoStats(ctx, hostID)
 	if err != nil {
 		t.Fatalf("get host repo stats: %v", err)
 	}
 	if v := stored.TotalSizeBytes; v == nil || *v != 1024 {
 		t.Errorf("TotalSizeBytes: got %v want 1024", stored.TotalSizeBytes)
 	}
 	if v := stored.RawSizeBytes; v == nil || *v != 2048 {
 		t.Errorf("RawSizeBytes: got %v want 2048", stored.RawSizeBytes)
 	}
 	if v := stored.UniqueFiles; v == nil || *v != 42 {
 		t.Errorf("UniqueFiles: got %v want 42", stored.UniqueFiles)
 	}
 	if v := stored.SnapshotCount; v == nil || *v != 7 {
 		t.Errorf("SnapshotCount: got %v want 7", stored.SnapshotCount)
 	}
 	if ts := stored.LastCheckAt; ts == nil || !ts.Equal(checkedAt) {
 		t.Errorf("LastCheckAt: got %v want %v", stored.LastCheckAt, checkedAt)
 	}
 	if stored.LastCheckStatus != "ok" {
 		t.Errorf("LastCheckStatus: got %q want %q", stored.LastCheckStatus, "ok")
 	}
 	if v := stored.LockPresent; v == nil || *v {
 		t.Errorf("LockPresent: got %v want false", stored.LockPresent)
 	}
 	if ts := stored.LastPruneAt; ts == nil || !ts.Equal(prunedAt) {
 		t.Errorf("LastPruneAt: got %v want %v", stored.LastPruneAt, prunedAt)
 	}
 	if v := stored.LastPruneFreedBytes; v == nil || *v != 512 {
 		t.Errorf("LastPruneFreedBytes: got %v want 512", stored.LastPruneFreedBytes)
 	}
 }
 // TestRepoStatsReportPartialUpdate: a repo.stats message that only
 // carries LastCheckStatus must leave a previously stored
 // TotalSizeBytes untouched.
 func TestRepoStatsReportPartialUpdate(t *testing.T) {
 	t.Parallel()
 	const hostID = "h-stats-partial"
 	ctx := context.Background()
 	st := openWSTestStore(t)
 	seedHostWS(t, st, hostID)

 	// Existing state before the message arrives: TotalSizeBytes = 100.
 	seed := store.HostRepoStats{TotalSizeBytes: int64ptrWS(100)}
 	if err := st.UpsertHostRepoStats(ctx, hostID, seed); err != nil {
 		t.Fatalf("pre-seed upsert: %v", err)
 	}

 	// The incoming payload sets nothing but LastCheckStatus.
 	statusOnly := api.RepoStatsPayload{LastCheckStatus: "ok"}
 	env, err := api.Marshal(api.MsgRepoStats, "", statusOnly)
 	if err != nil {
 		t.Fatalf("marshal: %v", err)
 	}
 	dispatchAgentMessage(ctx, nil, hostID, env, HandlerDeps{Store: st})

 	stored, err := st.GetHostRepoStats(ctx, hostID)
 	if err != nil {
 		t.Fatalf("get: %v", err)
 	}
 	if v := stored.TotalSizeBytes; v == nil || *v != 100 {
 		t.Errorf("TotalSizeBytes lost: got %v want 100", stored.TotalSizeBytes)
 	}
 	if stored.LastCheckStatus != "ok" {
 		t.Errorf("LastCheckStatus: got %q want ok", stored.LastCheckStatus)
 	}
 }
@@ -8,13 +8,23 @@ import (
 	"time"
 )
 // CredentialKind identifies the role of a host_credentials row. Its
 // string value is what the store binds to the kind column when it
 // reads or deletes a row.
 type CredentialKind string
 const (
 	// CredKindRepo is the append-only credential used for every backup.
 	CredKindRepo CredentialKind = "repo"
 	// CredKindAdmin is the delete-capable credential used for prune.
 	CredKindAdmin CredentialKind = "admin"
 )
 // GetHostCredentials returns the AEAD-encrypted repo creds blob for
-// the host, or ("", ErrNotFound) if no credential has ever been set.
+// the host + kind, or ("", ErrNotFound) if no matching row exists.
 // The caller decrypts using host_id as AEAD additional data.
-func (s *Store) GetHostCredentials(ctx context.Context, hostID string) (string, error) {
+func (s *Store) GetHostCredentials(ctx context.Context, hostID string, kind CredentialKind) (string, error) {
 	row := s.db.QueryRowContext(ctx,
-		`SELECT enc_repo_creds FROM host_credentials WHERE host_id = ?`,
+		`SELECT enc_repo_creds FROM host_credentials WHERE host_id = ? AND kind = ?`,
-		hostID)
+		hostID, string(kind))
 	var enc string
 	if err := row.Scan(&enc); err != nil {
 		if errors.Is(err, sql.ErrNoRows) {
@@ -25,22 +35,35 @@ func (s *Store) GetHostCredentials(ctx context.Context, hostID string) (string,
 	return enc, nil
 }
-// SetHostCredentials replaces the host's encrypted repo creds blob.
+// SetHostCredentials replaces the host's encrypted repo creds blob for
-// The caller has already encrypted using host_id as additional data.
+// the given kind. The caller has already encrypted using host_id as
-func (s *Store) SetHostCredentials(ctx context.Context, hostID, encRepoCreds string) error {
+// additional data.
 func (s *Store) SetHostCredentials(ctx context.Context, hostID string, kind CredentialKind, encRepoCreds string) error {
 	if encRepoCreds == "" {
 		return fmt.Errorf("store: empty enc_repo_creds")
 	}
 	now := time.Now().UTC().Format(time.RFC3339Nano)
 	_, err := s.db.ExecContext(ctx,
-		`INSERT INTO host_credentials (host_id, enc_repo_creds, updated_at)
+		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at)
-		 VALUES (?, ?, ?)
+		 VALUES (?, ?, ?, ?)
-		 ON CONFLICT(host_id) DO UPDATE SET
+		 ON CONFLICT(host_id, kind) DO UPDATE SET
 			enc_repo_creds = excluded.enc_repo_creds,
 			updated_at     = excluded.updated_at`,
-		hostID, encRepoCreds, now)
+		hostID, string(kind), encRepoCreds, now)
 	if err != nil {
 		return fmt.Errorf("store: set host credentials: %w", err)
 	}
 	return nil
 }
 // DeleteHostCredentials deletes the host_credentials row matching
 // hostID and kind. Deleting a row that does not exist is not an error;
 // a failed statement is returned wrapped.
 func (s *Store) DeleteHostCredentials(ctx context.Context, hostID string, kind CredentialKind) error {
 	const stmt = `DELETE FROM host_credentials WHERE host_id = ? AND kind = ?`
 	if _, err := s.db.ExecContext(ctx, stmt, hostID, string(kind)); err != nil {
 		return fmt.Errorf("store: delete host credentials: %w", err)
 	}
 	return nil
 }
@@ -0,0 +1,103 @@
 package store
 import (
 	"context"
 	"errors"
 	"testing"
 )
 // seedHost writes a minimal hosts row through the store's DB handle,
 // using hostID for both id and name, and fails the test if the insert
 // is rejected.
 func seedHost(t *testing.T, s *Store, hostID string) {
 	t.Helper()
 	const insert = `INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`
 	if _, err := s.DB().Exec(insert, hostID, hostID, "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
 		t.Fatalf("seed host %q: %v", hostID, err)
 	}
 }
 // TestHostCredentialsAdminRowSeparate stores a repo and an admin blob
 // for the same host and checks that they are kept apart: each kind
 // reads back its own blob, and deleting the admin row leaves the repo
 // row in place.
 func TestHostCredentialsAdminRowSeparate(t *testing.T) {
 	t.Parallel()
 	const (
 		hostID    = "h-creds-test"
 		repoBlob  = "enc-repo-blob"
 		adminBlob = "enc-admin-blob"
 	)
 	ctx := context.Background()
 	st := openTestStore(t)
 	seedHost(t, st, hostID)

 	// Write one row per kind.
 	if err := st.SetHostCredentials(ctx, hostID, CredKindRepo, repoBlob); err != nil {
 		t.Fatalf("set repo creds: %v", err)
 	}
 	if err := st.SetHostCredentials(ctx, hostID, CredKindAdmin, adminBlob); err != nil {
 		t.Fatalf("set admin creds: %v", err)
 	}

 	// Read both back; each kind must return its own blob.
 	repoOut, err := st.GetHostCredentials(ctx, hostID, CredKindRepo)
 	if err != nil {
 		t.Fatalf("get repo creds: %v", err)
 	}
 	adminOut, err := st.GetHostCredentials(ctx, hostID, CredKindAdmin)
 	if err != nil {
 		t.Fatalf("get admin creds: %v", err)
 	}
 	if repoOut != repoBlob {
 		t.Errorf("repo creds: got %q, want %q", repoOut, repoBlob)
 	}
 	if adminOut != adminBlob {
 		t.Errorf("admin creds: got %q, want %q", adminOut, adminBlob)
 	}
 	if repoOut == adminOut {
 		t.Error("repo and admin blobs must differ")
 	}

 	// Removing the admin row must not touch the repo row.
 	if err := st.DeleteHostCredentials(ctx, hostID, CredKindAdmin); err != nil {
 		t.Fatalf("delete admin creds: %v", err)
 	}
 	_, err = st.GetHostCredentials(ctx, hostID, CredKindAdmin)
 	if !errors.Is(err, ErrNotFound) {
 		t.Errorf("after delete, expected ErrNotFound for admin; got %v", err)
 	}
 	survivor, err := st.GetHostCredentials(ctx, hostID, CredKindRepo)
 	if err != nil || survivor != repoBlob {
 		t.Errorf("repo creds should survive admin delete; got %q, err %v", survivor, err)
 	}
 }
 // TestHostCredentialsNotFound: looking up credentials for a host that
 // was never seeded must report ErrNotFound.
 func TestHostCredentialsNotFound(t *testing.T) {
 	t.Parallel()
 	st := openTestStore(t)
 	if _, err := st.GetHostCredentials(context.Background(), "no-such-host", CredKindRepo); !errors.Is(err, ErrNotFound) {
 		t.Errorf("expected ErrNotFound, got %v", err)
 	}
 }
 // TestHostCredentialsUpsert: setting the same host and kind twice must
 // replace the blob rather than fail or keep the first value.
 func TestHostCredentialsUpsert(t *testing.T) {
 	t.Parallel()
 	const hostID = "h-upsert-test"
 	ctx := context.Background()
 	st := openTestStore(t)
 	seedHost(t, st, hostID)

 	err := st.SetHostCredentials(ctx, hostID, CredKindRepo, "v1")
 	if err != nil {
 		t.Fatalf("set v1: %v", err)
 	}
 	err = st.SetHostCredentials(ctx, hostID, CredKindRepo, "v2")
 	if err != nil {
 		t.Fatalf("set v2 (upsert): %v", err)
 	}

 	stored, err := st.GetHostCredentials(ctx, hostID, CredKindRepo)
 	if err != nil {
 		t.Fatalf("get: %v", err)
 	}
 	if stored != "v2" {
 		t.Errorf("expected v2, got %q", stored)
 	}
 }
@@ -0,0 +1,231 @@
 package store
 import (
 	"context"
 	"database/sql"
 	"errors"
 	"fmt"
 	"time"
 )
 // HostRepoStats is the per-host projection of repo-level metrics.
 // All pointer fields are nullable; nil means "not yet known." The row
 // is created (or replaced) by UpsertHostRepoStats which merges in only
 // the non-nil fields from a patch.
 //
 // When used as a patch, a nil pointer (or an empty LastCheckStatus)
 // means "leave the stored value alone". When read back from the
 // database, LockPresent is always non-nil because lock_present is
 // scanned as a plain integer rather than a nullable one.
 type HostRepoStats struct {
 	HostID              string
 	TotalSizeBytes      *int64
 	RawSizeBytes        *int64
 	UniqueFiles         *int64
 	SnapshotCount       *int64
 	LastCheckAt         *time.Time
 	LastCheckStatus     string // "" | "ok" | "errors_found" | "failed"
 	LockPresent         *bool
 	LastPruneAt         *time.Time
 	LastPruneFreedBytes *int64
 	// UpdatedAt is set by the store on every upsert; any value supplied
 	// in a patch is not written.
 	UpdatedAt           time.Time
 }
 // GetHostRepoStats loads the host_repo_stats row for hostID. It returns
 // (nil, ErrNotFound) when the host has no row.
 func (s *Store) GetHostRepoStats(ctx context.Context, hostID string) (*HostRepoStats, error) {
 	const query = `SELECT host_id, total_size_bytes, raw_size_bytes, unique_files,
 		        snapshot_count, last_check_at, last_check_status,
 		        lock_present, last_prune_at, last_prune_freed_bytes, updated_at
 		 FROM host_repo_stats WHERE host_id = ?`
 	return scanHostRepoStats(s.db.QueryRowContext(ctx, query, hostID))
 }
 // getHostRepoStatsTx is the transactional twin of GetHostRepoStats: it
 // runs the same query on tx, so UpsertHostRepoStats can read and write
 // the row inside one transaction. It returns (nil, ErrNotFound) when
 // the host has no row.
 func getHostRepoStatsTx(ctx context.Context, tx *sql.Tx, hostID string) (*HostRepoStats, error) {
 	const query = `SELECT host_id, total_size_bytes, raw_size_bytes, unique_files,
 		        snapshot_count, last_check_at, last_check_status,
 		        lock_present, last_prune_at, last_prune_freed_bytes, updated_at
 		 FROM host_repo_stats WHERE host_id = ?`
 	return scanHostRepoStats(tx.QueryRowContext(ctx, query, hostID))
 }
 // scanHostRepoStats decodes a single host_repo_stats row into a
 // HostRepoStats.
 //
 // NULL columns become nil pointers (or an empty LastCheckStatus).
 // lock_present is read as a plain integer, so LockPresent is always
 // non-nil in the result. Timestamps are parsed as RFC3339Nano.
 //
 // It returns (nil, ErrNotFound) when the query matched no row, and a
 // wrapped error when the scan or a timestamp parse fails.
 func scanHostRepoStats(row *sql.Row) (*HostRepoStats, error) {
 	var out HostRepoStats
 	var total, raw, files, snaps, freed sql.NullInt64
 	var checkAt, checkStatus, pruneAt sql.NullString
 	var lockFlag int64
 	var updated string

 	// Destinations follow the SELECT column order used by the callers.
 	err := row.Scan(
 		&out.HostID,
 		&total, &raw, &files, &snaps,
 		&checkAt, &checkStatus,
 		&lockFlag,
 		&pruneAt, &freed,
 		&updated,
 	)
 	switch {
 	case errors.Is(err, sql.ErrNoRows):
 		return nil, ErrNotFound
 	case err != nil:
 		return nil, fmt.Errorf("store: scan host_repo_stats: %w", err)
 	}

 	// optInt maps a nullable column to *int64, nil when the column is NULL.
 	optInt := func(n sql.NullInt64) *int64 {
 		if !n.Valid {
 			return nil
 		}
 		v := n.Int64
 		return &v
 	}
 	out.TotalSizeBytes = optInt(total)
 	out.RawSizeBytes = optInt(raw)
 	out.UniqueFiles = optInt(files)
 	out.SnapshotCount = optInt(snaps)
 	out.LastPruneFreedBytes = optInt(freed)

 	if checkStatus.Valid {
 		out.LastCheckStatus = checkStatus.String
 	}
 	locked := lockFlag != 0
 	out.LockPresent = &locked

 	if checkAt.Valid {
 		ts, err := time.Parse(time.RFC3339Nano, checkAt.String)
 		if err != nil {
 			return nil, fmt.Errorf("store: parse last_check_at: %w", err)
 		}
 		out.LastCheckAt = &ts
 	}
 	if pruneAt.Valid {
 		ts, err := time.Parse(time.RFC3339Nano, pruneAt.String)
 		if err != nil {
 			return nil, fmt.Errorf("store: parse last_prune_at: %w", err)
 		}
 		out.LastPruneAt = &ts
 	}
 	ts, err := time.Parse(time.RFC3339Nano, updated)
 	if err != nil {
 		return nil, fmt.Errorf("store: parse host_repo_stats.updated_at: %w", err)
 	}
 	out.UpdatedAt = ts
 	return &out, nil
 }
 // UpsertHostRepoStats applies a partial update to the host's
 // host_repo_stats row, creating the row when it does not exist yet.
 //
 // Only fields that are set in patch are applied: non-nil pointer fields
 // and a non-empty LastCheckStatus overwrite the stored value, every
 // other column keeps what the row already held. patch.HostID and
 // patch.UpdatedAt are ignored — the hostID argument selects the row and
 // updated_at is always set to the current UTC time. A row with no known
 // LockPresent is written with lock_present = 0.
 //
 // The read-merge-write runs inside a single transaction so concurrent
 // upserts on the same host don't lose updates; the transaction is
 // rolled back if any step fails. Every failure, including a failed
 // commit, is returned wrapped with a "store:" prefix.
 func (s *Store) UpsertHostRepoStats(ctx context.Context, hostID string, patch HostRepoStats) error {
 	tx, err := s.db.BeginTx(ctx, nil)
 	if err != nil {
 		return fmt.Errorf("store: begin host_repo_stats tx: %w", err)
 	}
 	// Rollback after a successful Commit is a harmless no-op, so the
 	// result is deliberately ignored.
 	defer func() { _ = tx.Rollback() }()
 	// Fetch existing row; start from zero if absent.
 	cur, err := getHostRepoStatsTx(ctx, tx, hostID)
 	if err != nil && !errors.Is(err, ErrNotFound) {
 		return err
 	}
 	if cur == nil {
 		cur = &HostRepoStats{HostID: hostID}
 	}
 	// Merge: non-nil patch fields overwrite current.
 	if patch.TotalSizeBytes != nil {
 		cur.TotalSizeBytes = patch.TotalSizeBytes
 	}
 	if patch.RawSizeBytes != nil {
 		cur.RawSizeBytes = patch.RawSizeBytes
 	}
 	if patch.UniqueFiles != nil {
 		cur.UniqueFiles = patch.UniqueFiles
 	}
 	if patch.SnapshotCount != nil {
 		cur.SnapshotCount = patch.SnapshotCount
 	}
 	if patch.LastCheckAt != nil {
 		cur.LastCheckAt = patch.LastCheckAt
 	}
 	if patch.LastCheckStatus != "" {
 		cur.LastCheckStatus = patch.LastCheckStatus
 	}
 	if patch.LockPresent != nil {
 		cur.LockPresent = patch.LockPresent
 	}
 	if patch.LastPruneAt != nil {
 		cur.LastPruneAt = patch.LastPruneAt
 	}
 	if patch.LastPruneFreedBytes != nil {
 		cur.LastPruneFreedBytes = patch.LastPruneFreedBytes
 	}
 	now := time.Now().UTC().Format(time.RFC3339Nano)
 	// Convert *bool → int for lock_present.
 	var lockPresentInt int64
 	if cur.LockPresent != nil && *cur.LockPresent {
 		lockPresentInt = 1
 	}
 	// The merged row is written in full; the ON CONFLICT branch replaces
 	// every column of an existing row with the merged values.
 	if _, err = tx.ExecContext(ctx,
 		`INSERT INTO host_repo_stats
 		   (host_id, total_size_bytes, raw_size_bytes, unique_files,
 		    snapshot_count, last_check_at, last_check_status,
 		    lock_present, last_prune_at, last_prune_freed_bytes, updated_at)
 		 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 		 ON CONFLICT(host_id) DO UPDATE SET
 		   total_size_bytes       = excluded.total_size_bytes,
 		   raw_size_bytes         = excluded.raw_size_bytes,
 		   unique_files           = excluded.unique_files,
 		   snapshot_count         = excluded.snapshot_count,
 		   last_check_at          = excluded.last_check_at,
 		   last_check_status      = excluded.last_check_status,
 		   lock_present           = excluded.lock_present,
 		   last_prune_at          = excluded.last_prune_at,
 		   last_prune_freed_bytes = excluded.last_prune_freed_bytes,
 		   updated_at             = excluded.updated_at`,
 		hostID,
 		nullableInt64(cur.TotalSizeBytes),
 		nullableInt64(cur.RawSizeBytes),
 		nullableInt64(cur.UniqueFiles),
 		nullableInt64(cur.SnapshotCount),
 		nullableTime(cur.LastCheckAt),
 		nullableStr(cur.LastCheckStatus),
 		lockPresentInt,
 		nullableTime(cur.LastPruneAt),
 		nullableInt64(cur.LastPruneFreedBytes),
 		now,
 	); err != nil {
 		return fmt.Errorf("store: upsert host_repo_stats: %w", err)
 	}
 	// Wrap the commit failure like every other step so callers always
 	// see which store operation failed.
 	if err := tx.Commit(); err != nil {
 		return fmt.Errorf("store: commit host_repo_stats tx: %w", err)
 	}
 	return nil
 }
 // nullableInt64 turns an optional integer into a query argument: the
 // pointed-to value when p is set, or an untyped nil (SQL NULL) when it
 // is not.
 func nullableInt64(p *int64) any {
 	if p != nil {
 		return *p
 	}
 	return nil
 }
 // nullableTime turns an optional timestamp into a query argument: the
 // time converted to UTC and formatted as RFC3339Nano when p is set, or
 // an untyped nil (SQL NULL) when it is not.
 func nullableTime(p *time.Time) any {
 	if p != nil {
 		return p.UTC().Format(time.RFC3339Nano)
 	}
 	return nil
 }
@@ -0,0 +1,131 @@
 package store
 import (
 	"context"
 	"errors"
 	"testing"
 	"time"
 )
 // int64ptr returns a pointer to a private copy of v, handy for filling
 // optional *int64 fields in test literals.
 func int64ptr(v int64) *int64 {
 	out := v
 	return &out
 }

 // boolptr returns a pointer to a private copy of v, handy for filling
 // optional *bool fields in test literals.
 func boolptr(v bool) *bool {
 	out := v
 	return &out
 }
 // TestHostRepoStatsRoundTrip issues four successive UpsertHostRepoStats
 // calls for one host and reads the row back with GetHostRepoStats after
 // each. It asserts that fields left unset in a later upsert keep the
 // values written by an earlier one, and that LockPresent can be set and
 // then cleared again.
 func TestHostRepoStatsRoundTrip(t *testing.T) {
 	t.Parallel()
 	s := openTestStore(t)
 	ctx := context.Background()
 	const hostID = "h-stats-test"
 	seedHost(t, s, hostID)
 	// 1. Initial upsert: set TotalSizeBytes only.
 	if err := s.UpsertHostRepoStats(ctx, hostID, HostRepoStats{
 		TotalSizeBytes: int64ptr(100),
 	}); err != nil {
 		t.Fatalf("upsert 1: %v", err)
 	}
 	got, err := s.GetHostRepoStats(ctx, hostID)
 	if err != nil {
 		t.Fatalf("get after upsert 1: %v", err)
 	}
 	if got.TotalSizeBytes == nil || *got.TotalSizeBytes != 100 {
 		t.Errorf("TotalSizeBytes: want 100, got %v", got.TotalSizeBytes)
 	}
 	if got.LastCheckStatus != "" {
 		t.Errorf("LastCheckStatus should be empty after first upsert, got %q", got.LastCheckStatus)
 	}
 	// 2. Upsert with LastCheckStatus; TotalSizeBytes must be preserved.
 	if err := s.UpsertHostRepoStats(ctx, hostID, HostRepoStats{
 		LastCheckStatus: "ok",
 	}); err != nil {
 		t.Fatalf("upsert 2: %v", err)
 	}
 	got, err = s.GetHostRepoStats(ctx, hostID)
 	if err != nil {
 		t.Fatalf("get after upsert 2: %v", err)
 	}
 	if got.TotalSizeBytes == nil || *got.TotalSizeBytes != 100 {
 		t.Errorf("TotalSizeBytes should still be 100 after second upsert, got %v", got.TotalSizeBytes)
 	}
 	if got.LastCheckStatus != "ok" {
 		t.Errorf("LastCheckStatus: want %q, got %q", "ok", got.LastCheckStatus)
 	}
 	// 3. Upsert with LockPresent=true; all other fields preserved.
 	// The timestamp is truncated to whole seconds so the comparison
 	// below does not depend on sub-second precision.
 	now := time.Now().UTC().Truncate(time.Second)
 	if err := s.UpsertHostRepoStats(ctx, hostID, HostRepoStats{
 		LockPresent: boolptr(true),
 		LastCheckAt: &now,
 	}); err != nil {
 		t.Fatalf("upsert 3: %v", err)
 	}
 	got, err = s.GetHostRepoStats(ctx, hostID)
 	if err != nil {
 		t.Fatalf("get after upsert 3: %v", err)
 	}
 	if got.LockPresent == nil || !*got.LockPresent {
 		t.Error("LockPresent should be true after upsert 3")
 	}
 	if got.TotalSizeBytes == nil || *got.TotalSizeBytes != 100 {
 		t.Errorf("TotalSizeBytes still 100 expected, got %v", got.TotalSizeBytes)
 	}
 	if got.LastCheckStatus != "ok" {
 		t.Errorf("LastCheckStatus still 'ok' expected, got %q", got.LastCheckStatus)
 	}
 	if got.LastCheckAt == nil {
 		t.Error("LastCheckAt should be set")
 	} else if !got.LastCheckAt.UTC().Truncate(time.Second).Equal(now) {
 		t.Errorf("LastCheckAt: got %v, want %v", got.LastCheckAt.UTC().Truncate(time.Second), now)
 	}
 	// 4. Clear lock (set to false).
 	if err := s.UpsertHostRepoStats(ctx, hostID, HostRepoStats{
 		LockPresent: boolptr(false),
 	}); err != nil {
 		t.Fatalf("upsert 4: %v", err)
 	}
 	got, err = s.GetHostRepoStats(ctx, hostID)
 	if err != nil {
 		t.Fatalf("get after upsert 4: %v", err)
 	}
 	// A non-nil pointer to false is required here: nil would mean the
 	// value was not read back at all.
 	if got.LockPresent == nil || *got.LockPresent {
 		t.Error("LockPresent should be false after upsert 4")
 	}
 }
 // TestHostRepoStatsNotFound checks that GetHostRepoStats reports
 // ErrNotFound for a host ID that was never written.
 func TestHostRepoStatsNotFound(t *testing.T) {
 	t.Parallel()
 	st := openTestStore(t)
 	if _, err := st.GetHostRepoStats(context.Background(), "no-such-host"); !errors.Is(err, ErrNotFound) {
 		t.Errorf("expected ErrNotFound, got %v", err)
 	}
 }
 // TestHostRepoStatsCascadeDelete writes a stats row for a seeded host,
 // deletes the host directly through the underlying DB handle, and then
 // expects GetHostRepoStats to report ErrNotFound for that host.
 func TestHostRepoStatsCascadeDelete(t *testing.T) {
 	t.Parallel()
 	const hostID = "h-cascade-test"
 	st := openTestStore(t)
 	ctx := context.Background()
 	seedHost(t, st, hostID)
 	initial := HostRepoStats{TotalSizeBytes: int64ptr(999)}
 	if err := st.UpsertHostRepoStats(ctx, hostID, initial); err != nil {
 		t.Fatalf("upsert: %v", err)
 	}
 	// Remove the parent row; the stats row is expected to go with it.
 	_, delErr := st.DB().ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, hostID)
 	if delErr != nil {
 		t.Fatalf("delete host: %v", delErr)
 	}
 	if _, err := st.GetHostRepoStats(ctx, hostID); !errors.Is(err, ErrNotFound) {
 		t.Errorf("after host delete, expected ErrNotFound for stats; got %v", err)
 	}
 }
@@ -193,6 +193,71 @@ func (s *Store) GetJob(ctx context.Context, id string) (*Job, error) {
 	return &j, nil
 }
 // LatestJobByKind looks up the job with the greatest created_at for the
 // given host and kind, whatever its status (queued and running jobs
 // included). It returns (nil, ErrNotFound) when the host has no job of
 // that kind.
 //
 // The maintenance ticker uses the result as the "last fire" anchor for
 // its cron-due check. In-flight jobs MUST be considered: otherwise a
 // prune that runs longer than one tick (>60s) would be fired again while
 // the first run is still going.
 func (s *Store) LatestJobByKind(ctx context.Context, hostID, kind string) (*Job, error) {
 	row := s.db.QueryRowContext(ctx,
 		`SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
 		        started_at, finished_at, exit_code, stats, error, created_at
 		 FROM jobs
 		 WHERE host_id = ? AND kind = ?
 		 ORDER BY created_at DESC
 		 LIMIT 1`, hostID, kind)
 	// Nullable columns are scanned into sql.Null* holders and copied onto
 	// the Job's pointer fields only when they are non-NULL.
 	var (
 		job         Job
 		scheduledID sql.NullString
 		actor       sql.NullString
 		startedRaw  sql.NullString
 		finishedRaw sql.NullString
 		exit        sql.NullInt64
 		statsJSON   sql.NullString
 		failure     sql.NullString
 		createdRaw  string
 	)
 	err := row.Scan(&job.ID, &job.HostID, &job.Kind, &job.Status, &scheduledID,
 		&job.ActorKind, &actor, &startedRaw, &finishedRaw,
 		&exit, &statsJSON, &failure, &createdRaw)
 	switch {
 	case errors.Is(err, sql.ErrNoRows):
 		return nil, ErrNotFound
 	case err != nil:
 		return nil, fmt.Errorf("store: scan latest job by kind: %w", err)
 	}
 	if scheduledID.Valid {
 		job.ScheduledID = &scheduledID.String
 	}
 	if actor.Valid {
 		job.ActorID = &actor.String
 	}
 	// Timestamp parse errors are ignored: an unparsable value leaves the
 	// zero time.Time in place (still behind a non-nil pointer).
 	if startedRaw.Valid {
 		started, _ := time.Parse(time.RFC3339Nano, startedRaw.String)
 		job.StartedAt = &started
 	}
 	if finishedRaw.Valid {
 		finished, _ := time.Parse(time.RFC3339Nano, finishedRaw.String)
 		job.FinishedAt = &finished
 	}
 	if exit.Valid {
 		code := int(exit.Int64)
 		job.ExitCode = &code
 	}
 	// An empty stats string is treated like NULL: Stats stays unset.
 	if statsJSON.Valid && statsJSON.String != "" {
 		job.Stats = json.RawMessage(statsJSON.String)
 	}
 	if failure.Valid {
 		job.Error = &failure.String
 	}
 	job.CreatedAt, _ = time.Parse(time.RFC3339Nano, createdRaw)
 	return &job, nil
 }
 // HasJobOfKind reports whether any job of the given kind exists for
 // this host, regardless of status. Used by the auto-init path on
 // agent hello to decide whether to dispatch a fresh `restic init` —
@@ -0,0 +1,136 @@
 package store
 import (
 	"context"
 	"errors"
 	"testing"
 	"time"
 )
 // TestLatestJobByKind exercises LatestJobByKind step by step on one host:
 // ErrNotFound while no job of the kind exists, the job with the later
 // CreatedAt winning among two finished jobs, a started job and then a
 // queued job each being returned once they are the newest, and
 // ErrNotFound for a kind that has no jobs.
 func TestLatestJobByKind(t *testing.T) {
 	t.Parallel()
 	s := openTestStore(t)
 	ctx := context.Background()
 	hostID := makeSchedHost(t, s)
 	// No jobs yet → ErrNotFound.
 	if _, err := s.LatestJobByKind(ctx, hostID, "forget"); !errors.Is(err, ErrNotFound) {
 		t.Fatalf("expected ErrNotFound on empty, got %v", err)
 	}
 	// Insert two finished jobs of kind=forget; the newer one should win.
 	older := time.Now().UTC().Add(-2 * time.Hour)
 	newer := time.Now().UTC().Add(-1 * time.Hour)
 	if err := s.CreateJob(ctx, Job{
 		ID: "j-old", HostID: hostID, Kind: "forget",
 		ActorKind: "system", CreatedAt: older,
 	}); err != nil {
 		t.Fatalf("create older: %v", err)
 	}
 	if err := s.MarkJobFinished(ctx, "j-old", "succeeded", 0, nil, "", older.Add(time.Minute)); err != nil {
 		t.Fatalf("finish older: %v", err)
 	}
 	if err := s.CreateJob(ctx, Job{
 		ID: "j-new", HostID: hostID, Kind: "forget",
 		ActorKind: "system", CreatedAt: newer,
 	}); err != nil {
 		t.Fatalf("create newer: %v", err)
 	}
 	if err := s.MarkJobFinished(ctx, "j-new", "failed", 1, nil, "boom", newer.Add(time.Minute)); err != nil {
 		t.Fatalf("finish newer: %v", err)
 	}
 	got, err := s.LatestJobByKind(ctx, hostID, "forget")
 	if err != nil {
 		t.Fatalf("LatestJobByKind: %v", err)
 	}
 	if got.ID != "j-new" {
 		t.Errorf("want j-new, got %q", got.ID)
 	}
 	// An in-flight running job must be returned — long-prune-suppresses-tick
 	// scenario: if a prune runs >60s the next tick must not re-fire it.
 	runningAt := time.Now().UTC()
 	if err := s.CreateJob(ctx, Job{
 		ID: "j-running", HostID: hostID, Kind: "forget",
 		ActorKind: "system", CreatedAt: runningAt,
 	}); err != nil {
 		t.Fatalf("create running: %v", err)
 	}
 	if err := s.MarkJobStarted(ctx, "j-running", runningAt); err != nil {
 		t.Fatalf("mark started: %v", err)
 	}
 	got2, err := s.LatestJobByKind(ctx, hostID, "forget")
 	if err != nil {
 		t.Fatalf("LatestJobByKind 2: %v", err)
 	}
 	if got2.ID != "j-running" {
 		t.Errorf("in-flight running job must be returned; want j-running, got %q", got2.ID)
 	}
 	// A queued (not-yet-started) job is also returned (it is newer than
 	// j-running because CreatedAt is later).
 	// The one-millisecond offset keeps the two CreatedAt values distinct.
 	queuedAt := runningAt.Add(time.Millisecond)
 	if err := s.CreateJob(ctx, Job{
 		ID: "j-queued", HostID: hostID, Kind: "forget",
 		ActorKind: "system", CreatedAt: queuedAt,
 	}); err != nil {
 		t.Fatalf("create queued: %v", err)
 	}
 	got3, err := s.LatestJobByKind(ctx, hostID, "forget")
 	if err != nil {
 		t.Fatalf("LatestJobByKind 3: %v", err)
 	}
 	if got3.ID != "j-queued" {
 		t.Errorf("queued job must be returned as newest; want j-queued, got %q", got3.ID)
 	}
 	// Different kind → ErrNotFound.
 	if _, err := s.LatestJobByKind(ctx, hostID, "prune"); !errors.Is(err, ErrNotFound) {
 		t.Fatalf("expected ErrNotFound for prune, got %v", err)
 	}
 }
 // TestListAllMaintenance checks ListAllMaintenance on an empty store and
 // again after two hosts have each been given a default maintenance row.
 func TestListAllMaintenance(t *testing.T) {
 	t.Parallel()
 	st := openTestStore(t)
 	ctx := context.Background()
 	// Nothing seeded yet: the call must succeed and return no rows.
 	listed, err := st.ListAllMaintenance(ctx)
 	if err != nil {
 		t.Fatalf("empty list: %v", err)
 	}
 	if len(listed) != 0 {
 		t.Errorf("want empty, got %+v", listed)
 	}
 	// Create two hosts, then give each a default maintenance row.
 	const (
 		h1 = "01HMAINTHOST00000000000A1"
 		h2 = "01HMAINTHOST00000000000A2"
 	)
 	for i, id := range []string{h1, h2} {
 		host := Host{
 			ID:              id,
 			Name:            "maint-host-" + string(rune('a'+i)),
 			OS:              "linux",
 			Arch:            "amd64",
 			AgentVersion:    "dev",
 			ResticVersion:   "0.16.0",
 			ProtocolVersion: 1,
 			EnrolledAt:      time.Now().UTC(),
 		}
 		if err := st.CreateHost(ctx, host, "th-"+id, ""); err != nil {
 			t.Fatalf("create host %s: %v", id, err)
 		}
 	}
 	if err := st.CreateDefaultRepoMaintenance(ctx, h1); err != nil {
 		t.Fatalf("seed h1: %v", err)
 	}
 	if err := st.CreateDefaultRepoMaintenance(ctx, h2); err != nil {
 		t.Fatalf("seed h2: %v", err)
 	}
 	listed, err = st.ListAllMaintenance(ctx)
 	if err != nil {
 		t.Fatalf("list: %v", err)
 	}
 	if n := len(listed); n != 2 {
 		t.Errorf("want 2 rows, got %d", n)
 	}
 }
@@ -50,6 +50,40 @@ func (st *Store) GetRepoMaintenance(ctx context.Context, hostID string) (*HostRe
 	return &m, nil
 }
 // ListAllMaintenance loads every row of host_repo_maintenance. The
 // server-side maintenance ticker calls it on each tick to walk all
 // hosts. The query has no ORDER BY, so row order is unspecified (the
 // ticker doesn't care).
 func (st *Store) ListAllMaintenance(ctx context.Context) ([]HostRepoMaintenance, error) {
 	rows, err := st.db.QueryContext(ctx,
 		`SELECT host_id, forget_cron, forget_enabled,
 			prune_cron, prune_enabled,
 			check_cron, check_enabled, check_subset_pct
 		 FROM host_repo_maintenance`)
 	if err != nil {
 		return nil, fmt.Errorf("store: list all maintenance: %w", err)
 	}
 	defer func() { _ = rows.Close() }()
 	// The *_enabled columns are scanned as integers; any non-zero value
 	// counts as enabled.
 	isSet := func(flag int) bool { return flag != 0 }
 	var all []HostRepoMaintenance
 	for rows.Next() {
 		var rec HostRepoMaintenance
 		var forgetFlag, pruneFlag, checkFlag int
 		scanErr := rows.Scan(&rec.HostID,
 			&rec.ForgetCron, &forgetFlag,
 			&rec.PruneCron, &pruneFlag,
 			&rec.CheckCron, &checkFlag, &rec.CheckSubsetPct)
 		if scanErr != nil {
 			return nil, fmt.Errorf("store: scan maintenance: %w", scanErr)
 		}
 		rec.ForgetEnabled = isSet(forgetFlag)
 		rec.PruneEnabled = isSet(pruneFlag)
 		rec.CheckEnabled = isSet(checkFlag)
 		all = append(all, rec)
 	}
 	// Rows gathered so far are returned together with any iteration error.
 	return all, rows.Err()
 }
 // UpdateRepoMaintenance replaces every editable field. Doesn't bump
 // the schedule version — these run on the server's own ticker, not
 // the agent's local cron, so the agent doesn't need to know.
@@ -0,0 +1,58 @@
 -- 0009_admin_creds_and_repo_stats.sql
 --
 -- Phase 5 of the P2 redesign needs two things in the schema:
 --
 -- 1. A second credential row per host. Today host_credentials is
 --    1:1 with hosts. For prune (and any future destructive op) we
 --    want a rest-server admin user whose password gives delete
 --    access — separate from the append-only user used on every
 --    backup. Add a `kind` column with default 'repo'; existing rows
 --    become kind='repo'. Future admin rows live alongside.
 --
 -- 2. A small singleton-per-host projection for repo size, snapshot
 --    count, last-prune freed bytes, lock state, and last-check
 --    result. Backed by `restic stats --json` + sniffed `restic
 --    check` stderr.
 --
 -- Prefer column-level ALTERs: host_credentials has no inbound FKs, but
 -- the rule from CLAUDE.md still applies. The only exception is the
 -- primary-key rebuild below, which SQLite cannot express as an ALTER.
 ALTER TABLE host_credentials ADD COLUMN kind TEXT NOT NULL DEFAULT 'repo';
 -- The PK on host_credentials is currently (host_id) — we need a
 -- composite (host_id, kind). SQLite has no ALTER TABLE …
 -- ADD/CHANGE PRIMARY KEY, so this is the one place a rebuild is
 -- justified. host_credentials has no inbound FKs, so the cascade
 -- trap doesn't apply here. Verified against schema/0002.
 CREATE TABLE host_credentials_new (
  host_id        TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
  kind           TEXT NOT NULL DEFAULT 'repo'
                   CHECK (kind IN ('repo', 'admin')),
  enc_repo_creds TEXT NOT NULL,
  updated_at     TEXT NOT NULL,
  PRIMARY KEY (host_id, kind)
 );
 INSERT INTO host_credentials_new (host_id, kind, enc_repo_creds, updated_at)
  SELECT host_id, kind, enc_repo_creds, updated_at FROM host_credentials;
 DROP TABLE host_credentials;
 ALTER TABLE host_credentials_new RENAME TO host_credentials;
 -- Repo stats projection. One row per host, upserted by the agent's
 -- stats.report envelope (which fires after every successful backup
 -- and after every check / prune). All fields nullable so a freshly
 -- enrolled host with no jobs yet is representable.
 CREATE TABLE host_repo_stats (
  host_id                 TEXT PRIMARY KEY REFERENCES hosts(id) ON DELETE CASCADE,
  total_size_bytes        INTEGER,
  raw_size_bytes          INTEGER,
  unique_files            INTEGER,
  snapshot_count          INTEGER,
  last_check_at           TEXT,
  last_check_status       TEXT CHECK (last_check_status IS NULL OR last_check_status IN ('ok', 'errors_found', 'failed')),
  lock_present            INTEGER NOT NULL DEFAULT 0,
  last_prune_at           TEXT,
  last_prune_freed_bytes  INTEGER,
  updated_at              TEXT NOT NULL
 );
@@ -72,6 +72,43 @@ func (st *Store) DuePendingRuns(ctx context.Context, now time.Time, limit int) (
 	return out, rows.Err()
 }
 // ListPendingRunsForHost returns every pending row for the host
 // (regardless of next_attempt_at), ordered by next_attempt_at
 // ascending. Used by the on-reconnect drain — when a host comes
 // back, we walk every pending row for it, not just the due ones,
 // because the host being back makes "due" unimportant: every row
 // is dispatchable now.
 //
 // The returned slice is never nil: a host with no pending rows yields an
 // empty slice and a nil error. Query and scan failures are returned
 // wrapped with a "store:" prefix; the underlying error stays reachable
 // through errors.Is / errors.As.
 func (st *Store) ListPendingRunsForHost(ctx context.Context, hostID string) ([]PendingRun, error) {
 	rows, err := st.db.QueryContext(ctx,
 		`SELECT id, schedule_id, source_group_id, host_id, attempt,
 			next_attempt_at, scheduled_at, COALESCE(last_error, '')
 		 FROM pending_runs
 		 WHERE host_id = ?
 		 ORDER BY next_attempt_at`,
 		hostID)
 	if err != nil {
 		return nil, fmt.Errorf("store: list pending runs for host: %w", err)
 	}
 	defer func() { _ = rows.Close() }()
 	out := []PendingRun{}
 	for rows.Next() {
 		var p PendingRun
 		var nextAt, scheduledAt string
 		if err := rows.Scan(&p.ID, &p.ScheduleID, &p.SourceGroupID, &p.HostID,
 			&p.Attempt, &nextAt, &scheduledAt, &p.LastError); err != nil {
 			// Wrap like the query error above so callers can tell which
 			// store operation failed.
 			return nil, fmt.Errorf("store: scan pending run: %w", err)
 		}
 		// Timestamps that do not parse as RFC3339Nano are left as the
 		// zero time.Time; the row is still returned.
 		if t, err := time.Parse(time.RFC3339Nano, nextAt); err == nil {
 			p.NextAttemptAt = t
 		}
 		if t, err := time.Parse(time.RFC3339Nano, scheduledAt); err == nil {
 			p.ScheduledAt = t
 		}
 		out = append(out, p)
 	}
 	return out, rows.Err()
 }
 // DeletePendingRun removes a row by id. Called after successful
 // dispatch or after exceeding retry_max.
 func (st *Store) DeletePendingRun(ctx context.Context, id string) error {
@@ -219,3 +219,78 @@ func TestPendingRunQueue(t *testing.T) {
 		t.Fatalf("after delete: %v", due)
 	}
 }
 // TestListPendingRunsForHost enqueues two pending runs for host A (one
 // due in the past, one an hour ahead) and one for host B, then checks
 // that ListPendingRunsForHost(A) returns exactly A's two rows ordered by
 // next_attempt_at ascending, and that an unknown host ID yields zero
 // rows without an error.
 func TestListPendingRunsForHost(t *testing.T) {
 	t.Parallel()
 	s := openTestStore(t)
 	ctx := context.Background()
 	hostA := makeSchedHost(t, s)
 	hostB := "01HPENDLISTHOSTB00000001"
 	if err := s.CreateHost(ctx, Host{
 		ID: hostB, Name: "pending-list-host-b", OS: "linux", Arch: "amd64",
 		AgentVersion: "dev", ResticVersion: "0.16.0", ProtocolVersion: 1,
 		EnrolledAt: time.Now().UTC(),
 	}, "tokenhashB", ""); err != nil {
 		t.Fatal(err)
 	}
 	gA := makeGroup(t, s, hostA, "default", "01HPENDLISTGRPA000000001")
 	gB := makeGroup(t, s, hostB, "default", "01HPENDLISTGRPB000000001")
 	schedA := "01HPENDLISTSCHEDA0000001"
 	schedB := "01HPENDLISTSCHEDB0000001"
 	if err := s.CreateSchedule(ctx, &Schedule{
 		ID: schedA, HostID: hostA, CronExpr: "@hourly", Enabled: true,
 		SourceGroupIDs: []string{gA},
 	}); err != nil {
 		t.Fatal(err)
 	}
 	if err := s.CreateSchedule(ctx, &Schedule{
 		ID: schedB, HostID: hostB, CronExpr: "@hourly", Enabled: true,
 		SourceGroupIDs: []string{gB},
 	}); err != nil {
 		t.Fatal(err)
 	}
 	now := time.Now().UTC()
 	// Two rows for hostA — one not-yet-due, one already-due — and one
 	// for hostB. ListPendingRunsForHost(A) must return both A rows
 	// (regardless of due-ness) ordered by next_attempt_at ascending.
 	// The later-due row (…A02) is enqueued first, so insertion order
 	// differs from the expected result order.
 	rows := []*PendingRun{
 		{
 			ID: "01HPENDLISTROW0000000A02", ScheduleID: schedA, SourceGroupID: gA, HostID: hostA,
 			NextAttemptAt: now.Add(time.Hour), ScheduledAt: now,
 		},
 		{
 			ID: "01HPENDLISTROW0000000A01", ScheduleID: schedA, SourceGroupID: gA, HostID: hostA,
 			NextAttemptAt: now.Add(-time.Minute), ScheduledAt: now.Add(-time.Hour),
 		},
 		{
 			ID: "01HPENDLISTROW0000000B01", ScheduleID: schedB, SourceGroupID: gB, HostID: hostB,
 			NextAttemptAt: now, ScheduledAt: now,
 		},
 	}
 	for _, r := range rows {
 		if err := s.EnqueuePendingRun(ctx, r); err != nil {
 			t.Fatal(err)
 		}
 	}
 	out, err := s.ListPendingRunsForHost(ctx, hostA)
 	if err != nil {
 		t.Fatal(err)
 	}
 	if len(out) != 2 {
 		t.Fatalf("len=%d, want 2: %+v", len(out), out)
 	}
 	// Ordered ascending by next_attempt_at: the -1m row first, then +1h.
 	if out[0].ID != "01HPENDLISTROW0000000A01" || out[1].ID != "01HPENDLISTROW0000000A02" {
 		t.Fatalf("order: got %s,%s", out[0].ID, out[1].ID)
 	}
 	// An unknown host is not an error: the result is simply empty.
 	out, err = s.ListPendingRunsForHost(ctx, "non-existent-host")
 	if err != nil {
 		t.Fatal(err)
 	}
 	if len(out) != 0 {
 		t.Fatalf("non-existent host: got %d rows", len(out))
 	}
 }
@@ -84,6 +84,70 @@ func TestMigrateIsIdempotent(t *testing.T) {
 	}
 }
 // TestMigration0009Schema probes the migrated schema with raw SQL through
 // the store's DB handle. It checks that host_credentials accepts one row
 // per (host_id, kind) but rejects a duplicate pair and an unknown kind,
 // and that host_repo_stats exists, stores lock_present, and rejects an
 // unknown last_check_status.
 func TestMigration0009Schema(t *testing.T) {
 	t.Parallel()
 	s := openTestStore(t)
 	ctx := context.Background()
 	// host_credentials must have a composite PK (host_id, kind).
 	// We verify this by inserting two rows for the same host_id (different kinds)
 	// and confirming a duplicate (host_id, kind) fails.
 	_, err := s.DB().ExecContext(ctx,
 		`INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`,
 		"h-0009", "test-host", "linux", "amd64", "2026-01-01T00:00:00Z")
 	if err != nil {
 		t.Fatalf("insert host: %v", err)
 	}
 	now := "2026-01-01T00:00:00Z"
 	if _, err := s.DB().ExecContext(ctx,
 		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
 		"h-0009", "repo", "enc-repo", now); err != nil {
 		t.Fatalf("insert repo creds: %v", err)
 	}
 	if _, err := s.DB().ExecContext(ctx,
 		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
 		"h-0009", "admin", "enc-admin", now); err != nil {
 		t.Fatalf("insert admin creds: %v", err)
 	}
 	// Duplicate (host_id, kind) must fail.
 	if _, err := s.DB().ExecContext(ctx,
 		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
 		"h-0009", "repo", "enc-repo-2", now); err == nil {
 		t.Fatal("expected unique constraint violation on (host_id, kind), got nil")
 	}
 	// CHECK (kind IN ('repo','admin')) must reject an invalid kind.
 	if _, err := s.DB().ExecContext(ctx,
 		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
 		"h-0009", "other", "enc-other", now); err == nil {
 		t.Fatal("expected CHECK constraint violation on kind='other', got nil")
 	}
 	// host_repo_stats table must exist with expected columns.
 	if _, err := s.DB().ExecContext(ctx,
 		`INSERT INTO host_repo_stats (host_id, lock_present, updated_at) VALUES (?,?,?)`,
 		"h-0009", 0, now); err != nil {
 		t.Fatalf("insert host_repo_stats: %v", err)
 	}
 	var lockPresent int
 	if err := s.DB().QueryRowContext(ctx,
 		`SELECT lock_present FROM host_repo_stats WHERE host_id = ?`, "h-0009",
 	).Scan(&lockPresent); err != nil {
 		t.Fatalf("select host_repo_stats: %v", err)
 	}
 	if lockPresent != 0 {
 		t.Errorf("expected lock_present=0, got %d", lockPresent)
 	}
 	// CHECK (last_check_status IN ('ok','errors_found','failed')) must reject
 	// an invalid value.
 	if _, err := s.DB().ExecContext(ctx,
 		`UPDATE host_repo_stats SET last_check_status = ? WHERE host_id = ?`,
 		"wat", "h-0009"); err == nil {
 		t.Fatal("expected CHECK constraint violation on last_check_status='wat', got nil")
 	}
 }
 func TestForeignKeysEnforced(t *testing.T) {
 	t.Parallel()
 	s := openTestStore(t)
@@ -1,327 +0,0 @@
 #!/usr/bin/env bash
 #
 # provision-gitea-runner.sh — one-shot, idempotent host setup for an
 # act_runner LXC. Speeds up Gitea Actions runs by:
 #
 #   1. Disabling forced docker pulls (image refresh moves to a cron).
 #   2. Mounting persistent host volumes for Go module/build caches and
 #      the act-actions clone cache.
 #   3. Pre-pulling the runner-images container image.
 #   4. Pre-cloning a configurable list of GitHub actions into the
 #      act cache so jobs don't fetch them on every run.
 #   5. Installing golangci-lint (latest v2.x) at /usr/local/bin.
 #   6. Setting up a nightly cron to refresh image + action clones +
 #      golangci-lint.
 #
 # The script is generic — no per-project state. Point it at any LXC
 # running act_runner as a systemd service and it will provision the
 # host. Re-runs are safe; they reconcile state.
 #
 # Usage:  sudo ./provision-gitea-runner.sh
 #
 # Configurable via environment variables (defaults shown):
 #
 #   CACHE_BASE=/var/cache/gitea-runner
 #   ACT_RUNNER_CONFIG=/etc/act_runner/config.yaml
 #   RUNNER_IMAGE=docker.gitea.com/runner-images:ubuntu-latest
 #   ACTIONS_TO_PRECLONE=(actions/checkout@v4 actions/setup-go@v5
 #                        actions/upload-artifact@v4
 #                        golangci/golangci-lint-action@v7)
 #
 # To add more pre-cloned actions later, edit /etc/cron.d/gitea-runner-refresh
 # (the ACTIONS list is materialised into the cron script).
 set -euo pipefail
 # ---------- defaults ---------------------------------------------------
 : "${CACHE_BASE:=/var/cache/gitea-runner}"
 : "${ACT_RUNNER_CONFIG:=/etc/act_runner/config.yaml}"
 : "${RUNNER_IMAGE:=docker.gitea.com/runner-images:ubuntu-latest}"
 DEFAULT_ACTIONS=(
  "actions/checkout@v4"
  "actions/setup-go@v5"
  "actions/upload-artifact@v4"
  "golangci/golangci-lint-action@v7"
 )
 # Allow caller to override by exporting ACTIONS_TO_PRECLONE as a
 # space-separated string (env vars can't carry arrays cleanly).
 if [[ -n "${ACTIONS_TO_PRECLONE:-}" ]]; then
  read -r -a ACTIONS <<<"${ACTIONS_TO_PRECLONE}"
 else
  ACTIONS=("${DEFAULT_ACTIONS[@]}")
 fi
 # ---------- helpers ----------------------------------------------------
 # log <msg...> — progress line on stdout, cyan "==>" prefix.
 log() {
   printf '\033[1;36m==>\033[0m %s\n' "$*"
 }
 # warn <msg...> — warning line on stderr, yellow "==>" prefix.
 warn() {
   printf '\033[1;33m==>\033[0m %s\n' "$*" >&2
 }
 # die <msg...> — error line on stderr, red "==>" prefix, then exit 1.
 die() {
   printf '\033[1;31m==>\033[0m %s\n' "$*" >&2
   exit 1
 }
 # require_cmd <name> — abort via die unless <name> resolves to a command.
 require_cmd() {
   if ! command -v "$1" >/dev/null 2>&1; then
     die "missing: $1 (install it first)"
   fi
 }
 # sha256_url <url> — print the lowercase hex SHA-256 of the URL string
 # (no trailing newline is hashed). act_runner names its action-clone
 # directories after sha256(URL); verified against a real run log:
 #   url=https://github.com/actions/checkout
 #   sha256=c3fe249fe73091a17d6638fe1341e7bd0bcc3466ce52323c0688e83e2463a4ab
 sha256_url() {
   # sha256sum prints "<hash>  -" for stdin; keep only the first field.
   printf '%s' "$1" | sha256sum | cut -d' ' -f1
 }
 # ---------- pre-flight -------------------------------------------------
 [[ $EUID -eq 0 ]] || die "run as root (the act_runner service writes /var/lib/act_runner as root)"
 require_cmd systemctl
 require_cmd docker
 require_cmd git
 require_cmd curl
 require_cmd python3
 # PyYAML for the config edit. Install if missing — Ubuntu 24.04 ships
 # python3-yaml in the default repos.
 if ! python3 -c 'import yaml' 2>/dev/null; then
  log "installing python3-yaml (needed for safe YAML edits)"
  apt-get update -qq
  apt-get install -y -qq python3-yaml
 fi
 [[ -f "$ACT_RUNNER_CONFIG" ]] || die "$ACT_RUNNER_CONFIG not found — is act_runner installed?"
 systemctl list-unit-files act_runner.service >/dev/null 2>&1 || \
  die "act_runner.service not found — register the runner first"
 log "pre-flight OK"
 log "  cache base       : $CACHE_BASE"
 log "  config file      : $ACT_RUNNER_CONFIG"
 log "  runner image     : $RUNNER_IMAGE"
 log "  actions to clone : ${ACTIONS[*]}"
 # ---------- 1. cache directories ---------------------------------------
 log "creating cache directories under $CACHE_BASE"
 for sub in go-mod go-build act-actions; do
  install -d -m 0755 -o root -g root "$CACHE_BASE/$sub"
 done
 # ---------- 2. edit /etc/act_runner/config.yaml ------------------------
 #
 # Three keys are reconciled to known values:
 #
 #   container.force_pull   : false  (we keep the image fresh via cron)
 #   container.options      : "-v <cache mounts...>"  (auto-mount caches
 #                             into every job container)
 #   container.valid_volumes: [<our cache paths>]  (whitelist so the
 #                             container.options mounts are accepted)
 #
 # Other keys are preserved verbatim. The edit is idempotent: re-running
 # yields the same file content.
 log "patching $ACT_RUNNER_CONFIG"
 # Backup once (only if no .pre-provision backup exists yet).
 if [[ ! -f "${ACT_RUNNER_CONFIG}.pre-provision" ]]; then
  cp -p "$ACT_RUNNER_CONFIG" "${ACT_RUNNER_CONFIG}.pre-provision"
  log "  saved pristine copy to ${ACT_RUNNER_CONFIG}.pre-provision"
 fi
 CONTAINER_OPTIONS_VALUE="-v ${CACHE_BASE}/go-mod:/root/go/pkg/mod:rw -v ${CACHE_BASE}/go-build:/root/.cache/go-build:rw -v ${CACHE_BASE}/act-actions:/root/.cache/act:rw"
 CACHE_BASE="$CACHE_BASE" CONTAINER_OPTIONS_VALUE="$CONTAINER_OPTIONS_VALUE" \
 ACT_RUNNER_CONFIG="$ACT_RUNNER_CONFIG" \
 python3 - <<'PY'
 import os, sys, yaml
 cfg_path = os.environ['ACT_RUNNER_CONFIG']
 cache_base = os.environ['CACHE_BASE']
 container_options = os.environ['CONTAINER_OPTIONS_VALUE']
 with open(cfg_path) as f:
    cfg = yaml.safe_load(f) or {}
 cfg.setdefault('container', {})
 cfg['container']['force_pull'] = False
 cfg['container']['options'] = container_options
 # Whitelist every cache subdir explicitly so jobs that try to bind-mount
 # them via workflow-side `volumes:` (rare but possible) are accepted.
 desired_vols = [
    f"{cache_base}/go-mod",
    f"{cache_base}/go-build",
    f"{cache_base}/act-actions",
 ]
 existing = cfg['container'].get('valid_volumes') or []
 merged = list(dict.fromkeys(existing + desired_vols))  # de-dup, preserve order
 cfg['container']['valid_volumes'] = merged
 # Write back with stable formatting. yaml.dump preserves enough
 # structure for act_runner to parse; comments in the original config
 # do get stripped — that's why we preserve the .pre-provision backup.
 with open(cfg_path + '.tmp', 'w') as f:
    yaml.safe_dump(cfg, f, default_flow_style=False, sort_keys=False)
 os.replace(cfg_path + '.tmp', cfg_path)
 print(f"  container.force_pull   : false")
 print(f"  container.options      : {container_options}")
 print(f"  container.valid_volumes: {merged}")
 PY
 # ---------- 3. pre-pull the runner image -------------------------------
 log "pulling $RUNNER_IMAGE (one-time; cron refreshes it nightly)"
 docker pull "$RUNNER_IMAGE"
 # ---------- 4. pre-clone the actions list ------------------------------
 #
 # act_runner expects clones at $cache/<sha256(url)> with the ref already
 # checked out. We clone the default branch then fetch + check out the
 # requested ref. Re-running fetches updates rather than re-cloning.
 log "pre-cloning actions into $CACHE_BASE/act-actions"
 for spec in "${ACTIONS[@]}"; do
  if [[ "$spec" != *@* ]]; then
    warn "  skip '$spec' — must be owner/repo@ref"
    continue
  fi
  repo="${spec%@*}"
  ref="${spec##*@}"
  url="https://github.com/${repo}"
  dir="${CACHE_BASE}/act-actions/$(sha256_url "$url")"
  if [[ -d "$dir/.git" ]]; then
    log "  refresh $repo @ $ref"
    git -C "$dir" fetch --quiet --tags --prune origin
  else
    log "  clone   $repo @ $ref → $dir"
    git clone --quiet "$url" "$dir"
  fi
  # Detach onto the requested ref. Works for branches, tags, and SHAs.
  if ! git -C "$dir" -c advice.detachedHead=false checkout --quiet "$ref" 2>/dev/null; then
    # If `ref` is a remote branch we haven't tracked yet, try origin/<ref>.
    git -C "$dir" -c advice.detachedHead=false checkout --quiet "origin/$ref"
  fi
 done
 # ---------- 5. golangci-lint -------------------------------------------
 #
 # Install the latest v2.x at /usr/local/bin/golangci-lint. Workflows
 # that pin a specific version via the action's `version:` arg will
 # still re-download — but jobs that don't pin (or pin to "latest"/"v2")
 # get the host-installed binary for free.
 log "installing/updating golangci-lint (latest v2.x) → /usr/local/bin"
 GOLANGCI_INSTALL_URL="https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh"
 # `-b` = install dir. No version argument means "latest", which
 # install.sh resolves to the latest v2 release from GitHub releases.
 # The installer's stdout is discarded to keep the provisioning log quiet.
 curl -fsSL "$GOLANGCI_INSTALL_URL" | sh -s -- -b /usr/local/bin >/dev/null
 /usr/local/bin/golangci-lint --version || warn "golangci-lint install verification failed"
 # ---------- 6. nightly refresh cron ------------------------------------
 #
 # Re-pulls the runner image, refreshes the action clones, and updates
 # golangci-lint. Runs at 03:17 to dodge top-of-hour CI bursts.
 CRON_PATH=/etc/cron.d/gitea-runner-refresh
 REFRESH_SCRIPT=/usr/local/sbin/gitea-runner-refresh
 log "writing $REFRESH_SCRIPT and $CRON_PATH"
 # Materialise the actions list into the script so the cron job is
 # self-contained and survives edits to this file. The list is expanded
 # inline by the printf inside the heredoc below, so no intermediate
 # variable is needed.
 cat >"$REFRESH_SCRIPT" <<EOF
 #!/usr/bin/env bash
 # Auto-generated by provision-gitea-runner.sh. Re-running the
 # provisioning script regenerates this file.
 set -euo pipefail
 CACHE_BASE="$CACHE_BASE"
 RUNNER_IMAGE="$RUNNER_IMAGE"
 ACTIONS=(
 $(printf '  "%s"\n' "${ACTIONS[@]}")
 )
 sha256_url() { printf '%s' "\$1" | sha256sum | awk '{print \$1}'; }
 # 1. Refresh the runner-images base.
 docker pull -q "\$RUNNER_IMAGE" >/dev/null
 # 2. Refresh action clones.
 for spec in "\${ACTIONS[@]}"; do
  [[ "\$spec" == *@* ]] || continue
  repo="\${spec%@*}"; ref="\${spec##*@}"
  url="https://github.com/\$repo"
  dir="\$CACHE_BASE/act-actions/\$(sha256_url "\$url")"
  if [[ -d "\$dir/.git" ]]; then
    git -C "\$dir" fetch --quiet --tags --prune origin || true
    git -C "\$dir" -c advice.detachedHead=false checkout --quiet "\$ref" 2>/dev/null \\
      || git -C "\$dir" -c advice.detachedHead=false checkout --quiet "origin/\$ref" || true
  fi
 done
 # 3. Refresh golangci-lint (latest v2.x). Tolerate transient
 #    GitHub-rate-limit failures — next night will retry.
 curl -fsSL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh \\
  | sh -s -- -b /usr/local/bin >/dev/null 2>&1 || true
 EOF
 chmod 0755 "$REFRESH_SCRIPT"
 cat >"$CRON_PATH" <<EOF
 # Auto-generated by provision-gitea-runner.sh. Refreshes the runner
 # image, action clones, and golangci-lint every night at 03:17.
 SHELL=/bin/bash
 PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 17 3 * * * root $REFRESH_SCRIPT >> /var/log/gitea-runner-refresh.log 2>&1
 EOF
 chmod 0644 "$CRON_PATH"
 # ---------- 7. restart act_runner --------------------------------------
 log "restarting act_runner.service to pick up the new config"
 systemctl restart act_runner.service
 sleep 2
 systemctl is-active --quiet act_runner.service \
  || die "act_runner did not come back up — check 'journalctl -u act_runner -n 50'"
 # ---------- 8. container-create benchmark ------------------------------
 #
 # Reports cold + warm `docker run --rm <image> true` time. Sanity check
 # that overlay setup is fast on this host. Numbers > ~5s indicate a
 # slow filesystem or DNS issue worth investigating separately.
 log "benchmark: docker run --rm $RUNNER_IMAGE true"
 {
  printf '  cold (post-pull) : '
  /usr/bin/time -f '%e s' docker run --rm "$RUNNER_IMAGE" true 2>&1 | tail -1
  printf '  warm (immediate) : '
  /usr/bin/time -f '%e s' docker run --rm "$RUNNER_IMAGE" true 2>&1 | tail -1
 } || warn "benchmark failed — non-fatal"
 # ---------- done -------------------------------------------------------
 # Final summary for the operator. cat copies the heredoc verbatim and
 # does not interpret "\033", so the colour codes are supplied as real
 # escape bytes through ANSI-C quoted variables expanded by the heredoc.
 C_GREEN=$'\033[1;32m'
 C_YELLOW=$'\033[1;33m'
 C_RESET=$'\033[0m'
 cat <<EOF
 ${C_GREEN}==> Provisioning complete${C_RESET}
 What changed on this host:
  * /etc/act_runner/config.yaml — force_pull off, container.options +
    valid_volumes set for the cache mounts.  Pristine copy preserved
    at ${ACT_RUNNER_CONFIG}.pre-provision.
  * $CACHE_BASE/{go-mod,go-build,act-actions} — persistent caches.
  * /usr/local/bin/golangci-lint — latest v2.x.
  * $REFRESH_SCRIPT and $CRON_PATH — nightly refresh @ 03:17.
  * Runner image pre-pulled.
 ${C_YELLOW}Note on Go cache + setup-go:${C_RESET} if your workflow uses
 \`actions/setup-go\` with \`cache: true\`, the action will still tar/untar
 the cache via the Gitea cache backend on every job — partially
 defeating the persistent volume.  For full speed-up, drop \`cache: true\`
 from the workflow once the persistent volume is warm.  Per-project
 decision; this script doesn't touch workflows.
 EOF
@@ -166,14 +166,24 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
  - Header "version N · agent in sync / agent at vM" indicator preserved across all tabs (backed by `host_schedule_version` + `applied_schedule_version`).
  - Form validation re-renders with the operator's typed input intact (mirror P2-04's behaviour). Each save fires `pushScheduleSetAsync` so an online agent re-arms within seconds.
-### P2 redesign — Phase 5 (server-side maintenance ticker) — TODO
+### P2 redesign — Phase 5 (server-side maintenance ticker)
- [ ] **P2R-03** (M) `prune` command end-to-end. Restic wrapper (`restic.RunPrune`), agent dispatcher (`case api.JobPrune:`), wire envelope. **Admin-only credential**: a second `host_credentials` row keyed by `host_id` + `kind=admin` carries the non-append-only username/password; server pushes it via `config.update` only when dispatching a prune job, and the agent's secrets store keeps it in a separate slot from the everyday append-only creds. UI: prune row on the Repo page. Operator-triggered Run-now via `POST /hosts/{id}/repo/prune`. Cadence-driven dispatch lands in P2R-04.
+- [x] **P2R-03** (M) `prune` command end-to-end. Restic wrapper (`restic.RunPrune`), agent dispatcher (`case api.JobPrune:`), wire envelope. **Admin-only credential**: a second `host_credentials` row keyed by `host_id` + `kind=admin` carries the non-append-only username/password; server pushes it via `config.update` only when dispatching a prune job, and the agent's secrets store keeps it in a separate slot from the everyday append-only creds. UI: prune row on the Repo page. Operator-triggered Run-now via `POST /hosts/{id}/repo/prune`. Cadence-driven dispatch lands in P2R-04.
- [ ] **P2R-04** (M) `check` command end-to-end (`restic check --read-data-subset N%`). Wrapper + dispatcher + wire. UI: check row on the Repo page (with the subset % slider). Operator Run-now via `POST /hosts/{id}/repo/check`. Cadence-driven dispatch lands in P2R-05.
+- [x] **P2R-04** (M) `check` command end-to-end (`restic check --read-data-subset N%`). Wrapper + dispatcher + wire. UI: check row on the Repo page (with the subset % slider). Operator Run-now via `POST /hosts/{id}/repo/check`. Cadence-driven dispatch lands in P2R-05.
- [ ] **P2R-05** (S) `unlock` command end-to-end (`restic unlock`). Operator-only — no cadence. `POST /hosts/{id}/repo/unlock`. Repo page surfaces lock state from the most recent `check` (which warns about stale locks).
+- [x] **P2R-05** (S) `unlock` command end-to-end (`restic unlock`). Operator-only — no cadence. `POST /hosts/{id}/repo/unlock`. Repo page surfaces lock state from the most recent `check` (which warns about stale locks).
- [ ] **P2R-06** (M) Server-side maintenance ticker. Cron-style loop on the server reads `host_repo_maintenance` rows, dispatches `forget` / `prune` / `check` jobs against the right host on the configured cadence (last-run timestamps tracked per kind on the maintenance row). Independent of the agent's local cron — the agent's cron only handles backup schedules now. Skips offline hosts (queues to `pending_runs` instead — see P2R-08). Handles ticker restarts cleanly (no-op if a job of the same kind ran inside the cadence window).
+- [x] **P2R-06** (M) Server-side maintenance ticker. Cron-style loop on the server reads `host_repo_maintenance` rows, dispatches `forget` / `prune` / `check` jobs against the right host on the configured cadence (last-run timestamps tracked per kind on the maintenance row). Independent of the agent's local cron — the agent's cron only handles backup schedules now. Skips offline hosts (queues to `pending_runs` instead — see P2R-08). Handles ticker restarts cleanly (no-op if a job of the same kind ran inside the cadence window).
- [ ] **P2R-07** (S) Repo stats panel on the Repo page: size, dedup ratio, snapshot count, last-check timestamp + result, lock state, last-prune timestamp + bytes-freed. Backed by parsing `restic stats --json` output that the agent ships periodically (piggyback on the existing snapshots-report path).
+- [x] **P2R-07** (S) Repo stats panel on the Repo page: size, dedup ratio, snapshot count, last-check timestamp + result, lock state, last-prune timestamp + bytes-freed. Backed by parsing `restic stats --json` output that the agent ships periodically (piggyback on the existing snapshots-report path).
- [ ] **P2R-08** (M) Pending-runs queue worker. On agent reconnect, server drains `pending_runs` rows for that host and re-dispatches them in order. Bump backoff per `pending_run.attempt_count`; drop rows that have exceeded the source-group's `retry_max`. Audit-logged. Smoke-tested by stopping the agent, running maintenance ticker so cadence misses, restarting agent, watching the queue drain.
+- [x] **P2R-08** (M) Pending-runs queue worker. On agent reconnect, server drains `pending_runs` rows for that host and re-dispatches them in order. Bump backoff per `pending_run.attempt_count`; drop rows that have exceeded the source-group's `retry_max`. Audit-logged. Smoke-tested by stopping the agent, running maintenance ticker so cadence misses, restarting agent, watching the queue drain.
 ### P2 redesign — Phase 5 ✅
 - Restic-manager Phase 5 lands on branch `p2r-phase5-maintenance`:
  prune/check/unlock end-to-end (P2R-03/04/05); server-side
  maintenance ticker drives forget/prune/check on cadence (P2R-06);
  repo-stats panel surfaces size, lock state, last-check / last-prune
  (P2R-07); pending-runs queue worker drains scheduled-backup
  fires that raced an agent disconnect (P2R-08). See
  `docs/superpowers/plans/2026-05-03-p2-redesign-phase-5.md`.
 ### P2 redesign — Phase 6 (auto-init follow-up) — TODO
@@ -42,6 +42,54 @@
      </div>
    </form>
    {{/* ---------- Admin credentials (optional) ---------- */}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">
      Admin credentials <span class="text-ink-fade normal-case">· prune-only · optional</span>
    </h2>
    <form method="post" action="/hosts/{{$host.ID}}/admin-credentials" class="panel rounded-[7px] p-5">
      {{if $page.AdminCredsError}}
        <div class="rounded-[6px] px-3.5 py-3 text-[13px] mb-4"
             style="border: 1px solid color-mix(in oklch, var(--bad), transparent 60%); background: color-mix(in oklch, var(--bad), transparent 92%);">
          {{$page.AdminCredsError}}
        </div>
      {{end}}
      {{if eq $page.SavedSection "admin_credentials"}}
        <div class="text-[12px] text-ok mb-3 mono">✓ saved</div>
      {{end}}
      <p class="text-[12.5px] text-ink-mid leading-[1.6] mb-4 max-w-[640px]">
        Only needed for rest-server repos that distinguish an append-only
        user (everyday backups) from a delete-capable user (prune /
        forget). For S3 / B2 / SFTP / local, leave this blank — the
        everyday repo credentials handle prune too.
      </p>
      <div class="grid grid-cols-2 gap-4">
        <div>
          <label class="field-label" for="admin_repo_url">Repo URL <span class="text-ink-fade">· usually same as above</span></label>
          <input id="admin_repo_url" name="repo_url" type="text" class="field mono" value="{{$page.AdminURL}}" />
        </div>
        <div>
          <label class="field-label" for="admin_repo_username">Username</label>
          <input id="admin_repo_username" name="repo_username" type="text" class="field mono" value="{{$page.AdminUsername}}" />
        </div>
        <div class="col-span-2">
          <label class="field-label" for="admin_repo_password">Password</label>
          <input id="admin_repo_password" name="repo_password" type="password" class="field mono"
                 placeholder="{{if $page.HasAdminPassword}}•••••••••••••••• · stored, leave blank to keep{{else}}— not yet set —{{end}}"
                 autocomplete="new-password" />
        </div>
      </div>
      <div class="mt-4 pt-4 border-t border-line-soft flex gap-2 items-center">
        <button type="submit" class="btn btn-primary">Save admin credentials</button>
        {{if $page.HasAdminPassword}}
          <button type="submit" form="admin-creds-clear" class="btn btn-secondary"
                  onclick="return confirm('Clear admin credentials? Prune jobs will be refused until you re-set them.');">Clear</button>
        {{end}}
      </div>
    </form>
    {{if $page.HasAdminPassword}}
      <form id="admin-creds-clear" method="post" action="/hosts/{{$host.ID}}/admin-credentials/delete"></form>
    {{end}}
    {{/* ---------- Bandwidth ---------- */}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Bandwidth · host-wide</h2>
    <form method="post" action="/hosts/{{$host.ID}}/repo/bandwidth" class="panel rounded-[7px] p-5">
@@ -138,6 +186,40 @@
      </div>
    </form>
    {{/* ---------- Run now · one-time ---------- */}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Run now · one-time</h2>
    <div class="panel rounded-[7px] p-5">
      <p class="text-[12.5px] text-ink-mid leading-[1.6] mb-4 max-w-[640px]">
        Operator-triggered. Output streams live to the job log. Cadence-driven runs land independently from the server-side ticker.
      </p>
      <div class="grid grid-cols-3 gap-3">
        <button type="button"
                hx-post="/hosts/{{$host.ID}}/repo/check"
                hx-swap="none"
                hx-confirm="Run check now ({{$m.CheckSubsetPct}}% data subset)?"
                class="btn btn-secondary"
                {{if not $page.Online}}disabled title="agent is offline"{{end}}>
          check
        </button>
        <button type="button"
                hx-post="/hosts/{{$host.ID}}/repo/prune"
                hx-swap="none"
                hx-confirm="Run prune now? Removes data not referenced by any snapshot — heavy operation."
                class="btn btn-secondary"
                {{if not $page.HasAdminPassword}}disabled title="set admin credentials first"{{else if not $page.Online}}disabled title="agent is offline"{{end}}>
          prune
        </button>
        <button type="button"
                hx-post="/hosts/{{$host.ID}}/repo/unlock"
                hx-swap="none"
                hx-confirm="Clear stale repo locks?"
                class="btn btn-secondary"
                {{if not $page.Online}}disabled title="agent is offline"{{end}}>
          unlock
        </button>
      </div>
    </div>
    {{/* ---------- Danger zone ---------- */}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-bad mt-9 mb-3.5">Danger zone</h2>
    <div class="panel rounded-[7px] p-5"
@@ -179,6 +261,41 @@
      </div>
    </div>
    {{/* ---------- Repo health ---------- */}}
    {{if $page.StatsView}}
    {{$s := $page.StatsView}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Repo health</h2>
    <div class="panel rounded-[7px] p-5 text-[13px]">
      {{if $s.LockPresent}}
        <div class="rounded-[6px] px-3.5 py-3 text-[12.5px] mb-4"
             style="border: 1px solid color-mix(in oklch, var(--warn), transparent 60%); background: color-mix(in oklch, var(--warn), transparent 92%);">
          Stale lock detected on the most recent check. Run <span class="mono">unlock</span> above to clear it before the next backup.
        </div>
      {{end}}
      <dl class="grid grid-cols-2 gap-y-2 gap-x-4">
        {{if $s.HasTotalSize}}
          <dt class="text-ink-fade">Total size</dt>
          <dd class="mono text-right">{{bytes $s.TotalSizeBytes}}</dd>
        {{end}}
        {{if $s.HasRawSize}}
          <dt class="text-ink-fade">Raw size <span class="text-ink-fade text-[11px]">· pre-dedup</span></dt>
          <dd class="mono text-right">{{bytes $s.RawSizeBytes}}</dd>
        {{end}}
        {{if $s.HasLastCheck}}
          <dt class="text-ink-fade">Last check</dt>
          <dd class="mono text-right text-[12px]">
            {{$s.LastCheckAgo}}
            {{if $s.LastCheckStatus}} · <span class="{{if eq $s.LastCheckStatus "ok"}}text-ok{{else if eq $s.LastCheckStatus "errors_found"}}text-bad{{else}}text-ink-mid{{end}}">{{$s.LastCheckStatus}}</span>{{end}}
          </dd>
        {{end}}
        {{if $s.HasLastPrune}}
          <dt class="text-ink-fade">Last prune</dt>
          <dd class="mono text-right text-[12px]">{{$s.LastPruneAgo}}</dd>
        {{end}}
      </dl>
    </div>
    {{end}}
    {{if gt (len $page.GroupNames) 0}}
      <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Snapshots by source</h2>
      <div class="panel rounded-[7px] p-4">
Author	SHA1	Message	Date
steve	d8dd21b5e0	test: write-then-rename script-bin helpers (avoid ETXTBSY under -race) CI / Build (windows/amd64) (pull_request) Successful in 18s Details CI / Build (linux/amd64) (pull_request) Successful in 19s Details CI / Lint (pull_request) Successful in 41s Details CI / Build (linux/arm64) (pull_request) Successful in 18s Details CI / Test (linux/amd64) (pull_request) Failing after 3m41s Details CI run #48 failed with: --- FAIL: TestRunInitShipsStartedAndFinished RunInit: ... fork/exec /tmp/.../restic: text file busy setupScript and setupScriptBin used os.WriteFile to write a shell script directly at the final path, then exec'd it. Under -race + many t.Parallel tests, a fork-from-another-goroutine could inherit the still-open writable fd from one of those WriteFile calls; the kernel returns ETXTBSY when the freshly-execed binary still has a writable fd anywhere on the system. Fix: write to "<path>.tmp", then os.Rename into place. The rename is a pure dirent op; by the time the final path exists, no process has a writable fd on its inode and exec is safe. -race + -count=5 on both runner packages now passes consistently.	2026-05-04 10:19:15 +01:00
steve	b054e7b987	api+agent: document protocol-version stability and forget back-compat decisions version.go: add a comment block explaining why Phase 5's wire changes (CommandRunPayload, ConfigUpdatePayload, RepoStatsPayload reshapes) did not bump CurrentProtocolVersion — lockstep deploy, no rolling-upgrade path, smoke env restage enforces it. Notes where a version bump to 2 would be required if a multi-version path is ever introduced. cmd/agent/main.go: document why the JobForget handler hard-errors on empty ForgetGroups rather than falling back to a single-policy form. The maintenance ticker is the only writer and always populates the field; the fallback was specced but skipped given lockstep deploy.	2026-05-04 10:19:15 +01:00
steve	99ef2b7a71	server: serialize DrainPending per host (avoid drain double-dispatch) Add a per-host drain mutex (drainLocks map guarded by drainLocksMu) on the Server struct. DrainPending acquires it with TryLock: if a drain is already in-flight for this host, the call returns immediately — the running drain will see every pending row. This prevents the on-hello goroutine and the 30s tick from both listing the same host's rows and dispatching them twice. Update three existing tests that called srv.DrainPending explicitly after the on-hello goroutine had already been spawned: replace the now-redundant direct call with a waitForPendingCount poll so they don't race the goroutine's mutex ownership. Add TestDrainPendingSerializesPerHost which fires 10 concurrent DrainPending goroutines against a 5-row queue and asserts exactly 5 job rows result.	2026-05-04 10:19:15 +01:00
steve	b8c9c50a93	store: LatestJobByKind includes in-flight jobs (avoid maintenance double-fire) Widen the SQL query to consider all statuses (queued, running, succeeded, failed, cancelled) rather than terminal-only. An in-flight prune that outlasts the 60s tick interval previously produced ErrNotFound, causing the ticker to anchor at now-24h and fire a second prune concurrently with the first. Update the doc comment and test: remove the "queued job filtered out" case, add assertions that a running job and a queued job are each returned as the latest.	2026-05-04 10:19:15 +01:00
steve	18cc90d54e	tasks: tick P2R-03 through P2R-08 done	2026-05-04 10:19:15 +01:00
steve	a1db4ce4f7	diag: phase 5 Playwright sweep screenshots	2026-05-04 10:19:15 +01:00
steve	99b88d08c9	server/ws: persist repo.stats into host_repo_stats	2026-05-04 10:19:15 +01:00
steve	1629dc7146	server: drainer abandons only on ErrNotFound, not transient errors GetSourceGroup errors in drainOne now gate on errors.Is(err, store.ErrNotFound) before calling abandonPending, mirroring the existing GetSchedule pattern. Transient errors (SQLITE_BUSY, context cancellation) now log a warning and return without deleting the row. Add regression test TestDrainPendingDropsRowsForGoneSourceGroup confirming the ErrNotFound path still abandons correctly. Also add a comment above the backoff-doubling loop explaining the progression.	2026-05-04 10:19:15 +01:00
steve	0c9ea75046	server: drainer uses dispatch-core to avoid duplicate pending_run enqueue Extract dispatchBackupForGroupCore (persist+marshal+send, no enqueue on failure) from dispatchBackupForGroup. drainOne now calls the core directly so a failed Send only bumps the existing pending_runs row via BumpPendingRunAttempt — not create a second row — stopping the geometric duplication on repeated drain failures. dispatchBackupForGroup (schedule.fire path) wraps the core and keeps its enqueue-on-failure behaviour unchanged. TestDrainPendingBumpsOnSendFailure strengthened: asserts exactly 1 row remains after a send failure (was tolerating >=1 duplicate rows).	2026-05-04 10:19:15 +01:00
steve	3e337dfb3c	server: drain pending_runs on tick + on agent reconnect Two trigger paths land here: - A 30s ticker in cmd/server calls Server.DrainAllDue(ctx). It walks pending_runs rows whose next_attempt_at <= now, dedupes by host, skips offline hosts, and per online host runs DrainPending. - onAgentHello spawns a background DrainPending(hostID). When a host comes back, every pending row for it is dispatchable now — due-ness becomes irrelevant once the wire is back. Each row's schedule + group are reloaded; ErrNotFound or disabled-schedule or gone-group abandons the row with a pending_run.abandoned audit. attempt >= retry_max also abandons. Otherwise dispatchBackupForGroup is invoked; success deletes the row, failure bumps attempt with exponential backoff capped at 30m.	2026-05-04 10:19:15 +01:00
steve	e64cf25c0e	server: enqueue pending_runs when scheduled-job dispatch fails When dispatchBackupForGroup's conn.Send errors, queue a pending_runs row (attempt=1, next_attempt_at = now + group.RetryBackoffSeconds) instead of silently dropping the fire. The orphaned queued job row is left behind for forensic visibility — the drainer will create a fresh job row on its retry. Also adds Store.ListPendingRunsForHost — the on-reconnect drain walks every row for the host, regardless of due-ness, since the host being back makes 'due' irrelevant.	2026-05-04 10:19:15 +01:00
steve	2794d5a821	server: fix stale RetentionPolicy comment + check Scan errors in maintenance test	2026-05-04 10:19:15 +01:00
steve	c47cc682e0	server: maintenance ticker drives forget/prune/check on cadence Wires a 60s server-side ticker to the pure-logic maintenance.Decide introduced in the previous commit. Decisions flow through a new DispatchMaintenance method on Server, which: - skips offline hosts (no pending_runs queueing — maintenance is not a backup, missed fires shouldn't pile up) - silently skips prune when admin creds aren't bound - pushes admin creds before prune, then dispatches with RequiresAdminCreds=true (same as operator-driven prune) - persists job rows with actor_kind="system" Reshapes the forget wire payload from a single RetentionPolicy to a ForgetGroups list (one tag + per-group keep- per source group). The agent walks the groups and runs `restic forget --tag <name> --keep-*` once per group. Dead-code removed: CommandRunPayload.RetentionPolicy, the old forget JSON-decode in cmd/agent, and the single-policy form of restic.RunForget.	2026-05-04 10:19:15 +01:00
steve	e7e11454a8	maintenance: pure-logic ticker decides forget/prune/check fires	2026-05-04 10:19:15 +01:00
steve	77a8590e3a	ui: hx-swap none on Run-now + truthful save banner + tailwind rebuild Add hx-swap="none" to the three Run-now buttons (check/prune/unlock) in host_repo.html to match the existing pattern on host_sources.html and host_schedules.html. Fix all-blank admin-credentials save to redirect without ?saved= query string so no false-positive banner is shown; strengthen the corresponding test to assert Location has no ?saved=. Rebuild CSS bundle via Tailwind to pick up max-w-[640px] JIT class.	2026-05-04 10:19:15 +01:00
steve	46ec123f95	ui: Slice E — admin creds form + run-now buttons + repo health panel - hostRepoPage gains AdminURL/AdminUsername/HasAdminPassword, Online, and StatsView (pre-dereferenced projection of host_repo_stats). - loadHostRepoPage loads the admin slot (tolerating ErrNotFound), hub.Connected, and stats (tolerating ErrNotFound). - renderRepoPage gains an adminErr parameter; all callers updated. - handleUIAdminCredentialsSave / handleUIAdminCredentialsDelete added (form-POST handlers mirroring the repo-creds pattern, with audit). - Routes /hosts/{id}/admin-credentials POST and /delete POST registered. - Template: Admin credentials form after Connection, Run-now HTMX buttons after Maintenance, Repo health stats panel in right rail. - Tests: 9 new tests covering rendering, disabled states, save/delete round-trips, audit rows, and idempotent delete.	2026-05-04 10:19:15 +01:00
steve	b35f1736f7	server: populate audit UserID on credential mutations + slog prune push errors Switch handleSetHostCredentials, handleSetAdminCredentials, and handleDeleteAdminCredentials from authedUser (bool) to requireUser (*store.User) so AuditEntry.UserID and Actor are populated correctly. Add slog.Warn on the non-ErrNotFound pushAdminCredsToAgent path in handleRunRepoPrune so decrypt/send failures surface in the server log rather than appearing as a generic host_offline 503.	2026-05-04 10:19:15 +01:00
steve	a8aff2c62b	server: cover HTMX auth-redirect path in repo-ops tests	2026-05-04 10:19:15 +01:00
steve	1ae567021a	server: HTTP run-now for prune / check / unlock Adds POST /api/hosts/{id}/repo/{prune,check,unlock} (and matching outer routes for HTMX form posts). Prune pushes the admin-cred slot via pushAdminCredsToAgent before dispatch and refuses with admin_creds_required when the slot is not set. Check reads check_subset_pct from host_repo_maintenance (overridable via ?subset=N, clamped 0-100; non-numeric override falls back to DB value silently). Unlock needs no admin creds. All three share the same wantsHTML/HX-Redirect response split as the per-source-group run-now endpoint.	2026-05-04 10:19:15 +01:00
steve	81a00202d0	server: admin-credentials REST + Slot:admin push helper Adds GET/PUT/DELETE /api/hosts/{id}/admin-credentials handlers that mirror the existing repo-credentials endpoints but write to store.CredKindAdmin with AEAD additional-data "host:<id>:admin" (scoped away from the repo slot to prevent cross-binding). PUT immediately pushes a config.update(Slot:"admin") to the agent when it is connected, and the new pushAdminCredsToAgent helper is wired for use by the upcoming prune run-now endpoint (D2) to push on-demand before dispatch.	2026-05-04 10:19:15 +01:00
steve	dafae84149	agent: secrets fail-loud on corrupt blob + small polish Save and SaveAdmin now propagate loadBundle errors instead of silently overwriting a corrupt file (data-loss fix). Tests added for both paths. reportStats logs a Debug on RunStats failure; r in runJob gets a comment explaining the prune-runner asymmetry; runner_test comment tightened.	2026-05-04 10:19:15 +01:00
steve	d3c354cd97	agent/runner: ship repo.stats before job.finished in RunCheck/RunUnlock RunCheck and RunUnlock were calling sendFinished before reportStats, inverting the required job.started → log.stream → repo.stats → job.finished envelope order. Move reportStats ahead of sendFinished in both functions to match the pattern already correct in RunPrune. Strengthen TestRunCheckShipsCheckStatus, TestRunCheckErrorsFoundShipsErrorsStatus, and TestRunUnlockClearsLock with the same position-index ordering assertions used by TestRunPruneShipsExpectedEnvelopes; these assertions would have failed against the pre-fix code.	2026-05-04 10:19:15 +01:00
steve	1f600fa849	agent: RunPrune/RunCheck/RunUnlock + reportStats + admin-cred slot dispatch Extract resticEnv/sendStarted/streamHandler/sendFinished helpers to remove boilerplate duplication across Run* methods. Add RunPrune (ships repo.stats with LastPruneAt before job.finished), RunCheck (ships stats with LastCheckStatus/LockPresent regardless of outcome), RunUnlock (ships LockPresent=false on success), and reportStats (fills size fields via RunStats when caller didn't populate them). Wire JobPrune/JobCheck/JobUnlock into the dispatcher switch; teach MsgConfigUpdate about the Slot discriminator for admin vs repo creds; add strconv import for subset-pct parsing.	2026-05-04 10:19:15 +01:00
steve	212fd3e400	agent/secrets: separate admin slot with backwards-compatible decode Split the on-disk bundle into repo + admin slots. Legacy flat Repo blobs are detected at load time by the presence of "repo_url" at the top level and transparently promoted into the new shape on the next Save/SaveAdmin. Adds ErrNoAdmin sentinel, LoadAdmin, SaveAdmin, and three new tests.	2026-05-04 10:19:15 +01:00
steve	c9be9040d9	api: stats partial-update payload + ConfigUpdate.Slot + CommandRun.RequiresAdminCreds Reshape RepoStatsPayload into pointer-field partial-update form matching store.HostRepoStats semantics; add Slot discriminator to ConfigUpdatePayload for admin vs repo credential routing; add RequiresAdminCreds flag to CommandRunPayload for prune/unlock jobs that need delete authority.	2026-05-04 10:19:15 +01:00
steve	7fd29427a0	restic: tighten RunCheck lock sniff + RunStats zero-snapshot test Narrow the LockPresent predicate from bare "locked" (too broad) to "stale lock" and "already locked" — the two phrases restic actually emits. Replace TestRunCheckParsesLock with table-driven TestRunCheckLockSniff covering both trigger phrases and a benign "locked-file" line that must not set LockPresent. Add TestRunStatsZeroSnapshots to pin that RunStats accepts zero-snapshot JSON without error.	2026-05-04 10:19:15 +01:00
steve	49fd3f4441	restic: RunUnlock + RunStats (raw-data mode) Add RunUnlock (delegates straight to runWithPump) and RunStats which runs `restic stats --json --mode raw-data`, captures the single JSON line from stdout into RepoStats, and returns an error if no JSON arrives. Tests cover arg plumbing for unlock, JSON parsing, and the no-JSON error path.	2026-05-04 10:19:15 +01:00
steve	f3eaf511be	restic: RunCheck with subset% + lock-state sniffing Add CheckResult (LockPresent, ErrorsFound) and RunCheck. subsetPct>0 passes --read-data-subset N% to limit data reads. Stderr is sniffed for "Found stale lock"/"locked" to set LockPresent; a non-zero exit from restic is absorbed as ErrorsFound=true rather than an error so the caller can always persist last_check_status. Tests cover lock detection, exit-1 absorption, and subset-arg plumbing.	2026-05-04 10:19:15 +01:00
steve	2caf7f1193	restic: RunPrune + runWithPump helper, refactor Forget/Init onto it Add RunPrune for admin-credential prune invocations. Extract runWithPump to DRY the stdout+stderr pump pattern; refactor RunForget and RunInit to delegate to it (RunInit preserves the "config file already exists" soft-success sniff by wrapping the handler before the call). Add runner_test.go with TestRunPruneInvokesPrune.	2026-05-04 10:19:15 +01:00
steve	4ad0b5147a	store: tighten CHECK constraint on host_repo_stats.last_check_status	2026-05-04 10:19:15 +01:00
steve	f97f67eb67	store: wrap UpsertHostRepoStats in a transaction (concurrency safety)	2026-05-04 10:19:15 +01:00
steve	bc77081366	store: assert CHECK constraint on host_credentials.kind	2026-05-04 10:19:15 +01:00
steve	87655cf0e4	store: HostRepoStats projection (size, lock, last-check, last-prune)	2026-05-04 10:19:15 +01:00
steve	de6d51eeb1	store: host_credentials becomes kind-aware (repo + admin slots)	2026-05-04 10:19:15 +01:00
steve	212ddfe226	store: migration 0009 — admin-creds kind + host_repo_stats	2026-05-04 10:19:15 +01:00
steve	b640775a61	plan: P2 redesign Phase 5 (P2R-03..P2R-08)	2026-05-04 10:19:15 +01:00
steve	13f58537ad	infra: remove provision-gitea-runner.sh (now lives with the infra team) The runner-provisioning script has been handed off to the infra agent, who will own it going forward. ci.yml's header comment is updated to point at "the infra team owns the script" rather than the in-repo path, but the runner expectations themselves stay the same — workflows still rely on the persistent volumes, pre-cloned actions, and host-installed golangci-lint that any compliant provisioning produces.	2026-05-04 10:19:09 +01:00