Merge pull request 'P2 redesign Phase 5 — prune/check/unlock + maintenance ticker + repo stats + pending-runs queue' (#3) from p2r-phase5-maintenance into main

Reviewed-on: #3
2026-05-04 09:25:00 +00:00
parent e73c4bd96c e850f6f44c
commit 609aaf741d
51 changed files with 6247 additions and 328 deletions
@@ -2,13 +2,13 @@ package main

 import (
 	"context"
-	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
 	"log/slog"
 	"os"
 	"os/signal"
+	"strconv"
 	"syscall"
 	"time"

@@ -199,32 +199,68 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
 	case api.MsgConfigUpdate:
 		var p api.ConfigUpdatePayload
 		_ = env.UnmarshalPayload(&p)
-		// Merge with whatever's already in secrets.enc — empty fields
-		// in the push mean "leave alone." Atomic write underneath.
-		cur, err := d.secrets.Load()
-		if err != nil {
-			slog.Error("ws agent: load secrets for merge", "err", err)
-			return nil
+		slot := p.Slot
+		if slot == "" {
+			slot = "repo"
 		}
-		changed := false
-		if p.RepoURL != "" && p.RepoURL != cur.URL {
-			cur.URL = p.RepoURL
-			changed = true
-		}
-		if p.RepoUsername != "" && p.RepoUsername != cur.Username {
-			cur.Username = p.RepoUsername
-			changed = true
-		}
-		if p.RepoPassword != "" && p.RepoPassword != cur.Password {
-			cur.Password = p.RepoPassword
-			changed = true
-		}
-		if changed {
-			if err := d.secrets.Save(cur); err != nil {
-				slog.Error("ws agent: persist secrets", "err", err)
+		switch slot {
+		case "repo":
+			// Merge with whatever's already in secrets.enc — empty fields
+			// in the push mean "leave alone." Atomic write underneath.
+			cur, err := d.secrets.Load()
+			if err != nil {
+				slog.Error("ws agent: load secrets for merge", "err", err)
 				return nil
 			}
-			slog.Info("ws agent: repo credentials updated via config.update")
+			changed := false
+			if p.RepoURL != "" && p.RepoURL != cur.URL {
+				cur.URL = p.RepoURL
+				changed = true
+			}
+			if p.RepoUsername != "" && p.RepoUsername != cur.Username {
+				cur.Username = p.RepoUsername
+				changed = true
+			}
+			if p.RepoPassword != "" && p.RepoPassword != cur.Password {
+				cur.Password = p.RepoPassword
+				changed = true
+			}
+			if changed {
+				if err := d.secrets.Save(cur); err != nil {
+					slog.Error("ws agent: persist secrets", "err", err)
+					return nil
+				}
+				slog.Info("ws agent: repo credentials updated via config.update")
+			}
+		case "admin":
+			cur, err := d.secrets.LoadAdmin()
+			if err != nil && !errors.Is(err, secrets.ErrNoAdmin) {
+				slog.Error("ws agent: load admin secrets", "err", err)
+				return nil
+			}
+			// ErrNoAdmin is not an error here — we are creating the slot.
+			changed := false
+			if p.RepoURL != "" && p.RepoURL != cur.URL {
+				cur.URL = p.RepoURL
+				changed = true
+			}
+			if p.RepoUsername != "" && p.RepoUsername != cur.Username {
+				cur.Username = p.RepoUsername
+				changed = true
+			}
+			if p.RepoPassword != "" && p.RepoPassword != cur.Password {
+				cur.Password = p.RepoPassword
+				changed = true
+			}
+			if changed {
+				if err := d.secrets.SaveAdmin(cur); err != nil {
+					slog.Error("ws agent: persist admin secrets", "err", err)
+					return nil
+				}
+				slog.Info("ws agent: admin credentials updated via config.update")
+			}
+		default:
+			slog.Warn("ws agent: unknown config.update slot, ignoring", "slot", p.Slot)
 		}

 	case api.MsgAgentUpdateAvail:
@@ -251,6 +287,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
 	if creds.Empty() {
 		return fmt.Errorf("repo credentials not configured (waiting for server config.update push)")
 	}
+	// r is the everyday runner — bound to the host's repo
+	// (append-only) credentials. Reused by every kind except
+	// JobPrune, which builds its own runner against the
+	// admin-credentials slot when p.RequiresAdminCreds is set
+	// (admin creds are not loaded for any other kind, so they're
+	// not on r). If you find yourself adding a new JobKind that
+	// needs delete authority, mirror the JobPrune pattern below
+	// — don't try to overload r.
 	r := runner.New(runner.Config{
 		ResticBin:    d.resticBin,
 		RepoURL:      creds.URL,
@@ -291,33 +335,81 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
 			slog.Info("agent: init job complete", "job_id", p.JobID)
 		}()
 	case api.JobForget:
-		var policy restic.ForgetPolicy
-		if len(p.RetentionPolicy) > 0 {
-			var raw struct {
-				KeepLast    *int `json:"keep_last,omitempty"`
-				KeepHourly  *int `json:"keep_hourly,omitempty"`
-				KeepDaily   *int `json:"keep_daily,omitempty"`
-				KeepWeekly  *int `json:"keep_weekly,omitempty"`
-				KeepMonthly *int `json:"keep_monthly,omitempty"`
-				KeepYearly  *int `json:"keep_yearly,omitempty"`
-			}
-			if err := json.Unmarshal(p.RetentionPolicy, &raw); err != nil {
-				return fmt.Errorf("forget: decode retention_policy: %w", err)
-			}
-			policy = restic.ForgetPolicy{
-				KeepLast: raw.KeepLast, KeepHourly: raw.KeepHourly,
-				KeepDaily: raw.KeepDaily, KeepWeekly: raw.KeepWeekly,
-				KeepMonthly: raw.KeepMonthly, KeepYearly: raw.KeepYearly,
-			}
+		if len(p.ForgetGroups) == 0 {
+			// Hard-error rather than fall back to a single-policy form:
+			// the server-side dispatch path (maintenance ticker) is the
+			// only writer of forget command.run today, and it always
+			// populates ForgetGroups. A backwards-compatible single-
+			// policy fallback was specced but skipped — see the
+			// Phase 5 plan rationale and version.go's lockstep-deploy
+			// note for why.
+			return fmt.Errorf("forget: command.run carried no forget_groups (server didn't populate them)")
 		}
-		slog.Info("agent: accepting forget job", "job_id", p.JobID, "policy", p.RetentionPolicy)
+		groups := make([]restic.ForgetGroup, 0, len(p.ForgetGroups))
+		for _, g := range p.ForgetGroups {
+			groups = append(groups, restic.ForgetGroup{
+				Tag: g.Tag,
+				Policy: restic.ForgetPolicy{
+					KeepLast:    g.Policy.KeepLast,
+					KeepHourly:  g.Policy.KeepHourly,
+					KeepDaily:   g.Policy.KeepDaily,
+					KeepWeekly:  g.Policy.KeepWeekly,
+					KeepMonthly: g.Policy.KeepMonthly,
+					KeepYearly:  g.Policy.KeepYearly,
+				},
+			})
+		}
+		slog.Info("agent: accepting forget job", "job_id", p.JobID, "groups", len(groups))
 		go func() {
-			if err := r.RunForget(ctx, p.JobID, policy); err != nil {
+			if err := r.RunForget(ctx, p.JobID, groups); err != nil {
 				slog.Warn("agent: forget job failed", "job_id", p.JobID, "err", err)
 				return
 			}
 			slog.Info("agent: forget job complete", "job_id", p.JobID)
 		}()
+	case api.JobPrune:
+		// Prune may require admin creds (delete authority on rest-server).
+		runCreds := creds
+		if p.RequiresAdminCreds {
+			ac, err := d.secrets.LoadAdmin()
+			if err != nil {
+				return fmt.Errorf("prune: admin creds not configured (server didn't push them): %w", err)
+			}
+			if ac.Empty() {
+				return fmt.Errorf("prune: admin creds incomplete")
+			}
+			runCreds = ac
+		}
+		prr := runner.New(runner.Config{
+			ResticBin:    d.resticBin,
+			RepoURL:      runCreds.URL,
+			RepoUsername: runCreds.Username,
+			RepoPassword: runCreds.Password,
+		}, tx, time.Second)
+		slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
+		go func() {
+			if err := prr.RunPrune(ctx, p.JobID); err != nil {
+				slog.Warn("agent: prune job failed", "job_id", p.JobID, "err", err)
+			}
+		}()
+	case api.JobCheck:
+		subset := 0
+		if len(p.Args) > 0 {
+			subset, _ = strconv.Atoi(p.Args[0])
+		}
+		slog.Info("agent: accepting check job", "job_id", p.JobID, "subset_pct", subset)
+		go func() {
+			if err := r.RunCheck(ctx, p.JobID, subset); err != nil {
+				slog.Warn("agent: check job failed", "job_id", p.JobID, "err", err)
+			}
+		}()
+	case api.JobUnlock:
+		slog.Info("agent: accepting unlock job", "job_id", p.JobID)
+		go func() {
+			if err := r.RunUnlock(ctx, p.JobID); err != nil {
+				slog.Warn("agent: unlock job failed", "job_id", p.JobID, "err", err)
+			}
+		}()
 	default:
 		return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind)
 	}
@@ -16,6 +16,7 @@ import (
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
 	rmhttp "gitea.dcglab.co.uk/steve/restic-manager/internal/server/http"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
@@ -139,6 +140,23 @@ func run() error {
 	defer purgeTick.Stop()
 	offlineTick := time.NewTicker(30 * time.Second)
 	defer offlineTick.Stop()
+	// Maintenance ticker: drives forget/prune/check on the cadences
+	// operators set per-host. Independent of the agent's local cron
+	// (which only handles backup schedules). 60s cadence — the cron
+	// expressions are minute-grained, so anything finer is wasted
+	// work.
+	maintenanceTick := time.NewTicker(60 * time.Second)
+	defer maintenanceTick.Stop()
+	// Pending-runs drain ticker: 30s cadence sweeps every host with
+	// pending_runs rows whose next_attempt_at <= now (rows accumulate
+	// when a schedule.fire's command.run send fails because the agent
+	// dropped offline mid-flight). The on-reconnect path in
+	// onAgentHello handles the common case; this ticker is the
+	// safety-net for hosts that come back without a fresh hello (they
+	// shouldn't, but the queue exists either way).
+	pendingDrainTick := time.NewTicker(30 * time.Second)
+	defer pendingDrainTick.Stop()
+	mt := maintenance.New(st)
 	go func() {
 		for {
 			select {
@@ -156,6 +174,18 @@ func run() error {
 				if n, err := st.MarkHostsOfflineStale(ctx, cutoff); err == nil && n > 0 {
 					slog.Info("marked hosts offline (stale heartbeat)", "n", n)
 				}
+			case <-pendingDrainTick.C:
+				srv.DrainAllDue(ctx)
+			case <-maintenanceTick.C:
+				decisions, err := mt.Decide(ctx, time.Now().UTC())
+				if err != nil {
+					slog.Warn("maintenance ticker: decide", "err", err)
+					continue
+				}
+				if len(decisions) > 0 {
+					slog.Info("maintenance ticker: dispatching", "n", len(decisions))
+					srv.DispatchMaintenance(ctx, decisions)
+				}
 			}
 		}
 	}()
@@ -51,24 +51,70 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner {
 	return &Runner{cfg: cfg, tx: tx, progressMinPeriod: progressMinPeriod}
 }

-// RunBackup executes a backup job and reports back via the sender.
-// Returns nil on a clean (or "incomplete-but-snapshot-created") finish.
-func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, tags []string) error {
-	startedAt := time.Now().UTC()
-
-	startEnv, _ := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
-		JobID: jobID, Kind: api.JobBackup, StartedAt: startedAt,
-	})
-	if err := r.tx.Send(startEnv); err != nil {
-		slog.Warn("runner: send job.started", "err", err)
-	}
-
-	env := restic.Env{
+// resticEnv builds the restic.Env shared by every job this Runner executes.
+// The binary path and the repository URL, username and password are copied
+// verbatim from r.cfg, so the credentials a job runs with are whichever ones
+// the Runner was constructed with.
+func (r *Runner) resticEnv() restic.Env {
+	return restic.Env{
 		Bin:          r.cfg.ResticBin,
 		RepoURL:      r.cfg.RepoURL,
 		RepoUsername: r.cfg.RepoUsername,
 		RepoPassword: r.cfg.RepoPassword,
 	}
+}
+
+// sendStarted ships a job.started envelope for jobID carrying the job kind
+// and its start time.
+//
+// Delivery is best-effort: the job still runs when the envelope cannot be
+// built or sent, but both failures are logged. A marshal failure skips the
+// send entirely rather than pushing a zero-value envelope to the server.
+func (r *Runner) sendStarted(jobID string, kind api.JobKind, startedAt time.Time) {
+	env, err := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
+		JobID: jobID, Kind: kind, StartedAt: startedAt,
+	})
+	if err != nil {
+		slog.Warn("runner: marshal job.started", "job_id", jobID, "kind", kind, "err", err)
+		return
+	}
+	if err := r.tx.Send(env); err != nil {
+		slog.Warn("runner: send job.started", "job_id", jobID, "kind", kind, "err", err)
+	}
+}
+
+// streamHandler builds the restic.LineHandler that forwards command output
+// for jobID. Every line becomes one log.stream envelope stamped with the
+// current UTC time and the next value of the shared seq counter; the
+// handler's third argument is ignored. Forwarding is best-effort: marshal
+// and send errors are discarded.
+func (r *Runner) streamHandler(jobID string, seq *atomic.Int64) restic.LineHandler {
+	return func(stream, line string, _ any) {
+		// Take the timestamp before bumping the counter, as the
+		// original ordering did.
+		ts := time.Now().UTC()
+		entry := api.LogStreamLine{
+			JobID:   jobID,
+			Seq:     seq.Add(1),
+			TS:      ts,
+			Stream:  api.LogStream(stream),
+			Payload: line,
+		}
+		out, _ := api.Marshal(api.MsgLogStream, "", entry)
+		_ = r.tx.Send(out)
+	}
+}
+
+// sendFinished ships a job.finished envelope for jobID.
+//
+// err == nil reports api.JobSucceeded with exit code 0; any other err
+// reports api.JobFailed with exit code -1 and err's text in the Error
+// field. statsBlob is forwarded untouched as JobFinishedPayload.Stats (nil
+// is fine).
+//
+// Delivery is best-effort, but failures are logged: a lost job.finished
+// otherwise leaves no agent-side trace. A marshal failure skips the send
+// instead of shipping a zero-value envelope.
+func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) {
+	payload := api.JobFinishedPayload{
+		JobID:      jobID,
+		Status:     api.JobSucceeded,
+		ExitCode:   0,
+		FinishedAt: finishedAt,
+		Stats:      statsBlob,
+	}
+	if err != nil {
+		payload.Status = api.JobFailed
+		payload.ExitCode = -1
+		payload.Error = err.Error()
+	}
+	finEnv, merr := api.Marshal(api.MsgJobFinished, jobID, payload)
+	if merr != nil {
+		slog.Warn("runner: marshal job.finished", "job_id", jobID, "err", merr)
+		return
+	}
+	if serr := r.tx.Send(finEnv); serr != nil {
+		slog.Warn("runner: send job.finished", "job_id", jobID, "err", serr)
+	}
+}
+
+// RunBackup executes a backup job and reports back via the sender.
+// Returns nil on a clean (or "incomplete-but-snapshot-created") finish.
+func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, tags []string) error {
+	startedAt := time.Now().UTC()
+	r.sendStarted(jobID, api.JobBackup, startedAt)
+
+	env := r.resticEnv()

 	var seq atomic.Int64
 	lastProgress := time.Now()
@@ -115,27 +161,11 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
 	summary, err := env.RunBackup(ctx, paths, excludes, tags, handle)
 	finishedAt := time.Now().UTC()

-	status := api.JobSucceeded
-	exit := 0
-	errMsg := ""
-	if err != nil {
-		status = api.JobFailed
-		exit = -1
-		errMsg = err.Error()
-	}
 	var statsBlob json.RawMessage
 	if summary != nil {
 		statsBlob, _ = json.Marshal(summary)
 	}
-	finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
-		JobID:      jobID,
-		Status:     status,
-		ExitCode:   exit,
-		FinishedAt: finishedAt,
-		Stats:      statsBlob,
-		Error:      errMsg,
-	})
-	_ = r.tx.Send(finEnv)
+	r.sendFinished(jobID, finishedAt, err, statsBlob)

 	// On a successful backup, refresh the server's snapshot projection.
 	// We do this *after* job.finished so the UI sees the job land first;
@@ -147,6 +177,9 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
 		if rerr := r.reportSnapshots(ctx, env); rerr != nil {
 			slog.Warn("runner: snapshots.report failed", "job_id", jobID, "err", rerr)
 		}
+		if rerr := r.reportStats(ctx, env, api.RepoStatsPayload{}); rerr != nil {
+			slog.Warn("runner: stats.report after backup failed", "job_id", jobID, "err", rerr)
+		}
 	}

 	if err != nil {
@@ -160,111 +193,35 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
 // browser-side log viewer just works.
 func (r *Runner) RunInit(ctx context.Context, jobID string) error {
 	startedAt := time.Now().UTC()
-	startEnv, _ := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
-		JobID: jobID, Kind: api.JobInit, StartedAt: startedAt,
-	})
-	if err := r.tx.Send(startEnv); err != nil {
-		slog.Warn("runner: send job.started (init)", "err", err)
-	}
-
-	env := restic.Env{
-		Bin:          r.cfg.ResticBin,
-		RepoURL:      r.cfg.RepoURL,
-		RepoUsername: r.cfg.RepoUsername,
-		RepoPassword: r.cfg.RepoPassword,
-	}
+	r.sendStarted(jobID, api.JobInit, startedAt)

+	env := r.resticEnv()
 	var seq atomic.Int64
-	handle := func(stream string, line string, _ any) {
-		now := time.Now().UTC()
-		logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
-			JobID:   jobID,
-			Seq:     seq.Add(1),
-			TS:      now,
-			Stream:  api.LogStream(stream),
-			Payload: line,
-		})
-		_ = r.tx.Send(logEnv)
-	}
-
-	err := env.RunInit(ctx, handle)
+	err := env.RunInit(ctx, r.streamHandler(jobID, &seq))
 	finishedAt := time.Now().UTC()
-
-	status := api.JobSucceeded
-	exit := 0
-	errMsg := ""
-	if err != nil {
-		status = api.JobFailed
-		exit = -1
-		errMsg = err.Error()
-	}
-	finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
-		JobID:      jobID,
-		Status:     status,
-		ExitCode:   exit,
-		FinishedAt: finishedAt,
-		Error:      errMsg,
-	})
-	_ = r.tx.Send(finEnv)
+	r.sendFinished(jobID, finishedAt, err, nil)
 	if err != nil {
 		return fmt.Errorf("runner init: %w", err)
 	}
 	return nil
 }

-// RunForget executes a forget job against the configured repo with
-// the given retention policy. Same envelope shape as RunBackup so
-// the live log viewer + job lifecycle work without special-casing.
-// On success refreshes the snapshot projection (forget rewrites the
-// snapshot index — the host's snapshot list shrinks).
-func (r *Runner) RunForget(ctx context.Context, jobID string, policy restic.ForgetPolicy) error {
+// RunForget executes a forget job against the configured repo by
+// invoking `restic forget --tag <Tag> --keep-* …` once per group.
+// Same envelope shape as RunBackup so the live log viewer + job
+// lifecycle work without special-casing. On success refreshes the
+// snapshot projection (forget rewrites the snapshot index — the
+// host's snapshot list shrinks). Snapshot refresh runs once after
+// every group completes, not per-group.
+func (r *Runner) RunForget(ctx context.Context, jobID string, groups []restic.ForgetGroup) error {
 	startedAt := time.Now().UTC()
-	startEnv, _ := api.Marshal(api.MsgJobStarted, jobID, api.JobStartedPayload{
-		JobID: jobID, Kind: api.JobForget, StartedAt: startedAt,
-	})
-	if err := r.tx.Send(startEnv); err != nil {
-		slog.Warn("runner: send job.started (forget)", "err", err)
-	}
-
-	env := restic.Env{
-		Bin:          r.cfg.ResticBin,
-		RepoURL:      r.cfg.RepoURL,
-		RepoUsername: r.cfg.RepoUsername,
-		RepoPassword: r.cfg.RepoPassword,
-	}
+	r.sendStarted(jobID, api.JobForget, startedAt)

+	env := r.resticEnv()
 	var seq atomic.Int64
-	handle := func(stream string, line string, _ any) {
-		now := time.Now().UTC()
-		logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
-			JobID:   jobID,
-			Seq:     seq.Add(1),
-			TS:      now,
-			Stream:  api.LogStream(stream),
-			Payload: line,
-		})
-		_ = r.tx.Send(logEnv)
-	}
-
-	err := env.RunForget(ctx, policy, handle)
+	err := env.RunForget(ctx, groups, r.streamHandler(jobID, &seq))
 	finishedAt := time.Now().UTC()
-
-	status := api.JobSucceeded
-	exit := 0
-	errMsg := ""
-	if err != nil {
-		status = api.JobFailed
-		exit = -1
-		errMsg = err.Error()
-	}
-	finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
-		JobID:      jobID,
-		Status:     status,
-		ExitCode:   exit,
-		FinishedAt: finishedAt,
-		Error:      errMsg,
-	})
-	_ = r.tx.Send(finEnv)
+	r.sendFinished(jobID, finishedAt, err, nil)

 	// Refresh the server's snapshot projection — forget rewrites the
 	// index so the host's snapshot list almost certainly shrunk.
@@ -281,6 +238,129 @@ func (r *Runner) RunForget(ctx context.Context, jobID string, policy restic.Forg
 	return nil
 }

+// RunPrune runs a prune against the configured repo and reports the job
+// lifecycle over the sender: job.started, one log.stream per output line,
+// then job.finished.
+//
+// When the prune succeeds, a repo.stats envelope carrying LastPruneAt (and
+// whatever size fields reportStats manages to fill in) is shipped before
+// job.finished; a stats failure is only logged. When the prune fails, no
+// stats are reported and the error is returned wrapped as
+// "runner prune: ...".
+func (r *Runner) RunPrune(ctx context.Context, jobID string) error {
+	r.sendStarted(jobID, api.JobPrune, time.Now().UTC())
+
+	var lineSeq atomic.Int64
+	renv := r.resticEnv()
+	runErr := renv.RunPrune(ctx, r.streamHandler(jobID, &lineSeq))
+	finishedAt := time.Now().UTC()
+
+	if runErr != nil {
+		r.sendFinished(jobID, finishedAt, runErr, nil)
+		return fmt.Errorf("runner prune: %w", runErr)
+	}
+
+	// Copy so the payload does not alias finishedAt.
+	prunedAt := finishedAt
+	statsErr := r.reportStats(ctx, renv, api.RepoStatsPayload{LastPruneAt: &prunedAt})
+	if statsErr != nil {
+		slog.Warn("runner: stats.report after prune failed", "job_id", jobID, "err", statsErr)
+	}
+
+	r.sendFinished(jobID, finishedAt, nil, nil)
+	return nil
+}
+
+// RunCheck executes a `restic check` job. Always ships a repo.stats
+// envelope (success or failure) with LastCheckAt, LastCheckStatus,
+// and LockPresent populated from the check result.
+//
+// subsetPct is handed to env.RunCheck unchanged. LastCheckStatus is one of
+// "ok", "failed" (env.RunCheck returned an error) or "errors_found"
+// (no error, but the result's ErrorsFound flag is set). The repo.stats
+// envelope goes out before job.finished; a stats failure is only logged.
+// A non-nil check error is returned wrapped as "runner check: ...".
+func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) error {
+	startedAt := time.Now().UTC()
+	r.sendStarted(jobID, api.JobCheck, startedAt)
+
+	env := r.resticEnv()
+	var seq atomic.Int64
+	res, err := env.RunCheck(ctx, subsetPct, r.streamHandler(jobID, &seq))
+	finishedAt := time.Now().UTC()
+
+	// Determine check status string.
+	checkStatus := "ok"
+	if err != nil {
+		checkStatus = "failed"
+	} else if res.ErrorsFound {
+		checkStatus = "errors_found"
+	}
+
+	// res is read here even when err != nil.
+	// NOTE(review): the type of res is not visible from this file. If
+	// env.RunCheck can return a nil pointer on error this dereference
+	// panics; if it returns a zero value, a check that never ran reports
+	// LockPresent=false — confirm against restic.Env.RunCheck.
+	lockPresent := res.LockPresent
+	// Local copies so the payload can hold pointers to them.
+	now := finishedAt
+	patch := api.RepoStatsPayload{
+		LastCheckAt:     &now,
+		LastCheckStatus: checkStatus,
+		LockPresent:     &lockPresent,
+	}
+	if rerr := r.reportStats(ctx, env, patch); rerr != nil {
+		slog.Warn("runner: stats.report after check failed", "job_id", jobID, "err", rerr)
+	}
+
+	r.sendFinished(jobID, finishedAt, err, nil)
+
+	if err != nil {
+		return fmt.Errorf("runner check: %w", err)
+	}
+	return nil
+}
+
+// RunUnlock runs `restic unlock` against the configured repo and reports
+// the job lifecycle over the sender.
+//
+// After a successful unlock, a repo.stats envelope with LockPresent=false
+// is shipped ahead of job.finished so the UI banner clears; a stats
+// failure is only logged. A failed unlock reports no stats and returns the
+// error wrapped as "runner unlock: ...".
+func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {
+	r.sendStarted(jobID, api.JobUnlock, time.Now().UTC())
+
+	var lineSeq atomic.Int64
+	renv := r.resticEnv()
+	runErr := renv.RunUnlock(ctx, r.streamHandler(jobID, &lineSeq))
+	finishedAt := time.Now().UTC()
+
+	if runErr != nil {
+		r.sendFinished(jobID, finishedAt, runErr, nil)
+		return fmt.Errorf("runner unlock: %w", runErr)
+	}
+
+	lockPresent := false
+	statsErr := r.reportStats(ctx, renv, api.RepoStatsPayload{LockPresent: &lockPresent})
+	if statsErr != nil {
+		slog.Warn("runner: stats.report after unlock failed", "job_id", jobID, "err", statsErr)
+	}
+
+	r.sendFinished(jobID, finishedAt, nil, nil)
+	return nil
+}
+
+// reportStats ships patch as a repo.stats envelope.
+//
+// When patch.TotalSizeBytes is nil, the four size fields (TotalSizeBytes,
+// RawSizeBytes, UniqueFiles, SnapshotCount) are first filled from
+// env.RunStats, bounded by a 60-second timeout derived from ctx. A failed
+// stats run is logged at debug level and the patch is sent with only the
+// caller's fields. The returned error is the marshal or send error, if any.
+func (r *Runner) reportStats(ctx context.Context, env restic.Env, patch api.RepoStatsPayload) error {
+	statsCtx, stop := context.WithTimeout(ctx, 60*time.Second)
+	defer stop()
+
+	if patch.TotalSizeBytes == nil {
+		stats, statsErr := env.RunStats(statsCtx, nil)
+		if statsErr != nil {
+			slog.Debug("runner: stats refresh failed (non-fatal)", "err", statsErr)
+		} else {
+			// Locals give each pointer field its own addressable copy.
+			totalSize, rawSize := stats.TotalSize, stats.TotalUncompressed
+			fileCount, snapshotCount := stats.TotalFileCount, stats.SnapshotsCount
+			patch.TotalSizeBytes = &totalSize
+			patch.RawSizeBytes = &rawSize
+			patch.UniqueFiles = &fileCount
+			patch.SnapshotCount = &snapshotCount
+		}
+	}
+
+	out, err := api.Marshal(api.MsgRepoStats, "", patch)
+	if err != nil {
+		return err
+	}
+	return r.tx.Send(out)
+}
+
 // reportSnapshots calls `restic snapshots --json`, translates the
 // payload into the wire shape, and ships it as a snapshots.report
 // envelope. Bounded by a separate timeout so a sluggish repo doesn't
@@ -0,0 +1,357 @@
+package runner
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
+)
+
+// fakeSender is an in-memory sender for tests: it records every envelope it
+// is handed, in call order, so assertions can inspect them afterwards.
+//
+// It does no locking. NOTE(review): confirm the line handler is never
+// invoked from more than one goroutine at a time (e.g. separate stdout and
+// stderr readers); if it is, envs needs a mutex to stay clean under -race.
+type fakeSender struct {
+	envs []api.Envelope
+}
+
+// Send records env and always reports success.
+func (f *fakeSender) Send(env api.Envelope) error {
+	f.envs = append(f.envs, env)
+	return nil
+}
+
+// setupScript installs a fake "restic" executable in a fresh temp dir and
+// returns its path. body is the shell script WITHOUT a shebang line; the
+// helper prepends "#!/bin/sh" and appends a trailing newline.
+//
+// The script is written to "<path>.tmp" and then renamed into place. The
+// rename is what makes this race-free: under -race + many t.Parallel tests,
+// a fork-from-another-goroutine can inherit the writable fd from
+// os.WriteFile before close completes, and exec'ing the file then returns
+// ETXTBSY ("text file busy"). Once the rename lands, the final path is a
+// fresh dirent pointing at an inode that has no writable fd open anywhere,
+// so exec is safe.
+func setupScript(t *testing.T, body string) string {
+	t.Helper()
+
+	target := filepath.Join(t.TempDir(), "restic")
+	staging := target + ".tmp"
+	script := []byte("#!/bin/sh\n" + body + "\n")
+
+	if err := os.WriteFile(staging, script, 0o755); err != nil {
+		t.Fatalf("setupScript: write tmp: %v", err)
+	}
+	if err := os.Rename(staging, target); err != nil {
+		t.Fatalf("setupScript: rename: %v", err)
+	}
+	return target
+}
+
+// firstEnvOfType scans envs in order and returns the first envelope whose
+// Type equals mt. If there is none, the test is failed via t.Fatalf.
+func firstEnvOfType(t *testing.T, envs []api.Envelope, mt api.MessageType) api.Envelope {
+	t.Helper()
+	for i := range envs {
+		if envs[i].Type != mt {
+			continue
+		}
+		return envs[i]
+	}
+	t.Fatalf("no envelope of type %q found in %d envelopes", mt, len(envs))
+	// Unreachable after Fatalf; present only to satisfy the compiler.
+	return api.Envelope{}
+}
+
+// envelopeOrder projects envs onto their message types, preserving order.
+// The result always has len(envs) entries.
+func envelopeOrder(envs []api.Envelope) []api.MessageType {
+	types := make([]api.MessageType, 0, len(envs))
+	for _, env := range envs {
+		types = append(types, env.Type)
+	}
+	return types
+}
+
+// TestRunPruneShipsExpectedEnvelopes drives RunPrune with a fake
+// binary that prints "prune" on stdout (for the log.stream envelope)
+// and emits valid stats JSON so reportStats can populate size fields.
+// Expected sequence: job.started → log.stream → repo.stats → job.finished.
+func TestRunPruneShipsExpectedEnvelopes(t *testing.T) {
+	t.Parallel()
+
+	// The fake "restic" handles both "prune" and "stats --json" calls.
+	statsJSON := `{"total_size":1000,"total_uncompressed_size":2000,"snapshots_count":3,"total_file_count":10}`
+	bin := setupScript(t, `
+case "$1" in
+  prune)  echo "prune" ;;
+  stats)  echo '`+statsJSON+`' ;;
+  *)      echo "unknown: $*" ;;
+esac
+`)
+
+	// Only ResticBin is configured; the third argument to New is
+	// progressMinPeriod.
+	tx := &fakeSender{}
+	r := New(Config{ResticBin: bin}, tx, 0)
+	if err := r.RunPrune(context.Background(), "job-1"); err != nil {
+		t.Fatalf("RunPrune: %v", err)
+	}
+
+	order := envelopeOrder(tx.envs)
+	// Confirm landmark envelope types appear in the required order.
+	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgLogStream, api.MsgRepoStats, api.MsgJobFinished}
+	// positions keeps the index of the FIRST envelope of each type, so a
+	// type that appears more than once does not disturb the comparison.
+	positions := map[api.MessageType]int{}
+	for i, mt := range order {
+		if _, seen := positions[mt]; !seen {
+			positions[mt] = i
+		}
+	}
+	// Compare each adjacent pair of wantTypes; a missing type is reported
+	// instead of being compared.
+	for i := 0; i < len(wantTypes)-1; i++ {
+		a, b := wantTypes[i], wantTypes[i+1]
+		pa, aOK := positions[a]
+		pb, bOK := positions[b]
+		if !aOK {
+			t.Errorf("envelope type %q not found in output %v", a, order)
+			continue
+		}
+		if !bOK {
+			t.Errorf("envelope type %q not found in output %v", b, order)
+			continue
+		}
+		if pa >= pb {
+			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
+		}
+	}
+
+	// The repo.stats payload must have LastPruneAt set.
+	statsEnv := firstEnvOfType(t, tx.envs, api.MsgRepoStats)
+	var statsPayload api.RepoStatsPayload
+	if err := statsEnv.UnmarshalPayload(&statsPayload); err != nil {
+		t.Fatalf("unmarshal repo.stats payload: %v", err)
+	}
+	if statsPayload.LastPruneAt == nil {
+		t.Error("expected LastPruneAt to be set in repo.stats after prune")
+	}
+
+	// The job.finished payload must indicate success.
+	finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
+	var finPayload api.JobFinishedPayload
+	if err := finEnv.UnmarshalPayload(&finPayload); err != nil {
+		t.Fatalf("unmarshal job.finished payload: %v", err)
+	}
+	if finPayload.Status != api.JobSucceeded {
+		t.Errorf("expected job.finished status=%q, got %q", api.JobSucceeded, finPayload.Status)
+	}
+}
+
+// TestRunCheckShipsCheckStatus verifies that a check run which emits
+// a stale-lock line on stderr (exit 0) reports LastCheckStatus="ok"
+// and LockPresent=true.
+//
+// NOTE(review): how the stderr line is turned into LockPresent lives in
+// restic.Env.RunCheck, which is not visible from this file; the exact text
+// "Found stale lock" is presumably what it matches — confirm there.
+func TestRunCheckShipsCheckStatus(t *testing.T) {
+	t.Parallel()
+
+	statsJSON := `{"total_size":500,"total_uncompressed_size":600,"snapshots_count":1,"total_file_count":5}`
+	bin := setupScript(t, `
+case "$1" in
+  check) echo "Found stale lock" >&2; exit 0 ;;
+  stats) echo '`+statsJSON+`' ;;
+  *)     exit 0 ;;
+esac
+`)
+
+	tx := &fakeSender{}
+	r := New(Config{ResticBin: bin}, tx, 0)
+	if err := r.RunCheck(context.Background(), "job-2", 0); err != nil {
+		t.Fatalf("RunCheck: %v", err)
+	}
+
+	// Assert envelope ordering: job.started → log.stream → repo.stats → job.finished.
+	order := envelopeOrder(tx.envs)
+	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgLogStream, api.MsgRepoStats, api.MsgJobFinished}
+	// positions keeps the index of the FIRST envelope of each type.
+	positions := map[api.MessageType]int{}
+	for i, mt := range order {
+		if _, seen := positions[mt]; !seen {
+			positions[mt] = i
+		}
+	}
+	// Compare each adjacent pair of wantTypes; a missing type is reported
+	// instead of being compared.
+	for i := 0; i < len(wantTypes)-1; i++ {
+		a, b := wantTypes[i], wantTypes[i+1]
+		pa, aOK := positions[a]
+		pb, bOK := positions[b]
+		if !aOK {
+			t.Errorf("envelope type %q not found in output %v", a, order)
+			continue
+		}
+		if !bOK {
+			t.Errorf("envelope type %q not found in output %v", b, order)
+			continue
+		}
+		if pa >= pb {
+			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
+		}
+	}
+
+	// The repo.stats payload must carry the check outcome and lock state.
+	statsEnv := firstEnvOfType(t, tx.envs, api.MsgRepoStats)
+	var p api.RepoStatsPayload
+	if err := statsEnv.UnmarshalPayload(&p); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if p.LastCheckStatus != "ok" {
+		t.Errorf("LastCheckStatus: got %q, want %q", p.LastCheckStatus, "ok")
+	}
+	if p.LockPresent == nil || !*p.LockPresent {
+		t.Errorf("expected LockPresent=true, got %v", p.LockPresent)
+	}
+	if p.LastCheckAt == nil {
+		t.Error("expected LastCheckAt to be set")
+	}
+}
+
+// TestRunCheckErrorsFoundShipsErrorsStatus verifies that a check run
+// that exits 1 (errors found) reports LastCheckStatus="errors_found"
+// while RunCheck itself still returns nil.
+func TestRunCheckErrorsFoundShipsErrorsStatus(t *testing.T) {
+	t.Parallel()
+
+	statsJSON := `{"total_size":500,"total_uncompressed_size":600,"snapshots_count":1,"total_file_count":5}`
+	bin := setupScript(t, `
+case "$1" in
+  check) exit 1 ;;
+  stats) echo '`+statsJSON+`' ;;
+  *)     exit 0 ;;
+esac
+`)
+
+	tx := &fakeSender{}
+	r := New(Config{ResticBin: bin}, tx, 0)
+	// RunCheck returns nil for exit 1 (errors_found is not a wrapper failure).
+	if err := r.RunCheck(context.Background(), "job-3", 0); err != nil {
+		t.Fatalf("RunCheck: %v", err)
+	}
+
+	// Assert envelope ordering: job.started → repo.stats → job.finished.
+	// (No log.stream expected because the fake script produces no
+	// output before exit 1 — a real restic check would emit log lines
+	// before exiting non-zero.)
+	order := envelopeOrder(tx.envs)
+	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgRepoStats, api.MsgJobFinished}
+	// positions keeps the index of the FIRST envelope of each type.
+	positions := map[api.MessageType]int{}
+	for i, mt := range order {
+		if _, seen := positions[mt]; !seen {
+			positions[mt] = i
+		}
+	}
+	// Compare each adjacent pair of wantTypes; a missing type is reported
+	// instead of being compared.
+	for i := 0; i < len(wantTypes)-1; i++ {
+		a, b := wantTypes[i], wantTypes[i+1]
+		pa, aOK := positions[a]
+		pb, bOK := positions[b]
+		if !aOK {
+			t.Errorf("envelope type %q not found in output %v", a, order)
+			continue
+		}
+		if !bOK {
+			t.Errorf("envelope type %q not found in output %v", b, order)
+			continue
+		}
+		if pa >= pb {
+			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
+		}
+	}
+
+	// The repo.stats payload must report the errors_found status.
+	statsEnv := firstEnvOfType(t, tx.envs, api.MsgRepoStats)
+	var p api.RepoStatsPayload
+	if err := statsEnv.UnmarshalPayload(&p); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if p.LastCheckStatus != "errors_found" {
+		t.Errorf("LastCheckStatus: got %q, want %q", p.LastCheckStatus, "errors_found")
+	}
+}
+
+// TestRunUnlockClearsLock verifies that a successful unlock ships a
+// repo.stats envelope with LockPresent=false, and that it lands between
+// the log output and job.finished.
+func TestRunUnlockClearsLock(t *testing.T) {
+	t.Parallel()
+
+	statsJSON := `{"total_size":100,"total_uncompressed_size":150,"snapshots_count":2,"total_file_count":8}`
+	bin := setupScript(t, `
+case "$1" in
+  unlock) echo "removed 1 locks" ;;
+  stats)  echo '`+statsJSON+`' ;;
+  *)      exit 0 ;;
+esac
+`)
+
+	tx := &fakeSender{}
+	r := New(Config{ResticBin: bin}, tx, 0)
+	if err := r.RunUnlock(context.Background(), "job-4"); err != nil {
+		t.Fatalf("RunUnlock: %v", err)
+	}
+
+	// Assert envelope ordering: job.started → log.stream → repo.stats → job.finished.
+	order := envelopeOrder(tx.envs)
+	wantTypes := []api.MessageType{api.MsgJobStarted, api.MsgLogStream, api.MsgRepoStats, api.MsgJobFinished}
+	// positions keeps the index of the FIRST envelope of each type.
+	positions := map[api.MessageType]int{}
+	for i, mt := range order {
+		if _, seen := positions[mt]; !seen {
+			positions[mt] = i
+		}
+	}
+	// Compare each adjacent pair of wantTypes; a missing type is reported
+	// instead of being compared.
+	for i := 0; i < len(wantTypes)-1; i++ {
+		a, b := wantTypes[i], wantTypes[i+1]
+		pa, aOK := positions[a]
+		pb, bOK := positions[b]
+		if !aOK {
+			t.Errorf("envelope type %q not found in output %v", a, order)
+			continue
+		}
+		if !bOK {
+			t.Errorf("envelope type %q not found in output %v", b, order)
+			continue
+		}
+		if pa >= pb {
+			t.Errorf("expected %q before %q but positions are %d >= %d (order: %v)", a, b, pa, pb, order)
+		}
+	}
+
+	// LockPresent must be explicitly present and false: a nil pointer
+	// would mean "not reported", which is a different outcome.
+	statsEnv := firstEnvOfType(t, tx.envs, api.MsgRepoStats)
+	var p api.RepoStatsPayload
+	if err := statsEnv.UnmarshalPayload(&p); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if p.LockPresent == nil {
+		t.Fatal("expected LockPresent to be set (non-nil)")
+	}
+	if *p.LockPresent {
+		t.Errorf("expected LockPresent=false after unlock, got true")
+	}
+}
+
+// TestRunInitShipsStartedAndFinished is a regression guard for the RunInit
+// refactor: a successful init must still emit both a job.started and a
+// job.finished envelope.
+func TestRunInitShipsStartedAndFinished(t *testing.T) {
+	t.Parallel()
+
+	script := setupScript(t, `echo "initialized repository"`)
+	sender := &fakeSender{}
+	rn := New(Config{ResticBin: script}, sender, 0)
+
+	if err := rn.RunInit(context.Background(), "job-init"); err != nil {
+		t.Fatalf("RunInit: %v", err)
+	}
+
+	// firstEnvOfType fails the test itself when a type is missing.
+	for _, want := range []api.MessageType{api.MsgJobStarted, api.MsgJobFinished} {
+		firstEnvOfType(t, sender.envs, want)
+	}
+}
+
+// TestRunForgetShipsStartedAndFinished is a regression guard for the
+// RunForget refactor: a successful forget over a single tag group must
+// still emit both a job.started and a job.finished envelope.
+func TestRunForgetShipsStartedAndFinished(t *testing.T) {
+	t.Parallel()
+
+	// The stub answers both the "forget" call and the "snapshots" call
+	// with an empty JSON array.
+	script := setupScript(t, `
+case "$1" in
+  forget)    echo "[]" ;;
+  snapshots) echo "[]" ;;
+  *)         exit 0 ;;
+esac
+`)
+
+	one := 1
+	policy := restic.ForgetPolicy{KeepLast: &one}
+	groups := []restic.ForgetGroup{{Tag: "documents", Policy: policy}}
+
+	sender := &fakeSender{}
+	rn := New(Config{ResticBin: script}, sender, 0)
+	if err := rn.RunForget(context.Background(), "job-forget", groups); err != nil {
+		t.Fatalf("RunForget: %v", err)
+	}
+
+	// firstEnvOfType fails the test itself when a type is missing.
+	for _, want := range []api.MessageType{api.MsgJobStarted, api.MsgJobFinished} {
+		firstEnvOfType(t, sender.envs, want)
+	}
+}
@@ -9,6 +9,7 @@
 package secrets

 import (
+	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -24,6 +25,11 @@ import (
 // depth — the key is per-host today, but cheap to be careful.)
 const additionalData = "rm-agent-repo-creds-v1"

+// ErrNoAdmin is returned by LoadAdmin when no admin slot has been
+// written yet. Callers must distinguish this from a hard error: the
+// agent simply hasn't received an admin config.update push yet.
+// It is a sentinel value; match it with errors.Is.
+var ErrNoAdmin = errors.New("secrets: admin slot not configured")
+
 // Repo is the plaintext shape persisted inside the AEAD blob.
 type Repo struct {
 	URL      string `json:"repo_url,omitempty"`
@@ -35,6 +41,15 @@ type Repo struct {
 // minimum (URL + password) needed to run a backup.
 func (r Repo) Empty() bool { return r.URL == "" || r.Password == "" }

+// bundle is the on-disk JSON shape as of secrets v2. It holds the
+// everyday repo slot and an optional admin slot (prune / unlock).
+// Legacy files (pre-v2) contain a flat Repo object; loadBundle
+// re-wraps those in memory, and the file itself is rewritten in the
+// bundle shape by the next Save or SaveAdmin.
+//
+// Admin is a pointer so that "never written" (nil) can be told apart
+// from a stored value; LoadAdmin maps nil to ErrNoAdmin.
+type bundle struct {
+	Repo  Repo  `json:"repo,omitempty"`
+	Admin *Repo `json:"admin,omitempty"`
+}
+
 // Store reads and writes the encrypted secrets file at Path, sealed
 // under the 32-byte key Key.
 type Store struct {
@@ -55,32 +70,47 @@ func New(path string, key []byte) (*Store, error) {
 	return &Store{path: path, a: a}, nil
 }

-// Load returns the persisted Repo, or a zero-value Repo (with no
-// error) if the file does not exist yet — first-run agents have
-// nothing on disk until the server pushes a config.update.
-func (s *Store) Load() (Repo, error) {
+// loadBundle reads and decrypts the on-disk blob, returning a bundle.
+// Returns an empty bundle (and no error) when the file does not exist
+// yet; read, decrypt and JSON-parse failures are returned wrapped.
+//
+// Back-compat decode: when the decoded repo slot is empty and the
+// plaintext contains the substring "repo_url", the plaintext is
+// decoded a second time as a flat legacy Repo and placed in the repo
+// slot. The substring test is not limited to top-level keys, so it
+// also fires for a bundle whose only populated slot is admin; in
+// that case the flat decode finds no matching top-level keys and the
+// repo slot stays empty. A failure of the second decode is ignored.
+func (s *Store) loadBundle() (bundle, error) {
 	body, err := os.ReadFile(s.path)
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
-			return Repo{}, nil
+			return bundle{}, nil
 		}
-		return Repo{}, fmt.Errorf("secrets: read %q: %w", s.path, err)
+		return bundle{}, fmt.Errorf("secrets: read %q: %w", s.path, err)
 	}
 	plain, err := s.a.Decrypt(string(body), []byte(additionalData))
 	if err != nil {
-		return Repo{}, fmt.Errorf("secrets: decrypt %q: %w", s.path, err)
+		return bundle{}, fmt.Errorf("secrets: decrypt %q: %w", s.path, err)
 	}
-	var r Repo
-	if err := json.Unmarshal(plain, &r); err != nil {
-		return Repo{}, fmt.Errorf("secrets: parse %q: %w", s.path, err)
+
+	// Try the new bundle shape first.
+	var b bundle
+	if err := json.Unmarshal(plain, &b); err != nil {
+		return bundle{}, fmt.Errorf("secrets: parse %q: %w", s.path, err)
 	}
-	return r, nil
+
+	// If the bundle has an empty Repo slot but the raw JSON contains
+	// a "repo_url" key, this may be a legacy flat blob —
+	// re-unmarshal it as a Repo and slot it in.
+	if b.Repo == (Repo{}) && bytes.Contains(plain, []byte(`"repo_url"`)) {
+		var legacy Repo
+		// Best effort: on a decode error the repo slot is left empty.
+		if err := json.Unmarshal(plain, &legacy); err == nil {
+			b.Repo = legacy
+		}
+	}
+
+	return b, nil
 }

-// Save replaces the on-disk blob atomically. Mode is 0600. Parent
-// directory must already exist (the install script lays it down).
-func (s *Store) Save(r Repo) error {
-	body, err := json.Marshal(r)
+// saveBundle marshals b, encrypts it and writes it atomically at
+// mode 0600. Parent directory must already exist.
+func (s *Store) saveBundle(b bundle) error {
+	body, err := json.Marshal(b)
 	if err != nil {
 		return fmt.Errorf("secrets: marshal: %w", err)
 	}
@@ -115,3 +145,50 @@ func (s *Store) Save(r Repo) error {
 	}
 	return nil
 }
+
+// Load returns the everyday repo slot of the persisted bundle. When
+// the secrets file does not exist yet the result is a zero-value
+// Repo with a nil error — a first-run agent has nothing on disk
+// until the server pushes a config.update.
+func (s *Store) Load() (Repo, error) {
+	cur, loadErr := s.loadBundle()
+	if loadErr != nil {
+		return Repo{}, loadErr
+	}
+	return cur.Repo, nil
+}
+
+// Save overwrites the repo slot and writes the bundle back
+// atomically at mode 0600, leaving the admin slot as it was. The
+// current bundle is loaded first; if that load fails nothing is
+// written. Parent directory must already exist.
+func (s *Store) Save(r Repo) error {
+	cur, loadErr := s.loadBundle()
+	if loadErr != nil {
+		return fmt.Errorf("secrets: load before save: %w", loadErr)
+	}
+	cur.Repo = r
+	return s.saveBundle(cur)
+}
+
+// LoadAdmin returns the admin slot of the persisted bundle. When no
+// admin slot has been stored it returns (Repo{}, ErrNoAdmin); any
+// other error comes straight from loading the bundle and is a hard
+// failure.
+func (s *Store) LoadAdmin() (Repo, error) {
+	cur, loadErr := s.loadBundle()
+	switch {
+	case loadErr != nil:
+		return Repo{}, loadErr
+	case cur.Admin == nil:
+		return Repo{}, ErrNoAdmin
+	}
+	return *cur.Admin, nil
+}
+
+// SaveAdmin overwrites the admin slot and writes the bundle back
+// atomically at mode 0600, leaving the repo slot as it was. The
+// current bundle is loaded first; if that load fails nothing is
+// written.
+func (s *Store) SaveAdmin(r Repo) error {
+	cur, loadErr := s.loadBundle()
+	if loadErr != nil {
+		return fmt.Errorf("secrets: load before save: %w", loadErr)
+	}
+	admin := r
+	cur.Admin = &admin
+	return s.saveBundle(cur)
+}
@@ -2,6 +2,8 @@ package secrets

 import (
 	"crypto/rand"
+	"encoding/json"
+	"errors"
 	"io"
 	"os"
 	"path/filepath"
@@ -97,3 +99,211 @@ func TestSaveIsAtomic(t *testing.T) {
 		t.Errorf("dir should hold one file post-save, got %v", names)
 	}
 }
+
+// TestSecretsLoadAdminEmpty checks that LoadAdmin on a store whose
+// file has never been written reports ErrNoAdmin rather than a hard
+// error.
+func TestSecretsLoadAdminEmpty(t *testing.T) {
+	t.Parallel()
+	path := filepath.Join(t.TempDir(), "secrets.enc")
+	st, newErr := New(path, freshKey(t))
+	if newErr != nil {
+		t.Fatalf("new: %v", newErr)
+	}
+
+	if _, err := st.LoadAdmin(); !errors.Is(err, ErrNoAdmin) {
+		t.Errorf("expected ErrNoAdmin, got %v", err)
+	}
+}
+
+// TestSecretsAdminSlotIndependent checks that the repo and admin
+// slots round-trip independently: writing one never disturbs the
+// other, and a second SaveAdmin replaces only the admin slot.
+func TestSecretsAdminSlotIndependent(t *testing.T) {
+	t.Parallel()
+	path := filepath.Join(t.TempDir(), "secrets.enc")
+	st, err := New(path, freshKey(t))
+	if err != nil {
+		t.Fatalf("new: %v", err)
+	}
+
+	everyday := Repo{URL: "rest:https://repo/host", Username: "user", Password: "pw"}
+	adminV1 := Repo{URL: "rest:https://repo/host", Username: "admin", Password: "adminpw"}
+	adminV2 := Repo{URL: "rest:https://repo/host", Username: "admin2", Password: "pw2"}
+
+	// Populate both slots.
+	if err = st.Save(everyday); err != nil {
+		t.Fatalf("save repo: %v", err)
+	}
+	if err = st.SaveAdmin(adminV1); err != nil {
+		t.Fatalf("save admin: %v", err)
+	}
+
+	// Each loader must hand back exactly what was stored in its slot.
+	repoAfterFirst, err := st.Load()
+	if err != nil {
+		t.Fatalf("load: %v", err)
+	}
+	if repoAfterFirst != everyday {
+		t.Errorf("repo slot mismatch: got %+v want %+v", repoAfterFirst, everyday)
+	}
+	adminAfterFirst, err := st.LoadAdmin()
+	if err != nil {
+		t.Fatalf("load admin: %v", err)
+	}
+	if adminAfterFirst != adminV1 {
+		t.Errorf("admin slot mismatch: got %+v want %+v", adminAfterFirst, adminV1)
+	}
+
+	// Overwrite the admin slot; the repo slot must be untouched.
+	if err = st.SaveAdmin(adminV2); err != nil {
+		t.Fatalf("save admin2: %v", err)
+	}
+	repoAfterSecond, err := st.Load()
+	if err != nil {
+		t.Fatalf("load after admin2 save: %v", err)
+	}
+	if repoAfterSecond != everyday {
+		t.Errorf("repo slot changed unexpectedly: got %+v want %+v", repoAfterSecond, everyday)
+	}
+	adminAfterSecond, err := st.LoadAdmin()
+	if err != nil {
+		t.Fatalf("load admin2: %v", err)
+	}
+	if adminAfterSecond != adminV2 {
+		t.Errorf("admin2 slot mismatch: got %+v want %+v", adminAfterSecond, adminV2)
+	}
+}
+
+// TestSecretsSaveRefusesCorruptFile checks that Save returns an
+// error and leaves the file byte-for-byte untouched when the
+// existing file cannot be decrypted.
+func TestSecretsSaveRefusesCorruptFile(t *testing.T) {
+	t.Parallel()
+	path := filepath.Join(t.TempDir(), "secrets.enc")
+	st, err := New(path, freshKey(t))
+	if err != nil {
+		t.Fatalf("new: %v", err)
+	}
+
+	// Start from a valid file so the corruption below is the only fault.
+	if err = st.Save(Repo{URL: "rest:https://r/host", Password: "pw"}); err != nil {
+		t.Fatalf("initial save: %v", err)
+	}
+
+	// Replace the ciphertext with bytes that will not decrypt.
+	garbage := []byte("not encrypted")
+	if err = os.WriteFile(path, garbage, 0o600); err != nil {
+		t.Fatalf("write garbage: %v", err)
+	}
+
+	// Save loads before writing, so the decrypt failure must surface.
+	if err = st.Save(Repo{URL: "rest:https://r/host", Password: "new"}); err == nil {
+		t.Fatal("Save over corrupt file must return an error; got nil")
+	}
+
+	// The garbage must still be on disk, unreplaced.
+	onDisk, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("re-read: %v", err)
+	}
+	if string(onDisk) != string(garbage) {
+		t.Errorf("corrupt file was overwritten; file size now %d (was %d)", len(onDisk), len(garbage))
+	}
+}
+
+// TestSecretsSaveAdminRefusesCorruptFile checks that SaveAdmin
+// returns an error and leaves the file byte-for-byte untouched when
+// the existing file cannot be decrypted.
+func TestSecretsSaveAdminRefusesCorruptFile(t *testing.T) {
+	t.Parallel()
+	path := filepath.Join(t.TempDir(), "secrets.enc")
+	st, err := New(path, freshKey(t))
+	if err != nil {
+		t.Fatalf("new: %v", err)
+	}
+
+	// Start from a valid file so the corruption below is the only fault.
+	if err = st.SaveAdmin(Repo{URL: "rest:https://r/host", Password: "adminpw"}); err != nil {
+		t.Fatalf("initial save admin: %v", err)
+	}
+
+	// Replace the ciphertext with bytes that will not decrypt.
+	garbage := []byte("not encrypted admin")
+	if err = os.WriteFile(path, garbage, 0o600); err != nil {
+		t.Fatalf("write garbage: %v", err)
+	}
+
+	// SaveAdmin loads before writing, so the decrypt failure must surface.
+	if err = st.SaveAdmin(Repo{URL: "rest:https://r/host", Password: "new"}); err == nil {
+		t.Fatal("SaveAdmin over corrupt file must return an error; got nil")
+	}
+
+	// The garbage must still be on disk, unreplaced.
+	onDisk, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("re-read: %v", err)
+	}
+	if string(onDisk) != string(garbage) {
+		t.Errorf("corrupt file was overwritten; file size now %d (was %d)", len(onDisk), len(garbage))
+	}
+}
+
+// TestSecretsLegacyFlatBlobMigrates checks that a file holding a
+// flat pre-bundle Repo is still readable through Load, and that a
+// later SaveAdmin leaves both the migrated repo slot and the new
+// admin slot readable from a freshly opened store.
+func TestSecretsLegacyFlatBlobMigrates(t *testing.T) {
+	t.Parallel()
+	path := filepath.Join(t.TempDir(), "secrets.enc")
+	key := freshKey(t)
+
+	// Hand-seal a flat Repo so the file has the legacy shape, with no
+	// bundle wrapper around it.
+	oldRepo := Repo{URL: "rest:https://legacy/host", Username: "legacyuser", Password: "legacypw"}
+	flatJSON, err := json.Marshal(oldRepo)
+	if err != nil {
+		t.Fatalf("marshal legacy: %v", err)
+	}
+	sealer, err := crypto.NewAEAD(key)
+	if err != nil {
+		t.Fatalf("aead: %v", err)
+	}
+	sealed, err := sealer.Encrypt(flatJSON, []byte(additionalData))
+	if err != nil {
+		t.Fatalf("encrypt legacy: %v", err)
+	}
+	if err = os.WriteFile(path, []byte(sealed), 0o600); err != nil {
+		t.Fatalf("write legacy file: %v", err)
+	}
+
+	// Reading through the normal API must surface the legacy Repo.
+	first, err := New(path, key)
+	if err != nil {
+		t.Fatalf("new: %v", err)
+	}
+	loaded, err := first.Load()
+	if err != nil {
+		t.Fatalf("load legacy: %v", err)
+	}
+	if loaded != oldRepo {
+		t.Errorf("legacy decode mismatch: got %+v want %+v", loaded, oldRepo)
+	}
+
+	// Writing the admin slot rewrites the file; both slots must then
+	// be visible to a store opened afterwards.
+	newAdmin := Repo{URL: "rest:https://legacy/host", Username: "admin", Password: "adminpw"}
+	if err = first.SaveAdmin(newAdmin); err != nil {
+		t.Fatalf("save admin after legacy: %v", err)
+	}
+
+	second, err := New(path, key)
+	if err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+	repoAfter, err := second.Load()
+	if err != nil {
+		t.Fatalf("load repo after migration: %v", err)
+	}
+	if repoAfter != oldRepo {
+		t.Errorf("repo after migration: got %+v want %+v", repoAfter, oldRepo)
+	}
+	adminAfter, err := second.LoadAdmin()
+	if err != nil {
+		t.Fatalf("load admin after migration: %v", err)
+	}
+	if adminAfter != newAdmin {
+		t.Errorf("admin after migration: got %+v want %+v", adminAfter, newAdmin)
+	}
+}
@@ -77,6 +77,30 @@ const (
 	JobCancelled JobStatus = "cancelled" //nolint:misspell // wire format
 )

+// ForgetPolicyJSON is the wire shape of a per-group retention policy
+// shipped with a forget command.run. Mirrors store.RetentionPolicy
+// JSON tags exactly so a future caller could json-roundtrip between
+// the two without reshaping. All fields nullable; an empty struct is
+// rejected by the agent (restic refuses to forget without --keep-*).
+//
+// Each field is a pointer with omitempty: nil means "dimension not
+// set" and the key is left out of the JSON entirely, while a non-nil
+// value (including 0) is always serialized.
+type ForgetPolicyJSON struct {
+	KeepLast    *int `json:"keep_last,omitempty"`
+	KeepHourly  *int `json:"keep_hourly,omitempty"`
+	KeepDaily   *int `json:"keep_daily,omitempty"`
+	KeepWeekly  *int `json:"keep_weekly,omitempty"`
+	KeepMonthly *int `json:"keep_monthly,omitempty"`
+	KeepYearly  *int `json:"keep_yearly,omitempty"`
+}
+
+// ForgetGroup is one (tag, retention) pair shipped to the agent in a
+// forget command.run. The agent invokes
+// `restic forget --tag <Tag> --keep-* …` once per group, with each
+// group's own policy. The Tag is the source-group name (which is
+// also the snapshot tag carried at backup time).
+//
+// Neither field carries omitempty, so "tag" and "policy" are always
+// present in the serialized object.
+type ForgetGroup struct {
+	Tag    string           `json:"tag"`
+	Policy ForgetPolicyJSON `json:"policy"`
+}
+
 // CommandRunPayload is the server → agent dispatch for a run-now job.
 //
 // For kind=backup, Includes/Excludes/Tag are populated from the source
@@ -85,19 +109,27 @@ const (
 // the source group's name) so retention can target it later via
 // `restic forget --tag`.
 //
-// For kind=forget, RetentionPolicy is the typed keep-* set as raw JSON
-// (the agent doesn't share the store package's typed struct).
+// For kind=forget, ForgetGroups carries one entry per source-group on
+// the host that has a non-empty retention policy. The agent walks the
+// list and runs `restic forget --tag <Tag> --keep-* …` per group.
 //
 // Args is preserved as a generic free-form slice for kinds that don't
-// fit the structured fields (e.g. unlock takes none; init takes none).
+// fit the structured fields (e.g. unlock takes none; init takes none;
+// check carries the subset% as Args[0]).
+//
+// RequiresAdminCreds tells the agent to load the admin slot of its
+// secrets store rather than the everyday repo slot. Set by the server
+// only for prune (the only kind that needs delete authority on a
+// rest-server repo today).
 type CommandRunPayload struct {
-	JobID           string          `json:"job_id"`
-	Kind            JobKind         `json:"kind"`
-	Args            []string        `json:"args,omitempty"`
-	Includes        []string        `json:"includes,omitempty"`
-	Excludes        []string        `json:"excludes,omitempty"`
-	Tag             string          `json:"tag,omitempty"`
-	RetentionPolicy json.RawMessage `json:"retention_policy,omitempty"`
+	JobID              string        `json:"job_id"`
+	Kind               JobKind       `json:"kind"`
+	Args               []string      `json:"args,omitempty"`
+	Includes           []string      `json:"includes,omitempty"`
+	Excludes           []string      `json:"excludes,omitempty"`
+	Tag                string        `json:"tag,omitempty"`
+	ForgetGroups       []ForgetGroup `json:"forget_groups,omitempty"`
+	RequiresAdminCreds bool          `json:"requires_admin_creds,omitempty"`
 }

 // CommandCancelPayload is the server → agent cancel signal.
@@ -186,15 +218,24 @@ type Snapshot struct {
 	FileCount int64     `json:"file_count,omitempty"`
 }

-// RepoStatsPayload — agent reports periodic repo health facts derived
-// from `restic stats` and lock-file inspection.
+// RepoStatsPayload carries a partial-update snapshot of repo health
+// facts, shipped by the agent after prune/check/unlock or a periodic
+// stats refresh. Pointer fields follow omitempty semantics: a nil
+// pointer means "no update for this field" and is omitted on the
+// wire; the server merges only the non-nil fields into its
+// host_repo_stats row (matching UpsertHostRepoStats partial-update
+// semantics). Non-pointer fields (LastCheckStatus) use the empty
+// string as the "no update" sentinel.
+//
+// Because the fields are pointers, an explicit zero is still sent:
+// a non-nil LockPresent pointing at false serializes as
+// "lock_present":false rather than being dropped by omitempty.
 type RepoStatsPayload struct {
-	SizeBytes       int64     `json:"size_bytes"`
-	SnapshotCount   int       `json:"snapshot_count"`
-	DedupRatio      float64   `json:"dedup_ratio"`
-	LastCheckAt     time.Time `json:"last_check_at,omitempty"`
-	LastCheckStatus string    `json:"last_check_status,omitempty"`
-	LockState       string    `json:"lock_state"` // locked|unlocked
+	TotalSizeBytes      *int64     `json:"total_size_bytes,omitempty"`
+	RawSizeBytes        *int64     `json:"raw_size_bytes,omitempty"`
+	UniqueFiles         *int64     `json:"unique_files,omitempty"`
+	SnapshotCount       *int64     `json:"snapshot_count,omitempty"`
+	LastCheckAt         *time.Time `json:"last_check_at,omitempty"`
+	LastCheckStatus     string     `json:"last_check_status,omitempty"`
+	LockPresent         *bool      `json:"lock_present,omitempty"`
+	LastPruneAt         *time.Time `json:"last_prune_at,omitempty"`
+	LastPruneFreedBytes *int64     `json:"last_prune_freed_bytes,omitempty"`
 }

 // Schedule is the agent-facing view of a slim Schedule row plus its
@@ -252,12 +293,19 @@ type ScheduleFirePayload struct {
 // ConfigUpdatePayload — server pushes per-host config (currently just
 // repo connection details). Empty fields mean "leave existing alone";
 // to clear something, send an explicit zero value.
+//
+// Slot picks which secrets-store slot the agent writes the creds to.
+// Empty / "repo" = everyday repo creds (default). "admin" = the
+// prune-capable admin user (separate slot — not loaded for backups).
+// NOTE(review): an agent that predates Slot ignores the field and
+// merges every push into its repo slot, so an "admin" push sent to
+// such an agent would replace the everyday creds rather than being a
+// no-op. Only send Slot="admin" to agents built with slot support —
+// confirm against the lockstep-deploy assumption.
 type ConfigUpdatePayload struct {
 	RepoURL        string `json:"repo_url,omitempty"`
 	RepoPassword   string `json:"repo_password,omitempty"` // sensitive
 	RepoUsername   string `json:"repo_username,omitempty"`
 	RepoCredential string `json:"repo_credential,omitempty"` // sensitive (for rest server basic auth)
 	HookShell      string `json:"hook_shell,omitempty"`
+	Slot           string `json:"slot,omitempty"`
 }

 // AgentUpdateAvailablePayload — informational only; the agent does
@@ -12,3 +12,15 @@ const CurrentProtocolVersion = 1
 // server accepts in a hello. Agents below this are disconnected with
 // a structured error pointing at the upgrade docs.
 const MinAgentProtocolVersion = 1
+
+// Phase 5 (P2R-03..P2R-08, branch p2r-phase5-maintenance, 2026-05) reshaped
+// CommandRunPayload (RetentionPolicy removed, ForgetGroups added, RequiresAdminCreds added),
+// ConfigUpdatePayload (Slot added), and RepoStatsPayload (full reshape).
+// The protocol version was deliberately NOT bumped because:
+//  1. This project deploys agent and server in lockstep from the same release.
+//  2. There is no supported "rolling upgrade" path with mixed agent/server versions.
+//  3. The smoke env restage block in CLAUDE.md restages the agent binary on
+//     every server build for exactly this reason.
+//
+// If a multi-version protocol path is ever introduced, every Phase 5 wire
+// change is a breaking change and the version must bump to 2 at that time.
@@ -138,6 +138,85 @@ func TestJobProgressShapeStable(t *testing.T) {
 	}
 }

+// TestRepoStatsPayloadRoundTrip pins the wire behaviour of
+// RepoStatsPayload: a zero value marshals to {}, a fully populated
+// value survives marshal/unmarshal unchanged, and a payload with
+// only LockPresent set (to false) serializes exactly that one key.
+func TestRepoStatsPayloadRoundTrip(t *testing.T) {
+	t.Parallel()
+
+	// Nil pointer fields must be omitted from JSON output.
+	empty := RepoStatsPayload{}
+	raw, err := json.Marshal(empty)
+	if err != nil {
+		t.Fatalf("marshal empty: %v", err)
+	}
+	if string(raw) != "{}" {
+		t.Errorf("empty payload should marshal to {}, got %s", raw)
+	}
+
+	// Populated fields must survive a round trip.
+	total := int64(123456)
+	rawSize := int64(200000)
+	files := int64(42)
+	snaps := int64(7)
+	lockPresent := true
+	now := time.Date(2026, 1, 2, 3, 4, 5, 0, time.UTC)
+	pruneAt := time.Date(2026, 1, 3, 0, 0, 0, 0, time.UTC)
+	freed := int64(8192)
+
+	p := RepoStatsPayload{
+		TotalSizeBytes:      &total,
+		RawSizeBytes:        &rawSize,
+		UniqueFiles:         &files,
+		SnapshotCount:       &snaps,
+		LastCheckAt:         &now,
+		LastCheckStatus:     "ok",
+		LockPresent:         &lockPresent,
+		LastPruneAt:         &pruneAt,
+		LastPruneFreedBytes: &freed,
+	}
+	raw2, err := json.Marshal(p)
+	if err != nil {
+		t.Fatalf("marshal full: %v", err)
+	}
+	var got RepoStatsPayload
+	if err := json.Unmarshal(raw2, &got); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	if got.TotalSizeBytes == nil || *got.TotalSizeBytes != total {
+		t.Errorf("TotalSizeBytes: got %v, want %d", got.TotalSizeBytes, total)
+	}
+	if got.RawSizeBytes == nil || *got.RawSizeBytes != rawSize {
+		t.Errorf("RawSizeBytes: got %v, want %d", got.RawSizeBytes, rawSize)
+	}
+	if got.UniqueFiles == nil || *got.UniqueFiles != files {
+		t.Errorf("UniqueFiles: got %v, want %d", got.UniqueFiles, files)
+	}
+	if got.SnapshotCount == nil || *got.SnapshotCount != snaps {
+		t.Errorf("SnapshotCount: got %v, want %d", got.SnapshotCount, snaps)
+	}
+	if got.LastCheckAt == nil || !got.LastCheckAt.Equal(now) {
+		t.Errorf("LastCheckAt: got %v, want %v", got.LastCheckAt, now)
+	}
+	if got.LastCheckStatus != "ok" {
+		t.Errorf("LastCheckStatus: got %q, want %q", got.LastCheckStatus, "ok")
+	}
+	if got.LockPresent == nil || *got.LockPresent != lockPresent {
+		t.Errorf("LockPresent: got %v, want %v", got.LockPresent, lockPresent)
+	}
+	if got.LastPruneAt == nil || !got.LastPruneAt.Equal(pruneAt) {
+		t.Errorf("LastPruneAt: got %v, want %v", got.LastPruneAt, pruneAt)
+	}
+	if got.LastPruneFreedBytes == nil || *got.LastPruneFreedBytes != freed {
+		t.Errorf("LastPruneFreedBytes: got %v, want %d", got.LastPruneFreedBytes, freed)
+	}
+
+	// Partial update: only set LockPresent. The marshal error is
+	// checked so a failure is reported as such instead of showing up
+	// as a confusing empty-output mismatch below.
+	lockFalse := false
+	partial := RepoStatsPayload{LockPresent: &lockFalse}
+	rawPartial, err := json.Marshal(partial)
+	if err != nil {
+		t.Fatalf("marshal partial: %v", err)
+	}
+	if string(rawPartial) != `{"lock_present":false}` {
+		t.Errorf("partial marshal: got %s, want {\"lock_present\":false}", rawPartial)
+	}
+}
+
 // touch time so the import is used by other tests in this file when
 // they grow over time.
 var _ = time.Now
@@ -151,8 +151,7 @@ func (e Env) RunBackup(ctx context.Context, paths, excludes, tags []string, hand
 }

 // ForgetPolicy mirrors restic forget's --keep-* flags. All optional;
-// nil/zero means "don't pass that flag." Caller passes whatever the
-// schedule's RetentionPolicy carries.
+// nil/zero means "don't pass that flag."
 type ForgetPolicy struct {
 	KeepLast    *int
 	KeepHourly  *int
@@ -181,53 +180,47 @@ func (p ForgetPolicy) args() []string {
 	return out
 }

-// Empty reports whether no retention dimensions are set. restic
-// forget refuses to run without at least one keep-* flag (it would
-// delete every snapshot), so the agent rejects empty policies before
-// invoking restic.
+// Empty reports whether no retention dimensions are set.
 func (p ForgetPolicy) Empty() bool {
 	return p.KeepLast == nil && p.KeepHourly == nil &&
 		p.KeepDaily == nil && p.KeepWeekly == nil &&
 		p.KeepMonthly == nil && p.KeepYearly == nil
 }

-// RunForget executes `restic forget --keep-* … --json` against the
-// configured repo. Does NOT pass --prune — pruning lives behind a
-// separate, admin-only credential (see spec §4.3 / P2-06). Restic
-// just rewrites the snapshot index; the actual data deletion waits
-// for the next prune. Returns nil on a clean exit.
-func (e Env) RunForget(ctx context.Context, policy ForgetPolicy, handle LineHandler) error {
-	if policy.Empty() {
-		return fmt.Errorf("restic forget: refusing to run with empty retention policy (would delete every snapshot)")
-	}
-	args := append([]string{"forget", "--json"}, policy.args()...)
-	cmd := exec.CommandContext(ctx, e.Bin, args...)
-	cmd.Env = e.envSlice()
-	cmd.Dir = e.WorkDir
+// ForgetGroup is one (tag, retention-policy) pair fed to RunForget.
+// The wrapper invokes `restic forget --tag <Tag> --keep-* …` per
+// group so retention can be targeted at a single source-group's
+// snapshots without disturbing snapshots tagged for other groups.
+// RunForget rejects a group whose Policy reports Empty().
+type ForgetGroup struct {
+	Tag    string
+	Policy ForgetPolicy
+}

-	stdout, err := cmd.StdoutPipe()
-	if err != nil {
-		return fmt.Errorf("restic forget: stdout pipe: %w", err)
+// RunForget executes one `restic forget --json --tag <Tag> --keep-* …`
+// invocation per group, in slice order. Does NOT pass --prune —
+// pruning lives behind a separate admin-only credential (see spec
+// §4.3 / P2-06). Restic rewrites the snapshot index; the actual data
+// deletion waits for the next prune.
+//
+// Validation happens before any restic process is started: an empty
+// groups slice is rejected (would be a no-op), and so is any group
+// with an empty policy (restic forget without any keep-* would
+// delete every snapshot in the tagged set). Checking every group up
+// front means a bad group late in the slice cannot leave earlier
+// groups already forgotten.
+//
+// Returns the validation error, or the first error from a restic
+// invocation (later groups are then not run), or nil when every
+// group runs to a clean exit. Output lines are forwarded to handle.
+func (e Env) RunForget(ctx context.Context, groups []ForgetGroup, handle LineHandler) error {
+	if len(groups) == 0 {
+		return fmt.Errorf("restic forget: refusing to run with no groups (would be a no-op)")
 	}
-	stderr, err := cmd.StderrPipe()
-	if err != nil {
-		return fmt.Errorf("restic forget: stderr pipe: %w", err)
-	}
-
-	if err := cmd.Start(); err != nil {
-		return fmt.Errorf("restic forget: start: %w", err)
-	}
-
-	done := make(chan error, 2)
-	go func() { done <- pumpPlain(stdout, "stdout", handle) }()
-	go func() { done <- pumpPlain(stderr, "stderr", handle) }()
-	for i := 0; i < 2; i++ {
-		if err := <-done; err != nil && handle != nil {
-			handle("event", fmt.Sprintf("pump error: %v", err), nil)
+	// First pass: reject the whole request before touching the repo.
+	for _, g := range groups {
+		if g.Policy.Empty() {
+			return fmt.Errorf("restic forget: group %q has empty retention policy (would delete every snapshot)", g.Tag)
+		}
+	}
+	// Second pass: one restic invocation per (already validated) group.
+	for _, g := range groups {
+		args := []string{"forget", "--json", "--tag", g.Tag}
+		args = append(args, g.Policy.args()...)
+		cmd := exec.CommandContext(ctx, e.Bin, args...)
+		cmd.Env = e.envSlice()
+		cmd.Dir = e.WorkDir
+		if err := runWithPump(cmd, handle); err != nil {
+			return err
 		}
-	}
-	if werr := cmd.Wait(); werr != nil {
-		return fmt.Errorf("restic forget: %w", werr)
 	}
 	return nil
 }
@@ -243,19 +236,6 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
 	cmd.Env = e.envSlice()
 	cmd.Dir = e.WorkDir

-	stdout, err := cmd.StdoutPipe()
-	if err != nil {
-		return fmt.Errorf("restic init: stdout pipe: %w", err)
-	}
-	stderr, err := cmd.StderrPipe()
-	if err != nil {
-		return fmt.Errorf("restic init: stderr pipe: %w", err)
-	}
-
-	if err := cmd.Start(); err != nil {
-		return fmt.Errorf("restic init: start: %w", err)
-	}
-
 	// Sniff for "config file already exists" on stderr; if we see it
 	// we'll treat the non-zero exit as a soft success — running init
 	// against an already-initialized repo is a no-op semantically,
@@ -271,26 +251,166 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
 		}
 	}

-	done := make(chan error, 2)
-	go func() { done <- pumpPlain(stdout, "stdout", sniff) }()
-	go func() { done <- pumpPlain(stderr, "stderr", sniff) }()
-	for i := 0; i < 2; i++ {
-		if err := <-done; err != nil && handle != nil {
-			handle("event", fmt.Sprintf("pump error: %v", err), nil)
-		}
-	}
-	if werr := cmd.Wait(); werr != nil {
+	if err := runWithPump(cmd, sniff); err != nil {
 		if alreadyInited {
 			if handle != nil {
 				handle("event", "repo already initialized — treating as success", nil)
 			}
 			return nil
 		}
-		return fmt.Errorf("restic init: %w", werr)
+		return err
 	}
 	return nil
 }

+// RunPrune runs `restic prune` against the configured repo and
+// forwards every stdout/stderr line to handle, so the live log
+// doubles as the operator's progress display (prune is run without
+// --json here).
+//
+// Prune needs the *admin* credentials (delete access on the
+// rest-server repo); the caller must have populated Env.RepoUsername
+// and Env.RepoPassword with the admin pair before calling.
+func (e Env) RunPrune(ctx context.Context, handle LineHandler) error {
+	prune := exec.CommandContext(ctx, e.Bin, "prune")
+	prune.Dir = e.WorkDir
+	prune.Env = e.envSlice()
+	return runWithPump(prune, handle)
+}
+
+// runWithPump starts cmd, pumps its stdout and stderr through
+// pumpPlain (one goroutine per stream, both feeding handle), drains
+// both pumps and only then waits for the process. Errors are
+// prefixed with the command's verb — "restic <Args[1]>", or plain
+// "restic" when there is no subcommand — for context. A pump error
+// is reported to handle as an "event" line, not returned.
+func runWithPump(cmd *exec.Cmd, handle LineHandler) error {
+	verb := "restic"
+	if len(cmd.Args) >= 2 {
+		verb = "restic " + cmd.Args[1]
+	}
+
+	outPipe, pipeErr := cmd.StdoutPipe()
+	if pipeErr != nil {
+		return fmt.Errorf("%s: stdout pipe: %w", verb, pipeErr)
+	}
+	errPipe, pipeErr := cmd.StderrPipe()
+	if pipeErr != nil {
+		return fmt.Errorf("%s: stderr pipe: %w", verb, pipeErr)
+	}
+	if startErr := cmd.Start(); startErr != nil {
+		return fmt.Errorf("%s: start: %w", verb, startErr)
+	}
+
+	// Both pumps must finish reading before Wait is called.
+	results := make(chan error, 2)
+	go func() { results <- pumpPlain(outPipe, "stdout", handle) }()
+	go func() { results <- pumpPlain(errPipe, "stderr", handle) }()
+	for pending := 2; pending > 0; pending-- {
+		pumpErr := <-results
+		if pumpErr == nil || handle == nil {
+			continue
+		}
+		handle("event", fmt.Sprintf("pump error: %v", pumpErr), nil)
+	}
+
+	if waitErr := cmd.Wait(); waitErr != nil {
+		return fmt.Errorf("%s: %w", verb, waitErr)
+	}
+	return nil
+}
+
+// RunUnlock runs `restic unlock` against the configured repo,
+// forwarding output lines to handle. Returns nil on a clean exit.
+func (e Env) RunUnlock(ctx context.Context, handle LineHandler) error {
+	unlock := exec.CommandContext(ctx, e.Bin, "unlock")
+	unlock.Dir = e.WorkDir
+	unlock.Env = e.envSlice()
+	return runWithPump(unlock, handle)
+}
+
+// RepoStats mirrors `restic stats --json --mode raw-data` output.
+// It is the decode target used by RunStats; any key missing from
+// restic's output leaves the corresponding field at zero.
+type RepoStats struct {
+	TotalSize         int64 `json:"total_size"`
+	TotalUncompressed int64 `json:"total_uncompressed_size"`
+	SnapshotsCount    int64 `json:"snapshots_count"`
+	TotalFileCount    int64 `json:"total_file_count"`
+	TotalBlobCount    int64 `json:"total_blob_count"`
+}
+
+// RunStats runs `restic stats --json --mode raw-data` and decodes
+// the JSON object restic prints on stdout. Every output line is
+// still forwarded to handle, so the caller can log the raw output.
+// If several stdout lines decode successfully the last one wins.
+// Returns an error when the command fails or when no stdout line
+// decoded as JSON.
+func (e Env) RunStats(ctx context.Context, handle LineHandler) (*RepoStats, error) {
+	cmd := exec.CommandContext(ctx, e.Bin, "stats", "--json", "--mode", "raw-data")
+	cmd.Env = e.envSlice()
+	cmd.Dir = e.WorkDir
+
+	var parsed *RepoStats
+	tee := func(stream, line string, ev any) {
+		// Only stdout lines that look like a JSON object are candidates.
+		if stream == "stdout" && strings.HasPrefix(line, "{") {
+			candidate := new(RepoStats)
+			if decodeErr := json.Unmarshal([]byte(line), candidate); decodeErr == nil {
+				parsed = candidate
+			}
+		}
+		if handle != nil {
+			handle(stream, line, ev)
+		}
+	}
+
+	if runErr := runWithPump(cmd, tee); runErr != nil {
+		return nil, runErr
+	}
+	if parsed == nil {
+		return nil, fmt.Errorf("restic stats: no JSON in output")
+	}
+	return parsed, nil
+}
+
+// CheckResult summarizes a `restic check` invocation. LockPresent is
+// true if the stderr stream contained a stale-lock signal (caller is
+// expected to surface this in the UI so the operator can run unlock).
+// ErrorsFound is true if check exited with a non-zero status (errors
+// detected in repo metadata).
+//
+// NOTE(review): RunCheck sets ErrorsFound for any non-zero exit, so
+// a run that failed only because the repo was locked presumably
+// reports both flags together — confirm callers handle that.
+type CheckResult struct {
+	LockPresent bool
+	ErrorsFound bool
+}
+
+// RunCheck executes `restic check`, optionally with
+// --read-data-subset. A subsetPct of 0 (or less) omits the flag, so
+// restic verifies the repository structure and metadata only and
+// does not read pack data; a value >0 passes --read-data-subset N%.
+//
+// Every output line is forwarded to handle. Stderr is additionally
+// sniffed for "stale lock" / "already locked" to set LockPresent.
+//
+// Return contract:
+//   - clean exit: (result, nil).
+//   - ctx cancelled or timed out: (result, error wrapping ctx.Err()).
+//     The killed process also surfaces as an exit error, so this is
+//     checked first to keep an aborted run from being recorded as
+//     "errors found".
+//   - restic ran and exited non-zero: (result with ErrorsFound=true,
+//     nil), so the caller can persist last_check_status.
+//   - anything else (binary missing, pipe failure, …): (result, err).
+func (e Env) RunCheck(ctx context.Context, subsetPct int, handle LineHandler) (CheckResult, error) {
+	args := []string{"check"}
+	if subsetPct > 0 {
+		args = append(args, "--read-data-subset", fmt.Sprintf("%d%%", subsetPct))
+	}
+	cmd := exec.CommandContext(ctx, e.Bin, args...)
+	cmd.Env = e.envSlice()
+	cmd.Dir = e.WorkDir
+
+	var res CheckResult
+	sniff := func(stream, line string, ev any) {
+		if stream == "stderr" {
+			if strings.Contains(line, "stale lock") || strings.Contains(line, "already locked") {
+				res.LockPresent = true
+			}
+		}
+		if handle != nil {
+			handle(stream, line, ev)
+		}
+	}
+
+	err := runWithPump(cmd, sniff)
+	if err == nil {
+		return res, nil
+	}
+	// A cancelled context kills the child, which then reports a
+	// non-zero (signal) exit. That says nothing about repo health, so
+	// report the interruption instead of ErrorsFound.
+	if ctxErr := ctx.Err(); ctxErr != nil {
+		return res, fmt.Errorf("restic check interrupted: %w", ctxErr)
+	}
+	// restic check exits non-zero when corruption is found; that's
+	// a CheckResult, not a wrapper failure. Treat ExitError as
+	// "errors found" but still return the result so the caller can
+	// persist last_check_status='errors_found'. Reserve the error
+	// return for actually-broken invocations (binary missing, etc).
+	var ee *exec.ExitError
+	if errors.As(err, &ee) {
+		res.ErrorsFound = true
+		return res, nil
+	}
+	return res, err
+}
+
 func pumpPlain(r io.Reader, stream string, handle LineHandler) error {
 	scanner := bufio.NewScanner(r)
 	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
@@ -0,0 +1,193 @@
+package restic
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// setupScriptBin creates a fake "restic" executable inside a fresh
+// per-test temp directory and returns its path. scriptBody is the
+// script content without the shebang; "#!/bin/sh" is prepended and a
+// trailing newline appended.
+// The script is written to "<path>.tmp" and then renamed into place —
+// see the matching helper in internal/agent/runner/runner_test.go for
+// the ETXTBSY race rationale.
+func setupScriptBin(t *testing.T, scriptBody string) string {
+	t.Helper()
+	target := filepath.Join(t.TempDir(), "restic")
+	staging := target + ".tmp"
+	script := []byte("#!/bin/sh\n" + scriptBody + "\n")
+	err := os.WriteFile(staging, script, 0o755)
+	if err != nil {
+		t.Fatalf("setupScriptBin: write tmp: %v", err)
+	}
+	if err = os.Rename(staging, target); err != nil {
+		t.Fatalf("setupScriptBin: rename: %v", err)
+	}
+	return target
+}
+
+// captureLines returns a LineHandler that appends "stream:line" into
+// the returned slice pointer (safe for single-goroutine test use).
+func captureLines() (*[]string, LineHandler) {
+	var lines []string
+	h := func(stream, line string, _ any) {
+		lines = append(lines, fmt.Sprintf("%s:%s", stream, line))
+	}
+	return &lines, h
+}
+
+// --- B1: RunPrune ---
+
+// TestRunPruneInvokesPrune checks that RunPrune passes the "prune"
+// subcommand to the restic binary.
+func TestRunPruneInvokesPrune(t *testing.T) {
+	// The fake restic echoes its arguments, so "prune" must show up in
+	// the captured lines.
+	env := Env{Bin: setupScriptBin(t, `echo "$@"`)}
+	lines, h := captureLines()
+	if err := env.RunPrune(context.Background(), h); err != nil {
+		t.Fatalf("RunPrune returned error: %v", err)
+	}
+	if !strings.Contains(strings.Join(*lines, "\n"), "prune") {
+		t.Fatalf("expected 'prune' in captured output; got: %v", *lines)
+	}
+}
+
+// --- B2: RunCheck ---
+
+func TestRunCheckLockSniff(t *testing.T) {
+	cases := []struct {
+		name       string
+		stderrLine string
+		wantLocked bool
+	}{
+		{"stale lock", "Found stale lock from PID 1234", true},
+		{"already locked", "repository is already locked exclusively", true},
+		{"benign mention", "subdir/locked-file ok", false},
+		{"empty", "", false},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			// Script emits the line on stderr, then exits 0.
+			script := fmt.Sprintf(`printf '%%s\n' %q >&2`, c.stderrLine)
+			bin := setupScriptBin(t, script)
+			env := Env{Bin: bin}
+			res, err := env.RunCheck(context.Background(), 0, nil)
+			if err != nil {
+				t.Fatalf("RunCheck returned unexpected error: %v", err)
+			}
+			if res.LockPresent != c.wantLocked {
+				t.Fatalf("LockPresent: got %v, want %v (line: %q)", res.LockPresent, c.wantLocked, c.stderrLine)
+			}
+			if res.ErrorsFound {
+				t.Fatal("expected ErrorsFound=false")
+			}
+		})
+	}
+}
+
+// TestRunCheckErrorsFoundOnExit1 verifies that a non-zero exit from
+// the restic binary is absorbed into CheckResult.ErrorsFound rather
+// than returned as an error.
+func TestRunCheckErrorsFoundOnExit1(t *testing.T) {
+	env := Env{Bin: setupScriptBin(t, `exit 1`)}
+	res, err := env.RunCheck(context.Background(), 0, nil)
+	switch {
+	case err != nil:
+		t.Fatalf("RunCheck returned unexpected error (should have absorbed exit 1): %v", err)
+	case !res.ErrorsFound:
+		t.Fatal("expected ErrorsFound=true for exit 1")
+	}
+}
+
+// TestRunCheckSubsetArg verifies that a positive subsetPct is rendered
+// as "--read-data-subset N%" on the restic command line.
+func TestRunCheckSubsetArg(t *testing.T) {
+	// The fake restic echoes its arguments back.
+	env := Env{Bin: setupScriptBin(t, `echo "$@"`)}
+	lines, h := captureLines()
+	_, err := env.RunCheck(context.Background(), 25, h)
+	if err != nil {
+		t.Fatalf("RunCheck: %v", err)
+	}
+	want := "--read-data-subset 25%"
+	if !strings.Contains(strings.Join(*lines, "\n"), want) {
+		t.Fatalf("expected %q in captured output; got: %v", want, *lines)
+	}
+}
+
+// --- B3: RunUnlock + RunStats ---
+
+// TestRunUnlockInvokesUnlock checks that RunUnlock passes the "unlock"
+// subcommand to the restic binary.
+func TestRunUnlockInvokesUnlock(t *testing.T) {
+	// The fake restic echoes its arguments back.
+	env := Env{Bin: setupScriptBin(t, `echo "$@"`)}
+	lines, h := captureLines()
+	if err := env.RunUnlock(context.Background(), h); err != nil {
+		t.Fatalf("RunUnlock: %v", err)
+	}
+	if !strings.Contains(strings.Join(*lines, "\n"), "unlock") {
+		t.Fatalf("expected 'unlock' in captured output; got: %v", *lines)
+	}
+}
+
+// TestRunStatsParsesJSON feeds a canned stats JSON document through
+// RunStats and checks every decoded field.
+func TestRunStatsParsesJSON(t *testing.T) {
+	bin := setupScriptBin(t, `echo '{"total_size":1234,"total_uncompressed_size":5678,"snapshots_count":3,"total_file_count":100,"total_blob_count":50}'`)
+	env := Env{Bin: bin}
+	stats, err := env.RunStats(context.Background(), nil)
+	if err != nil {
+		t.Fatalf("RunStats: %v", err)
+	}
+	// Only the first mismatching field is reported.
+	switch {
+	case stats.TotalSize != 1234:
+		t.Fatalf("TotalSize: got %d, want 1234", stats.TotalSize)
+	case stats.TotalUncompressed != 5678:
+		t.Fatalf("TotalUncompressed: got %d, want 5678", stats.TotalUncompressed)
+	case stats.SnapshotsCount != 3:
+		t.Fatalf("SnapshotsCount: got %d, want 3", stats.SnapshotsCount)
+	case stats.TotalFileCount != 100:
+		t.Fatalf("TotalFileCount: got %d, want 100", stats.TotalFileCount)
+	case stats.TotalBlobCount != 50:
+		t.Fatalf("TotalBlobCount: got %d, want 50", stats.TotalBlobCount)
+	}
+}
+
+// TestRunStatsErrorsWithoutJSON verifies that RunStats fails with a
+// "no JSON in output" error when the binary prints no JSON document.
+func TestRunStatsErrorsWithoutJSON(t *testing.T) {
+	env := Env{Bin: setupScriptBin(t, `echo "no json here"`)}
+	_, err := env.RunStats(context.Background(), nil)
+	switch {
+	case err == nil:
+		t.Fatal("expected error when no JSON in output")
+	case !strings.Contains(err.Error(), "no JSON in output"):
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
+
+func TestRunStatsZeroSnapshots(t *testing.T) {
+	// Confirms RunStats succeeds and returns a valid *RepoStats when the
+	// repo has no snapshots (snapshots_count=0). A regression that
+	// re-added a "SnapshotsCount > 0" guard would return an error here.
+	bin := setupScriptBin(t, `echo '{"total_size":0,"total_uncompressed_size":0,"snapshots_count":0,"total_file_count":0,"total_blob_count":0}'`)
+	env := Env{Bin: bin}
+	stats, err := env.RunStats(context.Background(), nil)
+	if err != nil {
+		t.Fatalf("RunStats with zero snapshots returned unexpected error: %v", err)
+	}
+	if stats == nil {
+		t.Fatal("expected non-nil *RepoStats, got nil")
+	}
+	if stats.SnapshotsCount != 0 {
+		t.Fatalf("SnapshotsCount: got %d, want 0", stats.SnapshotsCount)
+	}
+}
@@ -167,7 +167,7 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
 	// /api/hosts/{id}/repo-credentials. Failing the whole enrolment
 	// here would leave a half-burned token + an orphan host.
 	if encForHost != "" {
-		if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, encForHost); err != nil {
+		if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindRepo, encForHost); err != nil {
 			slog.Error("enrollment: set host credentials failed",
 				"host_id", hostID, "err", err)
 		}
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"errors"
+	"fmt"
 	"log/slog"
 	stdhttp "net/http"
 	"time"
@@ -39,7 +40,7 @@ func (s *Server) handleGetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
 		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
 		return
 	}
-	enc, err := s.deps.Store.GetHostCredentials(r.Context(), hostID)
+	enc, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindRepo)
 	if err != nil {
 		if errors.Is(err, store.ErrNotFound) {
 			writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
@@ -85,7 +86,8 @@ type hostRepoCredsRequest struct {
 // preserved. Re-encrypts under host_id and pushes a config.update
 // over the WS if the agent is connected.
 func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
-	if !s.authedUser(r) {
+	user, ok := s.requireUser(r)
+	if !ok {
 		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
 		return
 	}
@@ -107,7 +109,7 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R

 	// Merge with the existing row, if any.
 	existing := repoCredsBlob{}
-	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID); err == nil {
+	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindRepo); err == nil {
 		plain, err := s.deps.AEAD.Decrypt(cur, []byte("host:"+hostID))
 		if err != nil {
 			writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
@@ -139,13 +141,14 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}
-	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, enc); err != nil {
+	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindRepo, enc); err != nil {
 		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
 		return
 	}

 	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
 		ID:         ulid.Make().String(),
+		UserID:     &user.ID,
 		Actor:      "user",
 		Action:     "host.repo_credentials_set",
 		TargetKind: ptr("host"),
@@ -184,6 +187,209 @@ func (s *Server) pushRepoCredsToAgent(ctx context.Context, hostID string, blob r
 	return nil
 }

+// handleGetAdminCredentials serves a redacted view of the host's
+// admin-slot credentials (URL, username, and a has-password flag —
+// never the password itself) so the UI can pre-fill the edit form.
+// Responds 401 when unauthenticated, 400 without a host id, 404
+// ("not_set") when no admin row exists, and 500 on store, decrypt or
+// parse failures.
+func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	if !s.authedUser(r) {
+		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
+		return
+	}
+	hostID := chi.URLParam(r, "id")
+	if hostID == "" {
+		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
+		return
+	}
+
+	sealed, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindAdmin)
+	switch {
+	case errors.Is(err, store.ErrNotFound):
+		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
+		return
+	case err != nil:
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+
+	// The admin slot is sealed under its own AAD, distinct from the
+	// repo slot's "host:<id>".
+	aad := []byte("host:" + hostID + ":admin")
+	opened, err := s.deps.AEAD.Decrypt(sealed, aad)
+	if err != nil {
+		writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
+		return
+	}
+
+	var creds repoCredsBlob
+	if err = json.Unmarshal(opened, &creds); err != nil {
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+
+	view := hostRepoCredsView{
+		RepoURL:      creds.RepoURL,
+		RepoUsername: creds.RepoUsername,
+		HasPassword:  creds.RepoPassword != "",
+	}
+	writeJSON(w, stdhttp.StatusOK, view)
+}
+
+// handleSetAdminCredentials lets an operator/admin update a host's admin
+// creds (the prune-capable slot). Same merge-then-validate semantics as
+// handleSetHostCredentials but operates on store.CredKindAdmin: fields
+// omitted from the request keep their stored value, and the merged
+// result must have a non-empty repo_url and repo_password.
+//
+// After persisting and appending an audit row, pushes a config.update
+// with Slot:"admin" over the WS if the agent is connected. A failed
+// push is logged but does not fail the request.
+//
+// Responds 204 on success; 401 unauthenticated; 400 for a missing id,
+// invalid JSON or an incomplete merged blob; 404 when the host lookup
+// fails; 500 on store/crypto failures.
+func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	user, ok := s.requireUser(r)
+	if !ok {
+		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
+		return
+	}
+	hostID := chi.URLParam(r, "id")
+	if hostID == "" {
+		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
+		return
+	}
+	if _, err := s.deps.Store.GetHost(r.Context(), hostID); err != nil {
+		writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
+		return
+	}
+
+	var req hostRepoCredsRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
+		return
+	}
+
+	// Merge with the existing admin row, if any.
+	existing := repoCredsBlob{}
+	aad := []byte("host:" + hostID + ":admin")
+	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindAdmin); err == nil {
+		plain, err := s.deps.AEAD.Decrypt(cur, aad)
+		if err != nil {
+			writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
+			return
+		}
+		if err := json.Unmarshal(plain, &existing); err != nil {
+			// Decryptable but unparseable: don't merge onto whatever
+			// partial state Unmarshal left behind. Start from empty so
+			// the validation below demands a complete replacement. The
+			// parse error itself is not logged because it derives from
+			// decrypted credential bytes.
+			slog.Warn("admin creds: stored blob unparseable, merging onto empty",
+				"host_id", hostID)
+			existing = repoCredsBlob{}
+		}
+	} else if !errors.Is(err, store.ErrNotFound) {
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+
+	if req.RepoURL != nil {
+		existing.RepoURL = *req.RepoURL
+	}
+	if req.RepoUsername != nil {
+		existing.RepoUsername = *req.RepoUsername
+	}
+	if req.RepoPassword != nil {
+		existing.RepoPassword = *req.RepoPassword
+	}
+	if existing.RepoURL == "" || existing.RepoPassword == "" {
+		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
+			"repo_url and repo_password must end up non-empty")
+		return
+	}
+
+	enc, err := s.encryptRepoCreds(existing, aad)
+	if err != nil {
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindAdmin, enc); err != nil {
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+
+	// Audit is best-effort, matching handleSetHostCredentials.
+	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
+		ID:         ulid.Make().String(),
+		UserID:     &user.ID,
+		Actor:      "user",
+		Action:     "host.admin_credentials_set",
+		TargetKind: ptr("host"),
+		TargetID:   &hostID,
+		TS:         nowUTC(),
+	})
+
+	// Push to the agent if it's connected. Non-fatal: the next
+	// handleRunRepoPrune call will push on-demand. Log the failure so
+	// an agent left holding stale admin creds is diagnosable.
+	if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
+		if err := s.pushAdminCredsToAgent(r.Context(), hostID); err != nil {
+			slog.Warn("admin creds: push to agent failed",
+				"host_id", hostID, "err", err)
+		}
+	}
+
+	w.WriteHeader(stdhttp.StatusNoContent)
+}
+
+// handleDeleteAdminCredentials drops the host's admin-slot credentials
+// row and appends a "host.admin_credentials_deleted" audit entry.
+// Responds 204 on success and 404 ("not_set") when no row exists.
+// Nothing is pushed to the agent — the agent's local admin slot stays
+// as-is until the next deployment/reinstall.
+func (s *Server) handleDeleteAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	user, ok := s.requireUser(r)
+	if !ok {
+		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
+		return
+	}
+	hostID := chi.URLParam(r, "id")
+	if hostID == "" {
+		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
+		return
+	}
+	ctx := r.Context()
+
+	// Probe for the row up front so a missing one yields a clean 404.
+	_, lookupErr := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
+	switch {
+	case errors.Is(lookupErr, store.ErrNotFound):
+		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
+		return
+	case lookupErr != nil:
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+
+	delErr := s.deps.Store.DeleteHostCredentials(ctx, hostID, store.CredKindAdmin)
+	if delErr != nil {
+		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+
+	// Audit is best-effort; its error is intentionally discarded.
+	entry := store.AuditEntry{
+		ID:         ulid.Make().String(),
+		UserID:     &user.ID,
+		Actor:      "user",
+		Action:     "host.admin_credentials_deleted",
+		TargetKind: ptr("host"),
+		TargetID:   &hostID,
+		TS:         nowUTC(),
+	}
+	_ = s.deps.Store.AppendAudit(ctx, entry)
+
+	w.WriteHeader(stdhttp.StatusNoContent)
+}
+
+// pushAdminCredsToAgent loads the host's admin-slot credentials,
+// decrypts them and sends them to the agent as a config.update with
+// Slot "admin". The send is bounded by a 5-second timeout derived
+// from ctx. Used by:
+//   - handleSetAdminCredentials (immediate push when operator saves).
+//   - handleRunRepoPrune (on-demand push right before a prune dispatch).
+//
+// Returns store.ErrNotFound if no admin row exists for the host
+// (the prune endpoint uses this to refuse with a clear message).
+func (s *Server) pushAdminCredsToAgent(ctx context.Context, hostID string) error {
+	sealed, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
+	if err != nil {
+		// Returned unwrapped so ErrNotFound bubbles to the caller.
+		return err
+	}
+
+	aad := []byte("host:" + hostID + ":admin")
+	opened, err := s.deps.AEAD.Decrypt(sealed, aad)
+	if err != nil {
+		return fmt.Errorf("push admin creds: decrypt: %w", err)
+	}
+
+	var creds repoCredsBlob
+	if err = json.Unmarshal(opened, &creds); err != nil {
+		return fmt.Errorf("push admin creds: parse: %w", err)
+	}
+
+	payload := api.ConfigUpdatePayload{
+		Slot:         "admin",
+		RepoURL:      creds.RepoURL,
+		RepoUsername: creds.RepoUsername,
+		RepoPassword: creds.RepoPassword,
+	}
+	env, err := api.Marshal(api.MsgConfigUpdate, "", payload)
+	if err != nil {
+		return err
+	}
+
+	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+	return s.deps.Hub.Send(sendCtx, hostID, env)
+}
+
 // onAgentHello runs synchronously inside the WS handler immediately
 // after a successful hello. It loads the host's encrypted creds (if
 // any), decrypts, and ships them down the conn as a config.update so
@@ -205,6 +411,11 @@ func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn)
 	// just no-ops. Skipped silently when the host has no creds yet —
 	// the next hello after the operator binds creds will dispatch.
 	s.maybeAutoInit(ctx, hostID, conn)
+	// Drain any pending runs that accumulated while this host was
+	// offline. Use a fresh context — the hello-bound ctx is short-lived,
+	// and the drain may take seconds across many rows. A non-blocking
+	// goroutine keeps the hello path snappy.
+	go s.DrainPending(context.Background(), hostID)
 }

 // maybeAutoInit dispatches a `restic init` job iff the host has no
@@ -212,7 +423,7 @@ func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn)
 // them the runner can't talk to the repo). We rely on Restic's
 // idempotent init for re-runs.
 func (s *Server) maybeAutoInit(ctx context.Context, hostID string, conn *ws.Conn) {
-	if _, err := s.deps.Store.GetHostCredentials(ctx, hostID); err != nil {
+	if _, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindRepo); err != nil {
 		// No creds bound yet — operator hasn't supplied them. The next
 		// hello after creds land will pick this up.
 		return
@@ -266,7 +477,7 @@ func (s *Server) maybeAutoInit(ctx context.Context, hostID string, conn *ws.Conn
 // credentials. Silent no-op when the host has nothing on file
 // (the operator hasn't bound creds to it yet).
 func (s *Server) pushRepoCredsOnHello(ctx context.Context, hostID string, conn *ws.Conn) {
-	enc, err := s.deps.Store.GetHostCredentials(ctx, hostID)
+	enc, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindRepo)
 	if err != nil {
 		if !errors.Is(err, store.ErrNotFound) {
 			slog.Warn("on-hello: load host creds", "host_id", hostID, "err", err)
@@ -5,6 +5,9 @@ import (
 	"encoding/json"
 	"testing"
 	"time"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
 )

 // TestEnrollmentTransfersRepoCreds verifies the round-trip:
@@ -57,12 +60,12 @@ func TestEnrollmentTransfersRepoCreds(t *testing.T) {
 		hostID, "host42", "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
 		t.Fatalf("insert host: %v", err)
 	}
-	if err := st.SetHostCredentials(ctx, hostID, encForHost); err != nil {
+	if err := st.SetHostCredentials(ctx, hostID, store.CredKindRepo, encForHost); err != nil {
 		t.Fatalf("set host credentials: %v", err)
 	}

 	// host_credentials row should now hold the host-bound ciphertext.
-	got, err := st.GetHostCredentials(ctx, hostID)
+	got, err := st.GetHostCredentials(ctx, hostID, store.CredKindRepo)
 	if err != nil {
 		t.Fatalf("get host creds: %v", err)
 	}
@@ -105,3 +108,263 @@ func TestEnrollmentTokenWithoutCreds(t *testing.T) {
 		t.Errorf("token without creds should return empty blob; got %q", att.EncRepoCreds)
 	}
 }
+
+// ----- admin credentials tests ----------------------------------------
+
+// TestAdminCredentialsRoundTrip verifies the admin-credentials REST
+// lifecycle end to end: GET (404) → PUT (204) → GET (200, redacted
+// view) → DELETE (204) → GET (404).
+func TestAdminCredentialsRoundTrip(t *testing.T) {
+	t.Parallel()
+	// The server handle itself isn't needed; only its URL and store.
+	_, url, st := newTestServerWithHub(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "admin-creds-host")
+
+	// Mark init done so auto-init doesn't interfere. Best-effort: the
+	// assertions below do not depend on this row.
+	_ = st.CreateJob(context.Background(), store.Job{
+		ID:        "init-" + hostID,
+		HostID:    hostID,
+		Kind:      string(api.JobInit),
+		ActorKind: "system",
+		CreatedAt: time.Now().UTC(),
+	})
+
+	// GET before set → 404.
+	status, body := doJSON(t, url, "GET", "/api/hosts/"+hostID+"/admin-credentials", nil, cookie)
+	if status != 404 {
+		t.Fatalf("before set: want 404, got %d body=%+v", status, body)
+	}
+
+	// PUT — set admin creds.
+	status, body = doJSON(t, url, "PUT", "/api/hosts/"+hostID+"/admin-credentials",
+		map[string]any{
+			"repo_url":      "rest:http://admin.example/host",
+			"repo_username": "admin",
+			"repo_password": "s3cur3",
+		}, cookie)
+	if status != 204 {
+		t.Fatalf("set: want 204, got %d body=%+v", status, body)
+	}
+
+	// GET — should return redacted view.
+	status, body = doJSON(t, url, "GET", "/api/hosts/"+hostID+"/admin-credentials", nil, cookie)
+	if status != 200 {
+		t.Fatalf("get after set: want 200, got %d body=%+v", status, body)
+	}
+	if body["repo_url"] != "rest:http://admin.example/host" {
+		t.Errorf("repo_url: %+v", body)
+	}
+	if body["repo_username"] != "admin" {
+		t.Errorf("repo_username: %+v", body)
+	}
+	if body["has_password"] != true {
+		t.Errorf("has_password: %+v", body)
+	}
+
+	// DELETE.
+	status, _ = doJSON(t, url, "DELETE", "/api/hosts/"+hostID+"/admin-credentials", nil, cookie)
+	if status != 204 {
+		t.Fatalf("delete: want 204, got %d", status)
+	}
+
+	// GET after delete → 404.
+	status, _ = doJSON(t, url, "GET", "/api/hosts/"+hostID+"/admin-credentials", nil, cookie)
+	if status != 404 {
+		t.Fatalf("after delete: want 404, got %d", status)
+	}
+}
+
+// TestAdminCredsAADIsolatedFromRepo proves the admin slot's AAD
+// scoping is real: a blob sealed under the repo AAD ("host:<id>") is
+// planted in the admin kind slot, and the GET handler — which opens
+// with the admin AAD — must answer 500 decrypt_failed.
+func TestAdminCredsAADIsolatedFromRepo(t *testing.T) {
+	t.Parallel()
+	srv, url, st := newTestServerWithHub(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "aad-isolation-host")
+
+	// Seal with the REPO AAD, which is the wrong one for the admin slot.
+	repoAAD := []byte("host:" + hostID)
+	blob := repoCredsBlob{
+		RepoURL:      "rest:http://r/x",
+		RepoPassword: "p",
+	}
+	sealed, err := srv.encryptRepoCreds(blob, repoAAD)
+	if err != nil {
+		t.Fatalf("encrypt: %v", err)
+	}
+
+	// Plant it straight into the admin kind slot, bypassing the handler.
+	err = st.SetHostCredentials(context.Background(), hostID, store.CredKindAdmin, sealed)
+	if err != nil {
+		t.Fatalf("set host credentials: %v", err)
+	}
+
+	// The handler decrypts with the admin AAD, so this must fail → 500.
+	status, body := doJSON(t, url, "GET", "/api/hosts/"+hostID+"/admin-credentials", nil, cookie)
+	if status != 500 {
+		t.Fatalf("want 500 (decrypt_failed), got %d body=%+v", status, body)
+	}
+	code, _ := body["code"].(string)
+	if code != "decrypt_failed" {
+		t.Errorf("want code=decrypt_failed, got %+v", body)
+	}
+}
+
+// TestAdminCredsPushOnSet connects a fake WS host, sets admin creds via
+// PUT, drains the conn, and asserts a config.update with Slot:"admin"
+// was shipped carrying the URL that was just saved.
+func TestAdminCredsPushOnSet(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "admin-push-host")
+	cookie := loginAsAdmin(t, st)
+
+	// Dial in as the agent and complete the hello handshake.
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "admin-push-host")
+
+	// Drain the on-hello burst (config.update for repo + schedule.set
+	// + possibly command.run(init)).
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// Now PUT admin creds — should trigger an immediate push.
+	status, body := doJSON(t, ts.URL, "PUT", "/api/hosts/"+hostID+"/admin-credentials",
+		map[string]any{
+			"repo_url":      "rest:http://admin.example/h",
+			"repo_username": "admin",
+			"repo_password": "prune-pass",
+		}, cookie)
+	if status != 204 {
+		t.Fatalf("set admin creds: want 204, got %d body=%+v", status, body)
+	}
+
+	// Drain until we see a config.update with Slot=admin. Envelopes of
+	// any other type, and config.updates for other slots, are skipped.
+	deadline := time.Now().Add(3 * time.Second)
+	found := false
+	for !found && time.Now().Before(deadline) {
+		env := readEnvelope(t, c)
+		if env.Type != api.MsgConfigUpdate {
+			continue
+		}
+		var p api.ConfigUpdatePayload
+		if err := env.UnmarshalPayload(&p); err != nil {
+			t.Fatalf("unmarshal config.update: %v", err)
+		}
+		if p.Slot == "admin" {
+			found = true
+			if p.RepoURL != "rest:http://admin.example/h" {
+				t.Errorf("admin push: wrong URL %q", p.RepoURL)
+			}
+		}
+	}
+	if !found {
+		t.Fatal("timed out waiting for config.update(slot=admin)")
+	}
+}
+
+// TestDeleteAdminCredentialsAuditLogged verifies that DELETE on the
+// admin-credentials endpoint leaves an audit row with
+// action='host.admin_credentials_deleted' whose user_id is the acting
+// user's ID.
+func TestDeleteAdminCredentialsAuditLogged(t *testing.T) {
+	t.Parallel()
+	_, url, st := newTestServerWithHub(t)
+	cookie, userID := loginAsAdminWithID(t, st)
+	hostID := makeHost(t, st, "audit-del-host")
+	endpoint := "/api/hosts/" + hostID + "/admin-credentials"
+
+	// Seed admin creds so the delete has something to remove.
+	seed := map[string]any{
+		"repo_url":      "rest:http://x/h",
+		"repo_password": "p",
+	}
+	status, body := doJSON(t, url, "PUT", endpoint, seed, cookie)
+	if status != 204 {
+		t.Fatalf("set: want 204, got %d body=%+v", status, body)
+	}
+
+	status, _ = doJSON(t, url, "DELETE", endpoint, nil, cookie)
+	if status != 204 {
+		t.Fatalf("delete: want 204, got %d", status)
+	}
+
+	// Pull every matching audit row and check who it is attributed to.
+	rows, err := st.DB().QueryContext(context.Background(),
+		`SELECT action, user_id FROM audit_log WHERE target_id = ? AND target_kind = 'host' AND action = 'host.admin_credentials_deleted'`,
+		hostID)
+	if err != nil {
+		t.Fatalf("query audit: %v", err)
+	}
+	defer rows.Close()
+
+	sawRow := false
+	for rows.Next() {
+		var (
+			action string
+			actor  *string
+		)
+		if err := rows.Scan(&action, &actor); err != nil {
+			t.Fatalf("scan: %v", err)
+		}
+		sawRow = true
+		switch {
+		case actor == nil:
+			t.Error("audit row: user_id is NULL, want non-nil")
+		case *actor != userID:
+			t.Errorf("audit row: user_id=%q, want %q", *actor, userID)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		t.Fatalf("rows: %v", err)
+	}
+	if !sawRow {
+		t.Error("audit row with action='host.admin_credentials_deleted' not found")
+	}
+}
+
+// TestSetAdminCredentialsAuditCarriesUserID verifies that PUT
+// /api/hosts/{id}/admin-credentials leaves an audit row with
+// action='host.admin_credentials_set' whose user_id matches the
+// acting session's user.
+func TestSetAdminCredentialsAuditCarriesUserID(t *testing.T) {
+	t.Parallel()
+	_, url, st := newTestServerWithHub(t)
+	cookie, userID := loginAsAdminWithID(t, st)
+	hostID := makeHost(t, st, "audit-set-admin-host")
+
+	reqBody := map[string]any{
+		"repo_url":      "rest:http://admin.example/h",
+		"repo_password": "s3cr3t",
+	}
+	status, body := doJSON(t, url, "PUT", "/api/hosts/"+hostID+"/admin-credentials", reqBody, cookie)
+	if status != 204 {
+		t.Fatalf("set: want 204, got %d body=%+v", status, body)
+	}
+
+	// Pull every matching audit row and check who it is attributed to.
+	rows, err := st.DB().QueryContext(context.Background(),
+		`SELECT action, user_id FROM audit_log WHERE target_id = ? AND target_kind = 'host' AND action = 'host.admin_credentials_set'`,
+		hostID)
+	if err != nil {
+		t.Fatalf("query audit: %v", err)
+	}
+	defer rows.Close()
+
+	sawRow := false
+	for rows.Next() {
+		var (
+			action string
+			actor  *string
+		)
+		if err := rows.Scan(&action, &actor); err != nil {
+			t.Fatalf("scan: %v", err)
+		}
+		sawRow = true
+		switch {
+		case actor == nil:
+			t.Error("audit row: user_id is NULL, want non-nil")
+		case *actor != userID:
+			t.Errorf("audit row: user_id=%q, want %q", *actor, userID)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		t.Fatalf("rows: %v", err)
+	}
+	if !sawRow {
+		t.Error("audit row with action='host.admin_credentials_set' not found")
+	}
+}
@@ -72,7 +72,7 @@ func (s *Server) dispatchJob(ctx context.Context, user *store.User,
 }

 // dispatchJobWithPayload is dispatchJob's variant that lets callers
-// fill in structured fields (Includes/Excludes/Tag/RetentionPolicy)
+// fill in structured fields (Includes/Excludes/Tag/ForgetGroups/RequiresAdminCreds)
 // — used by the per-source-group Run-now path. JobID is filled in
 // here; callers leave it zero on the input payload.
 func (s *Server) dispatchJobWithPayload(ctx context.Context, user *store.User,
@@ -0,0 +1,132 @@
+// maintenance_dispatch.go bridges the pure-logic maintenance.Ticker
+// (internal/server/maintenance) to the side-effecting world: checks
+// online state, builds the per-kind command.run payload, and calls
+// dispatchJobWithPayload — the same path operator-triggered Run-now
+// uses. Cadence-driven jobs are persisted with actor_kind="system"
+// (dispatchJobWithPayload tags it that way when user==nil).
+//
+// Maintenance fires deliberately do NOT queue to pending_runs when
+// the host is offline — five missed prunes on a laptop returning
+// from a week away is not what the operator wants. Skip + log; the
+// next 60s tick will re-evaluate.
+package http
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+	"strconv"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// DispatchMaintenance acts on each Decision from the ticker, one at a
+// time, dispatching through dispatchJobWithPayload with a nil user.
+//
+// Skips (each logged, never queued):
+//   - no Hub configured: every decision is skipped;
+//   - host offline;
+//   - forget with no source group carrying a retention policy;
+//   - prune with no admin creds on file — the operator hasn't
+//     completed the admin-creds setup yet. (Operator-triggered prune
+//     via the run-now endpoint returns a clear error instead —
+//     different path, different UX.)
+//
+// Dispatch failures are logged at Warn and do not stop the loop.
+func (s *Server) DispatchMaintenance(ctx context.Context, decisions []maintenance.Decision) {
+	if s.deps.Hub == nil {
+		// Without a hub no host can be online; bail out rather than
+		// dereference nil below.
+		if len(decisions) > 0 {
+			slog.Warn("maintenance: no hub configured, skipping decisions",
+				"count", len(decisions))
+		}
+		return
+	}
+	for _, d := range decisions {
+		if !s.deps.Hub.Connected(d.HostID) {
+			slog.Info("maintenance: host offline, skipping",
+				"host_id", d.HostID, "kind", d.Kind)
+			continue
+		}
+		switch d.Kind {
+		case "forget":
+			payload, ok := s.buildForgetPayloadForHost(ctx, d.HostID)
+			if !ok {
+				slog.Info("maintenance: forget skipped — no source groups with retention",
+					"host_id", d.HostID)
+				continue
+			}
+			_, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobForget, payload)
+			if code != "" {
+				slog.Warn("maintenance: forget dispatch failed",
+					"host_id", d.HostID, "code", code, "msg", msg)
+			}
+		case "prune":
+			// pushAdminCredsToAgent performs the admin-row lookup itself
+			// and bubbles store.ErrNotFound, so a separate existence
+			// probe (a second store read) is unnecessary.
+			if err := s.pushAdminCredsToAgent(ctx, d.HostID); err != nil {
+				if errors.Is(err, store.ErrNotFound) {
+					slog.Info("maintenance: prune skipped — no admin creds",
+						"host_id", d.HostID)
+					continue
+				}
+				slog.Warn("maintenance: prune push admin creds failed",
+					"host_id", d.HostID, "err", err)
+				continue
+			}
+			payload := api.CommandRunPayload{RequiresAdminCreds: true}
+			_, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobPrune, payload)
+			if code != "" {
+				slog.Warn("maintenance: prune dispatch failed",
+					"host_id", d.HostID, "code", code, "msg", msg)
+			}
+		case "check":
+			// The subset percentage travels as the single string arg.
+			payload := api.CommandRunPayload{Args: []string{strconv.Itoa(d.SubsetPct)}}
+			_, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobCheck, payload)
+			if code != "" {
+				slog.Warn("maintenance: check dispatch failed",
+					"host_id", d.HostID, "code", code, "msg", msg)
+			}
+		default:
+			slog.Warn("maintenance: unknown decision kind",
+				"host_id", d.HostID, "kind", d.Kind)
+		}
+	}
+}
+
+// buildForgetPayloadForHost collects every source group on the host
+// that has a non-empty retention policy and builds a CommandRunPayload
+// with ForgetGroups populated. Returns ok=false if the host has no
+// such groups (the dispatcher then skips this kind).
+func (s *Server) buildForgetPayloadForHost(ctx context.Context, hostID string) (api.CommandRunPayload, bool) {
+	groups, err := s.deps.Store.ListSourceGroupsByHost(ctx, hostID)
+	if err != nil {
+		slog.Warn("maintenance: list source groups failed", "host_id", hostID, "err", err)
+		return api.CommandRunPayload{}, false
+	}
+	fg := make([]api.ForgetGroup, 0, len(groups))
+	for _, g := range groups {
+		if isEmptyRetention(g.RetentionPolicy) {
+			continue
+		}
+		fg = append(fg, api.ForgetGroup{
+			Tag:    g.Name,
+			Policy: forgetPolicyJSONFromStore(g.RetentionPolicy),
+		})
+	}
+	if len(fg) == 0 {
+		return api.CommandRunPayload{}, false
+	}
+	return api.CommandRunPayload{ForgetGroups: fg}, true
+}
+
+// isEmptyRetention reports whether none of the policy's keep-* fields
+// (last, hourly, daily, weekly, monthly, yearly) is set.
+func isEmptyRetention(p store.RetentionPolicy) bool {
+	anySet := p.KeepLast != nil || p.KeepHourly != nil ||
+		p.KeepDaily != nil || p.KeepWeekly != nil ||
+		p.KeepMonthly != nil || p.KeepYearly != nil
+	return !anySet
+}
+
+// forgetPolicyJSONFromStore copies retention pointers from the store
+// view to the wire view. Both shapes are field-for-field identical;
+// this avoids importing store from internal/api (which would invert
+// the dependency direction).
+func forgetPolicyJSONFromStore(p store.RetentionPolicy) api.ForgetPolicyJSON {
+	return api.ForgetPolicyJSON{
+		KeepLast:    p.KeepLast,
+		KeepHourly:  p.KeepHourly,
+		KeepDaily:   p.KeepDaily,
+		KeepWeekly:  p.KeepWeekly,
+		KeepMonthly: p.KeepMonthly,
+		KeepYearly:  p.KeepYearly,
+	}
+}
@@ -0,0 +1,304 @@
+// maintenance_dispatch_test.go — exercises Server.DispatchMaintenance
+// directly (one Decision at a time). Reuses the same fake-agent
+// harness as p2r01_ws_test / repo_ops_test: a real Server with a
+// real Hub, plus a websocket connected as the host. We then push
+// Decisions through DispatchMaintenance and assert the envelopes
+// the agent receives + the job rows that land.
+package http
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+	"time"
+
+	"github.com/coder/websocket"
+	"github.com/oklog/ulid/v2"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// readNextCommandRun reads from c until it decodes a command.run
+// envelope and returns that envelope's payload. Non-text frames,
+// undecodable frames and envelopes of any other type are skipped.
+//
+// It returns nil when the deadline passes before a command.run is
+// seen, and also as soon as any single read fails (each read is
+// bounded by its own 600ms timeout).
+func readNextCommandRun(t *testing.T, c *websocket.Conn, deadline time.Time) *api.CommandRunPayload {
+	t.Helper()
+	const perRead = 600 * time.Millisecond
+	for {
+		if !time.Now().Before(deadline) {
+			return nil
+		}
+		readCtx, stop := context.WithTimeout(context.Background(), perRead)
+		msgType, data, readErr := c.Read(readCtx)
+		stop()
+		if readErr != nil {
+			return nil
+		}
+		if msgType != websocket.MessageText {
+			continue
+		}
+		var envelope api.Envelope
+		if json.Unmarshal(data, &envelope) != nil || envelope.Type != api.MsgCommandRun {
+			continue
+		}
+		payload := new(api.CommandRunPayload)
+		if envelope.UnmarshalPayload(payload) != nil {
+			continue
+		}
+		return payload
+	}
+}
+
+// TestDispatchMaintenanceSkipsOfflineHosts: host not connected → no
+// envelope, no job row.
+//
+// The host is enrolled but this test never dials a websocket for it,
+// so the only observable outcome is the jobs table, which must stay
+// empty for the host after the check decision is dispatched.
+func TestDispatchMaintenanceSkipsOfflineHosts(t *testing.T) {
+	t.Parallel()
+	srv, _, st := rawTestServer(t)
+	hostID, _ := enrolHostForWS(t, srv, st, "offline-host")
+
+	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
+		{HostID: hostID, Kind: "check", SubsetPct: 10},
+	})
+
+	// Count every job row for the host, regardless of kind.
+	var n int
+	if err := st.DB().QueryRow(
+		`SELECT COUNT(*) FROM jobs WHERE host_id = ?`, hostID).Scan(&n); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 0 {
+		t.Errorf("offline host produced %d job rows; want 0", n)
+	}
+}
+
+// TestDispatchMaintenanceForgetShipsForgetGroups: connected host with
+// two source groups (one with retention, one without). Decision of
+// kind=forget → command.run with ForgetGroups containing only the
+// group that had retention.
+//
+// Also asserts that the persisted forget job row has
+// actor_kind=system.
+func TestDispatchMaintenanceForgetShipsForgetGroups(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "forget-host")
+	seedInitJob(t, st, hostID)
+
+	// "documents" carries KeepLast=7 and must appear in the payload.
+	keep := 7
+	if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
+		ID: ulid.Make().String(), HostID: hostID, Name: "documents",
+		Includes:        []string{"/home/documents"},
+		RetentionPolicy: store.RetentionPolicy{KeepLast: &keep},
+	}); err != nil {
+		t.Fatalf("group docs: %v", err)
+	}
+	// "ephemeral" has a zero-value retention policy and must be left out.
+	if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
+		ID: ulid.Make().String(), HostID: hostID, Name: "ephemeral",
+		Includes: []string{"/tmp"},
+	}); err != nil {
+		t.Fatalf("group eph: %v", err)
+	}
+
+	// Connect as the agent and consume frames up to schedule.set before
+	// dispatching (presumably the end of the hello handshake — TODO confirm
+	// against drainUntil).
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "forget-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
+		{HostID: hostID, Kind: "forget"},
+	})
+
+	got := readNextCommandRun(t, c, time.Now().Add(2*time.Second))
+	if got == nil {
+		t.Fatal("no command.run received")
+	}
+	if got.Kind != api.JobForget {
+		t.Errorf("kind: got %q, want %q", got.Kind, api.JobForget)
+	}
+	// Fatal on a length mismatch: the index accesses below need exactly
+	// one entry.
+	if len(got.ForgetGroups) != 1 {
+		t.Fatalf("ForgetGroups: got %d entries (%+v), want 1", len(got.ForgetGroups), got.ForgetGroups)
+	}
+	if got.ForgetGroups[0].Tag != "documents" {
+		t.Errorf("forget group tag: got %q, want %q", got.ForgetGroups[0].Tag, "documents")
+	}
+	if got.ForgetGroups[0].Policy.KeepLast == nil || *got.ForgetGroups[0].Policy.KeepLast != 7 {
+		t.Errorf("forget group policy: got %+v", got.ForgetGroups[0].Policy)
+	}
+
+	// Job row must be persisted with actor_kind=system.
+	var actor string
+	if err := st.DB().QueryRow(
+		`SELECT actor_kind FROM jobs WHERE host_id = ? AND kind = 'forget'`, hostID).Scan(&actor); err != nil {
+		t.Fatalf("query actor_kind: %v", err)
+	}
+	if actor != "system" {
+		t.Errorf("actor_kind: got %q, want system", actor)
+	}
+}
+
+// TestDispatchMaintenanceForgetSkipsHostWithNoRetention: connected
+// host, but every source group has empty retention → no envelope.
+//
+// Also asserts that no forget job row was persisted for the host.
+func TestDispatchMaintenanceForgetSkipsHostWithNoRetention(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "no-ret-host")
+	seedInitJob(t, st, hostID)
+	// The only group on the host has a zero-value retention policy.
+	if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
+		ID: ulid.Make().String(), HostID: hostID, Name: "ephemeral",
+		Includes: []string{"/tmp"},
+	}); err != nil {
+		t.Fatalf("group: %v", err)
+	}
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "no-ret-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
+		{HostID: hostID, Kind: "forget"},
+	})
+
+	// Negative assertion: give the server 800ms to (wrongly) send a
+	// command.run before concluding that none is coming.
+	if got := readNextCommandRun(t, c, time.Now().Add(800*time.Millisecond)); got != nil {
+		t.Errorf("unexpected command.run: %+v", got)
+	}
+	var n int
+	if err := st.DB().QueryRow(`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'forget'`, hostID).Scan(&n); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 0 {
+		t.Errorf("forget job rows: got %d, want 0", n)
+	}
+}
+
+// TestDispatchMaintenancePruneSkipsWithoutAdminCreds: no admin creds
+// row → no envelope, no job row, silent skip.
+//
+// Unlike the prune test that expects a dispatch, this one never calls
+// setAdminCreds for the host.
+func TestDispatchMaintenancePruneSkipsWithoutAdminCreds(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "no-admin-host")
+	seedInitJob(t, st, hostID)
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "no-admin-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
+		{HostID: hostID, Kind: "prune"},
+	})
+
+	// Negative assertion: no command.run may arrive within 800ms.
+	if got := readNextCommandRun(t, c, time.Now().Add(800*time.Millisecond)); got != nil {
+		t.Errorf("unexpected command.run: %+v", got)
+	}
+	var n int
+	if err := st.DB().QueryRow(`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'prune'`, hostID).Scan(&n); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 0 {
+		t.Errorf("prune job rows: got %d, want 0", n)
+	}
+}
+
+// TestDispatchMaintenancePruneShipsConfigUpdateThenCommandRun: with
+// admin creds set, prune dispatch must push admin config.update first
+// then command.run(prune, RequiresAdminCreds=true).
+//
+// Ordering is enforced by the read loop: it stops at the first prune
+// command.run, so a config.update(slot=admin) only counts if it was
+// read before that point.
+func TestDispatchMaintenancePruneShipsConfigUpdateThenCommandRun(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "prune-mt-host")
+	setAdminCreds(t, srv, st, hostID)
+	seedInitJob(t, st, hostID)
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "prune-mt-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
+		{HostID: hostID, Kind: "prune"},
+	})
+
+	// Read until we've seen both config.update(slot=admin) and the
+	// prune command.run.
+	deadline := time.Now().Add(3 * time.Second)
+	var sawAdminPush bool
+	var prunePayload *api.CommandRunPayload
+	for prunePayload == nil && time.Now().Before(deadline) {
+		// Each read gets its own 600ms budget; any read error ends the loop.
+		ctx, cancel := context.WithTimeout(context.Background(), 600*time.Millisecond)
+		mt, raw, err := c.Read(ctx)
+		cancel()
+		if err != nil {
+			break
+		}
+		if mt != websocket.MessageText {
+			continue
+		}
+		var env api.Envelope
+		if err := json.Unmarshal(raw, &env); err != nil {
+			continue
+		}
+		switch env.Type {
+		case api.MsgConfigUpdate:
+			var p api.ConfigUpdatePayload
+			if err := env.UnmarshalPayload(&p); err == nil && p.Slot == "admin" {
+				sawAdminPush = true
+			}
+		case api.MsgCommandRun:
+			var p api.CommandRunPayload
+			if err := env.UnmarshalPayload(&p); err == nil && p.Kind == api.JobPrune {
+				// Copy before taking the address so the pointer does not
+				// alias the case-scoped variable.
+				cp := p
+				prunePayload = &cp
+			}
+		}
+	}
+	if !sawAdminPush {
+		t.Error("expected config.update(slot=admin) before prune dispatch")
+	}
+	if prunePayload == nil {
+		t.Fatal("timed out waiting for command.run(prune)")
+	}
+	if !prunePayload.RequiresAdminCreds {
+		t.Error("prune command.run must have RequiresAdminCreds=true")
+	}
+
+	// Persisted job must be system actor.
+	var actor string
+	if err := st.DB().QueryRow(
+		`SELECT actor_kind FROM jobs WHERE host_id = ? AND kind = 'prune'`, hostID).Scan(&actor); err != nil {
+		t.Fatalf("query actor_kind: %v", err)
+	}
+	if actor != "system" {
+		t.Errorf("actor_kind: got %q, want system", actor)
+	}
+}
+
+// TestDispatchMaintenanceCheckCarriesSubset: Decision SubsetPct=15 →
+// command.run.Args == ["15"]. Job row actor_kind=system.
+//
+// The subset percentage travels as a single decimal string argument.
+func TestDispatchMaintenanceCheckCarriesSubset(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "check-mt-host")
+	seedInitJob(t, st, hostID)
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "check-mt-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	srv.DispatchMaintenance(context.Background(), []maintenance.Decision{
+		{HostID: hostID, Kind: "check", SubsetPct: 15},
+	})
+
+	got := readNextCommandRun(t, c, time.Now().Add(2*time.Second))
+	if got == nil {
+		t.Fatal("no command.run received")
+	}
+	if got.Kind != api.JobCheck {
+		t.Errorf("kind: got %q, want %q", got.Kind, api.JobCheck)
+	}
+	// Exactly one argument, the percentage rendered as text.
+	if len(got.Args) != 1 || got.Args[0] != "15" {
+		t.Errorf("Args: got %+v, want [15]", got.Args)
+	}
+
+	// The persisted check job must be attributed to the system actor.
+	var actor string
+	if err := st.DB().QueryRow(
+		`SELECT actor_kind FROM jobs WHERE host_id = ? AND kind = 'check'`, hostID).Scan(&actor); err != nil {
+		t.Fatalf("query actor_kind: %v", err)
+	}
+	if actor != "system" {
+		t.Errorf("actor_kind: got %q, want system", actor)
+	}
+}
@@ -47,6 +47,32 @@ func loginAsAdmin(t *testing.T, st *store.Store) *stdhttp.Cookie {
 	return &stdhttp.Cookie{Name: sessionCookieName, Value: tok}
 }

+// loginAsAdminWithID is like loginAsAdmin but also returns the user ID.
+// Use this when tests need to assert that the user ID was recorded
+// (e.g. on audit entries).
+//
+// It creates an admin user with a unique username derived from a fresh
+// ULID, creates a one-hour session for that user, and returns the
+// session cookie together with the user ID. Any failure along the way
+// (hashing, token generation, store writes) aborts the test, so a
+// broken fixture is reported here rather than as a confusing
+// authentication failure later in the test.
+func loginAsAdminWithID(t *testing.T, st *store.Store) (*stdhttp.Cookie, string) {
+	t.Helper()
+	ctx := context.Background()
+	uid := ulid.Make().String()
+	hash, err := auth.HashPassword("very-long-test-password")
+	if err != nil {
+		t.Fatalf("hash password: %v", err)
+	}
+	if err := st.CreateUser(ctx, store.User{
+		ID: uid, Username: "tester-" + uid[:6],
+		PasswordHash: hash, Role: store.RoleAdmin,
+		CreatedAt: time.Now().UTC(),
+	}); err != nil {
+		t.Fatalf("create user: %v", err)
+	}
+	tok, err := auth.NewToken()
+	if err != nil {
+		t.Fatalf("new token: %v", err)
+	}
+	// Only the hash of the token is stored; the cookie carries the raw
+	// token.
+	if err := st.CreateSession(ctx, store.Session{
+		UserID:    uid,
+		CreatedAt: time.Now().UTC(),
+		ExpiresAt: time.Now().Add(time.Hour).UTC(),
+	}, auth.HashToken(tok)); err != nil {
+		t.Fatalf("create session: %v", err)
+	}
+	return &stdhttp.Cookie{Name: sessionCookieName, Value: tok}, uid
+}
+
 // makeHost inserts a minimal Host row directly via the store. Used by
 // HTTP-level tests that don't want to go through the full enrollment
 // path. Returns the host id.
@@ -99,7 +99,7 @@ func enrolHostForWS(t *testing.T, srv *Server, st *store.Store, name string) (ho
 	if err != nil {
 		t.Fatalf("encrypt: %v", err)
 	}
-	if err := st.SetHostCredentials(context.Background(), hostID, enc); err != nil {
+	if err := st.SetHostCredentials(context.Background(), hostID, store.CredKindRepo, enc); err != nil {
 		t.Fatalf("set creds: %v", err)
 	}
 	return hostID, token
@@ -0,0 +1,209 @@
+// pending_drain.go — drains pending_runs rows that are due (or, on
+// agent reconnect, every row for that host).
+//
+// Two trigger paths:
+//  1. The 30s tick in cmd/server (DrainAllDue) — sweeps every host
+//     with rows whose next_attempt_at <= now.
+//  2. onAgentHello (DrainPending(hostID)) — when a host comes back,
+//     walk all of its pending rows synchronously so the operator
+//     sees the queue drain promptly.
+package http
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+	"sync"
+	"time"
+
+	"github.com/oklog/ulid/v2"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+const (
+	// pendingDrainBatchLimit is the limit DrainAllDue passes to
+	// DuePendingRuns when listing due rows.
+	pendingDrainBatchLimit = 100
+	// pendingDrainBackoffMax is the ceiling for the exponential retry
+	// backoff computed in drainOne.
+	pendingDrainBackoffMax = 30 * time.Minute
+)
+
+// DrainPending re-dispatches every pending_runs row for hostID. The
+// host must already be connected (caller's responsibility — typically
+// onAgentHello). Each row's source group + schedule are loaded; if
+// either is gone the row is dropped (audit-logged as abandoned). If
+// the row's attempt count meets/exceeds the group's retry_max, the
+// row is dropped (audit-logged as abandoned). Otherwise we attempt
+// dispatch; success deletes the row, failure bumps the attempt and
+// reschedules with exponential backoff.
+//
+// A per-host mutex (hostDrainMutex) ensures that the on-hello goroutine
+// and the 30s tick cannot process the same host concurrently. If a drain
+// is already in-flight for this host, the call returns immediately — the
+// running drain will see any rows we'd have processed.
+//
+// The call is a no-op when the server has no Hub, when the host has no
+// pending rows, or when the host has no registered connection by the
+// time the rows have been listed. A listing error is logged at Warn
+// and otherwise ignored.
+func (s *Server) DrainPending(ctx context.Context, hostID string) {
+	// Without a hub there is no connection to dispatch over. Same guard
+	// as DrainAllDue, so a direct caller cannot trip over a nil Hub.
+	if s.deps.Hub == nil {
+		return
+	}
+
+	mu := s.hostDrainMutex(hostID)
+	if !mu.TryLock() {
+		return
+	}
+	defer mu.Unlock()
+
+	runs, err := s.deps.Store.ListPendingRunsForHost(ctx, hostID)
+	if err != nil {
+		slog.Warn("drain pending: list", "host_id", hostID, "err", err)
+		return
+	}
+	if len(runs) == 0 {
+		return
+	}
+	conn := s.deps.Hub.Conn(hostID)
+	if conn == nil {
+		// Host went offline between the connectedness check and now.
+		// Skip — next tick or next reconnect will retry.
+		return
+	}
+	for _, p := range runs {
+		s.drainOne(ctx, conn, p)
+	}
+}
+
+// drainOne handles a single pending row. Refactored out so DrainPending
+// reads cleanly. Side-effects: delete, bump, audit, dispatch — all
+// per-row.
+//
+// Decision order:
+//  1. Schedule missing (ErrNotFound) or disabled → abandon the row.
+//  2. Source group missing (ErrNotFound) → abandon the row.
+//  3. Any other load error → log and leave the row untouched.
+//  4. RetryMax > 0 and Attempt >= RetryMax → abandon the row.
+//  5. Dispatch over conn. An empty job ID is treated as failure: the
+//     row's attempt is bumped and its next attempt pushed out by the
+//     backoff. Otherwise the row is deleted.
+func (s *Server) drainOne(ctx context.Context, conn *ws.Conn, p store.PendingRun) {
+	sc, err := s.deps.Store.GetSchedule(ctx, p.HostID, p.ScheduleID)
+	if err != nil {
+		if errors.Is(err, store.ErrNotFound) {
+			s.abandonPending(ctx, p, "schedule gone")
+			return
+		}
+		slog.Warn("drain pending: load schedule",
+			"host_id", p.HostID, "schedule_id", p.ScheduleID, "err", err)
+		return
+	}
+	if !sc.Enabled {
+		s.abandonPending(ctx, p, "schedule disabled")
+		return
+	}
+	g, err := s.deps.Store.GetSourceGroup(ctx, p.HostID, p.SourceGroupID)
+	if err != nil {
+		if errors.Is(err, store.ErrNotFound) {
+			s.abandonPending(ctx, p, "source group gone")
+			return
+		}
+		slog.Warn("drain pending: load source group",
+			"host_id", p.HostID, "group_id", p.SourceGroupID, "err", err)
+		return
+	}
+	if g.RetryMax > 0 && p.Attempt >= g.RetryMax {
+		s.abandonPending(ctx, p, "retry_max exceeded")
+		return
+	}
+	// Calls dispatchBackupForGroupCore (not dispatchBackupForGroup) so a
+	// failed Send doesn't double-enqueue: dispatchBackupForGroup's
+	// enqueue-on-failure path would create a NEW pending_runs row while
+	// this function already bumps the EXISTING row via
+	// BumpPendingRunAttempt, producing geometric duplicates on repeated
+	// failures. The second return value is deliberately discarded; only
+	// an empty job ID is used as the failure signal here.
+	jobID, _ := s.dispatchBackupForGroupCore(ctx, conn, p.HostID, p.ScheduleID, g, p.ScheduledAt)
+	if jobID == "" {
+		// Send failed again. Bump attempt with exponential backoff.
+		// Exponential backoff doubles immediately on the first drain
+		// retry: enqueue at base, attempt=1 → drain → 2*base, attempt=2 →
+		// drain → 4*base, etc. Capped at pendingDrainBackoffMax. With
+		// defaults (60s base, retry_max=3) the schedule is 60→120→240s.
+		baseBackoff := time.Duration(g.RetryBackoffSeconds) * time.Second
+		if baseBackoff <= 0 {
+			baseBackoff = 60 * time.Second
+		}
+		backoff := baseBackoff
+		// Stop doubling once the cap is reached so an oversized base is
+		// never multiplied further.
+		for i := 0; i < p.Attempt && backoff < pendingDrainBackoffMax; i++ {
+			backoff *= 2
+		}
+		// Clamp unconditionally: this also covers a base that already
+		// exceeds the cap when no doubling happened (Attempt == 0).
+		if backoff > pendingDrainBackoffMax {
+			backoff = pendingDrainBackoffMax
+		}
+		next := time.Now().UTC().Add(backoff)
+		if err := s.deps.Store.BumpPendingRunAttempt(ctx, p.ID, next, "drain dispatch failed"); err != nil {
+			slog.Warn("drain pending: bump", "host_id", p.HostID, "id", p.ID, "err", err)
+		}
+		return
+	}
+	// Success — drop the pending row. A failed delete is only logged:
+	// the job has already been dispatched at this point.
+	if err := s.deps.Store.DeletePendingRun(ctx, p.ID); err != nil {
+		slog.Warn("drain pending: delete after dispatch", "host_id", p.HostID, "id", p.ID, "err", err)
+	}
+	slog.Info("drain pending: dispatched",
+		"host_id", p.HostID, "schedule_id", p.ScheduleID, "group", g.Name,
+		"attempt", p.Attempt, "job_id", jobID)
+}
+
+// abandonPending gives up on a pending row: it logs the decision,
+// appends a "pending_run.abandoned" audit entry targeting the row's
+// schedule, and then deletes the row. Failures of the audit write or
+// the delete are logged at Warn and do not stop the other step.
+//
+// NOTE(review): reason is only emitted on the log line; the audit
+// entry built here does not carry it — confirm whether AuditEntry has
+// a field that should.
+func (s *Server) abandonPending(ctx context.Context, p store.PendingRun, reason string) {
+	slog.Info("drain pending: abandoning",
+		"host_id", p.HostID, "schedule_id", p.ScheduleID,
+		"attempt", p.Attempt, "reason", reason)
+
+	// Local copy so the audit entry can hold a pointer to the ID.
+	targetID := p.ScheduleID
+	entry := store.AuditEntry{
+		ID:         ulid.Make().String(),
+		Actor:      "system",
+		Action:     "pending_run.abandoned",
+		TargetKind: ptr("schedule"),
+		TargetID:   &targetID,
+		TS:         time.Now().UTC(),
+	}
+	if auditErr := s.deps.Store.AppendAudit(ctx, entry); auditErr != nil {
+		slog.Warn("drain pending: audit on abandon", "id", p.ID, "err", auditErr)
+	}
+
+	if delErr := s.deps.Store.DeletePendingRun(ctx, p.ID); delErr != nil {
+		slog.Warn("drain pending: delete on abandon", "id", p.ID, "err", delErr)
+	}
+}
+
+// hostDrainMutex looks up the drain mutex belonging to hostID and
+// lazily creates both the map and the entry when they do not exist
+// yet. Access to the map is serialised by drainLocksMu. Entries are
+// never removed, so the map holds at most one mutex per host ID ever
+// seen.
+func (s *Server) hostDrainMutex(hostID string) *sync.Mutex {
+	s.drainLocksMu.Lock()
+	defer s.drainLocksMu.Unlock()
+
+	// Reading from a nil map is safe, so look up first.
+	if existing, found := s.drainLocks[hostID]; found {
+		return existing
+	}
+	if s.drainLocks == nil {
+		s.drainLocks = make(map[string]*sync.Mutex)
+	}
+	fresh := new(sync.Mutex)
+	s.drainLocks[hostID] = fresh
+	return fresh
+}
+
+// DrainAllDue is the periodic (30s ticker) entrypoint. It fetches up
+// to pendingDrainBatchLimit rows whose next_attempt_at has passed,
+// reduces them to the distinct set of hosts, and calls DrainPending
+// once for every such host that is currently connected. DrainPending
+// re-reads the host's rows itself, which keeps the per-host locking
+// model in one place.
+//
+// It does nothing when the server has no Hub, when the listing fails
+// (logged at Warn), or when no rows are due.
+func (s *Server) DrainAllDue(ctx context.Context) {
+	if s.deps.Hub == nil {
+		return
+	}
+
+	now := time.Now().UTC()
+	dueRuns, listErr := s.deps.Store.DuePendingRuns(ctx, now, pendingDrainBatchLimit)
+	switch {
+	case listErr != nil:
+		slog.Warn("drain all due: list", "err", listErr)
+		return
+	case len(dueRuns) == 0:
+		return
+	}
+
+	visited := make(map[string]struct{}, len(dueRuns))
+	for _, run := range dueRuns {
+		host := run.HostID
+		if _, dup := visited[host]; dup {
+			continue
+		}
+		visited[host] = struct{}{}
+		if s.deps.Hub.Connected(host) {
+			s.DrainPending(ctx, host)
+		}
+	}
+}
@@ -0,0 +1,572 @@
+// pending_drain_test.go — covers DrainPending / DrainAllDue and the
+// onAgentHello goroutine spawn that drains a freshly-reconnected
+// host's queue.
+package http
+
+import (
+	"context"
+	"encoding/json"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/coder/websocket"
+	"github.com/oklog/ulid/v2"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// seedSchedAndGroup prepares a host for the drain tests. It creates a
+// source group named "default" (includes /etc, the given retryMax, a
+// 60s retry backoff), an enabled daily schedule that references that
+// group, and an "init" job row for the host. It returns the group ID
+// followed by the schedule ID; any store error aborts the test.
+func seedSchedAndGroup(t *testing.T, st *store.Store, hostID string, retryMax int) (string, string) {
+	t.Helper()
+	ctx := context.Background()
+
+	groupID := ulid.Make().String()
+	group := &store.SourceGroup{
+		ID: groupID, HostID: hostID, Name: "default",
+		Includes:            []string{"/etc"},
+		RetryMax:            retryMax,
+		RetryBackoffSeconds: 60,
+	}
+	if err := st.CreateSourceGroup(ctx, group); err != nil {
+		t.Fatalf("create group: %v", err)
+	}
+
+	scheduleID := ulid.Make().String()
+	schedule := &store.Schedule{
+		ID: scheduleID, HostID: hostID,
+		CronExpr: "0 3 * * *", Enabled: true,
+		SourceGroupIDs: []string{groupID},
+	}
+	if err := st.CreateSchedule(ctx, schedule); err != nil {
+		t.Fatalf("create schedule: %v", err)
+	}
+
+	// Seed an init job so auto-init doesn't pollute what the tests read.
+	initJob := store.Job{
+		ID: ulid.Make().String(), HostID: hostID, Kind: "init",
+		ActorKind: "system", CreatedAt: time.Now().UTC(),
+	}
+	if err := st.CreateJob(ctx, initJob); err != nil {
+		t.Fatalf("seed init: %v", err)
+	}
+	return groupID, scheduleID
+}
+
+// countPendingForHost reports how many pending_runs rows currently
+// exist for hostID. A query failure aborts the test.
+func countPendingForHost(t *testing.T, st *store.Store, hostID string) int {
+	t.Helper()
+	const query = `SELECT COUNT(*) FROM pending_runs WHERE host_id = ?`
+	var rows int
+	row := st.DB().QueryRow(query, hostID)
+	if err := row.Scan(&rows); err != nil {
+		t.Fatalf("count pending: %v", err)
+	}
+	return rows
+}
+
+// waitForPendingCount polls until the pending_runs count for hostID
+// reaches wantN or the deadline expires. Use this instead of calling
+// DrainPending synchronously when the test relies on the on-hello
+// goroutine (which holds the per-host drain mutex) to process rows.
+//
+// The count is sampled every 20ms. Once the timeout has elapsed it is
+// read one final time, and the test is marked failed (t.Errorf, so
+// execution continues) only if that final reading still differs from
+// wantN. This avoids reporting a failure for a count that reached
+// wantN during the last sleep.
+func waitForPendingCount(t *testing.T, st *store.Store, hostID string, wantN int, timeout time.Duration) {
+	t.Helper()
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		if countPendingForHost(t, st, hostID) == wantN {
+			return
+		}
+		time.Sleep(20 * time.Millisecond)
+	}
+	if got := countPendingForHost(t, st, hostID); got != wantN {
+		t.Errorf("pending count for host %s: want %d after %v, got %d",
+			hostID, wantN, timeout, got)
+	}
+}
+
+// countAuditAction reports how many audit_log rows carry the given
+// action. The count is not scoped to a host, so callers compare a
+// before/after delta. A query failure aborts the test.
+func countAuditAction(t *testing.T, st *store.Store, action string) int {
+	t.Helper()
+	const query = `SELECT COUNT(*) FROM audit_log WHERE action = ?`
+	var rows int
+	row := st.DB().QueryRow(query, action)
+	if err := row.Scan(&rows); err != nil {
+		t.Fatalf("count audit: %v", err)
+	}
+	return rows
+}
+
+// TestDrainPendingDispatchesOnReconnect: a due pending row exists
+// before the agent connects. After hello, the host must receive a
+// backup command.run for the row's source group, the pending row must
+// disappear, and exactly one schedule-attributed backup job row must
+// be persisted.
+func TestDrainPendingDispatchesOnReconnect(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "drain-host")
+	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
+
+	// Pre-insert a pending row that's already due. The on-hello
+	// goroutine should drain it after we connect.
+	pendingID := ulid.Make().String()
+	now := time.Now().UTC()
+	if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
+		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
+		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
+		ScheduledAt: now.Add(-time.Minute),
+	}); err != nil {
+		t.Fatalf("enqueue: %v", err)
+	}
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "drain-host")
+
+	// Walk envelopes looking for a backup command.run carrying the
+	// group's includes.
+	var got *api.CommandRunPayload
+	deadline := time.Now().Add(3 * time.Second)
+	for time.Now().Before(deadline) {
+		ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
+		mt, raw, err := c.Read(ctx)
+		cancel()
+		if err != nil {
+			break
+		}
+		if mt != websocket.MessageText {
+			continue
+		}
+		var env api.Envelope
+		if err := json.Unmarshal(raw, &env); err != nil {
+			continue
+		}
+		if env.Type != api.MsgCommandRun {
+			continue
+		}
+		var p api.CommandRunPayload
+		// Skip payloads that do not decode instead of inspecting a
+		// partially filled struct.
+		if err := env.UnmarshalPayload(&p); err != nil {
+			continue
+		}
+		if p.Kind == api.JobBackup {
+			got = &p
+			break
+		}
+	}
+	if got == nil {
+		t.Fatalf("no backup command.run dispatched after reconnect drain")
+	}
+	if !equalStrings(got.Includes, []string{"/etc"}) {
+		t.Errorf("backup includes: %v", got.Includes)
+	}
+	if got.Tag != "default" {
+		t.Errorf("backup tag: %q", got.Tag)
+	}
+
+	// Pending row should be gone. Poll briefly: the drain goroutine
+	// sends command.run via conn.Send and only then calls
+	// DeletePendingRun. Reading the envelope off the wire above proves
+	// the send happened, but the delete runs after that on the drain
+	// goroutine — small window where the count is still 1.
+	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
+	if n := countPendingForHost(t, st, hostID); n != 0 {
+		t.Errorf("pending rows after drain: got %d, want 0", n)
+	}
+
+	// One backup job row landed (in addition to the seeded init). A
+	// failed query is reported as such rather than as a wrong count.
+	var n int
+	if err := st.DB().QueryRow(
+		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup' AND actor_kind = 'schedule'`,
+		hostID).Scan(&n); err != nil {
+		t.Fatalf("count backup jobs: %v", err)
+	}
+	if n != 1 {
+		t.Errorf("backup job rows: got %d, want 1", n)
+	}
+}
+
+// TestDrainPendingAbandonsOnRetryMax: the pending row's attempt (2)
+// already equals the group's retry_max (2). After the agent connects,
+// the row must be deleted, exactly one pending_run.abandoned audit
+// entry must be added, and neither a backup command.run nor a backup
+// job row may be produced.
+func TestDrainPendingAbandonsOnRetryMax(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "abandon-retry-host")
+	gid, sid := seedSchedAndGroup(t, st, hostID, 2)
+
+	pendingID := ulid.Make().String()
+	now := time.Now().UTC()
+	if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
+		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
+		Attempt: 2, NextAttemptAt: now.Add(-time.Second),
+		ScheduledAt: now.Add(-time.Minute),
+	}); err != nil {
+		t.Fatalf("enqueue: %v", err)
+	}
+
+	auditBefore := countAuditAction(t, st, "pending_run.abandoned")
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "abandon-retry-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// The on-hello goroutine processes the row (retry_max exceeded → abandon).
+	// Wait for it to finish rather than calling DrainPending directly, which
+	// would be a no-op while the goroutine holds the per-host drain mutex.
+	_ = connFromHub(t, srv, hostID) // ensure hub registration
+	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
+
+	if n := countPendingForHost(t, st, hostID); n != 0 {
+		t.Errorf("pending rows after abandon: got %d, want 0", n)
+	}
+	if d := countAuditAction(t, st, "pending_run.abandoned") - auditBefore; d != 1 {
+		t.Errorf("audit pending_run.abandoned delta: got %d, want 1", d)
+	}
+	// No backup command.run should have been sent.
+	deadline := time.Now().Add(400 * time.Millisecond)
+	for time.Now().Before(deadline) {
+		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
+		mt, raw, err := c.Read(ctx)
+		cancel()
+		if err != nil {
+			break
+		}
+		if mt != websocket.MessageText {
+			continue
+		}
+		var env api.Envelope
+		if err := json.Unmarshal(raw, &env); err != nil {
+			continue
+		}
+		if env.Type != api.MsgCommandRun {
+			continue
+		}
+		var p api.CommandRunPayload
+		if err := env.UnmarshalPayload(&p); err != nil {
+			continue
+		}
+		if p.Kind == api.JobBackup {
+			t.Fatalf("abandoned row still dispatched a backup: %+v", p)
+		}
+	}
+	// No backup job row. The Scan error must be checked: if the query
+	// failed silently, n would stay 0 and this assertion would pass
+	// without having verified anything.
+	var n int
+	if err := st.DB().QueryRow(
+		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup'`,
+		hostID).Scan(&n); err != nil {
+		t.Fatalf("count backup jobs: %v", err)
+	}
+	if n != 0 {
+		t.Errorf("abandon path created a backup job: %d rows", n)
+	}
+}
+
+// TestDrainPendingBumpsOnSendFailure: drainOne is invoked with a
+// connection whose client side has already been closed. The existing
+// pending row must be bumped from attempt 1 to attempt 2 with a
+// non-empty last_error, and no second pending row may be created.
+func TestDrainPendingBumpsOnSendFailure(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "bump-host")
+	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "bump-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// Capture the conn before closing the client side. Hub.Conn still
+	// returns it after the client-side close — the server's Unregister
+	// fires when its read loop sees the close, but the conn ptr remains
+	// valid; subsequent Sends just fail.
+	conn := connFromHub(t, srv, hostID)
+	if conn == nil {
+		t.Fatal("conn never registered")
+	}
+
+	// Insert the pending row AFTER the on-hello drain goroutine has
+	// already scanned (an empty list) — otherwise we race the on-hello
+	// drain dispatching the row over the still-live socket.
+	pendingID := ulid.Make().String()
+	now := time.Now().UTC()
+
+	if err := c.Close(websocket.StatusNormalClosure, "test"); err != nil {
+		t.Fatalf("close: %v", err)
+	}
+	// Brief settle so the close is observed by the server's read loop.
+	time.Sleep(150 * time.Millisecond)
+
+	// The row is enqueued only now, with the socket already closed.
+	if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
+		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
+		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
+		ScheduledAt: now.Add(-time.Minute),
+	}); err != nil {
+		t.Fatalf("enqueue: %v", err)
+	}
+
+	// DrainPending uses Hub.Conn(hostID); after the client close the
+	// server may have unregistered already. Call drainOne directly
+	// against the captured conn so we deterministically exercise the
+	// "Send fails" branch rather than the "host gone" branch.
+	// The PendingRun literal mirrors the row enqueued above.
+	srv.drainOne(context.Background(), conn, store.PendingRun{
+		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
+		Attempt: 1, NextAttemptAt: now.Add(-time.Second), ScheduledAt: now.Add(-time.Minute),
+	})
+
+	// The original row must be bumped to attempt=2 with a non-empty
+	// last_error. Critically, NO duplicate row should have been created:
+	// drainOne calls dispatchBackupForGroupCore (not dispatchBackupForGroup)
+	// so the enqueue-on-failure path is bypassed and the count stays at 1.
+	if n := countPendingForHost(t, st, hostID); n != 1 {
+		t.Errorf("pending rows after send failure: got %d, want 1 (no duplicate enqueue)", n)
+	}
+	var attempt int
+	var lastErr string
+	// COALESCE turns a NULL last_error into "" so it scans into a string.
+	if err := st.DB().QueryRow(
+		`SELECT attempt, COALESCE(last_error,'') FROM pending_runs WHERE id = ?`,
+		pendingID).Scan(&attempt, &lastErr); err != nil {
+		t.Fatalf("scan original row: %v", err)
+	}
+	if attempt != 2 {
+		t.Errorf("attempt after bump: got %d, want 2", attempt)
+	}
+	if lastErr == "" {
+		t.Errorf("last_error empty after bump")
+	}
+}
+
+// TestDrainPendingDropsRowsForGoneSchedule: a pending row whose
+// schedule can no longer run must be abandoned on reconnect — row
+// deleted, one pending_run.abandoned audit entry added, no backup
+// command.run sent. Despite the name, the schedule is disabled rather
+// than deleted; the comment inside the test explains why.
+func TestDrainPendingDropsRowsForGoneSchedule(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "gone-sched-host")
+	gid, sid := seedSchedAndGroup(t, st, hostID, 5)
+
+	pendingID := ulid.Make().String()
+	now := time.Now().UTC()
+	if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
+		ID: pendingID, ScheduleID: sid, SourceGroupID: gid, HostID: hostID,
+		Attempt: 1, NextAttemptAt: now.Add(-time.Second),
+		ScheduledAt: now.Add(-time.Minute),
+	}); err != nil {
+		t.Fatalf("enqueue: %v", err)
+	}
+
+	// Disable the schedule. (Deleting it would FK-cascade-delete the
+	// pending_runs row out from under the drainer, which is fine for
+	// production but defeats the point of the test. The
+	// disabled-schedule path goes through the same abandonPending code,
+	// so it's an equivalent assertion.)
+	if _, err := st.DB().Exec(
+		`UPDATE schedules SET enabled = 0 WHERE id = ?`, sid); err != nil {
+		t.Fatalf("disable schedule: %v", err)
+	}
+
+	// Baseline taken before connecting so only this test's drain is
+	// counted in the delta below.
+	auditBefore := countAuditAction(t, st, "pending_run.abandoned")
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "gone-sched-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// The on-hello goroutine processes the row (disabled schedule → abandon).
+	// Poll for completion instead of calling DrainPending, which would return
+	// immediately while the goroutine holds the per-host drain mutex.
+	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
+
+	if n := countPendingForHost(t, st, hostID); n != 0 {
+		t.Errorf("pending rows after schedule-gone abandon: got %d, want 0", n)
+	}
+	if d := countAuditAction(t, st, "pending_run.abandoned") - auditBefore; d != 1 {
+		t.Errorf("audit delta: got %d, want 1", d)
+	}
+	// Drain produced no backup envelope.
+	deadline := time.Now().Add(400 * time.Millisecond)
+	for time.Now().Before(deadline) {
+		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
+		mt, raw, err := c.Read(ctx)
+		cancel()
+		if err != nil {
+			break
+		}
+		if mt != websocket.MessageText {
+			continue
+		}
+		var env api.Envelope
+		// Decode errors are ignored: a zero-value envelope simply fails
+		// the type check below.
+		_ = json.Unmarshal(raw, &env)
+		if env.Type == api.MsgCommandRun {
+			var p api.CommandRunPayload
+			_ = env.UnmarshalPayload(&p)
+			if p.Kind == api.JobBackup {
+				t.Fatalf("gone-schedule abandon still dispatched: %+v", p)
+			}
+		}
+	}
+}
+
+// TestDrainPendingDropsRowsForGoneSourceGroup checks that a pending row
+// whose source group is gone (ErrNotFound) is removed from the queue by
+// the post-hello drain and that exactly one "pending_run.abandoned" audit
+// entry is written. Transient-error paths (SQLITE_BUSY, context
+// cancellation) are not exercised because the real *Store has no
+// fault-injection seam; that path is gated by code review instead.
+func TestDrainPendingDropsRowsForGoneSourceGroup(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "gone-group-host")
+	_, sid := seedSchedAndGroup(t, st, hostID, 5)
+
+	// Use a source_group_id that never existed. pending_runs carries a
+	// FK to source_groups, so FK enforcement must be bypassed for this
+	// insert. PRAGMA foreign_keys is connection-scoped and can only be
+	// changed outside a transaction; DB().Exec may run on any pooled
+	// connection, so the three statements are pinned to one *sql.Conn.
+	fakeGroupID := ulid.Make().String()
+	pendingID := ulid.Make().String()
+	now := time.Now().UTC()
+	conn, err := st.DB().Conn(context.Background())
+	if err != nil {
+		t.Fatalf("db conn: %v", err)
+	}
+	defer conn.Close()
+	if _, err := conn.ExecContext(context.Background(), `PRAGMA foreign_keys = OFF`); err != nil {
+		t.Fatalf("fk off: %v", err)
+	}
+	if _, err := conn.ExecContext(context.Background(),
+		`INSERT INTO pending_runs (id, schedule_id, source_group_id, host_id, attempt, next_attempt_at, scheduled_at)
+		 VALUES (?, ?, ?, ?, 1, ?, ?)`,
+		pendingID, sid, fakeGroupID, hostID,
+		now.Add(-time.Second), now.Add(-time.Minute),
+	); err != nil {
+		t.Fatalf("insert pending: %v", err)
+	}
+	if _, err := conn.ExecContext(context.Background(), `PRAGMA foreign_keys = ON`); err != nil {
+		t.Fatalf("fk on: %v", err)
+	}
+
+	auditBefore := countAuditAction(t, st, "pending_run.abandoned")
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "gone-group-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// The on-hello goroutine handles the row (source group gone → abandon).
+	// Poll for completion rather than calling DrainPending, which would
+	// return immediately while that goroutine holds the per-host drain mutex.
+	waitForPendingCount(t, st, hostID, 0, 2*time.Second)
+
+	if n := countPendingForHost(t, st, hostID); n != 0 {
+		t.Errorf("pending rows after source-group-gone abandon: got %d, want 0", n)
+	}
+	if d := countAuditAction(t, st, "pending_run.abandoned") - auditBefore; d != 1 {
+		t.Errorf("audit delta: got %d, want 1", d)
+	}
+}
+
+// TestDrainAllDueSkipsOfflineHosts enqueues a due pending run for a host
+// that never opens a WS connection and checks that DrainAllDue leaves
+// the row queued without writing a pending_run.abandoned audit entry.
+func TestDrainAllDueSkipsOfflineHosts(t *testing.T) {
+	t.Parallel()
+	srv, _, st := rawTestServer(t)
+	hostID, _ := enrolHostForWS(t, srv, st, "offline-host") // enrolled, never dialed
+	groupID, schedID := seedSchedAndGroup(t, st, hostID, 5)
+
+	runID := ulid.Make().String()
+	ts := time.Now().UTC()
+	run := store.PendingRun{
+		ID: runID, HostID: hostID, ScheduleID: schedID, SourceGroupID: groupID,
+		Attempt: 1, NextAttemptAt: ts.Add(-time.Second), ScheduledAt: ts.Add(-time.Minute),
+	}
+	if err := st.EnqueuePendingRun(context.Background(), &run); err != nil {
+		t.Fatalf("enqueue: %v", err)
+	}
+
+	baseline := countAuditAction(t, st, "pending_run.abandoned")
+	srv.DrainAllDue(context.Background())
+	// The drainer must skip an offline host: the row stays queued.
+	if got := countPendingForHost(t, st, hostID); got != 1 {
+		t.Errorf("pending rows after DrainAllDue against offline host: got %d, want 1", got)
+	}
+	if delta := countAuditAction(t, st, "pending_run.abandoned") - baseline; delta != 0 {
+		t.Errorf("audit unexpectedly changed: delta %d", delta)
+	}
+}
+
+// TestEnqueueOnDispatchFailure closes the agent's socket, then fires a
+// scheduled dispatch at the now-stale server-side conn. It expects one
+// pending row for the host with attempt 1, the original scheduled_at, and
+// a non-empty last_error.
+func TestEnqueueOnDispatchFailure(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "enqueue-host")
+	_, sid := seedSchedAndGroup(t, st, hostID, 5)
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "enqueue-host")
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// Grab the server-side conn while the agent is still connected; it is
+	// handed to dispatchScheduledJob after the client has gone away.
+	conn := connFromHub(t, srv, hostID)
+
+	// Close the client side so the server's next Send errors.
+	if err := c.Close(websocket.StatusNormalClosure, "test"); err != nil {
+		t.Fatalf("close: %v", err)
+	}
+	// Short pause before dispatching — presumably to let the close reach
+	// the server side of the socket.
+	time.Sleep(100 * time.Millisecond)
+
+	scheduledAt := time.Now().UTC().Add(-30 * time.Second)
+	srv.dispatchScheduledJob(context.Background(), hostID, conn, sid, scheduledAt)
+
+	// One pending row should have been enqueued (attempt=1) with the
+	// scheduled_at preserved.
+	rows, err := st.ListPendingRunsForHost(context.Background(), hostID)
+	if err != nil {
+		t.Fatalf("list: %v", err)
+	}
+	if len(rows) != 1 {
+		t.Fatalf("pending rows: got %d, want 1", len(rows))
+	}
+	if rows[0].Attempt != 1 {
+		t.Errorf("attempt: got %d, want 1", rows[0].Attempt)
+	}
+	// scheduled_at preserved (within RFC3339Nano round-trip tolerance).
+	if rows[0].ScheduledAt.Sub(scheduledAt).Abs() > time.Microsecond {
+		t.Errorf("scheduled_at drift: %v vs %v", rows[0].ScheduledAt, scheduledAt)
+	}
+	if rows[0].LastError == "" {
+		t.Errorf("last_error empty")
+	}
+}
+
+// TestDrainPendingSerializesPerHost verifies that concurrent DrainPending
+// calls for the same host do not double-dispatch pending rows. The per-host
+// mutex (TryLock semantics) means exactly one drain processes each row.
+// It queues 5 due rows, runs 10 concurrent drains, and expects an empty
+// queue plus exactly 5 schedule-actor backup job rows.
+func TestDrainPendingSerializesPerHost(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "serialize-host")
+	gid, sid := seedSchedAndGroup(t, st, hostID, 10)
+
+	// Connect the agent so DrainPending can dispatch.
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "serialize-host")
+	// Wait for schedule.set so the connection is settled before rows are
+	// queued; the on-hello drain pass is expected to have run by then
+	// (with no pending rows to process).
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	// Insert 5 pending rows now that the on-hello drain has already run.
+	now := time.Now().UTC()
+	for i := range 5 {
+		pid := ulid.Make().String()
+		if err := st.EnqueuePendingRun(context.Background(), &store.PendingRun{
+			ID:            pid,
+			ScheduleID:    sid,
+			SourceGroupID: gid,
+			HostID:        hostID,
+			Attempt:       1,
+			NextAttemptAt: now.Add(-time.Second),
+			ScheduledAt:   now.Add(-time.Duration(i+1) * time.Minute),
+		}); err != nil {
+			t.Fatalf("enqueue row %d: %v", i, err)
+		}
+	}
+
+	// Spawn 10 goroutines all calling DrainPending concurrently.
+	var wg sync.WaitGroup
+	for range 10 {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			srv.DrainPending(context.Background(), hostID)
+		}()
+	}
+	wg.Wait()
+
+	// Drain any envelopes the agent received so we don't block below.
+	// We read with short timeouts and stop when the connection goes quiet.
+	drainDeadline := time.Now().Add(500 * time.Millisecond)
+	for time.Now().Before(drainDeadline) {
+		ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+		_, _, err := c.Read(ctx)
+		cancel()
+		if err != nil {
+			break
+		}
+	}
+
+	// All 5 pending rows must be gone.
+	if n := countPendingForHost(t, st, hostID); n != 0 {
+		t.Errorf("pending rows after concurrent drain: got %d, want 0", n)
+	}
+
+	// Exactly 5 backup job rows (one per pending row), not 10+ from a race.
+	// A failed count query must fail the test outright rather than leave n
+	// at zero and surface as a misleading "got 0, want 5".
+	var n int
+	if err := st.DB().QueryRow(
+		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup' AND actor_kind = 'schedule'`,
+		hostID).Scan(&n); err != nil {
+		t.Fatalf("count backup jobs: %v", err)
+	}
+	if n != 5 {
+		t.Errorf("backup job rows: got %d, want 5 (per-host mutex must prevent double-dispatch)", n)
+	}
+}
@@ -0,0 +1,165 @@
+// repo_ops.go — operator-triggered Run-now for repo-level operations:
+// prune, check, unlock. Backed by the same dispatchJobWithPayload
+// pipeline as backup, with an extra step for prune: push admin creds
+// first if they're set, refuse loudly if they aren't.
+package http
+
+import (
+	"errors"
+	"log/slog"
+	stdhttp "net/http"
+	"strconv"
+
+	"github.com/go-chi/chi/v5"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// handleRunRepoPrune handles POST /api/hosts/{id}/repo/prune and the
+// HTMX twin mounted outside /api. It first pushes the host's admin
+// credentials to the agent, then dispatches a prune command.run with
+// RequiresAdminCreds=true. Missing admin credentials yield a 400
+// (admin_creds_required); any other push failure is reported as a 503
+// (host_offline).
+func (s *Server) handleRunRepoPrune(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	user, authed := s.requireUser(r)
+	switch {
+	case authed:
+		// Fall through to the handler body.
+	case wantsHTML(r):
+		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
+		return
+	default:
+		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
+		return
+	}
+
+	hostID := chi.URLParam(r, "id")
+	if hostID == "" {
+		s.runOpError(w, r, stdhttp.StatusBadRequest, "missing_id", "")
+		return
+	}
+
+	// The admin creds go out before the command. ErrNotFound means the
+	// operator has not stored them yet; anything else is most likely an
+	// offline host or a decrypt failure.
+	switch err := s.pushAdminCredsToAgent(r.Context(), hostID); {
+	case err == nil:
+		// Creds are on their way; continue to the dispatch.
+	case errors.Is(err, store.ErrNotFound):
+		s.runOpError(w, r, stdhttp.StatusBadRequest, "admin_creds_required",
+			"set admin credentials on the Repo page before running prune")
+		return
+	default:
+		// Send or decrypt failure: log the real cause and show a generic
+		// offline message so the operator retries once the agent is back.
+		slog.Warn("prune: push admin creds failed", "host_id", hostID, "err", err)
+		s.runOpError(w, r, stdhttp.StatusServiceUnavailable, "host_offline",
+			"agent is not currently connected; try again when it reconnects")
+		return
+	}
+
+	payload := api.CommandRunPayload{RequiresAdminCreds: true}
+	res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobPrune, payload)
+	if code == "" {
+		s.runOpRedirect(w, r, res)
+		return
+	}
+	s.runOpError(w, r, status, code, msg)
+}
+
+// handleRunRepoCheck — POST /api/hosts/{id}/repo/check. Pulls
+// check_subset_pct from host_repo_maintenance for the host (operator
+// can override via ?subset=N query param, clamped 0..100). Dispatches
+// with the chosen subset in CommandRunPayload.Args[0].
+//
+// Unauthenticated callers get a 303 to /login (HTMX) or a 401 (JSON).
+// A missing maintenance row or any other store error yields a 500; the
+// latter is logged here because the response carries no detail.
+func (s *Server) handleRunRepoCheck(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	user, ok := s.requireUser(r)
+	if !ok {
+		if wantsHTML(r) {
+			stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
+			return
+		}
+		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
+		return
+	}
+	hostID := chi.URLParam(r, "id")
+	if hostID == "" {
+		s.runOpError(w, r, stdhttp.StatusBadRequest, "missing_id", "")
+		return
+	}
+
+	m, err := s.deps.Store.GetRepoMaintenance(r.Context(), hostID)
+	if err != nil {
+		if errors.Is(err, store.ErrNotFound) {
+			// Maintenance row should auto-seed at enrollment. If it's
+			// missing, surface a clear error rather than guessing 0%.
+			s.runOpError(w, r, stdhttp.StatusInternalServerError, "no_maintenance_row",
+				"host has no repo-maintenance config; was the host fully enrolled?")
+			return
+		}
+		// The client only sees "internal"; keep the cause in the log so
+		// the failure can be diagnosed.
+		slog.Error("check: load repo maintenance failed", "host_id", hostID, "err", err)
+		s.runOpError(w, r, stdhttp.StatusInternalServerError, "internal", "")
+		return
+	}
+	subset := m.CheckSubsetPct
+	if q := r.URL.Query().Get("subset"); q != "" {
+		if n, convErr := strconv.Atoi(q); convErr == nil {
+			// Clamp the operator override into 0..100.
+			subset = min(max(n, 0), 100)
+		}
+		// Non-numeric ?subset silently falls back to DB value.
+	}
+
+	res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobCheck,
+		api.CommandRunPayload{Args: []string{strconv.Itoa(subset)}})
+	if code != "" {
+		s.runOpError(w, r, status, code, msg)
+		return
+	}
+	s.runOpRedirect(w, r, res)
+}
+
+// handleRunRepoUnlock handles POST /api/hosts/{id}/repo/unlock. Unlike
+// prune it pushes no admin credentials first — restic unlock works with
+// the everyday user — and dispatches an empty CommandRunPayload.
+func (s *Server) handleRunRepoUnlock(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	user, authed := s.requireUser(r)
+	if !authed && wantsHTML(r) {
+		stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
+		return
+	}
+	if !authed {
+		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
+		return
+	}
+
+	hostID := chi.URLParam(r, "id")
+	if hostID == "" {
+		s.runOpError(w, r, stdhttp.StatusBadRequest, "missing_id", "")
+		return
+	}
+
+	var payload api.CommandRunPayload
+	res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobUnlock, payload)
+	if code == "" {
+		s.runOpRedirect(w, r, res)
+		return
+	}
+	s.runOpError(w, r, status, code, msg)
+}
+
+// runOpRedirect writes the success response for a dispatched repo op.
+// HTMX callers get a 204 with an HX-Redirect header pointing at
+// /jobs/{id}; everyone else gets a 202 with res as the JSON body.
+// Mirrors handleRunSourceGroup's tail.
+func (s *Server) runOpRedirect(w stdhttp.ResponseWriter, r *stdhttp.Request, res runNowResponse) {
+	if !wantsHTML(r) {
+		writeJSON(w, stdhttp.StatusAccepted, res)
+		return
+	}
+	target := "/jobs/" + res.JobID
+	w.Header().Set("HX-Redirect", target)
+	w.WriteHeader(stdhttp.StatusNoContent)
+}
+
+// runOpError writes a failure response for a repo run-now handler.
+// HTMX callers get a plain-text body with the given status; JSON callers
+// get the standard error envelope. Modelled on runGroupError.
+//
+// Several call sites pass an empty msg (missing_id, internal). For the
+// plain-text path that would produce an empty body, so the error code is
+// used as the text instead; the JSON envelope already carries the code.
+func (s *Server) runOpError(w stdhttp.ResponseWriter, r *stdhttp.Request, status int, code, msg string) {
+	if wantsHTML(r) {
+		text := msg
+		if text == "" {
+			text = code
+		}
+		stdhttp.Error(w, text, status)
+		return
+	}
+	writeJSONError(w, status, code, msg)
+}
@@ -0,0 +1,362 @@
+// repo_ops_test.go — integration tests for the repo run-now endpoints:
+// prune, check, unlock.
+package http
+
+import (
+	"context"
+	"encoding/json"
+	stdhttp "net/http"
+	"strconv"
+	"testing"
+	"time"
+
+	"github.com/coder/websocket"
+	"github.com/oklog/ulid/v2"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// ----- helpers -------------------------------------------------------
+
+// seedInitJob inserts a synthetic system-actor "init" job row for the
+// host. Tests call it so the auto-init path doesn't fire and add
+// envelopes to the sequence being measured.
+func seedInitJob(t *testing.T, st *store.Store, hostID string) {
+	t.Helper()
+	job := store.Job{
+		ID:        ulid.Make().String(),
+		HostID:    hostID,
+		Kind:      "init",
+		ActorKind: "system",
+		CreatedAt: time.Now().UTC(),
+	}
+	if err := st.CreateJob(context.Background(), job); err != nil {
+		t.Fatalf("seed init job: %v", err)
+	}
+}
+
+// setAdminCreds encrypts a fixed set of admin credentials with the
+// server's encryptRepoCreds and stores them in the host's admin slot
+// directly through the store, bypassing the HTTP handlers.
+func setAdminCreds(t *testing.T, srv *Server, st *store.Store, hostID string) {
+	t.Helper()
+	blob := repoCredsBlob{
+		RepoURL:      "rest:http://admin.example/h",
+		RepoUsername: "admin",
+		RepoPassword: "prune-pass",
+	}
+	hostTag := []byte("host:" + hostID + ":admin")
+	sealed, err := srv.encryptRepoCreds(blob, hostTag)
+	if err != nil {
+		t.Fatalf("encrypt admin creds: %v", err)
+	}
+	err = st.SetHostCredentials(context.Background(), hostID, store.CredKindAdmin, sealed)
+	if err != nil {
+		t.Fatalf("set admin creds: %v", err)
+	}
+}
+
+// setMaintenanceSubset stores pct as the host's check_subset_pct. It
+// creates the default maintenance row, reads it back, overwrites
+// CheckSubsetPct, and saves it — all through the store.
+func setMaintenanceSubset(t *testing.T, st *store.Store, hostID string, pct int) {
+	t.Helper()
+	ctx := context.Background()
+	// Seed the default row first so the read below has something to load.
+	if err := st.CreateDefaultRepoMaintenance(ctx, hostID); err != nil {
+		t.Fatalf("seed maintenance: %v", err)
+	}
+	maint, err := st.GetRepoMaintenance(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get maintenance: %v", err)
+	}
+	maint.CheckSubsetPct = pct
+	if err = st.UpdateRepoMaintenance(ctx, maint); err != nil {
+		t.Fatalf("update maintenance: %v", err)
+	}
+}
+
+// drainCommandRun consumes envelopes from c via drainUntil until a
+// command.run shows up, then decodes and returns its payload. A decode
+// failure aborts the test.
+func drainCommandRun(t *testing.T, c *websocket.Conn) api.CommandRunPayload {
+	t.Helper()
+	var payload api.CommandRunPayload
+	envelope := drainUntil(t, c, api.MsgCommandRun)
+	if err := envelope.UnmarshalPayload(&payload); err != nil {
+		t.Fatalf("unmarshal command.run: %v", err)
+	}
+	return payload
+}
+
+// ----- prune tests ---------------------------------------------------
+
+// TestRunPruneRefusesWithoutAdminCreds posts a prune for a connected host
+// that has no admin credentials stored and expects a 400 with code
+// admin_creds_required and no prune job row persisted.
+func TestRunPruneRefusesWithoutAdminCreds(t *testing.T) {
+	t.Parallel()
+	const hostName = "prune-no-admin"
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, hostName)
+	cookie := loginAsAdmin(t, st)
+	seedInitJob(t, st, hostID)
+
+	agent := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, agent, hostName)
+	_ = drainUntil(t, agent, api.MsgScheduleSet)
+
+	endpoint := "/api/hosts/" + hostID + "/repo/prune"
+	httpStatus, resp := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
+	if httpStatus != stdhttp.StatusBadRequest {
+		t.Fatalf("want 400, got %d body=%+v", httpStatus, resp)
+	}
+	if got, _ := resp["code"].(string); got != "admin_creds_required" {
+		t.Errorf("want code=admin_creds_required, got %+v", resp)
+	}
+
+	// The refusal must not leave a prune job row behind.
+	var pruneJobs int
+	row := st.DB().QueryRow(
+		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'prune'`, hostID)
+	if err := row.Scan(&pruneJobs); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if pruneJobs != 0 {
+		t.Errorf("unexpected prune job rows: %d", pruneJobs)
+	}
+}
+
+// TestRunPruneShipsConfigUpdateThenCommandRun: set admin creds, connect
+// host, POST prune. Assert envelope sequence: config.update(slot=admin)
+// → command.run(prune, RequiresAdminCreds=true). Assert job row persisted.
+//
+// Ordering is enforced implicitly: the read loop stops at the first prune
+// command.run, so sawAdminPush can only be true if the admin
+// config.update was read before it.
+func TestRunPruneShipsConfigUpdateThenCommandRun(t *testing.T) {
+	t.Parallel()
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, "prune-happy")
+	cookie := loginAsAdmin(t, st)
+	setAdminCreds(t, srv, st, hostID)
+	seedInitJob(t, st, hostID)
+
+	c := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, c, "prune-happy")
+	// Drain on-hello burst (repo config.update + schedule.set).
+	_ = drainUntil(t, c, api.MsgScheduleSet)
+
+	status, body := doJSON(t, ts.URL, "POST", "/api/hosts/"+hostID+"/repo/prune", nil, cookie)
+	if status != stdhttp.StatusAccepted {
+		t.Fatalf("want 202, got %d body=%+v", status, body)
+	}
+	jobID, _ := body["job_id"].(string)
+	if jobID == "" {
+		t.Fatalf("no job_id in response: %+v", body)
+	}
+
+	// Read envelopes until the prune command.run arrives (or 3s pass),
+	// noting whether a config.update(slot=admin) was seen on the way.
+	deadline := time.Now().Add(3 * time.Second)
+	var sawAdminPush bool
+	var prunePayload *api.CommandRunPayload
+	for (prunePayload == nil) && time.Now().Before(deadline) {
+		ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
+		mt, raw, err := c.Read(ctx)
+		cancel()
+		if err != nil {
+			break
+		}
+		if mt != websocket.MessageText {
+			continue
+		}
+		var env api.Envelope
+		if err := json.Unmarshal(raw, &env); err != nil {
+			continue
+		}
+		switch env.Type {
+		case api.MsgConfigUpdate:
+			var p api.ConfigUpdatePayload
+			if err := env.UnmarshalPayload(&p); err == nil && p.Slot == "admin" {
+				sawAdminPush = true
+			}
+		case api.MsgCommandRun:
+			var p api.CommandRunPayload
+			if err := env.UnmarshalPayload(&p); err == nil && p.Kind == api.JobPrune {
+				// p is declared inside this case on every iteration, so
+				// its address can be kept directly — no temporary named
+				// after (and shadowing) the builtin copy is needed.
+				prunePayload = &p
+			}
+		}
+	}
+
+	if !sawAdminPush {
+		t.Error("expected config.update(slot=admin) before prune dispatch")
+	}
+	if prunePayload == nil {
+		t.Fatal("timed out waiting for command.run(prune)")
+	}
+	if !prunePayload.RequiresAdminCreds {
+		t.Error("prune command.run must have RequiresAdminCreds=true")
+	}
+	if prunePayload.JobID != jobID {
+		t.Errorf("job_id mismatch: dispatch=%s run=%s", jobID, prunePayload.JobID)
+	}
+
+	// Job row must be persisted.
+	var n int
+	if err := st.DB().QueryRow(
+		`SELECT COUNT(*) FROM jobs WHERE id = ? AND host_id = ? AND kind = 'prune'`,
+		jobID, hostID).Scan(&n); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 1 {
+		t.Errorf("prune job row count: want 1, got %d", n)
+	}
+}
+
+// ----- check tests ---------------------------------------------------
+
+// TestRunCheckUsesMaintenanceSubset stores check_subset_pct=25 for the
+// host and expects the dispatched check command.run to carry
+// Args == ["25"].
+func TestRunCheckUsesMaintenanceSubset(t *testing.T) {
+	t.Parallel()
+	const hostName = "check-subset"
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, hostName)
+	cookie := loginAsAdmin(t, st)
+	setMaintenanceSubset(t, st, hostID, 25)
+	seedInitJob(t, st, hostID)
+
+	agent := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, agent, hostName)
+	_ = drainUntil(t, agent, api.MsgScheduleSet)
+
+	endpoint := "/api/hosts/" + hostID + "/repo/check"
+	httpStatus, resp := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
+	if httpStatus != stdhttp.StatusAccepted {
+		t.Fatalf("want 202, got %d body=%+v", httpStatus, resp)
+	}
+
+	run := drainCommandRun(t, agent)
+	if run.Kind != api.JobCheck {
+		t.Fatalf("kind: want check, got %s", run.Kind)
+	}
+	if !(len(run.Args) == 1 && run.Args[0] == "25") {
+		t.Errorf("args: want [25], got %v", run.Args)
+	}
+}
+
+// TestRunCheckHonorsSubsetOverride stores check_subset_pct=25, posts the
+// check with ?subset=10, and expects the query value to win:
+// Args == ["10"].
+func TestRunCheckHonorsSubsetOverride(t *testing.T) {
+	t.Parallel()
+	const hostName = "check-override"
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, hostName)
+	cookie := loginAsAdmin(t, st)
+	setMaintenanceSubset(t, st, hostID, 25)
+	seedInitJob(t, st, hostID)
+
+	agent := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, agent, hostName)
+	_ = drainUntil(t, agent, api.MsgScheduleSet)
+
+	endpoint := "/api/hosts/" + hostID + "/repo/check?subset=10"
+	httpStatus, resp := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
+	if httpStatus != stdhttp.StatusAccepted {
+		t.Fatalf("want 202, got %d body=%+v", httpStatus, resp)
+	}
+
+	run := drainCommandRun(t, agent)
+	if !(len(run.Args) == 1 && run.Args[0] == "10") {
+		t.Errorf("args: want [10], got %v", run.Args)
+	}
+}
+
+// TestRunCheckRejectsBadSubsetGracefully posts the check with a
+// non-numeric ?subset=abc and expects a normal 202 whose command.run
+// carries the stored check_subset_pct (30) instead of an error.
+func TestRunCheckRejectsBadSubsetGracefully(t *testing.T) {
+	t.Parallel()
+	const (
+		hostName = "check-badsubset"
+		dbPct    = 30
+	)
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, hostName)
+	cookie := loginAsAdmin(t, st)
+	setMaintenanceSubset(t, st, hostID, dbPct)
+	seedInitJob(t, st, hostID)
+
+	agent := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, agent, hostName)
+	_ = drainUntil(t, agent, api.MsgScheduleSet)
+
+	endpoint := "/api/hosts/" + hostID + "/repo/check?subset=abc"
+	httpStatus, resp := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
+	if httpStatus != stdhttp.StatusAccepted {
+		t.Fatalf("want 202 (bad subset falls back), got %d body=%+v", httpStatus, resp)
+	}
+
+	run := drainCommandRun(t, agent)
+	if want := strconv.Itoa(dbPct); !(len(run.Args) == 1 && run.Args[0] == want) {
+		t.Errorf("args: want [30], got %v", run.Args)
+	}
+}
+
+// ----- unlock tests --------------------------------------------------
+
+// TestRunUnlockNeedsNoAdminCreds posts an unlock for a host with no admin
+// credentials stored and expects a 202 plus an unlock command.run whose
+// RequiresAdminCreds flag is false.
+func TestRunUnlockNeedsNoAdminCreds(t *testing.T) {
+	t.Parallel()
+	const hostName = "unlock-no-admin"
+	srv, ts, st := rawTestServer(t)
+	hostID, token := enrolHostForWS(t, srv, st, hostName)
+	cookie := loginAsAdmin(t, st)
+	seedInitJob(t, st, hostID)
+
+	agent := agentDial(t, srv, ts, hostID, token)
+	sendHello(t, agent, hostName)
+	_ = drainUntil(t, agent, api.MsgScheduleSet)
+
+	endpoint := "/api/hosts/" + hostID + "/repo/unlock"
+	httpStatus, resp := doJSON(t, ts.URL, "POST", endpoint, nil, cookie)
+	if httpStatus != stdhttp.StatusAccepted {
+		t.Fatalf("want 202, got %d body=%+v", httpStatus, resp)
+	}
+
+	run := drainCommandRun(t, agent)
+	if run.Kind != api.JobUnlock {
+		t.Fatalf("kind: want unlock, got %s", run.Kind)
+	}
+	// Unlock runs with the everyday creds, so the admin flag stays off.
+	if run.RequiresAdminCreds {
+		t.Error("unlock must not set RequiresAdminCreds")
+	}
+}
+
+// ----- auth tests ----------------------------------------------------
+
+// TestRunOpsRequireAuth: unauthenticated POST to each repo run-now
+// endpoint → 401 on the /api paths, and → 303 to /login on the HTMX
+// (HX-Request: true) paths mounted outside /api.
+//
+// The range variable is captured directly by the parallel subtests; the
+// package already relies on Go 1.22 language features, under which each
+// iteration gets its own variable.
+func TestRunOpsRequireAuth(t *testing.T) {
+	t.Parallel()
+	_, url, st := newTestServerWithHub(t)
+	hostID := makeHost(t, st, "auth-host")
+
+	for _, path := range []string{
+		"/api/hosts/" + hostID + "/repo/prune",
+		"/api/hosts/" + hostID + "/repo/check",
+		"/api/hosts/" + hostID + "/repo/unlock",
+	} {
+		t.Run(path, func(t *testing.T) {
+			t.Parallel()
+			req, err := stdhttp.NewRequest("POST", url+path, nil)
+			if err != nil {
+				t.Fatalf("new request: %v", err)
+			}
+			res, err := stdhttp.DefaultClient.Do(req)
+			if err != nil {
+				t.Fatalf("do: %v", err)
+			}
+			defer res.Body.Close()
+			if res.StatusCode != stdhttp.StatusUnauthorized {
+				t.Errorf("want 401, got %d", res.StatusCode)
+			}
+		})
+	}
+
+	// HTMX path: unauthenticated POST with HX-Request: true → 303 to /login.
+	// Auth check fires before host lookup so the host ID doesn't need to exist.
+	for _, path := range []string{
+		"/hosts/" + hostID + "/repo/prune",
+		"/hosts/" + hostID + "/repo/check",
+		"/hosts/" + hostID + "/repo/unlock",
+	} {
+		t.Run("htmx"+path, func(t *testing.T) {
+			t.Parallel()
+			// Don't follow the redirect: the 303 itself is what is asserted.
+			client := &stdhttp.Client{
+				CheckRedirect: func(_ *stdhttp.Request, _ []*stdhttp.Request) error {
+					return stdhttp.ErrUseLastResponse
+				},
+			}
+			req, err := stdhttp.NewRequest("POST", url+path, nil)
+			if err != nil {
+				t.Fatalf("new request: %v", err)
+			}
+			req.Header.Set("HX-Request", "true")
+			res, err := client.Do(req)
+			if err != nil {
+				t.Fatalf("do: %v", err)
+			}
+			defer res.Body.Close()
+			if res.StatusCode != stdhttp.StatusSeeOther {
+				t.Errorf("want 303, got %d", res.StatusCode)
+			}
+			if loc := res.Header.Get("Location"); loc != "/login" {
+				t.Errorf("want Location=/login, got %q", loc)
+			}
+		})
+	}
+}
@@ -164,15 +164,19 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
 	}
 }

-// dispatchBackupForGroup builds and sends a single backup command.run
-// envelope on conn for the given group. Persists the job row first so
-// the live log viewer can subscribe to it.
-// dispatchBackupForGroup persists a backup job row, sends the
-// command.run envelope to the agent, and audit-logs the dispatch.
-// Returns the persisted job ID on success, or "" on any failure
-// (failures are slog.Warn-ed). Callers may use the returned ID to,
-// e.g., redirect the UI to the live job log.
-func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, hostID, scheduleID string, g *store.SourceGroup, scheduledAt time.Time) string {
+// dispatchBackupForGroupCore persists a backup job row, marshals and
+// sends the command.run envelope, and audit-logs the dispatch. It does
+// NOT enqueue a PendingRun on failure — that responsibility belongs to
+// the caller when appropriate.
+//
+// Returns (jobID, nil) on success. Returns ("", err) on any failure;
+// the error is also slog.Warn-ed inside this function so callers don't
+// need to log it again.
+//
+// Used by both dispatchBackupForGroup (schedule.fire path, which adds
+// enqueue-on-failure) and drainOne (which handles failure via
+// BumpPendingRunAttempt on the existing row, avoiding double-enqueue).
+func (s *Server) dispatchBackupForGroupCore(ctx context.Context, conn *ws.Conn, hostID, scheduleID string, g *store.SourceGroup, scheduledAt time.Time) (string, error) {
 	jobID := ulid.Make().String()
 	now := time.Now().UTC()
 	scheduleRef := scheduleID
@@ -186,7 +190,7 @@ func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, host
 	}); err != nil {
 		slog.Warn("schedule.fire: persist job", "host_id", hostID,
 			"schedule_id", scheduleID, "group", g.Name, "err", err)
-		return ""
+		return "", err
 	}
 	// Backup ignores RetentionPolicy — the forget cadence lives on
 	// host_repo_maintenance and is driven by the server-side ticker
@@ -201,14 +205,17 @@ func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, host
 	if err != nil {
 		slog.Warn("schedule.fire: marshal command.run",
 			"host_id", hostID, "schedule_id", scheduleID, "err", err)
-		return ""
+		return "", err
 	}
 	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
 	defer cancel()
 	if err := conn.Send(sendCtx, env); err != nil {
-		slog.Warn("schedule.fire: send command.run",
-			"host_id", hostID, "schedule_id", scheduleID, "err", err)
-		return ""
+		slog.Warn("schedule.fire: send command.run failed",
+			"host_id", hostID, "schedule_id", scheduleID, "group", g.Name, "err", err)
+		// The job row was already persisted — leave it in `queued` status.
+		// The drainer will re-dispatch (creating a new job row) and the
+		// orphaned queued row stays for forensic visibility.
+		return "", err
 	}
 	_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
 		ID:         ulid.Make().String(),
@@ -221,5 +228,37 @@ func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, host
 	slog.Info("schedule.fire: dispatched backup",
 		"host_id", hostID, "schedule_id", scheduleID,
 		"group", g.Name, "job_id", jobID, "scheduled_at", scheduledAt)
-	return jobID
+	return jobID, nil
+}
+
+// dispatchBackupForGroup is the schedule.fire entry point. Wraps
+// dispatchBackupForGroupCore with enqueue-on-failure: when the core
+// returns an error (a failed Send or an earlier step), a fresh
+// PendingRun with Attempt 1 is queued for the drainer to retry later.
+// The retry is due after g.RetryBackoffSeconds, or 60s when that value
+// is not positive.
+//
+// Returns the persisted job ID on success, or "" on any failure.
+func (s *Server) dispatchBackupForGroup(ctx context.Context, conn *ws.Conn, hostID, scheduleID string, g *store.SourceGroup, scheduledAt time.Time) string {
+	jobID, err := s.dispatchBackupForGroupCore(ctx, conn, hostID, scheduleID, g, scheduledAt)
+	if err == nil {
+		return jobID
+	}
+	// Send (or an earlier step) failed — err was already logged inside
+	// the core. Enqueue a fresh PendingRun for the drainer to retry.
+	backoff := time.Duration(g.RetryBackoffSeconds) * time.Second
+	if backoff <= 0 {
+		backoff = 60 * time.Second
+	}
+	// Detach the enqueue from ctx's cancellation. If the dispatch failed
+	// because ctx was cancelled, reusing ctx would make the enqueue fail
+	// the same way and the run would be lost. A short timeout keeps the
+	// write bounded.
+	enqueueCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second)
+	defer cancel()
+	if enqueueErr := s.deps.Store.EnqueuePendingRun(enqueueCtx, &store.PendingRun{
+		ID:            ulid.Make().String(),
+		ScheduleID:    scheduleID,
+		SourceGroupID: g.ID,
+		HostID:        hostID,
+		Attempt:       1,
+		NextAttemptAt: time.Now().UTC().Add(backoff),
+		ScheduledAt:   scheduledAt,
+		LastError:     err.Error(),
+	}); enqueueErr != nil {
+		slog.Warn("schedule.fire: enqueue pending run failed",
+			"host_id", hostID, "schedule_id", scheduleID, "group", g.Name, "err", enqueueErr)
+	}
+	return ""
 }
@@ -7,6 +7,7 @@ import (
 	"context"
 	"errors"
 	stdhttp "net/http"
+	"sync"
 	"time"

 	"github.com/go-chi/chi/v5"
@@ -41,6 +42,13 @@ type Deps struct {
 type Server struct {
 	srv  *stdhttp.Server
 	deps Deps
+
+	// drainLocks serializes DrainPending per host. The on-hello
+	// goroutine and the 30s ticker can otherwise race for the same
+	// host, double-dispatching every pending row. Map of hostID →
+	// sync.Mutex; checked-and-locked atomically via drainLocksMu.
+	drainLocksMu sync.Mutex
+	drainLocks   map[string]*sync.Mutex
 }

 // New builds a configured but not-yet-started server.
@@ -59,7 +67,7 @@ func New(deps Deps) *Server {
 		w.WriteHeader(stdhttp.StatusNoContent)
 	})

-	s := &Server{deps: deps}
+	s := &Server{deps: deps, drainLocks: make(map[string]*sync.Mutex)}
 	s.routes(r)

 	s.srv = &stdhttp.Server{
@@ -105,6 +113,13 @@ func (s *Server) routes(r chi.Router) {
 		r.Get("/hosts/{id}/repo-credentials", s.handleGetHostCredentials)
 		r.Put("/hosts/{id}/repo-credentials", s.handleSetHostCredentials)

+		// Admin credentials — the prune-capable slot (separate from the
+		// everyday repo creds). Optional: hosts that don't prune against
+		// a rest-server repo with a separate admin user never need this.
+		r.Get("/hosts/{id}/admin-credentials", s.handleGetAdminCredentials)
+		r.Put("/hosts/{id}/admin-credentials", s.handleSetAdminCredentials)
+		r.Delete("/hosts/{id}/admin-credentials", s.handleDeleteAdminCredentials)
+
 		// Per-host schedule CRUD. Mutations bump host_schedule_version
 		// and async-push to a connected agent (see schedule_push.go).
 		r.Get("/hosts/{id}/schedules", s.handleListSchedules)
@@ -134,12 +149,23 @@ func (s *Server) routes(r chi.Router) {
 		// mounted at the equivalent path outside /api below — both
 		// resolve to the same handler, which sniffs HX-Request.
 		r.Post("/hosts/{id}/source-groups/{gid}/run", s.handleRunSourceGroup)
+
+		// Repo-level run-now: prune (needs admin creds), check, unlock.
+		// HTMX forms are also mounted outside /api below.
+		r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune)
+		r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck)
+		r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock)
 	})

 	// Per-source-group Run-now (HTMX form action). Available even
 	// when the server is started without UI templates so REST callers
 	// against the non-/api path also work.
 	r.Post("/hosts/{id}/source-groups/{gid}/run", s.handleRunSourceGroup)
+	// Repo-level run-now (HTMX form actions). Same handlers as the /api
+	// variants — wantsHTML sniff distinguishes JSON vs HTMX response.
+	r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune)
+	r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck)
+	r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock)
 	// Retired routes — see ui_handlers.go for the messages. Mounted
 	// outside the UI gate so cached browser tabs get a clear 410
 	// even if the server runs without templates.
@@ -202,6 +228,9 @@ func (s *Server) routes(r chi.Router) {
 		r.Post("/hosts/{id}/repo/credentials", s.handleUIRepoCredentialsSave)
 		r.Post("/hosts/{id}/repo/bandwidth", s.handleUIRepoBandwidthSave)
 		r.Post("/hosts/{id}/repo/maintenance", s.handleUIRepoMaintenanceSave)
+		// Admin credentials form (separate slot for prune-capable user).
+		r.Post("/hosts/{id}/admin-credentials", s.handleUIAdminCredentialsSave)
+		r.Post("/hosts/{id}/admin-credentials/delete", s.handleUIAdminCredentialsDelete)
 		// Schedules tab + create/edit/delete forms.
 		r.Get("/hosts/{id}/schedules", s.handleUISchedulesList)
 		r.Get("/hosts/{id}/schedules/new", s.handleUIScheduleNewGet)
@@ -7,6 +7,9 @@ import (
 	stdhttp "net/http"
 	"strconv"
 	"strings"
+	"time"
+
+	"github.com/oklog/ulid/v2"

 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
@@ -17,10 +20,31 @@ import (
 // the page into three independent forms so saving one section
 // doesn't disturb the others.
 //
-//   GET  /hosts/{id}/repo                       — render
-//   POST /hosts/{id}/repo/credentials           — connection
-//   POST /hosts/{id}/repo/bandwidth             — host-wide bw caps
-//   POST /hosts/{id}/repo/maintenance           — forget/prune/check cadences
+//   GET  /hosts/{id}/repo                            — render
+//   POST /hosts/{id}/repo/credentials                — connection
+//   POST /hosts/{id}/repo/bandwidth                  — host-wide bw caps
+//   POST /hosts/{id}/repo/maintenance                — forget/prune/check cadences
+//   POST /hosts/{id}/admin-credentials               — admin (prune) creds
+//   POST /hosts/{id}/admin-credentials/delete        — clear admin creds
+
+// repoStatsView is a flat, pre-dereferenced projection of
+// store.HostRepoStats for use in templates. Nil pointer fields are
+// collapsed to zero/false and accompanied by a Has* sentinel so the
+// template can distinguish "zero" from "not yet known."
+//
+// Field notes:
+//   - HasTotalSize / HasRawSize / HasLastCheck / HasLastPrune are set
+//     only when the matching pointer on the stats row is non-nil.
+//   - LastCheckAgo and LastPruneAgo are pre-rendered relTimeAgo strings
+//     for the corresponding timestamps.
+//   - LastCheckStatus is copied from the stats row as-is.
+//   - LockPresent has no Has* companion; it stays false when the stored
+//     pointer is nil.
+type repoStatsView struct {
+	HasTotalSize    bool
+	TotalSizeBytes  int64
+	HasRawSize      bool
+	RawSizeBytes    int64
+	HasLastCheck    bool
+	LastCheckAt     time.Time
+	LastCheckAgo    string
+	LastCheckStatus string
+	LockPresent     bool
+	HasLastPrune    bool
+	LastPruneAt     time.Time
+	LastPruneAgo    string
+}

 type hostRepoPage struct {
 	hostChromeData
@@ -30,6 +54,11 @@ type hostRepoPage struct {
 	RepoUsername string
 	HasPassword  bool

+	// Admin credentials (optional, prune-only — separate slot).
+	AdminURL         string
+	AdminUsername    string
+	HasAdminPassword bool
+
 	// Bandwidth (form values, blank means "no cap")
 	BandwidthUp   string
 	BandwidthDown string
@@ -37,6 +66,14 @@ type hostRepoPage struct {
 	// Maintenance row
 	Maintenance store.HostRepoMaintenance

+	// Online mirrors Hub.Connected so Run-now button disabled state is
+	// accurate at render time.
+	Online bool
+
+	// StatsView is a pre-dereferenced projection of host_repo_stats.
+	// Nil when no row exists yet (fresh hosts).
+	StatsView *repoStatsView
+
 	// Snapshots-by-tag — map[group_name]count, plus an "untagged" row.
 	SnapshotsByTag    map[string]int
 	UntaggedSnapshots int
@@ -44,6 +81,7 @@ type hostRepoPage struct {

 	// Inline form-error banners. Empty when no error for that section.
 	CredentialsError string
+	AdminCredsError  string
 	BandwidthError   string
 	MaintenanceError string

@@ -61,7 +99,7 @@ func (s *Server) loadHostRepoPage(r *stdhttp.Request, host store.Host) (*hostRep
 	}

 	// Credentials (redacted).
-	enc, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID)
+	enc, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo)
 	switch {
 	case err == nil:
 		plain, derr := s.deps.AEAD.Decrypt(enc, []byte("host:"+host.ID))
@@ -79,6 +117,60 @@ func (s *Server) loadHostRepoPage(r *stdhttp.Request, host store.Host) (*hostRep
 		return nil, err
 	}

+	// Admin credentials (optional — prune-only slot).
+	adminEnc, aerr := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindAdmin)
+	switch {
+	case aerr == nil:
+		plain, derr := s.deps.AEAD.Decrypt(adminEnc, []byte("host:"+host.ID+":admin"))
+		if derr == nil {
+			var blob repoCredsBlob
+			if jerr := json.Unmarshal(plain, &blob); jerr == nil {
+				p.AdminURL = blob.RepoURL
+				p.AdminUsername = blob.RepoUsername
+				p.HasAdminPassword = blob.RepoPassword != ""
+			}
+		}
+	case errors.Is(aerr, store.ErrNotFound):
+		// admin slot not configured — fine
+	default:
+		return nil, aerr
+	}
+
+	// Online status.
+	if s.deps.Hub != nil {
+		p.Online = s.deps.Hub.Connected(host.ID)
+	}
+
+	// Repo stats (tolerate ErrNotFound — fresh hosts have no row yet).
+	if stats, serr := s.deps.Store.GetHostRepoStats(r.Context(), host.ID); serr == nil {
+		sv := &repoStatsView{}
+		if stats.TotalSizeBytes != nil {
+			sv.HasTotalSize = true
+			sv.TotalSizeBytes = *stats.TotalSizeBytes
+		}
+		if stats.RawSizeBytes != nil {
+			sv.HasRawSize = true
+			sv.RawSizeBytes = *stats.RawSizeBytes
+		}
+		if stats.LastCheckAt != nil {
+			sv.HasLastCheck = true
+			sv.LastCheckAt = *stats.LastCheckAt
+			sv.LastCheckAgo = relTimeAgo(*stats.LastCheckAt)
+		}
+		sv.LastCheckStatus = stats.LastCheckStatus
+		if stats.LockPresent != nil {
+			sv.LockPresent = *stats.LockPresent
+		}
+		if stats.LastPruneAt != nil {
+			sv.HasLastPrune = true
+			sv.LastPruneAt = *stats.LastPruneAt
+			sv.LastPruneAgo = relTimeAgo(*stats.LastPruneAt)
+		}
+		p.StatsView = sv
+	} else if !errors.Is(serr, store.ErrNotFound) {
+		return nil, serr
+	}
+
 	// Bandwidth.
 	if host.BandwidthUpKBps != nil {
 		p.BandwidthUp = strconv.Itoa(*host.BandwidthUpKBps)
@@ -152,11 +244,11 @@ func (s *Server) handleUIHostRepo(w stdhttp.ResponseWriter, r *stdhttp.Request)
 	}
 }

-// renderRepoFormError loads the page state, overlays the section's
-// error banner, and renders with a 422. Save-success goes through a
-// 303 redirect with `?saved=<section>` instead, so this path is for
-// validation failures only.
-func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u *ui.User, host *store.Host, credErr, bwErr, mntErr string) {
+// renderRepoPage loads the page state, overlays section error banners,
+// and renders with a 422. Save-success goes through a 303 redirect
+// with `?saved=<section>` instead, so this path is for validation
+// failures only.
+func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u *ui.User, host *store.Host, credErr, adminErr, bwErr, mntErr string) {
 	page, err := s.loadHostRepoPage(r, *host)
 	if err != nil {
 		slog.Error("ui repo: reload after save", "host_id", host.ID, "err", err)
@@ -164,6 +256,7 @@ func (s *Server) renderRepoPage(w stdhttp.ResponseWriter, r *stdhttp.Request, u
 		return
 	}
 	page.CredentialsError = credErr
+	page.AdminCredsError = adminErr
 	page.BandwidthError = bwErr
 	page.MaintenanceError = mntErr
 	view := s.baseView(u)
@@ -198,13 +291,13 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
 	repoPass := r.PostForm.Get("repo_password") // do NOT trim — operators may use trailing space deliberately

 	if repoURL == "" {
-		s.renderRepoPage(w, r, u, host, "Repo URL is required.", "", "")
+		s.renderRepoPage(w, r, u, host, "Repo URL is required.", "", "", "")
 		return
 	}

 	// Merge with existing blob — same semantics as the JSON PUT.
 	existing := repoCredsBlob{}
-	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID); err == nil {
+	if cur, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo); err == nil {
 		if plain, derr := s.deps.AEAD.Decrypt(cur, []byte("host:"+host.ID)); derr == nil {
 			_ = json.Unmarshal(plain, &existing)
 		}
@@ -217,7 +310,7 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
 	if existing.RepoPassword == "" {
 		s.renderRepoPage(w, r, u, host,
 			"No password on file yet — set one before saving the URL/username.",
-			"", "")
+			"", "", "")
 		return
 	}

@@ -227,7 +320,7 @@ func (s *Server) handleUIRepoCredentialsSave(w stdhttp.ResponseWriter, r *stdhtt
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
 	}
-	if err := s.deps.Store.SetHostCredentials(r.Context(), host.ID, enc); err != nil {
+	if err := s.deps.Store.SetHostCredentials(r.Context(), host.ID, store.CredKindRepo, enc); err != nil {
 		slog.Error("ui repo creds: persist", "err", err)
 		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
 		return
@@ -256,7 +349,7 @@ func (s *Server) handleUIRepoBandwidthSave(w stdhttp.ResponseWriter, r *stdhttp.
 	up, upErr := parseOptionalNonNegInt(r.PostForm.Get("bandwidth_up"))
 	down, downErr := parseOptionalNonNegInt(r.PostForm.Get("bandwidth_down"))
 	if upErr != nil || downErr != nil {
-		s.renderRepoPage(w, r, u, host, "",
+		s.renderRepoPage(w, r, u, host, "", "",
 			"Bandwidth caps must be non-negative whole numbers (or blank for no cap).",
 			"")
 		return
@@ -294,19 +387,19 @@ func (s *Server) handleUIRepoMaintenanceSave(w stdhttp.ResponseWriter, r *stdhtt
 		"forget": forgetCron, "prune": pruneCron, "check": checkCron,
 	} {
 		if expr == "" {
-			s.renderRepoPage(w, r, u, host, "", "",
+			s.renderRepoPage(w, r, u, host, "", "", "",
 				label+" cadence is required.")
 			return
 		}
 		if _, err := cronParser.Parse(expr); err != nil {
-			s.renderRepoPage(w, r, u, host, "", "",
+			s.renderRepoPage(w, r, u, host, "", "", "",
 				label+" cadence didn't parse: "+err.Error())
 			return
 		}
 	}
 	subset, err := strconv.Atoi(subsetStr)
 	if err != nil || subset < 0 || subset > 100 {
-		s.renderRepoPage(w, r, u, host, "", "",
+		s.renderRepoPage(w, r, u, host, "", "", "",
 			"check subset % must be between 0 and 100.")
 		return
 	}
@@ -348,3 +441,143 @@ func parseOptionalNonNegInt(s string) (*int, error) {
 	}
 	return &n, nil
 }
+
+// relTimeAgo formats the age of t as a compact relative-time label
+// for stats panels: "just now" under a minute, then "<n>m ago",
+// "<n>h ago", "<n>d ago", and finally the plain date (YYYY-MM-DD)
+// once t is 30 days old or more. It is a plain Go helper rather than
+// a template func so handlers can call it directly.
+func relTimeAgo(t time.Time) string {
+	const day = 24 * time.Hour
+
+	age := time.Since(t)
+	if age < 0 {
+		// A timestamp in the future is treated as zero age.
+		age = 0
+	}
+	if age < time.Minute {
+		return "just now"
+	}
+	if age < time.Hour {
+		return strconv.Itoa(int(age.Minutes())) + "m ago"
+	}
+	if age < day {
+		return strconv.Itoa(int(age.Hours())) + "h ago"
+	}
+	if age < 30*day {
+		return strconv.Itoa(int(age.Hours()/24)) + "d ago"
+	}
+	return t.Format("2006-01-02")
+}
+
+// handleUIAdminCredentialsSave handles the HTML form POST to
+// /hosts/{id}/admin-credentials. Mirrors handleUIRepoCredentialsSave
+// but operates on the admin slot (store.CredKindAdmin, AAD "host:<id>:admin").
+//
+// Flow:
+//   - All three fields blank: plain 303 back to the repo page, no write
+//     and no ?saved= banner.
+//   - Otherwise the submitted URL and username replace the stored ones;
+//     a blank password keeps the stored password (if any).
+//   - Missing URL or missing password re-renders the page with an
+//     inline admin-section error.
+//   - On success the blob is encrypted and persisted, an audit row is
+//     appended, the creds are pushed to the agent when it is connected,
+//     and the response is a 303 with ?saved=admin_credentials.
+//
+// Failures to read the existing row, to append the audit entry, or to
+// push to the agent are logged but do not fail the request.
+func (s *Server) handleUIAdminCredentialsSave(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	u := s.requireUIUser(w, r)
+	if u == nil {
+		return
+	}
+	host, ok := s.loadHostForUI(w, r)
+	if !ok {
+		return
+	}
+	if err := r.ParseForm(); err != nil {
+		stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
+		return
+	}
+	repoURL := strings.TrimSpace(r.PostForm.Get("repo_url"))
+	repoUser := strings.TrimSpace(r.PostForm.Get("repo_username"))
+	repoPass := r.PostForm.Get("repo_password") // not trimmed — whitespace may be part of the password
+
+	// All blank → no-op save (operator hit Save without filling anything).
+	// We treat this as harmless — they may have wanted to clear via the
+	// Clear button instead. Only validate if they've started filling fields.
+	if repoURL == "" && repoUser == "" && repoPass == "" {
+		stdhttp.Redirect(w, r, "/hosts/"+host.ID+"/repo", stdhttp.StatusSeeOther)
+		return
+	}
+
+	aad := []byte("host:" + host.ID + ":admin")
+
+	// Merge with the existing admin row, if any. A row that cannot be
+	// read or decoded is treated as absent (same outcome as before),
+	// but the failure is logged so it is not invisible.
+	existing := repoCredsBlob{}
+	cur, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindAdmin)
+	switch {
+	case err == nil:
+		plain, derr := s.deps.AEAD.Decrypt(cur, aad)
+		if derr != nil {
+			slog.Warn("ui admin creds: decrypt existing", "host_id", host.ID, "err", derr)
+		} else if jerr := json.Unmarshal(plain, &existing); jerr != nil {
+			slog.Warn("ui admin creds: decode existing", "host_id", host.ID, "err", jerr)
+		}
+	case !errors.Is(err, store.ErrNotFound):
+		slog.Warn("ui admin creds: load existing", "host_id", host.ID, "err", err)
+	}
+	existing.RepoURL = repoURL
+	existing.RepoUsername = repoUser
+	if repoPass != "" {
+		existing.RepoPassword = repoPass
+	}
+
+	if existing.RepoURL == "" {
+		s.renderRepoPage(w, r, u, host, "", "Repo URL is required.", "", "")
+		return
+	}
+	if existing.RepoPassword == "" {
+		s.renderRepoPage(w, r, u, host, "",
+			"No password on file yet — set one before saving the URL/username.",
+			"", "")
+		return
+	}
+
+	enc, err := s.encryptRepoCreds(existing, aad)
+	if err != nil {
+		slog.Error("ui admin creds: encrypt", "err", err)
+		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
+		return
+	}
+	if err := s.deps.Store.SetHostCredentials(r.Context(), host.ID, store.CredKindAdmin, enc); err != nil {
+		slog.Error("ui admin creds: persist", "err", err)
+		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
+		return
+	}
+	// Audit is best-effort: the credentials are already stored, so a
+	// failed audit write is logged rather than surfaced to the operator.
+	if aerr := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
+		ID:         ulid.Make().String(),
+		UserID:     &u.ID,
+		Actor:      "user",
+		Action:     "host.admin_credentials_set",
+		TargetKind: ptr("host"),
+		TargetID:   &host.ID,
+		TS:         nowUTC(),
+	}); aerr != nil {
+		slog.Warn("ui admin creds: audit", "host_id", host.ID, "err", aerr)
+	}
+	if s.deps.Hub != nil && s.deps.Hub.Connected(host.ID) {
+		if perr := s.pushAdminCredsToAgent(r.Context(), host.ID); perr != nil {
+			slog.Warn("ui admin creds: push to agent", "host_id", host.ID, "err", perr)
+		}
+	}
+	stdhttp.Redirect(w, r, "/hosts/"+host.ID+"/repo?saved=admin_credentials", stdhttp.StatusSeeOther)
+}
+
+// handleUIAdminCredentialsDelete handles the HTML form POST to
+// /hosts/{id}/admin-credentials/delete. Removes the admin slot and
+// redirects back to the repo page (reusing the ?saved=admin_credentials
+// banner). Treats "not found" as success (idempotent delete from the
+// operator's point of view); an audit row is appended only when a row
+// was actually removed. A failed audit write is logged, not surfaced.
+//
+// NOTE(review): unlike the save handler, nothing here pushes the
+// removal to a connected agent — confirm whether the agent should be
+// told to drop its copy of the admin credentials.
+func (s *Server) handleUIAdminCredentialsDelete(w stdhttp.ResponseWriter, r *stdhttp.Request) {
+	u := s.requireUIUser(w, r)
+	if u == nil {
+		return
+	}
+	host, ok := s.loadHostForUI(w, r)
+	if !ok {
+		return
+	}
+
+	err := s.deps.Store.DeleteHostCredentials(r.Context(), host.ID, store.CredKindAdmin)
+	if err != nil && !errors.Is(err, store.ErrNotFound) {
+		slog.Error("ui admin creds: delete", "host_id", host.ID, "err", err)
+		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
+		return
+	}
+	if err == nil {
+		// Best-effort audit: the row is already gone, so only log on failure.
+		if aerr := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
+			ID:         ulid.Make().String(),
+			UserID:     &u.ID,
+			Actor:      "user",
+			Action:     "host.admin_credentials_deleted",
+			TargetKind: ptr("host"),
+			TargetID:   &host.ID,
+			TS:         nowUTC(),
+		}); aerr != nil {
+			slog.Warn("ui admin creds: audit delete", "host_id", host.ID, "err", aerr)
+		}
+	}
+	stdhttp.Redirect(w, r, "/hosts/"+host.ID+"/repo?saved=admin_credentials", stdhttp.StatusSeeOther)
+}
@@ -0,0 +1,400 @@
+// ui_repo_test.go — integration tests for the Repo page HTML UI.
+// Covers: admin-creds form rendering, stats panel, lock banner,
+// run-now button disabled states, admin-creds form save/delete.
+package http
+
+import (
+	"context"
+	"io"
+	stdhttp "net/http"
+	"net/http/httptest"
+	"net/url"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// newTestServerWithUI creates a server that includes the UI renderer so
+// HTML page tests can render and inspect the full template output.
+//
+// It opens a fresh store in a temp dir, generates a secret key, builds
+// the AEAD and UI renderer, and serves the handler over httptest.
+// Returns the server, its base URL, and the store. Any setup failure
+// aborts the test immediately so later assertions never run against a
+// half-built server.
+func newTestServerWithUI(t *testing.T) (*Server, string, *store.Store) {
+	t.Helper()
+	dir := t.TempDir()
+	st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
+	if err != nil {
+		t.Fatalf("store: %v", err)
+	}
+	t.Cleanup(func() { _ = st.Close() })
+
+	keyPath := filepath.Join(dir, "secret.key")
+	if err := crypto.GenerateKeyFile(keyPath); err != nil {
+		t.Fatalf("generate key file: %v", err)
+	}
+	key, err := crypto.LoadKeyFromFile(keyPath)
+	if err != nil {
+		t.Fatalf("load key: %v", err)
+	}
+	aead, err := crypto.NewAEAD(key)
+	if err != nil {
+		t.Fatalf("new aead: %v", err)
+	}
+
+	renderer, err := ui.New()
+	if err != nil {
+		t.Fatalf("ui.New: %v", err)
+	}
+
+	deps := Deps{
+		Cfg:   config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath},
+		Store: st,
+		AEAD:  aead,
+		Hub:   ws.NewHub(),
+		UI:    renderer,
+	}
+	s := New(deps)
+	ts := httptest.NewServer(s.srv.Handler)
+	t.Cleanup(ts.Close)
+	return s, ts.URL, st
+}
+
+// getRepoPage fetches /hosts/{id}/repo and returns the body string.
+// Redirects are not followed. The test fails if the request errors,
+// the status is not 200, or the body cannot be read. A nil cookie is
+// allowed (the request is then sent unauthenticated), matching
+// postForm.
+func getRepoPage(t *testing.T, baseURL, hostID string, cookie *stdhttp.Cookie) string {
+	t.Helper()
+	client := &stdhttp.Client{
+		CheckRedirect: func(_ *stdhttp.Request, _ []*stdhttp.Request) error {
+			return stdhttp.ErrUseLastResponse
+		},
+	}
+	req, err := stdhttp.NewRequest("GET", baseURL+"/hosts/"+hostID+"/repo", nil)
+	if err != nil {
+		t.Fatalf("new request: %v", err)
+	}
+	if cookie != nil {
+		// AddCookie dereferences its argument, so guard against nil.
+		req.AddCookie(cookie)
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		t.Fatalf("GET /hosts/%s/repo: %v", hostID, err)
+	}
+	defer res.Body.Close()
+	if res.StatusCode != stdhttp.StatusOK {
+		t.Fatalf("GET /hosts/%s/repo: want 200, got %d", hostID, res.StatusCode)
+	}
+	raw, err := io.ReadAll(res.Body)
+	if err != nil {
+		t.Fatalf("GET /hosts/%s/repo: read body: %v", hostID, err)
+	}
+	return string(raw)
+}
+
+// postForm sends data as an application/x-www-form-urlencoded POST to
+// baseURL+path without following redirects. It returns the response
+// status code and the Location header. The cookie is attached only
+// when non-nil.
+func postForm(t *testing.T, baseURL, path string, data url.Values, cookie *stdhttp.Cookie) (int, string) {
+	t.Helper()
+
+	req, err := stdhttp.NewRequest("POST", baseURL+path, strings.NewReader(data.Encode()))
+	if err != nil {
+		t.Fatalf("new request: %v", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	if cookie != nil {
+		req.AddCookie(cookie)
+	}
+
+	// Stop at the first response so callers can inspect the 303 itself.
+	noFollow := func(_ *stdhttp.Request, _ []*stdhttp.Request) error {
+		return stdhttp.ErrUseLastResponse
+	}
+	httpClient := &stdhttp.Client{CheckRedirect: noFollow}
+
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		t.Fatalf("POST %s: %v", path, err)
+	}
+	defer resp.Body.Close()
+	return resp.StatusCode, resp.Header.Get("Location")
+}
+
+// ----- rendering tests ------------------------------------------------
+
+// TestUIRepoPageRendersAdminCredsForm renders /hosts/{id}/repo for a
+// host that has no admin creds and checks that both the admin-creds
+// section heading and the "not yet set" password placeholder appear.
+func TestUIRepoPageRendersAdminCredsForm(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "repo-page-admin-form")
+
+	page := getRepoPage(t, baseURL, hostID, cookie)
+
+	expectations := []struct {
+		needle  string
+		failure string
+	}{
+		{"Admin credentials", "page missing 'Admin credentials' heading"},
+		{"— not yet set —", "page missing '— not yet set —' placeholder for admin password"},
+	}
+	for _, e := range expectations {
+		if !strings.Contains(page, e.needle) {
+			t.Error(e.failure)
+		}
+	}
+}
+
+// TestUIRepoPageRendersStatsPanel seeds a host_repo_stats row, renders
+// the repo page, and checks that the "Repo health" panel and the
+// seeded values show up in the output.
+func TestUIRepoPageRendersStatsPanel(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "repo-page-stats")
+
+	var (
+		sizeBytes = int64(5_000_000_000) // 5 GB
+		checkedAt = time.Now().Add(-2 * time.Hour).UTC()
+	)
+	seed := store.HostRepoStats{
+		TotalSizeBytes:  &sizeBytes,
+		LastCheckAt:     &checkedAt,
+		LastCheckStatus: "ok",
+	}
+	if err := st.UpsertHostRepoStats(context.Background(), hostID, seed); err != nil {
+		t.Fatalf("upsert stats: %v", err)
+	}
+
+	page := getRepoPage(t, baseURL, hostID, cookie)
+
+	// The bytes helper renders 5 GB as "5.0 GB" (with a <span> unit
+	// suffix), hence the bare "5.0" probe.
+	for _, probe := range []struct{ needle, failure string }{
+		{"Repo health", "page missing 'Repo health' heading"},
+		{"5.0", "page missing '5.0' (total size formatted bytes)"},
+		{"ok", "page missing 'ok' check status"},
+	} {
+		if !strings.Contains(page, probe.needle) {
+			t.Error(probe.failure)
+		}
+	}
+}
+
+// TestUIRepoPageRendersLockBanner seeds stats with LockPresent=true and
+// checks that the rendered page carries the stale-lock warning.
+func TestUIRepoPageRendersLockBanner(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "repo-page-lock")
+
+	locked := true
+	seed := store.HostRepoStats{LockPresent: &locked}
+	if err := st.UpsertHostRepoStats(context.Background(), hostID, seed); err != nil {
+		t.Fatalf("upsert stats: %v", err)
+	}
+
+	if page := getRepoPage(t, baseURL, hostID, cookie); !strings.Contains(page, "Stale lock detected") {
+		t.Error("page missing stale lock warning")
+	}
+}
+
+// TestUIRepoRunNowButtonsDisabledWhenOffline renders the repo page for
+// a host that never connected to the Hub and checks that the Run-now
+// section contains at least three "disabled" occurrences (one per
+// button).
+func TestUIRepoRunNowButtonsDisabledWhenOffline(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "repo-page-offline")
+
+	// No WS connection → Hub.Connected returns false.
+	page := getRepoPage(t, baseURL, hostID, cookie)
+
+	// The section under test is the slice of markup between the
+	// "Run now" heading and the "Danger zone" heading; the disabled
+	// attribute is added conditionally on each button inside it.
+	start := strings.Index(page, "Run now · one-time")
+	end := strings.Index(page, "Danger zone")
+	if start < 0 {
+		t.Fatal("page missing 'Run now · one-time' section")
+	}
+	if end < 0 {
+		t.Fatal("page missing 'Danger zone' section")
+	}
+	if n := strings.Count(page[start:end], "disabled"); n < 3 {
+		t.Errorf("expected at least 3 disabled attributes in Run-now section (one per button), got %d", n)
+	}
+}
+
+// TestUIRepoPruneButtonDisabledWithoutAdminCreds — host has no admin
+// creds set. Assert the rendered page carries the "set admin
+// credentials first" hint for the prune button.
+//
+// The host is NOT registered with the Hub here (faking a WS connection
+// is impractical in this test), so the page is rendered in the offline
+// state.
+// NOTE(review): this relies on the template emitting the admin-creds
+// hint regardless of online state — confirm against the template.
+func TestUIRepoPruneButtonDisabledWithoutAdminCreds(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "repo-page-prune-no-admin")
+
+	body := getRepoPage(t, baseURL, hostID, cookie)
+
+	if !strings.Contains(body, "set admin credentials first") {
+		t.Error("page missing 'set admin credentials first' on prune button")
+	}
+}
+
+// ----- admin-creds form save/delete tests ----------------------------
+
+// TestUIAdminCredentialsSaveRoundTrip posts a filled-in admin-creds
+// form, expects a 303 carrying saved=admin_credentials, re-fetches the
+// repo page to confirm the "stored, leave blank to keep" placeholder,
+// and finally checks that an audit row attributed to the logged-in
+// user was written.
+func TestUIAdminCredentialsSaveRoundTrip(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie, userID := loginAsAdminWithID(t, st)
+	hostID := makeHost(t, st, "admin-save-roundtrip")
+
+	// POST admin credentials.
+	form := url.Values{}
+	form.Set("repo_url", "rest:http://admin.example/h")
+	form.Set("repo_username", "admin-user")
+	form.Set("repo_password", "s3cr3t-admin")
+	status, loc := postForm(t, baseURL, "/hosts/"+hostID+"/admin-credentials", form, cookie)
+	if status != stdhttp.StatusSeeOther {
+		t.Fatalf("save: want 303, got %d", status)
+	}
+	if !strings.Contains(loc, "saved=admin_credentials") {
+		t.Errorf("redirect location should contain saved=admin_credentials, got %q", loc)
+	}
+
+	// Follow redirect.
+	if page := getRepoPage(t, baseURL, hostID, cookie); !strings.Contains(page, "stored, leave blank to keep") {
+		t.Error("after save: page missing 'stored, leave blank to keep' placeholder for admin password")
+	}
+
+	// Audit row should exist.
+	rows, err := st.DB().QueryContext(context.Background(),
+		`SELECT action, user_id FROM audit_log WHERE target_id = ? AND action = 'host.admin_credentials_set'`,
+		hostID)
+	if err != nil {
+		t.Fatalf("query audit: %v", err)
+	}
+	defer rows.Close()
+
+	matched := 0
+	for rows.Next() {
+		var (
+			action string
+			actor  *string
+		)
+		if err := rows.Scan(&action, &actor); err != nil {
+			t.Fatalf("scan: %v", err)
+		}
+		matched++
+		if actor == nil || *actor != userID {
+			t.Errorf("audit row user_id: want %q, got %v", userID, actor)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		t.Fatalf("rows.Err: %v", err)
+	}
+	if matched == 0 {
+		t.Error("audit row with action='host.admin_credentials_set' not found")
+	}
+}
+
+// TestUIAdminCredentialsDelete seeds an admin-creds row directly in the
+// store, posts to the delete route, and checks three things: the 303
+// redirect, that the row can no longer be read, and that an audit row
+// attributed to the logged-in user was written.
+func TestUIAdminCredentialsDelete(t *testing.T) {
+	t.Parallel()
+	srv, baseURL, st := newTestServerWithUI(t)
+	cookie, userID := loginAsAdminWithID(t, st)
+	hostID := makeHost(t, st, "admin-delete")
+
+	ctx := context.Background()
+
+	// Seed admin creds directly.
+	seed := repoCredsBlob{
+		RepoURL:      "rest:http://admin.example/h",
+		RepoPassword: "pw",
+	}
+	enc, err := srv.encryptRepoCreds(seed, []byte("host:"+hostID+":admin"))
+	if err != nil {
+		t.Fatalf("encrypt: %v", err)
+	}
+	if err := st.SetHostCredentials(ctx, hostID, store.CredKindAdmin, enc); err != nil {
+		t.Fatalf("set admin creds: %v", err)
+	}
+
+	// POST to delete route.
+	deletePath := "/hosts/" + hostID + "/admin-credentials/delete"
+	status, loc := postForm(t, baseURL, deletePath, url.Values{}, cookie)
+	if status != stdhttp.StatusSeeOther {
+		t.Fatalf("delete: want 303, got %d", status)
+	}
+	if !strings.Contains(loc, "saved=admin_credentials") {
+		t.Errorf("redirect location: want saved=admin_credentials, got %q", loc)
+	}
+
+	// Admin row should be gone.
+	_, getErr := st.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
+	if getErr == nil {
+		t.Error("admin creds row still present after delete")
+	}
+
+	// Audit row.
+	rows, err := st.DB().QueryContext(ctx,
+		`SELECT action, user_id FROM audit_log WHERE target_id = ? AND action = 'host.admin_credentials_deleted'`,
+		hostID)
+	if err != nil {
+		t.Fatalf("query audit: %v", err)
+	}
+	defer rows.Close()
+
+	matched := 0
+	for rows.Next() {
+		var (
+			action string
+			actor  *string
+		)
+		if err := rows.Scan(&action, &actor); err != nil {
+			t.Fatalf("scan: %v", err)
+		}
+		matched++
+		if actor == nil || *actor != userID {
+			t.Errorf("audit row user_id: want %q, got %v", userID, actor)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		t.Fatalf("rows.Err: %v", err)
+	}
+	if matched == 0 {
+		t.Error("audit row with action='host.admin_credentials_deleted' not found")
+	}
+}
+
+// TestUIAdminCredentialsDeleteIdempotent posts to the delete route for
+// a host that has no admin creds and expects the same 303 redirect as
+// a real delete (no 404 / 500).
+func TestUIAdminCredentialsDeleteIdempotent(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "admin-delete-noop")
+
+	deletePath := "/hosts/" + hostID + "/admin-credentials/delete"
+	if status, _ := postForm(t, baseURL, deletePath, url.Values{}, cookie); status != stdhttp.StatusSeeOther {
+		t.Fatalf("delete (noop): want 303, got %d", status)
+	}
+}
+
+// TestUIAdminCredentialsSaveAllBlankIsNoop posts a form whose three
+// fields are all empty and expects a plain 303 (no ?saved= banner)
+// with no admin-creds row created.
+func TestUIAdminCredentialsSaveAllBlankIsNoop(t *testing.T) {
+	t.Parallel()
+	_, baseURL, st := newTestServerWithUI(t)
+	cookie := loginAsAdmin(t, st)
+	hostID := makeHost(t, st, "admin-save-blank")
+
+	blank := url.Values{}
+	for _, field := range []string{"repo_url", "repo_username", "repo_password"} {
+		blank.Set(field, "")
+	}
+	status, loc := postForm(t, baseURL, "/hosts/"+hostID+"/admin-credentials", blank, cookie)
+	if status != stdhttp.StatusSeeOther {
+		t.Fatalf("blank save: want 303, got %d", status)
+	}
+	// All-blank is a no-op: redirect must not carry ?saved= banner.
+	if strings.Contains(loc, "?saved=") {
+		t.Errorf("blank save: redirect Location %q must not contain ?saved=", loc)
+	}
+
+	// No admin row should have been created.
+	_, getErr := st.GetHostCredentials(context.Background(), hostID, store.CredKindAdmin)
+	if getErr == nil {
+		t.Error("admin creds row created unexpectedly for blank save")
+	}
+}
@@ -0,0 +1,116 @@
+// Package maintenance owns the server-side scheduler that fires
+// forget/prune/check on the cadences operators set on
+// host_repo_maintenance rows. Independent of the agent's local cron
+// (which now only handles backup schedules).
+//
+// The ticker is intentionally side-effect-free at the package
+// boundary: it asks an injected Backend for current state and emits
+// a list of Decisions for the caller to act on. Easy to unit-test
+// without a running server.
+package maintenance
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"github.com/robfig/cron/v3"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// Decision is one cadence-driven dispatch the ticker recommends.
+// SubsetPct is populated only when Kind == "check"; ignored for
+// "forget" and "prune".
+//
+// HostID is the HostID of the maintenance row that produced the
+// decision. For "forget" and "prune" Decide passes 0 as SubsetPct; for
+// "check" it carries the row's CheckSubsetPct.
+type Decision struct {
+	HostID    string
+	Kind      string // "forget" | "prune" | "check"
+	SubsetPct int
+}
+
+// Backend is the subset of *store.Store the ticker depends on.
+// Constrained interface so tests can pass a fake.
+type Backend interface {
+	// ListAllMaintenance returns the maintenance rows Decide iterates
+	// over; an error aborts the whole Decide call.
+	ListAllMaintenance(ctx context.Context) ([]store.HostRepoMaintenance, error)
+	// LatestJobByKind returns the job whose CreatedAt anchors the next
+	// cron fire-instant for (hostID, kind). The ticker treats
+	// store.ErrNotFound as "no previous run"; any other error makes it
+	// skip that kind for the current tick.
+	LatestJobByKind(ctx context.Context, hostID, kind string) (*store.Job, error)
+}
+
+// Ticker decides which cadence-driven jobs are due to fire at a
+// given instant. Stateless — the only state lives in the Backend.
+//
+// backend supplies maintenance rows and latest-job lookups; parser is
+// the cron parser used for every cadence expression (configured in
+// New with the minute/hour/day-of-month/month/day-of-week fields).
+type Ticker struct {
+	backend Backend
+	parser  cron.Parser
+}
+
+// New returns a Ticker that reads its state from b and parses cadence
+// expressions with a five-field cron parser (minute, hour,
+// day-of-month, month, day-of-week).
+func New(b Backend) *Ticker {
+	fiveField := cron.NewParser(cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow)
+	return &Ticker{backend: b, parser: fiveField}
+}
+
+// Decide reports which maintenance jobs are due at `now`. For every
+// maintenance row it evaluates the forget, prune and check cadences
+// (in that order) and collects a Decision for each one that is due.
+//
+// Acting on the result is the caller's job: checking host online
+// state, persisting the job row, and shipping command.run. An empty
+// maintenance table yields a nil slice and a nil error — a fresh
+// install is the most common case. A ListAllMaintenance failure is
+// returned unchanged.
+func (t *Ticker) Decide(ctx context.Context, now time.Time) ([]Decision, error) {
+	rows, err := t.backend.ListAllMaintenance(ctx)
+	if err != nil {
+		return nil, err
+	}
+	var due []Decision
+	for _, m := range rows {
+		// Only "check" carries a subset percentage; the others pass 0.
+		cadences := [...]struct {
+			kind    string
+			expr    string
+			enabled bool
+			subset  int
+		}{
+			{"forget", m.ForgetCron, m.ForgetEnabled, 0},
+			{"prune", m.PruneCron, m.PruneEnabled, 0},
+			{"check", m.CheckCron, m.CheckEnabled, m.CheckSubsetPct},
+		}
+		for _, c := range cadences {
+			d, ok := t.dueFor(ctx, now, m.HostID, c.kind, c.expr, c.enabled, c.subset)
+			if !ok {
+				continue
+			}
+			due = append(due, d)
+		}
+	}
+	return due, nil
+}
+
+// dueFor reports whether the cron expression has a fire-instant that
+// is strictly after the anchor and at-or-before now, and if so returns
+// the Decision to dispatch.
+//
+// The anchor depends on what LatestJobByKind returns:
+//   - a job: its CreatedAt.
+//   - store.ErrNotFound: now - 24h. This is the first-run case; the
+//     lookback is capped so a brand-new host doesn't fire 30 days of
+//     missed monthly-checks on first tick.
+//   - anything else (a hard error, or no job and no error): this kind
+//     is skipped for this host on this tick.
+//
+// A disabled cadence or an empty expression is skipped silently, and
+// so is an expression that fails to parse — the schedule/maintenance
+// routes already validate cron at write time, so that check is purely
+// defensive.
+func (t *Ticker) dueFor(ctx context.Context, now time.Time, hostID, kind, expr string, enabled bool, subset int) (Decision, bool) {
+	var skip Decision
+
+	if expr == "" || !enabled {
+		return skip, false
+	}
+	sched, parseErr := t.parser.Parse(expr)
+	if parseErr != nil {
+		return skip, false
+	}
+
+	var anchor time.Time
+	last, err := t.backend.LatestJobByKind(ctx, hostID, kind)
+	if err == nil && last != nil {
+		anchor = last.CreatedAt
+	} else if errors.Is(err, store.ErrNotFound) {
+		anchor = now.Add(-24 * time.Hour)
+	} else {
+		// Hard error — skip this kind on this tick.
+		return skip, false
+	}
+
+	fire := sched.Next(anchor)
+	if fire.IsZero() || fire.After(now) {
+		return skip, false
+	}
+	return Decision{HostID: hostID, Kind: kind, SubsetPct: subset}, true
+}
@@ -0,0 +1,315 @@
+package maintenance
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// fakeBackend implements Backend with table-driven canned responses.
+// With no fields set it lists no rows and answers every
+// LatestJobByKind lookup with store.ErrNotFound.
+type fakeBackend struct {
+	// rows is what ListAllMaintenance returns when listErr is nil.
+	rows []store.HostRepoMaintenance
+	// jobs[hostID][kind] -> job (if present, returned). If absent,
+	// fakeBackend returns ErrNotFound by default.
+	jobs map[string]map[string]*store.Job
+	// hardErr forces a non-ErrNotFound failure for a given (host, kind).
+	// It is consulted before jobs, so it wins when both have an entry.
+	hardErr map[string]map[string]error
+	// listErr forces ListAllMaintenance to fail.
+	listErr error
+}
+
+// ListAllMaintenance hands back the canned rows, unless listErr is
+// set, in which case it fails with that error and returns no rows.
+func (f *fakeBackend) ListAllMaintenance(_ context.Context) ([]store.HostRepoMaintenance, error) {
+	if failure := f.listErr; failure != nil {
+		return nil, failure
+	}
+	return f.rows, nil
+}
+
+// LatestJobByKind answers from the canned tables: a forced error for
+// (hostID, kind) is returned first, then a canned job if one exists,
+// and store.ErrNotFound otherwise.
+func (f *fakeBackend) LatestJobByKind(_ context.Context, hostID, kind string) (*store.Job, error) {
+	// Indexing a nil or missing inner map yields the zero value, so
+	// the lookups can be chained without presence checks.
+	if forced := f.hardErr[hostID][kind]; forced != nil {
+		return nil, forced
+	}
+	if job := f.jobs[hostID][kind]; job != nil {
+		return job, nil
+	}
+	return nil, store.ErrNotFound
+}
+
+// mustTime converts an RFC3339 timestamp literal into a time.Time and
+// aborts the calling test if the literal is malformed.
+func mustTime(t *testing.T, s string) time.Time {
+	t.Helper()
+	parsed, parseErr := time.Parse(time.RFC3339, s)
+	if parseErr != nil {
+		t.Fatalf("parse %q: %v", s, parseErr)
+	}
+	return parsed
+}
+
+// TestTickerSkipsDisabled checks that a host whose three kinds all
+// carry a cron expression but are switched off yields no decisions.
+func TestTickerSkipsDisabled(t *testing.T) {
+	t.Parallel()
+	row := store.HostRepoMaintenance{
+		HostID:        "h1",
+		ForgetCron:    "0 3 * * *",
+		ForgetEnabled: false,
+		PruneCron:     "0 4 * * *",
+		PruneEnabled:  false,
+		CheckCron:     "0 5 * * *",
+		CheckEnabled:  false,
+	}
+	ticker := New(&fakeBackend{rows: []store.HostRepoMaintenance{row}})
+
+	decisions, err := ticker.Decide(context.Background(), mustTime(t, "2026-05-04T04:00:00Z"))
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) != 0 {
+		t.Errorf("expected no decisions, got %+v", decisions)
+	}
+}
+
+// TestTickerSkipsEmptyCron checks that enabled kinds with an empty
+// cron expression yield no decisions.
+func TestTickerSkipsEmptyCron(t *testing.T) {
+	t.Parallel()
+	row := store.HostRepoMaintenance{
+		HostID:        "h1",
+		ForgetCron:    "",
+		ForgetEnabled: true,
+		PruneCron:     "",
+		PruneEnabled:  true,
+		CheckCron:     "",
+		CheckEnabled:  true,
+	}
+	ticker := New(&fakeBackend{rows: []store.HostRepoMaintenance{row}})
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+
+	decisions, err := ticker.Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) != 0 {
+		t.Errorf("expected no decisions, got %+v", decisions)
+	}
+}
+
+// TestTickerFiresWhenOverdue checks that a daily forget cron fires
+// when the most recent forget job was created 25h before now.
+func TestTickerFiresWhenOverdue(t *testing.T) {
+	t.Parallel()
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+	prev := &store.Job{
+		ID:        "j1",
+		HostID:    "h1",
+		Kind:      "forget",
+		CreatedAt: now.Add(-25 * time.Hour),
+	}
+	backend := &fakeBackend{
+		rows: []store.HostRepoMaintenance{{
+			HostID:        "h1",
+			ForgetCron:    "0 3 * * *",
+			ForgetEnabled: true,
+		}},
+		jobs: map[string]map[string]*store.Job{
+			"h1": {"forget": prev},
+		},
+	}
+
+	decisions, err := New(backend).Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) == 1 && decisions[0].Kind == "forget" && decisions[0].HostID == "h1" {
+		return
+	}
+	t.Errorf("expected one forget decision, got %+v", decisions)
+}
+
+// TestTickerSuppressesWhenRecent checks that a daily 03:00 forget cron
+// does not fire at 04:00 when a forget job was already created at
+// 03:30 the same day.
+func TestTickerSuppressesWhenRecent(t *testing.T) {
+	t.Parallel()
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+	prev := &store.Job{
+		ID:        "j1",
+		HostID:    "h1",
+		Kind:      "forget",
+		CreatedAt: mustTime(t, "2026-05-04T03:30:00Z"),
+	}
+	backend := &fakeBackend{
+		rows: []store.HostRepoMaintenance{{
+			HostID:        "h1",
+			ForgetCron:    "0 3 * * *",
+			ForgetEnabled: true,
+		}},
+		jobs: map[string]map[string]*store.Job{
+			"h1": {"forget": prev},
+		},
+	}
+
+	decisions, err := New(backend).Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) != 0 {
+		t.Errorf("expected no decisions, got %+v", decisions)
+	}
+}
+
+// TestTickerFirstRunAnchorBoundedAt24h exercises the no-prior-job
+// path, where the anchor falls back to now-24h.
+func TestTickerFirstRunAnchorBoundedAt24h(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+
+	// Case 1: daily 03:00 cron, now=04:00. The fallback anchor is the
+	// previous day at 04:00, so today's 03:00 fire lies inside the
+	// window and must be reported.
+	daily := New(&fakeBackend{
+		rows: []store.HostRepoMaintenance{{
+			HostID:        "h1",
+			ForgetCron:    "0 3 * * *",
+			ForgetEnabled: true,
+		}},
+	})
+	dailyGot, err := daily.Decide(ctx, mustTime(t, "2026-05-04T04:00:00Z"))
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(dailyGot) != 1 {
+		t.Errorf("case1: expected 1 decision, got %+v", dailyGot)
+	}
+
+	// Case 2: a cron that fires less often than once per 24h must not
+	// fire when its most recent fire is outside the 24h lookback.
+	// Weekly cron (Mondays 03:00), now = Tuesday 03:00: the anchor
+	// now-24h is Monday 03:00 itself, so the next fire after the
+	// anchor is the following Monday, which is later than now.
+	// 2026-05-04 is a Monday, 2026-05-05 a Tuesday.
+	weekly := New(&fakeBackend{
+		rows: []store.HostRepoMaintenance{{
+			HostID:        "h2",
+			ForgetCron:    "0 3 * * 1",
+			ForgetEnabled: true,
+		}},
+	})
+	weeklyGot, err := weekly.Decide(ctx, mustTime(t, "2026-05-05T03:00:00Z"))
+	if err != nil {
+		t.Fatalf("Decide case2: %v", err)
+	}
+	if len(weeklyGot) != 0 {
+		t.Errorf("case2: expected no decisions (cron fires < once/24h, prior fire was Monday 03:00 which is exactly 24h ago and anchor=now-24h means next-after is next Monday), got %+v", weeklyGot)
+	}
+}
+
+// TestTickerCheckDecisionCarriesSubset checks that a due check
+// decision carries the row's CheckSubsetPct.
+func TestTickerCheckDecisionCarriesSubset(t *testing.T) {
+	t.Parallel()
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+	// The previous check job is 30 days old, so the cron is overdue.
+	prev := &store.Job{
+		ID:        "j1",
+		HostID:    "h1",
+		Kind:      "check",
+		CreatedAt: now.Add(-30 * 24 * time.Hour),
+	}
+	backend := &fakeBackend{
+		rows: []store.HostRepoMaintenance{{
+			HostID:         "h1",
+			CheckCron:      "0 3 * * *",
+			CheckEnabled:   true,
+			CheckSubsetPct: 25,
+		}},
+		jobs: map[string]map[string]*store.Job{
+			"h1": {"check": prev},
+		},
+	}
+
+	decisions, err := New(backend).Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) == 1 && decisions[0].Kind == "check" && decisions[0].SubsetPct == 25 {
+		return
+	}
+	t.Errorf("expected check decision with SubsetPct=25, got %+v", decisions)
+}
+
+// TestTickerHardJobErrorSkipsKind checks that a non-ErrNotFound lookup
+// failure for one kind suppresses only that kind: forget is dropped
+// while check, on the same host, is still reported.
+func TestTickerHardJobErrorSkipsKind(t *testing.T) {
+	t.Parallel()
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+	lookupFailure := errors.New("synthetic db error")
+	// check has an ordinary 25h-old latest job and should still fire.
+	checkJob := &store.Job{
+		ID:        "jc",
+		HostID:    "h1",
+		Kind:      "check",
+		CreatedAt: now.Add(-25 * time.Hour),
+	}
+	backend := &fakeBackend{
+		rows: []store.HostRepoMaintenance{{
+			HostID:        "h1",
+			ForgetCron:    "0 3 * * *",
+			ForgetEnabled: true,
+			CheckCron:     "0 3 * * *",
+			CheckEnabled:  true,
+		}},
+		jobs: map[string]map[string]*store.Job{
+			"h1": {"check": checkJob},
+		},
+		hardErr: map[string]map[string]error{
+			"h1": {"forget": lookupFailure},
+		},
+	}
+
+	decisions, err := New(backend).Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	// Only the check decision should land — forget is skipped.
+	if len(decisions) == 1 && decisions[0].Kind == "check" {
+		return
+	}
+	t.Errorf("expected only check decision, got %+v", decisions)
+}
+
+// TestTickerHandlesMultipleHosts checks that decisions are produced
+// per host: ha gets its forget, hb gets its check, and hb's disabled
+// prune is not reported.
+func TestTickerHandlesMultipleHosts(t *testing.T) {
+	t.Parallel()
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+	stale := now.Add(-25 * time.Hour)
+
+	hostA := store.HostRepoMaintenance{
+		HostID:        "ha",
+		ForgetCron:    "0 3 * * *",
+		ForgetEnabled: true,
+	}
+	hostB := store.HostRepoMaintenance{
+		HostID:       "hb",
+		CheckCron:    "0 3 * * *",
+		CheckEnabled: true,
+		PruneCron:    "0 4 * * *",
+		PruneEnabled: false, // disabled — should not fire
+	}
+	backend := &fakeBackend{
+		rows: []store.HostRepoMaintenance{hostA, hostB},
+		jobs: map[string]map[string]*store.Job{
+			"ha": {"forget": &store.Job{ID: "j1", HostID: "ha", Kind: "forget", CreatedAt: stale}},
+			"hb": {"check": &store.Job{ID: "j2", HostID: "hb", Kind: "check", CreatedAt: stale}},
+		},
+	}
+
+	decisions, err := New(backend).Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) != 2 {
+		t.Fatalf("expected 2 decisions, got %d: %+v", len(decisions), decisions)
+	}
+
+	// Index the decided kind by host so the assertions do not depend
+	// on the order of the returned slice.
+	kindByHost := map[string]string{}
+	for i := range decisions {
+		kindByHost[decisions[i].HostID] = decisions[i].Kind
+	}
+	if got := kindByHost["ha"]; got != "forget" {
+		t.Errorf("ha: expected forget, got %q", got)
+	}
+	if got := kindByHost["hb"]; got != "check" {
+		t.Errorf("hb: expected check, got %q", got)
+	}
+}
+
+// TestTickerInvalidCronSkipsSilently checks that an unparseable cron
+// expression produces neither a decision nor an error from Decide.
+func TestTickerInvalidCronSkipsSilently(t *testing.T) {
+	t.Parallel()
+	row := store.HostRepoMaintenance{
+		HostID:        "h1",
+		ForgetCron:    "not a cron",
+		ForgetEnabled: true,
+	}
+	ticker := New(&fakeBackend{rows: []store.HostRepoMaintenance{row}})
+	now := mustTime(t, "2026-05-04T04:00:00Z")
+
+	decisions, err := ticker.Decide(context.Background(), now)
+	if err != nil {
+		t.Fatalf("Decide: %v", err)
+	}
+	if len(decisions) != 0 {
+		t.Errorf("expected no decisions for invalid cron, got %+v", decisions)
+	}
+}
@@ -267,8 +267,34 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
 			deps.OnScheduleFire(ctx, hostID, c, p.ScheduleID, p.ScheduledAt)
 		}

-	case api.MsgRepoStats, api.MsgCommandResult:
-		// TODO(P2): persist these projections.
+	case api.MsgRepoStats:
+		var p api.RepoStatsPayload
+		if err := env.UnmarshalPayload(&p); err != nil {
+			slog.Warn("ws: bad repo.stats payload", "host_id", hostID, "err", err)
+			break
+		}
+		patch := store.HostRepoStats{
+			HostID:              hostID,
+			TotalSizeBytes:      p.TotalSizeBytes,
+			RawSizeBytes:        p.RawSizeBytes,
+			UniqueFiles:         p.UniqueFiles,
+			SnapshotCount:       p.SnapshotCount,
+			LastCheckAt:         p.LastCheckAt,
+			LastCheckStatus:     p.LastCheckStatus,
+			LockPresent:         p.LockPresent,
+			LastPruneAt:         p.LastPruneAt,
+			LastPruneFreedBytes: p.LastPruneFreedBytes,
+		}
+		if err := deps.Store.UpsertHostRepoStats(ctx, hostID, patch); err != nil {
+			slog.Warn("ws: upsert host repo stats", "host_id", hostID, "err", err)
+		} else {
+			slog.Info("ws: repo stats refreshed", "host_id", hostID)
+		}
+
+	case api.MsgCommandResult:
+		// TODO(P2): persist command.result acks for "did the agent
+		// accept the dispatch?" forensics. Currently the job lifecycle
+		// (job.started → job.finished) is sufficient signal.
 		slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID)

 	case api.MsgError:
@@ -0,0 +1,135 @@
+package ws
+
+import (
+	"context"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// openWSTestStore opens a store backed by a fresh "rm.db" file inside
+// the test's temp dir and registers a cleanup that closes it.
+func openWSTestStore(t *testing.T) *store.Store {
+	t.Helper()
+	dbPath := filepath.Join(t.TempDir(), "rm.db")
+	st, openErr := store.Open(context.Background(), dbPath)
+	if openErr != nil {
+		t.Fatalf("open: %v", openErr)
+	}
+	t.Cleanup(func() { _ = st.Close() })
+	return st
+}
+
+// seedHostWS writes a minimal hosts row straight through the store's
+// DB handle, using hostID for both id and name. The test is aborted if
+// the insert fails.
+func seedHostWS(t *testing.T, s *store.Store, hostID string) {
+	t.Helper()
+	const insertHost = `INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`
+	if _, err := s.DB().Exec(insertHost, hostID, hostID, "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
+		t.Fatalf("seed host %q: %v", hostID, err)
+	}
+}
+
+// int64ptrWS returns a pointer to a copy of v.
+func int64ptrWS(v int64) *int64 {
+	out := v
+	return &out
+}
+
+// boolptrWS returns a pointer to a copy of v.
+func boolptrWS(v bool) *bool {
+	out := v
+	return &out
+}
+
+// TestRepoStatsReportPersisted sends a fully-populated repo.stats
+// envelope through dispatchAgentMessage and checks that every field
+// can be read back from the store.
+func TestRepoStatsReportPersisted(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	st := openWSTestStore(t)
+
+	const hostID = "h-stats-ws"
+	seedHostWS(t, st, hostID)
+
+	// Whole-second timestamps are used for the time fields.
+	checkedAt := time.Now().UTC().Truncate(time.Second)
+	prunedAt := checkedAt.Add(-2 * time.Hour)
+	env, err := api.Marshal(api.MsgRepoStats, "", api.RepoStatsPayload{
+		TotalSizeBytes:      int64ptrWS(1024),
+		RawSizeBytes:        int64ptrWS(2048),
+		UniqueFiles:         int64ptrWS(42),
+		SnapshotCount:       int64ptrWS(7),
+		LastCheckAt:         &checkedAt,
+		LastCheckStatus:     "ok",
+		LockPresent:         boolptrWS(false),
+		LastPruneAt:         &prunedAt,
+		LastPruneFreedBytes: int64ptrWS(512),
+	})
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+
+	dispatchAgentMessage(ctx, nil, hostID, env, HandlerDeps{Store: st})
+
+	stored, err := st.GetHostRepoStats(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get host repo stats: %v", err)
+	}
+	if p := stored.TotalSizeBytes; p == nil || *p != 1024 {
+		t.Errorf("TotalSizeBytes: got %v want 1024", p)
+	}
+	if p := stored.RawSizeBytes; p == nil || *p != 2048 {
+		t.Errorf("RawSizeBytes: got %v want 2048", p)
+	}
+	if p := stored.UniqueFiles; p == nil || *p != 42 {
+		t.Errorf("UniqueFiles: got %v want 42", p)
+	}
+	if p := stored.SnapshotCount; p == nil || *p != 7 {
+		t.Errorf("SnapshotCount: got %v want 7", p)
+	}
+	if p := stored.LastCheckAt; p == nil || !p.Equal(checkedAt) {
+		t.Errorf("LastCheckAt: got %v want %v", p, checkedAt)
+	}
+	if stored.LastCheckStatus != "ok" {
+		t.Errorf("LastCheckStatus: got %q want %q", stored.LastCheckStatus, "ok")
+	}
+	if p := stored.LockPresent; p == nil || *p {
+		t.Errorf("LockPresent: got %v want false", p)
+	}
+	if p := stored.LastPruneAt; p == nil || !p.Equal(prunedAt) {
+		t.Errorf("LastPruneAt: got %v want %v", p, prunedAt)
+	}
+	if p := stored.LastPruneFreedBytes; p == nil || *p != 512 {
+		t.Errorf("LastPruneFreedBytes: got %v want 512", p)
+	}
+}
+
+// TestRepoStatsReportPartialUpdate checks that a repo.stats envelope
+// carrying only LastCheckStatus leaves a previously stored
+// TotalSizeBytes untouched.
+func TestRepoStatsReportPartialUpdate(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	st := openWSTestStore(t)
+
+	const hostID = "h-stats-partial"
+	seedHostWS(t, st, hostID)
+
+	// Pre-seed: TotalSizeBytes = 100.
+	seed := store.HostRepoStats{TotalSizeBytes: int64ptrWS(100)}
+	if err := st.UpsertHostRepoStats(ctx, hostID, seed); err != nil {
+		t.Fatalf("pre-seed upsert: %v", err)
+	}
+
+	// Send a repo.stats payload that only sets LastCheckStatus.
+	partial := api.RepoStatsPayload{LastCheckStatus: "ok"}
+	env, err := api.Marshal(api.MsgRepoStats, "", partial)
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	dispatchAgentMessage(ctx, nil, hostID, env, HandlerDeps{Store: st})
+
+	stored, err := st.GetHostRepoStats(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get: %v", err)
+	}
+	if p := stored.TotalSizeBytes; p == nil || *p != 100 {
+		t.Errorf("TotalSizeBytes lost: got %v want 100", p)
+	}
+	if stored.LastCheckStatus != "ok" {
+		t.Errorf("LastCheckStatus: got %q want ok", stored.LastCheckStatus)
+	}
+}
@@ -8,13 +8,23 @@ import (
 	"time"
 )

+// CredentialKind identifies the role of a host_credentials row. Its
+// string value is what the store binds to the row's kind column.
+type CredentialKind string
+
+const (
+	// CredKindRepo is the append-only credential used for every backup.
+	CredKindRepo CredentialKind = "repo"
+	// CredKindAdmin is the delete-capable credential used for prune.
+	CredKindAdmin CredentialKind = "admin"
+)
+
 // GetHostCredentials returns the AEAD-encrypted repo creds blob for
-// the host, or ("", ErrNotFound) if no credential has ever been set.
+// the host + kind, or ("", ErrNotFound) if no matching row exists.
 // The caller decrypts using host_id as AEAD additional data.
-func (s *Store) GetHostCredentials(ctx context.Context, hostID string) (string, error) {
+func (s *Store) GetHostCredentials(ctx context.Context, hostID string, kind CredentialKind) (string, error) {
 	row := s.db.QueryRowContext(ctx,
-		`SELECT enc_repo_creds FROM host_credentials WHERE host_id = ?`,
-		hostID)
+		`SELECT enc_repo_creds FROM host_credentials WHERE host_id = ? AND kind = ?`,
+		hostID, string(kind))
 	var enc string
 	if err := row.Scan(&enc); err != nil {
 		if errors.Is(err, sql.ErrNoRows) {
@@ -25,22 +35,35 @@ func (s *Store) GetHostCredentials(ctx context.Context, hostID string) (string,
 	return enc, nil
 }

-// SetHostCredentials replaces the host's encrypted repo creds blob.
-// The caller has already encrypted using host_id as additional data.
-func (s *Store) SetHostCredentials(ctx context.Context, hostID, encRepoCreds string) error {
+// SetHostCredentials replaces the host's encrypted repo creds blob for
+// the given kind. The caller has already encrypted using host_id as
+// additional data.
+func (s *Store) SetHostCredentials(ctx context.Context, hostID string, kind CredentialKind, encRepoCreds string) error {
 	if encRepoCreds == "" {
 		return fmt.Errorf("store: empty enc_repo_creds")
 	}
 	now := time.Now().UTC().Format(time.RFC3339Nano)
 	_, err := s.db.ExecContext(ctx,
-		`INSERT INTO host_credentials (host_id, enc_repo_creds, updated_at)
-		 VALUES (?, ?, ?)
-		 ON CONFLICT(host_id) DO UPDATE SET
+		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at)
+		 VALUES (?, ?, ?, ?)
+		 ON CONFLICT(host_id, kind) DO UPDATE SET
 			enc_repo_creds = excluded.enc_repo_creds,
 			updated_at     = excluded.updated_at`,
-		hostID, encRepoCreds, now)
+		hostID, string(kind), encRepoCreds, now)
 	if err != nil {
 		return fmt.Errorf("store: set host credentials: %w", err)
 	}
 	return nil
 }
+
+// DeleteHostCredentials deletes the host_credentials row matching the
+// given host and kind. Deleting a row that is not there is not an
+// error; only a failure to execute the statement is reported.
+func (s *Store) DeleteHostCredentials(ctx context.Context, hostID string, kind CredentialKind) error {
+	const stmt = `DELETE FROM host_credentials WHERE host_id = ? AND kind = ?`
+	if _, execErr := s.db.ExecContext(ctx, stmt, hostID, string(kind)); execErr != nil {
+		return fmt.Errorf("store: delete host credentials: %w", execErr)
+	}
+	return nil
+}
@@ -0,0 +1,103 @@
+package store
+
+import (
+	"context"
+	"errors"
+	"testing"
+)
+
+// seedHost writes a minimal hosts row for tests, using hostID for both
+// id and name. The test is aborted if the insert fails.
+func seedHost(t *testing.T, s *Store, hostID string) {
+	t.Helper()
+	const insertHost = `INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`
+	if _, err := s.DB().Exec(insertHost, hostID, hostID, "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
+		t.Fatalf("seed host %q: %v", hostID, err)
+	}
+}
+
+// TestHostCredentialsAdminRowSeparate checks that repo and admin
+// credentials for one host live in independent rows: each can be set
+// and read by kind, and deleting admin leaves repo in place.
+func TestHostCredentialsAdminRowSeparate(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	st := openTestStore(t)
+
+	const (
+		hostID    = "h-creds-test"
+		repoBlob  = "enc-repo-blob"
+		adminBlob = "enc-admin-blob"
+	)
+	seedHost(t, st, hostID)
+
+	// Store one blob per kind.
+	if err := st.SetHostCredentials(ctx, hostID, CredKindRepo, repoBlob); err != nil {
+		t.Fatalf("set repo creds: %v", err)
+	}
+	if err := st.SetHostCredentials(ctx, hostID, CredKindAdmin, adminBlob); err != nil {
+		t.Fatalf("set admin creds: %v", err)
+	}
+
+	// Read each back by kind; they must match what was stored and
+	// must not have collapsed into the same value.
+	repoRead, err := st.GetHostCredentials(ctx, hostID, CredKindRepo)
+	if err != nil {
+		t.Fatalf("get repo creds: %v", err)
+	}
+	adminRead, err := st.GetHostCredentials(ctx, hostID, CredKindAdmin)
+	if err != nil {
+		t.Fatalf("get admin creds: %v", err)
+	}
+	if repoRead != repoBlob {
+		t.Errorf("repo creds: got %q, want %q", repoRead, repoBlob)
+	}
+	if adminRead != adminBlob {
+		t.Errorf("admin creds: got %q, want %q", adminRead, adminBlob)
+	}
+	if repoRead == adminRead {
+		t.Error("repo and admin blobs must differ")
+	}
+
+	// Delete admin; repo must be unaffected.
+	if err := st.DeleteHostCredentials(ctx, hostID, CredKindAdmin); err != nil {
+		t.Fatalf("delete admin creds: %v", err)
+	}
+	_, err = st.GetHostCredentials(ctx, hostID, CredKindAdmin)
+	if !errors.Is(err, ErrNotFound) {
+		t.Errorf("after delete, expected ErrNotFound for admin; got %v", err)
+	}
+	survivor, err := st.GetHostCredentials(ctx, hostID, CredKindRepo)
+	if err != nil || survivor != repoBlob {
+		t.Errorf("repo creds should survive admin delete; got %q, err %v", survivor, err)
+	}
+}
+
+// TestHostCredentialsNotFound checks that reading credentials for a
+// host with no stored row reports ErrNotFound.
+func TestHostCredentialsNotFound(t *testing.T) {
+	t.Parallel()
+	st := openTestStore(t)
+
+	if _, err := st.GetHostCredentials(context.Background(), "no-such-host", CredKindRepo); !errors.Is(err, ErrNotFound) {
+		t.Errorf("expected ErrNotFound, got %v", err)
+	}
+}
+
+// TestHostCredentialsUpsert checks that setting the same (host, kind)
+// twice replaces the stored blob rather than failing.
+func TestHostCredentialsUpsert(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	st := openTestStore(t)
+
+	const hostID = "h-upsert-test"
+	seedHost(t, st, hostID)
+
+	err := st.SetHostCredentials(ctx, hostID, CredKindRepo, "v1")
+	if err != nil {
+		t.Fatalf("set v1: %v", err)
+	}
+	err = st.SetHostCredentials(ctx, hostID, CredKindRepo, "v2")
+	if err != nil {
+		t.Fatalf("set v2 (upsert): %v", err)
+	}
+
+	stored, err := st.GetHostCredentials(ctx, hostID, CredKindRepo)
+	if err != nil {
+		t.Fatalf("get: %v", err)
+	}
+	if stored != "v2" {
+		t.Errorf("expected v2, got %q", stored)
+	}
+}
@@ -0,0 +1,231 @@
+package store
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"time"
+)
+
+// HostRepoStats is the per-host projection of repo-level metrics.
+// All pointer fields are nullable; nil means "not yet known." The row
+// is created (or replaced) by UpsertHostRepoStats which merges in only
+// the non-nil fields from a patch.
+//
+// LockPresent is the exception on the read side: the store writes
+// lock_present as an integer (0 when the value is nil) and the scan
+// always returns a non-nil pointer, so a stored "unknown" reads back
+// as false.
+type HostRepoStats struct {
+	HostID              string
+	TotalSizeBytes      *int64
+	RawSizeBytes        *int64
+	UniqueFiles         *int64
+	SnapshotCount       *int64
+	LastCheckAt         *time.Time
+	LastCheckStatus     string // "" | "ok" | "errors_found" | "failed"
+	LockPresent         *bool
+	LastPruneAt         *time.Time
+	LastPruneFreedBytes *int64
+	UpdatedAt           time.Time // set by the store on every upsert
+}
+
+// GetHostRepoStats looks up the host_repo_stats row for hostID. It
+// returns (nil, ErrNotFound) when the host has no row.
+func (s *Store) GetHostRepoStats(ctx context.Context, hostID string) (*HostRepoStats, error) {
+	const query = `SELECT host_id, total_size_bytes, raw_size_bytes, unique_files,
+		        snapshot_count, last_check_at, last_check_status,
+		        lock_present, last_prune_at, last_prune_freed_bytes, updated_at
+		 FROM host_repo_stats WHERE host_id = ?`
+	return scanHostRepoStats(s.db.QueryRowContext(ctx, query, hostID))
+}
+
+// getHostRepoStatsTx performs the same lookup as GetHostRepoStats but
+// issues the query on the supplied transaction, so the caller's
+// fetch-merge-upsert sequence sees the row from inside that
+// transaction.
+func getHostRepoStatsTx(ctx context.Context, tx *sql.Tx, hostID string) (*HostRepoStats, error) {
+	const query = `SELECT host_id, total_size_bytes, raw_size_bytes, unique_files,
+		        snapshot_count, last_check_at, last_check_status,
+		        lock_present, last_prune_at, last_prune_freed_bytes, updated_at
+		 FROM host_repo_stats WHERE host_id = ?`
+	return scanHostRepoStats(tx.QueryRowContext(ctx, query, hostID))
+}
+
+// scanHostRepoStats decodes a single host_repo_stats row. The row must
+// yield the eleven columns in the order selected by GetHostRepoStats.
+//
+// sql.ErrNoRows is translated to ErrNotFound; any other scan failure,
+// or a timestamp that does not parse as RFC3339Nano, is returned
+// wrapped. NULL columns leave the matching field at its zero value.
+// lock_present is scanned as a plain integer, so LockPresent is always
+// non-nil in the result.
+func scanHostRepoStats(row *sql.Row) (*HostRepoStats, error) {
+	var out HostRepoStats
+	var total, raw, files, snaps, freed sql.NullInt64
+	var checkAt, checkStatus, pruneAt sql.NullString
+	var lock int64
+	var updated string
+
+	err := row.Scan(
+		&out.HostID,
+		&total, &raw, &files, &snaps,
+		&checkAt, &checkStatus,
+		&lock,
+		&pruneAt, &freed,
+		&updated,
+	)
+	switch {
+	case errors.Is(err, sql.ErrNoRows):
+		return nil, ErrNotFound
+	case err != nil:
+		return nil, fmt.Errorf("store: scan host_repo_stats: %w", err)
+	}
+
+	// Nullable integer columns: copy each valid value into a fresh
+	// variable so the result does not point at the scan targets.
+	for _, col := range []struct {
+		src sql.NullInt64
+		dst **int64
+	}{
+		{total, &out.TotalSizeBytes},
+		{raw, &out.RawSizeBytes},
+		{files, &out.UniqueFiles},
+		{snaps, &out.SnapshotCount},
+		{freed, &out.LastPruneFreedBytes},
+	} {
+		if col.src.Valid {
+			v := col.src.Int64
+			*col.dst = &v
+		}
+	}
+
+	if checkAt.Valid {
+		ts, err := time.Parse(time.RFC3339Nano, checkAt.String)
+		if err != nil {
+			return nil, fmt.Errorf("store: parse last_check_at: %w", err)
+		}
+		out.LastCheckAt = &ts
+	}
+	if checkStatus.Valid {
+		out.LastCheckStatus = checkStatus.String
+	}
+
+	locked := lock != 0
+	out.LockPresent = &locked
+
+	if pruneAt.Valid {
+		ts, err := time.Parse(time.RFC3339Nano, pruneAt.String)
+		if err != nil {
+			return nil, fmt.Errorf("store: parse last_prune_at: %w", err)
+		}
+		out.LastPruneAt = &ts
+	}
+
+	stamp, err := time.Parse(time.RFC3339Nano, updated)
+	if err != nil {
+		return nil, fmt.Errorf("store: parse host_repo_stats.updated_at: %w", err)
+	}
+	out.UpdatedAt = stamp
+	return &out, nil
+}
+
+// UpsertHostRepoStats applies a partial update to the host's
+// host_repo_stats row, creating the row if it does not exist yet.
+//
+// Merge rules: a non-nil pointer field in patch replaces the stored
+// value, and LastCheckStatus replaces the stored value only when it is
+// non-empty; every other column keeps its current value. patch.HostID
+// and patch.UpdatedAt are ignored — the row is keyed by the hostID
+// argument and updated_at is always stamped with the current UTC time.
+// lock_present is written as 0 both when the merged value is false and
+// when it is still unknown (nil).
+//
+// The fetch, merge and write run inside one transaction so that
+// concurrent upserts on the same host don't lose updates. Every
+// failure, including a failed commit, is returned wrapped with
+// "store:" context.
+func (s *Store) UpsertHostRepoStats(ctx context.Context, hostID string, patch HostRepoStats) error {
+	tx, err := s.db.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("store: begin host_repo_stats tx: %w", err)
+	}
+	// Rollback after a successful Commit is a harmless no-op; on any
+	// early return it releases the transaction.
+	defer func() { _ = tx.Rollback() }()
+
+	// Fetch existing row; start from zero if absent.
+	cur, err := getHostRepoStatsTx(ctx, tx, hostID)
+	if err != nil && !errors.Is(err, ErrNotFound) {
+		return err
+	}
+	if cur == nil {
+		cur = &HostRepoStats{HostID: hostID}
+	}
+
+	// Merge: non-nil patch fields overwrite current.
+	if patch.TotalSizeBytes != nil {
+		cur.TotalSizeBytes = patch.TotalSizeBytes
+	}
+	if patch.RawSizeBytes != nil {
+		cur.RawSizeBytes = patch.RawSizeBytes
+	}
+	if patch.UniqueFiles != nil {
+		cur.UniqueFiles = patch.UniqueFiles
+	}
+	if patch.SnapshotCount != nil {
+		cur.SnapshotCount = patch.SnapshotCount
+	}
+	if patch.LastCheckAt != nil {
+		cur.LastCheckAt = patch.LastCheckAt
+	}
+	if patch.LastCheckStatus != "" {
+		cur.LastCheckStatus = patch.LastCheckStatus
+	}
+	if patch.LockPresent != nil {
+		cur.LockPresent = patch.LockPresent
+	}
+	if patch.LastPruneAt != nil {
+		cur.LastPruneAt = patch.LastPruneAt
+	}
+	if patch.LastPruneFreedBytes != nil {
+		cur.LastPruneFreedBytes = patch.LastPruneFreedBytes
+	}
+
+	now := time.Now().UTC().Format(time.RFC3339Nano)
+
+	// Convert *bool → int for lock_present.
+	var lockPresentInt int64
+	if cur.LockPresent != nil && *cur.LockPresent {
+		lockPresentInt = 1
+	}
+
+	if _, err = tx.ExecContext(ctx,
+		`INSERT INTO host_repo_stats
+		   (host_id, total_size_bytes, raw_size_bytes, unique_files,
+		    snapshot_count, last_check_at, last_check_status,
+		    lock_present, last_prune_at, last_prune_freed_bytes, updated_at)
+		 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+		 ON CONFLICT(host_id) DO UPDATE SET
+		   total_size_bytes       = excluded.total_size_bytes,
+		   raw_size_bytes         = excluded.raw_size_bytes,
+		   unique_files           = excluded.unique_files,
+		   snapshot_count         = excluded.snapshot_count,
+		   last_check_at          = excluded.last_check_at,
+		   last_check_status      = excluded.last_check_status,
+		   lock_present           = excluded.lock_present,
+		   last_prune_at          = excluded.last_prune_at,
+		   last_prune_freed_bytes = excluded.last_prune_freed_bytes,
+		   updated_at             = excluded.updated_at`,
+		hostID,
+		nullableInt64(cur.TotalSizeBytes),
+		nullableInt64(cur.RawSizeBytes),
+		nullableInt64(cur.UniqueFiles),
+		nullableInt64(cur.SnapshotCount),
+		nullableTime(cur.LastCheckAt),
+		nullableStr(cur.LastCheckStatus),
+		lockPresentInt,
+		nullableTime(cur.LastPruneAt),
+		nullableInt64(cur.LastPruneFreedBytes),
+		now,
+	); err != nil {
+		return fmt.Errorf("store: upsert host_repo_stats: %w", err)
+	}
+	// Wrap the commit failure like every other error path here so the
+	// caller can tell which step of the upsert failed.
+	if err = tx.Commit(); err != nil {
+		return fmt.Errorf("store: commit host_repo_stats tx: %w", err)
+	}
+	return nil
+}
+
+// nullableInt64 turns an optional integer into a query argument: the
+// pointed-to value when p is set, or an untyped nil (SQL NULL) when it
+// is not.
+func nullableInt64(p *int64) any {
+	if p != nil {
+		return *p
+	}
+	return nil
+}
+
+// nullableTime turns an optional timestamp into a query argument: the
+// instant rendered in UTC as an RFC3339Nano string when p is set, or
+// an untyped nil (SQL NULL) when it is not.
+func nullableTime(p *time.Time) any {
+	if p != nil {
+		return p.UTC().Format(time.RFC3339Nano)
+	}
+	return nil
+}
@@ -0,0 +1,131 @@
+package store
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+)
+
+// int64ptr returns a pointer to a copy of v.
+func int64ptr(v int64) *int64 {
+	out := v
+	return &out
+}
+
+// boolptr returns a pointer to a copy of v.
+func boolptr(v bool) *bool {
+	out := v
+	return &out
+}
+
+// TestHostRepoStatsRoundTrip walks a host's stats row through four
+// successive partial upserts and checks after each one that the new
+// fields landed and the earlier ones were preserved.
+func TestHostRepoStatsRoundTrip(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	st := openTestStore(t)
+
+	const hostID = "h-stats-test"
+	seedHost(t, st, hostID)
+
+	// 1. Initial upsert: set TotalSizeBytes only.
+	err := st.UpsertHostRepoStats(ctx, hostID, HostRepoStats{TotalSizeBytes: int64ptr(100)})
+	if err != nil {
+		t.Fatalf("upsert 1: %v", err)
+	}
+	row, err := st.GetHostRepoStats(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get after upsert 1: %v", err)
+	}
+	if p := row.TotalSizeBytes; p == nil || *p != 100 {
+		t.Errorf("TotalSizeBytes: want 100, got %v", p)
+	}
+	if row.LastCheckStatus != "" {
+		t.Errorf("LastCheckStatus should be empty after first upsert, got %q", row.LastCheckStatus)
+	}
+
+	// 2. Upsert with LastCheckStatus; TotalSizeBytes must be preserved.
+	err = st.UpsertHostRepoStats(ctx, hostID, HostRepoStats{LastCheckStatus: "ok"})
+	if err != nil {
+		t.Fatalf("upsert 2: %v", err)
+	}
+	row, err = st.GetHostRepoStats(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get after upsert 2: %v", err)
+	}
+	if p := row.TotalSizeBytes; p == nil || *p != 100 {
+		t.Errorf("TotalSizeBytes should still be 100 after second upsert, got %v", p)
+	}
+	if row.LastCheckStatus != "ok" {
+		t.Errorf("LastCheckStatus: want %q, got %q", "ok", row.LastCheckStatus)
+	}
+
+	// 3. Upsert with LockPresent=true; all other fields preserved.
+	checkedAt := time.Now().UTC().Truncate(time.Second)
+	err = st.UpsertHostRepoStats(ctx, hostID, HostRepoStats{
+		LockPresent: boolptr(true),
+		LastCheckAt: &checkedAt,
+	})
+	if err != nil {
+		t.Fatalf("upsert 3: %v", err)
+	}
+	row, err = st.GetHostRepoStats(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get after upsert 3: %v", err)
+	}
+	if row.LockPresent == nil || !*row.LockPresent {
+		t.Error("LockPresent should be true after upsert 3")
+	}
+	if p := row.TotalSizeBytes; p == nil || *p != 100 {
+		t.Errorf("TotalSizeBytes still 100 expected, got %v", p)
+	}
+	if row.LastCheckStatus != "ok" {
+		t.Errorf("LastCheckStatus still 'ok' expected, got %q", row.LastCheckStatus)
+	}
+	if row.LastCheckAt == nil {
+		t.Error("LastCheckAt should be set")
+	} else if stored := row.LastCheckAt.UTC().Truncate(time.Second); !stored.Equal(checkedAt) {
+		t.Errorf("LastCheckAt: got %v, want %v", stored, checkedAt)
+	}
+
+	// 4. Clear lock (set to false).
+	err = st.UpsertHostRepoStats(ctx, hostID, HostRepoStats{LockPresent: boolptr(false)})
+	if err != nil {
+		t.Fatalf("upsert 4: %v", err)
+	}
+	row, err = st.GetHostRepoStats(ctx, hostID)
+	if err != nil {
+		t.Fatalf("get after upsert 4: %v", err)
+	}
+	if row.LockPresent == nil || *row.LockPresent {
+		t.Error("LockPresent should be false after upsert 4")
+	}
+}
+
+// TestHostRepoStatsNotFound checks that reading stats for a host with
+// no stored row reports ErrNotFound.
+func TestHostRepoStatsNotFound(t *testing.T) {
+	t.Parallel()
+	st := openTestStore(t)
+
+	if _, err := st.GetHostRepoStats(context.Background(), "no-such-host"); !errors.Is(err, ErrNotFound) {
+		t.Errorf("expected ErrNotFound, got %v", err)
+	}
+}
+
+// TestHostRepoStatsCascadeDelete checks that removing a hosts row also
+// removes that host's stats row.
+func TestHostRepoStatsCascadeDelete(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	st := openTestStore(t)
+
+	const hostID = "h-cascade-test"
+	seedHost(t, st, hostID)
+
+	err := st.UpsertHostRepoStats(ctx, hostID, HostRepoStats{TotalSizeBytes: int64ptr(999)})
+	if err != nil {
+		t.Fatalf("upsert: %v", err)
+	}
+
+	// Delete the host; stats row should cascade-delete.
+	_, err = st.DB().ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, hostID)
+	if err != nil {
+		t.Fatalf("delete host: %v", err)
+	}
+
+	if _, err := st.GetHostRepoStats(ctx, hostID); !errors.Is(err, ErrNotFound) {
+		t.Errorf("after host delete, expected ErrNotFound for stats; got %v", err)
+	}
+}
@@ -193,6 +193,71 @@ func (s *Store) GetJob(ctx context.Context, id string) (*Job, error) {
 	return &j, nil
 }

+// LatestJobByKind returns the most recent job (any status, including
+// queued and running) of the given kind for the host, or
+// (nil, ErrNotFound) if no such job exists. Used by the maintenance
+// ticker to compute "last fire" anchors for the cron-due check;
+// in-flight jobs MUST be considered or a long-running prune (>60s)
+// would re-fire on the next tick while the first is still running.
+//
+// "Most recent" is decided by the stored created_at column. A stored
+// created_at, started_at or finished_at value that does not parse as
+// RFC 3339 is reported as an error rather than being silently turned
+// into the zero time.Time, which a caller would otherwise mistake for
+// a real anchor.
+func (s *Store) LatestJobByKind(ctx context.Context, hostID, kind string) (*Job, error) {
+	row := s.db.QueryRowContext(ctx,
+		`SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
+		        started_at, finished_at, exit_code, stats, error, created_at
+		 FROM jobs
+		 WHERE host_id = ? AND kind = ?
+		 ORDER BY created_at DESC
+		 LIMIT 1`, hostID, kind)
+	var (
+		j          Job
+		schedID    sql.NullString
+		actorID    sql.NullString
+		startedAt  sql.NullString
+		finishedAt sql.NullString
+		exitCode   sql.NullInt64
+		stats      sql.NullString
+		errMsg     sql.NullString
+		createdAt  string
+	)
+	if err := row.Scan(&j.ID, &j.HostID, &j.Kind, &j.Status, &schedID,
+		&j.ActorKind, &actorID, &startedAt, &finishedAt,
+		&exitCode, &stats, &errMsg, &createdAt); err != nil {
+		if errors.Is(err, sql.ErrNoRows) {
+			return nil, ErrNotFound
+		}
+		return nil, fmt.Errorf("store: scan latest job by kind: %w", err)
+	}
+
+	// parseTS decodes one stored timestamp and names the column on failure.
+	parseTS := func(column, raw string) (time.Time, error) {
+		parsed, err := time.Parse(time.RFC3339Nano, raw)
+		if err != nil {
+			return time.Time{}, fmt.Errorf("store: parse latest job %s: %w", column, err)
+		}
+		return parsed, nil
+	}
+
+	created, err := parseTS("created_at", createdAt)
+	if err != nil {
+		return nil, err
+	}
+	j.CreatedAt = created
+
+	// Nullable columns only populate their pointer field when non-NULL.
+	if schedID.Valid {
+		v := schedID.String
+		j.ScheduledID = &v
+	}
+	if actorID.Valid {
+		v := actorID.String
+		j.ActorID = &v
+	}
+	if startedAt.Valid {
+		started, err := parseTS("started_at", startedAt.String)
+		if err != nil {
+			return nil, err
+		}
+		j.StartedAt = &started
+	}
+	if finishedAt.Valid {
+		finished, err := parseTS("finished_at", finishedAt.String)
+		if err != nil {
+			return nil, err
+		}
+		j.FinishedAt = &finished
+	}
+	if exitCode.Valid {
+		code := int(exitCode.Int64)
+		j.ExitCode = &code
+	}
+	// An empty stats string is treated like NULL: Stats stays nil.
+	if stats.Valid && stats.String != "" {
+		j.Stats = json.RawMessage(stats.String)
+	}
+	if errMsg.Valid {
+		v := errMsg.String
+		j.Error = &v
+	}
+	return &j, nil
+}
+
 // HasJobOfKind reports whether any job of the given kind exists for
 // this host, regardless of status. Used by the auto-init path on
 // agent hello to decide whether to dispatch a fresh `restic init` —
@@ -0,0 +1,136 @@
+package store
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+)
+
+// TestLatestJobByKind exercises LatestJobByKind against a single host:
+// the empty case, the newer of two finished jobs, a running job, a
+// queued job, and a kind for which no job exists. The steps build on
+// each other's rows, so their order matters.
+func TestLatestJobByKind(t *testing.T) {
+	t.Parallel()
+	s := openTestStore(t)
+	ctx := context.Background()
+	hostID := makeSchedHost(t, s)
+
+	// No jobs yet → ErrNotFound.
+	if _, err := s.LatestJobByKind(ctx, hostID, "forget"); !errors.Is(err, ErrNotFound) {
+		t.Fatalf("expected ErrNotFound on empty, got %v", err)
+	}
+
+	// Insert two finished jobs of kind=forget; the newer one should win.
+	older := time.Now().UTC().Add(-2 * time.Hour)
+	newer := time.Now().UTC().Add(-1 * time.Hour)
+
+	if err := s.CreateJob(ctx, Job{
+		ID: "j-old", HostID: hostID, Kind: "forget",
+		ActorKind: "system", CreatedAt: older,
+	}); err != nil {
+		t.Fatalf("create older: %v", err)
+	}
+	if err := s.MarkJobFinished(ctx, "j-old", "succeeded", 0, nil, "", older.Add(time.Minute)); err != nil {
+		t.Fatalf("finish older: %v", err)
+	}
+	if err := s.CreateJob(ctx, Job{
+		ID: "j-new", HostID: hostID, Kind: "forget",
+		ActorKind: "system", CreatedAt: newer,
+	}); err != nil {
+		t.Fatalf("create newer: %v", err)
+	}
+	if err := s.MarkJobFinished(ctx, "j-new", "failed", 1, nil, "boom", newer.Add(time.Minute)); err != nil {
+		t.Fatalf("finish newer: %v", err)
+	}
+
+	got, err := s.LatestJobByKind(ctx, hostID, "forget")
+	if err != nil {
+		t.Fatalf("LatestJobByKind: %v", err)
+	}
+	if got.ID != "j-new" {
+		t.Errorf("want j-new, got %q", got.ID)
+	}
+
+	// An in-flight running job must be returned — long-prune-suppresses-tick
+	// scenario: if a prune runs >60s the next tick must not re-fire it.
+	runningAt := time.Now().UTC()
+	if err := s.CreateJob(ctx, Job{
+		ID: "j-running", HostID: hostID, Kind: "forget",
+		ActorKind: "system", CreatedAt: runningAt,
+	}); err != nil {
+		t.Fatalf("create running: %v", err)
+	}
+	if err := s.MarkJobStarted(ctx, "j-running", runningAt); err != nil {
+		t.Fatalf("mark started: %v", err)
+	}
+	got2, err := s.LatestJobByKind(ctx, hostID, "forget")
+	if err != nil {
+		t.Fatalf("LatestJobByKind 2: %v", err)
+	}
+	if got2.ID != "j-running" {
+		t.Errorf("in-flight running job must be returned; want j-running, got %q", got2.ID)
+	}
+
+	// A queued (not-yet-started) job is also returned (it is newer than
+	// j-running because CreatedAt is later).
+	queuedAt := runningAt.Add(time.Millisecond)
+	if err := s.CreateJob(ctx, Job{
+		ID: "j-queued", HostID: hostID, Kind: "forget",
+		ActorKind: "system", CreatedAt: queuedAt,
+	}); err != nil {
+		t.Fatalf("create queued: %v", err)
+	}
+	got3, err := s.LatestJobByKind(ctx, hostID, "forget")
+	if err != nil {
+		t.Fatalf("LatestJobByKind 3: %v", err)
+	}
+	if got3.ID != "j-queued" {
+		t.Errorf("queued job must be returned as newest; want j-queued, got %q", got3.ID)
+	}
+
+	// Different kind → ErrNotFound.
+	if _, err := s.LatestJobByKind(ctx, hostID, "prune"); !errors.Is(err, ErrNotFound) {
+		t.Fatalf("expected ErrNotFound for prune, got %v", err)
+	}
+}
+
+// TestListAllMaintenance checks that ListAllMaintenance returns no rows
+// on a fresh store and exactly one row per host once default
+// maintenance rows have been created for two hosts. Only the row count
+// is asserted, not the row contents or order.
+func TestListAllMaintenance(t *testing.T) {
+	t.Parallel()
+	s := openTestStore(t)
+	ctx := context.Background()
+
+	// Empty case.
+	rows, err := s.ListAllMaintenance(ctx)
+	if err != nil {
+		t.Fatalf("empty list: %v", err)
+	}
+	if len(rows) != 0 {
+		t.Errorf("want empty, got %+v", rows)
+	}
+
+	// Seed two hosts with maintenance rows.
+	h1 := "01HMAINTHOST00000000000A1"
+	h2 := "01HMAINTHOST00000000000A2"
+	for i, id := range []string{h1, h2} {
+		if err := s.CreateHost(ctx, Host{
+			// Names get a distinct suffix per host: "maint-host-a", "maint-host-b".
+			ID: id, Name: "maint-host-" + string(rune('a'+i)),
+			OS: "linux", Arch: "amd64",
+			AgentVersion: "dev", ResticVersion: "0.16.0", ProtocolVersion: 1,
+			EnrolledAt: time.Now().UTC(),
+		}, "th-"+id, ""); err != nil {
+			t.Fatalf("create host %s: %v", id, err)
+		}
+	}
+	if err := s.CreateDefaultRepoMaintenance(ctx, h1); err != nil {
+		t.Fatalf("seed h1: %v", err)
+	}
+	if err := s.CreateDefaultRepoMaintenance(ctx, h2); err != nil {
+		t.Fatalf("seed h2: %v", err)
+	}
+
+	rows, err = s.ListAllMaintenance(ctx)
+	if err != nil {
+		t.Fatalf("list: %v", err)
+	}
+	if len(rows) != 2 {
+		t.Errorf("want 2 rows, got %d", len(rows))
+	}
+}
@@ -50,6 +50,40 @@ func (st *Store) GetRepoMaintenance(ctx context.Context, hostID string) (*HostRe
 	return &m, nil
 }

+// ListAllMaintenance loads the whole host_repo_maintenance table, one
+// HostRepoMaintenance per row. The server-side maintenance ticker
+// calls it on each tick to walk every host. The query has no ORDER BY,
+// so callers must not rely on row order.
+//
+// The *_enabled columns are read as integers and mapped to booleans
+// (non-zero means enabled). The returned slice is nil when the table
+// is empty.
+func (st *Store) ListAllMaintenance(ctx context.Context) ([]HostRepoMaintenance, error) {
+	rows, queryErr := st.db.QueryContext(ctx,
+		`SELECT host_id, forget_cron, forget_enabled,
+			prune_cron, prune_enabled,
+			check_cron, check_enabled, check_subset_pct
+		 FROM host_repo_maintenance`)
+	if queryErr != nil {
+		return nil, fmt.Errorf("store: list all maintenance: %w", queryErr)
+	}
+	defer func() { _ = rows.Close() }()
+
+	var all []HostRepoMaintenance
+	for rows.Next() {
+		var forgetOn, pruneOn, checkOn int
+		entry := HostRepoMaintenance{}
+		scanErr := rows.Scan(
+			&entry.HostID,
+			&entry.ForgetCron, &forgetOn,
+			&entry.PruneCron, &pruneOn,
+			&entry.CheckCron, &checkOn,
+			&entry.CheckSubsetPct,
+		)
+		if scanErr != nil {
+			return nil, fmt.Errorf("store: scan maintenance: %w", scanErr)
+		}
+		entry.ForgetEnabled = forgetOn != 0
+		entry.PruneEnabled = pruneOn != 0
+		entry.CheckEnabled = checkOn != 0
+		all = append(all, entry)
+	}
+	return all, rows.Err()
+}
+
 // UpdateRepoMaintenance replaces every editable field. Doesn't bump
 // the schedule version — these run on the server's own ticker, not
 // the agent's local cron, so the agent doesn't need to know.
@@ -0,0 +1,58 @@
+-- 0009_admin_creds_and_repo_stats.sql
+--
+-- Phase 5 of the P2 redesign needs two things in the schema:
+--
+-- 1. A second credential row per host. Today host_credentials is
+--    1:1 with hosts. For prune (and any future destructive op) we
+--    want a rest-server admin user whose password gives delete
+--    access — separate from the append-only user used on every
+--    backup. Add a `kind` column with default 'repo'; existing rows
+--    become kind='repo'. Future admin rows live alongside.
+--
+-- 2. A small singleton-per-host projection for repo size, snapshot
+--    count, last-prune freed bytes, lock state, and last-check
+--    result. Backed by `restic stats --json` + sniffed `restic
+--    check` stderr.
+--
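+-- Prefer column-level ALTERs (the rule from CLAUDE.md applies even
+-- though host_credentials has no inbound FKs). The single exception
+-- is the host_credentials rebuild below, which is required to change
+-- the primary key.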
+
+ALTER TABLE host_credentials ADD COLUMN kind TEXT NOT NULL DEFAULT 'repo';
+
+-- The PK on host_credentials is currently (host_id) — we need a
+-- composite (host_id, kind). SQLite has no ALTER TABLE …
+-- ADD/CHANGE PRIMARY KEY, so this is the one place a rebuild is
+-- justified. host_credentials has no inbound FKs, so the cascade
+-- trap doesn't apply here. Verified against schema/0002.
+
+-- Replacement table with the composite primary key (host_id, kind);
+-- kind is restricted to 'repo' or 'admin'.
+CREATE TABLE host_credentials_new (
+  host_id        TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
+  kind           TEXT NOT NULL DEFAULT 'repo'
+                   CHECK (kind IN ('repo', 'admin')),
+  enc_repo_creds TEXT NOT NULL,
+  updated_at     TEXT NOT NULL,
+  PRIMARY KEY (host_id, kind)
+);
+-- Copy every existing row across. Their kind is 'repo' because the
+-- ADD COLUMN above gave the column that default.
+INSERT INTO host_credentials_new (host_id, kind, enc_repo_creds, updated_at)
+  SELECT host_id, kind, enc_repo_creds, updated_at FROM host_credentials;
+-- Swap the rebuilt table into place under the original name.
+DROP TABLE host_credentials;
+ALTER TABLE host_credentials_new RENAME TO host_credentials;
+
+-- Repo stats projection. One row per host, upserted by the agent's
+-- stats.report envelope (which fires after every successful backup
+-- and after every check / prune). Every measurement column is
+-- nullable so a freshly enrolled host with no jobs yet is
+-- representable; only lock_present (defaults to 0) and updated_at
+-- are NOT NULL.
+
+-- One row per host (host_id is the primary key); the row is removed
+-- with its host via ON DELETE CASCADE.
+CREATE TABLE host_repo_stats (
+  host_id                 TEXT PRIMARY KEY REFERENCES hosts(id) ON DELETE CASCADE,
+  -- Size figures; the Repo page labels raw_size_bytes as "pre-dedup".
+  total_size_bytes        INTEGER,
+  raw_size_bytes          INTEGER,
+  unique_files            INTEGER,
+  snapshot_count          INTEGER,
+  -- Result of the most recent check; NULL until one has been recorded.
+  last_check_at           TEXT,
+  last_check_status       TEXT CHECK (last_check_status IS NULL OR last_check_status IN ('ok', 'errors_found', 'failed')),
+  -- Integer flag (0 = no lock reported); never NULL.
+  lock_present            INTEGER NOT NULL DEFAULT 0,
+  -- Most recent prune and the bytes it freed.
+  last_prune_at           TEXT,
+  last_prune_freed_bytes  INTEGER,
+  updated_at              TEXT NOT NULL
+);
@@ -72,6 +72,43 @@ func (st *Store) DuePendingRuns(ctx context.Context, now time.Time, limit int) (
 	return out, rows.Err()
 }

+// ListPendingRunsForHost returns every pending row for the host
+// (regardless of next_attempt_at), ordered by next_attempt_at
+// ascending. Used by the on-reconnect drain — when a host comes
+// back, we walk every pending row for it, not just the due ones,
+// because the host being back makes "due" unimportant: every row
+// is dispatchable now.
+//
+// The result is always a non-nil slice, empty when the host has no
+// pending rows. Query and scan failures are returned wrapped with a
+// "store:" prefix. A next_attempt_at or scheduled_at value that does
+// not parse as RFC 3339 is tolerated: the corresponding field is left
+// at its zero value.
+func (st *Store) ListPendingRunsForHost(ctx context.Context, hostID string) ([]PendingRun, error) {
+	rows, err := st.db.QueryContext(ctx,
+		`SELECT id, schedule_id, source_group_id, host_id, attempt,
+			next_attempt_at, scheduled_at, COALESCE(last_error, '')
+		 FROM pending_runs
+		 WHERE host_id = ?
+		 ORDER BY next_attempt_at`,
+		hostID)
+	if err != nil {
+		return nil, fmt.Errorf("store: list pending runs for host: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+	// Start from an empty (non-nil) slice so "no rows" is not nil.
+	out := []PendingRun{}
+	for rows.Next() {
+		var p PendingRun
+		var nextAt, scheduledAt string
+		if err := rows.Scan(&p.ID, &p.ScheduleID, &p.SourceGroupID, &p.HostID,
+			&p.Attempt, &nextAt, &scheduledAt, &p.LastError); err != nil {
+			// Wrap like the query error above so the failing step is identifiable.
+			return nil, fmt.Errorf("store: scan pending run: %w", err)
+		}
+		// Best-effort timestamp decoding: a malformed value keeps the zero time.
+		if t, err := time.Parse(time.RFC3339Nano, nextAt); err == nil {
+			p.NextAttemptAt = t
+		}
+		if t, err := time.Parse(time.RFC3339Nano, scheduledAt); err == nil {
+			p.ScheduledAt = t
+		}
+		out = append(out, p)
+	}
+	return out, rows.Err()
+}
+
 // DeletePendingRun removes a row by id. Called after successful
 // dispatch or after exceeding retry_max.
 func (st *Store) DeletePendingRun(ctx context.Context, id string) error {
@@ -219,3 +219,78 @@ func TestPendingRunQueue(t *testing.T) {
 		t.Fatalf("after delete: %v", due)
 	}
 }
+
+// TestListPendingRunsForHost verifies that ListPendingRunsForHost
+// returns all of a host's pending rows (due or not) ordered by
+// next_attempt_at, leaves out rows that belong to another host, and
+// returns an empty result for an unknown host.
+func TestListPendingRunsForHost(t *testing.T) {
+	t.Parallel()
+	s := openTestStore(t)
+	ctx := context.Background()
+	hostA := makeSchedHost(t, s)
+	hostB := "01HPENDLISTHOSTB00000001"
+	if err := s.CreateHost(ctx, Host{
+		ID: hostB, Name: "pending-list-host-b", OS: "linux", Arch: "amd64",
+		AgentVersion: "dev", ResticVersion: "0.16.0", ProtocolVersion: 1,
+		EnrolledAt: time.Now().UTC(),
+	}, "tokenhashB", ""); err != nil {
+		t.Fatal(err)
+	}
+	// Each host gets its own source group and schedule for the pending rows to reference.
+	gA := makeGroup(t, s, hostA, "default", "01HPENDLISTGRPA000000001")
+	gB := makeGroup(t, s, hostB, "default", "01HPENDLISTGRPB000000001")
+	schedA := "01HPENDLISTSCHEDA0000001"
+	schedB := "01HPENDLISTSCHEDB0000001"
+	if err := s.CreateSchedule(ctx, &Schedule{
+		ID: schedA, HostID: hostA, CronExpr: "@hourly", Enabled: true,
+		SourceGroupIDs: []string{gA},
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.CreateSchedule(ctx, &Schedule{
+		ID: schedB, HostID: hostB, CronExpr: "@hourly", Enabled: true,
+		SourceGroupIDs: []string{gB},
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	now := time.Now().UTC()
+	// Two rows for hostA — one not-yet-due, one already-due — and one
+	// for hostB. ListPendingRunsForHost(A) must return both A rows
+	// (regardless of due-ness) ordered by next_attempt_at ascending.
+	// The later row (A02) is enqueued first, so insertion order differs
+	// from the expected result order.
+	rows := []*PendingRun{
+		{
+			ID: "01HPENDLISTROW0000000A02", ScheduleID: schedA, SourceGroupID: gA, HostID: hostA,
+			NextAttemptAt: now.Add(time.Hour), ScheduledAt: now,
+		},
+		{
+			ID: "01HPENDLISTROW0000000A01", ScheduleID: schedA, SourceGroupID: gA, HostID: hostA,
+			NextAttemptAt: now.Add(-time.Minute), ScheduledAt: now.Add(-time.Hour),
+		},
+		{
+			ID: "01HPENDLISTROW0000000B01", ScheduleID: schedB, SourceGroupID: gB, HostID: hostB,
+			NextAttemptAt: now, ScheduledAt: now,
+		},
+	}
+	for _, r := range rows {
+		if err := s.EnqueuePendingRun(ctx, r); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	out, err := s.ListPendingRunsForHost(ctx, hostA)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(out) != 2 {
+		t.Fatalf("len=%d, want 2: %+v", len(out), out)
+	}
+	// Ordered ascending by next_attempt_at: the -1m row first, then +1h.
+	if out[0].ID != "01HPENDLISTROW0000000A01" || out[1].ID != "01HPENDLISTROW0000000A02" {
+		t.Fatalf("order: got %s,%s", out[0].ID, out[1].ID)
+	}
+
+	out, err = s.ListPendingRunsForHost(ctx, "non-existent-host")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(out) != 0 {
+		t.Fatalf("non-existent host: got %d rows", len(out))
+	}
+}
@@ -84,6 +84,70 @@ func TestMigrateIsIdempotent(t *testing.T) {
 	}
 }

+// TestMigration0009Schema probes the schema produced by migration 0009
+// with raw SQL: the composite (host_id, kind) key and kind CHECK on
+// host_credentials, and the existence, lock_present column and
+// last_check_status CHECK of host_repo_stats.
+func TestMigration0009Schema(t *testing.T) {
+	t.Parallel()
+	s := openTestStore(t)
+	ctx := context.Background()
+
+	// host_credentials must have a composite PK (host_id, kind).
+	// We verify this by inserting two rows for the same host_id (different kinds)
+	// and confirming a duplicate (host_id, kind) fails.
+	_, err := s.DB().ExecContext(ctx,
+		`INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`,
+		"h-0009", "test-host", "linux", "amd64", "2026-01-01T00:00:00Z")
+	if err != nil {
+		t.Fatalf("insert host: %v", err)
+	}
+	now := "2026-01-01T00:00:00Z"
+	if _, err := s.DB().ExecContext(ctx,
+		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
+		"h-0009", "repo", "enc-repo", now); err != nil {
+		t.Fatalf("insert repo creds: %v", err)
+	}
+	if _, err := s.DB().ExecContext(ctx,
+		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
+		"h-0009", "admin", "enc-admin", now); err != nil {
+		t.Fatalf("insert admin creds: %v", err)
+	}
+	// Duplicate (host_id, kind) must fail.
+	if _, err := s.DB().ExecContext(ctx,
+		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
+		"h-0009", "repo", "enc-repo-2", now); err == nil {
+		t.Fatal("expected unique constraint violation on (host_id, kind), got nil")
+	}
+
+	// CHECK (kind IN ('repo','admin')) must reject an invalid kind.
+	if _, err := s.DB().ExecContext(ctx,
+		`INSERT INTO host_credentials (host_id, kind, enc_repo_creds, updated_at) VALUES (?,?,?,?)`,
+		"h-0009", "other", "enc-other", now); err == nil {
+		t.Fatal("expected CHECK constraint violation on kind='other', got nil")
+	}
+
+	// host_repo_stats table must exist with expected columns.
+	if _, err := s.DB().ExecContext(ctx,
+		`INSERT INTO host_repo_stats (host_id, lock_present, updated_at) VALUES (?,?,?)`,
+		"h-0009", 0, now); err != nil {
+		t.Fatalf("insert host_repo_stats: %v", err)
+	}
+	var lockPresent int
+	if err := s.DB().QueryRowContext(ctx,
+		`SELECT lock_present FROM host_repo_stats WHERE host_id = ?`, "h-0009",
+	).Scan(&lockPresent); err != nil {
+		t.Fatalf("select host_repo_stats: %v", err)
+	}
+	if lockPresent != 0 {
+		t.Errorf("expected lock_present=0, got %d", lockPresent)
+	}
+
+	// CHECK (last_check_status IN ('ok','errors_found','failed')) must reject
+	// an invalid value.
+	if _, err := s.DB().ExecContext(ctx,
+		`UPDATE host_repo_stats SET last_check_status = ? WHERE host_id = ?`,
+		"wat", "h-0009"); err == nil {
+		t.Fatal("expected CHECK constraint violation on last_check_status='wat', got nil")
+	}
+}
+
 func TestForeignKeysEnforced(t *testing.T) {
 	t.Parallel()
 	s := openTestStore(t)
@@ -166,14 +166,24 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
  - Header "version N · agent in sync / agent at vM" indicator preserved across all tabs (backed by `host_schedule_version` + `applied_schedule_version`).
  - Form validation re-renders with the operator's typed input intact (mirror P2-04's behaviour). Each save fires `pushScheduleSetAsync` so an online agent re-arms within seconds.

-### P2 redesign — Phase 5 (server-side maintenance ticker) — TODO
+### P2 redesign — Phase 5 (server-side maintenance ticker)

- [ ] **P2R-03** (M) `prune` command end-to-end. Restic wrapper (`restic.RunPrune`), agent dispatcher (`case api.JobPrune:`), wire envelope. **Admin-only credential**: a second `host_credentials` row keyed by `host_id` + `kind=admin` carries the non-append-only username/password; server pushes it via `config.update` only when dispatching a prune job, and the agent's secrets store keeps it in a separate slot from the everyday append-only creds. UI: prune row on the Repo page. Operator-triggered Run-now via `POST /hosts/{id}/repo/prune`. Cadence-driven dispatch lands in P2R-04.
- [ ] **P2R-04** (M) `check` command end-to-end (`restic check --read-data-subset N%`). Wrapper + dispatcher + wire. UI: check row on the Repo page (with the subset % slider). Operator Run-now via `POST /hosts/{id}/repo/check`. Cadence-driven dispatch lands in P2R-05.
- [ ] **P2R-05** (S) `unlock` command end-to-end (`restic unlock`). Operator-only — no cadence. `POST /hosts/{id}/repo/unlock`. Repo page surfaces lock state from the most recent `check` (which warns about stale locks).
- [ ] **P2R-06** (M) Server-side maintenance ticker. Cron-style loop on the server reads `host_repo_maintenance` rows, dispatches `forget` / `prune` / `check` jobs against the right host on the configured cadence (last-run timestamps tracked per kind on the maintenance row). Independent of the agent's local cron — the agent's cron only handles backup schedules now. Skips offline hosts (queues to `pending_runs` instead — see P2R-08). Handles ticker restarts cleanly (no-op if a job of the same kind ran inside the cadence window).
- [ ] **P2R-07** (S) Repo stats panel on the Repo page: size, dedup ratio, snapshot count, last-check timestamp + result, lock state, last-prune timestamp + bytes-freed. Backed by parsing `restic stats --json` output that the agent ships periodically (piggyback on the existing snapshots-report path).
- [ ] **P2R-08** (M) Pending-runs queue worker. On agent reconnect, server drains `pending_runs` rows for that host and re-dispatches them in order. Bump backoff per `pending_run.attempt_count`; drop rows that have exceeded the source-group's `retry_max`. Audit-logged. Smoke-tested by stopping the agent, running maintenance ticker so cadence misses, restarting agent, watching the queue drain.
+- [x] **P2R-03** (M) `prune` command end-to-end. Restic wrapper (`restic.RunPrune`), agent dispatcher (`case api.JobPrune:`), wire envelope. **Admin-only credential**: a second `host_credentials` row keyed by `host_id` + `kind=admin` carries the non-append-only username/password; server pushes it via `config.update` only when dispatching a prune job, and the agent's secrets store keeps it in a separate slot from the everyday append-only creds. UI: prune row on the Repo page. Operator-triggered Run-now via `POST /hosts/{id}/repo/prune`. Cadence-driven dispatch lands in P2R-06.
+- [x] **P2R-04** (M) `check` command end-to-end (`restic check --read-data-subset N%`). Wrapper + dispatcher + wire. UI: check row on the Repo page (with the subset % slider). Operator Run-now via `POST /hosts/{id}/repo/check`. Cadence-driven dispatch lands in P2R-06.
+- [x] **P2R-05** (S) `unlock` command end-to-end (`restic unlock`). Operator-only — no cadence. `POST /hosts/{id}/repo/unlock`. Repo page surfaces lock state from the most recent `check` (which warns about stale locks).
+- [x] **P2R-06** (M) Server-side maintenance ticker. Cron-style loop on the server reads `host_repo_maintenance` rows, dispatches `forget` / `prune` / `check` jobs against the right host on the configured cadence (the last-fire anchor per kind is taken from the newest `jobs` row of that kind, in-flight jobs included). Independent of the agent's local cron — the agent's cron only handles backup schedules now. Skips offline hosts (queues to `pending_runs` instead — see P2R-08). Handles ticker restarts cleanly (no-op if a job of the same kind ran inside the cadence window).
+- [x] **P2R-07** (S) Repo stats panel on the Repo page: size, dedup ratio, snapshot count, last-check timestamp + result, lock state, last-prune timestamp + bytes-freed. Backed by parsing `restic stats --json` output that the agent ships periodically (piggyback on the existing snapshots-report path).
+- [x] **P2R-08** (M) Pending-runs queue worker. On agent reconnect, server drains `pending_runs` rows for that host and re-dispatches them in order. Bump backoff per `pending_runs.attempt`; drop rows that have exceeded the source-group's `retry_max`. Audit-logged. Smoke-tested by stopping the agent, running maintenance ticker so cadence misses, restarting agent, watching the queue drain.
+
+### P2 redesign — Phase 5 ✅
+
+- Restic-manager Phase 5 lands on branch `p2r-phase5-maintenance`:
+  prune/check/unlock end-to-end (P2R-03/04/05); server-side
+  maintenance ticker drives forget/prune/check on cadence (P2R-06);
+  repo-stats panel surfaces size, lock state, last-check / last-prune
+  (P2R-07); pending-runs queue worker drains scheduled-backup
+  fires that raced an agent disconnect (P2R-08). See
+  `docs/superpowers/plans/2026-05-03-p2-redesign-phase-5.md`.

 ### P2 redesign — Phase 6 (auto-init follow-up) — TODO

@@ -42,6 +42,54 @@
      </div>
    </form>

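+    {{/* ---------- Admin credentials (optional) ----------
+         Second credential set, described to the operator as prune-only.
+         The main form posts to the host's admin-credentials endpoint. The
+         Clear button submits the separate empty form with
+         id="admin-creds-clear" that is rendered right after this one.
+         The stored password is never rendered back into the page; only
+         HasAdminPassword selects the placeholder text. */}}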
+    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">
+      Admin credentials <span class="text-ink-fade normal-case">· prune-only · optional</span>
+    </h2>
+    <form method="post" action="/hosts/{{$host.ID}}/admin-credentials" class="panel rounded-[7px] p-5">
+      {{if $page.AdminCredsError}}
+        <div class="rounded-[6px] px-3.5 py-3 text-[13px] mb-4"
+             style="border: 1px solid color-mix(in oklch, var(--bad), transparent 60%); background: color-mix(in oklch, var(--bad), transparent 92%);">
+          {{$page.AdminCredsError}}
+        </div>
+      {{end}}
+      {{if eq $page.SavedSection "admin_credentials"}}
+        <div class="text-[12px] text-ok mb-3 mono">✓ saved</div>
+      {{end}}
+      <p class="text-[12.5px] text-ink-mid leading-[1.6] mb-4 max-w-[640px]">
+        Only needed for rest-server repos that distinguish an append-only
+        user (everyday backups) from a delete-capable user (prune /
+        forget). For S3 / B2 / SFTP / local, leave this blank — the
+        everyday repo credentials handle prune too.
+      </p>
+      <div class="grid grid-cols-2 gap-4">
+        <div>
+          <label class="field-label" for="admin_repo_url">Repo URL <span class="text-ink-fade">· usually same as above</span></label>
+          <input id="admin_repo_url" name="repo_url" type="text" class="field mono" value="{{$page.AdminURL}}" />
+        </div>
+        <div>
+          <label class="field-label" for="admin_repo_username">Username</label>
+          <input id="admin_repo_username" name="repo_username" type="text" class="field mono" value="{{$page.AdminUsername}}" />
+        </div>
+        <div class="col-span-2">
+          <label class="field-label" for="admin_repo_password">Password</label>
+          <input id="admin_repo_password" name="repo_password" type="password" class="field mono"
+                 placeholder="{{if $page.HasAdminPassword}}•••••••••••••••• · stored, leave blank to keep{{else}}— not yet set —{{end}}"
+                 autocomplete="new-password" />
+        </div>
+      </div>
+      <div class="mt-4 pt-4 border-t border-line-soft flex gap-2 items-center">
+        <button type="submit" class="btn btn-primary">Save admin credentials</button>
+        {{if $page.HasAdminPassword}}
+          <button type="submit" form="admin-creds-clear" class="btn btn-secondary"
+                  onclick="return confirm('Clear admin credentials? Prune jobs will be refused until you re-set them.');">Clear</button>
+        {{end}}
+      </div>
+    </form>
+    {{if $page.HasAdminPassword}}
+      <form id="admin-creds-clear" method="post" action="/hosts/{{$host.ID}}/admin-credentials/delete"></form>
+    {{end}}
+
    {{/* ---------- Bandwidth ---------- */}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Bandwidth · host-wide</h2>
    <form method="post" action="/hosts/{{$host.ID}}/repo/bandwidth" class="panel rounded-[7px] p-5">
@@ -138,6 +186,40 @@
      </div>
    </form>

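+    {{/* ---------- Run now · one-time ----------
+         Three htmx buttons that POST to the host's repo/check, repo/prune
+         and repo/unlock endpoints with hx-swap="none". All three are
+         disabled while the agent is offline; prune is additionally disabled
+         until admin credentials are stored.
+         NOTE(review): the admin-credentials help text tells operators of
+         non-rest-server repos to leave admin credentials blank, yet prune
+         is disabled without them. Confirm which behaviour is intended. */}}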
+    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Run now · one-time</h2>
+    <div class="panel rounded-[7px] p-5">
+      <p class="text-[12.5px] text-ink-mid leading-[1.6] mb-4 max-w-[640px]">
+        Operator-triggered. Output streams live to the job log. Cadence-driven runs land independently from the server-side ticker.
+      </p>
+      <div class="grid grid-cols-3 gap-3">
+        <button type="button"
+                hx-post="/hosts/{{$host.ID}}/repo/check"
+                hx-swap="none"
+                hx-confirm="Run check now ({{$m.CheckSubsetPct}}% data subset)?"
+                class="btn btn-secondary"
+                {{if not $page.Online}}disabled title="agent is offline"{{end}}>
+          check
+        </button>
+        <button type="button"
+                hx-post="/hosts/{{$host.ID}}/repo/prune"
+                hx-swap="none"
+                hx-confirm="Run prune now? Removes data not referenced by any snapshot — heavy operation."
+                class="btn btn-secondary"
+                {{if not $page.HasAdminPassword}}disabled title="set admin credentials first"{{else if not $page.Online}}disabled title="agent is offline"{{end}}>
+          prune
+        </button>
+        <button type="button"
+                hx-post="/hosts/{{$host.ID}}/repo/unlock"
+                hx-swap="none"
+                hx-confirm="Clear stale repo locks?"
+                class="btn btn-secondary"
+                {{if not $page.Online}}disabled title="agent is offline"{{end}}>
+          unlock
+        </button>
+      </div>
+    </div>
+
    {{/* ---------- Danger zone ---------- */}}
    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-bad mt-9 mb-3.5">Danger zone</h2>
    <div class="panel rounded-[7px] p-5"
@@ -179,6 +261,41 @@
      </div>
    </div>

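+    {{/* ---------- Repo health ----------
+         Rendered only when the page carries a StatsView. Each row is
+         guarded by its own Has* flag, so rows without a value are omitted.
+         NOTE(review): snapshot count and last-prune freed bytes exist in
+         host_repo_stats but are not shown in this panel. */}}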
+    {{if $page.StatsView}}
+    {{$s := $page.StatsView}}
+    <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Repo health</h2>
+    <div class="panel rounded-[7px] p-5 text-[13px]">
+      {{if $s.LockPresent}}
+        <div class="rounded-[6px] px-3.5 py-3 text-[12.5px] mb-4"
+             style="border: 1px solid color-mix(in oklch, var(--warn), transparent 60%); background: color-mix(in oklch, var(--warn), transparent 92%);">
+          Stale lock detected on the most recent check. Run <span class="mono">unlock</span> above to clear it before the next backup.
+        </div>
+      {{end}}
+      <dl class="grid grid-cols-2 gap-y-2 gap-x-4">
+        {{if $s.HasTotalSize}}
+          <dt class="text-ink-fade">Total size</dt>
+          <dd class="mono text-right">{{bytes $s.TotalSizeBytes}}</dd>
+        {{end}}
+        {{if $s.HasRawSize}}
+          <dt class="text-ink-fade">Raw size <span class="text-ink-fade text-[11px]">· pre-dedup</span></dt>
+          <dd class="mono text-right">{{bytes $s.RawSizeBytes}}</dd>
+        {{end}}
+        {{if $s.HasLastCheck}}
+          <dt class="text-ink-fade">Last check</dt>
+          <dd class="mono text-right text-[12px]">
+            {{$s.LastCheckAgo}}
+            {{if $s.LastCheckStatus}} · <span class="{{if eq $s.LastCheckStatus "ok"}}text-ok{{else if eq $s.LastCheckStatus "errors_found"}}text-bad{{else}}text-ink-mid{{end}}">{{$s.LastCheckStatus}}</span>{{end}}
+          </dd>
+        {{end}}
+        {{if $s.HasLastPrune}}
+          <dt class="text-ink-fade">Last prune</dt>
+          <dd class="mono text-right text-[12px]">{{$s.LastPruneAgo}}</dd>
+        {{end}}
+      </dl>
+    </div>
+    {{end}}
+
    {{if gt (len $page.GroupNames) 0}}
      <h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-7 mb-3.5">Snapshots by source</h2>
      <div class="panel rounded-[7px] p-4">