Phase 3 — Restore (P3-X1, X2, 01, 02, 03, 09, X3-X6) #6

Merged
steve merged 13 commits from p3-restore into main 2026-05-04 18:06:18 +01:00
71 changed files with 4879 additions and 156 deletions
+1 -1
View File
@@ -26,7 +26,7 @@ linters:
- name: exported
arguments: ["disableStutteringCheck"]
misspell:
locale: US
locale: UK
exclusions:
rules:
- path: _test\.go
+196 -38
View File
@@ -136,6 +136,7 @@ func run() error {
d := &dispatcher{
resticBin: resticBin,
resticVer: snap.ResticVersion,
secrets: sec,
scheduler: scheduler.New(),
}
@@ -200,6 +201,7 @@ func openSecretsStore(cfg *config.Config) (*secrets.Store, error) {
// so a job dispatched in the same session sees the latest values.
type dispatcher struct {
resticBin string
resticVer string // e.g. "0.17.1"; empty if restic isn't installed yet
secrets *secrets.Store
scheduler *scheduler.Scheduler
@@ -210,6 +212,45 @@ type dispatcher struct {
bwMu sync.Mutex
bwUpKBps int
bwDownKBps int
// Per-running-job cancellation handles. Populated when runJob
// spawns the goroutine, removed when it returns. Looked up by
// the command.cancel handler (server → agent) to abort an
// in-flight restic invocation.
cancelMu sync.Mutex
cancels map[string]context.CancelFunc
}
// trackJob records cancel as the cancellation handle for the in-flight
// job jobID, creating the registry map lazily on first use. It returns
// a func that removes the entry again; callers should defer it in the
// job goroutine so the handle is dropped whatever the outcome (deferred
// calls also run while a panic unwinds).
//
// NOTE(review): entries are keyed by jobID alone, so if the same jobID
// were tracked twice the first cleanup would also drop the second
// registration — confirm job IDs are unique per dispatch.
func (d *dispatcher) trackJob(jobID string, cancel context.CancelFunc) func() {
	d.cancelMu.Lock()
	defer d.cancelMu.Unlock()

	if d.cancels == nil {
		d.cancels = map[string]context.CancelFunc{}
	}
	d.cancels[jobID] = cancel

	// The closure is only built here, not invoked, so it is safe to
	// create while cancelMu is still held.
	return func() {
		d.cancelMu.Lock()
		defer d.cancelMu.Unlock()
		delete(d.cancels, jobID)
	}
}
// cancelJob looks up the cancel func registered for jobID and, when one
// exists, invokes it. The return value reports whether jobID was known
// to this dispatcher at the time of the call. The runner is expected to
// surface the resulting context.Canceled as a JobCancelled status in
// its job.finished envelope (see runner.sendFinished).
func (d *dispatcher) cancelJob(jobID string) bool {
	d.cancelMu.Lock()
	stop, known := d.cancels[jobID]
	d.cancelMu.Unlock()

	if known {
		// Fired after releasing cancelMu so the lock is never held
		// across the cancel call.
		stop()
	}
	return known
}
func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.Sender) error {
@@ -222,8 +263,29 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
return d.runJob(ctx, p, tx)
case api.MsgCommandCancel:
// TODO(P2): cancellation requires keeping a job→cancelFunc map.
slog.Info("ws agent: command.cancel received (cancellation lands in P2)", "id", env.ID)
var p api.CommandCancelPayload
if err := env.UnmarshalPayload(&p); err != nil {
return fmt.Errorf("command.cancel: %w", err)
}
if d.cancelJob(p.JobID) {
slog.Info("ws agent: command.cancel applied", "job_id", p.JobID)
} else {
// Job already finished or was never seen on this agent.
// Not an error — operator may have raced cancel against
// natural completion. Server-side state is authoritative.
slog.Info("ws agent: command.cancel for unknown job (already finished?)", "job_id", p.JobID)
}
case api.MsgTreeList:
// Synchronous RPC for the restore wizard's tree browser. The
// server has serialised access; we just run restic ls and reply
// with the same envelope ID. Run in a goroutine so the WS read
// loop keeps draining.
var p api.TreeListRequestPayload
if err := env.UnmarshalPayload(&p); err != nil {
return fmt.Errorf("tree.list: %w", err)
}
go d.handleTreeList(ctx, env.ID, p, tx)
case api.MsgScheduleSet:
var p api.ScheduleSetPayload
@@ -332,6 +394,72 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
return nil
}
// handleTreeList serves one tree.list request: it lists the children of
// p.Path in snapshot p.SnapshotID via restic.Env.ListTreeChildren
// (`restic ls --json <snapshot> <path>`) and ships the matching
// tree.list.result envelope back, correlated by the request envelope's
// ID (reqID).
//
// Errors (restic binary missing, credentials unavailable, restic
// failure) surface in the result's Error field rather than as
// transport-level failures so the server-side waiter can render a
// sensible message. The function returns nothing; it is launched in its
// own goroutine by the dispatcher so the WS read loop keeps draining.
func (d *dispatcher) handleTreeList(ctx context.Context, reqID string, p api.TreeListRequestPayload, tx wsclient.Sender) {
	// reply stamps the request's snapshot ID and path onto result and
	// sends it under the request's envelope ID.
	reply := func(result api.TreeListResultPayload) {
		result.SnapshotID = p.SnapshotID
		result.Path = p.Path
		out, err := api.Marshal(api.MsgTreeListResult, reqID, result)
		if err != nil {
			slog.Warn("ws agent: marshal tree.list.result", "err", err)
			return
		}
		// Nobody is left to report a send failure to (the reply itself
		// is what failed), but log it so a dropped reply is diagnosable
		// on the agent instead of only showing up as a server timeout.
		if err := tx.Send(out); err != nil {
			slog.Warn("ws agent: send tree.list.result", "req_id", reqID, "err", err)
		}
	}

	if d.resticBin == "" {
		reply(api.TreeListResultPayload{Error: "restic binary not located on this agent"})
		return
	}

	creds, err := d.secrets.Load()
	if err != nil {
		reply(api.TreeListResultPayload{Error: "load credentials: " + err.Error()})
		return
	}
	if creds.Empty() {
		reply(api.TreeListResultPayload{Error: "repo credentials not configured"})
		return
	}

	// Snapshot the bandwidth limits under the lock; they can be updated
	// while this request is in flight.
	d.bwMu.Lock()
	upKBps, downKBps := d.bwUpKBps, d.bwDownKBps
	d.bwMu.Unlock()

	repo := restic.Env{
		Bin:               d.resticBin,
		RepoURL:           creds.URL,
		RepoUsername:      creds.Username,
		RepoPassword:      creds.Password,
		LimitUploadKBps:   upKBps,
		LimitDownloadKBps: downKBps,
	}

	// 60s ceiling matches snapshots/stats — restic ls on a single
	// directory is normally sub-second; if the repo is unreachable we
	// want to surface the failure rather than block the wizard.
	listCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()

	entries, err := repo.ListTreeChildren(listCtx, p.SnapshotID, p.Path)
	if err != nil {
		reply(api.TreeListResultPayload{Error: err.Error()})
		return
	}

	// Translate restic's entries into the wire type field by field.
	apiEntries := make([]api.TreeListEntry, 0, len(entries))
	for _, e := range entries {
		apiEntries = append(apiEntries, api.TreeListEntry{
			Name: e.Name,
			Type: e.Type,
			Size: e.Size,
		})
	}
	reply(api.TreeListResultPayload{Entries: apiEntries})
}
// runJob spawns a runner for one job. We launch a goroutine so the
// WS read loop keeps draining messages while restic chugs along.
func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsclient.Sender) error {
@@ -367,6 +495,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
r := runner.New(runner.Config{
ResticBin: d.resticBin,
ResticVersion: d.resticVer,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
@@ -374,6 +503,25 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
LimitDownloadKBps: downKBps,
}, tx, time.Second)
// spawn wraps the kind-specific goroutine: derives a per-job
// cancellable context from the connection-scoped ctx, registers
// the cancel func so command.cancel can fire it, deregisters on
// completion. Per-job ctx means canceling one job doesn't kill
// any other in-flight invocations.
spawn := func(name string, fn func(ctx context.Context) error) {
jobCtx, cancel := context.WithCancel(ctx)
cleanup := d.trackJob(p.JobID, cancel)
go func() {
defer cleanup()
defer cancel() // release ctx resources on goroutine exit
if err := fn(jobCtx); err != nil {
slog.Warn("agent: "+name+" job failed", "job_id", p.JobID, "err", err)
return
}
slog.Info("agent: "+name+" job complete", "job_id", p.JobID)
}()
}
switch p.Kind {
case api.JobBackup:
// Includes/Excludes/Tag come from the source group resolved
@@ -391,22 +539,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
slog.Info("agent: accepting backup job",
"job_id", p.JobID, "paths", paths, "excludes", p.Excludes, "tag", p.Tag)
hooks := runner.BackupHooks{Pre: p.PreHook, Post: p.PostHook}
go func() {
if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags, hooks); err != nil {
slog.Warn("agent: backup job failed", "job_id", p.JobID, "err", err)
return
}
slog.Info("agent: backup job complete", "job_id", p.JobID)
}()
spawn("backup", func(jobCtx context.Context) error {
return r.RunBackup(jobCtx, p.JobID, paths, p.Excludes, tags, hooks)
})
case api.JobInit:
slog.Info("agent: accepting init job", "job_id", p.JobID)
go func() {
if err := r.RunInit(ctx, p.JobID); err != nil {
slog.Warn("agent: init job failed", "job_id", p.JobID, "err", err)
return
}
slog.Info("agent: init job complete", "job_id", p.JobID)
}()
spawn("init", func(jobCtx context.Context) error {
return r.RunInit(jobCtx, p.JobID)
})
case api.JobForget:
if len(p.ForgetGroups) == 0 {
// Hard-error rather than fall back to a single-policy form:
@@ -433,13 +573,9 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
})
}
slog.Info("agent: accepting forget job", "job_id", p.JobID, "groups", len(groups))
go func() {
if err := r.RunForget(ctx, p.JobID, groups); err != nil {
slog.Warn("agent: forget job failed", "job_id", p.JobID, "err", err)
return
}
slog.Info("agent: forget job complete", "job_id", p.JobID)
}()
spawn("forget", func(jobCtx context.Context) error {
return r.RunForget(jobCtx, p.JobID, groups)
})
case api.JobPrune:
// Prune may require admin creds (delete authority on rest-server).
runCreds := creds
@@ -455,6 +591,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
}
prr := runner.New(runner.Config{
ResticBin: d.resticBin,
ResticVersion: d.resticVer,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
@@ -462,29 +599,50 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
LimitDownloadKBps: downKBps,
}, tx, time.Second)
slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
go func() {
if err := prr.RunPrune(ctx, p.JobID); err != nil {
slog.Warn("agent: prune job failed", "job_id", p.JobID, "err", err)
}
}()
spawn("prune", func(jobCtx context.Context) error {
return prr.RunPrune(jobCtx, p.JobID)
})
case api.JobCheck:
subset := 0
if len(p.Args) > 0 {
subset, _ = strconv.Atoi(p.Args[0])
}
slog.Info("agent: accepting check job", "job_id", p.JobID, "subset_pct", subset)
go func() {
if err := r.RunCheck(ctx, p.JobID, subset); err != nil {
slog.Warn("agent: check job failed", "job_id", p.JobID, "err", err)
}
}()
spawn("check", func(jobCtx context.Context) error {
return r.RunCheck(jobCtx, p.JobID, subset)
})
case api.JobUnlock:
slog.Info("agent: accepting unlock job", "job_id", p.JobID)
go func() {
if err := r.RunUnlock(ctx, p.JobID); err != nil {
slog.Warn("agent: unlock job failed", "job_id", p.JobID, "err", err)
}
}()
spawn("unlock", func(jobCtx context.Context) error {
return r.RunUnlock(jobCtx, p.JobID)
})
case api.JobRestore:
if p.Restore == nil {
return fmt.Errorf("restore: command.run carried no restore payload")
}
rp := *p.Restore
if rp.SnapshotID == "" {
return fmt.Errorf("restore: snapshot_id is required")
}
if !rp.InPlace && rp.TargetDir == "" {
return fmt.Errorf("restore: target_dir required for non-in-place restore")
}
slog.Info("agent: accepting restore job",
"job_id", p.JobID, "snapshot_id", rp.SnapshotID,
"paths", rp.Paths, "in_place", rp.InPlace, "target", rp.TargetDir)
spawn("restore", func(jobCtx context.Context) error {
return r.RunRestore(jobCtx, p.JobID, rp.SnapshotID, rp.Paths, rp.InPlace, rp.TargetDir)
})
case api.JobDiff:
if p.Diff == nil || p.Diff.SnapshotA == "" || p.Diff.SnapshotB == "" {
return fmt.Errorf("diff: command.run carried incomplete diff payload")
}
dp := *p.Diff
slog.Info("agent: accepting diff job",
"job_id", p.JobID, "a", dp.SnapshotA, "b", dp.SnapshotB)
spawn("diff", func(jobCtx context.Context) error {
return r.RunDiff(jobCtx, p.JobID, dp.SnapshotA, dp.SnapshotB)
})
default:
return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind)
}
+7
View File
@@ -49,6 +49,13 @@ detect_arch() {
ensure_dirs() {
# Create the agent's config dir, state dir and default restore target.
# All three are root-owned with mode 0700; `install -d` also creates any
# missing parent directories.
install -d -m 0700 -o root -g root "$RM_CONFIG_DIR"
install -d -m 0700 -o root -g root "$RM_STATE_DIR"
# Default new-directory restore target: $HOME/rm-restore. Pre-create
# so the systemd unit's ReadWritePaths bind-mount applies cleanly
# (paths that don't exist when systemd starts get a soft-fail
# because of the '-' prefix, but the agent then can't mkdir into
# the read-only /root). Mode 0700 + root-owned matches the threat
# model — files restored here are operator-readable as root.
# NOTE(review): the path is hard-coded to /root, i.e. it assumes the
# agent runs with HOME=/root — confirm against the unit's User=.
install -d -m 0700 -o root -g root /root/rm-restore
}
detect_existing_schedulers() {
+6 -1
View File
@@ -37,7 +37,12 @@ AmbientCapabilities=CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_CHOWN
# needs. Filesystem reads stay open: that's the whole job.
NoNewPrivileges=true
ProtectSystem=strict
ReadWritePaths=/etc/restic-manager /var/lib/restic-manager
# /etc/restic-manager: agent.yaml + secrets.enc.
# /var/lib/restic-manager: agent state (currently unused but reserved).
# /root/rm-restore: default target for new-directory restores
# ($HOME/rm-restore/<job-id>/ resolves here for User=root).
# ReadWritePaths overrides ProtectHome=read-only on this subdir only.
ReadWritePaths=/etc/restic-manager /var/lib/restic-manager -/root/rm-restore
ProtectHome=read-only
ProtectHostname=true
ProtectKernelTunables=true
@@ -0,0 +1,342 @@
# P3 — Restore (design)
> Phase 3 sub-spec covering single-host restore (P3-01, P3-02, P3-03, P3-09).
> P3-04 (cross-host restore) is deferred to a new "Future / unscheduled"
> section in `tasks.md` — disaster recovery is already covered by re-enrolling
> a replacement host with the same repo credentials.
>
> Wireframe: `_diag/p3-restore-wizard/wireframe.html`. Screenshot:
> `_diag/p3-restore-wizard/01-full-wizard.png`.
## Scope locked
Brainstorm decisions (in order asked):
1. **In-place vs new-directory.** Default is a new directory under
`/var/restic-restore/<job-id>/`. A "Restore in place (overwrite original
paths)" toggle is gated by typed-confirmation of the host name, mirroring
the repo re-init pattern.
2. **Path-selection granularity.** Tree browser as the path selector, lazy-
loaded via `restic ls --json <snapshot> <path>` per directory expansion.
3. **Cross-host restore (P3-04).** Out of scope this phase. Move to
"Future / unscheduled" in `tasks.md`. The disaster-recovery case is covered
by the standard enrolment flow: stand up a replacement host, paste the
original repo creds at enrolment, snapshots reappear, restore is
same-host.
4. **Snapshot diff (P3-09).** Diff-as-a-job. New `JobDiff` JobKind dispatched
like every other agent operation. Output streams as `log.stream` and
renders on the live job log page.
5. **Wizard entry points.** Top-level "Restore" button on host detail
(`/hosts/{id}/restore`, opens wizard at step 1) plus a per-snapshot
Restore action on snapshot rows (`/hosts/{id}/snapshots/{sid}/restore`,
skips step 1).
6. **Wizard interaction model.** Single-page, sections progressively enable;
tree-browser nodes lazy-load via HTMX partials. No `restore_drafts` table.
7. **Tree-browser data path.** Synchronous WS RPC (`tree.list` ↔
`tree.list.result`, correlation-ID) plus a per-wizard-session in-memory
cache keyed by `{snapshot_id, path}` with ~30-min TTL.
8. **Restore progress UI.** Restore-specific job-page variant: files-restored
/ bytes-restored / throughput / ETA / current-file display, driven by
restic restore's JSON status events surfaced through `job.progress`.
9. **Permissions/ownership.** Policy, not toggle. In-place restore preserves
original ownership; new-directory restore drops ownership
(`--no-ownership`).
10. **Concurrency.** Single-flight per host (one job at a time across all
kinds). Plus a real cancel-job feature: `command.cancel` envelope, agent
kills the `restic` subprocess via context cancel (SIGTERM, SIGKILL after
grace), server transitions the job to `cancelled`. The "Cancel" button
already in the `job_detail` template becomes real for any running job
kind.
11. **Audit + safety.** Audit row on every restore dispatch (`host.restore`
with snapshot ID, paths, target, in-place flag). Recent-restores panel
on the host page surfacing the latest restore job alongside last-backup
and last-init signals. Role gate deferred to P4-03.
## Architecture
Restore composes from existing primitives plus three new pieces:
- **New JobKind values**: `JobRestore`, `JobDiff`. Dispatcher cases mirror
the prune/check pattern. Agent-side handlers wrap `restic.RunRestore` and
`restic.RunDiff` (new methods on the `restic` package).
- **New WS RPC**: `tree.list` request (`{snapshot_id, path}`) ↔
`tree.list.result` reply (`{entries: [{name, type, size}], ...}` or
`{error}`). Reuses existing correlation-ID infrastructure from P1-09. No
`jobs` row.
- **New cancel surface**: `command.cancel` request (`{job_id}`), agent
cancels the running subprocess context, returns `command.ack` + `job.finished`
with status `cancelled`. Server endpoint `POST /api/jobs/{id}/cancel`
bridges UI button → WS envelope.
Everything else (job lifecycle, log streaming, progress envelope, snapshot
listing, audit log writer, host_chrome partial, danger-zone typed-confirmation)
already exists and is reused verbatim.
### Component boundaries
| Component | Purpose | Depends on |
| ---------------------------------- | ---------------------------------------------------- | ----------------------------------------- |
| `internal/restic.RunRestore` | Run `restic restore` with paths + target + ownership | `restic.Env` |
| `internal/restic.RunDiff` | Run `restic diff --json a b` | `restic.Env` |
| `internal/agent/runner` cases | Dispatch `JobRestore` / `JobDiff` jobs | `restic.Run*`, hooks (skipped: backup-only) |
| `internal/agent/runner` cancel hook | Wire WS `command.cancel` → ctx.CancelFunc per job | runner job map |
| `internal/agent/runner` tree-list | Sync RPC handler: `restic ls --json` for one path | `restic.Env` |
| `internal/server/ws/cancel.go` | Validate + send `command.cancel` envelope | hub.Send, store.UpdateJobStatus |
| `internal/server/ws/tree.go` | RPC mediator: `tree.list` request → reply, with cache | hub.SendRPC, in-memory cache |
| `internal/server/http/restore.go` | Wizard routes + dispatch endpoint | store, ws, audit |
| `internal/server/http/diff.go` | Snapshot-diff dispatch endpoint | store, ws |
| `internal/server/http/cancel.go` | `POST /api/jobs/{id}/cancel` | ws |
| `web/templates/pages/host_restore.html` | Wizard page | host_chrome partial |
| `web/templates/partials/tree_node.html` | Lazy-loaded tree node fragment for HTMX swap | — |
| `web/templates/pages/job_detail.html` | Restore-kind progress widget (variant) | existing job_detail |
### Data flow — wizard happy path
```
operator
├─ GET /hosts/{id}/restore
│ server renders wizard shell, snapshot table from store.ListSnapshotsByHost
├─ click snapshot row (or arrives via /hosts/{id}/snapshots/{sid}/restore)
│ wizard advances to step 2, snapshot summary card rendered
├─ expand a tree node (chevron click)
│ HTMX GET /hosts/{id}/restore/tree?snapshot={sid}&path=/etc
│ server checks per-session cache (keyed by sid+path)
│ hit → render tree_node fragment from cache
│ miss → hub.SendRPC(host_id, "tree.list", {sid, path}) → wait reply
│ cache result, render tree_node fragment
├─ tick file/dir checkboxes (form state, no round-trip)
├─ pick target radio (and optionally type host name to unlock in-place)
└─ POST /hosts/{id}/restore (form submit)
server validates: ≥1 path, target mode, in-place ⇒ host name match
write audit row host.restore
store.CreateJob{kind=restore, payload={snapshot_id, paths, target, in_place}}
hub.Send(host_id, "command.run", {job_id, kind=restore, payload})
HX-Redirect: /jobs/{job_id}
```
### Data flow — agent restore execution
```
agent.runner receives command.run kind=restore
├─ check single-flight: if r.activeJobID != "" → reply busy
│ (server queues to pending_runs only for kind=backup; restore returns busy)
├─ allocate ctx, ctxCancel — store cancelFunc against job_id in r.cancels
├─ sendStarted(job_id, JobRestore, now)
├─ build target path: if in_place → "/" else "/var/restic-restore/<job_id>/"
├─ build flags: paths from payload, --no-ownership when !in_place
├─ restic.RunRestore(ctx, env, snapshot_id, paths, target, in_place):
│ restic restore <sid> --target <path> [--no-ownership] -- <p1> <p2> ...
│ parse stdout JSON: forward "status" → job.progress (1Hz throttle), "summary" → final
├─ on success: sendFinished(job_id, succeeded, exit=0)
├─ on ctx.Err() == context.Canceled: sendFinished(job_id, cancelled, exit=130)
└─ delete cancel func from r.cancels
```
### Data flow — cancel
```
operator clicks Cancel on /jobs/{id} (running)
POST /api/jobs/{id}/cancel
server: lookup job, ensure status=running, find host
hub.Send(host_id, "command.cancel", {job_id})
→ agent.runner receives command.cancel
cancelFunc, ok := r.cancels[job_id]
ok && cancelFunc()
→ restic subprocess context done → exec.Cmd kills via SIGTERM
→ if still alive after 5s grace → SIGKILL
→ runner sendFinished(job_id, cancelled, exit=130)
→ server receives job.finished status=cancelled, persists, broadcasts
→ browser refresh shows cancelled state
```
The cancel surface is independently useful for any kind (prune/check/backup) —
not gated to restore. The button already in `job_detail.html` becomes real.
### Tree-list RPC details
New WS message types (added to `internal/api/messages.go`):
```
type TreeListRequestPayload struct {
SnapshotID string `json:"snapshot_id"`
Path string `json:"path"`
}
type TreeListEntry struct {
Name string `json:"name"`
Type string `json:"type"` // "dir" | "file" | "symlink"
Size int64 `json:"size,omitempty"`
}
type TreeListResultPayload struct {
SnapshotID string `json:"snapshot_id"`
Path string `json:"path"`
Entries []TreeListEntry `json:"entries,omitempty"`
Error string `json:"error,omitempty"`
}
```
Server-side mediator (`ws.SendRPC`) takes a request envelope, registers the
correlation ID in a pending map, sends, blocks on a per-call channel until
the matching reply arrives (or 30s timeout). The pattern is small enough
to inline in `internal/server/ws/rpc.go` as a generic helper — future
synchronous RPCs reuse it.
In-memory cache: `map[sessionID]map[cacheKey]TreeListResultPayload` with
`cacheKey = snapshot_id + "\x00" + path`. Session ID minted per wizard
load (HTTP-only cookie scoped to `/hosts/{id}/restore/tree`, lifetime 30
min). On wizard close (browser navigation away) the entry expires
naturally. No persistence, no migration.
Agent handler runs `restic ls --json <sid> <path>` (non-recursive — restic
defaults to recursive but `restic ls` accepts `--long` and a path filter;
parse output line-by-line and emit only direct children of `path`). 60s
context timeout, mirroring existing `restic snapshots` invocation.
### Restore payload
`api.CommandRunPayload` gains a nested optional `restore` field:
```
type RestorePayload struct {
SnapshotID string `json:"snapshot_id"`
Paths []string `json:"paths"` // absolute paths inside the snapshot
InPlace bool `json:"in_place"`
TargetDir string `json:"target_dir"` // empty when in_place=true
PreserveOwner bool `json:"preserve_owner"` // mirrors policy: in_place=>true, else=>false
}
```
The payload is set by the server when dispatching `JobRestore` and ignored
on every other kind. Wire-shape test pinned in `wire_test.go`.
### Diff payload
`api.CommandRunPayload` gains:
```
type DiffPayload struct {
SnapshotA string `json:"snapshot_a"`
SnapshotB string `json:"snapshot_b"`
}
```
Set on `JobDiff`. Output is plain `restic diff --json <a> <b>` forwarded as
`log.stream` lines. Job page renders unchanged — operator reads the diff
output directly.
### Recent-restores panel
A small panel rendered on the host detail page below the existing init-status
line:
```
last restore: succeeded 2h ago · job f73ab4c1… · 3 files to /var/restic-restore/...
```
Backed by a new `store.LatestJobByKind(host_id, JobRestore)` query (mirroring
the existing `store.LatestJobByKind` already used for init/forget/prune/check
in P2R-06). One template addition in `host_chrome.html` next to the
`InitStatus` block.
## Routes added
| Method | Path | Purpose |
| ------- | --------------------------------------------------------- | ----------------------------------------------------------- |
| GET | `/hosts/{id}/restore` | Wizard shell (step 1 = snapshot picker) |
| GET | `/hosts/{id}/snapshots/{sid}/restore` | Wizard shell with snapshot pre-selected (skips step 1) |
| GET | `/hosts/{id}/restore/tree` | HTMX partial: tree node listing for `?snapshot=&path=` |
| POST | `/hosts/{id}/restore` | Validate + dispatch restore job, redirect to live job page |
| POST | `/api/hosts/{id}/snapshots/diff` | Dispatch a diff job for `{snapshot_a, snapshot_b}` |
| POST | `/api/jobs/{id}/cancel` | Send `command.cancel` to host, transition job → cancelled |
## Migrations
None. Restore + diff piggyback on the existing `jobs` table (their `kind` is
new but the schema already accepts arbitrary kind strings — there's no
CHECK constraint on `kind`). The cancel feature uses the existing
`JobCancelled` terminal status. The tree-list cache lives in process memory.
## Tests (target coverage)
- `internal/restic/restore_test.go` — `RunRestore` invocation builds the
expected argv (paths, --target, --no-ownership flag presence, in-place
variant); JSON status parsing → `BackupStatus`-shaped progress envelopes.
- `internal/restic/diff_test.go` — `RunDiff` argv shape and JSON forwarding.
- `internal/agent/runner/restore_test.go` — happy path, cancel mid-run
produces `cancelled` finished, in-place vs new-directory dispatch,
single-flight rejects when another job is running.
- `internal/agent/runner/tree_test.go` — `tree.list` handler returns
direct children for a synthetic restic ls output, surfaces error on
missing snapshot.
- `internal/server/ws/rpc_test.go` — `SendRPC` correlation matching,
timeout, concurrent calls.
- `internal/server/http/restore_test.go` — wizard renders with snapshots,
POST validates ≥1 path + in-place host-name match, audit row written,
job dispatched with correct payload, in-place without typed-confirm
re-renders form with input intact and an error.
- `internal/server/http/diff_test.go` — POST dispatches `JobDiff`,
snapshot IDs validated against the host's snapshot list.
- `internal/server/http/cancel_test.go` — POST cancel happy path
(running → cancelled), 4xx for non-running jobs, 4xx when host offline.
- `internal/server/http/restore_e2e_test.go` — happy path: GET wizard,
expand `/etc` (HTMX call returns expected fragment), submit, follow
HX-Redirect to job page, see status.
- `web/templates/pages/host_restore_test.go` (template-render test) —
wizard renders all four sections; in-place card disabled until typed
confirm.
## Playwright iteration / sweep
A Playwright sweep at the end (mirroring P2R-02 Slice 6) runs against the
local smoke server with a real agent enrolled. Steps:
1. Login → navigate to alfa-01 host → click Restore.
2. Wizard step 1: pick the most recent snapshot.
3. Wizard step 2: expand a directory two levels, tick three files,
verify tally updates.
4. Wizard step 3: leave default new-directory.
5. Wizard step 4: dispatch.
6. Land on live job page, see progress widget animating, see log lines.
7. Click Cancel mid-flight, verify status transitions to cancelled and
the agent's subprocess actually died (log line `signal: killed` or exit
130).
8. Repeat with in-place mode: type host name, dispatch, verify red
primary button, verify files actually overwritten on host.
9. Snapshot diff: navigate to snapshots, pick two, dispatch diff, see
diff output streamed.
10. Screenshots into `_diag/p3-restore-sweep/`.
End-to-end clean, zero console errors, before handing back.
## What does NOT change
- `host_chrome.html` only grows the recent-restores line; sub-tab list
unchanged (Restore is a top-level button on the host page, not a sub-tab).
- `enrollment.go`, schedule reconciliation, source-group CRUD, repo
maintenance ticker, hook execution — none of these are touched.
- The CLAUDE.md restage block applies as-is when the agent binary changes
(it does — runner gains restore/diff/cancel/tree handlers). The unit
file does not change.
## Open questions / explicit non-goals
- **Restore preview / dry-run.** Restic doesn't have a dry-run for restore.
Out of scope.
- **Resumable restore.** Restic restore is idempotent per-file but not
resumable mid-stream from where it left off. If a restore is cancelled,
the operator re-runs (files already written are overwritten). No state
to track.
- **Restore to a glob/pattern (e.g. `*.conf`).** Out of scope; the tree
picker requires explicit ticks. Power users can edit the URL or use the
CLI.
- **Bandwidth caps for restore.** Honoured automatically — restic's
`--limit-download` is part of `restic.Env` already (P2R-13) and applies
to restore unchanged.
- **Pre/post hooks for restore.** Hooks today gate only `kind=backup`
(P2R-11). Out of scope.
+81
View File
@@ -0,0 +1,81 @@
package runner
import (
"context"
"strings"
"testing"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// (fakeSender is defined in runner_test.go; it's already lock-protected
// because the runner's stdout + stderr pump goroutines call Send
// concurrently. The original local 'safeSender' here was a workaround
// from before fakeSender itself grew the mutex.)
// TestRunBackupCanceledMidRunReportsCanceled spawns a backup against
// a fake restic that sleeps for 30 seconds, cancels the context after
// a short delay, and confirms the resulting job.finished envelope
// reports status api.JobCancelled (not failed), exit code 130 and an
// empty error string.
func TestRunBackupCanceledMidRunReportsCanceled(t *testing.T) {
t.Parallel()
// Fake restic: replace the shell with a long sleep via `exec` so the
// process tree is one process — SIGTERM goes directly to sleep and
// it exits. Without `exec`, the shell stays in the foreground while
// sleep is its child; SIGTERM-to-shell may or may not propagate to
// sleep depending on the shell, leading to the WaitDelay-then-
// SIGKILL fallback path firing — slower and noisier.
bin := setupScript(t, `exec sleep 30`)
tx := &fakeSender{}
r := New(Config{ResticBin: bin}, tx, 0)
ctx, cancel := context.WithCancel(context.Background())
// Buffered so the goroutine can deliver its result and exit even if
// the select below has already given up on the timeout branch.
done := make(chan error, 1)
go func() {
done <- r.RunBackup(ctx, "job-cancel", []string{"/tmp/x"}, nil, nil, BackupHooks{})
}()
// Wait long enough for the subprocess to actually start before
// canceling. Without this, exec.CommandContext can race the
// kill against Start and produce a different error path.
// NOTE(review): a fixed 150ms sleep is a heuristic, not a
// synchronisation point — a heavily loaded machine could still lose
// the race.
time.Sleep(150 * time.Millisecond)
cancel()
// RunBackup's own return value is deliberately discarded; the
// assertions below are made on the envelopes it sent.
select {
case <-done:
case <-time.After(15 * time.Second):
t.Fatal("RunBackup did not return within 15s of cancel")
}
// Locate the first job.finished envelope and check its status.
envs := tx.snapshot()
var finEnv api.Envelope
var found bool
for _, e := range envs {
if e.Type == api.MsgJobFinished {
finEnv = e
found = true
break
}
}
if !found {
t.Fatal("no job.finished envelope was sent")
}
var fin api.JobFinishedPayload
if err := finEnv.UnmarshalPayload(&fin); err != nil {
t.Fatalf("unmarshal: %v", err)
}
if fin.Status != api.JobCancelled {
t.Fatalf("status: got %q, want %q", fin.Status, api.JobCancelled)
}
if fin.ExitCode != 130 {
t.Errorf("exit_code: got %d, want 130 (POSIX cancel convention)", fin.ExitCode)
}
// The error message should be empty for canceled jobs (see runner.sendFinished).
// NOTE(review): strings.HasPrefix(s, "") is true for every s, so the
// first clause is always false and only `fin.Error != ""` decides the
// outcome. The clause looks like it exists solely to keep the "strings"
// import referenced — confirm, then drop both the clause and the import.
if !strings.HasPrefix(fin.Error, "") || fin.Error != "" {
t.Errorf("error: got %q, want empty for canceled jobs", fin.Error)
}
}
+266
View File
@@ -0,0 +1,266 @@
package runner
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestRunRestoreShipsExpectedEnvelopes: a fake restic emits a couple
// of restore status lines and a summary; the runner translates them
// into job.progress envelopes and finishes the job successfully.
// The test checks envelope ordering (started → progress → finished),
// the kind on job.started, the status on job.finished, and that the
// first job.progress carries non-zero counters.
func TestRunRestoreShipsExpectedEnvelopes(t *testing.T) {
t.Parallel()
// Fake restic: for the "restore" subcommand print two status events
// (50% and 100%) followed by a summary; anything else is echoed back
// as "unknown".
bin := setupScript(t, `
case "$1" in
restore)
echo '{"message_type":"status","seconds_elapsed":1,"percent_done":0.5,"total_files":10,"files_restored":5,"total_bytes":1000,"bytes_restored":500}'
echo '{"message_type":"status","seconds_elapsed":2,"percent_done":1.0,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}'
echo '{"message_type":"summary","seconds_elapsed":2,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}'
;;
*)
echo "unknown: $*" ;;
esac
`)
tx := &fakeSender{}
r := New(Config{ResticBin: bin}, tx, 0)
// New-directory restore (in-place = false) of a single path into
// /tmp/restore-out.
if err := r.RunRestore(context.Background(), "job-r1", "f3a7b2c1",
[]string{"/etc/nginx/sites-available/alfa.conf"},
false, "/tmp/restore-out"); err != nil {
t.Fatalf("RunRestore: %v", err)
}
// Confirm landmarks: started → progress → finished.
// NOTE(review): tx.envs is read directly here, whereas the cancel
// test in this package goes through tx.snapshot() — confirm direct
// access is race-free once RunRestore has returned.
order := envelopeOrder(tx.envs)
wants := []api.MessageType{api.MsgJobStarted, api.MsgJobProgress, api.MsgJobFinished}
// positions records the index of the FIRST occurrence of each
// message type, so repeated progress envelopes don't disturb the
// ordering check.
positions := map[api.MessageType]int{}
for i, mt := range order {
if _, seen := positions[mt]; !seen {
positions[mt] = i
}
}
// Each adjacent pair in wants must both be present and appear in
// that order.
for i := 0; i < len(wants)-1; i++ {
a, b := wants[i], wants[i+1]
pa, aOK := positions[a]
pb, bOK := positions[b]
if !aOK {
t.Fatalf("envelope %q not found in %v", a, order)
}
if !bOK {
t.Fatalf("envelope %q not found in %v", b, order)
}
if pa >= pb {
t.Fatalf("expected %q before %q (positions %d, %d)", a, b, pa, pb)
}
}
// Started carries the right kind.
startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted)
var startP api.JobStartedPayload
if err := startEnv.UnmarshalPayload(&startP); err != nil {
t.Fatalf("unmarshal started: %v", err)
}
if startP.Kind != api.JobRestore {
t.Fatalf("kind: got %q want %q", startP.Kind, api.JobRestore)
}
// Finished is succeeded.
finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
var finP api.JobFinishedPayload
if err := finEnv.UnmarshalPayload(&finP); err != nil {
t.Fatalf("unmarshal finished: %v", err)
}
if finP.Status != api.JobSucceeded {
t.Fatalf("status: got %q want %q", finP.Status, api.JobSucceeded)
}
// Inspect the FIRST progress envelope. It is not guaranteed to be the
// final (100%) status line, so exact values are not pinned.
progEnv := firstEnvOfType(t, tx.envs, api.MsgJobProgress)
var progP api.JobProgressPayload
if err := progEnv.UnmarshalPayload(&progP); err != nil {
t.Fatalf("unmarshal progress: %v", err)
}
// First progress will be from line 1 (50%) since we send first status
// immediately. Verify we at least see a sensible value.
if progP.PercentDone <= 0 {
t.Fatalf("expected non-zero progress, got %v", progP.PercentDone)
}
if progP.FilesDone <= 0 || progP.TotalFiles <= 0 {
t.Fatalf("expected file counters set, got %+v", progP)
}
}
// TestRunRestoreInPlaceArgvHasNoNoOwnership checks the argv of an
// in-place restore indirectly: the fake restic echoes its arguments
// on stderr, the runner forwards stderr as log.stream, and the test
// inspects that captured line. In-place mode must not include
// --no-ownership and must target "/".
func TestRunRestoreInPlaceArgvHasNoNoOwnership(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
restore)
# Print all args on stderr so they're forwarded as log.stream.
echo "argv: $*" 1>&2
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
;;
esac
`)
	sender := &fakeSender{}
	rn := New(Config{ResticBin: bin}, sender, 0)
	err := rn.RunRestore(context.Background(), "job-r2", "abc", nil, true, "")
	if err != nil {
		t.Fatalf("RunRestore: %v", err)
	}

	// Find the first stderr log line that carries the echoed argv.
	var argv string
	for _, env := range sender.envs {
		if env.Type != api.MsgLogStream {
			continue
		}
		var logLine api.LogStreamLine
		// Decode errors are ignored on purpose: a line that does not
		// decode leaves logLine zeroed and simply cannot match.
		_ = env.UnmarshalPayload(&logLine)
		if logLine.Stream != api.LogStderr || !strings.HasPrefix(logLine.Payload, "argv:") {
			continue
		}
		argv = logLine.Payload
		break
	}
	if argv == "" {
		t.Fatal("never captured argv echo from fake restic")
	}

	if strings.Contains(argv, "--no-ownership") {
		t.Errorf("in-place restore should NOT pass --no-ownership; got argv=%q", argv)
	}
	if hasRootTarget := strings.Contains(argv, "--target /"); !hasRootTarget {
		t.Errorf("in-place restore should pass --target /; got argv=%q", argv)
	}
}
// TestRunRestoreNewDirArgvShape checks the argv of a new-directory
// (non-in-place) restore via the same stderr-echo trick: --target
// must point at the operator-chosen directory and the selected path
// must appear as an --include filter. --no-ownership must be absent;
// per the original note it only exists from restic 0.17 and older
// versions reject it (restore.go carries the rationale).
func TestRunRestoreNewDirArgvShape(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
restore)
echo "argv: $*" 1>&2
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
;;
esac
`)
	sender := &fakeSender{}
	rn := New(Config{ResticBin: bin}, sender, 0)
	err := rn.RunRestore(context.Background(), "job-r3", "abc", []string{"/etc/foo"}, false, "/tmp/restore-out")
	if err != nil {
		t.Fatalf("RunRestore: %v", err)
	}

	// Find the first stderr log line that carries the echoed argv.
	var argv string
	for _, env := range sender.envs {
		if env.Type != api.MsgLogStream {
			continue
		}
		var logLine api.LogStreamLine
		// Decode errors are ignored on purpose: a line that does not
		// decode leaves logLine zeroed and simply cannot match.
		_ = env.UnmarshalPayload(&logLine)
		if logLine.Stream != api.LogStderr || !strings.HasPrefix(logLine.Payload, "argv:") {
			continue
		}
		argv = logLine.Payload
		break
	}
	if argv == "" {
		t.Fatal("no argv echo")
	}

	if strings.Contains(argv, "--no-ownership") {
		t.Errorf("restic 0.16 doesn't accept --no-ownership; got argv=%q", argv)
	}
	if hasTarget := strings.Contains(argv, "--target /tmp/restore-out"); !hasTarget {
		t.Errorf("expected --target /tmp/restore-out; got argv=%q", argv)
	}
	if hasInclude := strings.Contains(argv, "--include /etc/foo"); !hasInclude {
		t.Errorf("expected --include /etc/foo; got argv=%q", argv)
	}
}
// TestRunRestoreNewDirAutoCreatesTarget verifies that a new-directory
// restore leaves the requested target in place as a directory even
// when none of its parent levels existed beforehand, so operators do
// not have to pre-create the per-job subdirectory. The fake restic
// only prints a summary, so the directory must come from the restore
// path itself rather than from restic.
func TestRunRestoreNewDirAutoCreatesTarget(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
restore)
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
;;
esac
`)
	sender := &fakeSender{}
	rn := New(Config{ResticBin: bin}, sender, 0)

	// Several levels below a fresh temp dir; none of them exist yet.
	target := filepath.Join(t.TempDir(), "deep", "deeper", "deepest")
	err := rn.RunRestore(context.Background(), "job-rmkdir", "abc", []string{"/etc/foo"}, false, target)
	if err != nil {
		t.Fatalf("RunRestore: %v", err)
	}

	info, statErr := os.Stat(target)
	if statErr != nil {
		t.Fatalf("expected target dir to exist: %v", statErr)
	}
	if !info.IsDir() {
		t.Fatalf("expected directory, got %v", info.Mode())
	}
}
// TestRunDiffShipsLogLines runs RunDiff against a fake restic that
// prints one "change" line and one "statistics" line, then checks
// that job.started carries the diff kind, job.finished reports
// success, and at least one log.stream envelope carries the change
// payload.
func TestRunDiffShipsLogLines(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
diff)
echo '{"message_type":"change","path":"/etc/nginx/nginx.conf","modifier":"M"}'
echo '{"message_type":"statistics","added":{"files":0,"dirs":0}}'
;;
esac
`)
	tx := &fakeSender{}
	r := New(Config{ResticBin: bin}, tx, 0)
	if err := r.RunDiff(context.Background(), "job-d1", "snap-a", "snap-b"); err != nil {
		t.Fatalf("RunDiff: %v", err)
	}

	// Decode failures are reported explicitly (as the restore tests
	// do) instead of surfacing as a confusing zero-value mismatch.
	startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted)
	var startP api.JobStartedPayload
	if err := startEnv.UnmarshalPayload(&startP); err != nil {
		t.Fatalf("unmarshal started: %v", err)
	}
	if startP.Kind != api.JobDiff {
		t.Fatalf("kind: got %q want %q", startP.Kind, api.JobDiff)
	}

	finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
	var finP api.JobFinishedPayload
	if err := finEnv.UnmarshalPayload(&finP); err != nil {
		t.Fatalf("unmarshal finished: %v", err)
	}
	if finP.Status != api.JobSucceeded {
		t.Fatalf("status: %q", finP.Status)
	}

	// At least one log line should carry the change payload.
	var sawChange bool
	for _, e := range tx.envs {
		if e.Type != api.MsgLogStream {
			continue
		}
		var p api.LogStreamLine
		// Best-effort scan: a line that fails to decode leaves p
		// zeroed and therefore cannot match the change marker.
		_ = e.UnmarshalPayload(&p)
		if strings.Contains(p.Payload, `"message_type":"change"`) {
			sawChange = true
		}
	}
	if !sawChange {
		t.Fatal("never saw a change log line in diff output")
	}
}
+124 -14
View File
@@ -26,10 +26,11 @@ type Sender interface {
// from the agent's config file (server-pushed config.update payloads
// override these in memory).
type Config struct {
ResticBin string
RepoURL string
RepoUsername string
RepoPassword string
ResticBin string
ResticVersion string // e.g. "0.17.1" — empty if unknown
RepoURL string
RepoUsername string
RepoPassword string
// Bandwidth caps in KB/s applied to every restic invocation.
// <=0 means "no cap". Per-job override: callers that build a
@@ -61,6 +62,7 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner {
func (r *Runner) resticEnv() restic.Env {
return restic.Env{
Bin: r.cfg.ResticBin,
Version: r.cfg.ResticVersion,
RepoURL: r.cfg.RepoURL,
RepoUsername: r.cfg.RepoUsername,
RepoPassword: r.cfg.RepoPassword,
@@ -95,8 +97,10 @@ func (r *Runner) streamHandler(jobID string, seq *atomic.Int64) restic.LineHandl
}
// sendFinished ships a job.finished envelope. err==nil → succeeded;
// otherwise failed. statsBlob is forwarded as JobFinishedPayload.Stats.
func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) {
// otherwise failed (or canceled if ctx was canceled — operator
// hit the Cancel button or the agent is shutting down).
// statsBlob is forwarded as JobFinishedPayload.Stats.
func (r *Runner) sendFinished(ctx context.Context, jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) {
status := api.JobSucceeded
exit := 0
errMsg := ""
@@ -104,6 +108,16 @@ func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, sta
status = api.JobFailed
exit = -1
errMsg = err.Error()
// If the context was canceled, the failure is operator-driven
// (or shutdown). Surface as JobCancelled so the UI shows a
// neutral "canceled" state rather than a red "failed" one.
// exec.CommandContext returns the process's exit error on
// ctx-cancel, which we'd otherwise rebadge as failed.
if ctxErr := ctx.Err(); ctxErr != nil {
status = api.JobCancelled
exit = 130 // POSIX convention for SIGINT/SIGTERM-killed
errMsg = "" // no need to surface the underlying restic error
}
}
finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
JobID: jobID,
@@ -138,13 +152,13 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
if hooks.Pre != "" {
if err := r.runHook(ctx, jobID, "pre", hooks.Pre, "", &seq); err != nil {
finishedAt := time.Now().UTC()
r.sendFinished(jobID, finishedAt, err, nil)
r.sendFinished(ctx, jobID, finishedAt, err, nil)
return fmt.Errorf("pre_hook failed: %w", err)
}
}
env := r.resticEnv()
lastProgress := time.Now()
lastProgress := time.Time{} // zero time → first status event always emits
handle := func(stream string, line string, ev any) {
// Throttled progress events come from restic's `status` JSON.
@@ -206,7 +220,7 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
}
}
r.sendFinished(jobID, finishedAt, err, statsBlob)
r.sendFinished(ctx, jobID, finishedAt, err, statsBlob)
// On a successful backup, refresh the server's snapshot projection.
// We do this *after* job.finished so the UI sees the job land first;
@@ -240,7 +254,7 @@ func (r *Runner) RunInit(ctx context.Context, jobID string) error {
var seq atomic.Int64
err := env.RunInit(ctx, r.streamHandler(jobID, &seq))
finishedAt := time.Now().UTC()
r.sendFinished(jobID, finishedAt, err, nil)
r.sendFinished(ctx, jobID, finishedAt, err, nil)
if err != nil {
return fmt.Errorf("runner init: %w", err)
}
@@ -262,7 +276,7 @@ func (r *Runner) RunForget(ctx context.Context, jobID string, groups []restic.Fo
var seq atomic.Int64
err := env.RunForget(ctx, groups, r.streamHandler(jobID, &seq))
finishedAt := time.Now().UTC()
r.sendFinished(jobID, finishedAt, err, nil)
r.sendFinished(ctx, jobID, finishedAt, err, nil)
// Refresh the server's snapshot projection — forget rewrites the
// index so the host's snapshot list almost certainly shrunk.
@@ -300,7 +314,7 @@ func (r *Runner) RunPrune(ctx context.Context, jobID string) error {
}
}
r.sendFinished(jobID, finishedAt, err, nil)
r.sendFinished(ctx, jobID, finishedAt, err, nil)
if err != nil {
return fmt.Errorf("runner prune: %w", err)
@@ -339,7 +353,7 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro
slog.Warn("runner: stats.report after check failed", "job_id", jobID, "err", rerr)
}
r.sendFinished(jobID, finishedAt, err, nil)
r.sendFinished(ctx, jobID, finishedAt, err, nil)
if err != nil {
return fmt.Errorf("runner check: %w", err)
@@ -347,6 +361,102 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro
return nil
}
// RunRestore runs a restic restore job and reports its lifecycle
// through the sender: job.started first, then log.stream and
// job.progress envelopes while restic runs, and finally job.finished.
//
// snapshotID, paths, inPlace and targetDir are passed to
// restic.Env.RunRestore unchanged; how they translate into restic
// arguments (target directory, ownership handling) is decided there.
//
// Output handling:
//   - Lines that are not restore status events are forwarded as
//     log.stream with a per-job sequence number.
//   - Status events are never forwarded as log lines. They become
//     job.progress envelopes, throttled so that at most one is sent
//     per progressMinPeriod; the zero-valued lastProgress means the
//     first status event is always sent.
//
// The summary returned by restic (if any) is JSON-encoded into the
// job.finished stats. A restic error is returned wrapped with a
// "runner restore" prefix.
func (r *Runner) RunRestore(ctx context.Context, jobID, snapshotID string, paths []string, inPlace bool, targetDir string) error {
	r.sendStarted(jobID, api.JobRestore, time.Now().UTC())

	env := r.resticEnv()
	var (
		seq          atomic.Int64
		lastProgress time.Time // zero time → first status event always emits
	)

	onLine := func(stream string, line string, ev any) {
		st, ok := ev.(restic.RestoreStatus)
		if !ok {
			// Ordinary output: ship it verbatim as a log line.
			ts := time.Now().UTC()
			logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
				JobID:   jobID,
				Seq:     seq.Add(1),
				TS:      ts,
				Stream:  api.LogStream(stream),
				Payload: line,
			})
			_ = r.tx.Send(logEnv)
			return
		}

		// Status event: drop it if the previous progress envelope is
		// still too recent.
		if time.Since(lastProgress) < r.progressMinPeriod {
			return
		}
		lastProgress = time.Now()
		progEnv, _ := api.Marshal(api.MsgJobProgress, jobID, api.JobProgressPayload{
			JobID:         jobID,
			PercentDone:   st.PercentDone,
			FilesDone:     st.FilesRestored,
			TotalFiles:    st.TotalFiles,
			BytesDone:     st.BytesRestored,
			TotalBytes:    st.TotalBytes,
			ETASeconds:    estimateETA(st.BytesRestored, st.TotalBytes, st.SecondsElapsed),
			ThroughputBps: throughput(st.BytesRestored, st.SecondsElapsed),
		})
		_ = r.tx.Send(progEnv)
	}

	summary, runErr := env.RunRestore(ctx, snapshotID, paths, inPlace, targetDir, onLine)
	finishedAt := time.Now().UTC()

	var statsBlob json.RawMessage
	if summary != nil {
		statsBlob, _ = json.Marshal(summary)
	}
	r.sendFinished(ctx, jobID, finishedAt, runErr, statsBlob)

	if runErr == nil {
		return nil
	}
	return fmt.Errorf("runner restore: %w", runErr)
}
// estimateETA returns the estimated number of seconds remaining,
// obtained by linear extrapolation from the bytes processed so far
// and the elapsed time. Restic restore's --json doesn't emit an ETA
// field of its own (unlike backup), so we approximate.
//
// It returns 0 when there is not enough data to extrapolate: no bytes
// done yet, unknown total, no elapsed time, or the transfer is
// already complete (bytesDone >= totalBytes). The fractional part of
// the estimate is truncated.
func estimateETA(bytesDone, totalBytes, secondsElapsed int64) int64 {
	if bytesDone <= 0 || totalBytes <= 0 || secondsElapsed <= 0 || bytesDone >= totalBytes {
		return 0
	}
	// Both operands are >= 1 past the guard above, so the rate is
	// strictly positive and needs no separate zero check.
	rate := float64(bytesDone) / float64(secondsElapsed)
	return int64(float64(totalBytes-bytesDone) / rate)
}
// RunDiff runs `restic diff --json <a> <b>` for the two snapshots and
// forwards its output as log.stream lines through the shared stream
// handler. It brackets the run with job.started and job.finished and
// sends nothing else: no snapshot-list refresh and no stats update,
// because diff is purely informational. A restic error is returned
// wrapped with a "runner diff" prefix.
func (r *Runner) RunDiff(ctx context.Context, jobID, snapshotA, snapshotB string) error {
	r.sendStarted(jobID, api.JobDiff, time.Now().UTC())

	var seq atomic.Int64
	diffErr := r.resticEnv().RunDiff(ctx, snapshotA, snapshotB, r.streamHandler(jobID, &seq))
	r.sendFinished(ctx, jobID, time.Now().UTC(), diffErr, nil)

	if diffErr == nil {
		return nil
	}
	return fmt.Errorf("runner diff: %w", diffErr)
}
// RunUnlock executes a `restic unlock` job. On success it ships a
// repo.stats envelope with LockPresent=false so the UI banner clears.
func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {
@@ -366,7 +476,7 @@ func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {
}
}
r.sendFinished(jobID, finishedAt, err, nil)
r.sendFinished(ctx, jobID, finishedAt, err, nil)
if err != nil {
return fmt.Errorf("runner unlock: %w", err)
+25 -3
View File
@@ -4,20 +4,42 @@ import (
"context"
"os"
"path/filepath"
"sync"
"testing"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
)
// fakeSender collects sent envelopes for assertions.
type fakeSender struct{ envs []api.Envelope }
// fakeSender is a test double for the runner's Sender: it records
// every envelope passed to Send so tests can assert on them later.
// It is lock-protected because the runner's pumpStdout / pumpStderr
// goroutines call Send concurrently — without the mutex, -race in CI
// flags every test that exercises a Run* method with both pumps
// active.
type fakeSender struct {
	mu   sync.Mutex     // guards envs
	envs []api.Envelope // envelopes in the order Send received them
}
// Send appends e to the captured envelopes while holding the lock.
// It never fails.
func (s *fakeSender) Send(e api.Envelope) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.envs = append(s.envs, e)
	return nil
}
// snapshot returns a copy of the envelopes captured so far. The copy
// is taken under the lock, so it is safe to iterate even while other
// goroutines are still calling Send. Per the original note, every
// runner Run* method joins its pumps before returning, so reading
// .envs directly after a Run* call has returned is also acceptable.
func (s *fakeSender) snapshot() []api.Envelope {
	s.mu.Lock()
	dup := make([]api.Envelope, len(s.envs))
	copy(dup, s.envs)
	s.mu.Unlock()
	return dup
}
// setupScript writes a shell script (without shebang) to a temp dir,
// names it "restic", makes it executable, and returns the path.
//
@@ -320,7 +342,7 @@ esac
// still produces job.started and job.finished envelopes.
func TestRunInitShipsStartedAndFinished(t *testing.T) {
t.Parallel()
bin := setupScript(t, `echo "initialized repository"`)
bin := setupScript(t, `echo "initialised repository"`)
tx := &fakeSender{}
r := New(Config{ResticBin: bin}, tx, 0)
if err := r.RunInit(context.Background(), "job-init"); err != nil {
+1 -1
View File
@@ -110,7 +110,7 @@ func (s *Scheduler) Apply(payload api.ScheduleSetPayload, tx Sender) {
"received", len(payload.Schedules), "active", added)
// Ack outside the lock — Send() shouldn't take long, but holding
// s.mu across an external call would needlessly serialize other
// s.mu across an external call would needlessly serialise other
// callers (e.g. a future Status() inspection from the UI).
ackEnv, err := api.Marshal(api.MsgScheduleAck, "", api.ScheduleAckPayload{
Version: payload.Version,
+1 -1
View File
@@ -21,7 +21,7 @@ import (
// additionalData binds ciphertexts to the agent-secrets context, so a
// blob lifted from one role's file can't be replayed into another's
// row in some unrelated table that uses the same key. (Defense in
// row in some unrelated table that uses the same key. (Defence in
// depth — the key is per-host today, but cheap to be careful.)
const additionalData = "rm-agent-repo-creds-v1"
+1 -1
View File
@@ -76,5 +76,5 @@ func detectResticVersion(ctx context.Context, override string) (string, error) {
if len(parts) >= 2 && parts[0] == "restic" {
return parts[1], nil
}
return "", fmt.Errorf("sysinfo: unrecognized restic version output: %q", first)
return "", fmt.Errorf("sysinfo: unrecognised restic version output: %q", first)
}
+1 -1
View File
@@ -40,7 +40,7 @@ type Config struct {
// Sender is what handlers use to push agent → server messages
// (job.progress, job.finished, log.stream, command.result, …).
// Returned by the WS client to the dispatch handler. Write operations
// serialize behind a single mutex on the conn; concurrent calls are
// serialise behind a single mutex on the conn; concurrent calls are
// safe.
type Sender interface {
Send(env api.Envelope) error
+73 -7
View File
@@ -52,14 +52,17 @@ type JobKind string
// Allowed JobKind values. backup is operator/cron driven; init runs
// once per host on first connect; forget/prune/check fire from the
// server-side maintenance ticker; unlock is operator-only.
// server-side maintenance ticker; unlock and restore are operator-
// only; diff is operator-only and read-only.
const (
JobBackup JobKind = "backup"
JobInit JobKind = "init"
JobForget JobKind = "forget"
JobPrune JobKind = "prune"
JobCheck JobKind = "check"
JobUnlock JobKind = "unlock"
JobBackup JobKind = "backup"
JobInit JobKind = "init"
JobForget JobKind = "forget"
JobPrune JobKind = "prune"
JobCheck JobKind = "check"
JobUnlock JobKind = "unlock"
JobRestore JobKind = "restore"
JobDiff JobKind = "diff"
)
// JobStatus is the lifecycle state of a job.
@@ -143,6 +146,35 @@ type CommandRunPayload struct {
// just executes whatever is here.
PreHook string `json:"pre_hook,omitempty"`
PostHook string `json:"post_hook,omitempty"`
// Restore is populated only for kind=restore. See RestorePayload
// for the shape; nil for every other kind.
Restore *RestorePayload `json:"restore,omitempty"`
// Diff is populated only for kind=diff. See DiffPayload for
// shape; nil for every other kind.
Diff *DiffPayload `json:"diff,omitempty"`
}
// RestorePayload carries restore-specific arguments on a JobRestore
// command.run. Paths are absolute paths inside the snapshot. When
// InPlace is true the agent restores at root (`--target /`) and
// preserves uid/gid/mode; otherwise it restores into TargetDir so the
// operator can inspect the files as the agent user.
//
// NOTE(review): this comment previously said the new-directory mode
// passes --no-ownership, but the runner tests added alongside it
// assert that the flag is absent — confirm against restore.go which
// of the two is current.
type RestorePayload struct {
	SnapshotID string   `json:"snapshot_id"`          // snapshot to restore from
	Paths      []string `json:"paths"`                // operator-selected files/dirs inside the snapshot
	InPlace    bool     `json:"in_place"`             // true → restore at "/" instead of TargetDir
	TargetDir  string   `json:"target_dir,omitempty"` // ignored when in_place=true
}
// DiffPayload carries snapshot-diff arguments on a JobDiff
// command.run: the two snapshots to compare. SnapshotA / SnapshotB
// may be either short or long IDs; restic accepts both.
type DiffPayload struct {
	SnapshotA string `json:"snapshot_a"` // first snapshot given to the diff
	SnapshotB string `json:"snapshot_b"` // second snapshot given to the diff
}
// CommandCancelPayload is the server → agent cancel signal.
@@ -337,3 +369,37 @@ type AgentUpdateAvailablePayload struct {
PackageURL string `json:"package_url"` // apt repo / choco source
Changelog string `json:"changelog,omitempty"`
}
// TreeListRequestPayload is the body of a tree.list RPC. Used by the
// restore wizard to lazy-load directory contents from a snapshot.
//
// The exchange is synchronous: the server marshals MsgTreeList with a
// fresh Envelope.ID, sends it to the agent, and blocks on a channel
// keyed by that ID. The agent runs
// `restic ls --json <SnapshotID> <Path>`, keeps the direct children,
// and replies with MsgTreeListResult carrying the same ID. The
// server-side handler matches on ID and forwards the reply to the
// waiting channel. See internal/server/ws/rpc.go for the helper.
type TreeListRequestPayload struct {
	SnapshotID string `json:"snapshot_id"` // snapshot whose tree is being browsed
	Path       string `json:"path"`        // absolute path inside the snapshot, "/" for root
}
// TreeListEntry is one direct child returned by a tree.list call.
// Type is "dir" | "file" | "symlink"; Size is best-effort (zero on
// directories and symlinks) and is omitted from the JSON when zero.
type TreeListEntry struct {
	Name string `json:"name"`           // entry name
	Type string `json:"type"`           // "dir", "file" or "symlink"
	Size int64  `json:"size,omitempty"` // best-effort size; zero on dirs and symlinks
}
// TreeListResultPayload is the reply to a tree.list. Error is set
// when the agent couldn't fulfil the request (missing snapshot,
// path doesn't exist, restic invocation failed); Entries is empty in
// that case. A successful empty directory has Error="" + nil Entries,
// so callers must use Error, not len(Entries), to detect failure.
type TreeListResultPayload struct {
	SnapshotID string          `json:"snapshot_id"`       // snapshot the listing refers to
	Path       string          `json:"path"`              // directory the listing refers to
	Entries    []TreeListEntry `json:"entries,omitempty"` // direct children; nil on error or empty dir
	Error      string          `json:"error,omitempty"`   // non-empty when the listing failed
}
+15 -13
View File
@@ -12,18 +12,19 @@ type MessageType string
// Agent → server message types.
const (
MsgHello MessageType = "hello"
MsgHeartbeat MessageType = "heartbeat"
MsgJobStarted MessageType = "job.started"
MsgJobProgress MessageType = "job.progress"
MsgJobFinished MessageType = "job.finished"
MsgSnapshotsRpt MessageType = "snapshots.report"
MsgRepoStats MessageType = "repo.stats"
MsgLogStream MessageType = "log.stream"
MsgScheduleAck MessageType = "schedule.ack"
MsgScheduleFire MessageType = "schedule.fire" // agent: a local cron entry fired, please dispatch a job
MsgCommandResult MessageType = "command.result" // ack for command.run
MsgError MessageType = "error"
MsgHello MessageType = "hello"
MsgHeartbeat MessageType = "heartbeat"
MsgJobStarted MessageType = "job.started"
MsgJobProgress MessageType = "job.progress"
MsgJobFinished MessageType = "job.finished"
MsgSnapshotsRpt MessageType = "snapshots.report"
MsgRepoStats MessageType = "repo.stats"
MsgLogStream MessageType = "log.stream"
MsgScheduleAck MessageType = "schedule.ack"
MsgScheduleFire MessageType = "schedule.fire" // agent: a local cron entry fired, please dispatch a job
MsgCommandResult MessageType = "command.result" // ack for command.run
MsgTreeListResult MessageType = "tree.list.result" // reply to a server-driven tree.list
MsgError MessageType = "error"
)
// Server → agent message types.
@@ -33,6 +34,7 @@ const (
MsgScheduleSet MessageType = "schedule.set"
MsgConfigUpdate MessageType = "config.update"
MsgAgentUpdateAvail MessageType = "agent.update.available"
MsgTreeList MessageType = "tree.list" // sync RPC: list a snapshot's children
)
// Envelope is the framing for every WS message in either direction.
@@ -76,7 +78,7 @@ type ErrorCode string
const (
ErrProtocolTooOld ErrorCode = "protocol_too_old"
ErrProtocolTooNew ErrorCode = "protocol_too_new"
ErrUnauthorized ErrorCode = "unauthorized"
ErrUnauthorized ErrorCode = "unauthorised"
ErrBadRequest ErrorCode = "bad_request"
ErrInternal ErrorCode = "internal"
)
+1 -1
View File
@@ -56,7 +56,7 @@ func VerifyPassword(encoded, password string) error {
parts := strings.Split(encoded, "$")
// "$argon2id$v=...$m=...,t=...,p=...$<salt>$<hash>" → 6 parts (leading empty)
if len(parts) != 6 || parts[1] != "argon2id" {
return errors.New("auth: unrecognized hash format")
return errors.New("auth: unrecognised hash format")
}
var version int
if _, err := fmt.Sscanf(parts[2], "v=%d", &version); err != nil {
+1 -1
View File
@@ -2,7 +2,7 @@
// passwords, REST-server credentials, hook bodies, and any other
// secret that lands in the SQLite store.
//
// The threat model is "defense in depth against a stolen DB file" —
// The threat model is "defence in depth against a stolen DB file" —
// not "an attacker with code execution can't read secrets at runtime."
// We need the encryption key at runtime to do any actual work, so
// anyone with a memory dump of the running server can extract it.
+7
View File
@@ -0,0 +1,7 @@
//go:build !windows
package restic
import "syscall"
// sigterm is the termination signal this package uses on non-Windows
// builds. The Windows build defines the same variable as os.Kill.
// NOTE(review): presumably sent when a restic invocation is
// cancelled — confirm against the code that reads it.
var sigterm = syscall.SIGTERM
+12
View File
@@ -0,0 +1,12 @@
//go:build windows
package restic
import "os"
// sigterm on Windows is os.Kill. Windows has no SIGTERM, and Go's
// os.Process.Signal on Windows only supports os.Kill — sending
// anything else (including os.Interrupt) returns an error and no
// signal is delivered. Falling back to os.Kill means Cancel still
// works, as an immediate force-kill rather than a graceful stop;
// WaitDelay is unused but harmless.
var sigterm = os.Kill
+140
View File
@@ -0,0 +1,140 @@
package restic
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os/exec"
"path"
"strings"
)
// LsEntry is one node from `restic ls --json`. Restic emits these as
// line-delimited JSON; we keep only the fields the restore wizard
// needs. The same struct is also used to decode the leading snapshot
// preamble line, which is recognised by Struct == "snapshot" and
// carries no path.
type LsEntry struct {
	Name   string `json:"name"`                  // entry name
	Type   string `json:"type"`                  // node type as reported by restic, e.g. "dir" or "file"
	Path   string `json:"path"`                  // absolute path inside the snapshot
	Size   int64  `json:"size,omitempty"`        // size when restic reports one
	Struct string `json:"struct_type,omitempty"` // restic's struct_type, e.g. "snapshot" or "node"
}
// ListTreeChildren runs `restic ls --json <snapshot> <dirPath>` and
// returns only the direct children of dirPath. Restic ls is recursive
// by default, so we filter post-hoc — for a typical interactive
// drill-down ("expand /etc/nginx") the subtree is small (a few KB of
// JSON); for huge subtrees this is suboptimal but correct.
//
// The first emitted line is restic's "snapshot" preamble (struct_type
// = "snapshot") which we discard. Subsequent lines are nodes; a node
// is kept when its path is a direct child of the normalised dirPath
// (normalisation means trailing slashes don't break the comparison).
//
// dirPath="" or "/" lists the snapshot root; in that case no path
// argument is passed to restic.
//
// Errors: an empty snapshotID is rejected up front. A non-zero restic
// exit is reported with its exit code and trimmed stderr, and takes
// precedence over any parse error. Otherwise a parse/read error from
// the output is returned as-is.
func (e Env) ListTreeChildren(ctx context.Context, snapshotID, dirPath string) ([]LsEntry, error) {
	if snapshotID == "" {
		return nil, errors.New("restic ls: snapshot id required")
	}
	parent := normalizeTreePath(dirPath)
	args := []string{"ls", "--json", snapshotID}
	if parent != "/" {
		args = append(args, parent)
	}
	cmd := e.resticCmd(ctx, args...)
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("restic ls: stdout pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("restic ls: start: %w", err)
	}

	out, parseErr := parseLsChildren(stdout, parent)
	if parseErr != nil {
		// The parser gave up before EOF (bad JSON or an over-long
		// line). Keep draining stdout so restic cannot block on a full
		// pipe; otherwise Wait below would hang until ctx is
		// cancelled. The drain result is irrelevant — we already have
		// an error to report.
		_, _ = io.Copy(io.Discard, stdout)
	}

	werr := cmd.Wait()
	if werr != nil {
		var ee *exec.ExitError
		if errors.As(werr, &ee) {
			return nil, fmt.Errorf("restic ls: exit %d: %s",
				ee.ExitCode(), strings.TrimSpace(stderr.String()))
		}
		return nil, fmt.Errorf("restic ls: %w", werr)
	}
	if parseErr != nil {
		return nil, parseErr
	}
	return out, nil
}
// parseLsChildren scans line-delimited JSON from r and collects the
// nodes whose Path is a direct child of parent. Blank lines, the
// snapshot preamble, and any entry without a path are skipped. A line
// that is not valid JSON aborts the scan with an error, as does a
// read failure (including a line longer than the 1 MiB scanner
// limit). Kept separate from the restic invocation so it can be
// tested against canned output.
func parseLsChildren(r io.Reader, parent string) ([]LsEntry, error) {
	var children []LsEntry

	sc := bufio.NewScanner(r)
	// Start with a 64 KiB buffer and allow lines up to 1 MiB.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	for sc.Scan() {
		raw := sc.Bytes()
		if len(raw) == 0 {
			continue
		}
		var node LsEntry
		if err := json.Unmarshal(raw, &node); err != nil {
			return nil, fmt.Errorf("restic ls: parse line: %w", err)
		}
		switch {
		case node.Struct == "snapshot", node.Path == "":
			// Snapshot preamble, or some other struct_type entry we
			// don't care about: nothing to collect.
		case isDirectChild(node.Path, parent):
			children = append(children, node)
		}
	}
	if err := sc.Err(); err != nil {
		return nil, fmt.Errorf("restic ls: read output: %w", err)
	}
	return children, nil
}
// normalizeTreePath canonicalises a snapshot path: surrounding
// whitespace is trimmed, a leading slash is added when missing, and
// the result is cleaned (duplicate slashes, "." segments and any
// trailing slash are removed). An empty or "/" input yields the root,
// which is the only result that ends in a slash.
func normalizeTreePath(p string) string {
	trimmed := strings.TrimSpace(p)
	switch trimmed {
	case "", "/":
		return "/"
	}
	if trimmed[0] != '/' {
		trimmed = "/" + trimmed
	}
	return path.Clean(trimmed)
}
// isDirectChild reports whether childPath sits exactly one level
// below parent. Both paths are normalised first, so trailing slashes
// and missing leading slashes don't matter.
// "/etc/nginx" is a direct child of "/etc"; "/etc/nginx/conf" is not.
// "/etc" is a direct child of "/". A path is never its own child.
func isDirectChild(childPath, parent string) bool {
	child, dir := normalizeTreePath(childPath), normalizeTreePath(parent)
	if child == "/" {
		// The root has no parent at all.
		return false
	}
	// A normalised path is absolute with no trailing slash, so the
	// last slash separates the containing directory from the name.
	cut := strings.LastIndex(child, "/")
	if cut == 0 {
		// Single segment ("/name"): its directory is the root.
		return dir == "/"
	}
	return child[:cut] == dir
}
+123
View File
@@ -0,0 +1,123 @@
package restic
import (
"strings"
"testing"
)
// sampleLsOutput is a realistic `restic ls --json` output sample. The
// first line is the snapshot preamble; the remaining lines are nodes.
// It is trimmed to a handful of entries nested up to four levels deep
// (/etc/nginx/sites-available/*.conf) so the tests can exercise depth
// filtering at each level.
const sampleLsOutput = `{"struct_type":"snapshot","time":"2026-05-04T09:14:00Z","id":"f3a7b2c1"}
{"name":"etc","type":"dir","path":"/etc","permissions":"drwxr-xr-x","struct_type":"node"}
{"name":"nginx","type":"dir","path":"/etc/nginx","permissions":"drwxr-xr-x","struct_type":"node"}
{"name":"nginx.conf","type":"file","path":"/etc/nginx/nginx.conf","size":2400,"struct_type":"node"}
{"name":"sites-available","type":"dir","path":"/etc/nginx/sites-available","struct_type":"node"}
{"name":"alfa.conf","type":"file","path":"/etc/nginx/sites-available/alfa.conf","size":3100,"struct_type":"node"}
{"name":"default.conf","type":"file","path":"/etc/nginx/sites-available/default.conf","size":2900,"struct_type":"node"}
`
// TestParseLsChildrenAtRoot: listing "/" on the sample output yields
// only the top-level "etc" directory, not anything nested below it.
func TestParseLsChildrenAtRoot(t *testing.T) {
	t.Parallel()
	got, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	if n := len(got); n != 1 {
		t.Fatalf("entries: got %d (%+v), want 1", n, got)
	}
	first := got[0]
	if !(first.Name == "etc" && first.Path == "/etc" && first.Type == "dir") {
		t.Fatalf("entry: %+v", first)
	}
}
// TestParseLsChildrenAtEtc: listing "/etc" yields just "nginx"; the
// entries nested under /etc/nginx must be filtered out.
func TestParseLsChildrenAtEtc(t *testing.T) {
	t.Parallel()
	got, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	if n := len(got); n != 1 {
		t.Fatalf("entries: got %d, want 1 (just nginx, not nested children)", n)
	}
	if only := got[0]; only.Name != "nginx" {
		t.Fatalf("entry: %+v", only)
	}
}
// TestParseLsChildrenAtNginx: listing "/etc/nginx" yields exactly two
// entries, each of which must be nginx.conf or sites-available; the
// files inside sites-available must not leak into the result.
func TestParseLsChildrenAtNginx(t *testing.T) {
	t.Parallel()
	got, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc/nginx")
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	if n := len(got); n != 2 {
		t.Fatalf("entries: got %d (%+v), want 2 (nginx.conf + sites-available, not nested)",
			n, got)
	}
	for _, child := range got {
		switch child.Name {
		case "nginx.conf", "sites-available":
			// expected
		default:
			t.Errorf("unexpected name %q in result", child.Name)
		}
	}
}
// TestParseLsChildrenAtSitesAvailable checks that the leaf directory
// lists its two entries and that both are reported as files.
func TestParseLsChildrenAtSitesAvailable(t *testing.T) {
	t.Parallel()

	const dir = "/etc/nginx/sites-available"
	got, err := parseLsChildren(strings.NewReader(sampleLsOutput), dir)
	if err != nil {
		t.Fatalf("parse: %v", err)
	}
	if n := len(got); n != 2 {
		t.Fatalf("entries: got %d, want 2", n)
	}
	for i := range got {
		if got[i].Type == "file" {
			continue
		}
		t.Errorf("expected file type, got %q on %q", got[i].Type, got[i].Name)
	}
}
// TestNormalizeTreePath pins the canonical form normalizeTreePath
// produces: empty input becomes "/", a leading slash is added,
// trailing and doubled slashes are removed, and "." segments collapse.
func TestNormalizeTreePath(t *testing.T) {
	t.Parallel()

	table := []struct {
		input    string
		expected string
	}{
		{input: "", expected: "/"},
		{input: "/", expected: "/"},
		{input: "/etc", expected: "/etc"},
		{input: "/etc/", expected: "/etc"},
		{input: "etc/nginx", expected: "/etc/nginx"},
		{input: "/etc//nginx", expected: "/etc/nginx"},
		{input: "/etc/./nginx", expected: "/etc/nginx"},
	}
	for _, tc := range table {
		if got := normalizeTreePath(tc.input); got != tc.expected {
			t.Errorf("normalizeTreePath(%q): got %q, want %q", tc.input, got, tc.expected)
		}
	}
}
// TestIsDirectChild exercises isDirectChild across root and nested
// parents: a path is a direct child only when it sits exactly one level
// below the parent. A path equal to the parent, a grandchild, and a
// sibling that merely shares a string prefix must all be rejected.
func TestIsDirectChild(t *testing.T) {
	t.Parallel()

	table := []struct {
		child    string
		parent   string
		expected bool
	}{
		{child: "/etc", parent: "/", expected: true},
		{child: "/etc/nginx", parent: "/", expected: false},
		{child: "/etc/nginx", parent: "/etc", expected: true},
		{child: "/etc/nginx/conf", parent: "/etc", expected: false},
		{child: "/etc/nginx/conf", parent: "/etc/nginx", expected: true},
		{child: "/etc", parent: "/etc", expected: false},
		{child: "/etcc", parent: "/etc", expected: false}, // prefix match guard
	}
	for _, tc := range table {
		if got := isDirectChild(tc.child, tc.parent); got != tc.expected {
			t.Errorf("isDirectChild(%q, %q): got %v, want %v",
				tc.child, tc.parent, got, tc.expected)
		}
	}
}
+271
View File
@@ -0,0 +1,271 @@
package restic
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
)
// RestoreStatus mirrors the JSON `status` lines `restic restore --json`
// emits while restoring. Field names track restic's wire format; we
// project a subset (the rest are cosmetic).
//
// pumpRestoreStdout decodes every stdout line whose message_type is
// "status" into this struct and passes the value to the LineHandler as
// the event argument.
type RestoreStatus struct {
MessageType string `json:"message_type"` // discriminator; "status" for these lines
SecondsElapsed int64 `json:"seconds_elapsed"`
PercentDone float64 `json:"percent_done"` // NOTE(review): presumably a 0..1 fraction rather than 0..100 — confirm against restic's output
TotalFiles int64 `json:"total_files"`
FilesRestored int64 `json:"files_restored"`
FilesSkipped int64 `json:"files_skipped"`
TotalBytes int64 `json:"total_bytes"`
BytesRestored int64 `json:"bytes_restored"`
BytesSkipped int64 `json:"bytes_skipped"`
}
// RestoreSummary is the final summary line emitted after a successful
// restore. Newer restic prints it; older clients leave us with no
// summary, in which case the agent skips the stats and the live UI
// just sees percent reach 100%.
//
// It carries the same counters as RestoreStatus minus percent_done.
// pumpRestoreStdout decodes lines whose message_type is "summary" into
// this struct, and RunRestore returns a pointer to it (nil when no such
// line was seen).
type RestoreSummary struct {
MessageType string `json:"message_type"` // discriminator; "summary" for this line
SecondsElapsed int64 `json:"seconds_elapsed"`
TotalFiles int64 `json:"total_files"`
FilesRestored int64 `json:"files_restored"`
FilesSkipped int64 `json:"files_skipped"`
TotalBytes int64 `json:"total_bytes"`
BytesRestored int64 `json:"bytes_restored"`
BytesSkipped int64 `json:"bytes_skipped"`
}
// RunRestore executes `restic restore --json <snapshotID> --target <dir>
// [--no-ownership] [--include <p>...]` and pumps progress events into
// handle. paths is the operator-selected list (each becomes an
// `--include` flag); an empty list adds no --include flags.
//
// inPlace toggles target semantics:
// - true → target is "/" (targetDir is ignored) and --no-ownership is
// never passed, so ownership is preserved
// - false → target is expandHome(targetDir); the directory is created
// here with os.MkdirAll (mode 0700) before restic starts, and
// --no-ownership is passed only when e.AtLeastVersion(0, 17)
//
// Returns an error without starting restic when snapshotID is empty,
// when a non-in-place restore has no targetDir, or when the target
// directory cannot be created. Otherwise returns the parsed summary
// line if restic emitted one (nil if not); the summary is returned
// alongside the error even when restic exits non-zero. handle may be
// nil.
func (e Env) RunRestore(ctx context.Context, snapshotID string, paths []string, inPlace bool, targetDir string, handle LineHandler) (*RestoreSummary, error) {
if snapshotID == "" {
return nil, fmt.Errorf("restic restore: snapshot id required")
}
if !inPlace && targetDir == "" {
return nil, fmt.Errorf("restic restore: target dir required for non-in-place restore")
}
args := []string{"restore", "--json", snapshotID}
target := targetDir
if inPlace {
target = "/"
} else {
// Expand $HOME / ${HOME} / leading ~/ in the operator-supplied
// path, using the agent's own HOME (typically /root for the
// User=root unit). The expansion runs agent-side so the
// operator can specify a portable default like
// $HOME/rm-restore/<job-id>/ in the wizard without the server
// needing to know which user the agent runs as.
target = expandHome(target)
// Ensure the target directory exists. Restic itself creates
// missing leaves but won't traverse multiple missing levels
// (and we don't want the operator to have to pre-create the
// per-job subdir). 0700 keeps the data root-only — the agent
// runs as root, and operators who want a different mode can
// chmod after the fact. If MkdirAll fails (operator typed a
// path inside a read-only sandbox mount, ENOSPC, etc.) we
// surface a clean error rather than letting restic fail with
// something cryptic.
if err := os.MkdirAll(target, 0o700); err != nil {
return nil, fmt.Errorf("restic restore: prepare target %q: %w", target, err)
}
}
args = append(args, "--target", target)
// --no-ownership was added in restic 0.17. Older versions reject
// the flag with "unknown flag: --no-ownership". For new-dir
// restores we want the files owned by the agent user (operator
// can cp them without juggling chown), so pass the flag iff the
// running restic supports it. In-place restores always preserve
// ownership — that's the whole point of in-place.
if !inPlace && e.AtLeastVersion(0, 17) {
args = append(args, "--no-ownership")
}
for _, p := range paths {
args = append(args, "--include", p)
}
cmd := e.resticCmd(ctx, args...)
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("restic restore: stdout pipe: %w", err)
}
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, fmt.Errorf("restic restore: stderr pipe: %w", err)
}
if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("restic restore: start: %w", err)
}
// summary is written by the stdout pump goroutine and read only
// after that goroutine has reported on done.
var summary *RestoreSummary
// Buffered for both pumps so neither goroutine blocks on send.
done := make(chan error, 2)
go func() { done <- pumpRestoreStdout(stdout, handle, &summary) }()
go func() { done <- pumpStderr(stderr, handle) }()
// Both pipes must be fully read before cmd.Wait, which closes them.
// Pump failures are reported to the handler but do not fail the
// restore on their own.
for i := 0; i < 2; i++ {
if err := <-done; err != nil && handle != nil {
handle("event", fmt.Sprintf("pump error: %v", err), nil)
}
}
werr := cmd.Wait()
if werr != nil {
var ee *exec.ExitError
if errors.As(werr, &ee) {
// Non-zero exit: report just the code; the wrapped error is
// intentionally not chained here.
return summary, fmt.Errorf("restic restore: exit %d", ee.ExitCode())
}
return summary, fmt.Errorf("restic restore: %w", werr)
}
return summary, nil
}
// pumpRestoreStdout is the restore variant of pumpStdout. It reads r
// line by line and classifies each line:
//
//   - "status" JSON objects are decoded into RestoreStatus and sent to
//     handle as an `event` (so the runner can shape them into
//     job.progress). Like backup, they are NOT forwarded as `stdout` —
//     they'd drown the log on a fast restore.
//   - the "summary" JSON object is decoded into RestoreSummary, stored
//     through summary (when non-nil) and sent as an `event`.
//   - "verbose_status" objects are sent as an `event` with a nil payload.
//   - everything else (non-JSON text, unknown message types, objects
//     that fail to decode) is forwarded verbatim as `stdout`.
//
// handle may be nil: lines are then consumed without being forwarded,
// but the summary is still captured so callers without a handler get
// their stats.
//
// Returns the scanner error, if any (for example a line longer than the
// 4 MiB cap). On error the rest of r is drained and discarded so the
// writing process cannot block on a full pipe.
func pumpRestoreStdout(r io.Reader, handle LineHandler, summary **RestoreSummary) error {
	// emit forwards to handle when one is installed; parsing still
	// runs without a handler so the summary is never lost.
	emit := func(stream, raw string, event any) {
		if handle != nil {
			handle(stream, raw, event)
		}
	}

	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "{") {
			emit("stdout", line, nil)
			continue
		}
		// Cheap first pass: decode only the discriminator.
		var probe struct {
			MessageType string `json:"message_type"`
		}
		if err := json.Unmarshal([]byte(line), &probe); err != nil {
			emit("stdout", line, nil)
			continue
		}
		switch probe.MessageType {
		case "status":
			var ev RestoreStatus
			if json.Unmarshal([]byte(line), &ev) == nil {
				// Don't tee status lines to log.stream — too chatty.
				emit("event", line, ev)
				continue
			}
		case "summary":
			var ev RestoreSummary
			if json.Unmarshal([]byte(line), &ev) == nil {
				if summary != nil {
					s := ev
					*summary = &s
				}
				emit("event", line, ev)
				continue
			}
		case "verbose_status":
			emit("event", line, nil)
			continue
		}
		// Unknown type, or a known type whose body failed to decode.
		emit("stdout", line, nil)
	}
	if err := scanner.Err(); err != nil {
		// The scanner gives up on the first error; keep reading so the
		// child can finish writing and exit instead of stalling.
		_, _ = io.Copy(io.Discard, r)
		return err
	}
	return nil
}
// expandHome substitutes the agent process's home directory for a
// leading $HOME, ${HOME} or ~ in p. The marker must be the whole string
// or be followed by "/". No other environment variable is expanded, on
// purpose: operator-supplied paths must not be able to read arbitrary
// agent env values such as $PATH or $RESTIC_PASSWORD. When the home
// directory cannot be determined, or p is empty or carries no marker,
// p is returned untouched.
func expandHome(p string) string {
	if p == "" {
		return p
	}
	homeDir, err := os.UserHomeDir()
	if err != nil || homeDir == "" {
		return p
	}
	for _, marker := range []string{"$HOME", "${HOME}", "~"} {
		if p == marker {
			return homeDir
		}
		if withSlash := marker + "/"; strings.HasPrefix(p, withSlash) {
			return filepath.Join(homeDir, strings.TrimPrefix(p, withSlash))
		}
	}
	return p
}
// RunDiff executes `restic diff --json <a> <b>` and forwards every
// stdout line to handle as `stdout` (stderr goes through pumpStderr).
// Restic emits per-line "change" objects plus a final "statistics"
// object; we don't parse them server-side — the operator reads the raw
// output on the live job log page.
//
// Both snapshot IDs are required. handle may be nil, in which case
// output is consumed and dropped. Pump failures are reported to handle
// as `event` lines and do not fail the call by themselves; the returned
// error reflects pipe setup, process start, or restic's exit status
// ("restic diff: exit N" for a non-zero exit).
func (e Env) RunDiff(ctx context.Context, snapshotA, snapshotB string, handle LineHandler) error {
	if snapshotA == "" || snapshotB == "" {
		return fmt.Errorf("restic diff: two snapshot ids required")
	}
	cmd := e.resticCmd(ctx, "diff", "--json", snapshotA, snapshotB)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("restic diff: stdout pipe: %w", err)
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return fmt.Errorf("restic diff: stderr pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("restic diff: start: %w", err)
	}
	// Buffered for both pumps so neither goroutine blocks on send.
	done := make(chan error, 2)
	// diff output isn't huge; pumpStderr-ish line-by-line forwarding
	// is fine.
	go func() {
		s := bufio.NewScanner(stdout)
		s.Buffer(make([]byte, 0, 64*1024), 1024*1024)
		for s.Scan() {
			if handle != nil {
				handle("stdout", s.Text(), nil)
			}
		}
		scanErr := s.Err()
		if scanErr != nil {
			// The scanner stops at the first error (e.g. a line over
			// the 1 MiB cap). Keep draining so restic can't block on a
			// full pipe, which would stall cmd.Wait below.
			_, _ = io.Copy(io.Discard, stdout)
		}
		done <- scanErr
	}()
	go func() { done <- pumpStderr(stderr, handle) }()
	// Both pipes must be fully read before cmd.Wait, which closes them.
	for i := 0; i < 2; i++ {
		if err := <-done; err != nil && handle != nil {
			handle("event", fmt.Sprintf("pump error: %v", err), nil)
		}
	}
	werr := cmd.Wait()
	if werr != nil {
		var ee *exec.ExitError
		if errors.As(werr, &ee) {
			return fmt.Errorf("restic diff: exit %d", ee.ExitCode())
		}
		return fmt.Errorf("restic diff: %w", werr)
	}
	return nil
}
+67 -8
View File
@@ -15,7 +15,7 @@ import (
"time"
)
// Locate resolves the path to the restic binary. Honor an explicit
// Locate resolves the path to the restic binary. Honour an explicit
// override if provided, else fall back to PATH.
func Locate(override string) (string, error) {
if override != "" {
@@ -42,6 +42,7 @@ func Locate(override string) (string, error) {
// in this package ever needs to *log* a URL, use RedactURL.
type Env struct {
Bin string // path to restic binary
Version string // e.g. "0.17.1"; empty if unknown
RepoURL string // RESTIC_REPOSITORY (no embedded creds)
RepoUsername string // optional HTTP basic-auth user for rest: URLs
RepoPassword string // doubles as RESTIC_PASSWORD and (for rest:) HTTP basic-auth password
@@ -55,6 +56,45 @@ type Env struct {
LimitDownloadKBps int
}
// AtLeastVersion reports whether the version recorded in e.Version is
// at or above major.minor. Only the first two dot-separated components
// are examined; the patch level and anything after it are ignored.
// The check is best-effort: an empty version, one without a dot, or one
// whose major/minor parts are not plain decimal digits yields false, so
// callers stay on the conservative path.
func (e Env) AtLeastVersion(major, minor int) bool {
	ver := strings.TrimSpace(e.Version)
	majorPart, rest, hasDot := strings.Cut(ver, ".")
	if !hasDot {
		return false
	}
	minorPart, _, _ := strings.Cut(rest, ".")

	gotMajor, majorErr := atoi(majorPart)
	gotMinor, minorErr := atoi(minorPart)
	if majorErr != nil || minorErr != nil {
		return false
	}

	switch {
	case gotMajor > major:
		return true
	case gotMajor < major:
		return false
	default:
		return gotMinor >= minor
	}
}
// atoi parses s as a non-negative decimal integer. It is a minimal
// stand-in for strconv.Atoi so this file doesn't need the import for a
// single helper. Unlike strconv.Atoi it accepts digits only: no sign,
// no whitespace. An empty string or any non-digit rune is an error.
func atoi(s string) (int, error) {
	if s == "" {
		return 0, fmt.Errorf("empty")
	}
	var total int
	for _, ch := range s {
		if ch < '0' || ch > '9' {
			return 0, fmt.Errorf("not a digit: %q", ch)
		}
		total = total*10 + int(ch-'0')
	}
	return total, nil
}
// globalArgs returns restic's pre-subcommand global flags derived
// from the Env. Currently just bandwidth caps.
func (e Env) globalArgs() []string {
@@ -69,14 +109,33 @@ func (e Env) globalArgs() []string {
}
// resticCmd builds an exec.Cmd with bandwidth-limit globals prefixed
// before the supplied subcommand args. Centralizing this so every
// command (backup/forget/prune/check/unlock/init/stats) honors
// before the supplied subcommand args. Centralising this so every
// command (backup/forget/prune/check/unlock/init/stats) honours
// the caps without each call site having to remember.
//
// Cancellation: by default exec.CommandContext sends SIGKILL when
// ctx is canceled, which leaves restic no chance to clean up its
// repository lock. Override Cmd.Cancel to send SIGTERM first, and
// set Cmd.WaitDelay so the process is force-killed if it doesn't
// exit within five seconds. Restic responds to SIGTERM by removing
// its lock file before exiting, which is what we want when an
// operator cancels a long-running backup/restore from the UI.
func (e Env) resticCmd(ctx context.Context, sub ...string) *exec.Cmd {
// Global flags from the Env go first, then the subcommand and its args.
args := append(e.globalArgs(), sub...)
cmd := exec.CommandContext(ctx, e.Bin, args...)
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
// Replace CommandContext's default kill-on-cancel with a SIGTERM so
// the process gets a chance to shut down cleanly.
cmd.Cancel = func() error {
// Cmd.Process is set after Start; Cancel only fires post-Start
// so the nil check is defensive against the documented but
// unlikely race. Signal returns ErrProcessDone if the process
// already exited; that's not a problem here either.
if cmd.Process == nil {
return nil
}
return cmd.Process.Signal(sigterm)
}
// Grace period after cancellation before the process is force-killed.
cmd.WaitDelay = 5 * time.Second
return cmd
}
@@ -123,7 +182,7 @@ type BackupSummary struct {
}
// LineHandler receives every stdout/stderr line. event is non-nil
// when the line is a recognized JSON status; raw always carries the
// when the line is a recognised JSON status; raw always carries the
// original text (so we can also tee to job_logs as `stdout`).
type LineHandler func(stream string, raw string, event any)
@@ -263,7 +322,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
// Sniff for "config file already exists" on stderr; if we see it
// we'll treat the non-zero exit as a soft success — running init
// against an already-initialized repo is a no-op semantically,
// against an already-initialised repo is a no-op semantically,
// not a failure. Wraps the caller's handle so the line still
// gets streamed verbatim to the operator-facing log.
alreadyInited := false
@@ -279,7 +338,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
if err := runWithPump(cmd, sniff); err != nil {
if alreadyInited {
if handle != nil {
handle("event", "repo already initialized — treating as success", nil)
handle("event", "repo already initialised — treating as success", nil)
}
return nil
}
@@ -375,7 +434,7 @@ func (e Env) RunStats(ctx context.Context, handle LineHandler) (*RepoStats, erro
return out, nil
}
// CheckResult summarizes a `restic check` invocation. LockPresent is
// CheckResult summarises a `restic check` invocation. LockPresent is
// true if the stderr stream contained a stale-lock signal (caller is
// expected to surface this in the UI so the operator can run unlock).
// ErrorsFound is true if check exited with a non-zero status (errors
@@ -387,7 +446,7 @@ type CheckResult struct {
// RunCheck executes `restic check` with optional --read-data-subset.
// subsetPct of 0 omits the flag (full data check); >0 passes
// --read-data-subset N%. Returns a CheckResult summarizing what was
// --read-data-subset N%. Returns a CheckResult summarising what was
// sniffed from stderr; the result is set even if check itself
// returns an error (so the caller can persist last_check_status).
func (e Env) RunCheck(ctx context.Context, subsetPct int, handle LineHandler) (CheckResult, error) {
+5 -3
View File
@@ -13,9 +13,11 @@ import (
// decode only the fields we project to the server; restic's full
// shape has more (parent, tree, program version) that we don't need.
//
// Summary is only populated by restic 0.16+ (which embeds the backup
// summary inside each snapshot). Older clients leave it nil and the
// agent reports zero size/file-count — the UI degrades to "—".
// Summary is only populated by restic 0.17+ (which embeds the backup
// summary inside each snapshot record). Older clients leave it nil
// and the agent reports zero size/file-count — the UI degrades to
// "—" and the column headers carry a tooltip explaining the version
// requirement (see web/templates/pages/host_detail.html).
type Snapshot struct {
ID string `json:"id"`
ShortID string `json:"short_id"`
+64
View File
@@ -0,0 +1,64 @@
package restic
import (
"path/filepath"
"testing"
)
// TestEnvAtLeastVersion walks Env.AtLeastVersion through versions at,
// above and below the 0.17 threshold, plus inputs it must reject
// (empty, "v"-prefixed, non-numeric).
func TestEnvAtLeastVersion(t *testing.T) {
	t.Parallel()

	table := []struct {
		version  string
		major    int
		minor    int
		expected bool
		label    string
	}{
		{"0.17.0", 0, 17, true, "exact match"},
		{"0.17.1", 0, 17, true, "patch above"},
		{"0.18.0", 0, 17, true, "minor above"},
		{"1.0.0", 0, 17, true, "major above"},
		{"0.16.4", 0, 17, false, "minor below"},
		{"0.16", 0, 17, false, "two-part minor below"},
		{"", 0, 17, false, "empty"},
		{"v0.17", 0, 17, false, "prefixed v rejected"},
		{"unknown", 0, 17, false, "non-numeric rejected"},
	}
	for _, tc := range table {
		env := Env{Version: tc.version}
		if got := env.AtLeastVersion(tc.major, tc.minor); got != tc.expected {
			t.Errorf("AtLeastVersion(%q, %d, %d): got %v want %v · %s",
				tc.version, tc.major, tc.minor, got, tc.expected, tc.label)
		}
	}
}
// TestExpandHome points HOME at a temp dir and checks that expandHome
// rewrites the $HOME, ${HOME} and ~ forms, while leaving absolute
// paths, the empty string and other env-var references alone.
func TestExpandHome(t *testing.T) {
	// Not parallel — t.Setenv on HOME would race with sibling tests.
	home := t.TempDir()
	t.Setenv("HOME", home)

	table := []struct {
		input    string
		expected string
	}{
		{input: "$HOME/rm-restore/job-1/", expected: filepath.Join(home, "rm-restore/job-1")},
		{input: "${HOME}/rm-restore/job-2/", expected: filepath.Join(home, "rm-restore/job-2")},
		{input: "~/rm-restore/job-3/", expected: filepath.Join(home, "rm-restore/job-3")},
		{input: "$HOME", expected: home},
		{input: "~", expected: home},
		{input: "/var/lib/x/y", expected: "/var/lib/x/y"}, // absolute path passes through
		{input: "", expected: ""},
		{input: "$PATH/foo", expected: "$PATH/foo"}, // other env vars not expanded
	}
	for _, tc := range table {
		if got := expandHome(tc.input); got != tc.expected {
			t.Errorf("expandHome(%q): got %q want %q", tc.input, got, tc.expected)
		}
	}

	// Sanity: an absolute path always passes through regardless of HOME.
	got := expandHome("/abs")
	if got != "/abs" {
		t.Errorf("expandHome(/abs): got %q", got)
	}
}
+1 -1
View File
@@ -57,7 +57,7 @@ func (s *Server) handleAgentBinary(w stdhttp.ResponseWriter, r *stdhttp.Request)
}
func (s *Server) handleInstallAsset(w stdhttp.ResponseWriter, r *stdhttp.Request) {
// chi's TrimPrefix-like behavior: r.URL.Path is "/install/<file>".
// chi's TrimPrefix-like behaviour: r.URL.Path is "/install/<file>".
rel := strings.TrimPrefix(r.URL.Path, "/install/")
// Reject any path traversal — must be a flat filename.
if rel == "" || strings.ContainsAny(rel, "/\\") {
+2 -2
View File
@@ -133,7 +133,7 @@ func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) {
keyBytes, err := base64.StdEncoding.DecodeString(req.PublicKey)
if err != nil {
// Try URL-safe / no-padding flavors before giving up.
// Try URL-safe / no-padding flavours before giving up.
if k2, e2 := base64.RawStdEncoding.DecodeString(req.PublicKey); e2 == nil {
keyBytes = k2
} else {
@@ -195,7 +195,7 @@ func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) {
// remoteIP returns r.RemoteAddr stripped of any :port suffix, plus
// the X-Forwarded-For chain's first hop when behind a trusted proxy
// (RM_TRUSTED_PROXY in the deployment doc). Trust-proxy lookup
// matches the framework's existing behavior elsewhere.
// matches the framework's existing behaviour elsewhere.
func remoteIP(r *stdhttp.Request) string {
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
// Take the first IP in the chain (closest to the original
+1 -1
View File
@@ -137,7 +137,7 @@ func (s *Server) handleBootstrap(w stdhttp.ResponseWriter, r *stdhttp.Request) {
return
}
if n > 0 {
writeJSONError(w, stdhttp.StatusConflict, "already_initialized",
writeJSONError(w, stdhttp.StatusConflict, "already_initialised",
"a user already exists; bootstrap is disabled")
return
}
+86
View File
@@ -0,0 +1,86 @@
package http
import (
stdhttp "net/http"
"time"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// handleCancelJob serves POST /api/jobs/{id}/cancel. It delivers a
// command.cancel envelope to the host that owns the job and replies
// 202 Accepted. The handler never writes to the jobs table itself: the
// row is expected to change only once the agent reports back with its
// job.finished envelope, which is why the response is 202 and not 200.
//
// Error responses, in the order they are checked:
//   - 401 no authenticated user
//   - 400 empty job id in the URL
//   - 404 job lookup failed
//   - 409 job already succeeded / failed / cancelled
//   - 503 host not connected, or the send to the host failed
//   - 500 the envelope could not be marshalled
//
// A successful dispatch is audit-logged as job.cancel with the job ID
// as target; a failure to write the audit row is ignored.
func (s *Server) handleCancelJob(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, authed := s.requireUser(r)
	if !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	jobID := chi.URLParam(r, "id")
	if jobID == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_job_id", "")
		return
	}

	ctx := r.Context()
	job, err := s.deps.Store.GetJob(ctx, jobID)
	if err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "job_not_found", "")
		return
	}

	// Nothing to cancel once the job has reached a terminal state.
	if st := api.JobStatus(job.Status); st == api.JobSucceeded || st == api.JobFailed || st == api.JobCancelled {
		writeJSONError(w, stdhttp.StatusConflict, "job_terminal",
			"job is already in a terminal state ("+job.Status+")")
		return
	}

	if online := s.deps.Hub.Connected(job.HostID); !online {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline",
			"agent is not connected; can't deliver cancel signal")
		return
	}

	env, err := api.Marshal(api.MsgCommandCancel, jobID, api.CommandCancelPayload{
		JobID: jobID,
	})
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}
	if sendErr := s.deps.Hub.Send(ctx, job.HostID, env); sendErr != nil {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", sendErr.Error())
		return
	}

	// Attribute the action to the user when one is attached to the
	// request; otherwise record it as a system action.
	var (
		actor   = "system"
		actorID *string
	)
	if user != nil {
		actor, actorID = "user", &user.ID
	}
	_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     actorID,
		Actor:      actor,
		Action:     "job.cancel",
		TargetKind: ptr("job"),
		TargetID:   &jobID,
		TS:         time.Now().UTC(),
	})

	w.WriteHeader(stdhttp.StatusAccepted)
}
+204
View File
@@ -0,0 +1,204 @@
// cancel_test.go — covers POST /api/jobs/{id}/cancel.
package http
import (
"context"
"encoding/json"
stdhttp "net/http"
"strings"
"testing"
"time"
"github.com/coder/websocket"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// TestCancelJobRunningHappyPath: a running job's cancel endpoint sends
// a command.cancel envelope with the right job id, returns 202, and
// writes a job.cancel audit row.
//
// Flow: connect a fake agent over the websocket, seed a started job
// for that host directly in the store, POST the cancel endpoint as an
// admin, then read from the agent side of the socket until the
// command.cancel envelope shows up (or two seconds pass).
func TestCancelJobRunningHappyPath(t *testing.T) {
t.Parallel()
srv, ts, st := rawTestServer(t)
hostID, token := enrolHostForWS(t, srv, st, "cancel-host")
c := agentDial(t, srv, ts, hostID, token)
sendHello(t, c, "cancel-host")
// Skip past the messages sent to the agent up to schedule.set so
// the read loop below only has to look for command.cancel.
_ = drainUntil(t, c, api.MsgScheduleSet)
// Seed a running job we can target.
jobID := ulid.Make().String()
now := time.Now().UTC()
if err := st.CreateJob(context.Background(), store.Job{
ID: jobID, HostID: hostID, Kind: "backup",
ActorKind: "user", CreatedAt: now,
}); err != nil {
t.Fatalf("create job: %v", err)
}
if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil {
t.Fatalf("mark started: %v", err)
}
cookie := loginAsAdmin(t, st)
req, _ := stdhttp.NewRequest("POST",
ts.URL+"/api/jobs/"+jobID+"/cancel", nil)
req.AddCookie(cookie)
res, err := stdhttp.DefaultClient.Do(req)
if err != nil {
t.Fatalf("do: %v", err)
}
defer res.Body.Close()
if res.StatusCode != stdhttp.StatusAccepted {
t.Fatalf("status: got %d, want 202", res.StatusCode)
}
// Read the dispatched command.cancel envelope.
deadline := time.Now().Add(2 * time.Second)
var got api.Envelope
for time.Now().Before(deadline) {
// Each read gets its own short timeout; any read error ends the
// loop and the type check below reports the missing envelope.
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
mt, raw, rerr := c.Read(ctx)
cancel()
if rerr != nil {
break
}
if mt != websocket.MessageText {
continue
}
// Cheap substring filter before paying for a full decode.
if !strings.Contains(string(raw), `"command.cancel"`) {
continue
}
if err := json.Unmarshal(raw, &got); err != nil {
t.Fatalf("unmarshal: %v", err)
}
break
}
// got is still the zero Envelope if nothing matched above.
if got.Type != api.MsgCommandCancel {
t.Fatalf("never received command.cancel envelope")
}
var cp api.CommandCancelPayload
if err := got.UnmarshalPayload(&cp); err != nil {
t.Fatalf("unmarshal payload: %v", err)
}
if cp.JobID != jobID {
t.Fatalf("payload job_id: got %q want %q", cp.JobID, jobID)
}
// Audit row exists.
var n int
if err := st.DB().QueryRow(
`SELECT COUNT(*) FROM audit_log WHERE action = 'job.cancel' AND target_id = ?`,
jobID).Scan(&n); err != nil {
t.Fatalf("audit count: %v", err)
}
if n != 1 {
t.Fatalf("audit rows: got %d, want 1", n)
}
}
// TestCancelJobAlreadyTerminal: a job in succeeded/failed/canceled
// state returns 409 and does NOT send a WS envelope.
//
// Only the "succeeded" state is exercised here. After the 409 the test
// keeps reading from the agent socket for 300ms to confirm that no
// command.cancel was dispatched.
func TestCancelJobAlreadyTerminal(t *testing.T) {
t.Parallel()
srv, ts, st := rawTestServer(t)
hostID, token := enrolHostForWS(t, srv, st, "term-host")
c := agentDial(t, srv, ts, hostID, token)
sendHello(t, c, "term-host")
_ = drainUntil(t, c, api.MsgScheduleSet)
// Seed a job and move it straight to a terminal state.
jobID := ulid.Make().String()
now := time.Now().UTC()
if err := st.CreateJob(context.Background(), store.Job{
ID: jobID, HostID: hostID, Kind: "backup",
ActorKind: "user", CreatedAt: now,
}); err != nil {
t.Fatalf("create job: %v", err)
}
if err := st.MarkJobFinished(context.Background(), jobID, "succeeded", 0, nil, "", now); err != nil {
t.Fatalf("mark finished: %v", err)
}
cookie := loginAsAdmin(t, st)
req, _ := stdhttp.NewRequest("POST",
ts.URL+"/api/jobs/"+jobID+"/cancel", nil)
req.AddCookie(cookie)
res, err := stdhttp.DefaultClient.Do(req)
if err != nil {
t.Fatalf("do: %v", err)
}
defer res.Body.Close()
if res.StatusCode != stdhttp.StatusConflict {
t.Fatalf("status: got %d, want 409", res.StatusCode)
}
// Drain — no command.cancel should arrive.
// One shared 300ms budget covers the whole loop; the read error
// when it expires is the normal way out.
ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
defer cancel()
for {
mt, raw, rerr := c.Read(ctx)
if rerr != nil {
break
}
if mt == websocket.MessageText && strings.Contains(string(raw), `"command.cancel"`) {
t.Fatalf("unexpected command.cancel envelope for terminal job")
}
}
}
// TestCancelJobNotFound: cancelling a freshly generated job id that
// was never stored must answer 404.
func TestCancelJobNotFound(t *testing.T) {
	t.Parallel()

	_, ts, st := rawTestServer(t)
	cookie := loginAsAdmin(t, st)

	target := ts.URL + "/api/jobs/" + ulid.Make().String() + "/cancel"
	req, _ := stdhttp.NewRequest(stdhttp.MethodPost, target, nil)
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != stdhttp.StatusNotFound {
		t.Fatalf("status: got %d, want 404", got)
	}
}
// TestCancelJobHostOffline: cancelling a started job whose host has no
// websocket connection must answer 503.
func TestCancelJobHostOffline(t *testing.T) {
	t.Parallel()

	_, ts, st := rawTestServer(t)
	ctx := context.Background()

	// Create a host but don't connect a WS for it.
	hostID := ulid.Make().String()
	offlineHost := store.Host{
		ID: hostID, Name: "offline-host", OS: "linux", Arch: "amd64",
		EnrolledAt: time.Now().UTC(),
	}
	if err := st.CreateHost(ctx, offlineHost, "deadbeef", ""); err != nil {
		t.Fatalf("create host: %v", err)
	}

	// Give that host a job that is already running.
	jobID := ulid.Make().String()
	startedAt := time.Now().UTC()
	runningJob := store.Job{
		ID: jobID, HostID: hostID, Kind: "backup",
		ActorKind: "user", CreatedAt: startedAt,
	}
	if err := st.CreateJob(ctx, runningJob); err != nil {
		t.Fatalf("create job: %v", err)
	}
	if err := st.MarkJobStarted(ctx, jobID, startedAt); err != nil {
		t.Fatalf("mark started: %v", err)
	}

	cookie := loginAsAdmin(t, st)
	req, _ := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/api/jobs/"+jobID+"/cancel", nil)
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != stdhttp.StatusServiceUnavailable {
		t.Fatalf("status: got %d, want 503", got)
	}
}
+150
View File
@@ -0,0 +1,150 @@
package http
import (
"encoding/json"
stdhttp "net/http"
"strings"
"time"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// snapshotDiffRequest is the JSON body for POST .../snapshots/diff.
// Either short or long snapshot IDs are accepted (restic's diff
// command takes both).
//
// handleSnapshotDiff fills the same two fields from the form values
// "snapshot_a" and "snapshot_b" when the request is form-encoded, and
// trims surrounding whitespace from both in either case.
type snapshotDiffRequest struct {
SnapshotA string `json:"snapshot_a"` // first snapshot passed to the diff
SnapshotB string `json:"snapshot_b"` // second snapshot passed to the diff
}
// handleSnapshotDiff dispatches a JobDiff. Output streams as
// log.stream lines to the standard live job page; the operator reads
// the diff text directly there. Behaves like the run-now endpoints.
//
// The body is read as a form when Content-Type is
// application/x-www-form-urlencoded (HTMX) and as JSON otherwise.
//
// Responses:
//   - 401 no authenticated user
//   - 404 host lookup failed
//   - 400 body unparsable, or either snapshot id missing
//   - 422 both ids identify the same snapshot (checked on the raw input
//     and again after resolution, so a short id and the long id of one
//     snapshot are caught too), or an id is not in the host's snapshot
//     list (we don't want operators running diffs against arbitrary
//     snapshot strings)
//   - 503 host offline, or the send to the host failed
//   - 500 store or marshalling failure
//   - success: 204 with HX-Redirect to the job page for HTMX callers,
//     otherwise 202 with {"job_id", "job_url"}
//
// A successful dispatch is audit-logged as host.snapshot_diff; a
// failure to write the audit row is ignored.
func (s *Server) handleSnapshotDiff(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}
	hostID := chi.URLParam(r, "id")
	host, err := s.deps.Store.GetHost(r.Context(), hostID)
	if err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
		return
	}
	var req snapshotDiffRequest
	// HTMX form posts arrive as application/x-www-form-urlencoded;
	// the JSON shape is also accepted for REST callers.
	ct := r.Header.Get("Content-Type")
	if strings.HasPrefix(ct, "application/x-www-form-urlencoded") {
		if err := r.ParseForm(); err != nil {
			writeJSONError(w, stdhttp.StatusBadRequest, "invalid_form", err.Error())
			return
		}
		req.SnapshotA = strings.TrimSpace(r.PostForm.Get("snapshot_a"))
		req.SnapshotB = strings.TrimSpace(r.PostForm.Get("snapshot_b"))
	} else {
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
			return
		}
		req.SnapshotA = strings.TrimSpace(req.SnapshotA)
		req.SnapshotB = strings.TrimSpace(req.SnapshotB)
	}
	if req.SnapshotA == "" || req.SnapshotB == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_snapshot",
			"snapshot_a and snapshot_b are both required")
		return
	}
	// Cheap early exit before hitting the store; the authoritative
	// comparison happens after resolution below.
	if req.SnapshotA == req.SnapshotB {
		writeJSONError(w, stdhttp.StatusUnprocessableEntity, "same_snapshot",
			"diff requires two different snapshots")
		return
	}
	// Validate the IDs are known to this host. Match on long ID, short
	// ID, or any prefix match — operators sometimes paste a 6-char
	// shortened form.
	snaps, err := s.deps.Store.ListSnapshotsByHost(r.Context(), host.ID)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}
	// resolveID maps operator input to the canonical long ID, or ""
	// when nothing matches. Exact matches win over prefix matches.
	// NOTE(review): an ambiguous prefix silently resolves to the first
	// snapshot in list order — consider rejecting ambiguous input.
	resolveID := func(idOrShort string) string {
		for _, snap := range snaps {
			if snap.ID == idOrShort || snap.ShortID == idOrShort {
				return snap.ID
			}
		}
		// Prefix fallback (operator pasted 6 chars of a long id).
		for _, snap := range snaps {
			if strings.HasPrefix(snap.ID, idOrShort) {
				return snap.ID
			}
		}
		return ""
	}
	a := resolveID(req.SnapshotA)
	b := resolveID(req.SnapshotB)
	if a == "" || b == "" {
		writeJSONError(w, stdhttp.StatusUnprocessableEntity, "snapshot_not_found",
			"one or both snapshot ids are not in this host's snapshot list")
		return
	}
	// Different spellings (short id vs long id, or two prefixes) can
	// resolve to one snapshot; diffing it against itself is pointless.
	if a == b {
		writeJSONError(w, stdhttp.StatusUnprocessableEntity, "same_snapshot",
			"diff requires two different snapshots")
		return
	}
	if !s.deps.Hub.Connected(host.ID) {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline",
			"agent is not connected; try again when it reconnects")
		return
	}
	jobID := ulid.Make().String()
	now := time.Now().UTC()
	if err := s.deps.Store.CreateJob(r.Context(), store.Job{
		ID: jobID, HostID: host.ID, Kind: string(api.JobDiff),
		ActorKind: "user", ActorID: &user.ID, CreatedAt: now,
	}); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}
	env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
		JobID: jobID, Kind: api.JobDiff,
		Diff: &api.DiffPayload{SnapshotA: a, SnapshotB: b},
	})
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}
	// NOTE(review): if the send fails the job row created above is left
	// behind un-started — confirm something reaps such rows.
	if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", err.Error())
		return
	}
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &user.ID,
		Actor:      "user",
		Action:     "host.snapshot_diff",
		TargetKind: ptr("host"),
		TargetID:   &host.ID,
		TS:         now,
	})
	jobURL := "/jobs/" + jobID
	// HTMX callers get a client-side redirect to the live job page.
	if r.Header.Get("HX-Request") == "true" {
		w.Header().Set("HX-Redirect", jobURL)
		w.WriteHeader(stdhttp.StatusNoContent)
		return
	}
	writeJSON(w, stdhttp.StatusAccepted, map[string]string{
		"job_id":  jobID,
		"job_url": jobURL,
	})
}
+136
View File
@@ -0,0 +1,136 @@
// diff_test.go — covers POST /api/hosts/{id}/snapshots/diff (P3-09).
package http
import (
"context"
"encoding/json"
stdhttp "net/http"
"net/url"
"strings"
"testing"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestSnapshotDiffHappyPath posts a valid two-snapshot form as an HTMX
// request while the agent is connected, then checks both halves of the
// contract: the browser gets a 204 with an HX-Redirect to the job page,
// and the agent receives a command.run of kind diff naming both
// snapshots.
func TestSnapshotDiffHappyPath(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, srv, st, "diff-host")
	a, b := seedTwoSnapshots(t, st, hostID, "diff-host")

	// Bring the fake agent online and wait for the post-hello
	// schedule.set so the connection is settled before we post.
	c := agentDial(t, srv, ts, hostID, token)
	sendHello(t, c, "diff-host")
	_ = drainUntil(t, c, api.MsgScheduleSet)

	cookie := loginAsAdmin(t, st)
	form := url.Values{
		"snapshot_a": {a},
		"snapshot_b": {b},
	}
	req, err := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/snapshots/diff",
		strings.NewReader(form.Encode()))
	if err != nil {
		t.Fatalf("new request: %v", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("HX-Request", "true")
	req.AddCookie(cookie)

	// Never follow redirects: the assertion is on the handler's own
	// response, not on whatever the job page would render.
	client := &stdhttp.Client{
		CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
			return stdhttp.ErrUseLastResponse
		},
	}
	res, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusNoContent {
		t.Fatalf("status: got %d, want 204", res.StatusCode)
	}
	// The handler redirects HTMX callers to "/jobs/<job id>".
	if loc := res.Header.Get("HX-Redirect"); !strings.HasPrefix(loc, "/jobs/") {
		t.Fatalf("expected HX-Redirect to live job page, got %q", loc)
	}

	// Read from the agent socket until the diff command.run shows up or
	// the overall deadline passes. A read error ends the loop; the
	// type check below then reports that nothing was received.
	deadline := time.Now().Add(2 * time.Second)
	var got api.Envelope
	for time.Now().Before(deadline) {
		ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
		mt, raw, rerr := c.Read(ctx)
		cancel()
		if rerr != nil {
			break
		}
		if mt != websocket.MessageText {
			continue
		}
		if !strings.Contains(string(raw), `"kind":"diff"`) {
			continue
		}
		if err := json.Unmarshal(raw, &got); err != nil {
			t.Fatalf("decode envelope: %v", err)
		}
		break
	}
	if got.Type != api.MsgCommandRun {
		t.Fatal("never received diff command.run")
	}

	var cp api.CommandRunPayload
	if err := got.UnmarshalPayload(&cp); err != nil {
		t.Fatalf("decode command.run payload: %v", err)
	}
	if cp.Diff == nil {
		t.Fatal("diff payload nil")
	}
	if cp.Diff.SnapshotA != a || cp.Diff.SnapshotB != b {
		t.Fatalf("diff payload: got %+v want a=%s b=%s", cp.Diff, a, b)
	}
}
// TestSnapshotDiffSameID verifies that asking for a diff of a snapshot
// against itself is rejected with 422 Unprocessable Entity.
func TestSnapshotDiffSameID(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "diff-same")
	a := seedSnapshot(t, st, hostID, "diff-same")
	cookie := loginAsAdmin(t, st)

	// Same id on both sides of the form.
	form := url.Values{"snapshot_a": {a}, "snapshot_b": {a}}
	req, err := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/snapshots/diff",
		strings.NewReader(form.Encode()))
	if err != nil {
		t.Fatalf("new request: %v", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", res.StatusCode)
	}
}
// TestSnapshotDiffUnknownID verifies that snapshot ids which do not
// appear in the host's snapshot list are rejected with 422, even when
// the host has at least one real snapshot on record.
func TestSnapshotDiffUnknownID(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "diff-unknown")
	// Seed one real snapshot so the list is non-empty; its id is not used.
	_ = seedSnapshot(t, st, hostID, "diff-unknown")
	cookie := loginAsAdmin(t, st)

	form := url.Values{"snapshot_a": {"deadbeef"}, "snapshot_b": {"cafebabe"}}
	req, err := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/snapshots/diff",
		strings.NewReader(form.Encode()))
	if err != nil {
		t.Fatalf("new request: %v", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", res.StatusCode)
	}
}
+1 -1
View File
@@ -213,7 +213,7 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
// session cookie and trust it, validating the cookie via store.
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
+1 -1
View File
@@ -27,7 +27,7 @@ type hostBandwidthView struct {
func (s *Server) handleUpdateHostBandwidth(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+1 -1
View File
@@ -58,7 +58,7 @@ func (s *Server) pushBandwidthToAgent(ctx context.Context, hostID string, up, do
// bandwidthPayload builds a ConfigUpdatePayload with only the
// bandwidth fields populated. Pointers are passed through verbatim;
// callers wanting to clear a cap should pass a non-nil pointer to 0.
// On the on-hello path we materialize zero-valued pointers when the
// On the on-hello path we materialise zero-valued pointers when the
// host record has no cap set, so the agent's stored state is always
// in sync (rather than retaining whatever value it last received).
func bandwidthPayload(up, down *int) api.ConfigUpdatePayload {
+6 -6
View File
@@ -32,7 +32,7 @@ type hostRepoCredsView struct {
// creds for UI display. 404 if no credential has ever been set.
func (s *Server) handleGetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -88,7 +88,7 @@ type hostRepoCredsRequest struct {
func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -165,7 +165,7 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
w.WriteHeader(stdhttp.StatusNoContent)
}
// pushRepoCredsToAgent serializes blob into a config.update envelope
// pushRepoCredsToAgent serialises blob into a config.update envelope
// and ships it down the agent's WS. Returns an error from the hub
// (no-op if not connected — caller is expected to check first when it
// matters).
@@ -192,7 +192,7 @@ func (s *Server) pushRepoCredsToAgent(ctx context.Context, hostID string, blob r
// uses this to pre-fill the edit form.
func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -234,7 +234,7 @@ func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.
func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -319,7 +319,7 @@ func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.
func (s *Server) handleDeleteAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+2 -2
View File
@@ -34,7 +34,7 @@ type hostView struct {
// see the same projection.
func (s *Server) handleListHosts(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hosts, err := s.deps.Store.ListHosts(r.Context())
@@ -55,7 +55,7 @@ func (s *Server) handleListHosts(w stdhttp.ResponseWriter, r *stdhttp.Request) {
// handleFleetSummary returns the dashboard tile aggregate.
func (s *Server) handleFleetSummary(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
fs, err := s.deps.Store.FleetSummary(r.Context())
+135
View File
@@ -0,0 +1,135 @@
package http
import (
"bufio"
"encoding/json"
"fmt"
stdhttp "net/http"
"strings"
"github.com/go-chi/chi/v5"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// handleJobLogDownload serves GET /api/jobs/{id}/log.{txt,ndjson}.
//
// The response is built from whatever ListJobLogs returns for the job
// at the time of the request, so it can be called while the job is
// still running as well as after it has finished; a later call simply
// returns more rows.
//
// Output format: a non-empty ?format= query parameter wins; otherwise
// the {format} URL parameter (the file suffix) is used. Only "ndjson"
// selects NDJSON; every other value, including an unrecognised one,
// produces plain text.
//
// Responses: 401 without a valid user, 400 when the id is empty, 404
// when GetJob fails, 500 when the log rows cannot be listed.
func (s *Server) handleJobLogDownload(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if _, authed := s.requireUser(r); !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	id := chi.URLParam(r, "id")
	if id == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_job_id", "")
		return
	}
	job, err := s.deps.Store.GetJob(r.Context(), id)
	if err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "job_not_found", "")
		return
	}

	// Query parameter first, URL suffix second.
	wanted := r.URL.Query().Get("format")
	if wanted == "" {
		wanted = chi.URLParam(r, "format")
	}

	rows, err := s.deps.Store.ListJobLogs(r.Context(), id, 0, 0)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	// Download name: job-<kind>-<first 8 characters of the id>.
	prefix := id
	if len(prefix) > 8 {
		prefix = prefix[:8]
	}
	base := "job-" + job.Kind + "-" + prefix

	if wanted == "ndjson" {
		w.Header().Set("Content-Type", "application/x-ndjson; charset=utf-8")
		w.Header().Set("Content-Disposition",
			`attachment; filename="`+base+`.ndjson"`)
		writeLogsNDJSON(w, rows)
		return
	}
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	w.Header().Set("Content-Disposition",
		`attachment; filename="`+base+`.txt"`)
	writeLogsText(w, job, rows)
}
// writeLogsText writes the job's log rows as plain text, one row per
// line in the form "HH:MM:SS.mmm TAG payload" (UTC). A short "#"
// prefixed header (job id, kind, status, optional start/finish times
// and the row count) comes first so the file is useful on its own.
//
// Line breaks inside a payload (CRLF, LF or a bare CR) are each
// replaced by a single space so that every log row occupies exactly
// one output line. Write errors are ignored: the download is
// best-effort once the headers have gone out.
func writeLogsText(w stdhttp.ResponseWriter, job *store.Job, logs []store.JobLogLine) {
	const headerStamp = "2006-01-02T15:04:05.000Z"
	// "\r\n" is listed first so a CRLF pair collapses to one space
	// rather than two.
	oneLine := strings.NewReplacer("\r\n", " ", "\n", " ", "\r", " ")

	bw := bufio.NewWriter(w)
	defer func() { _ = bw.Flush() }()

	_, _ = fmt.Fprintf(bw, "# job %s · kind %s · status %s\n",
		job.ID, job.Kind, job.Status)
	if job.StartedAt != nil {
		_, _ = fmt.Fprintf(bw, "# started %s\n", job.StartedAt.UTC().Format(headerStamp))
	}
	if job.FinishedAt != nil {
		_, _ = fmt.Fprintf(bw, "# finished %s\n", job.FinishedAt.UTC().Format(headerStamp))
	}
	_, _ = fmt.Fprintf(bw, "# %d log lines\n\n", len(logs))

	for _, l := range logs {
		tag := streamTag(l.Stream)
		ts := l.TS.UTC().Format("15:04:05.000")
		_, _ = fmt.Fprintf(bw, "%s %s %s\n", ts, tag, oneLine.Replace(l.Payload))
	}
}
// writeLogsNDJSON writes one JSON object per log row, each on its own
// line, with the fields seq, ts (UTC, millisecond precision), stream
// and payload. Every object stands alone, so the output stays valid
// NDJSON if more rows are appended later.
//
// Output goes through a buffered writer, as in the text variant, and
// the loop stops at the first encode/write error: once the client has
// gone away there is no point serialising the remaining rows.
func writeLogsNDJSON(w stdhttp.ResponseWriter, logs []store.JobLogLine) {
	type row struct {
		Seq     int64  `json:"seq"`
		TS      string `json:"ts"`
		Stream  string `json:"stream"`
		Payload string `json:"payload"`
	}

	bw := bufio.NewWriter(w)
	defer func() { _ = bw.Flush() }()

	enc := json.NewEncoder(bw)
	for _, l := range logs {
		err := enc.Encode(row{
			Seq:     l.Seq,
			TS:      l.TS.UTC().Format("2006-01-02T15:04:05.000Z"),
			Stream:  l.Stream,
			Payload: l.Payload,
		})
		if err != nil {
			return
		}
	}
}
// streamTag maps a log stream name to the short tag used in the text
// download: "stdout" becomes "OUT", "stderr" becomes "ERR" and "event"
// becomes "EVENT". Any other name is returned upper-cased.
func streamTag(s string) string {
	if s == "stdout" {
		return "OUT"
	}
	if s == "stderr" {
		return "ERR"
	}
	if s == "event" {
		return "EVENT"
	}
	return strings.ToUpper(s)
}
+181
View File
@@ -0,0 +1,181 @@
// job_download_test.go — covers GET /api/jobs/{id}/log.{txt,ndjson}.
package http
import (
"context"
"encoding/json"
stdhttp "net/http"
"strings"
"testing"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// seedJobWithLogs inserts a "diff" job for hostID, marks it started,
// appends lineCount log rows (sequence numbers 1..lineCount, spaced one
// millisecond apart, every fifth row starting with the first on
// "stderr" and the rest on "stdout"), marks the job "succeeded", and
// returns the job id. Any store error fails the test immediately. The
// caller supplies the test server and authentication.
func seedJobWithLogs(t *testing.T, st *store.Store, hostID string, lineCount int) string {
	t.Helper()

	ctx := context.Background()
	id := ulid.Make().String()
	base := time.Now().UTC()

	job := store.Job{
		ID: id, HostID: hostID, Kind: "diff",
		ActorKind: "user", CreatedAt: base,
	}
	if err := st.CreateJob(ctx, job); err != nil {
		t.Fatalf("create job: %v", err)
	}
	if err := st.MarkJobStarted(ctx, id, base); err != nil {
		t.Fatalf("mark started: %v", err)
	}

	for seq := 1; seq <= lineCount; seq++ {
		idx := seq - 1
		stream := "stdout"
		if idx%5 == 0 {
			stream = "stderr"
		}
		// A restic-diff-shaped JSON line with a varying path.
		payload := `{"message_type":"change","path":"/etc/file` +
			ulid.Make().String()[:6] + `","modifier":"M"}`
		at := base.Add(time.Duration(idx) * time.Millisecond)
		if err := st.AppendJobLog(ctx, id, int64(seq), at, stream, payload); err != nil {
			t.Fatalf("append log: %v", err)
		}
	}

	if err := st.MarkJobFinished(ctx, id, "succeeded", 0, nil, "", base); err != nil {
		t.Fatalf("mark finished: %v", err)
	}
	return id
}
// TestJobLogDownloadTxt downloads a 12-row job log as plain text and
// checks the status, the content headers, the "#" header block, that
// there is exactly one body line per log row, and that at least one
// row carries the ERR tag.
func TestJobLogDownloadTxt(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServer(t)
	hostID, _ := enrolHostForWS(t, srv, st, "dl-txt-host")
	jobID := seedJobWithLogs(t, st, hostID, 12)
	cookie := loginAsAdmin(t, st)

	target := ts.URL + "/api/jobs/" + jobID + "/log.txt"
	req, _ := stdhttp.NewRequest(stdhttp.MethodGet, target, nil)
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if res.StatusCode != stdhttp.StatusOK {
		t.Fatalf("status: got %d, want 200", res.StatusCode)
	}
	ct := res.Header.Get("Content-Type")
	if !strings.HasPrefix(ct, "text/plain") {
		t.Errorf("content-type: got %q", ct)
	}
	cd := res.Header.Get("Content-Disposition")
	if !strings.Contains(cd, ".txt") {
		t.Errorf("content-disposition: got %q", cd)
	}

	body := readBody(t, res.Body)

	// Header block.
	if !strings.HasPrefix(body, "# job ") {
		t.Errorf("expected '# job ...' header line; got %q", short(body))
	}
	if !strings.Contains(body, "12 log lines") {
		t.Errorf("expected '12 log lines'; got %q", short(body))
	}

	// Body rows are the lines that are neither blank nor "#" comments.
	rows := 0
	for _, raw := range strings.Split(body, "\n") {
		switch trimmed := strings.TrimSpace(raw); {
		case trimmed == "", strings.HasPrefix(trimmed, "#"):
			// header or padding — not a log row
		default:
			rows++
		}
	}
	if rows != 12 {
		t.Errorf("expected 12 body rows, got %d", rows)
	}

	// The seeder writes every fifth row to stderr, so ERR must appear.
	if !strings.Contains(body, " ERR ") {
		t.Errorf("expected at least one ERR row")
	}
}
// TestJobLogDownloadNDJSON downloads a 5-row job log as NDJSON and
// checks that every non-blank line parses on its own as an object with
// non-zero seq, ts, stream and payload, and that exactly five such
// objects come back.
func TestJobLogDownloadNDJSON(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServer(t)
	hostID, _ := enrolHostForWS(t, srv, st, "dl-ndjson-host")
	jobID := seedJobWithLogs(t, st, hostID, 5)
	cookie := loginAsAdmin(t, st)

	target := ts.URL + "/api/jobs/" + jobID + "/log.ndjson"
	req, _ := stdhttp.NewRequest(stdhttp.MethodGet, target, nil)
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if res.StatusCode != stdhttp.StatusOK {
		t.Fatalf("status: got %d, want 200", res.StatusCode)
	}
	ct := res.Header.Get("Content-Type")
	if !strings.HasPrefix(ct, "application/x-ndjson") {
		t.Errorf("content-type: got %q", ct)
	}

	// Shape of one downloaded row.
	type logRow struct {
		Seq     int64  `json:"seq"`
		TS      string `json:"ts"`
		Stream  string `json:"stream"`
		Payload string `json:"payload"`
	}

	parsed := 0
	for _, line := range strings.Split(readBody(t, res.Body), "\n") {
		if strings.TrimSpace(line) == "" {
			continue
		}
		// Fresh value per line so fields cannot leak between rows.
		var row logRow
		if err := json.Unmarshal([]byte(line), &row); err != nil {
			t.Fatalf("parse line %q: %v", line, err)
		}
		if row.Seq == 0 || row.TS == "" || row.Stream == "" || row.Payload == "" {
			t.Errorf("incomplete object: %+v", row)
		}
		parsed++
	}
	if parsed != 5 {
		t.Errorf("parsed %d objects, want 5", parsed)
	}
}
// TestJobLogDownloadNotFound requests the log of a freshly generated
// job id that was never stored and expects 404 for an authenticated
// caller.
func TestJobLogDownloadNotFound(t *testing.T) {
	t.Parallel()
	_, ts, st := rawTestServer(t)
	cookie := loginAsAdmin(t, st)

	unknownID := ulid.Make().String()
	target := ts.URL + "/api/jobs/" + unknownID + "/log.txt"
	req, _ := stdhttp.NewRequest(stdhttp.MethodGet, target, nil)
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != stdhttp.StatusNotFound {
		t.Fatalf("status: got %d, want 404", got)
	}
}
// TestJobLogDownloadUnauthenticated requests a job log with no session
// cookie and expects 401.
func TestJobLogDownloadUnauthenticated(t *testing.T) {
	t.Parallel()
	_, ts, _ := rawTestServer(t)

	target := ts.URL + "/api/jobs/x/log.txt"
	res, err := stdhttp.Get(target)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if got := res.StatusCode; got != stdhttp.StatusUnauthorized {
		t.Fatalf("status: got %d, want 401", got)
	}
}
+3 -2
View File
@@ -31,7 +31,7 @@ type runNowResponse struct {
func (s *Server) handleRunNow(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -152,7 +152,8 @@ func (s *Server) requireUser(r *stdhttp.Request) (*store.User, bool) {
func validJobKind(k api.JobKind) bool {
switch k {
case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune, api.JobCheck, api.JobUnlock:
case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune,
api.JobCheck, api.JobUnlock, api.JobRestore, api.JobDiff:
return true
}
return false
+1 -1
View File
@@ -81,7 +81,7 @@ func drainUntil(t *testing.T, c *websocket.Conn, wantType api.MessageType) api.E
return api.Envelope{}
}
// enrolHostForWS pre-enrolls a host with bound repo creds so the server
// enrolHostForWS pre-enrols a host with bound repo creds so the server
// will treat it as ready to receive command.run.
func enrolHostForWS(t *testing.T, srv *Server, st *store.Store, name string) (hostID, token string) {
t.Helper()
+2 -2
View File
@@ -506,12 +506,12 @@ func TestEnqueueOnDispatchFailure(t *testing.T) {
func TestDrainPendingSerializesPerHost(t *testing.T) {
t.Parallel()
srv, ts, st := rawTestServer(t)
hostID, token := enrolHostForWS(t, srv, st, "serialize-host")
hostID, token := enrolHostForWS(t, srv, st, "serialise-host")
gid, sid := seedSchedAndGroup(t, st, hostID, 10)
// Connect the agent so DrainPending can dispatch.
c := agentDial(t, srv, ts, hostID, token)
sendHello(t, c, "serialize-host")
sendHello(t, c, "serialise-host")
// Drain the on-hello goroutine's pass first (no pending rows yet),
// then wait for the schedule.set so the connection is fully settled.
_ = drainUntil(t, c, api.MsgScheduleSet)
+2 -2
View File
@@ -214,7 +214,7 @@ type acceptForm struct {
func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
pendingID := chi.URLParam(r, "id")
@@ -315,7 +315,7 @@ func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Re
func (s *Server) handleRejectPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
pendingID := chi.URLParam(r, "id")
+2 -2
View File
@@ -41,7 +41,7 @@ func toRepoMaintenanceView(m store.HostRepoMaintenance) repoMaintenanceView {
func (s *Server) handleGetRepoMaintenance(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -84,7 +84,7 @@ type repoMaintenanceWriteRequest struct {
func (s *Server) handleUpdateRepoMaintenance(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+3 -3
View File
@@ -26,7 +26,7 @@ func (s *Server) handleRunRepoPrune(w stdhttp.ResponseWriter, r *stdhttp.Request
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
return
}
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -72,7 +72,7 @@ func (s *Server) handleRunRepoCheck(w stdhttp.ResponseWriter, r *stdhttp.Request
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
return
}
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -125,7 +125,7 @@ func (s *Server) handleRunRepoUnlock(w stdhttp.ResponseWriter, r *stdhttp.Reques
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
return
}
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+1 -1
View File
@@ -53,7 +53,7 @@ func (s *Server) handleRunSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
return
}
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+4 -4
View File
@@ -61,7 +61,7 @@ var cronParser = cron.NewParser(
func (s *Server) handleListSchedules(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -89,7 +89,7 @@ func (s *Server) handleListSchedules(w stdhttp.ResponseWriter, r *stdhttp.Reques
func (s *Server) handleCreateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -126,7 +126,7 @@ func (s *Server) handleCreateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Reque
func (s *Server) handleUpdateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -173,7 +173,7 @@ func (s *Server) handleUpdateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Reque
func (s *Server) handleDeleteSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+33 -2
View File
@@ -43,7 +43,7 @@ type Server struct {
srv *stdhttp.Server
deps Deps
// drainLocks serializes DrainPending per host. The on-hello
// drainLocks serialises DrainPending per host. The on-hello
// goroutine and the 30s ticker can otherwise race for the same
// host, double-dispatching every pending row. Map of hostID →
// sync.Mutex; checked-and-locked atomically via drainLocksMu.
@@ -58,6 +58,11 @@ type Server struct {
// pending_id so the accept/reject handlers can push the bearer
// or close cleanly (P2-18b).
pendingHub *pendingHub
// treeCache holds per-wizard-session listings of snapshot
// directories (P3-X2). Pre-allocated in New so the lazy-init
// race is impossible.
treeCache *treeCache
}
// New builds a configured but not-yet-started server.
@@ -81,6 +86,7 @@ func New(deps Deps) *Server {
drainLocks: make(map[string]*sync.Mutex),
announceRL: newAnnounceLimiter(),
pendingHub: newPendingHub(),
treeCache: newTreeCache(),
}
s.routes(r)
@@ -178,8 +184,22 @@ func (s *Server) routes(r chi.Router) {
r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune)
r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck)
r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock)
// Cancel a running job. Operator-driven, sends command.cancel
// to the agent which kills the restic subprocess; the agent's
// resulting job.finished (status=canceled) is what flips the
// job row.
r.Post("/jobs/{id}/cancel", s.handleCancelJob)
// Snapshot diff (P3-09). Dispatches a JobDiff against two
// snapshots; output streams to the standard live job page.
r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff)
})
// HTMX form variant of diff (mounted outside /api so HTMX forms
// can post against it without the api/ prefix).
r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff)
// Per-source-group Run-now (HTMX form action). Available even
// when the server is started without UI templates so REST callers
// against the non-/api path also work.
@@ -237,7 +257,7 @@ func (s *Server) routes(r chi.Router) {
// Durable post-Add-host page (operator can refresh / come
// back; password decrypted from the token row each render).
// Polled fragment under /awaiting flips to "connected" once
// the agent enrolls.
// the agent enrols.
r.Get("/hosts/pending/{token}", s.handleUIPendingHost)
r.Get("/hosts/pending/{token}/awaiting", s.handleUIPendingAwaiting)
// Host detail (Snapshots tab is the default).
@@ -270,6 +290,12 @@ func (s *Server) routes(r chi.Router) {
r.Post("/hosts/{id}/schedules/{sid}/run", s.handleUIScheduleRun)
// Live job log.
r.Get("/jobs/{id}", s.handleUIJobDetail)
// Restore wizard (P3-01/P3-02). Two GET variants land on the
// same handler; the second deep-links a chosen snapshot.
r.Get("/hosts/{id}/restore", s.handleUIRestoreGet)
r.Get("/hosts/{id}/snapshots/{sid}/restore", s.handleUIRestoreGet)
r.Post("/hosts/{id}/restore", s.handleUIRestorePost)
r.Get("/hosts/{id}/restore/tree", s.handleUIRestoreTree)
}
// Browser job-log stream (separate from /ws/agent so the auth
@@ -278,6 +304,11 @@ func (s *Server) routes(r chi.Router) {
if s.deps.JobHub != nil {
r.Get("/api/jobs/{id}/stream", s.handleJobStream)
}
// Job log download (txt + ndjson). Source of truth is the
// persisted job_logs table; safe to call any time, no pause
// needed against the live stream.
r.Get("/api/jobs/{id}/log.{format:txt|ndjson}", s.handleJobLogDownload)
}
// Start begins listening. Blocks until ListenAndServe returns
+1 -1
View File
@@ -35,7 +35,7 @@ type listSnapshotsResponse struct {
// onto whatever the server most recently received.
func (s *Server) handleListHostSnapshots(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if _, ok := s.requireUser(r); !ok {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
+5 -5
View File
@@ -66,7 +66,7 @@ type sourceGroupWriteRequest struct {
func (s *Server) handleListSourceGroups(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -90,7 +90,7 @@ func (s *Server) handleListSourceGroups(w stdhttp.ResponseWriter, r *stdhttp.Req
func (s *Server) handleGetSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -109,7 +109,7 @@ func (s *Server) handleGetSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque
func (s *Server) handleCreateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -152,7 +152,7 @@ func (s *Server) handleCreateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Re
func (s *Server) handleUpdateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
@@ -207,7 +207,7 @@ func (s *Server) handleUpdateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Re
// the UI can offer "remove from these schedules first."
func (s *Server) handleDeleteSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
return
}
hostID := chi.URLParam(r, "id")
+112
View File
@@ -0,0 +1,112 @@
package http
import (
"context"
"sync"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// treeCacheTTL is the lifetime of one cached directory listing. The
// cache exists to make re-expanding nodes within the same wizard
// session fast; thirty minutes comfortably covers one wizard
// interaction without keeping stale listings around indefinitely.
const treeCacheTTL = 30 * time.Minute

type (
	// treeCacheKey identifies a single cached listing. SessionID keeps
	// entries private to one browser session so two operators never
	// share view state; HostID, SnapshotID and Path pin down the
	// directory inside the snapshot.
	treeCacheKey struct {
		SessionID  string
		HostID     string
		SnapshotID string
		Path       string
	}

	// treeCacheEntry is a stored listing together with the instant
	// after which it is no longer served.
	treeCacheEntry struct {
		Result    api.TreeListResultPayload
		ExpiresAt time.Time
	}

	// treeCache is an in-process, mutex-guarded map of directory
	// listings. Only a handful of entries per active wizard session
	// are expected, so a single mutex is sufficient.
	treeCache struct {
		mu      sync.Mutex
		entries map[treeCacheKey]treeCacheEntry
	}
)

// newTreeCache returns an empty cache whose map is already allocated,
// so it is ready for Put without any lazy initialisation.
func newTreeCache() *treeCache {
	c := new(treeCache)
	c.entries = map[treeCacheKey]treeCacheEntry{}
	return c
}
// Get looks up k and reports whether a live entry was found. An entry
// whose ExpiresAt lies before now is deleted on the spot and reported
// as a miss; on a miss the returned payload is the zero value.
func (c *treeCache) Get(k treeCacheKey, now time.Time) (api.TreeListResultPayload, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if entry, found := c.entries[k]; found {
		if !now.After(entry.ExpiresAt) {
			return entry.Result, true
		}
		// Expired: evict so the map does not keep dead listings.
		delete(c.entries, k)
	}
	return api.TreeListResultPayload{}, false
}
// Put stores result under k, replacing any previous entry, with an
// expiry of now plus treeCacheTTL. Put does not inspect the result;
// callers are expected to store only successful listings (Error == "").
func (c *treeCache) Put(k treeCacheKey, result api.TreeListResultPayload, now time.Time) {
	fresh := treeCacheEntry{
		Result:    result,
		ExpiresAt: now.Add(treeCacheTTL),
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries[k] = fresh
}
// Sweep removes every entry whose ExpiresAt lies before now, holding
// the cache mutex for the whole pass. It is meant to be invoked
// opportunistically by the wizard handler rather than from a dedicated
// goroutine; the cache is expected to stay small.
func (c *treeCache) Sweep(now time.Time) {
	c.mu.Lock()
	defer c.mu.Unlock()

	for key, entry := range c.entries {
		if !now.After(entry.ExpiresAt) {
			continue // still live
		}
		delete(c.entries, key)
	}
}
// fetchTreeWithCache returns the listing for path inside snapshotID on
// hostID, scoped to sessionID. A live cache entry is returned directly;
// otherwise a synchronous tree.list RPC with a 30-second timeout is
// sent to the agent.
//
// Transport and decode failures are returned as an error with a zero
// payload. A reply whose Error field is set is returned as-is with a
// nil error and is NOT cached, so a transient agent-side failure does
// not poison later requests for the same path. Successful listings are
// cached with a TTL measured from the moment this call started.
//
//nolint:unused // wired in by the wizard handler in the next slice
func (s *Server) fetchTreeWithCache(ctx context.Context, sessionID, hostID, snapshotID, path string) (api.TreeListResultPayload, error) {
	startedAt := time.Now()
	key := treeCacheKey{
		SessionID:  sessionID,
		HostID:     hostID,
		SnapshotID: snapshotID,
		Path:       path,
	}
	if hit, ok := s.treeCache.Get(key, startedAt); ok {
		return hit, nil
	}

	request := api.TreeListRequestPayload{SnapshotID: snapshotID, Path: path}
	reply, err := s.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList, request, 30*time.Second)
	if err != nil {
		return api.TreeListResultPayload{}, err
	}

	var listing api.TreeListResultPayload
	if err := reply.UnmarshalPayload(&listing); err != nil {
		return api.TreeListResultPayload{}, err
	}

	// Only clean results are worth remembering.
	if listing.Error == "" {
		s.treeCache.Put(key, listing, startedAt)
	}
	return listing, nil
}
+146
View File
@@ -0,0 +1,146 @@
// tree_rpc_test.go — full round-trip test for the tree.list synchronous
// RPC (P3-X2). A fake agent reads the inbound tree.list, replies with a
// canned tree.list.result, and we assert the server's SendRPC returned
// the expected payload.
package http
import (
"context"
"encoding/json"
"testing"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestSendRPCTreeListRoundTrip drives a full tree.list round trip: a
// fake agent goroutine answers the inbound tree.list envelope with a
// canned tree.list.result, and the test asserts that the server-side
// SendRPC call returns exactly that payload.
func TestSendRPCTreeListRoundTrip(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServer(t)
	hostID, token := enrolHostForWS(t, srv, st, "rpc-host")
	c := agentDial(t, srv, ts, hostID, token)
	sendHello(t, c, "rpc-host")
	_ = drainUntil(t, c, api.MsgScheduleSet)

	// Fake agent: read inbound envelopes, mirror tree.list with a
	// canned result. Other inbound envelopes (config.update etc) are
	// already drained above. The channel is buffered so the goroutine
	// can always report its outcome and exit, even if the test body
	// has already failed.
	done := make(chan error, 1)
	go func() {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		for {
			mt, raw, err := c.Read(ctx)
			if err != nil {
				done <- err
				return
			}
			if mt != websocket.MessageText {
				continue
			}
			var env api.Envelope
			if err := json.Unmarshal(raw, &env); err != nil {
				done <- err
				return
			}
			if env.Type != api.MsgTreeList {
				continue
			}
			var req api.TreeListRequestPayload
			if err := env.UnmarshalPayload(&req); err != nil {
				done <- err
				return
			}
			result := api.TreeListResultPayload{
				SnapshotID: req.SnapshotID,
				Path:       req.Path,
				Entries: []api.TreeListEntry{
					{Name: "etc", Type: "dir"},
					{Name: "var", Type: "dir"},
				},
			}
			// Reply under the request's envelope ID so the server can
			// correlate it with the pending RPC.
			out, err := api.Marshal(api.MsgTreeListResult, env.ID, result)
			if err != nil {
				done <- err
				return
			}
			// Surface an encoding failure instead of writing a broken
			// frame and letting the test time out with no explanation.
			rawOut, err := json.Marshal(out)
			if err != nil {
				done <- err
				return
			}
			if err := c.Write(ctx, websocket.MessageText, rawOut); err != nil {
				done <- err
				return
			}
			done <- nil
			return
		}
	}()

	// Server-side SendRPC.
	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
	defer cancel()
	reply, err := srv.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList,
		api.TreeListRequestPayload{SnapshotID: "f3a7b2c1", Path: "/"},
		3*time.Second)
	if err != nil {
		t.Fatalf("SendRPC: %v", err)
	}
	if reply.Type != api.MsgTreeListResult {
		t.Fatalf("reply type: got %q want %q", reply.Type, api.MsgTreeListResult)
	}
	var result api.TreeListResultPayload
	if err := reply.UnmarshalPayload(&result); err != nil {
		t.Fatalf("unmarshal reply: %v", err)
	}
	if result.SnapshotID != "f3a7b2c1" || result.Path != "/" {
		t.Fatalf("payload: got %+v", result)
	}
	if len(result.Entries) != 2 || result.Entries[0].Name != "etc" {
		t.Fatalf("entries: %+v", result.Entries)
	}

	// Make sure the fake agent didn't error out.
	select {
	case err := <-done:
		if err != nil {
			t.Fatalf("fake agent: %v", err)
		}
	case <-time.After(2 * time.Second):
		t.Fatal("fake agent didn't finish")
	}
}
// TestSendRPCTimeoutNoReply checks that SendRPC gives up with an error
// after roughly its own timeout when the agent reads the request but
// never answers.
func TestSendRPCTimeoutNoReply(t *testing.T) {
	t.Parallel()
	const hostname = "rpc-timeout-host"
	srv, ts, st := rawTestServer(t)
	hostID, token := enrolHostForWS(t, srv, st, hostname)
	conn := agentDial(t, srv, ts, hostID, token)
	sendHello(t, conn, hostname)
	_ = drainUntil(t, conn, api.MsgScheduleSet)

	// Silent agent: consume frames until the read context expires or
	// the socket closes, but never write anything back.
	go func() {
		readCtx, stop := context.WithTimeout(context.Background(), 3*time.Second)
		defer stop()
		for {
			_, _, readErr := conn.Read(readCtx)
			if readErr != nil {
				return
			}
		}
	}()

	started := time.Now()
	_, err := srv.deps.Hub.SendRPC(context.Background(), hostID, api.MsgTreeList,
		api.TreeListRequestPayload{SnapshotID: "x", Path: "/"},
		300*time.Millisecond)
	if err == nil {
		t.Fatal("expected timeout error")
	}
	// Lower bound proves the call really waited; upper bound proves it
	// didn't hang until some unrelated deadline.
	took := time.Since(started)
	tooFast := took < 250*time.Millisecond
	tooSlow := took > 2*time.Second
	if tooFast || tooSlow {
		t.Fatalf("timeout took %s, expected ~300ms", took)
	}
}
+28 -3
View File
@@ -16,6 +16,7 @@ import (
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
@@ -276,7 +277,7 @@ type addHostPage struct {
}
// pendingHostPage is the GET /hosts/pending/{token} view. Lives
// for as long as the token does (1h ttl); once the agent enrolls,
// for as long as the token does (1h ttl); once the agent enrols,
// the handler redirects to /hosts/{host_id} and this page is gone.
type pendingHostPage struct {
Token string
@@ -377,7 +378,7 @@ func (s *Server) handleUIAddHostPost(w stdhttp.ResponseWriter, r *stdhttp.Reques
// handleUIPendingHost serves the durable Add-host result page —
// shown after a successful POST /hosts/new and reachable until the
// agent enrolls (the page redirects to /hosts/{id} once that
// agent enrols (the page redirects to /hosts/{id} once that
// happens) or the token expires (1h ttl). The password is
// re-decrypted from the encrypted token row on every render so
// the operator can refresh, bookmark, navigate away and come back.
@@ -512,6 +513,14 @@ type hostChromeData struct {
InitStatus string
InitAt *time.Time // started_at if non-nil else created_at
InitJobID string
// Latest 'restore' job — surfaced as a small line below the
// init-status one so the operator has at-a-glance visibility into
// recent destructive activity. Empty status means no restore has
// ever run on this host.
RestoreStatus string
RestoreAt *time.Time
RestoreJobID string
}
// loadHostChrome fetches the per-tab counts that every host-detail tab
@@ -542,6 +551,15 @@ func (s *Server) loadHostChrome(r *stdhttp.Request, host store.Host, subtab, cru
}
d.InitAt = &t
}
if j, err := s.deps.Store.LatestJobByKind(r.Context(), host.ID, "restore"); err == nil && j != nil {
d.RestoreStatus = j.Status
d.RestoreJobID = j.ID
t := j.CreatedAt
if j.StartedAt != nil {
t = *j.StartedAt
}
d.RestoreAt = &t
}
return d
}
@@ -552,6 +570,12 @@ type hostDetailPage struct {
// SnapshotsShown is the number rendered (we cap at ~50 for the
// first slice; pagination lands when it matters).
SnapshotsShown int
// LegacyRestic is true when the host's restic version predates
// 0.17, in which case `restic snapshots --json` doesn't embed the
// per-snapshot summary block and the Size/Files columns render
// blank. The template uses this to attach a tooltip to those
// column headers explaining the version requirement.
LegacyRestic bool
}
// handleUIHostDetail is the host detail page (snapshots tab by default).
@@ -594,6 +618,7 @@ func (s *Server) handleUIHostDetail(w stdhttp.ResponseWriter, r *stdhttp.Request
hostChromeData: s.loadHostChrome(r, *host, "snapshots", "snapshots"),
Snapshots: shown,
SnapshotsShown: len(shown),
LegacyRestic: !restic.Env{Version: host.ResticVersion}.AtLeastVersion(0, 17),
}
if err := s.deps.UI.Render(w, "host_detail", view); err != nil {
slog.Error("ui: render host_detail", "err", err)
@@ -713,7 +738,7 @@ func (s *Server) handleUIJobDetail(w stdhttp.ResponseWriter, r *stdhttp.Request)
// same way our Go code does.
func (s *Server) handleJobStream(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if u, _ := s.sessionUser(r); u == nil {
stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized)
stdhttp.Error(w, "unauthorised", stdhttp.StatusUnauthorized)
return
}
jobID := chi.URLParam(r, "id")
+2 -2
View File
@@ -49,7 +49,7 @@ func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request
}
if !s.deps.Hub.Connected(host.ID) {
s.renderRepoPage(w, r, u, host,
"Host is offline — bring the agent back up before re-initializing.",
"Host is offline — bring the agent back up before re-initialising.",
"", "", "")
return
}
@@ -58,7 +58,7 @@ func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request
if _, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo); err != nil {
if errors.Is(err, store.ErrNotFound) {
s.renderRepoPage(w, r, u, host,
"Bind repo credentials before re-initializing.",
"Bind repo credentials before re-initialising.",
"", "", "")
return
}
+447
View File
@@ -0,0 +1,447 @@
package http
import (
"context"
"errors"
"log/slog"
stdhttp "net/http"
"sort"
"strings"
"time"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// ui_restore.go — restore wizard backend (P3-01).
//
// GET /hosts/{id}/restore wizard step 1 (snapshot picker)
// GET /hosts/{id}/snapshots/{sid}/restore wizard with snapshot pre-selected
// GET /hosts/{id}/restore/tree HTMX partial: one tree node + children
// POST /hosts/{id}/restore dispatch the restore job
// hostRestorePage is the model for the restore wizard template
// ("host_restore"). It embeds hostChromeData so the wizard renders
// inside the shared host-detail chrome, and carries both the data the
// wizard needs to draw itself and the operator's previous form input
// for validation re-renders.
type hostRestorePage struct {
hostChromeData
// Snapshot picker rows; rendered by the template into the step-1
// table. The handlers cap this at the first 100 rows returned by
// the store (the operator can refine on snapshot ID if they need
// another one — out of scope for v1).
Snapshots []store.Snapshot
// Selected is non-nil iff a snapshot has been chosen — either via
// the deep-link path /hosts/{id}/snapshots/{sid}/restore or by a
// previous form submission that the wizard re-rendered. It is only
// set when the ID (full or short) matches one of Snapshots.
Selected *store.Snapshot
// Default target dir — surfaced in the step-3 radio card. Filled
// from defaultRestoreTargetDir().
DefaultTargetDir string
// Online mirrors Hub.Connected so the dispatch button can be
// disabled at render time when the agent is offline.
Online bool
// Error is shown as a banner above the wizard. Re-render-friendly:
// the operator's snapshot/path/target choices survive the round-trip.
Error string
// Form fields preserved on validation re-render. The template
// reads these to pre-tick checkboxes etc; the names match the
// POST form keys.
FormPaths []string // submitted "paths" values, e.g. "/etc/nginx/sites-available/alfa.conf"
FormInPlace bool
FormTargetDir string
FormConfirmHN string // typed-confirm input value ("confirm_hostname")
}
// handleUIRestoreGet serves the restore wizard page. Two routes land
// here:
//   - /hosts/{id}/restore — wizard opens on the snapshot picker
//   - /hosts/{id}/snapshots/{sid}/restore — wizard opens with the
//     snapshot matching {sid} (full or short ID) already selected
//
// An unknown host yields 404; store failures yield 500. A {sid} that
// matches none of the listed snapshots is ignored and the page falls
// back to the picker.
func (s *Server) handleUIRestoreGet(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	u := s.requireUIUser(w, r)
	if u == nil {
		return
	}

	hostID := chi.URLParam(r, "id")
	host, err := s.deps.Store.GetHost(r.Context(), hostID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		stdhttp.NotFound(w, r)
		return
	case err != nil:
		slog.Error("ui restore: get host", "host_id", hostID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	var page hostRestorePage
	page.hostChromeData = s.loadHostChrome(r, *host, "snapshots", "restore")
	page.DefaultTargetDir = defaultRestoreTargetDir()
	page.Online = s.deps.Hub.Connected(host.ID)

	// Picker rows, capped so the step-1 table stays manageable.
	const maxPickerRows = 100
	rows, err := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID)
	if err != nil {
		slog.Error("ui restore: list snapshots", "host_id", hostID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}
	if len(rows) > maxPickerRows {
		rows = rows[:maxPickerRows]
	}
	page.Snapshots = rows

	// Deep-link variant: pre-select the snapshot named in the URL.
	if want := chi.URLParam(r, "sid"); want != "" {
		for _, candidate := range rows {
			if candidate.ID != want && candidate.ShortID != want {
				continue
			}
			// Point at a private copy, not at the slice element.
			chosen := candidate
			page.Selected = &chosen
			break
		}
	}

	view := s.baseView(u)
	view.Title = "Restore · " + host.Name
	view.Page = page
	if renderErr := s.deps.UI.Render(w, "host_restore", view); renderErr != nil {
		slog.Error("ui restore: render", "err", renderErr)
	}
}
// handleUIRestorePost validates the wizard form and dispatches the
// restore job to the agent.
//
// Form keys: snapshot_id, paths (repeated), target_mode ("in_place"
// selects overwrite mode; anything else is new-directory mode),
// target_dir, confirm_hostname.
//
// Outcomes:
//   - validation failure → wizard re-rendered with an error banner
//     and the operator's input intact (422)
//   - agent offline or delivery failed → wizard re-rendered (503)
//   - unknown host → 404; store / encoding failure → 500
//   - success → redirect to /jobs/{job_id} (HX-Redirect + 204 for
//     HTMX requests, 303 otherwise)
func (s *Server) handleUIRestorePost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	u := s.requireUIUser(w, r)
	if u == nil {
		return
	}
	hostID := chi.URLParam(r, "id")
	host, err := s.deps.Store.GetHost(r.Context(), hostID)
	if err != nil {
		// Only a genuinely missing host is a 404; a store failure must
		// not masquerade as "not found" (matches handleUIRestoreGet).
		if errors.Is(err, store.ErrNotFound) {
			stdhttp.NotFound(w, r)
			return
		}
		slog.Error("ui restore: get host", "host_id", hostID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}
	if err := r.ParseForm(); err != nil {
		stdhttp.Error(w, "bad form", stdhttp.StatusBadRequest)
		return
	}

	snapshotID := strings.TrimSpace(r.PostForm.Get("snapshot_id"))
	paths := r.PostForm["paths"] // multiple checkbox values
	inPlace := r.PostForm.Get("target_mode") == "in_place"
	targetDir := strings.TrimSpace(r.PostForm.Get("target_dir"))
	confirmHN := strings.TrimSpace(r.PostForm.Get("confirm_hostname"))

	// rerender redraws the wizard with errMsg as the banner and the
	// submitted values pre-filled. It reads the variables above at
	// call time, so later adjustments (e.g. the defaulted targetDir)
	// are reflected.
	rerender := func(errMsg string, status int) {
		page := hostRestorePage{
			hostChromeData:   s.loadHostChrome(r, *host, "snapshots", "restore"),
			DefaultTargetDir: defaultRestoreTargetDir(),
			Online:           s.deps.Hub.Connected(host.ID),
			Error:            errMsg,
			FormPaths:        paths,
			FormInPlace:      inPlace,
			FormTargetDir:    targetDir,
			FormConfirmHN:    confirmHN,
		}
		// Best effort: the banner is still worth showing without the
		// picker rows, so a list failure is logged, not fatal.
		snaps, lerr := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID)
		if lerr != nil {
			slog.Warn("ui restore: list snapshots for re-render", "host_id", hostID, "err", lerr)
		}
		if len(snaps) > 100 {
			snaps = snaps[:100]
		}
		page.Snapshots = snaps
		for i := range snaps {
			if snaps[i].ID == snapshotID || snaps[i].ShortID == snapshotID {
				ss := snaps[i]
				page.Selected = &ss
				break
			}
		}
		view := s.baseView(u)
		view.Title = "Restore · " + host.Name
		view.Page = page
		w.WriteHeader(status)
		if rerr := s.deps.UI.Render(w, "host_restore", view); rerr != nil {
			slog.Error("ui restore: render", "err", rerr)
		}
	}

	if snapshotID == "" {
		rerender("Pick a snapshot first.", stdhttp.StatusUnprocessableEntity)
		return
	}

	// Drop blank entries; reject anything that isn't absolute.
	cleanPaths := make([]string, 0, len(paths))
	for _, p := range paths {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		if !strings.HasPrefix(p, "/") {
			rerender("Paths must be absolute (start with /).", stdhttp.StatusUnprocessableEntity)
			return
		}
		cleanPaths = append(cleanPaths, p)
	}
	if len(cleanPaths) == 0 {
		rerender("Pick at least one file or directory to restore.", stdhttp.StatusUnprocessableEntity)
		return
	}

	if inPlace {
		// Overwrite mode is destructive: require the typed host name.
		if confirmHN != host.Name {
			rerender("Type the host name exactly to confirm an in-place (overwrite) restore.",
				stdhttp.StatusUnprocessableEntity)
			return
		}
	} else {
		// New-directory mode: trust the operator's chosen target.
		// Empty falls back to the default. Validate it's either
		// absolute or starts with $HOME / ~/ (the agent expands
		// these at run time).
		if targetDir == "" {
			targetDir = defaultRestoreTargetDir()
		}
		if !looksLikeRestoreTarget(targetDir) {
			rerender("Target must be an absolute path, or start with $HOME or ~/.",
				stdhttp.StatusUnprocessableEntity)
			return
		}
	}

	if !s.deps.Hub.Connected(host.ID) {
		rerender("Agent is offline. Try again when it reconnects.",
			stdhttp.StatusServiceUnavailable)
		return
	}

	// Build a new job id up-front so we can substitute it into the
	// new-directory target path. The agent will additionally expand
	// $HOME / ~/ before invoking restic.
	jobID := ulid.Make().String()
	finalTarget := ""
	if !inPlace {
		finalTarget = strings.ReplaceAll(targetDir, "<job-id>", jobID)
	}

	// Encode the command before persisting anything, so an encoding
	// failure cannot leave behind a job row that was never dispatched.
	payload := api.CommandRunPayload{
		JobID: jobID,
		Kind:  api.JobRestore,
		Restore: &api.RestorePayload{
			SnapshotID: snapshotID,
			Paths:      cleanPaths,
			InPlace:    inPlace,
			TargetDir:  finalTarget,
		},
	}
	env, err := api.Marshal(api.MsgCommandRun, jobID, payload)
	if err != nil {
		slog.Error("ui restore: marshal command", "job_id", jobID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	now := time.Now().UTC()
	if err := s.deps.Store.CreateJob(r.Context(), store.Job{
		ID:        jobID,
		HostID:    host.ID,
		Kind:      string(api.JobRestore),
		ActorKind: "user",
		ActorID:   &u.ID,
		CreatedAt: now,
	}); err != nil {
		slog.Error("ui restore: create job", "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil {
		// NOTE(review): the job row created above stays behind in its
		// initial state when delivery fails — confirm whether the
		// store offers a way to mark it failed here.
		slog.Warn("ui restore: dispatch failed", "job_id", jobID, "err", err)
		rerender("Couldn't deliver the restore command (agent went offline).",
			stdhttp.StatusServiceUnavailable)
		return
	}

	// The command is already on its way, so an audit failure must not
	// fail the request — but a missing audit row for a restore should
	// at least be visible in the logs.
	if err := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &u.ID,
		Actor:      "user",
		Action:     "host.restore",
		TargetKind: ptr("host"),
		TargetID:   &host.ID,
		TS:         now,
	}); err != nil {
		slog.Warn("ui restore: append audit", "job_id", jobID, "host_id", host.ID, "err", err)
	}

	// HTMX redirect (or vanilla redirect) to the live job log.
	jobURL := "/jobs/" + jobID
	if r.Header.Get("HX-Request") == "true" {
		w.Header().Set("HX-Redirect", jobURL)
		w.WriteHeader(stdhttp.StatusNoContent)
		return
	}
	stdhttp.Redirect(w, r, jobURL, stdhttp.StatusSeeOther)
}
// hostRestoreTreePage is the data shape for the tree-node HTMX partial
// ("tree_node"). handleUIRestoreTree fills either Error (agent
// offline, RPC failure, or an error reported by the agent) or
// Children (successful listing), never both.
type hostRestoreTreePage struct {
HostID string
SnapshotID string
// Path is the directory whose children are being listed; "/" when
// the request didn't specify one.
Path string
Children []treeChildView
Error string
}
// treeChildView is one row of the tree (a direct child of Path).
type treeChildView struct {
Name string
Type string // dir | file | symlink
Path string // full path, used in the checkbox value
// Size is copied verbatim from the agent's tree entry — presumably
// bytes; confirm against api.TreeListEntry.
Size int64
// IsDir is Type == "dir", precomputed for the template and for
// the dirs-first sort.
IsDir bool
}
// handleUIRestoreTree is the HTMX-served partial that loads one
// directory's children. Called when the operator clicks an expand
// chevron in the wizard's tree browser. Listings come from
// fetchTreeWithCache, scoped to the operator's session.
//
// Query parameters: snapshot (required) and path (defaults to "/").
//
// Failure handling: an unknown host is a 404, a store failure a 500
// and a missing snapshot parameter a 400. Everything that goes wrong
// after that (agent offline, RPC error, agent-reported error) is
// rendered into the partial itself, so the wizard can show the
// message inline next to the node that failed to expand.
func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	u := s.requireUIUser(w, r)
	if u == nil {
		return
	}
	hostID := chi.URLParam(r, "id")
	host, err := s.deps.Store.GetHost(r.Context(), hostID)
	if err != nil {
		// Only a genuinely missing host is a 404; a store failure must
		// not masquerade as "not found" (matches handleUIRestoreGet).
		if errors.Is(err, store.ErrNotFound) {
			stdhttp.NotFound(w, r)
			return
		}
		slog.Error("ui restore tree: get host", "host_id", hostID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	q := r.URL.Query()
	snapshotID := strings.TrimSpace(q.Get("snapshot"))
	pathArg := strings.TrimSpace(q.Get("path"))
	if pathArg == "" {
		pathArg = "/"
	}
	if snapshotID == "" {
		stdhttp.Error(w, "snapshot required", stdhttp.StatusBadRequest)
		return
	}

	// render writes the partial for page and logs (rather than drops)
	// a template failure.
	render := func(page hostRestoreTreePage) {
		view := s.baseView(u)
		view.Page = page
		if rerr := s.deps.UI.RenderPartial(w, "tree_node", view); rerr != nil {
			slog.Warn("ui restore tree: render partial", "err", rerr)
		}
	}
	// renderError renders the partial in its inline-error form.
	renderError := func(msg string) {
		render(hostRestoreTreePage{
			HostID: host.ID, SnapshotID: snapshotID, Path: pathArg,
			Error: msg,
		})
	}

	if !s.deps.Hub.Connected(host.ID) {
		// Render the partial with an error message rather than 503ing
		// — the wizard renders the error inline next to the failed node.
		renderError("agent offline")
		return
	}

	sessionID := sessionIDFromCookie(r)
	// Slightly longer than the 30s RPC timeout used inside
	// fetchTreeWithCache, so the RPC's own timeout fires first.
	ctx, cancel := context.WithTimeout(r.Context(), 35*time.Second)
	defer cancel()
	result, err := s.fetchTreeWithCache(ctx, sessionID, host.ID, snapshotID, pathArg)
	if err != nil {
		renderError(err.Error())
		return
	}
	if result.Error != "" {
		renderError(result.Error)
		return
	}

	children := make([]treeChildView, 0, len(result.Entries))
	for _, e := range result.Entries {
		children = append(children, treeChildView{
			Name:  e.Name,
			Type:  e.Type,
			Path:  joinTreePath(pathArg, e.Name),
			Size:  e.Size,
			IsDir: e.Type == "dir",
		})
	}
	// Stable order: dirs first, then files, alphabetically.
	sort.SliceStable(children, func(i, j int) bool {
		if children[i].IsDir != children[j].IsDir {
			return children[i].IsDir
		}
		return children[i].Name < children[j].Name
	})

	render(hostRestoreTreePage{
		HostID: host.ID, SnapshotID: snapshotID, Path: pathArg,
		Children: children,
	})
}
// defaultRestoreTargetDir returns the target shown on the step-3
// New-directory radio card, which is also the value the POST handler
// falls back to when the operator leaves the field blank. The
// "<job-id>" segment is replaced with the real job id at dispatch;
// "$HOME" is left for the agent to expand.
//
// NOTE(review): the original comment states that the agent's systemd
// unit lists $HOME/rm-restore in ReadWritePaths so this default works
// under the sandbox — not verifiable from this file; confirm against
// the unit file.
func defaultRestoreTargetDir() string {
	const (
		base        = "$HOME/rm-restore/"
		placeholder = "<job-id>"
	)
	return base + placeholder + "/"
}
// looksLikeRestoreTarget reports whether an operator-supplied target
// directory has a shape the agent can sensibly resolve: an absolute
// path, or one of the home-directory spellings ($HOME, ${HOME}, ~)
// either on its own or followed by "/". Any other environment
// variable is deliberately rejected — operator-supplied paths
// shouldn't be able to pick up arbitrary agent env values. The empty
// string is rejected.
func looksLikeRestoreTarget(p string) bool {
	if strings.HasPrefix(p, "/") {
		return true
	}
	for _, home := range []string{"$HOME", "${HOME}", "~"} {
		// Require an exact match or a "/" right after the token, so
		// look-alikes such as "$HOMEDIR" or "~user" don't slip through.
		if p == home || strings.HasPrefix(p, home+"/") {
			return true
		}
	}
	return false
}
// sessionIDFromCookie yields the string used to scope tree-list cache
// entries to one operator session: the value of the session cookie
// when it is present and non-empty, otherwise the request's remote
// address. The fallback is only expected to matter when
// requireUIUser is bypassed (e.g. in tests), since unauthenticated
// requests don't normally get this far.
func sessionIDFromCookie(r *stdhttp.Request) string {
	cookie, err := r.Cookie(sessionCookieName)
	if err != nil || cookie.Value == "" {
		return r.RemoteAddr
	}
	return cookie.Value
}
// joinTreePath appends a child name to a directory path, producing
// an absolute snapshot-relative path. Trailing slashes on dir are
// collapsed first, so "", "/" and "/etc/" all join cleanly without
// yielding a doubled separator.
func joinTreePath(dir, name string) string {
	// For "" and "/" the trimmed parent is empty, which gives "/name".
	parent := strings.TrimRight(dir, "/")
	return parent + "/" + name
}
// Blank reference that keeps the ui package import in use: nothing
// else in this file names the package directly, and Go rejects
// unused imports.
var _ = ui.User{}
+380
View File
@@ -0,0 +1,380 @@
// ui_restore_test.go — covers the restore wizard backend (P3-01).
package http
import (
	"context"
	"encoding/json"
	"io"
	stdhttp "net/http"
	"net/url"
	"strings"
	"testing"
	"time"

	"github.com/coder/websocket"
	"github.com/oklog/ulid/v2"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// seedSnapshot installs exactly one snapshot row for hostID through
// ReplaceHostSnapshots and returns its full ID. The short ID is the
// first 8 characters of that ID. Fails the test if the store call
// errors.
func seedSnapshot(t *testing.T, st *store.Store, hostID, hostname string) string {
	t.Helper()
	snapID := strings.ReplaceAll(ulid.Make().String(), "-", "")
	snap := store.Snapshot{
		ID:        snapID,
		ShortID:   snapID[:8],
		Time:      time.Now().UTC().Add(-2 * time.Hour),
		Hostname:  hostname,
		Paths:     []string{"/etc"},
		Tags:      []string{"system-config"},
		SizeBytes: 612 * 1024 * 1024,
		FileCount: 100,
	}
	err := st.ReplaceHostSnapshots(context.Background(), hostID, []store.Snapshot{snap}, time.Now().UTC())
	if err != nil {
		t.Fatalf("seed snapshot: %v", err)
	}
	return snapID
}
// seedTwoSnapshots installs two snapshots for hostID with a single
// ReplaceHostSnapshots call and returns their IDs (older one first).
// One call is required: ReplaceHostSnapshots swaps the whole set, so
// two seedSnapshot calls would leave only the second row behind.
func seedTwoSnapshots(t *testing.T, st *store.Store, hostID, hostname string) (string, string) {
	t.Helper()
	older := strings.ReplaceAll(ulid.Make().String(), "-", "")
	newer := strings.ReplaceAll(ulid.Make().String(), "-", "")

	rows := make([]store.Snapshot, 0, 2)
	rows = append(rows, store.Snapshot{
		ID:       older,
		ShortID:  older[:8],
		Time:     time.Now().UTC().Add(-3 * time.Hour),
		Hostname: hostname,
		Paths:    []string{"/etc"},
		Tags:     []string{"system-config"},
	})
	rows = append(rows, store.Snapshot{
		ID:       newer,
		ShortID:  newer[:8],
		Time:     time.Now().UTC().Add(-1 * time.Hour),
		Hostname: hostname,
		Paths:    []string{"/etc"},
		Tags:     []string{"system-config"},
	})

	err := st.ReplaceHostSnapshots(context.Background(), hostID, rows, time.Now().UTC())
	if err != nil {
		t.Fatalf("seed snapshots: %v", err)
	}
	return older, newer
}
// TestRestoreWizardGetRendersStep1 requests the wizard without a
// pre-selected snapshot and expects a 200 page carrying the wizard
// heading and one of the step-1 prompts.
func TestRestoreWizardGetRendersStep1(t *testing.T) {
	t.Parallel()
	const hostname = "rstore-host-1"
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, hostname)
	_ = seedSnapshot(t, st, hostID, hostname)
	cookie := loginAsAdmin(t, st)

	wizardURL := ts.URL + "/hosts/" + hostID + "/restore"
	request, _ := stdhttp.NewRequest(stdhttp.MethodGet, wizardURL, nil)
	request.AddCookie(cookie)
	resp, err := stdhttp.DefaultClient.Do(request)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusOK {
		t.Fatalf("status: got %d, want 200", resp.StatusCode)
	}
	page := readBody(t, resp.Body)
	if !strings.Contains(page, "Restore from snapshot") {
		t.Errorf("expected wizard heading; body: %s", short(page))
	}
	// Either wording of the step-1 prompt is acceptable.
	hasPrompt := strings.Contains(page, "Pick a snapshot first") ||
		strings.Contains(page, "Pick the point-in-time you want to restore from")
	if !hasPrompt {
		t.Errorf("expected step-1 prompt")
	}
}
// TestRestoreWizardGetWithSnapshotPreselected follows the deep-link
// route /hosts/{id}/snapshots/{sid}/restore and expects the page to
// show the selected-snapshot summary (short ID plus the "picked
// from" line).
func TestRestoreWizardGetWithSnapshotPreselected(t *testing.T) {
	t.Parallel()
	const hostname = "rstore-host-2"
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, hostname)
	sid := seedSnapshot(t, st, hostID, hostname)
	cookie := loginAsAdmin(t, st)

	deepLink := ts.URL + "/hosts/" + hostID + "/snapshots/" + sid + "/restore"
	request, _ := stdhttp.NewRequest(stdhttp.MethodGet, deepLink, nil)
	request.AddCookie(cookie)
	resp, err := stdhttp.DefaultClient.Do(request)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != stdhttp.StatusOK {
		t.Fatalf("status: got %d", resp.StatusCode)
	}
	page := readBody(t, resp.Body)
	// The selected summary card should reference the snapshot's short ID.
	shortID := sid[:8]
	if !strings.Contains(page, shortID) {
		t.Errorf("expected snapshot short id in body")
	}
	if !strings.Contains(page, "picked from") {
		t.Errorf("expected 'picked from N snapshots' summary line")
	}
}
// TestRestorePostRequiresSnapshot posts the wizard form with an empty
// snapshot_id and expects a 422 re-render carrying the "Pick a
// snapshot" banner.
func TestRestorePostRequiresSnapshot(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "rstore-no-snap")
	cookie := loginAsAdmin(t, st)

	form := url.Values{}
	form.Set("snapshot_id", "")
	form.Set("target_mode", "new_dir")
	form.Set("paths", "/etc/foo")

	request, _ := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	request.AddCookie(cookie)
	resp, err := stdhttp.DefaultClient.Do(request)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", got)
	}
	if page := readBody(t, resp.Body); !strings.Contains(page, "Pick a snapshot") {
		t.Errorf("expected 'Pick a snapshot' error in body")
	}
}
// TestRestorePostRequiresPaths posts a form that names a snapshot but
// no paths and expects a 422 re-render carrying the "at least one
// file" banner.
func TestRestorePostRequiresPaths(t *testing.T) {
	t.Parallel()
	const hostname = "rstore-no-paths"
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, hostname)
	sid := seedSnapshot(t, st, hostID, hostname)
	cookie := loginAsAdmin(t, st)

	form := url.Values{}
	form.Set("snapshot_id", sid)
	form.Set("target_mode", "new_dir")

	request, _ := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	request.AddCookie(cookie)
	resp, err := stdhttp.DefaultClient.Do(request)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", got)
	}
	if page := readBody(t, resp.Body); !strings.Contains(page, "at least one file") {
		t.Errorf("expected paths-required error")
	}
}
// TestRestorePostInPlaceRequiresHostnameMatch submits an in-place
// restore with the wrong confirmation host name. It expects a 422
// and then watches the agent socket briefly to make sure no restore
// command.run was dispatched.
func TestRestorePostInPlaceRequiresHostnameMatch(t *testing.T) {
	t.Parallel()
	const hostname = "rstore-inplace"
	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, srv, st, hostname)
	sid := seedSnapshot(t, st, hostID, hostname)
	conn := agentDial(t, srv, ts, hostID, token)
	sendHello(t, conn, hostname)
	_ = drainUntil(t, conn, api.MsgScheduleSet)
	cookie := loginAsAdmin(t, st)

	form := url.Values{}
	form.Set("snapshot_id", sid)
	form.Set("target_mode", "in_place")
	form.Set("paths", "/etc/nginx/nginx.conf")
	form.Set("confirm_hostname", "WRONG")

	request, _ := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	request.AddCookie(cookie)
	resp, err := stdhttp.DefaultClient.Do(request)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()
	if got := resp.StatusCode; got != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", got)
	}

	// No restore command should arrive at the agent. Read until the
	// short deadline (or any other read error) ends the loop.
	watchCtx, stop := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer stop()
	for {
		mt, raw, readErr := conn.Read(watchCtx)
		if readErr != nil {
			break
		}
		if mt != websocket.MessageText {
			continue
		}
		frame := string(raw)
		if strings.Contains(frame, `"command.run"`) && strings.Contains(frame, `"kind":"restore"`) {
			t.Fatal("unexpected restore command.run after wrong-hostname rejection")
		}
	}
}
// TestRestorePostHappyPathDispatches: a well-formed new-directory
// form must
//   - answer 204 with an HX-Redirect to /jobs/{job_id},
//   - deliver a JobRestore command.run to the agent whose payload
//     carries that same job id, the chosen snapshot and paths, and a
//     target dir with the <job-id> placeholder replaced by the job id,
//   - write exactly one host.restore audit row.
func TestRestorePostHappyPathDispatches(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, srv, st, "rstore-happy")
	sid := seedSnapshot(t, st, hostID, "rstore-happy")
	c := agentDial(t, srv, ts, hostID, token)
	sendHello(t, c, "rstore-happy")
	_ = drainUntil(t, c, api.MsgScheduleSet)
	cookie := loginAsAdmin(t, st)

	wantPaths := []string{"/etc/nginx/nginx.conf", "/etc/nginx/sites-available/alfa.conf"}
	form := url.Values{
		"snapshot_id": {sid},
		"target_mode": {"new_dir"},
		"paths":       wantPaths,
	}
	req, _ := stdhttp.NewRequest("POST",
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("HX-Request", "true")
	req.AddCookie(cookie)
	// Don't follow redirects — we want to inspect the HX-Redirect header.
	client := &stdhttp.Client{
		CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
			return stdhttp.ErrUseLastResponse
		},
	}
	res, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusNoContent {
		t.Fatalf("status: got %d, want 204", res.StatusCode)
	}
	redirect := res.Header.Get("HX-Redirect")
	if redirect == "" {
		t.Fatal("expected HX-Redirect header pointing at the live job page")
	}
	// The redirect is the only place the response exposes the job id;
	// pull it out so the payload can be checked against the real value
	// instead of a guess about what a ULID looks like.
	jobID := strings.TrimPrefix(redirect, "/jobs/")
	if jobID == "" || jobID == redirect {
		t.Fatalf("HX-Redirect: got %q, want /jobs/<job-id>", redirect)
	}

	// Find the dispatched command.run on the agent socket.
	deadline := time.Now().Add(2 * time.Second)
	var got api.Envelope
	for time.Now().Before(deadline) {
		ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
		mt, raw, rerr := c.Read(ctx)
		cancel()
		if rerr != nil {
			break
		}
		if mt != websocket.MessageText {
			continue
		}
		if !strings.Contains(string(raw), `"command.run"`) || !strings.Contains(string(raw), `"kind":"restore"`) {
			continue
		}
		if err := json.Unmarshal(raw, &got); err != nil {
			t.Fatalf("unmarshal: %v", err)
		}
		break
	}
	if got.Type != api.MsgCommandRun {
		t.Fatal("never received restore command.run")
	}
	var cp api.CommandRunPayload
	if err := got.UnmarshalPayload(&cp); err != nil {
		t.Fatalf("unmarshal payload: %v", err)
	}
	if cp.Kind != api.JobRestore {
		t.Fatalf("kind: got %q", cp.Kind)
	}
	if cp.JobID != jobID {
		t.Fatalf("job id: payload has %q, redirect has %q", cp.JobID, jobID)
	}
	if cp.Restore == nil {
		t.Fatal("restore payload is nil")
	}
	if cp.Restore.SnapshotID != sid {
		t.Fatalf("snapshot id: got %q want %q", cp.Restore.SnapshotID, sid)
	}
	if cp.Restore.InPlace {
		t.Fatal("expected new-directory mode (in_place=false)")
	}
	// No target_dir was submitted, so the default applies with its
	// <job-id> placeholder replaced by the dispatched job id.
	if strings.Contains(cp.Restore.TargetDir, "<job-id>") {
		t.Fatalf("target_dir: placeholder not substituted; got %q", cp.Restore.TargetDir)
	}
	if wantDir := "$HOME/rm-restore/" + jobID + "/"; cp.Restore.TargetDir != wantDir {
		t.Fatalf("target_dir: got %q, want %q", cp.Restore.TargetDir, wantDir)
	}
	if len(cp.Restore.Paths) != len(wantPaths) {
		t.Fatalf("paths: got %d, want %d", len(cp.Restore.Paths), len(wantPaths))
	}
	for i, want := range wantPaths {
		if cp.Restore.Paths[i] != want {
			t.Fatalf("paths[%d]: got %q, want %q", i, cp.Restore.Paths[i], want)
		}
	}

	// Audit row.
	var n int
	if err := st.DB().QueryRow(
		`SELECT COUNT(*) FROM audit_log WHERE action = 'host.restore' AND target_id = ?`,
		hostID).Scan(&n); err != nil {
		t.Fatalf("audit count: %v", err)
	}
	if n != 1 {
		t.Fatalf("audit rows: got %d, want 1", n)
	}
}
// TestRestorePostOfflineHostRejected posts an otherwise valid
// new-directory restore form for a host whose agent never connects and
// expects the server to answer 503 Service Unavailable.
//
// Only the HTTP status is asserted. No agent socket is opened in this
// test, so there is nothing to observe a command.run on.
func TestRestorePostOfflineHostRejected(t *testing.T) {
	t.Parallel()

	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "rstore-offline")
	sid := seedSnapshot(t, st, hostID, "rstore-offline")
	cookie := loginAsAdmin(t, st)

	form := url.Values{
		"snapshot_id": {sid},
		"target_mode": {"new_dir"},
		"paths":       {"/etc/foo"},
	}
	req, err := stdhttp.NewRequest(stdhttp.MethodPost,
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	if err != nil {
		// Previously ignored; a malformed URL would have surfaced as a
		// nil-pointer panic on req below instead of a readable failure.
		t.Fatalf("new request: %v", err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	if res.StatusCode != stdhttp.StatusServiceUnavailable {
		t.Fatalf("status: got %d, want 503", res.StatusCode)
	}
}
// helpers --------------------------------------------------------------
// readBody drains body and returns everything read as a string.
//
// It reads in 4 KiB chunks until Read reports any error. The error
// itself (end-of-stream or otherwise) is not inspected or reported;
// whatever bytes arrived before it are returned.
func readBody(t *testing.T, body interface{ Read(p []byte) (int, error) }) string {
	t.Helper()
	var (
		collected = make([]byte, 0, 16*1024)
		chunk     = make([]byte, 4096)
	)
	for {
		n, readErr := body.Read(chunk)
		if n > 0 {
			// A reader may hand back data together with its final
			// error, so keep the bytes before checking the error.
			collected = append(collected, chunk[:n]...)
		}
		if readErr != nil {
			return string(collected)
		}
	}
}
// short truncates s to at most 400 bytes for use in failure messages,
// appending "…" when anything was cut. Strings of 400 bytes or fewer are
// returned unchanged.
//
// The cut point is moved back to the nearest UTF-8 rune boundary so a
// multi-byte character straddling byte 400 is dropped whole rather than
// split into invalid bytes.
func short(s string) string {
	const limit = 400
	if len(s) <= limit {
		return s
	}
	cut := limit
	// s[cut] is the first byte that would be dropped. If it is a UTF-8
	// continuation byte (10xxxxxx) the rune began earlier, so back up to
	// its lead byte. A rune has at most 3 continuation bytes, which
	// bounds the walk even on malformed input.
	for i := 0; i < 3 && cut > 0 && s[cut]&0xC0 == 0x80; i++ {
		cut--
	}
	return s[:cut] + "…"
}
+1
View File
@@ -92,6 +92,7 @@ func New() (*Renderer, error) {
"templates/partials/toast.html",
"templates/partials/awaiting_agent.html",
"templates/partials/host_chrome.html",
"templates/partials/tree_node.html",
}
pageEntries, err := fs.Glob(web.FS, "templates/pages/*.html")
+16 -2
View File
@@ -54,7 +54,7 @@ func AgentHandler(deps HandlerDeps) stdhttp.Handler {
return stdhttp.HandlerFunc(func(w stdhttp.ResponseWriter, r *stdhttp.Request) {
host, ok := authenticateAgent(r, deps.Store)
if !ok {
stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized)
stdhttp.Error(w, "unauthorised", stdhttp.StatusUnauthorized)
return
}
@@ -204,7 +204,7 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
string(p.Status), p.ExitCode, p.Stats, errMsg, p.FinishedAt); err != nil {
slog.Warn("ws: mark job finished", "job_id", p.JobID, "err", err)
}
// repo_initialized_at projection has been removed — auto-init
// repo_initialised_at projection has been removed — auto-init
// at host enrolment makes "is the repo init'd" derivable from
// the latest init job's status, no separate column needed.
if deps.JobHub != nil {
@@ -297,6 +297,20 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
// (job.started → job.finished) is sufficient signal.
slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID)
case api.MsgTreeListResult:
// Reply to a synchronous tree.list RPC. Route to the waiter
// registered against the request envelope's ID; if none is
// registered the caller already gave up (ctx expired) — drop
// the stray reply quietly.
if env.ID == "" {
slog.Warn("ws: tree.list.result missing envelope ID", "host_id", hostID)
break
}
if !deps.Hub.rpcs.resolve(env.ID, env) {
slog.Debug("ws: tree.list.result with no waiter (timeout?)",
"id", env.ID, "host_id", hostID)
}
case api.MsgError:
var ep api.ErrorPayload
_ = env.UnmarshalPayload(&ep)
+6 -1
View File
@@ -21,6 +21,11 @@ import (
type Hub struct {
// NOTE(review): mu presumably guards conns — confirm against the Hub
// methods, which are outside this hunk.
mu sync.RWMutex
conns map[string]*Conn // hostID → conn
// rpcs tracks in-flight synchronous RPC calls (e.g. tree.list).
// See rpc.go for details. Lazy-initialised via the registry's
// own register() so callers don't have to juggle a constructor:
// the zero value is usable as-is.
rpcs rpcRegistry
}
// NewHub returns an empty hub.
@@ -100,7 +105,7 @@ func NewConn(hostID string, c *websocket.Conn) *Conn {
}
// Send writes an envelope as a JSON text message. Concurrent calls
// are serialized; the underlying socket is not safe for parallel
// are serialised; the underlying socket is not safe for parallel
// writers.
func (c *Conn) Send(ctx context.Context, env api.Envelope) error {
c.writeMu.Lock()
+112
View File
@@ -0,0 +1,112 @@
package ws
import (
"context"
"errors"
"sync"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// rpcRegistry holds in-flight synchronous RPC calls. SendRPC registers
// a channel keyed by the request envelope's ID; the WS read loop's
// dispatcher routes incoming reply envelopes to the matching channel
// when their type is one of the known reply types (currently just
// tree.list.result).
//
// Entries are keyed purely by envelope ID, with no host component.
// That is fine because IDs are ULIDs — unique without any coordination
// between registries.
//
// The zero value is ready to use; register allocates the map on first
// call.
type rpcRegistry struct {
// mu is held around every read and write of pending.
mu sync.Mutex
// pending maps request envelope ID → the waiter's reply channel.
// nil until the first register call.
pending map[string]chan api.Envelope
}
// register creates the reply channel for request id, records it in the
// registry, and hands it back to the caller to wait on.
//
// The channel has capacity 1, so the dispatcher can deposit the reply
// without waiting for the receiver to be ready. The pending map is
// allocated here on first use.
func (r *rpcRegistry) register(id string) chan api.Envelope {
	r.mu.Lock()
	defer r.mu.Unlock()

	if r.pending == nil {
		r.pending = make(map[string]chan api.Envelope)
	}
	reply := make(chan api.Envelope, 1)
	r.pending[id] = reply
	return reply
}
// resolve hands env to the waiter registered under id, if any, and
// removes the entry. It reports whether a waiter was found; the
// dispatcher uses that to decide whether to log a stray reply.
func (r *rpcRegistry) resolve(id string, env api.Envelope) bool {
	r.mu.Lock()
	waiter, found := r.pending[id]
	// Deleting a missing key (or from a nil map) is a no-op, so no
	// need to branch on found here.
	delete(r.pending, id)
	r.mu.Unlock()

	if found {
		// The entry was removed under the lock, so no other resolve
		// call can reach this channel: the cap-1 buffer guarantees
		// the send completes immediately, and closing is safe.
		waiter <- env
		close(waiter)
	}
	return found
}
// release drops the entry for id without delivering anything. Callers
// use it when they stop waiting (for example on context expiry); a
// reply that shows up afterwards finds no waiter in resolve and is
// discarded. Releasing an unknown id is harmless.
func (r *rpcRegistry) release(id string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	delete(r.pending, id)
}
// SendRPC sends a request envelope to the host and blocks until a
// matching reply lands, ctx is done, or timeout elapses. The hub picks
// a fresh ULID as the envelope ID, marshals the payload, registers a
// waiter under that ID, and sends.
//
// timeout caps the wait measured from after the send; a non-positive
// value selects the 30s default. Whenever SendRPC returns — reply,
// send failure, ctx done, or timeout — the registry entry is gone, so a
// reply arriving after the caller gave up finds no waiter and is
// dropped by the dispatcher.
//
// If the host disconnects mid-flight, the read loop ends and no reply
// will ever come — the caller's ctx.Done()/timeout is the only path
// out. We could pre-fail by tracking conn lifetime, but the bound
// keeps the code simple and the worst case is one timeout's wait.
//
// Errors: the marshal error, the Hub.Send error, ctx.Err(), or a
// "timed out" error, each returned with a zero Envelope.
func (h *Hub) SendRPC(ctx context.Context, hostID string, reqType api.MessageType, payload any, timeout time.Duration) (api.Envelope, error) {
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	id := ulid.Make().String()
	env, err := api.Marshal(reqType, id, payload)
	if err != nil {
		return api.Envelope{}, err
	}

	// Register before sending so a fast reply cannot beat the waiter.
	ch := h.rpcs.register(id)
	// One release covers every exit path. After a successful resolve
	// the entry is already gone and this is a no-op.
	defer h.rpcs.release(id)

	if err := h.Send(ctx, hostID, env); err != nil {
		return api.Envelope{}, err
	}

	// An explicit timer (rather than time.After) is stopped as soon as
	// we return, instead of lingering until the full timeout fires.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case reply := <-ch:
		return reply, nil
	case <-ctx.Done():
		return api.Envelope{}, ctx.Err()
	case <-timer.C:
		return api.Envelope{}, errors.New("ws rpc: timed out waiting for reply")
	}
}
+122
View File
@@ -0,0 +1,122 @@
package ws
import (
"context"
"encoding/json"
"sync"
"testing"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestRPCRegistryRoundTrip registers a waiter, resolves it, and checks
// the same envelope comes out of the channel. It then resolves the same
// id a second time to confirm that a reply with no waiter reports false
// and does not panic.
func TestRPCRegistryRoundTrip(t *testing.T) {
	t.Parallel()

	var reg rpcRegistry
	reqID := ulid.Make().String()
	replyCh := reg.register(reqID)

	sent := api.Envelope{
		Type:    api.MsgTreeListResult,
		ID:      reqID,
		Payload: json.RawMessage(`{"path":"/"}`),
	}
	if delivered := reg.resolve(reqID, sent); !delivered {
		t.Fatal("resolve: returned false for registered id")
	}
	if received := <-replyCh; received.ID != reqID {
		t.Fatalf("id mismatch: got %q want %q", received.ID, reqID)
	}

	// The first resolve consumed the entry, so this one has no waiter.
	if delivered := reg.resolve(reqID, sent); delivered {
		t.Fatal("resolve: returned true for already-resolved id")
	}
}
// TestRPCRegistryRelease checks that once a waiter has been released, a
// later resolve for the same id finds nothing and reports false.
func TestRPCRegistryRelease(t *testing.T) {
	t.Parallel()

	var reg rpcRegistry
	reqID := ulid.Make().String()

	// The channel itself is irrelevant here; only the bookkeeping is
	// under test.
	reg.register(reqID)
	reg.release(reqID)

	if delivered := reg.resolve(reqID, api.Envelope{ID: reqID}); delivered {
		t.Fatal("resolve after release: should be no-op")
	}
}
// TestRPCRegistryConcurrent: many waiters in flight concurrently get
// only their own reply. This catches buggy keying/locking.
func TestRPCRegistryConcurrent(t *testing.T) {
t.Parallel()
var r rpcRegistry
const n = 64
ids := make([]string, n)
chs := make([]chan api.Envelope, n)
for i := 0; i < n; i++ {
ids[i] = ulid.Make().String()
chs[i] = r.register(ids[i])
}
// Resolve in random-ish order from many goroutines.
var wg sync.WaitGroup
for i := 0; i < n; i++ {
wg.Add(1)
go func(idx int) {
defer wg.Done()
r.resolve(ids[idx], api.Envelope{ID: ids[idx], Type: api.MsgTreeListResult})
}(i)
}
wg.Wait()
for i := 0; i < n; i++ {
select {
case got := <-chs[i]:
if got.ID != ids[i] {
t.Fatalf("waiter %d: got id %q want %q", i, got.ID, ids[i])
}
case <-time.After(2 * time.Second):
t.Fatalf("waiter %d: timed out", i)
}
}
}
// TestSendRPCContextCancelReleases checks the bookkeeping SendRPC does
// when the caller's ctx is cancelled: the waiter is released, and a
// reply that arrives afterwards is reported as having no waiter.
//
// It does not call SendRPC itself. With no host registered on the hub
// the send would fail before any waiting happens, so the test
// reproduces SendRPC's select by hand on top of register/release.
func TestSendRPCContextCancelReleases(t *testing.T) {
	t.Parallel()

	hub := NewHub()
	reqID := ulid.Make().String()
	replyCh := hub.rpcs.register(reqID)

	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		// Cancel shortly after the select below has started waiting.
		<-time.After(20 * time.Millisecond)
		cancel()
	}()

	// Mirror of the SendRPC wait: nothing resolves the id, so ctx wins.
	select {
	case <-replyCh:
		t.Fatal("unexpected reply")
	case <-ctx.Done():
		hub.rpcs.release(reqID)
	}

	// The entry is gone, so a late reply is never sent on the channel;
	// resolve just reports that nobody was waiting.
	if delivered := hub.rpcs.resolve(reqID, api.Envelope{ID: reqID}); delivered {
		t.Fatal("resolve after release should return false")
	}
}
+1 -1
View File
@@ -38,7 +38,7 @@ func (s *Store) GetHostRepoStats(ctx context.Context, hostID string) (*HostRepoS
// getHostRepoStatsTx is identical to GetHostRepoStats but runs on an
// existing transaction so the fetch-merge-upsert in UpsertHostRepoStats
// is fully serialized.
// is fully serialised.
func getHostRepoStatsTx(ctx context.Context, tx *sql.Tx, hostID string) (*HostRepoStats, error) {
row := tx.QueryRowContext(ctx,
`SELECT host_id, total_size_bytes, raw_size_bytes, unique_files,
@@ -0,0 +1,61 @@
-- 0012_jobs_restore_diff_kind.sql
--
-- Add 'restore' and 'diff' to the jobs.kind CHECK constraint so the
-- restore wizard (P3-01) and the snapshot-diff endpoint (P3-09) can
-- persist their job rows. SQLite can't ALTER a CHECK in place, so we
-- rebuild the table.
--
-- Rebuild safety: jobs has an inbound FK from job_logs (ON DELETE
-- CASCADE) and an outbound FK to schedules via jobs.scheduled_id.
-- CLAUDE.md flags DROP TABLE on a parent as risky under
-- foreign_keys=ON; we mitigate two ways:
--
-- 1. Stash job_logs into a temp table BEFORE rebuilding jobs, then
-- restore the rows after the rebuild settles. If a cascade
-- misbehaves we can still recover.
-- 2. Use the safe rebuild order from 0005: create jobs_new with the
-- wider CHECK → copy data → DROP jobs → RENAME jobs_new TO jobs.
-- Do NOT rename the original first (the dangling-FK trap that
-- 0005's first draft hit and 0006 cleaned up).
-- Step 1: snapshot every job_logs row before the parent table is touched.
CREATE TEMPORARY TABLE _job_logs_backup AS
SELECT job_id, seq, ts, stream, payload FROM job_logs;
-- Step 2: the replacement table. The only change in this definition that
-- the migration is about is the kind CHECK gaining 'restore' and 'diff'.
CREATE TABLE jobs_new (
id TEXT PRIMARY KEY,
host_id TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
kind TEXT NOT NULL CHECK (kind IN
('backup','init','forget','prune','check','unlock','restore','diff')),
status TEXT NOT NULL CHECK (status IN ('queued','running','succeeded','failed','cancelled')),
scheduled_id TEXT REFERENCES schedules(id) ON DELETE SET NULL,
actor_kind TEXT NOT NULL CHECK (actor_kind IN ('user','schedule','system')),
actor_id TEXT,
started_at TEXT,
finished_at TEXT,
exit_code INTEGER,
stats TEXT,
error TEXT,
created_at TEXT NOT NULL
);
-- Step 3: copy every row across. The INSERT has no explicit column list,
-- so the SELECT order below must stay in step with the jobs_new
-- definition above.
INSERT INTO jobs_new
SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
started_at, finished_at, exit_code, stats, error, created_at
FROM jobs;
-- Step 4: swap the tables (drop first, then rename — see header note 2).
DROP TABLE jobs;
ALTER TABLE jobs_new RENAME TO jobs;
-- Indexes belonging to the dropped table went with it; recreate them on
-- the rebuilt table.
CREATE INDEX jobs_host_id ON jobs(host_id);
CREATE INDEX jobs_status ON jobs(status);
CREATE INDEX jobs_created_at ON jobs(created_at);
-- Defensive: if cascade-on-DROP wiped job_logs (it shouldn't with the
-- foreign_keys behaviour SQLite documents, but the codebase has hit
-- "lost rows" before during rebuilds), restore from the temp backup.
-- INSERT OR IGNORE so re-running is harmless.
-- NOTE(review): OR IGNORE only skips rows that violate a uniqueness
-- constraint — confirm job_logs has one covering (job_id, seq),
-- otherwise surviving rows would be duplicated here.
INSERT OR IGNORE INTO job_logs (job_id, seq, ts, stream, payload)
SELECT job_id, seq, ts, stream, payload FROM _job_logs_backup;
DROP TABLE _job_logs_backup;
+56 -6
View File
@@ -233,19 +233,58 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
## Phase 3 — Restore, alerts, audit
- [ ] **P3-01** (L) Restore wizard backend: snapshot tree browse via `restic ls --json`, path picker, target selection
- [ ] **P3-02** (L) Restore wizard UI (multi-step: host → snapshot → paths → target → confirm)
- [ ] **P3-03** (M) Restore execution: `restic restore` invocation, progress streaming
- [ ] **P3-04** (L) Cross-host restore: target agent receives a temporary scoped read credential for source host's repo (single-job, auto-revoked); UI supports source→target path remapping; warns when source paths need root and target service user is non-root
> Phase 3 is split into three independently-shippable sub-phases:
> **Restore** (P3-01..03 + P3-09 + P3-X1 cancel + P3-X2 tree-list RPC),
> **Alerts** (P3-05..07), **Audit UI** (P3-08). Each sub-phase has its own
> spec → plan → implement cycle; we hand back at sub-phase boundaries.
>
> P3-04 (cross-host restore) was de-scoped during the Phase-3 brainstorm
> on 2026-05-04: disaster recovery is already covered by re-enrolling a
> replacement host with the same repo creds (snapshots reappear, restore
> is same-host). The remaining "pull a file from host A onto host C
> without giving C permanent access" use case is genuinely different and
> doesn't have a confirmed need yet, so it's moved to the **Future /
> unscheduled** section at the end of this file.
### Phase 3 — Restore ✅
> Spec: `docs/superpowers/specs/2026-05-04-p3-restore-design.md`.
> Wireframe: `_diag/p3-restore-wizard/wireframe.html`.
> Sweep screenshots: `_diag/p3-restore-sweep/`.
> Shipped on branch `p3-restore`.
- [x] **P3-X1** (S) Cancel-job feature. `command.cancel` WS envelope; agent tracks per-job ctx.CancelFunc and kills the running `restic` subprocess via context cancel (SIGTERM, SIGKILL after 5s grace via `cmd.Cancel` + `cmd.WaitDelay`); server endpoint `POST /api/jobs/{id}/cancel` bridges UI → WS; the existing UI Cancel button on `/jobs/{id}` is now real for any running kind. Sandbox-aware: `internal/restic/cancel_{unix,windows}.go` build-tags pick SIGTERM on POSIX vs `os.Kill` on Windows (which can't deliver SIGTERM). Tests: cancel mid-run via 'sleep 30' fake-restic returns JobCancelled with exit 130 in <200ms.
- [x] **P3-X2** (S) Tree-list synchronous WS RPC. `MsgTreeList` → `MsgTreeListResult` with `Envelope.ID` correlation; generic `Hub.SendRPC` helper (registry of buffered channels keyed by ULID, ctx-cancel + timeout aware). `internal/restic.ListTreeChildren` wraps `restic ls --json` and filters its recursive output to direct children. Server-side `treeCache` is per-wizard-session (keyed by session cookie + host + snapshot + path) with a 30-min TTL and lazy sweep.
- [x] **P3-01** (L) Restore wizard backend (`internal/server/http/ui_restore.go`). GET handlers render the four-step wizard against the wireframe. HTMX/fetch tree partial endpoint hits `fetchTreeWithCache`. POST validates: snapshot_id, ≥1 absolute path, in-place ⇒ confirm_hostname == host name, agent online; on error re-renders with operator's input intact. Happy path mints job_id, target = `/var/lib/restic-manager/restore/<job-id>` (server-picked, agent's writable dir under the systemd sandbox's `ReadWritePaths`), creates job row, ships `command.run` with `RestorePayload`, writes `host.restore` audit row, returns HX-Redirect (or 303) to the live job page.
- [x] **P3-02** (L) Wizard UI templates (`web/templates/pages/host_restore.html` + `partials/tree_node.html`). Single-page progressively-enabled four-step form. Form-state-driven JS computes a running tally + step-4 confirm summary client-side. Tree expansion uses plain fetch (not HTMX) for simpler target lookup; loaded-state cached per node. Top-level Restore button on host detail right rail + per-snapshot Restore action on snapshot rows. New `.snap-row` token in `web/styles/input.css`.
- [x] **P3-03** (M) Restore execution. `restic.RunRestore` builds `restore <sid> --target <dir> [--include p]...` with --json; new `pumpRestoreStdout` parses status + summary objects. `--no-ownership` is gated on the agent's restic version via `Env.AtLeastVersion(0, 17)` — the flag was added in 0.17 and 0.16 rejects it. Restic version is threaded through `runner.Config.ResticVersion` from the agent's sysinfo snapshot. New-dir target is operator-editable (default `$HOME/rm-restore/<job-id>/`); agent expands `$HOME` / `${HOME}` / `~/` at run time and calls `os.MkdirAll` on the target chain so the operator never has to pre-create the per-job subdir. `runner.RunRestore` translates `RestoreStatus` into `job.progress` (mapping FilesRestored → FilesDone, etc.); agent dispatcher case `JobRestore` reuses the `spawn()` helper from P3-X1 so cancel works. Restore-shaped job-detail variant with current-file display under the progress bar.
- [x] **P3-09** (S) `diff` between two snapshots. `JobDiff` JobKind + `restic.RunDiff` + `runner.RunDiff`; `POST /api/hosts/{id}/snapshots/diff` (and HTMX-form variant on the unprefixed path) dispatcher with two-snapshot guard + per-host snapshot-list validation; UI panel on host detail right rail (visible when 2+ snapshots) with two short-id inputs + Diff button. Output streams as log.stream to the standard live job log page.
- [x] **P3-X3** (S) Recent-restores line on host detail. `hostChromeData` grows `RestoreStatus` / `RestoreAt` / `RestoreJobID` populated via `store.LatestJobByKind(host_id, 'restore')` (already exists from P2R). `host_chrome.html` renders a small line below the init-status one with status-coloured copy + a link to the job log. Hidden when no restore has ever run on this host.
- [x] **P3-X4** (S) Job log download (txt + ndjson). New `GET /api/jobs/{id}/log.{txt|ndjson}` endpoint backed by the persisted `job_logs` table — works any time (running or finished) without pausing the live WS stream because the source is the DB, not the live socket. Plain-text format mirrors the on-screen "HH:MM:SS.mmm TAG payload" shape with a small `# job ... · kind ... · status ...` header; ndjson emits one self-contained `{seq,ts,stream,payload}` JSON object per line for `jq` / tooling. Surfaced as a single header dropdown on the live job page (`details/summary`-driven, native keyboard support, click-outside-to-close). New reusable `.dropdown` / `.dropdown-menu` / `.dropdown-item` tokens in `web/styles/input.css`.
- [x] **P3-X5** (S) UK lint locale + sweep. `.golangci.yml` misspell locale switched US → UK and the codebase swept (~73 corrections — behaviour, serialise, recognise, honour, initialise, enrol, unauthorised, etc.). Wire `ErrorCode` value `"unauthorized"` → `"unauthorised"` is a tiny contract change but the agent doesn't parse those codes today and no external clients exist yet.
- [x] **P3-X6** (S) Snapshot SIZE/FILES tooltip on host detail. The per-snapshot summary block was added by restic 0.17 (the source comment in `internal/restic/snapshots.go` incorrectly said 0.16+); on 0.16 hosts the columns render `—`. `hostDetailPage.LegacyRestic` (computed via `Env.AtLeastVersion(0, 17)`) drives a `title="Needs restic 0.17+ on the agent host. This host runs <ver>."` + `cursor: help` on the column headers, hidden once the host upgrades.
> **Migration 0012** widens the `jobs.kind` CHECK constraint to include `restore` and `diff`. Rebuild required (SQLite can't ALTER CHECK in place); follows the safe pattern from 0005, with a defensive temp-table backup of `job_logs` so the cascade-trap that bit migration 0007 wouldn't take the log history with it.
> **install.sh + systemd unit:** the install script now pre-creates `/root/rm-restore` (root-owned 0700) so the default new-dir restore target works under the sandbox out of the box; the unit's `ReadWritePaths` gains `-/root/rm-restore` (soft-fail prefix). Existing installs need a re-run of `install.sh` to pick up the new dir; new operator-typed targets are auto-created by the agent at job time.
> **As shipped (Playwright sweep against the live smoke env, 2026-05-04):** login → host detail → Restore button → wizard step 1 picks snapshot a1ac4006 (most recent) → tree drill-down `/home/steve/test` (3 lazy loads) → tick `file1` + `file2` → step 4 confirm summary populated → dispatch → live job page with running progress widget → restore succeeds, files land on disk at `/root/rm-restore/<job-id>/home/steve/test/file{1,2}` (default `$HOME/rm-restore/<job-id>/` after agent-side expansion). Custom-target restore to `/tmp/custom-restore/<job-id>/` lands inside the agent's `PrivateTmp` namespace. Snapshot diff between `a1ac4006` and `5f78c788` → diff job page, statistics output streamed (738 bytes added, 0 removed). Recent-restores line on host detail reads "last restore · succeeded 28s ago · job log →". Download dropdown serves both `.txt` and `.ndjson` with correct `Content-Type` + `Content-Disposition`. SIZE/FILES tooltip "Needs restic 0.17+ on the agent host. This host runs 0.16.4." renders on column hover.
### Phase 3 — Alerts (not started)
- [ ] **P3-05** (M) Alert engine: rule evaluation loop (failed backup, stale schedule, agent offline, check failed)
- [ ] **P3-06** (M) Notification channels: webhook, ntfy, SMTP email
- [ ] **P3-07** (S) Alert UI: list, acknowledge, resolve
### Phase 3 — Audit log UI (not started)
- [ ] **P3-08** (S) Audit log UI with filters (user, action, target, time range)
- [ ] **P3-09** (S) `diff` between two snapshots in UI
### Phase 3 acceptance
- A file deleted on a host can be restored from the UI in under 2 minutes. A failed backup raises an alert via the configured channel within 60s.
- A file deleted on a host can be restored from the UI in under 2 minutes via the wizard at `/hosts/{id}/restore`; the operator can cancel a running restore (or any other running job) from the live job page. Snapshot diff between two snapshots renders as a normal job page.
- A failed backup raises an alert via the configured channel within 60s.
- The audit-log UI lets an admin filter by user / action / target / time range.
---
@@ -290,3 +329,14 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
- [ ] **X-03** Periodic dependency updates (`dependabot` or `renovate`)
- [ ] **X-04** Threat-model review at end of each phase
- [ ] **X-05** Proper first-run onboarding UI: admin shouldn't need to `curl` `/api/bootstrap` by hand. Render the bootstrap form on the same login page (extra "setup token" field shown only while no admin user exists, hidden after); on submit POST to `/api/bootstrap`, then drop straight into a session. Surface the one-time token from the server log somewhere copy-able (or print a clickable URL with the token in the query string at first-run). Also: relax the 12-char password floor for the first-run path or document it in the form so `admin` doesn't silently fail validation.
---
## Future / unscheduled
> Items here have a plausible use case but no confirmed need. They live
> outside numbered phases until a concrete trigger (a user request, a
> security review finding, a real disaster-recovery exercise) bumps them
> back into a phase.
- [ ] **F-01** ~~P3-04~~ Cross-host restore. De-scoped from Phase 3 on 2026-05-04. Disaster recovery is already covered: stand up a replacement host, paste the original repo creds at enrolment, snapshots reappear, restore is same-host. The remaining "pull a file from host A onto host C without granting C permanent access" use case is genuinely different (file sharing / migration, not DR) and hasn't been requested. Original spec language was: "target agent receives a temporary scoped read credential for source host's repo (single-job, auto-revoked); UI supports source→target path remapping; warns when source paths need root and target service user is non-root". Re-promote when there's a real ask.
File diff suppressed because one or more lines are too long
+72
View File
@@ -206,6 +206,78 @@
.src-row.clickable > .row-link { pointer-events: auto; }
.src-row.clickable > .row-action { pointer-events: auto; }
/* ---------- dropdown menu (header actions) ----------
* Uses native <details><summary> for keyboard + no-JS support.
* The summary is styled like a .btn, the panel sits absolute below.
* Click-outside-to-close handled by CSS via :has() — no JS.
*/
.dropdown { position: relative; display: inline-block; }
.dropdown summary {
list-style: none; cursor: pointer;
/* match .btn shape */
font-size: 12px; font-weight: 500;
padding: 6px 11px; border-radius: 5px;
background: transparent;
border: 1px solid var(--line);
color: var(--ink-mid);
transition: all 120ms ease;
display: inline-flex; align-items: center; gap: 6px;
user-select: none;
}
.dropdown summary::-webkit-details-marker { display: none; }
.dropdown summary::marker { content: ""; }
.dropdown summary:hover { background: var(--panel-hi); color: var(--ink); }
.dropdown summary .chev {
font-size: 9px; color: var(--ink-fade);
transition: transform 120ms ease;
}
.dropdown[open] summary .chev { transform: rotate(180deg); }
.dropdown[open] summary { background: var(--panel-hi); color: var(--ink); }
.dropdown-menu {
position: absolute; top: calc(100% + 4px); right: 0;
z-index: 30;
min-width: 220px;
background: var(--panel);
border: 1px solid var(--line);
border-radius: 6px;
box-shadow: 0 6px 24px -8px rgba(0,0,0,0.55);
padding: 4px;
}
.dropdown-item {
display: block;
padding: 8px 11px;
border-radius: 4px;
text-decoration: none;
color: var(--ink-mid);
font-size: 12.5px;
line-height: 1.35;
}
.dropdown-item:hover { background: var(--panel-hi); color: var(--ink); }
.dropdown-item .label { display: block; color: var(--ink); font-weight: 500; }
.dropdown-item .hint {
display: block; font-size: 11px; color: var(--ink-mute); margin-top: 2px;
font-family: 'JetBrains Mono', ui-monospace, monospace;
}
/* ---------- snapshot picker rows (Restore wizard step 1) ---------- */
.snap-row {
display: grid; align-items: center;
grid-template-columns: 150px 130px 1fr 90px 130px 80px;
column-gap: 16px;
padding: 11px 14px; font-size: 13px;
border-bottom: 1px solid var(--line-soft);
cursor: pointer;
transition: background 100ms ease;
}
.snap-row:last-child { border-bottom: 0; }
.snap-row:hover { background: var(--panel-hi); }
.snap-row.head {
font-size: 11px; color: var(--ink-fade);
text-transform: uppercase; letter-spacing: 0.08em;
padding-top: 9px; padding-bottom: 9px; cursor: default;
}
.snap-row.head:hover { background: transparent; }
/* ---------- schedule rows (Schedules tab) ---------- */
.schd-row {
display: grid; align-items: center;
+34 -3
View File
@@ -35,8 +35,10 @@
<div>Snapshot id</div>
<div>Time</div>
<div>Paths</div>
<div class="text-right">Size</div>
<div class="text-right">Files</div>
<div class="text-right{{if $page.LegacyRestic}} cursor-help{{end}}"
{{if $page.LegacyRestic}}title="Needs restic 0.17+ on the agent host. This host runs {{$host.ResticVersion}}."{{end}}>Size</div>
<div class="text-right{{if $page.LegacyRestic}} cursor-help{{end}}"
{{if $page.LegacyRestic}}title="Needs restic 0.17+ on the agent host. This host runs {{$host.ResticVersion}}."{{end}}>Files</div>
<div></div>
</div>
@@ -51,7 +53,7 @@
{{if eq $s.FileCount 0}}<span class="text-ink-fade"></span>{{else}}{{comma $s.FileCount}}{{end}}
</div>
<div class="text-right">
<button class="btn btn-ghost" disabled title="restore wizard lands in P3">Restore →</button>
<a href="/hosts/{{$host.ID}}/snapshots/{{$s.ID}}/restore" class="btn">Restore →</a>
</div>
</div>
{{end}}
@@ -76,6 +78,35 @@
</p>
</div>
<div class="panel rounded-[7px] px-4 py-3.5">
<div class="text-[11px] text-ink-fade uppercase tracking-[0.1em] mb-2.5">Restore</div>
<p class="text-[12px] text-ink-mute leading-[1.55] mb-3">
Pick a snapshot, choose paths, dispatch. Live progress streams once the
agent starts.
</p>
<a href="/hosts/{{$host.ID}}/restore"
class="btn btn-block">Restore from snapshot…</a>
</div>
{{if gt $host.SnapshotCount 1}}
<div class="panel rounded-[7px] px-4 py-3.5">
<div class="text-[11px] text-ink-fade uppercase tracking-[0.1em] mb-2.5">Compare snapshots</div>
<p class="text-[12px] text-ink-mute leading-[1.55] mb-3">
Diff two snapshots to see what changed. Output streams to a live
job page like a regular run.
</p>
<form method="post" action="/hosts/{{$host.ID}}/snapshots/diff"
hx-post="/hosts/{{$host.ID}}/snapshots/diff" hx-swap="none"
class="space-y-2">
<input type="text" name="snapshot_a" placeholder="snapshot A id"
class="field mono text-[11.5px]" />
<input type="text" name="snapshot_b" placeholder="snapshot B id"
class="field mono text-[11.5px]" />
<button type="submit" class="btn btn-block">Diff →</button>
</form>
</div>
{{end}}
<div class="panel rounded-[7px] px-4 py-3.5">
<div class="text-[11px] text-bad uppercase tracking-[0.1em] font-semibold mb-2.5">Danger zone</div>
<p class="text-pretty text-[12px] text-ink-mute leading-[1.55] mb-3">
+380
View File
@@ -0,0 +1,380 @@
{{define "title"}}{{.Title}}{{end}}
{{define "content"}}
{{template "host_chrome" .}}
{{$page := .Page}}
{{$host := $page.Host}}
<div class="max-w-[1280px] mx-auto px-8 pt-6 pb-14">
<div class="flex items-baseline justify-between mb-4">
<div>
<h2 class="text-[19px] font-medium tracking-[-0.005em]">Restore from snapshot</h2>
<div class="text-[12.5px] text-ink-mute mt-1">
Pick a snapshot, choose paths, decide where files go, then dispatch.
Live progress streams to a job page once you start.
</div>
</div>
<div class="flex gap-2">
<a href="/hosts/{{$host.ID}}" class="btn">Cancel</a>
</div>
</div>
{{if $page.Error}}
<div class="rounded-[6px] px-3.5 py-3 text-[13px] mb-4"
style="border: 1px solid color-mix(in oklch, var(--bad), transparent 60%); background: color-mix(in oklch, var(--bad), transparent 92%);">
{{$page.Error}}
</div>
{{end}}
<form method="post" action="/hosts/{{$host.ID}}/restore" id="restore-form" class="space-y-4">
{{/* ============ STEP 1 — snapshot picker ============ */}}
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden">
<header class="flex items-center justify-between px-[18px] py-[14px] border-b border-line-soft"
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
<div class="flex items-center gap-3">
{{if $page.Selected}}
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
style="background: color-mix(in oklch, var(--ok), transparent 86%); color: var(--ok); border: 1px solid color-mix(in oklch, var(--ok), transparent 60%);">✓</span>
{{else}}
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
style="background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);">1</span>
{{end}}
<div>
<div class="text-[14px] font-medium">Snapshot</div>
<div class="text-[12px] text-ink-mute mt-0.5">Pick the point-in-time you want to restore from.</div>
</div>
</div>
<span class="mono text-[11px] text-ink-fade">step 1 of 4</span>
</header>
<div class="p-[18px]">
{{if $page.Selected}}
{{/* selected summary card */}}
<div class="grid items-center gap-4 px-3.5 py-3 rounded-[6px] bg-bg border border-line-soft"
style="grid-template-columns: auto 1fr auto auto;">
<span class="mono text-[12px] text-accent">{{$page.Selected.ShortID}}</span>
<div>
<div class="text-[13px] text-ink">{{$page.Selected.Time.Format "2006-01-02 15:04 MST"}} <span class="text-ink-fade mx-2">·</span><span class="text-ink-mute">{{relTime $page.Selected.Time}}</span></div>
<div class="mt-1 text-[12px] text-ink-mute">
{{range $page.Selected.Tags}}<span class="tag mr-1.5">{{.}}</span>{{end}}
paths:
{{range $i, $p := $page.Selected.Paths}}{{if $i}}, {{end}}<span class="mono text-ink-mid">{{$p}}</span>{{end}}
{{if $page.Selected.SizeBytes}} · {{bytes $page.Selected.SizeBytes}}{{end}}
</div>
</div>
<span class="text-ink-fade text-[12px]">picked from {{len $page.Snapshots}} snapshots</span>
<a href="/hosts/{{$host.ID}}/restore" class="btn">Change</a>
</div>
<input type="hidden" name="snapshot_id" value="{{$page.Selected.ID}}" />
{{else}}
{{/* full picker table */}}
<div class="rounded-[6px] border border-line-soft bg-bg overflow-hidden">
<div class="snap-row head">
<div>Time</div>
<div>Tag</div>
<div>Paths</div>
<div>Size</div>
<div>Snapshot ID</div>
<div></div>
</div>
{{if not $page.Snapshots}}
<div class="px-4 py-8 text-center text-ink-mute text-[13px]">No snapshots yet. Run a backup first.</div>
{{end}}
{{range $page.Snapshots}}
<a href="/hosts/{{$host.ID}}/snapshots/{{.ID}}/restore" class="snap-row" style="text-decoration: none; color: inherit;">
<div class="mono text-ink-mid">{{relTime .Time}}</div>
<div>{{range .Tags}}<span class="tag">{{.}}</span>{{end}}</div>
<div class="text-ink-mute" style="overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">
{{range $i, $p := .Paths}}{{if $i}}, {{end}}<span class="mono text-ink-mid">{{$p}}</span>{{end}}
</div>
<div class="mono text-ink-mid">{{if .SizeBytes}}{{bytes .SizeBytes}}{{else}}—{{end}}</div>
<div class="mono text-ink-mid">{{.ShortID}}</div>
<div></div>
</a>
{{end}}
</div>
{{end}}
</div>
</section>
{{/* ============ STEP 2 — paths (tree browser) ============ */}}
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden {{if not $page.Selected}}opacity-40 pointer-events-none{{end}}">
<header class="flex items-center justify-between px-[18px] py-[14px] border-b border-line-soft"
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
<div class="flex items-center gap-3">
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
style="{{if $page.Selected}}background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);{{else}}background: var(--bg); color: var(--ink-mute); border: 1px solid var(--line);{{end}}">2</span>
<div>
<div class="text-[14px] font-medium">Paths</div>
<div class="text-[12px] text-ink-mute mt-0.5">Tick files and directories to restore. Folders restore recursively.</div>
</div>
</div>
<span class="mono text-[11px] text-ink-fade">step 2 of 4</span>
</header>
<div class="p-[18px]">
{{if $page.Selected}}
<div class="rounded-[6px] border border-line-soft bg-bg overflow-hidden p-2">
{{/* Root tree node — fetched on first wizard render; child
expansions reuse the same tree.list cache server-side. */}}
<div id="tree-root">
<div class="text-ink-mute text-[12.5px] mono px-3 py-2">loading…</div>
</div>
</div>
<script>
// Load the snapshot's root directory listing into #tree-root as soon as the
// wizard renders. The response body is an HTML fragment produced by the tree
// endpoint and is inserted as-is; a "tree:loaded" event is then dispatched on
// <body> so other scripts on the page can react.
(function() {
  var root = document.getElementById('tree-root');
  fetch('/hosts/{{$host.ID}}/restore/tree?snapshot={{$page.Selected.ID}}&path=/', { credentials: 'same-origin' })
    .then(function(r) { return r.text(); })
    .then(function(html) {
      root.innerHTML = html;
      document.body.dispatchEvent(new CustomEvent('tree:loaded'));
    })
    .catch(function(e) {
      // Without a rejection handler a network failure would leave the
      // "loading…" placeholder on screen forever. The message is set through
      // textContent so the error text can never be parsed as markup.
      var msg = document.createElement('div');
      msg.className = 'px-3 py-2 mono text-[12px] text-bad';
      msg.textContent = 'load failed: ' + e;
      root.innerHTML = '';
      root.appendChild(msg);
    });
})();
</script>
<div class="mt-3 px-3.5 py-2.5 rounded-[6px] text-[12.5px]"
style="border: 1px solid color-mix(in oklch, var(--accent), transparent 70%); background: color-mix(in oklch, var(--accent), transparent 92%);">
<span class="text-accent" id="tally-count">0 files selected</span>
<span class="text-ink-fade mx-2">·</span>
<span class="text-ink-mute mono" id="tally-paths">tick a file or directory above</span>
</div>
{{else}}
<div class="text-ink-mute text-[13px]">Pick a snapshot above to load its paths.</div>
{{end}}
</div>
</section>
{{/* ============ STEP 3 — target ============ */}}
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden {{if not $page.Selected}}opacity-40 pointer-events-none{{end}}">
<header class="flex items-center justify-between px-[18px] py-[14px] border-b border-line-soft"
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
<div class="flex items-center gap-3">
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
style="background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);">3</span>
<div>
<div class="text-[14px] font-medium">Target</div>
<div class="text-[12px] text-ink-mute mt-0.5">Where should the files land? Defaults to a fresh, isolated directory.</div>
</div>
</div>
<span class="mono text-[11px] text-ink-fade">step 3 of 4</span>
</header>
<div class="p-[18px]">
<div class="grid grid-cols-2 gap-3.5">
<label class="block rounded-[7px] p-4 cursor-pointer transition border target-card-new"
id="target-new-card"
style="border-color: color-mix(in oklch, var(--accent), transparent 50%); background: color-mix(in oklch, var(--accent), transparent 95%);">
<div class="flex items-start gap-3">
<input type="radio" name="target_mode" value="new_dir" class="mt-1" {{if not $page.FormInPlace}}checked{{end}} />
<div class="flex-1">
<div class="text-[14px] font-medium text-ink">New directory</div>
<div class="text-[12px] text-ink-mute mt-1 leading-[1.55]">
Files restore into a fresh path on the host. Original files untouched.
Restored as the agent user — original uid/gid is dropped (restic ≥ 0.17;
older versions preserve it).
</div>
<div class="mt-3 flex items-center gap-2.5">
<span class="text-ink-fade mono text-[12px]"></span>
<input type="text" name="target_dir" id="target-dir-input"
class="field mono text-[12px] flex-1"
value="{{if $page.FormTargetDir}}{{$page.FormTargetDir}}{{else}}{{$page.DefaultTargetDir}}{{end}}"
placeholder="$HOME/rm-restore/&lt;job-id&gt;/" />
</div>
<div class="text-[11.5px] text-ink-fade mt-1.5">
<span class="mono">$HOME</span> resolves to the agent user's home;
<span class="mono">&lt;job-id&gt;</span> is substituted on dispatch.
Edit if you want a specific directory.
</div>
</div>
</div>
</label>
<label class="block rounded-[7px] p-4 cursor-pointer transition border target-card-inplace"
id="target-inplace-card"
style="border-color: color-mix(in oklch, var(--bad), transparent 70%); background: color-mix(in oklch, var(--bad), transparent 96%);">
<div class="flex items-start gap-3">
<input type="radio" name="target_mode" value="in_place" class="mt-1" {{if $page.FormInPlace}}checked{{end}} />
<div class="flex-1">
<div class="text-[14px] font-medium">
<span class="text-bad">In place</span>
<span class="text-ink-mute font-normal">— overwrite original paths</span>
</div>
<div class="text-[12px] text-ink-mute mt-1 leading-[1.55]">
Files replace whatever is at their original paths.
Original ownership and permissions are preserved.
<span class="text-bad">Destructive — cannot be undone.</span>
</div>
<div class="mt-3 px-3 py-3 rounded-[5px]"
style="background: color-mix(in oklch, var(--bad), transparent 92%); border: 1px solid color-mix(in oklch, var(--bad), transparent 60%);">
<div class="text-[11px] text-bad uppercase tracking-[0.08em] font-medium">Confirm host name</div>
<div class="text-[11.5px] text-ink-mute mt-1 leading-[1.55]">
Type <span class="mono text-ink">{{$host.Name}}</span> to enable this option.
</div>
<input type="text" name="confirm_hostname" class="field mono mt-2"
placeholder="{{$host.Name}}"
value="{{$page.FormConfirmHN}}" />
</div>
</div>
</div>
</label>
</div>
</div>
</section>
{{/* ============ STEP 4 — confirm ============ */}}
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden {{if not $page.Selected}}opacity-40 pointer-events-none{{end}}">
<header class="flex items-center justify-between px-[18px] py-[14px]"
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
<div class="flex items-center gap-3">
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
style="background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);">4</span>
<div>
<div class="text-[14px] font-medium">Confirm &amp; start</div>
<div class="text-[12px] text-ink-mute mt-0.5">Final review. Logs and progress will stream live.</div>
</div>
</div>
<span class="mono text-[11px] text-ink-fade">step 4 of 4</span>
</header>
<div class="px-[18px] pb-[18px]" id="confirm-summary">
<div class="text-[12px] text-ink-mute py-2">A summary will appear here once you've made your selections.</div>
</div>
</section>
{{/* sticky-style action bar */}}
<div class="rounded-[8px] border border-line-soft px-[18px] py-[14px] flex items-center justify-between"
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
<div class="text-[12.5px] text-ink-mute">
Audit row <span class="mono text-ink-mid">host.restore</span> will be written on dispatch.
</div>
<div class="flex items-center gap-2.5">
<a href="/hosts/{{$host.ID}}" class="btn">Back</a>
<button type="submit" id="dispatch-btn" class="btn btn-primary btn-lg" {{if not $page.Online}}disabled title="agent is offline"{{end}}>
Start restore →
</button>
</div>
</div>
</form>
</div>
{{/* Lightweight JS to drive the live tally + summary card + tree toggle.
The tree-toggle is plain fetch (not HTMX) so its target lookup is
trivial — the .tree-children div is always the next sibling
inside the same .tree-pair wrapper. */}}
<script>
// Expand/collapse handler for a directory row in the restore tree.
//
// btn is the toggle button inside a .tree-pair wrapper; the matching
// .tree-children container is a direct child of that wrapper. On the first
// click the listing is fetched from the button's data-tree-url and inserted
// into the container; later clicks only show/hide what was already loaded.
window.__rmTreeToggle = function(btn) {
  var pair = btn.closest('.tree-pair');
  if (!pair) return;
  var kids = pair.querySelector(':scope > .tree-children');
  if (!kids) return;

  // Already fetched: plain show/hide.
  if (btn.getAttribute('data-loaded') === 'true') {
    kids.classList.toggle('hidden');
    btn.textContent = kids.classList.contains('hidden') ? '▸' : '▾';
    return;
  }

  var url = btn.getAttribute('data-tree-url');
  btn.disabled = true; // prevent a second fetch while this one is in flight
  fetch(url, { credentials: 'same-origin' })
    .then(function(r) {
      // Carry the HTTP status along with the body so a failed response can be
      // displayed without being cached as the directory's content.
      return r.text().then(function(html) { return { ok: r.ok, html: html }; });
    })
    .then(function(res) {
      kids.innerHTML = res.html;
      kids.classList.remove('hidden');
      btn.textContent = '▾';
      // Only a successful listing is marked as loaded; after an error
      // response the next click fetches again instead of toggling stale
      // error markup.
      if (res.ok) btn.setAttribute('data-loaded', 'true');
      btn.disabled = false;
      // Notify the wizard's recompute() that tally state may have changed.
      document.body.dispatchEvent(new CustomEvent('tree:loaded'));
    })
    .catch(function(e) {
      // Build the error row with textContent so the error text is never
      // interpreted as HTML. data-loaded stays unset, so clicking retries.
      var msg = document.createElement('div');
      msg.className = 'px-3 py-2 mono text-[12px] text-bad';
      msg.textContent = 'load failed: ' + e;
      kids.innerHTML = '';
      kids.appendChild(msg);
      kids.classList.remove('hidden');
      btn.textContent = '▾';
      btn.disabled = false;
    });
};
(function() {
const form = document.getElementById('restore-form');
if (!form) return;
const tallyCount = document.getElementById('tally-count');
const tallyPaths = document.getElementById('tally-paths');
const dispatchBtn = document.getElementById('dispatch-btn');
const summary = document.getElementById('confirm-summary');
const inplaceRadio = document.querySelector('input[name="target_mode"][value="in_place"]');
const newRadio = document.querySelector('input[name="target_mode"][value="new_dir"]');
const newCard = document.getElementById('target-new-card');
const inplaceCard = document.getElementById('target-inplace-card');
const confirmInput = document.querySelector('input[name="confirm_hostname"]');
const hostName = {{$host.Name | js}};
const defaultTarget = {{$page.DefaultTargetDir | js}};
const selectedSnapID = {{if $page.Selected}}{{$page.Selected.ShortID | js}}{{else}}""{{end}};
const selectedSnapTime = {{if $page.Selected}}{{$page.Selected.Time.Format "2006-01-02 15:04 MST" | js}}{{else}}""{{end}};
function getCheckedPaths() {
return Array.from(form.querySelectorAll('input[name="paths"]:checked')).map(i => i.value);
}
function recompute() {
const paths = getCheckedPaths();
const count = paths.length;
if (tallyCount) tallyCount.textContent = count + ' file' + (count === 1 ? '' : 's') + ' selected';
if (tallyPaths) {
tallyPaths.textContent = count === 0 ? 'tick a file or directory above'
: paths.slice(0, 4).join(' · ') + (count > 4 ? ' …' : '');
}
// Card emphasis on radio change
if (newCard && inplaceCard && inplaceRadio && newRadio) {
const isInPlace = inplaceRadio.checked;
newCard.style.borderColor = isInPlace ? 'var(--line-soft)' : 'color-mix(in oklch, var(--accent), transparent 50%)';
newCard.style.background = isInPlace ? 'var(--bg)' : 'color-mix(in oklch, var(--accent), transparent 95%)';
inplaceCard.style.borderColor = isInPlace ? 'color-mix(in oklch, var(--bad), transparent 35%)' : 'color-mix(in oklch, var(--bad), transparent 70%)';
inplaceCard.style.background = isInPlace ? 'color-mix(in oklch, var(--bad), transparent 90%)' : 'color-mix(in oklch, var(--bad), transparent 96%)';
}
// Dispatch button state
if (dispatchBtn) {
const inPlace = inplaceRadio && inplaceRadio.checked;
const okConfirm = !inPlace || (confirmInput && confirmInput.value.trim() === hostName);
const enabled = count > 0 && okConfirm;
dispatchBtn.disabled = !enabled || !{{if $page.Online}}true{{else}}false{{end}};
dispatchBtn.textContent = inPlace ? 'Start restore (overwrite) →' : 'Start restore →';
if (inPlace) dispatchBtn.classList.add('btn-danger'); else dispatchBtn.classList.remove('btn-danger');
}
// Summary card
if (summary) {
if (count === 0) {
summary.innerHTML = '<div class="text-[12px] text-ink-mute py-2">A summary will appear here once you\'ve made your selections.</div>';
} else {
const inPlace = inplaceRadio && inplaceRadio.checked;
const escTarget = defaultTarget
.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
const targetLine = inPlace
? '<span class="text-bad">in place · originals will be overwritten</span>'
: '<span class="text-ink">New directory</span> <span class="text-ink-fade mx-2">·</span> <span class="mono text-ink-mid">' + escTarget + '</span>';
const ownLine = 'preserved (uid/gid/mode/mtime)';
const pathLines = paths.slice(0, 12).map(p => '<div>' + p + '</div>').join('');
const more = paths.length > 12 ? ('<div class="text-ink-fade">… and ' + (paths.length - 12) + ' more</div>') : '';
summary.innerHTML = `
<div class="rounded-[6px] border border-line-soft p-3.5 bg-bg">
<div class="grid gap-y-2.5" style="grid-template-columns: 140px 1fr; column-gap: 18px; font-size: 13px;">
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Source</span>
<div>snapshot <span class="mono text-accent">${selectedSnapID}</span> · <span class="text-ink-mid">${selectedSnapTime}</span></div>
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Paths</span>
<div>
<span class="text-ink">${count} file${count === 1 ? '' : 's'}</span>
<div class="mono text-[11.5px] text-ink-mute mt-1.5 leading-[1.7]">${pathLines}${more}</div>
</div>
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Target</span>
<div>${targetLine}</div>
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Ownership</span>
<div class="text-ink-mute">${ownLine}</div>
</div>
</div>
`;
}
}
}
// Recompute on any change in the form (path checks, radio swap, typed-confirm).
form.addEventListener('change', recompute);
form.addEventListener('input', recompute);
// Also after HTMX swaps in tree fragments (so initial state is right).
document.body.addEventListener('htmx:afterSwap', recompute);
recompute();
})();
</script>
{{end}}
+48 -3
View File
@@ -63,6 +63,22 @@
</div>
</div>
<div class="flex items-center gap-2">
<details class="dropdown" id="download-menu">
<summary>
Download log
<span class="chev"></span>
</summary>
<div class="dropdown-menu">
<a class="dropdown-item" href="/api/jobs/{{$job.ID}}/log.txt">
<span class="label">Plain text</span>
<span class="hint">.txt · for humans / paste into a ticket</span>
</a>
<a class="dropdown-item" href="/api/jobs/{{$job.ID}}/log.ndjson">
<span class="label">JSON Lines</span>
<span class="hint">.ndjson · pipe into jq / tooling</span>
</a>
</div>
</details>
{{if $page.IsActive}}
<button class="btn btn-danger" id="cancel-btn"
hx-post="/api/jobs/{{$job.ID}}/cancel"
@@ -71,14 +87,24 @@
<a href="/hosts/{{$host.ID}}" class="btn">Back to host</a>
{{end}}
</div>
<script>
// Dismiss the "Download log" dropdown whenever a click lands outside of it.
(function() {
  var menu = document.getElementById('download-menu');
  if (!menu) return;

  function closeOnOutsideClick(event) {
    // Nothing to do while the menu is closed.
    if (!menu.open) return;
    // Clicks inside the menu (summary or items) keep it as it is.
    if (menu.contains(event.target)) return;
    menu.open = false;
  }

  document.addEventListener('click', closeOnOutsideClick);
})();
</script>
</div>
{{/* ---------- progress (running only) ---------- */}}
{{if $page.IsActive}}
<div class="mt-7" id="progress-block">
<div class="mt-7 panel rounded-[8px] p-[18px]" id="progress-block">
<div class="flex items-center justify-between mb-2.5">
<div class="flex items-center gap-3 text-sm">
<span class="mono text-ink font-medium" id="progress-pct"></span>
<div class="flex items-center gap-3.5 text-sm">
<span class="mono text-ink font-medium" id="progress-pct" style="font-size: 18px;"></span>
<span class="text-ink-mute" id="progress-bytes"></span>
</div>
<div class="text-sm text-ink-mute" id="progress-rate"></div>
@@ -86,6 +112,12 @@
<div class="progress-track">
<div class="progress-fill" id="progress-fill" style="width: 0%;"></div>
</div>
{{if eq (printf "%s" $job.Kind) "restore"}}
<div class="mt-3 text-[12px] text-ink-mute" id="restore-current-block">
<span class="text-ink-fade uppercase tracking-[0.08em] text-[10.5px]">Current</span>
<span class="mono text-ink-mid ml-2.5" id="restore-current-file"></span>
</div>
{{end}}
</div>
{{end}}
@@ -194,6 +226,18 @@
return (i === 0 ? n.toFixed(0) : n.toFixed(1)) + ' ' + u[i];
}
const currentFileEl = document.getElementById('restore-current-file');

// Restore jobs only: mirror the most recent stdout line that looks like an
// absolute path into the "Current" slot. The element exists only when the
// page rendered the restore progress block, so this is a no-op otherwise.
// NOTE(review): relies on the restore emitting bare per-file path lines on
// stdout — confirm against the agent's actual output.
function maybeUpdateCurrent(p) {
  if (!currentFileEl) return;
  if (p.stream !== 'stdout') return;

  const text = (p.payload || '').trim();
  // Accept only lines that start with "/" and stay under 400 characters.
  const looksLikePath = text.startsWith('/') && text.length < 400;
  if (!looksLikePath) return;

  currentFileEl.textContent = text;
}
function appendLine(p) {
// Drop the "awaiting" placeholder once real lines arrive.
if (stream.children.length === 1 && stream.firstElementChild.textContent.includes('awaiting agent')) {
@@ -208,6 +252,7 @@
`<span class="log-stream-${p.stream}">${escapeHtml(p.payload)}</span>`;
stream.appendChild(line);
if (autoScroll) container.scrollTop = container.scrollHeight;
maybeUpdateCurrent(p);
}
ws.onmessage = (ev) => {
+20
View File
@@ -121,6 +121,26 @@
</div>
{{end}}
{{/* ---------- latest restore line (P3-X3) ---------- */}}
{{if $page.RestoreStatus}}
<div class="text-[11.5px] text-ink-mute mt-1 leading-[1.5]">
{{if eq $page.RestoreStatus "succeeded"}}
last restore · <span class="text-ok">succeeded</span> <span class="mono text-ink-mid">{{relTime $page.RestoreAt}}</span> ·
<a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job log →</a>
{{else if eq $page.RestoreStatus "failed"}}
last restore · <span class="text-bad font-medium">failed</span> <span class="mono text-ink-mid">{{relTime $page.RestoreAt}}</span> ·
<a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job log →</a>
{{else if eq $page.RestoreStatus "running"}}
<span class="text-accent">restore running…</span> · <a href="/jobs/{{$page.RestoreJobID}}" class="link mono">live log →</a>
{{else if eq $page.RestoreStatus "cancelled"}}
last restore · <span class="text-warn">cancelled</span> <span class="mono text-ink-mid">{{relTime $page.RestoreAt}}</span> ·
<a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job log →</a>
{{else if eq $page.RestoreStatus "queued"}}
<span class="text-ink-fade">restore queued</span> · <a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job {{$page.RestoreJobID}}</a>
{{end}}
</div>
{{end}}
{{/* ---------- secondary tabs ---------- */}}
<div class="flex items-end mt-1.5">
<a class="sub-tab {{if eq $page.SubTab "snapshots"}}active{{end}}" href="/hosts/{{$host.ID}}">Snapshots <span class="mono text-ink-fade text-[11px] ml-1">{{comma $host.SnapshotCount}}</span></a>
+39
View File
@@ -0,0 +1,39 @@
{{define "tree_node"}}
{{/* tree_node renders one directory listing of the restore tree as an HTML
fragment.

Reads from .Page:
  Error      — when set, only an error row is rendered.
  Path       — shown in the header row of the listing.
  Children   — entries to list; each provides IsDir, Path, Name and Size.
  HostID, SnapshotID — used to build each directory's data-tree-url.

Per child a .tree-pair wrapper is emitted containing:
  - for directories, a toggle button whose data-tree-url points at
    /hosts/<HostID>/restore/tree?snapshot=<SnapshotID>&path=<child path> and
    whose onclick calls window.__rmTreeToggle;
  - a checkbox named "paths" whose value is the child's Path;
  - the entry name (directories get a trailing "/");
  - for files, the size via the bytes helper, or an em dash when Size is zero
    or unset.
Directories additionally get an empty, hidden .tree-children container as a
direct child of the wrapper. */}}
{{$page := .Page}}
{{if $page.Error}}
<div class="px-3 py-2 mono text-[12px] text-bad">error: {{$page.Error}}</div>
{{else}}
<div class="flex items-center gap-2 px-3 py-1.5 text-[12px] text-ink-mute border-b border-line-soft">
<span class="mono text-ink-mid">{{$page.Path}}</span>
{{if not $page.Children}}
<span class="text-ink-fade ml-auto mono text-[11px]">empty directory</span>
{{end}}
</div>
{{range $page.Children}}
<div class="tree-pair">
<div class="grid items-center gap-2 px-3 py-[5px] mono text-[12.5px] border-b border-line-soft"
style="grid-template-columns: 14px 16px auto 1fr auto;">
{{if .IsDir}}
<button type="button"
class="tree-toggle text-ink-mute text-[10px] cursor-pointer"
data-tree-url="/hosts/{{$page.HostID}}/restore/tree?snapshot={{$page.SnapshotID}}&path={{.Path}}"
data-loaded="false"
onclick="window.__rmTreeToggle(this)">▸</button>
{{else}}
<span class="text-ink-fade text-center">·</span>
{{end}}
<label class="cursor-pointer flex items-center justify-center">
<input type="checkbox" name="paths" value="{{.Path}}"
class="w-[13px] h-[13px] cursor-pointer" />
</label>
<span class="{{if .IsDir}}text-ink{{else}}text-ink-mid{{end}}">{{.Name}}{{if .IsDir}}/{{end}}</span>
<span></span>
<span class="text-[11px] text-ink-fade">{{if not .IsDir}}{{if .Size}}{{bytes .Size}}{{else}}—{{end}}{{end}}</span>
</div>
{{if .IsDir}}
<div class="tree-children hidden pl-5 border-l border-line-soft ml-5"></div>
{{end}}
</div>
{{end}}
{{end}}
{{end}}