diff --git a/.golangci.yml b/.golangci.yml index 787123f..791caac 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -26,7 +26,7 @@ linters: - name: exported arguments: ["disableStutteringCheck"] misspell: - locale: US + locale: UK exclusions: rules: - path: _test\.go diff --git a/cmd/agent/main.go b/cmd/agent/main.go index ac43d3c..123cb50 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -136,6 +136,7 @@ func run() error { d := &dispatcher{ resticBin: resticBin, + resticVer: snap.ResticVersion, secrets: sec, scheduler: scheduler.New(), } @@ -200,6 +201,7 @@ func openSecretsStore(cfg *config.Config) (*secrets.Store, error) { // so a job dispatched in the same session sees the latest values. type dispatcher struct { resticBin string + resticVer string // e.g. "0.17.1"; empty if restic isn't installed yet secrets *secrets.Store scheduler *scheduler.Scheduler @@ -210,6 +212,45 @@ type dispatcher struct { bwMu sync.Mutex bwUpKBps int bwDownKBps int + + // Per-running-job cancellation handles. Populated when runJob + // spawns the goroutine, removed when it returns. Looked up by + // the command.cancel handler (server → agent) to abort an + // in-flight restic invocation. + cancelMu sync.Mutex + cancels map[string]context.CancelFunc +} + +// trackJob registers a cancel func for an in-flight job and returns a +// cleanup that removes it. Call cleanup when the job goroutine exits +// regardless of outcome — runs even on panic. +func (d *dispatcher) trackJob(jobID string, cancel context.CancelFunc) func() { + d.cancelMu.Lock() + if d.cancels == nil { + d.cancels = make(map[string]context.CancelFunc) + } + d.cancels[jobID] = cancel + d.cancelMu.Unlock() + return func() { + d.cancelMu.Lock() + delete(d.cancels, jobID) + d.cancelMu.Unlock() + } +} + +// cancelJob fires the cancel func for jobID if there is one and +// returns whether the job was actually known. 
The runner is expected +// to surface the resulting context.Canceled as a JobCancelled status +// in its job.finished envelope (see runner.sendFinished). +func (d *dispatcher) cancelJob(jobID string) bool { + d.cancelMu.Lock() + cancel, ok := d.cancels[jobID] + d.cancelMu.Unlock() + if !ok { + return false + } + cancel() + return true } func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.Sender) error { @@ -222,8 +263,29 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S return d.runJob(ctx, p, tx) case api.MsgCommandCancel: - // TODO(P2): cancellation requires keeping a job→cancelFunc map. - slog.Info("ws agent: command.cancel received (cancellation lands in P2)", "id", env.ID) + var p api.CommandCancelPayload + if err := env.UnmarshalPayload(&p); err != nil { + return fmt.Errorf("command.cancel: %w", err) + } + if d.cancelJob(p.JobID) { + slog.Info("ws agent: command.cancel applied", "job_id", p.JobID) + } else { + // Job already finished or was never seen on this agent. + // Not an error — operator may have raced cancel against + // natural completion. Server-side state is authoritative. + slog.Info("ws agent: command.cancel for unknown job (already finished?)", "job_id", p.JobID) + } + + case api.MsgTreeList: + // Synchronous RPC for the restore wizard's tree browser. The + // server has serialised access; we just run restic ls and reply + // with the same envelope ID. Run in a goroutine so the WS read + // loop keeps draining. 
+ var p api.TreeListRequestPayload + if err := env.UnmarshalPayload(&p); err != nil { + return fmt.Errorf("tree.list: %w", err) + } + go d.handleTreeList(ctx, env.ID, p, tx) case api.MsgScheduleSet: var p api.ScheduleSetPayload @@ -332,6 +394,72 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S return nil } +// handleTreeList runs `restic ls --json <snapshot_id> <path>` and ships +// the matching tree.list.result envelope back, correlated by the +// request envelope's ID. Errors (missing creds, restic failure) +// surface in the result's Error field rather than as transport-level +// failures so the server-side waiter can render a sensible message. +func (d *dispatcher) handleTreeList(ctx context.Context, reqID string, p api.TreeListRequestPayload, tx wsclient.Sender) { + reply := func(result api.TreeListResultPayload) { + result.SnapshotID = p.SnapshotID + result.Path = p.Path + env, err := api.Marshal(api.MsgTreeListResult, reqID, result) + if err != nil { + slog.Warn("ws agent: marshal tree.list.result", "err", err) + return + } + _ = tx.Send(env) + } + + if d.resticBin == "" { + reply(api.TreeListResultPayload{Error: "restic binary not located on this agent"}) + return + } + creds, err := d.secrets.Load() + if err != nil { + reply(api.TreeListResultPayload{Error: "load credentials: " + err.Error()}) + return + } + if creds.Empty() { + reply(api.TreeListResultPayload{Error: "repo credentials not configured"}) + return + } + + d.bwMu.Lock() + upKBps, downKBps := d.bwUpKBps, d.bwDownKBps + d.bwMu.Unlock() + + env := restic.Env{ + Bin: d.resticBin, + RepoURL: creds.URL, + RepoUsername: creds.Username, + RepoPassword: creds.Password, + LimitUploadKBps: upKBps, + LimitDownloadKBps: downKBps, + } + + // 60s ceiling matches snapshots/stats — restic ls on a single + // directory is normally sub-second; if the repo is unreachable we + // want to surface the failure rather than block the wizard. 
+ listCtx, cancel := context.WithTimeout(ctx, 60*time.Second) + defer cancel() + + entries, err := env.ListTreeChildren(listCtx, p.SnapshotID, p.Path) + if err != nil { + reply(api.TreeListResultPayload{Error: err.Error()}) + return + } + apiEntries := make([]api.TreeListEntry, 0, len(entries)) + for _, e := range entries { + apiEntries = append(apiEntries, api.TreeListEntry{ + Name: e.Name, + Type: e.Type, + Size: e.Size, + }) + } + reply(api.TreeListResultPayload{Entries: apiEntries}) +} + // runJob spawns a runner for one job. We launch a goroutine so the // WS read loop keeps draining messages while restic chugs along. func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsclient.Sender) error { @@ -367,6 +495,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc r := runner.New(runner.Config{ ResticBin: d.resticBin, + ResticVersion: d.resticVer, RepoURL: creds.URL, RepoUsername: creds.Username, RepoPassword: creds.Password, @@ -374,6 +503,25 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc LimitDownloadKBps: downKBps, }, tx, time.Second) + // spawn wraps the kind-specific goroutine: derives a per-job + // cancellable context from the connection-scoped ctx, registers + // the cancel func so command.cancel can fire it, deregisters on + // completion. Per-job ctx means cancelling one job doesn't kill + // any other in-flight invocations. 
+ spawn := func(name string, fn func(ctx context.Context) error) { + jobCtx, cancel := context.WithCancel(ctx) + cleanup := d.trackJob(p.JobID, cancel) + go func() { + defer cleanup() + defer cancel() // release ctx resources on goroutine exit + if err := fn(jobCtx); err != nil { + slog.Warn("agent: "+name+" job failed", "job_id", p.JobID, "err", err) + return + } + slog.Info("agent: "+name+" job complete", "job_id", p.JobID) + }() + } + switch p.Kind { case api.JobBackup: // Includes/Excludes/Tag come from the source group resolved @@ -391,22 +539,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc slog.Info("agent: accepting backup job", "job_id", p.JobID, "paths", paths, "excludes", p.Excludes, "tag", p.Tag) hooks := runner.BackupHooks{Pre: p.PreHook, Post: p.PostHook} - go func() { - if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags, hooks); err != nil { - slog.Warn("agent: backup job failed", "job_id", p.JobID, "err", err) - return - } - slog.Info("agent: backup job complete", "job_id", p.JobID) - }() + spawn("backup", func(jobCtx context.Context) error { + return r.RunBackup(jobCtx, p.JobID, paths, p.Excludes, tags, hooks) + }) case api.JobInit: slog.Info("agent: accepting init job", "job_id", p.JobID) - go func() { - if err := r.RunInit(ctx, p.JobID); err != nil { - slog.Warn("agent: init job failed", "job_id", p.JobID, "err", err) - return - } - slog.Info("agent: init job complete", "job_id", p.JobID) - }() + spawn("init", func(jobCtx context.Context) error { + return r.RunInit(jobCtx, p.JobID) + }) case api.JobForget: if len(p.ForgetGroups) == 0 { // Hard-error rather than fall back to a single-policy form: @@ -433,13 +573,9 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc }) } slog.Info("agent: accepting forget job", "job_id", p.JobID, "groups", len(groups)) - go func() { - if err := r.RunForget(ctx, p.JobID, groups); err != nil { - slog.Warn("agent: forget job failed", 
"job_id", p.JobID, "err", err) - return - } - slog.Info("agent: forget job complete", "job_id", p.JobID) - }() + spawn("forget", func(jobCtx context.Context) error { + return r.RunForget(jobCtx, p.JobID, groups) + }) case api.JobPrune: // Prune may require admin creds (delete authority on rest-server). runCreds := creds @@ -455,6 +591,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc } prr := runner.New(runner.Config{ ResticBin: d.resticBin, + ResticVersion: d.resticVer, RepoURL: runCreds.URL, RepoUsername: runCreds.Username, RepoPassword: runCreds.Password, @@ -462,29 +599,50 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc LimitDownloadKBps: downKBps, }, tx, time.Second) slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds) - go func() { - if err := prr.RunPrune(ctx, p.JobID); err != nil { - slog.Warn("agent: prune job failed", "job_id", p.JobID, "err", err) - } - }() + spawn("prune", func(jobCtx context.Context) error { + return prr.RunPrune(jobCtx, p.JobID) + }) case api.JobCheck: subset := 0 if len(p.Args) > 0 { subset, _ = strconv.Atoi(p.Args[0]) } slog.Info("agent: accepting check job", "job_id", p.JobID, "subset_pct", subset) - go func() { - if err := r.RunCheck(ctx, p.JobID, subset); err != nil { - slog.Warn("agent: check job failed", "job_id", p.JobID, "err", err) - } - }() + spawn("check", func(jobCtx context.Context) error { + return r.RunCheck(jobCtx, p.JobID, subset) + }) case api.JobUnlock: slog.Info("agent: accepting unlock job", "job_id", p.JobID) - go func() { - if err := r.RunUnlock(ctx, p.JobID); err != nil { - slog.Warn("agent: unlock job failed", "job_id", p.JobID, "err", err) - } - }() + spawn("unlock", func(jobCtx context.Context) error { + return r.RunUnlock(jobCtx, p.JobID) + }) + case api.JobRestore: + if p.Restore == nil { + return fmt.Errorf("restore: command.run carried no restore payload") + } + rp := *p.Restore + if 
rp.SnapshotID == "" { + return fmt.Errorf("restore: snapshot_id is required") + } + if !rp.InPlace && rp.TargetDir == "" { + return fmt.Errorf("restore: target_dir required for non-in-place restore") + } + slog.Info("agent: accepting restore job", + "job_id", p.JobID, "snapshot_id", rp.SnapshotID, + "paths", rp.Paths, "in_place", rp.InPlace, "target", rp.TargetDir) + spawn("restore", func(jobCtx context.Context) error { + return r.RunRestore(jobCtx, p.JobID, rp.SnapshotID, rp.Paths, rp.InPlace, rp.TargetDir) + }) + case api.JobDiff: + if p.Diff == nil || p.Diff.SnapshotA == "" || p.Diff.SnapshotB == "" { + return fmt.Errorf("diff: command.run carried incomplete diff payload") + } + dp := *p.Diff + slog.Info("agent: accepting diff job", + "job_id", p.JobID, "a", dp.SnapshotA, "b", dp.SnapshotB) + spawn("diff", func(jobCtx context.Context) error { + return r.RunDiff(jobCtx, p.JobID, dp.SnapshotA, dp.SnapshotB) + }) default: return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind) } diff --git a/deploy/install/install.sh b/deploy/install/install.sh index f35319b..a0ff7ac 100755 --- a/deploy/install/install.sh +++ b/deploy/install/install.sh @@ -49,6 +49,13 @@ detect_arch() { ensure_dirs() { install -d -m 0700 -o root -g root "$RM_CONFIG_DIR" install -d -m 0700 -o root -g root "$RM_STATE_DIR" + # Default new-directory restore target: $HOME/rm-restore. Pre-create + # so the systemd unit's ReadWritePaths bind-mount applies cleanly + # (paths that don't exist when systemd starts get a soft-fail + # because of the '-' prefix, but the agent then can't mkdir into + # the read-only /root). Mode 0700 + root-owned matches the threat + # model — files restored here are operator-readable as root. 
+ install -d -m 0700 -o root -g root /root/rm-restore } detect_existing_schedulers() { diff --git a/deploy/install/restic-manager-agent.service b/deploy/install/restic-manager-agent.service index 01931e1..5faf370 100644 --- a/deploy/install/restic-manager-agent.service +++ b/deploy/install/restic-manager-agent.service @@ -37,7 +37,12 @@ AmbientCapabilities=CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_CHOWN # needs. Filesystem reads stay open: that's the whole job. NoNewPrivileges=true ProtectSystem=strict -ReadWritePaths=/etc/restic-manager /var/lib/restic-manager +# /etc/restic-manager: agent.yaml + secrets.enc. +# /var/lib/restic-manager: agent state (currently unused but reserved). +# /root/rm-restore: default target for new-directory restores +# ($HOME/rm-restore// resolves here for User=root). +# ReadWritePaths overrides ProtectHome=read-only on this subdir only. +ReadWritePaths=/etc/restic-manager /var/lib/restic-manager -/root/rm-restore ProtectHome=read-only ProtectHostname=true ProtectKernelTunables=true diff --git a/docs/superpowers/specs/2026-05-04-p3-restore-design.md b/docs/superpowers/specs/2026-05-04-p3-restore-design.md new file mode 100644 index 0000000..7dbc747 --- /dev/null +++ b/docs/superpowers/specs/2026-05-04-p3-restore-design.md @@ -0,0 +1,342 @@ +# P3 — Restore (design) + +> Phase 3 sub-spec covering single-host restore (P3-01, P3-02, P3-03, P3-09). +> P3-04 (cross-host restore) is deferred to a new "Future / unscheduled" +> section in `tasks.md` — disaster recovery is already covered by re-enrolling +> a replacement host with the same repo credentials. +> +> Wireframe: `_diag/p3-restore-wizard/wireframe.html`. Screenshot: +> `_diag/p3-restore-wizard/01-full-wizard.png`. + +## Scope locked + +Brainstorm decisions (in order asked): + +1. **In-place vs new-directory.** Default is a new directory under + `/var/restic-restore//`. 
A "Restore in place (overwrite original + paths)" toggle is gated by typed-confirmation of the host name, mirroring + the repo re-init pattern. +2. **Path-selection granularity.** Tree browser as the path selector, lazy- + loaded via `restic ls --json <snapshot_id> <path>` per directory expansion. +3. **Cross-host restore (P3-04).** Out of scope this phase. Move to + "Future / unscheduled" in `tasks.md`. The disaster-recovery case is covered + by the standard enrolment flow: stand up a replacement host, paste the + original repo creds at enrolment, snapshots reappear, restore is + same-host. +4. **Snapshot diff (P3-09).** Diff-as-a-job. New `JobDiff` JobKind dispatched + like every other agent operation. Output streams as `log.stream` and + renders on the live job log page. +5. **Wizard entry points.** Top-level "Restore" button on host detail + (`/hosts/{id}/restore`, opens wizard at step 1) plus a per-snapshot + Restore action on snapshot rows (`/hosts/{id}/snapshots/{sid}/restore`, + skips step 1). +6. **Wizard interaction model.** Single-page, sections progressively enable; + tree-browser nodes lazy-load via HTMX partials. No `restore_drafts` table. +7. **Tree-browser data path.** Synchronous WS RPC (`tree.list` ↔ + `tree.list.result`, correlation-ID) plus a per-wizard-session in-memory + cache keyed by `{snapshot_id, path}` with ~30-min TTL. +8. **Restore progress UI.** Restore-specific job-page variant: files-restored + / bytes-restored / throughput / ETA / current-file display, driven by + restic restore's JSON status events surfaced through `job.progress`. +9. **Permissions/ownership.** Policy, not toggle. In-place restore preserves + original ownership; new-directory restore drops ownership + (`--no-ownership`). +10. **Concurrency.** Single-flight per host (one job at a time across all + kinds). 
Plus a real cancel-job feature: `command.cancel` envelope, agent + kills the `restic` subprocess via context cancel (SIGTERM, SIGKILL after + grace), server transitions the job to `cancelled`. The "Cancel" button + already in the `job_detail` template becomes real for any running job + kind. +11. **Audit + safety.** Audit row on every restore dispatch (`host.restore` + with snapshot ID, paths, target, in-place flag). Recent-restores panel + on the host page surfacing the latest restore job alongside last-backup + and last-init signals. Role gate deferred to P4-03. + +## Architecture + +Restore composes from existing primitives plus three new pieces: + +- **New JobKind values**: `JobRestore`, `JobDiff`. Dispatcher cases mirror + the prune/check pattern. Agent-side handlers wrap `restic.RunRestore` and + `restic.RunDiff` (new methods on the `restic` package). +- **New WS RPC**: `tree.list` request (`{snapshot_id, path}`) ↔ + `tree.list.result` reply (`{entries: [{name, type, size}], ...}` or + `{error}`). Reuses existing correlation-ID infrastructure from P1-09. No + `jobs` row. +- **New cancel surface**: `command.cancel` request (`{job_id}`), agent + cancels the running subprocess context, returns `command.ack` + `job.finished` + with status `cancelled`. Server endpoint `POST /api/jobs/{id}/cancel` + bridges UI button → WS envelope. + +Everything else (job lifecycle, log streaming, progress envelope, snapshot +listing, audit log writer, host_chrome partial, danger-zone typed-confirmation) +already exists and is reused verbatim. 
+ +### Component boundaries + +| Component | Purpose | Depends on | +| ---------------------------------- | ---------------------------------------------------- | ----------------------------------------- | +| `internal/restic.RunRestore` | Run `restic restore` with paths + target + ownership | `restic.Env` | +| `internal/restic.RunDiff` | Run `restic diff --json a b` | `restic.Env` | +| `internal/agent/runner` cases | Dispatch `JobRestore` / `JobDiff` jobs | `restic.Run*`, hooks (skipped: backup-only) | +| `internal/agent/runner` cancel hook | Wire WS `command.cancel` → ctx.CancelFunc per job | runner job map | +| `internal/agent/runner` tree-list | Sync RPC handler: `restic ls --json` for one path | `restic.Env` | +| `internal/server/ws/cancel.go` | Validate + send `command.cancel` envelope | hub.Send, store.UpdateJobStatus | +| `internal/server/ws/tree.go` | RPC mediator: `tree.list` request → reply, with cache | hub.SendRPC, in-memory cache | +| `internal/server/http/restore.go` | Wizard routes + dispatch endpoint | store, ws, audit | +| `internal/server/http/diff.go` | Snapshot-diff dispatch endpoint | store, ws | +| `internal/server/http/cancel.go` | `POST /api/jobs/{id}/cancel` | ws | +| `web/templates/pages/host_restore.html` | Wizard page | host_chrome partial | +| `web/templates/partials/tree_node.html` | Lazy-loaded tree node fragment for HTMX swap | — | +| `web/templates/pages/job_detail.html` | Restore-kind progress widget (variant) | existing job_detail | + +### Data flow — wizard happy path + +``` +operator + ├─ GET /hosts/{id}/restore + │ server renders wizard shell, snapshot table from store.ListSnapshotsByHost + │ + ├─ click snapshot row (or arrives via /hosts/{id}/snapshots/{sid}/restore) + │ wizard advances to step 2, snapshot summary card rendered + │ + ├─ expand a tree node (chevron click) + │ HTMX GET /hosts/{id}/restore/tree?snapshot={sid}&path=/etc + │ server checks per-session cache (keyed by sid+path) + │ hit → render tree_node fragment 
from cache + │ miss → hub.SendRPC(host_id, "tree.list", {sid, path}) → wait reply + │ cache result, render tree_node fragment + │ + ├─ tick file/dir checkboxes (form state, no round-trip) + │ + ├─ pick target radio (and optionally type host name to unlock in-place) + │ + └─ POST /hosts/{id}/restore (form submit) + server validates: ≥1 path, target mode, in-place ⇒ host name match + write audit row host.restore + store.CreateJob{kind=restore, payload={snapshot_id, paths, target, in_place}} + hub.Send(host_id, "command.run", {job_id, kind=restore, payload}) + HX-Redirect: /jobs/{job_id} +``` + +### Data flow — agent restore execution + +``` +agent.runner receives command.run kind=restore + ├─ check single-flight: if r.activeJobID != "" → reply busy + │ (server queues to pending_runs only for kind=backup; restore returns busy) + ├─ allocate ctx, ctxCancel — store cancelFunc against job_id in r.cancels + ├─ sendStarted(job_id, JobRestore, now) + ├─ build target path: if in_place → "/" else "/var/restic-restore//" + ├─ build flags: paths from payload, --no-ownership when !in_place + ├─ restic.RunRestore(ctx, env, snapshot_id, paths, target, in_place): + │ restic restore --target [--no-ownership] -- ... 
+ │ parse stdout JSON: forward "status" → job.progress (1Hz throttle), "summary" → final + ├─ on success: sendFinished(job_id, succeeded, exit=0) + ├─ on ctx.Err() == context.Canceled: sendFinished(job_id, cancelled, exit=130) + └─ delete cancel func from r.cancels +``` + +### Data flow — cancel + +``` +operator clicks Cancel on /jobs/{id} (running) + POST /api/jobs/{id}/cancel + server: lookup job, ensure status=running, find host + hub.Send(host_id, "command.cancel", {job_id}) + → agent.runner receives command.cancel + cancelFunc, ok := r.cancels[job_id] + ok && cancelFunc() + → restic subprocess context done → exec.Cmd kills via SIGTERM + → if still alive after 5s grace → SIGKILL + → runner sendFinished(job_id, cancelled, exit=130) + → server receives job.finished status=cancelled, persists, broadcasts + → browser refresh shows cancelled state +``` + +The cancel surface is independently useful for any kind (prune/check/backup) — +not gated to restore. The button already in `job_detail.html` becomes real. + +### Tree-list RPC details + +New WS message types (added to `internal/api/messages.go`): + +``` +type TreeListRequestPayload struct { + SnapshotID string `json:"snapshot_id"` + Path string `json:"path"` +} + +type TreeListEntry struct { + Name string `json:"name"` + Type string `json:"type"` // "dir" | "file" | "symlink" + Size int64 `json:"size,omitempty"` +} + +type TreeListResultPayload struct { + SnapshotID string `json:"snapshot_id"` + Path string `json:"path"` + Entries []TreeListEntry `json:"entries,omitempty"` + Error string `json:"error,omitempty"` +} +``` + +Server-side mediator (`ws.SendRPC`) takes a request envelope, registers the +correlation ID in a pending map, sends, blocks on a per-call channel until +the matching reply arrives (or 30s timeout). The pattern is small enough +to inline in `internal/server/ws/rpc.go` as a generic helper — future +synchronous RPCs reuse it. 
+ +In-memory cache: `map[sessionID]map[cacheKey]TreeListResultPayload` with +`cacheKey = snapshot_id + "\x00" + path`. Session ID minted per wizard +load (HTTP-only cookie scoped to `/hosts/{id}/restore/tree`, lifetime 30 +min). On wizard close (browser navigation away) the entry expires +naturally. No persistence, no migration. + +Agent handler runs `restic ls --json <snapshot_id> <path>` (non-recursive — restic +defaults to recursive but `restic ls` accepts `--long` and a path filter; +parse output line-by-line and emit only direct children of `path`). 60s +context timeout, mirroring existing `restic snapshots` invocation. + +### Restore payload + +`api.CommandRunPayload` gains a nested optional `restore` field: + +``` +type RestorePayload struct { + SnapshotID string `json:"snapshot_id"` + Paths []string `json:"paths"` // absolute paths inside the snapshot + InPlace bool `json:"in_place"` + TargetDir string `json:"target_dir"` // empty when in_place=true + PreserveOwner bool `json:"preserve_owner"` // mirrors policy: in_place=>true, else=>false +} +``` + +The payload is set by the server when dispatching `JobRestore` and ignored +on every other kind. Wire-shape test pinned in `wire_test.go`. + +### Diff payload + +`api.CommandRunPayload` gains: + +``` +type DiffPayload struct { + SnapshotA string `json:"snapshot_a"` + SnapshotB string `json:"snapshot_b"` +} +``` + +Set on `JobDiff`. Output is plain `restic diff --json <snapshot_a> <snapshot_b>` forwarded as +`log.stream` lines. Job page renders unchanged — operator reads the diff +output directly. + +### Recent-restores panel + +A small panel rendered on the host detail page below the existing init-status +line: + +``` +last restore: succeeded 2h ago · job f73ab4c1… · 3 files to /var/restic-restore/... +``` + +Backed by a new `store.LatestJobByKind(host_id, JobRestore)` query (mirroring +the existing `store.LatestJobByKind` already used for init/forget/prune/check +in P2R-06). One template addition in `host_chrome.html` next to the +`InitStatus` block.
+ +## Routes added + +| Method | Path | Purpose | +| ------- | --------------------------------------------------------- | ----------------------------------------------------------- | +| GET | `/hosts/{id}/restore` | Wizard shell (step 1 = snapshot picker) | +| GET | `/hosts/{id}/snapshots/{sid}/restore` | Wizard shell with snapshot pre-selected (skips step 1) | +| GET | `/hosts/{id}/restore/tree` | HTMX partial: tree node listing for `?snapshot=&path=` | +| POST | `/hosts/{id}/restore` | Validate + dispatch restore job, redirect to live job page | +| POST | `/api/hosts/{id}/snapshots/diff` | Dispatch a diff job for `{snapshot_a, snapshot_b}` | +| POST | `/api/jobs/{id}/cancel` | Send `command.cancel` to host, transition job → cancelled | + +## Migrations + +None. Restore + diff piggyback on the existing `jobs` table (their `kind` is +new but the schema already accepts arbitrary kind strings — there's no +CHECK constraint on `kind`). The cancel feature uses the existing +`JobCancelled` terminal status. The tree-list cache lives in process memory. + +## Tests (target coverage) + +- `internal/restic/restore_test.go` — `RunRestore` invocation builds the + expected argv (paths, --target, --no-ownership flag presence, in-place + variant); JSON status parsing → `BackupStatus`-shaped progress envelopes. +- `internal/restic/diff_test.go` — `RunDiff` argv shape and JSON forwarding. +- `internal/agent/runner/restore_test.go` — happy path, cancel mid-run + produces `cancelled` finished, in-place vs new-directory dispatch, + single-flight rejects when another job is running. +- `internal/agent/runner/tree_test.go` — `tree.list` handler returns + direct children for a synthetic restic ls output, surfaces error on + missing snapshot. +- `internal/server/ws/rpc_test.go` — `SendRPC` correlation matching, + timeout, concurrent calls. 
+- `internal/server/http/restore_test.go` — wizard renders with snapshots, + POST validates ≥1 path + in-place host-name match, audit row written, + job dispatched with correct payload, in-place without typed-confirm + re-renders form with input intact and an error. +- `internal/server/http/diff_test.go` — POST dispatches `JobDiff`, + snapshot IDs validated against the host's snapshot list. +- `internal/server/http/cancel_test.go` — POST cancel happy path + (running → cancelled), 4xx for non-running jobs, 4xx when host offline. +- `internal/server/http/restore_e2e_test.go` — happy path: GET wizard, + expand `/etc` (HTMX call returns expected fragment), submit, follow + HX-Redirect to job page, see status. +- `web/templates/pages/host_restore_test.go` (template-render test) — + wizard renders all four sections; in-place card disabled until typed + confirm. + +## Playwright iteration / sweep + +A Playwright sweep at the end (mirroring P2R-02 Slice 6) runs against the +local smoke server with a real agent enrolled. Steps: + +1. Login → navigate to alfa-01 host → click Restore. +2. Wizard step 1: pick the most recent snapshot. +3. Wizard step 2: expand a directory two levels, tick three files, + verify tally updates. +4. Wizard step 3: leave default new-directory. +5. Wizard step 4: dispatch. +6. Land on live job page, see progress widget animating, see log lines. +7. Click Cancel mid-flight, verify status transitions to cancelled and + the agent's subprocess actually died (log line `signal: killed` or exit + 130). +8. Repeat with in-place mode: type host name, dispatch, verify red + primary button, verify files actually overwritten on host. +9. Snapshot diff: navigate to snapshots, pick two, dispatch diff, see + diff output streamed. +10. Screenshots into `_diag/p3-restore-sweep/`. + +End-to-end clean, zero console errors, before handing back. 
+ +## What does NOT change + +- `host_chrome.html` only grows the recent-restores line; sub-tab list + unchanged (Restore is a top-level button on the host page, not a sub-tab). +- `enrollment.go`, schedule reconciliation, source-group CRUD, repo + maintenance ticker, hook execution — none of these are touched. +- The CLAUDE.md restage block applies as-is when the agent binary changes + (it does — runner gains restore/diff/cancel/tree handlers). The unit + file does not change. + +## Open questions / explicit non-goals + +- **Restore preview / dry-run.** Restic doesn't have a dry-run for restore. + Out of scope. +- **Resumable restore.** Restic restore is idempotent per-file but not + resumable mid-stream from where it left off. If a restore is cancelled, + the operator re-runs (files already written are overwritten). No state + to track. +- **Restore to a glob/pattern (e.g. `*.conf`).** Out of scope; the tree + picker requires explicit ticks. Power users can edit the URL or use the + CLI. +- **Bandwidth caps for restore.** Honoured automatically — restic's + `--limit-download` is part of `restic.Env` already (P2R-13) and applies + to restore unchanged. +- **Pre/post hooks for restore.** Hooks today gate only `kind=backup` + (P2R-11). Out of scope. diff --git a/internal/agent/runner/cancel_test.go b/internal/agent/runner/cancel_test.go new file mode 100644 index 0000000..2df7e74 --- /dev/null +++ b/internal/agent/runner/cancel_test.go @@ -0,0 +1,81 @@ +package runner + +import ( + "context" + "strings" + "testing" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// (fakeSender is defined in runner_test.go; it's already lock-protected +// because the runner's stdout + stderr pump goroutines call Send +// concurrently. The original local 'safeSender' here was a workaround +// from before fakeSender itself grew the mutex.) 
+ +// TestRunBackupCanceledMidRunReportsCanceled spawns a backup against +// a fake restic that sleeps for 30 seconds, cancels the context after +// a short delay, and confirms the resulting job.finished envelope +// reports status=canceled (not failed). +func TestRunBackupCanceledMidRunReportsCanceled(t *testing.T) { + t.Parallel() + + // Fake restic: replace the shell with a long sleep via `exec` so the + // process tree is one process — SIGTERM goes directly to sleep and + // it exits. Without `exec`, the shell stays in the foreground while + // sleep is its child; SIGTERM-to-shell may or may not propagate to + // sleep depending on the shell, leading to the WaitDelay-then- + // SIGKILL fallback path firing — slower and noisier. + bin := setupScript(t, `exec sleep 30`) + + tx := &fakeSender{} + r := New(Config{ResticBin: bin}, tx, 0) + + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan error, 1) + go func() { + done <- r.RunBackup(ctx, "job-cancel", []string{"/tmp/x"}, nil, nil, BackupHooks{}) + }() + + // Wait long enough for the subprocess to actually start before + // canceling. Without this, exec.CommandContext can race the + // kill against Start and produce a different error path. + time.Sleep(150 * time.Millisecond) + cancel() + + select { + case <-done: + case <-time.After(15 * time.Second): + t.Fatal("RunBackup did not return within 15s of cancel") + } + + // Locate the job.finished envelope and check its status. 
+ envs := tx.snapshot() + var finEnv api.Envelope + var found bool + for _, e := range envs { + if e.Type == api.MsgJobFinished { + finEnv = e + found = true + break + } + } + if !found { + t.Fatal("no job.finished envelope was sent") + } + var fin api.JobFinishedPayload + if err := finEnv.UnmarshalPayload(&fin); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if fin.Status != api.JobCancelled { + t.Fatalf("status: got %q, want %q", fin.Status, api.JobCancelled) + } + if fin.ExitCode != 130 { + t.Errorf("exit_code: got %d, want 130 (POSIX cancel convention)", fin.ExitCode) + } + // The error message should be empty for canceled jobs (see runner.sendFinished). + if !strings.HasPrefix(fin.Error, "") || fin.Error != "" { + t.Errorf("error: got %q, want empty for canceled jobs", fin.Error) + } +} diff --git a/internal/agent/runner/restore_test.go b/internal/agent/runner/restore_test.go new file mode 100644 index 0000000..e2f366c --- /dev/null +++ b/internal/agent/runner/restore_test.go @@ -0,0 +1,266 @@ +package runner + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// TestRunRestoreShipsExpectedEnvelopes: a fake restic emits a couple +// of restore status lines and a summary; the runner translates them +// into job.progress envelopes and finishes the job successfully. 
+func TestRunRestoreShipsExpectedEnvelopes(t *testing.T) { + t.Parallel() + + bin := setupScript(t, ` +case "$1" in + restore) + echo '{"message_type":"status","seconds_elapsed":1,"percent_done":0.5,"total_files":10,"files_restored":5,"total_bytes":1000,"bytes_restored":500}' + echo '{"message_type":"status","seconds_elapsed":2,"percent_done":1.0,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}' + echo '{"message_type":"summary","seconds_elapsed":2,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}' + ;; + *) + echo "unknown: $*" ;; +esac +`) + + tx := &fakeSender{} + r := New(Config{ResticBin: bin}, tx, 0) + + if err := r.RunRestore(context.Background(), "job-r1", "f3a7b2c1", + []string{"/etc/nginx/sites-available/alfa.conf"}, + false, "/tmp/restore-out"); err != nil { + t.Fatalf("RunRestore: %v", err) + } + + // Confirm landmarks: started → progress → finished. + order := envelopeOrder(tx.envs) + wants := []api.MessageType{api.MsgJobStarted, api.MsgJobProgress, api.MsgJobFinished} + positions := map[api.MessageType]int{} + for i, mt := range order { + if _, seen := positions[mt]; !seen { + positions[mt] = i + } + } + for i := 0; i < len(wants)-1; i++ { + a, b := wants[i], wants[i+1] + pa, aOK := positions[a] + pb, bOK := positions[b] + if !aOK { + t.Fatalf("envelope %q not found in %v", a, order) + } + if !bOK { + t.Fatalf("envelope %q not found in %v", b, order) + } + if pa >= pb { + t.Fatalf("expected %q before %q (positions %d, %d)", a, b, pa, pb) + } + } + + // Started carries the right kind. + startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted) + var startP api.JobStartedPayload + if err := startEnv.UnmarshalPayload(&startP); err != nil { + t.Fatalf("unmarshal started: %v", err) + } + if startP.Kind != api.JobRestore { + t.Fatalf("kind: got %q want %q", startP.Kind, api.JobRestore) + } + + // Finished is succeeded. 
+ finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished) + var finP api.JobFinishedPayload + if err := finEnv.UnmarshalPayload(&finP); err != nil { + t.Fatalf("unmarshal finished: %v", err) + } + if finP.Status != api.JobSucceeded { + t.Fatalf("status: got %q want %q", finP.Status, api.JobSucceeded) + } + // Progress envelope reflects the last status line: 100% with 10 files. + progEnv := firstEnvOfType(t, tx.envs, api.MsgJobProgress) + var progP api.JobProgressPayload + if err := progEnv.UnmarshalPayload(&progP); err != nil { + t.Fatalf("unmarshal progress: %v", err) + } + // First progress will be from line 1 (50%) since we send first status + // immediately. Verify we at least see a sensible value. + if progP.PercentDone <= 0 { + t.Fatalf("expected non-zero progress, got %v", progP.PercentDone) + } + if progP.FilesDone <= 0 || progP.TotalFiles <= 0 { + t.Fatalf("expected file counters set, got %+v", progP) + } +} + +// TestRunRestoreInPlaceArgvHasNoNoOwnership: indirectly verifies that +// in-place mode doesn't pass --no-ownership. We can't see the actual +// argv without a custom test harness, so we use a fake restic that +// echoes its args and check the captured log.stream. +func TestRunRestoreInPlaceArgvHasNoNoOwnership(t *testing.T) { + t.Parallel() + + bin := setupScript(t, ` +case "$1" in + restore) + # Print all args on stderr so they're forwarded as log.stream. + echo "argv: $*" 1>&2 + echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}' + ;; +esac +`) + + tx := &fakeSender{} + r := New(Config{ResticBin: bin}, tx, 0) + if err := r.RunRestore(context.Background(), "job-r2", "abc", + nil, true, ""); err != nil { + t.Fatalf("RunRestore: %v", err) + } + + // Reconstruct the argv from the captured stderr log line. 
+ var argv string + for _, e := range tx.envs { + if e.Type == api.MsgLogStream { + var p api.LogStreamLine + _ = e.UnmarshalPayload(&p) + if p.Stream == api.LogStderr && strings.HasPrefix(p.Payload, "argv:") { + argv = p.Payload + break + } + } + } + if argv == "" { + t.Fatal("never captured argv echo from fake restic") + } + if strings.Contains(argv, "--no-ownership") { + t.Errorf("in-place restore should NOT pass --no-ownership; got argv=%q", argv) + } + if !strings.Contains(argv, "--target /") { + t.Errorf("in-place restore should pass --target /; got argv=%q", argv) + } +} + +// TestRunRestoreNewDirArgvShape: non-in-place restore passes --target +// to the operator-chosen new directory and includes the path filters. +// We deliberately do NOT pass --no-ownership (added in restic 0.17; +// older versions error out — the comment in restore.go explains why). +func TestRunRestoreNewDirArgvShape(t *testing.T) { + t.Parallel() + + bin := setupScript(t, ` +case "$1" in + restore) + echo "argv: $*" 1>&2 + echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}' + ;; +esac +`) + tx := &fakeSender{} + r := New(Config{ResticBin: bin}, tx, 0) + if err := r.RunRestore(context.Background(), "job-r3", "abc", + []string{"/etc/foo"}, false, "/tmp/restore-out"); err != nil { + t.Fatalf("RunRestore: %v", err) + } + + var argv string + for _, e := range tx.envs { + if e.Type == api.MsgLogStream { + var p api.LogStreamLine + _ = e.UnmarshalPayload(&p) + if p.Stream == api.LogStderr && strings.HasPrefix(p.Payload, "argv:") { + argv = p.Payload + break + } + } + } + if argv == "" { + t.Fatal("no argv echo") + } + if strings.Contains(argv, "--no-ownership") { + t.Errorf("restic 0.16 doesn't accept --no-ownership; got argv=%q", argv) + } + if !strings.Contains(argv, "--target /tmp/restore-out") { + t.Errorf("expected --target /tmp/restore-out; got argv=%q", argv) + } + if !strings.Contains(argv, "--include /etc/foo") { 
+ t.Errorf("expected --include /etc/foo; got argv=%q", argv) + } +} + +// TestRunRestoreNewDirAutoCreatesTarget: a new-directory restore +// should mkdir the requested target chain before invoking restic, so +// operators don't have to pre-create the per-job subdir. +func TestRunRestoreNewDirAutoCreatesTarget(t *testing.T) { + t.Parallel() + bin := setupScript(t, ` +case "$1" in + restore) + echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}' + ;; +esac +`) + tx := &fakeSender{} + r := New(Config{ResticBin: bin}, tx, 0) + + // Multi-level path the operator hasn't created yet. + target := filepath.Join(t.TempDir(), "deep", "deeper", "deepest") + if err := r.RunRestore(context.Background(), "job-rmkdir", "abc", + []string{"/etc/foo"}, false, target); err != nil { + t.Fatalf("RunRestore: %v", err) + } + + if st, err := os.Stat(target); err != nil { + t.Fatalf("expected target dir to exist: %v", err) + } else if !st.IsDir() { + t.Fatalf("expected directory, got %v", st.Mode()) + } +} + +// TestRunDiffShipsLogLines: diff output is forwarded as log.stream. 
+func TestRunDiffShipsLogLines(t *testing.T) { + t.Parallel() + bin := setupScript(t, ` +case "$1" in + diff) + echo '{"message_type":"change","path":"/etc/nginx/nginx.conf","modifier":"M"}' + echo '{"message_type":"statistics","added":{"files":0,"dirs":0}}' + ;; +esac +`) + tx := &fakeSender{} + r := New(Config{ResticBin: bin}, tx, 0) + if err := r.RunDiff(context.Background(), "job-d1", "snap-a", "snap-b"); err != nil { + t.Fatalf("RunDiff: %v", err) + } + + startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted) + var startP api.JobStartedPayload + _ = startEnv.UnmarshalPayload(&startP) + if startP.Kind != api.JobDiff { + t.Fatalf("kind: got %q want %q", startP.Kind, api.JobDiff) + } + finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished) + var finP api.JobFinishedPayload + _ = finEnv.UnmarshalPayload(&finP) + if finP.Status != api.JobSucceeded { + t.Fatalf("status: %q", finP.Status) + } + // At least one log line should carry the change payload. + var sawChange bool + for _, e := range tx.envs { + if e.Type != api.MsgLogStream { + continue + } + var p api.LogStreamLine + _ = e.UnmarshalPayload(&p) + if strings.Contains(p.Payload, `"message_type":"change"`) { + sawChange = true + } + } + if !sawChange { + t.Fatal("never saw a change log line in diff output") + } +} diff --git a/internal/agent/runner/runner.go b/internal/agent/runner/runner.go index aae9882..91264aa 100644 --- a/internal/agent/runner/runner.go +++ b/internal/agent/runner/runner.go @@ -26,10 +26,11 @@ type Sender interface { // from the agent's config file (server-pushed config.update payloads // override these in memory). type Config struct { - ResticBin string - RepoURL string - RepoUsername string - RepoPassword string + ResticBin string + ResticVersion string // e.g. "0.17.1" — empty if unknown + RepoURL string + RepoUsername string + RepoPassword string // Bandwidth caps in KB/s applied to every restic invocation. // <=0 means "no cap". 
Per-job override: callers that build a @@ -61,6 +62,7 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner { func (r *Runner) resticEnv() restic.Env { return restic.Env{ Bin: r.cfg.ResticBin, + Version: r.cfg.ResticVersion, RepoURL: r.cfg.RepoURL, RepoUsername: r.cfg.RepoUsername, RepoPassword: r.cfg.RepoPassword, @@ -95,8 +97,10 @@ func (r *Runner) streamHandler(jobID string, seq *atomic.Int64) restic.LineHandl } // sendFinished ships a job.finished envelope. err==nil → succeeded; -// otherwise failed. statsBlob is forwarded as JobFinishedPayload.Stats. -func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) { +// otherwise failed (or canceled if ctx was canceled — operator +// hit the Cancel button or the agent is shutting down). +// statsBlob is forwarded as JobFinishedPayload.Stats. +func (r *Runner) sendFinished(ctx context.Context, jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) { status := api.JobSucceeded exit := 0 errMsg := "" @@ -104,6 +108,16 @@ func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, sta status = api.JobFailed exit = -1 errMsg = err.Error() + // If the context was canceled, the failure is operator-driven + // (or shutdown). Surface as JobCancelled so the UI shows a + // neutral "canceled" state rather than a red "failed" one. + // exec.CommandContext returns the process's exit error on + // ctx-cancel, which we'd otherwise rebadge as failed. 
+ if ctxErr := ctx.Err(); ctxErr != nil { + status = api.JobCancelled + exit = 130 // POSIX convention for SIGINT/SIGTERM-killed + errMsg = "" // no need to surface the underlying restic error + } } finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{ JobID: jobID, @@ -138,13 +152,13 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t if hooks.Pre != "" { if err := r.runHook(ctx, jobID, "pre", hooks.Pre, "", &seq); err != nil { finishedAt := time.Now().UTC() - r.sendFinished(jobID, finishedAt, err, nil) + r.sendFinished(ctx, jobID, finishedAt, err, nil) return fmt.Errorf("pre_hook failed: %w", err) } } env := r.resticEnv() - lastProgress := time.Now() + lastProgress := time.Time{} // zero time → first status event always emits handle := func(stream string, line string, ev any) { // Throttled progress events come from restic's `status` JSON. @@ -206,7 +220,7 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t } } - r.sendFinished(jobID, finishedAt, err, statsBlob) + r.sendFinished(ctx, jobID, finishedAt, err, statsBlob) // On a successful backup, refresh the server's snapshot projection. 
// We do this *after* job.finished so the UI sees the job land first; @@ -240,7 +254,7 @@ func (r *Runner) RunInit(ctx context.Context, jobID string) error { var seq atomic.Int64 err := env.RunInit(ctx, r.streamHandler(jobID, &seq)) finishedAt := time.Now().UTC() - r.sendFinished(jobID, finishedAt, err, nil) + r.sendFinished(ctx, jobID, finishedAt, err, nil) if err != nil { return fmt.Errorf("runner init: %w", err) } @@ -262,7 +276,7 @@ func (r *Runner) RunForget(ctx context.Context, jobID string, groups []restic.Fo var seq atomic.Int64 err := env.RunForget(ctx, groups, r.streamHandler(jobID, &seq)) finishedAt := time.Now().UTC() - r.sendFinished(jobID, finishedAt, err, nil) + r.sendFinished(ctx, jobID, finishedAt, err, nil) // Refresh the server's snapshot projection — forget rewrites the // index so the host's snapshot list almost certainly shrunk. @@ -300,7 +314,7 @@ func (r *Runner) RunPrune(ctx context.Context, jobID string) error { } } - r.sendFinished(jobID, finishedAt, err, nil) + r.sendFinished(ctx, jobID, finishedAt, err, nil) if err != nil { return fmt.Errorf("runner prune: %w", err) @@ -339,7 +353,7 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro slog.Warn("runner: stats.report after check failed", "job_id", jobID, "err", rerr) } - r.sendFinished(jobID, finishedAt, err, nil) + r.sendFinished(ctx, jobID, finishedAt, err, nil) if err != nil { return fmt.Errorf("runner check: %w", err) @@ -347,6 +361,102 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro return nil } +// RunRestore executes a restic restore job and reports back via the +// sender. paths is the operator-selected file/dir list to restore. +// inPlace=true preserves uid/gid/mode and writes at "/"; inPlace=false +// writes at targetDir with --no-ownership. 
+// +// Status events from restic are throttled into job.progress in the +// same shape as backup; raw status lines are dropped from log.stream +// (they would drown the log on a fast restore — the progress widget +// already covers them). +func (r *Runner) RunRestore(ctx context.Context, jobID, snapshotID string, paths []string, inPlace bool, targetDir string) error { + startedAt := time.Now().UTC() + r.sendStarted(jobID, api.JobRestore, startedAt) + + env := r.resticEnv() + var seq atomic.Int64 + lastProgress := time.Time{} // zero time → first status event always emits + + handle := func(stream string, line string, ev any) { + status, isStatus := ev.(restic.RestoreStatus) + if !isStatus { + now := time.Now().UTC() + logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{ + JobID: jobID, + Seq: seq.Add(1), + TS: now, + Stream: api.LogStream(stream), + Payload: line, + }) + _ = r.tx.Send(logEnv) + } + if isStatus { + if time.Since(lastProgress) < r.progressMinPeriod { + return + } + lastProgress = time.Now() + progEnv, _ := api.Marshal(api.MsgJobProgress, jobID, api.JobProgressPayload{ + JobID: jobID, + PercentDone: status.PercentDone, + FilesDone: status.FilesRestored, + TotalFiles: status.TotalFiles, + BytesDone: status.BytesRestored, + TotalBytes: status.TotalBytes, + ETASeconds: estimateETA(status.BytesRestored, status.TotalBytes, status.SecondsElapsed), + ThroughputBps: throughput(status.BytesRestored, status.SecondsElapsed), + }) + _ = r.tx.Send(progEnv) + } + } + + summary, err := env.RunRestore(ctx, snapshotID, paths, inPlace, targetDir, handle) + finishedAt := time.Now().UTC() + + var statsBlob json.RawMessage + if summary != nil { + statsBlob, _ = json.Marshal(summary) + } + r.sendFinished(ctx, jobID, finishedAt, err, statsBlob) + if err != nil { + return fmt.Errorf("runner restore: %w", err) + } + return nil +} + +// estimateETA computes an ETA in seconds based on current bytes +// progress + elapsed seconds. 
Restic restore's --json doesn't emit an +// ETA field of its own (unlike backup), so we approximate by linear +// extrapolation. Returns 0 when we don't have enough data. +func estimateETA(bytesDone, totalBytes, secondsElapsed int64) int64 { + if bytesDone <= 0 || totalBytes <= 0 || secondsElapsed <= 0 || bytesDone >= totalBytes { + return 0 + } + rate := float64(bytesDone) / float64(secondsElapsed) + if rate <= 0 { + return 0 + } + return int64(float64(totalBytes-bytesDone) / rate) +} + +// RunDiff executes `restic diff --json ` and forwards output +// as log.stream lines. No snapshot-list refresh, no stats update — +// diff is purely informational. +func (r *Runner) RunDiff(ctx context.Context, jobID, snapshotA, snapshotB string) error { + startedAt := time.Now().UTC() + r.sendStarted(jobID, api.JobDiff, startedAt) + + env := r.resticEnv() + var seq atomic.Int64 + err := env.RunDiff(ctx, snapshotA, snapshotB, r.streamHandler(jobID, &seq)) + finishedAt := time.Now().UTC() + r.sendFinished(ctx, jobID, finishedAt, err, nil) + if err != nil { + return fmt.Errorf("runner diff: %w", err) + } + return nil +} + // RunUnlock executes a `restic unlock` job. On success it ships a // repo.stats envelope with LockPresent=false so the UI banner clears. 
func (r *Runner) RunUnlock(ctx context.Context, jobID string) error { @@ -366,7 +476,7 @@ func (r *Runner) RunUnlock(ctx context.Context, jobID string) error { } } - r.sendFinished(jobID, finishedAt, err, nil) + r.sendFinished(ctx, jobID, finishedAt, err, nil) if err != nil { return fmt.Errorf("runner unlock: %w", err) diff --git a/internal/agent/runner/runner_test.go b/internal/agent/runner/runner_test.go index c9fb042..239cdf7 100644 --- a/internal/agent/runner/runner_test.go +++ b/internal/agent/runner/runner_test.go @@ -4,20 +4,42 @@ import ( "context" "os" "path/filepath" + "sync" "testing" "gitea.dcglab.co.uk/steve/restic-manager/internal/api" "gitea.dcglab.co.uk/steve/restic-manager/internal/restic" ) -// fakeSender collects sent envelopes for assertions. -type fakeSender struct{ envs []api.Envelope } +// fakeSender collects sent envelopes for assertions. Lock-protected +// because the runner's pumpStdout / pumpStderr goroutines call Send +// concurrently — without the mutex, -race in CI flags every test +// that exercises a Run* method with both pumps active. +type fakeSender struct { + mu sync.Mutex + envs []api.Envelope +} func (s *fakeSender) Send(e api.Envelope) error { + s.mu.Lock() s.envs = append(s.envs, e) + s.mu.Unlock() return nil } +// snapshot returns a copy of the captured envelopes safe to read +// without holding the lock. Tests use this when iterating envs while +// other goroutines may still be writing — though in practice all +// runner Run* methods join their pumps before returning, so callers +// can also read .envs directly post-return. +func (s *fakeSender) snapshot() []api.Envelope { + s.mu.Lock() + defer s.mu.Unlock() + out := make([]api.Envelope, len(s.envs)) + copy(out, s.envs) + return out +} + // setupScript writes a shell script (without shebang) to a temp dir, // names it "restic", makes it executable, and returns the path. // @@ -320,7 +342,7 @@ esac // still produces job.started and job.finished envelopes. 
func TestRunInitShipsStartedAndFinished(t *testing.T) { t.Parallel() - bin := setupScript(t, `echo "initialized repository"`) + bin := setupScript(t, `echo "initialised repository"`) tx := &fakeSender{} r := New(Config{ResticBin: bin}, tx, 0) if err := r.RunInit(context.Background(), "job-init"); err != nil { diff --git a/internal/agent/scheduler/scheduler.go b/internal/agent/scheduler/scheduler.go index e9576ba..b3f236d 100644 --- a/internal/agent/scheduler/scheduler.go +++ b/internal/agent/scheduler/scheduler.go @@ -110,7 +110,7 @@ func (s *Scheduler) Apply(payload api.ScheduleSetPayload, tx Sender) { "received", len(payload.Schedules), "active", added) // Ack outside the lock — Send() shouldn't take long, but holding - // s.mu across an external call would needlessly serialize other + // s.mu across an external call would needlessly serialise other // callers (e.g. a future Status() inspection from the UI). ackEnv, err := api.Marshal(api.MsgScheduleAck, "", api.ScheduleAckPayload{ Version: payload.Version, diff --git a/internal/agent/secrets/secrets.go b/internal/agent/secrets/secrets.go index ff285e9..e6ac2a0 100644 --- a/internal/agent/secrets/secrets.go +++ b/internal/agent/secrets/secrets.go @@ -21,7 +21,7 @@ import ( // additionalData binds ciphertexts to the agent-secrets context, so a // blob lifted from one role's file can't be replayed into another's -// row in some unrelated table that uses the same key. (Defense in +// row in some unrelated table that uses the same key. (Defence in // depth — the key is per-host today, but cheap to be careful.) 
const additionalData = "rm-agent-repo-creds-v1" diff --git a/internal/agent/sysinfo/sysinfo.go b/internal/agent/sysinfo/sysinfo.go index e0c369e..0380952 100644 --- a/internal/agent/sysinfo/sysinfo.go +++ b/internal/agent/sysinfo/sysinfo.go @@ -76,5 +76,5 @@ func detectResticVersion(ctx context.Context, override string) (string, error) { if len(parts) >= 2 && parts[0] == "restic" { return parts[1], nil } - return "", fmt.Errorf("sysinfo: unrecognized restic version output: %q", first) + return "", fmt.Errorf("sysinfo: unrecognised restic version output: %q", first) } diff --git a/internal/agent/wsclient/client.go b/internal/agent/wsclient/client.go index 4e5d0b0..77f3ee1 100644 --- a/internal/agent/wsclient/client.go +++ b/internal/agent/wsclient/client.go @@ -40,7 +40,7 @@ type Config struct { // Sender is what handlers use to push agent → server messages // (job.progress, job.finished, log.stream, command.result, …). // Returned by the WS client to the dispatch handler. Write operations -// serialize behind a single mutex on the conn; concurrent calls are +// serialise behind a single mutex on the conn; concurrent calls are // safe. type Sender interface { Send(env api.Envelope) error diff --git a/internal/api/messages.go b/internal/api/messages.go index ce43bc3..8ea18f2 100644 --- a/internal/api/messages.go +++ b/internal/api/messages.go @@ -52,14 +52,17 @@ type JobKind string // Allowed JobKind values. backup is operator/cron driven; init runs // once per host on first connect; forget/prune/check fire from the -// server-side maintenance ticker; unlock is operator-only. +// server-side maintenance ticker; unlock and restore are operator- +// only; diff is operator-only and read-only. 
const ( - JobBackup JobKind = "backup" - JobInit JobKind = "init" - JobForget JobKind = "forget" - JobPrune JobKind = "prune" - JobCheck JobKind = "check" - JobUnlock JobKind = "unlock" + JobBackup JobKind = "backup" + JobInit JobKind = "init" + JobForget JobKind = "forget" + JobPrune JobKind = "prune" + JobCheck JobKind = "check" + JobUnlock JobKind = "unlock" + JobRestore JobKind = "restore" + JobDiff JobKind = "diff" ) // JobStatus is the lifecycle state of a job. @@ -143,6 +146,35 @@ type CommandRunPayload struct { // just executes whatever is here. PreHook string `json:"pre_hook,omitempty"` PostHook string `json:"post_hook,omitempty"` + + // Restore is populated only for kind=restore. See RestorePayload + // for the shape; nil for every other kind. + Restore *RestorePayload `json:"restore,omitempty"` + + // Diff is populated only for kind=diff. See DiffPayload for + // shape; nil for every other kind. + Diff *DiffPayload `json:"diff,omitempty"` +} + +// RestorePayload carries restore-specific arguments on a JobRestore +// command.run. Paths are absolute paths inside the snapshot (same +// shape restic accepts as positional args). When InPlace is true the +// agent restores at root (`--target /`) and preserves uid/gid/mode; +// otherwise it restores into TargetDir with --no-ownership so the +// operator can inspect the files as the agent user. +type RestorePayload struct { + SnapshotID string `json:"snapshot_id"` + Paths []string `json:"paths"` + InPlace bool `json:"in_place"` + TargetDir string `json:"target_dir,omitempty"` // ignored when in_place=true +} + +// DiffPayload carries snapshot-diff arguments on a JobDiff command.run. +// SnapshotA / SnapshotB may be either short or long IDs; restic +// accepts both. +type DiffPayload struct { + SnapshotA string `json:"snapshot_a"` + SnapshotB string `json:"snapshot_b"` } // CommandCancelPayload is the server → agent cancel signal. 
@@ -337,3 +369,37 @@ type AgentUpdateAvailablePayload struct { PackageURL string `json:"package_url"` // apt repo / choco source Changelog string `json:"changelog,omitempty"` } + +// TreeListRequestPayload is the body of a tree.list RPC. Used by the +// restore wizard to lazy-load directory contents from a snapshot. +// +// The exchange is synchronous: the server marshals MsgTreeList with a +// fresh Envelope.ID, sends to the agent, blocks on a channel keyed by +// that ID. The agent runs `restic ls --json `, +// emits direct children, and replies with MsgTreeListResult carrying +// the same ID. The server-side handler matches on ID and forwards to +// the waiting channel. See internal/server/ws/rpc.go for the helper. +type TreeListRequestPayload struct { + SnapshotID string `json:"snapshot_id"` + Path string `json:"path"` // absolute path inside the snapshot, "/" for root +} + +// TreeListEntry is one direct child returned by a tree.list call. +// Type is "dir" | "file" | "symlink"; size is best-effort (zero on +// directories and symlinks). +type TreeListEntry struct { + Name string `json:"name"` + Type string `json:"type"` + Size int64 `json:"size,omitempty"` +} + +// TreeListResultPayload is the reply to a tree.list. Error is set +// when the agent couldn't fulfil the request (missing snapshot, +// path doesn't exist, restic invocation failed); Entries is empty in +// that case. A successful empty directory has Error="" + nil Entries. +type TreeListResultPayload struct { + SnapshotID string `json:"snapshot_id"` + Path string `json:"path"` + Entries []TreeListEntry `json:"entries,omitempty"` + Error string `json:"error,omitempty"` +} diff --git a/internal/api/wire.go b/internal/api/wire.go index df646a5..005827f 100644 --- a/internal/api/wire.go +++ b/internal/api/wire.go @@ -12,18 +12,19 @@ type MessageType string // Agent → server message types. 
const ( - MsgHello MessageType = "hello" - MsgHeartbeat MessageType = "heartbeat" - MsgJobStarted MessageType = "job.started" - MsgJobProgress MessageType = "job.progress" - MsgJobFinished MessageType = "job.finished" - MsgSnapshotsRpt MessageType = "snapshots.report" - MsgRepoStats MessageType = "repo.stats" - MsgLogStream MessageType = "log.stream" - MsgScheduleAck MessageType = "schedule.ack" - MsgScheduleFire MessageType = "schedule.fire" // agent: a local cron entry fired, please dispatch a job - MsgCommandResult MessageType = "command.result" // ack for command.run - MsgError MessageType = "error" + MsgHello MessageType = "hello" + MsgHeartbeat MessageType = "heartbeat" + MsgJobStarted MessageType = "job.started" + MsgJobProgress MessageType = "job.progress" + MsgJobFinished MessageType = "job.finished" + MsgSnapshotsRpt MessageType = "snapshots.report" + MsgRepoStats MessageType = "repo.stats" + MsgLogStream MessageType = "log.stream" + MsgScheduleAck MessageType = "schedule.ack" + MsgScheduleFire MessageType = "schedule.fire" // agent: a local cron entry fired, please dispatch a job + MsgCommandResult MessageType = "command.result" // ack for command.run + MsgTreeListResult MessageType = "tree.list.result" // reply to a server-driven tree.list + MsgError MessageType = "error" ) // Server → agent message types. @@ -33,6 +34,7 @@ const ( MsgScheduleSet MessageType = "schedule.set" MsgConfigUpdate MessageType = "config.update" MsgAgentUpdateAvail MessageType = "agent.update.available" + MsgTreeList MessageType = "tree.list" // sync RPC: list a snapshot's children ) // Envelope is the framing for every WS message in either direction. 
@@ -76,7 +78,7 @@ type ErrorCode string const ( ErrProtocolTooOld ErrorCode = "protocol_too_old" ErrProtocolTooNew ErrorCode = "protocol_too_new" - ErrUnauthorized ErrorCode = "unauthorized" + ErrUnauthorized ErrorCode = "unauthorised" ErrBadRequest ErrorCode = "bad_request" ErrInternal ErrorCode = "internal" ) diff --git a/internal/auth/passwords.go b/internal/auth/passwords.go index d245ace..6e35321 100644 --- a/internal/auth/passwords.go +++ b/internal/auth/passwords.go @@ -56,7 +56,7 @@ func VerifyPassword(encoded, password string) error { parts := strings.Split(encoded, "$") // "$argon2id$v=...$m=...,t=...,p=...$$" → 6 parts (leading empty) if len(parts) != 6 || parts[1] != "argon2id" { - return errors.New("auth: unrecognized hash format") + return errors.New("auth: unrecognised hash format") } var version int if _, err := fmt.Sscanf(parts[2], "v=%d", &version); err != nil { diff --git a/internal/crypto/aead.go b/internal/crypto/aead.go index 7a68264..388e97b 100644 --- a/internal/crypto/aead.go +++ b/internal/crypto/aead.go @@ -2,7 +2,7 @@ // passwords, REST-server credentials, hook bodies, and any other // secret that lands in the SQLite store. // -// The threat model is "defense in depth against a stolen DB file" — +// The threat model is "defence in depth against a stolen DB file" — // not "an attacker with code execution can't read secrets at runtime." // We need the encryption key at runtime to do any actual work, so // anyone with a memory dump of the running server can extract it. 
diff --git a/internal/restic/cancel_unix.go b/internal/restic/cancel_unix.go new file mode 100644 index 0000000..f74fdd6 --- /dev/null +++ b/internal/restic/cancel_unix.go @@ -0,0 +1,7 @@ +//go:build !windows + +package restic + +import "syscall" + +var sigterm = syscall.SIGTERM diff --git a/internal/restic/cancel_windows.go b/internal/restic/cancel_windows.go new file mode 100644 index 0000000..b8f47b7 --- /dev/null +++ b/internal/restic/cancel_windows.go @@ -0,0 +1,12 @@ +//go:build windows + +package restic + +import "os" + +// Windows has no SIGTERM. The closest equivalent is os.Interrupt +// (CTRL_BREAK_EVENT), but Go's exec.Cmd.Process.Signal() on Windows +// only supports os.Kill — sending anything else returns an error and +// no signal is delivered. Fall back to os.Kill so Cancel still works +// (immediate force-kill); WaitDelay is unused but harmless. +var sigterm = os.Kill diff --git a/internal/restic/ls.go b/internal/restic/ls.go new file mode 100644 index 0000000..0371c4d --- /dev/null +++ b/internal/restic/ls.go @@ -0,0 +1,140 @@ +package restic + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os/exec" + "path" + "strings" +) + +// LsEntry is one node from `restic ls --json`. Restic emits these as +// line-delimited JSON; we keep only the fields the restore wizard +// needs. +type LsEntry struct { + Name string `json:"name"` + Type string `json:"type"` + Path string `json:"path"` + Size int64 `json:"size,omitempty"` + Struct string `json:"struct_type,omitempty"` +} + +// ListTreeChildren runs `restic ls --json ` and +// returns only the direct children of dirPath. Restic ls is recursive +// by default, so we filter post-hoc — for a typical interactive +// drill-down ("expand /etc/nginx") the subtree is small (a few KB of +// JSON); for huge subtrees this is suboptimal but correct. +// +// The first emitted line is restic's "snapshot" preamble (struct_type +// = "snapshot") which we discard. 
Subsequent lines are nodes; we +// match on path equal to dirPath + "/" + name (with normalisation so +// trailing slashes don't break the comparison). +// +// dirPath="" or "/" lists the snapshot root. +func (e Env) ListTreeChildren(ctx context.Context, snapshotID, dirPath string) ([]LsEntry, error) { + if snapshotID == "" { + return nil, fmt.Errorf("restic ls: snapshot id required") + } + parent := normalizeTreePath(dirPath) + + args := []string{"ls", "--json", snapshotID} + if parent != "/" { + args = append(args, parent) + } + cmd := e.resticCmd(ctx, args...) + + var stderr bytes.Buffer + cmd.Stderr = &stderr + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("restic ls: stdout pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("restic ls: start: %w", err) + } + + out, parseErr := parseLsChildren(stdout, parent) + + werr := cmd.Wait() + if werr != nil { + var ee *exec.ExitError + if errors.As(werr, &ee) { + return nil, fmt.Errorf("restic ls: exit %d: %s", + ee.ExitCode(), strings.TrimSpace(stderr.String())) + } + return nil, fmt.Errorf("restic ls: %w", werr) + } + if parseErr != nil { + return nil, parseErr + } + return out, nil +} + +// parseLsChildren reads line-delimited JSON from r and returns nodes +// whose Path is a direct child of parent. Exposed for testing. +func parseLsChildren(r io.Reader, parent string) ([]LsEntry, error) { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + var out []LsEntry + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + var entry LsEntry + if err := json.Unmarshal(line, &entry); err != nil { + return nil, fmt.Errorf("restic ls: parse line: %w", err) + } + // Skip the snapshot preamble and any future struct_type + // entries we don't care about. 
+ if entry.Struct == "snapshot" || entry.Path == "" { + continue + } + if isDirectChild(entry.Path, parent) { + out = append(out, entry) + } + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("restic ls: read output: %w", err) + } + return out, nil +} + +// normalizeTreePath turns "" / "/" / "/etc/" / "etc" all into a +// canonical absolute form with a leading slash and no trailing slash +// (except the root, which is "/" alone). +func normalizeTreePath(p string) string { + p = strings.TrimSpace(p) + if p == "" || p == "/" { + return "/" + } + if !strings.HasPrefix(p, "/") { + p = "/" + p + } + cleaned := path.Clean(p) + return cleaned +} + +// isDirectChild reports whether childPath is a direct child of parent. +// "/etc/nginx" is a direct child of "/etc"; "/etc/nginx/conf" is not. +// "/etc" is a direct child of "/". +func isDirectChild(childPath, parent string) bool { + cp := normalizeTreePath(childPath) + pp := normalizeTreePath(parent) + if pp == "/" { + // Direct children of root: exactly one slash-delimited segment. + return cp != "/" && strings.Count(cp, "/") == 1 + } + // Must start with parent + "/" and have no further slashes. + prefix := pp + "/" + if !strings.HasPrefix(cp, prefix) { + return false + } + rest := cp[len(prefix):] + return rest != "" && !strings.Contains(rest, "/") +} diff --git a/internal/restic/ls_test.go b/internal/restic/ls_test.go new file mode 100644 index 0000000..4688383 --- /dev/null +++ b/internal/restic/ls_test.go @@ -0,0 +1,123 @@ +package restic + +import ( + "strings" + "testing" +) + +// realistic restic ls --json output sample. First line is the +// snapshot preamble, subsequent lines are nodes. Trimmed to a few +// entries that exercise depth filtering. 
+const sampleLsOutput = `{"struct_type":"snapshot","time":"2026-05-04T09:14:00Z","id":"f3a7b2c1"} +{"name":"etc","type":"dir","path":"/etc","permissions":"drwxr-xr-x","struct_type":"node"} +{"name":"nginx","type":"dir","path":"/etc/nginx","permissions":"drwxr-xr-x","struct_type":"node"} +{"name":"nginx.conf","type":"file","path":"/etc/nginx/nginx.conf","size":2400,"struct_type":"node"} +{"name":"sites-available","type":"dir","path":"/etc/nginx/sites-available","struct_type":"node"} +{"name":"alfa.conf","type":"file","path":"/etc/nginx/sites-available/alfa.conf","size":3100,"struct_type":"node"} +{"name":"default.conf","type":"file","path":"/etc/nginx/sites-available/default.conf","size":2900,"struct_type":"node"} +` + +func TestParseLsChildrenAtRoot(t *testing.T) { + t.Parallel() + entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/") + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(entries) != 1 { + t.Fatalf("entries: got %d (%+v), want 1", len(entries), entries) + } + if entries[0].Name != "etc" || entries[0].Path != "/etc" || entries[0].Type != "dir" { + t.Fatalf("entry: %+v", entries[0]) + } +} + +func TestParseLsChildrenAtEtc(t *testing.T) { + t.Parallel() + entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc") + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(entries) != 1 { + t.Fatalf("entries: got %d, want 1 (just nginx, not nested children)", len(entries)) + } + if entries[0].Name != "nginx" { + t.Fatalf("entry: %+v", entries[0]) + } +} + +func TestParseLsChildrenAtNginx(t *testing.T) { + t.Parallel() + entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc/nginx") + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(entries) != 2 { + t.Fatalf("entries: got %d (%+v), want 2 (nginx.conf + sites-available, not nested)", + len(entries), entries) + } + gotNames := []string{entries[0].Name, entries[1].Name} + want := map[string]bool{"nginx.conf": true, "sites-available": true} 
+ for _, n := range gotNames { + if !want[n] { + t.Errorf("unexpected name %q in result", n) + } + } +} + +func TestParseLsChildrenAtSitesAvailable(t *testing.T) { + t.Parallel() + entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc/nginx/sites-available") + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(entries) != 2 { + t.Fatalf("entries: got %d, want 2", len(entries)) + } + for _, e := range entries { + if e.Type != "file" { + t.Errorf("expected file type, got %q on %q", e.Type, e.Name) + } + } +} + +func TestNormalizeTreePath(t *testing.T) { + t.Parallel() + cases := []struct{ in, want string }{ + {"", "/"}, + {"/", "/"}, + {"/etc", "/etc"}, + {"/etc/", "/etc"}, + {"etc/nginx", "/etc/nginx"}, + {"/etc//nginx", "/etc/nginx"}, + {"/etc/./nginx", "/etc/nginx"}, + } + for _, c := range cases { + got := normalizeTreePath(c.in) + if got != c.want { + t.Errorf("normalizeTreePath(%q): got %q, want %q", c.in, got, c.want) + } + } +} + +func TestIsDirectChild(t *testing.T) { + t.Parallel() + cases := []struct { + child, parent string + want bool + }{ + {"/etc", "/", true}, + {"/etc/nginx", "/", false}, + {"/etc/nginx", "/etc", true}, + {"/etc/nginx/conf", "/etc", false}, + {"/etc/nginx/conf", "/etc/nginx", true}, + {"/etc", "/etc", false}, + {"/etcc", "/etc", false}, // prefix match guard + } + for _, c := range cases { + got := isDirectChild(c.child, c.parent) + if got != c.want { + t.Errorf("isDirectChild(%q, %q): got %v, want %v", + c.child, c.parent, got, c.want) + } + } +} diff --git a/internal/restic/restore.go b/internal/restic/restore.go new file mode 100644 index 0000000..0f2af31 --- /dev/null +++ b/internal/restic/restore.go @@ -0,0 +1,271 @@ +package restic + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// RestoreStatus mirrors the JSON `status` lines `restic restore --json` +// emits while restoring. 
Field names track restic's wire format; we +// project a subset (the rest are cosmetic). +type RestoreStatus struct { + MessageType string `json:"message_type"` + SecondsElapsed int64 `json:"seconds_elapsed"` + PercentDone float64 `json:"percent_done"` + TotalFiles int64 `json:"total_files"` + FilesRestored int64 `json:"files_restored"` + FilesSkipped int64 `json:"files_skipped"` + TotalBytes int64 `json:"total_bytes"` + BytesRestored int64 `json:"bytes_restored"` + BytesSkipped int64 `json:"bytes_skipped"` +} + +// RestoreSummary is the final summary line emitted after a successful +// restore. Newer restic prints it; older clients leave us with no +// summary, in which case the agent skips the stats and the live UI +// just sees percent reach 100%. +type RestoreSummary struct { + MessageType string `json:"message_type"` + SecondsElapsed int64 `json:"seconds_elapsed"` + TotalFiles int64 `json:"total_files"` + FilesRestored int64 `json:"files_restored"` + FilesSkipped int64 `json:"files_skipped"` + TotalBytes int64 `json:"total_bytes"` + BytesRestored int64 `json:"bytes_restored"` + BytesSkipped int64 `json:"bytes_skipped"` +} + +// RunRestore executes `restic restore --target +// [--include

...]` with --json and pumps progress events into +// handle. paths is the operator-selected list (each becomes an +// `--include` flag); preserveOwner controls --no-ownership. +// +// inPlace toggles target semantics: +// - true → target is "/" and ownership is preserved +// - false → target is targetDir and --no-ownership is passed +// +// targetDir is created on demand by restic itself. +func (e Env) RunRestore(ctx context.Context, snapshotID string, paths []string, inPlace bool, targetDir string, handle LineHandler) (*RestoreSummary, error) { + if snapshotID == "" { + return nil, fmt.Errorf("restic restore: snapshot id required") + } + if !inPlace && targetDir == "" { + return nil, fmt.Errorf("restic restore: target dir required for non-in-place restore") + } + + args := []string{"restore", "--json", snapshotID} + target := targetDir + if inPlace { + target = "/" + } else { + // Expand $HOME / ${HOME} / leading ~/ in the operator-supplied + // path, using the agent's own HOME (typically /root for the + // User=root unit). The expansion runs agent-side so the + // operator can specify a portable default like + // $HOME/rm-restore// in the wizard without the server + // needing to know which user the agent runs as. + target = expandHome(target) + // Ensure the target directory exists. Restic itself creates + // missing leaves but won't traverse multiple missing levels + // (and we don't want the operator to have to pre-create the + // per-job subdir). 0700 keeps the data root-only — the agent + // runs as root, and operators who want a different mode can + // chmod after the fact. If MkdirAll fails (operator typed a + // path inside a read-only sandbox mount, ENOSPC, etc.) we + // surface a clean error rather than letting restic fail with + // something cryptic. 
+ if err := os.MkdirAll(target, 0o700); err != nil { + return nil, fmt.Errorf("restic restore: prepare target %q: %w", target, err) + } + } + args = append(args, "--target", target) + // --no-ownership was added in restic 0.17. Older versions reject + // the flag with "unknown flag: --no-ownership". For new-dir + // restores we want the files owned by the agent user (operator + // can cp them without juggling chown), so pass the flag iff the + // running restic supports it. In-place restores always preserve + // ownership — that's the whole point of in-place. + if !inPlace && e.AtLeastVersion(0, 17) { + args = append(args, "--no-ownership") + } + for _, p := range paths { + args = append(args, "--include", p) + } + + cmd := e.resticCmd(ctx, args...) + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("restic restore: stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return nil, fmt.Errorf("restic restore: stderr pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("restic restore: start: %w", err) + } + + var summary *RestoreSummary + done := make(chan error, 2) + go func() { done <- pumpRestoreStdout(stdout, handle, &summary) }() + go func() { done <- pumpStderr(stderr, handle) }() + for i := 0; i < 2; i++ { + if err := <-done; err != nil && handle != nil { + handle("event", fmt.Sprintf("pump error: %v", err), nil) + } + } + werr := cmd.Wait() + if werr != nil { + var ee *exec.ExitError + if errors.As(werr, &ee) { + return summary, fmt.Errorf("restic restore: exit %d", ee.ExitCode()) + } + return summary, fmt.Errorf("restic restore: %w", werr) + } + return summary, nil +} + +// pumpRestoreStdout is the restore variant of pumpStdout: it emits +// `event` lines for the parsed status/summary objects (so the runner +// can shape them into job.progress) and forwards everything else as +// stdout — but unlike backup we include the raw status JSON in +// log.stream too because 
restore is short and the live log audience +// genuinely benefits from the per-file traffic. Actually — we mirror +// backup's behaviour and DROP raw status lines from log.stream +// (they'd drown the log on a fast restore); the progress envelope +// covers them. +func pumpRestoreStdout(r io.Reader, handle LineHandler, summary **RestoreSummary) error { + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + for scanner.Scan() { + line := scanner.Text() + if handle == nil { + continue + } + if !strings.HasPrefix(line, "{") { + handle("stdout", line, nil) + continue + } + var probe struct { + MessageType string `json:"message_type"` + } + if err := json.Unmarshal([]byte(line), &probe); err != nil { + handle("stdout", line, nil) + continue + } + switch probe.MessageType { + case "status": + var ev RestoreStatus + if json.Unmarshal([]byte(line), &ev) == nil { + // Don't tee status lines to log.stream — too chatty. + handle("event", line, ev) + continue + } + case "summary": + var ev RestoreSummary + if json.Unmarshal([]byte(line), &ev) == nil { + if summary != nil { + s := ev + *summary = &s + } + handle("event", line, ev) + continue + } + case "verbose_status": + handle("event", line, nil) + continue + } + handle("stdout", line, nil) + } + return scanner.Err() +} + +// expandHome rewrites $HOME, ${HOME}, or a leading ~/ in p to the +// agent process's home directory. Other env-var references are left +// untouched on purpose (operator-supplied paths shouldn't be able to +// pick up arbitrary agent env values like $PATH or $RESTIC_PASSWORD). +// Returns p unchanged if HOME can't be resolved. 
+func expandHome(p string) string { + if p == "" { + return p + } + home, err := os.UserHomeDir() + if err != nil || home == "" { + return p + } + switch { + case strings.HasPrefix(p, "$HOME/"): + return filepath.Join(home, p[len("$HOME/"):]) + case p == "$HOME": + return home + case strings.HasPrefix(p, "${HOME}/"): + return filepath.Join(home, p[len("${HOME}/"):]) + case p == "${HOME}": + return home + case strings.HasPrefix(p, "~/"): + return filepath.Join(home, p[2:]) + case p == "~": + return home + } + return p +} + +// RunDiff executes `restic diff --json ` and forwards every +// line to handle as stdout. Restic emits per-line "change" objects +// plus a final "statistics" object; we don't parse them server-side — +// the operator reads the raw output on the live job log page. +func (e Env) RunDiff(ctx context.Context, snapshotA, snapshotB string, handle LineHandler) error { + if snapshotA == "" || snapshotB == "" { + return fmt.Errorf("restic diff: two snapshot ids required") + } + cmd := e.resticCmd(ctx, "diff", "--json", snapshotA, snapshotB) + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("restic diff: stdout pipe: %w", err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("restic diff: stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("restic diff: start: %w", err) + } + done := make(chan error, 2) + // diff output isn't huge; pumpStderr-ish line-by-line forwarding + // is fine. 
+ go func() { + s := bufio.NewScanner(stdout) + s.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for s.Scan() { + if handle != nil { + handle("stdout", s.Text(), nil) + } + } + done <- s.Err() + }() + go func() { done <- pumpStderr(stderr, handle) }() + for i := 0; i < 2; i++ { + if err := <-done; err != nil && handle != nil { + handle("event", fmt.Sprintf("pump error: %v", err), nil) + } + } + werr := cmd.Wait() + if werr != nil { + var ee *exec.ExitError + if errors.As(werr, &ee) { + return fmt.Errorf("restic diff: exit %d", ee.ExitCode()) + } + return fmt.Errorf("restic diff: %w", werr) + } + return nil +} diff --git a/internal/restic/runner.go b/internal/restic/runner.go index 6104e7a..2c01233 100644 --- a/internal/restic/runner.go +++ b/internal/restic/runner.go @@ -15,7 +15,7 @@ import ( "time" ) -// Locate resolves the path to the restic binary. Honor an explicit +// Locate resolves the path to the restic binary. Honour an explicit // override if provided, else fall back to PATH. func Locate(override string) (string, error) { if override != "" { @@ -42,6 +42,7 @@ func Locate(override string) (string, error) { // in this package ever needs to *log* a URL, use RedactURL. type Env struct { Bin string // path to restic binary + Version string // e.g. "0.17.1"; empty if unknown RepoURL string // RESTIC_REPOSITORY (no embedded creds) RepoUsername string // optional HTTP basic-auth user for rest: URLs RepoPassword string // doubles as RESTIC_PASSWORD and (for rest:) HTTP basic-auth password @@ -55,6 +56,45 @@ type Env struct { LimitDownloadKBps int } +// AtLeastVersion reports whether e.Version >= the given major/minor. +// Comparison is best-effort: empty / unparseable versions return false +// (callers stay on the conservative path). Patch level is ignored. 
+func (e Env) AtLeastVersion(major, minor int) bool { + v := strings.TrimSpace(e.Version) + if v == "" { + return false + } + parts := strings.SplitN(v, ".", 3) + if len(parts) < 2 { + return false + } + maj, err1 := atoi(parts[0]) + min, err2 := atoi(parts[1]) + if err1 != nil || err2 != nil { + return false + } + if maj != major { + return maj > major + } + return min >= minor +} + +// atoi is strconv.Atoi without dragging the import into a file that +// only needs it for one helper. +func atoi(s string) (int, error) { + n := 0 + if len(s) == 0 { + return 0, fmt.Errorf("empty") + } + for _, r := range s { + if r < '0' || r > '9' { + return 0, fmt.Errorf("not a digit: %q", r) + } + n = n*10 + int(r-'0') + } + return n, nil +} + // globalArgs returns restic's pre-subcommand global flags derived // from the Env. Currently just bandwidth caps. func (e Env) globalArgs() []string { @@ -69,14 +109,33 @@ func (e Env) globalArgs() []string { } // resticCmd builds an exec.Cmd with bandwidth-limit globals prefixed -// before the supplied subcommand args. Centralizing this so every -// command (backup/forget/prune/check/unlock/init/stats) honors +// before the supplied subcommand args. Centralising this so every +// command (backup/forget/prune/check/unlock/init/stats) honours // the caps without each call site having to remember. +// +// Cancellation: by default exec.CommandContext sends SIGKILL when +// ctx is canceled, which leaves restic no chance to clean up its +// repository lock. Override Cmd.Cancel to send SIGTERM first, and +// set Cmd.WaitDelay so the process is force-killed if it doesn't +// exit within five seconds. Restic responds to SIGTERM by removing +// its lock file before exiting, which is what we want when an +// operator cancels a long-running backup/restore from the UI. func (e Env) resticCmd(ctx context.Context, sub ...string) *exec.Cmd { args := append(e.globalArgs(), sub...) cmd := exec.CommandContext(ctx, e.Bin, args...) 
cmd.Env = e.envSlice() cmd.Dir = e.WorkDir + cmd.Cancel = func() error { + // Cmd.Process is set after Start; Cancel only fires post-Start + // so the nil check is defensive against the documented but + // unlikely race. Signal returns ErrProcessDone if the process + // already exited; that's not a problem here either. + if cmd.Process == nil { + return nil + } + return cmd.Process.Signal(sigterm) + } + cmd.WaitDelay = 5 * time.Second return cmd } @@ -123,7 +182,7 @@ type BackupSummary struct { } // LineHandler receives every stdout/stderr line. event is non-nil -// when the line is a recognized JSON status; raw always carries the +// when the line is a recognised JSON status; raw always carries the // original text (so we can also tee to job_logs as `stdout`). type LineHandler func(stream string, raw string, event any) @@ -263,7 +322,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error { // Sniff for "config file already exists" on stderr; if we see it // we'll treat the non-zero exit as a soft success — running init - // against an already-initialized repo is a no-op semantically, + // against an already-initialised repo is a no-op semantically, // not a failure. Wraps the caller's handle so the line still // gets streamed verbatim to the operator-facing log. alreadyInited := false @@ -279,7 +338,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error { if err := runWithPump(cmd, sniff); err != nil { if alreadyInited { if handle != nil { - handle("event", "repo already initialized — treating as success", nil) + handle("event", "repo already initialised — treating as success", nil) } return nil } @@ -375,7 +434,7 @@ func (e Env) RunStats(ctx context.Context, handle LineHandler) (*RepoStats, erro return out, nil } -// CheckResult summarizes a `restic check` invocation. LockPresent is +// CheckResult summarises a `restic check` invocation. 
LockPresent is // true if the stderr stream contained a stale-lock signal (caller is // expected to surface this in the UI so the operator can run unlock). // ErrorsFound is true if check exited with a non-zero status (errors @@ -387,7 +446,7 @@ type CheckResult struct { // RunCheck executes `restic check` with optional --read-data-subset. // subsetPct of 0 omits the flag (full data check); >0 passes -// --read-data-subset N%. Returns a CheckResult summarizing what was +// --read-data-subset N%. Returns a CheckResult summarising what was // sniffed from stderr; the result is set even if check itself // returns an error (so the caller can persist last_check_status). func (e Env) RunCheck(ctx context.Context, subsetPct int, handle LineHandler) (CheckResult, error) { diff --git a/internal/restic/snapshots.go b/internal/restic/snapshots.go index 5c62a82..9593446 100644 --- a/internal/restic/snapshots.go +++ b/internal/restic/snapshots.go @@ -13,9 +13,11 @@ import ( // decode only the fields we project to the server; restic's full // shape has more (parent, tree, program version) that we don't need. // -// Summary is only populated by restic 0.16+ (which embeds the backup -// summary inside each snapshot). Older clients leave it nil and the -// agent reports zero size/file-count — the UI degrades to "—". +// Summary is only populated by restic 0.17+ (which embeds the backup +// summary inside each snapshot record). Older clients leave it nil +// and the agent reports zero size/file-count — the UI degrades to +// "—" and the column headers carry a tooltip explaining the version +// requirement (see web/templates/pages/host_detail.html). 
type Snapshot struct { ID string `json:"id"` ShortID string `json:"short_id"` diff --git a/internal/restic/version_test.go b/internal/restic/version_test.go new file mode 100644 index 0000000..d4a934f --- /dev/null +++ b/internal/restic/version_test.go @@ -0,0 +1,64 @@ +package restic + +import ( + "path/filepath" + "testing" +) + +func TestEnvAtLeastVersion(t *testing.T) { + t.Parallel() + cases := []struct { + ver string + major int + minor int + want bool + shortDesc string + }{ + {"0.17.0", 0, 17, true, "exact match"}, + {"0.17.1", 0, 17, true, "patch above"}, + {"0.18.0", 0, 17, true, "minor above"}, + {"1.0.0", 0, 17, true, "major above"}, + {"0.16.4", 0, 17, false, "minor below"}, + {"0.16", 0, 17, false, "two-part minor below"}, + {"", 0, 17, false, "empty"}, + {"v0.17", 0, 17, false, "prefixed v rejected"}, + {"unknown", 0, 17, false, "non-numeric rejected"}, + } + for _, c := range cases { + got := Env{Version: c.ver}.AtLeastVersion(c.major, c.minor) + if got != c.want { + t.Errorf("AtLeastVersion(%q, %d, %d): got %v want %v · %s", + c.ver, c.major, c.minor, got, c.want, c.shortDesc) + } + } +} + +func TestExpandHome(t *testing.T) { + // Not parallel — t.Setenv on HOME would race with sibling tests. + tmp := t.TempDir() + t.Setenv("HOME", tmp) + + cases := []struct { + in, want string + }{ + {"$HOME/rm-restore/job-1/", filepath.Join(tmp, "rm-restore/job-1")}, + {"${HOME}/rm-restore/job-2/", filepath.Join(tmp, "rm-restore/job-2")}, + {"~/rm-restore/job-3/", filepath.Join(tmp, "rm-restore/job-3")}, + {"$HOME", tmp}, + {"~", tmp}, + {"/var/lib/x/y", "/var/lib/x/y"}, // absolute path passes through + {"", ""}, + {"$PATH/foo", "$PATH/foo"}, // other env vars not expanded + } + for _, c := range cases { + got := expandHome(c.in) + if got != c.want { + t.Errorf("expandHome(%q): got %q want %q", c.in, got, c.want) + } + } + + // Sanity: an absolute path always passes through regardless of HOME. 
+ if got := expandHome("/abs"); got != "/abs" { + t.Errorf("expandHome(/abs): got %q", got) + } +} diff --git a/internal/server/http/agent_assets.go b/internal/server/http/agent_assets.go index 4808504..2efbd8e 100644 --- a/internal/server/http/agent_assets.go +++ b/internal/server/http/agent_assets.go @@ -57,7 +57,7 @@ func (s *Server) handleAgentBinary(w stdhttp.ResponseWriter, r *stdhttp.Request) } func (s *Server) handleInstallAsset(w stdhttp.ResponseWriter, r *stdhttp.Request) { - // chi's TrimPrefix-like behavior: r.URL.Path is "/install/". + // chi's TrimPrefix-like behaviour: r.URL.Path is "/install/". rel := strings.TrimPrefix(r.URL.Path, "/install/") // Reject any path traversal — must be a flat filename. if rel == "" || strings.ContainsAny(rel, "/\\") { diff --git a/internal/server/http/announce.go b/internal/server/http/announce.go index 8635cb8..82377fe 100644 --- a/internal/server/http/announce.go +++ b/internal/server/http/announce.go @@ -133,7 +133,7 @@ func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) { keyBytes, err := base64.StdEncoding.DecodeString(req.PublicKey) if err != nil { - // Try URL-safe / no-padding flavors before giving up. + // Try URL-safe / no-padding flavours before giving up. if k2, e2 := base64.RawStdEncoding.DecodeString(req.PublicKey); e2 == nil { keyBytes = k2 } else { @@ -195,7 +195,7 @@ func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) { // remoteIP returns r.RemoteAddr stripped of any :port suffix, plus // the X-Forwarded-For chain's first hop when behind a trusted proxy // (RM_TRUSTED_PROXY in the deployment doc). Trust-proxy lookup -// matches the framework's existing behavior elsewhere. +// matches the framework's existing behaviour elsewhere. 
func remoteIP(r *stdhttp.Request) string { if xff := r.Header.Get("X-Forwarded-For"); xff != "" { // Take the first IP in the chain (closest to the original diff --git a/internal/server/http/auth.go b/internal/server/http/auth.go index cb25f71..6c0fc2e 100644 --- a/internal/server/http/auth.go +++ b/internal/server/http/auth.go @@ -137,7 +137,7 @@ func (s *Server) handleBootstrap(w stdhttp.ResponseWriter, r *stdhttp.Request) { return } if n > 0 { - writeJSONError(w, stdhttp.StatusConflict, "already_initialized", + writeJSONError(w, stdhttp.StatusConflict, "already_initialised", "a user already exists; bootstrap is disabled") return } diff --git a/internal/server/http/cancel.go b/internal/server/http/cancel.go new file mode 100644 index 0000000..4e24227 --- /dev/null +++ b/internal/server/http/cancel.go @@ -0,0 +1,86 @@ +package http + +import ( + stdhttp "net/http" + "time" + + "github.com/go-chi/chi/v5" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// handleCancelJob is POST /api/jobs/{id}/cancel. Sends a command.cancel +// envelope to the host that owns the job; the agent kills the running +// restic subprocess, and the resulting job.finished envelope (status = +// canceled) is what actually transitions the job row — this handler +// does not touch the jobs table directly. Returning 202 makes that +// asynchronicity explicit. +// +// 4xx cases: +// - job not found (404) +// - job already in a terminal state (409 — nothing to cancel) +// - host offline (503 — same code path the run-now endpoint uses) +// +// Audit-logged as job.cancel with the job ID as target. 
+func (s *Server) handleCancelJob(w stdhttp.ResponseWriter, r *stdhttp.Request) { + user, ok := s.requireUser(r) + if !ok { + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") + return + } + jobID := chi.URLParam(r, "id") + if jobID == "" { + writeJSONError(w, stdhttp.StatusBadRequest, "missing_job_id", "") + return + } + + job, err := s.deps.Store.GetJob(r.Context(), jobID) + if err != nil { + writeJSONError(w, stdhttp.StatusNotFound, "job_not_found", "") + return + } + switch api.JobStatus(job.Status) { + case api.JobSucceeded, api.JobFailed, api.JobCancelled: + writeJSONError(w, stdhttp.StatusConflict, "job_terminal", + "job is already in a terminal state ("+job.Status+")") + return + } + + if !s.deps.Hub.Connected(job.HostID) { + writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", + "agent is not connected; can't deliver cancel signal") + return + } + + env, err := api.Marshal(api.MsgCommandCancel, jobID, api.CommandCancelPayload{ + JobID: jobID, + }) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + if err := s.deps.Hub.Send(r.Context(), job.HostID, env); err != nil { + writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", err.Error()) + return + } + + var actorID *string + actor := "system" + if user != nil { + actor = "user" + actorID = &user.ID + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: actorID, + Actor: actor, + Action: "job.cancel", + TargetKind: ptr("job"), + TargetID: &jobID, + TS: time.Now().UTC(), + }) + + w.WriteHeader(stdhttp.StatusAccepted) +} diff --git a/internal/server/http/cancel_test.go b/internal/server/http/cancel_test.go new file mode 100644 index 0000000..efcc953 --- /dev/null +++ b/internal/server/http/cancel_test.go @@ -0,0 +1,204 @@ +// cancel_test.go — covers POST /api/jobs/{id}/cancel. 
+package http + +import ( + "context" + "encoding/json" + stdhttp "net/http" + "strings" + "testing" + "time" + + "github.com/coder/websocket" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// TestCancelJobRunningHappyPath: a running job's cancel endpoint sends +// a command.cancel envelope with the right job id, returns 202, and +// writes a job.cancel audit row. +func TestCancelJobRunningHappyPath(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, token := enrolHostForWS(t, srv, st, "cancel-host") + c := agentDial(t, srv, ts, hostID, token) + sendHello(t, c, "cancel-host") + _ = drainUntil(t, c, api.MsgScheduleSet) + + // Seed a running job we can target. + jobID := ulid.Make().String() + now := time.Now().UTC() + if err := st.CreateJob(context.Background(), store.Job{ + ID: jobID, HostID: hostID, Kind: "backup", + ActorKind: "user", CreatedAt: now, + }); err != nil { + t.Fatalf("create job: %v", err) + } + if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil { + t.Fatalf("mark started: %v", err) + } + + cookie := loginAsAdmin(t, st) + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/api/jobs/"+jobID+"/cancel", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusAccepted { + t.Fatalf("status: got %d, want 202", res.StatusCode) + } + + // Read the dispatched command.cancel envelope. 
+ deadline := time.Now().Add(2 * time.Second) + var got api.Envelope + for time.Now().Before(deadline) { + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + mt, raw, rerr := c.Read(ctx) + cancel() + if rerr != nil { + break + } + if mt != websocket.MessageText { + continue + } + if !strings.Contains(string(raw), `"command.cancel"`) { + continue + } + if err := json.Unmarshal(raw, &got); err != nil { + t.Fatalf("unmarshal: %v", err) + } + break + } + if got.Type != api.MsgCommandCancel { + t.Fatalf("never received command.cancel envelope") + } + var cp api.CommandCancelPayload + if err := got.UnmarshalPayload(&cp); err != nil { + t.Fatalf("unmarshal payload: %v", err) + } + if cp.JobID != jobID { + t.Fatalf("payload job_id: got %q want %q", cp.JobID, jobID) + } + + // Audit row exists. + var n int + if err := st.DB().QueryRow( + `SELECT COUNT(*) FROM audit_log WHERE action = 'job.cancel' AND target_id = ?`, + jobID).Scan(&n); err != nil { + t.Fatalf("audit count: %v", err) + } + if n != 1 { + t.Fatalf("audit rows: got %d, want 1", n) + } +} + +// TestCancelJobAlreadyTerminal: a job in succeeded/failed/canceled +// state returns 409 and does NOT send a WS envelope. 
+func TestCancelJobAlreadyTerminal(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, token := enrolHostForWS(t, srv, st, "term-host") + c := agentDial(t, srv, ts, hostID, token) + sendHello(t, c, "term-host") + _ = drainUntil(t, c, api.MsgScheduleSet) + + jobID := ulid.Make().String() + now := time.Now().UTC() + if err := st.CreateJob(context.Background(), store.Job{ + ID: jobID, HostID: hostID, Kind: "backup", + ActorKind: "user", CreatedAt: now, + }); err != nil { + t.Fatalf("create job: %v", err) + } + if err := st.MarkJobFinished(context.Background(), jobID, "succeeded", 0, nil, "", now); err != nil { + t.Fatalf("mark finished: %v", err) + } + + cookie := loginAsAdmin(t, st) + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/api/jobs/"+jobID+"/cancel", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusConflict { + t.Fatalf("status: got %d, want 409", res.StatusCode) + } + + // Drain — no command.cancel should arrive. + ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond) + defer cancel() + for { + mt, raw, rerr := c.Read(ctx) + if rerr != nil { + break + } + if mt == websocket.MessageText && strings.Contains(string(raw), `"command.cancel"`) { + t.Fatalf("unexpected command.cancel envelope for terminal job") + } + } +} + +// TestCancelJobNotFound: 404 for a job id that doesn't exist. 
+func TestCancelJobNotFound(t *testing.T) { + t.Parallel() + _, ts, st := rawTestServer(t) + cookie := loginAsAdmin(t, st) + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/api/jobs/"+ulid.Make().String()+"/cancel", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusNotFound { + t.Fatalf("status: got %d, want 404", res.StatusCode) + } +} + +// TestCancelJobHostOffline: a queued/running job whose host has no +// active WS connection returns 503. +func TestCancelJobHostOffline(t *testing.T) { + t.Parallel() + _, ts, st := rawTestServer(t) + // Create a host but don't connect a WS for it. + hostID := ulid.Make().String() + if err := st.CreateHost(context.Background(), store.Host{ + ID: hostID, Name: "offline-host", OS: "linux", Arch: "amd64", + EnrolledAt: time.Now().UTC(), + }, "deadbeef", ""); err != nil { + t.Fatalf("create host: %v", err) + } + jobID := ulid.Make().String() + now := time.Now().UTC() + if err := st.CreateJob(context.Background(), store.Job{ + ID: jobID, HostID: hostID, Kind: "backup", + ActorKind: "user", CreatedAt: now, + }); err != nil { + t.Fatalf("create job: %v", err) + } + if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil { + t.Fatalf("mark started: %v", err) + } + + cookie := loginAsAdmin(t, st) + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/api/jobs/"+jobID+"/cancel", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusServiceUnavailable { + t.Fatalf("status: got %d, want 503", res.StatusCode) + } +} diff --git a/internal/server/http/diff.go b/internal/server/http/diff.go new file mode 100644 index 0000000..863602f --- /dev/null +++ b/internal/server/http/diff.go @@ -0,0 +1,150 @@ +package http + +import ( + "encoding/json" + stdhttp "net/http" + "strings" 
+ "time" + + "github.com/go-chi/chi/v5" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// snapshotDiffRequest is the JSON body for POST .../snapshots/diff. +// Either short or long snapshot IDs are accepted (restic's diff +// command takes both). +type snapshotDiffRequest struct { + SnapshotA string `json:"snapshot_a"` + SnapshotB string `json:"snapshot_b"` +} + +// handleSnapshotDiff dispatches a JobDiff. Output streams as +// log.stream lines to the standard live job page; the operator reads +// the diff text directly there. Behaves like the run-now endpoints: +// 503 if the host is offline, 400 if the IDs are missing, 422 if +// they're not in the host's snapshot list (we don't want operators +// running diffs against arbitrary snapshot strings). +func (s *Server) handleSnapshotDiff(w stdhttp.ResponseWriter, r *stdhttp.Request) { + user, ok := s.requireUser(r) + if !ok { + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") + return + } + hostID := chi.URLParam(r, "id") + host, err := s.deps.Store.GetHost(r.Context(), hostID) + if err != nil { + writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "") + return + } + + var req snapshotDiffRequest + // HTMX form posts arrive as application/x-www-form-urlencoded; + // the JSON shape is also accepted for REST callers. 
+ ct := r.Header.Get("Content-Type") + if strings.HasPrefix(ct, "application/x-www-form-urlencoded") { + if err := r.ParseForm(); err != nil { + writeJSONError(w, stdhttp.StatusBadRequest, "invalid_form", err.Error()) + return + } + req.SnapshotA = strings.TrimSpace(r.PostForm.Get("snapshot_a")) + req.SnapshotB = strings.TrimSpace(r.PostForm.Get("snapshot_b")) + } else { + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error()) + return + } + req.SnapshotA = strings.TrimSpace(req.SnapshotA) + req.SnapshotB = strings.TrimSpace(req.SnapshotB) + } + if req.SnapshotA == "" || req.SnapshotB == "" { + writeJSONError(w, stdhttp.StatusBadRequest, "missing_snapshot", + "snapshot_a and snapshot_b are both required") + return + } + if req.SnapshotA == req.SnapshotB { + writeJSONError(w, stdhttp.StatusUnprocessableEntity, "same_snapshot", + "diff requires two different snapshots") + return + } + + // Validate the IDs are known to this host. Match on long ID, short + // ID, or any prefix match — operators sometimes paste a 6-char + // shortened form. + snaps, err := s.deps.Store.ListSnapshotsByHost(r.Context(), host.ID) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + resolveID := func(idOrShort string) string { + for _, s := range snaps { + if s.ID == idOrShort || s.ShortID == idOrShort { + return s.ID + } + } + // Prefix fallback (operator pasted 6 chars of a long id). 
+ for _, s := range snaps { + if strings.HasPrefix(s.ID, idOrShort) { + return s.ID + } + } + return "" + } + a := resolveID(req.SnapshotA) + b := resolveID(req.SnapshotB) + if a == "" || b == "" { + writeJSONError(w, stdhttp.StatusUnprocessableEntity, "snapshot_not_found", + "one or both snapshot ids are not in this host's snapshot list") + return + } + + if !s.deps.Hub.Connected(host.ID) { + writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", + "agent is not connected; try again when it reconnects") + return + } + + jobID := ulid.Make().String() + now := time.Now().UTC() + if err := s.deps.Store.CreateJob(r.Context(), store.Job{ + ID: jobID, HostID: host.ID, Kind: string(api.JobDiff), + ActorKind: "user", ActorID: &user.ID, CreatedAt: now, + }); err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error()) + return + } + env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{ + JobID: jobID, Kind: api.JobDiff, + Diff: &api.DiffPayload{SnapshotA: a, SnapshotB: b}, + }) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil { + writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", err.Error()) + return + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: &user.ID, + Actor: "user", + Action: "host.snapshot_diff", + TargetKind: ptr("host"), + TargetID: &host.ID, + TS: now, + }) + + jobURL := "/jobs/" + jobID + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("HX-Redirect", jobURL) + w.WriteHeader(stdhttp.StatusNoContent) + return + } + writeJSON(w, stdhttp.StatusAccepted, map[string]string{ + "job_id": jobID, + "job_url": jobURL, + }) +} diff --git a/internal/server/http/diff_test.go b/internal/server/http/diff_test.go new file mode 100644 index 0000000..1c69275 --- /dev/null +++ b/internal/server/http/diff_test.go @@ 
-0,0 +1,136 @@ +// diff_test.go — covers POST /api/hosts/{id}/snapshots/diff (P3-09). +package http + +import ( + "context" + "encoding/json" + stdhttp "net/http" + "net/url" + "strings" + "testing" + "time" + + "github.com/coder/websocket" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// TestSnapshotDiffHappyPath verifies a valid two-snapshot form ships +// a JobDiff command.run with the right payload. +func TestSnapshotDiffHappyPath(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServerWithUI(t) + hostID, token := enrolHostForUI(t, srv, st, "diff-host") + a, b := seedTwoSnapshots(t, st, hostID, "diff-host") + c := agentDial(t, srv, ts, hostID, token) + sendHello(t, c, "diff-host") + _ = drainUntil(t, c, api.MsgScheduleSet) + cookie := loginAsAdmin(t, st) + + form := url.Values{ + "snapshot_a": {a}, + "snapshot_b": {b}, + } + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/hosts/"+hostID+"/snapshots/diff", + strings.NewReader(form.Encode())) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.Header.Set("HX-Request", "true") + req.AddCookie(cookie) + client := &stdhttp.Client{ + CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error { + return stdhttp.ErrUseLastResponse + }, + } + res, err := client.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusNoContent { + t.Fatalf("status: got %d, want 204", res.StatusCode) + } + if res.Header.Get("HX-Redirect") == "" { + t.Fatal("expected HX-Redirect to live job page") + } + + deadline := time.Now().Add(2 * time.Second) + var got api.Envelope + for time.Now().Before(deadline) { + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + mt, raw, rerr := c.Read(ctx) + cancel() + if rerr != nil { + break + } + if mt != websocket.MessageText { + continue + } + if !strings.Contains(string(raw), `"kind":"diff"`) { + continue + } + _ = json.Unmarshal(raw, &got) + break + } + if 
got.Type != api.MsgCommandRun { + t.Fatal("never received diff command.run") + } + var cp api.CommandRunPayload + _ = got.UnmarshalPayload(&cp) + if cp.Diff == nil { + t.Fatal("diff payload nil") + } + if cp.Diff.SnapshotA != a || cp.Diff.SnapshotB != b { + t.Fatalf("diff payload: got %+v want a=%s b=%s", cp.Diff, a, b) + } +} + +// TestSnapshotDiffSameID rejects diff(a,a) with 422. +func TestSnapshotDiffSameID(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServerWithUI(t) + hostID, _ := enrolHostForUI(t, srv, st, "diff-same") + a := seedSnapshot(t, st, hostID, "diff-same") + cookie := loginAsAdmin(t, st) + + form := url.Values{"snapshot_a": {a}, "snapshot_b": {a}} + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/hosts/"+hostID+"/snapshots/diff", + strings.NewReader(form.Encode())) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusUnprocessableEntity { + t.Fatalf("status: got %d, want 422", res.StatusCode) + } + _ = srv +} + +// TestSnapshotDiffUnknownID rejects ids not in the host's snapshot list. 
+func TestSnapshotDiffUnknownID(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServerWithUI(t) + hostID, _ := enrolHostForUI(t, srv, st, "diff-unknown") + _ = seedSnapshot(t, st, hostID, "diff-unknown") + cookie := loginAsAdmin(t, st) + + form := url.Values{"snapshot_a": {"deadbeef"}, "snapshot_b": {"cafebabe"}} + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/hosts/"+hostID+"/snapshots/diff", + strings.NewReader(form.Encode())) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusUnprocessableEntity { + t.Fatalf("status: got %d, want 422", res.StatusCode) + } + _ = srv +} diff --git a/internal/server/http/enrollment.go b/internal/server/http/enrollment.go index f1615e0..5a8203f 100644 --- a/internal/server/http/enrollment.go +++ b/internal/server/http/enrollment.go @@ -213,7 +213,7 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) // session cookie and trust it, validating the cookie via store. 
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } diff --git a/internal/server/http/host_bandwidth.go b/internal/server/http/host_bandwidth.go index 8165a09..d7ced96 100644 --- a/internal/server/http/host_bandwidth.go +++ b/internal/server/http/host_bandwidth.go @@ -27,7 +27,7 @@ type hostBandwidthView struct { func (s *Server) handleUpdateHostBandwidth(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/host_bandwidth_push.go b/internal/server/http/host_bandwidth_push.go index 2cfa7fd..f8512f6 100644 --- a/internal/server/http/host_bandwidth_push.go +++ b/internal/server/http/host_bandwidth_push.go @@ -58,7 +58,7 @@ func (s *Server) pushBandwidthToAgent(ctx context.Context, hostID string, up, do // bandwidthPayload builds a ConfigUpdatePayload with only the // bandwidth fields populated. Pointers are passed through verbatim; // callers wanting to clear a cap should pass a non-nil pointer to 0. -// On the on-hello path we materialize zero-valued pointers when the +// On the on-hello path we materialise zero-valued pointers when the // host record has no cap set, so the agent's stored state is always // in sync (rather than retaining whatever value it last received). func bandwidthPayload(up, down *int) api.ConfigUpdatePayload { diff --git a/internal/server/http/host_credentials.go b/internal/server/http/host_credentials.go index c414eba..4d033a4 100644 --- a/internal/server/http/host_credentials.go +++ b/internal/server/http/host_credentials.go @@ -32,7 +32,7 @@ type hostRepoCredsView struct { // creds for UI display. 
404 if no credential has ever been set. func (s *Server) handleGetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -88,7 +88,7 @@ type hostRepoCredsRequest struct { func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) { user, ok := s.requireUser(r) if !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -165,7 +165,7 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R w.WriteHeader(stdhttp.StatusNoContent) } -// pushRepoCredsToAgent serializes blob into a config.update envelope +// pushRepoCredsToAgent serialises blob into a config.update envelope // and ships it down the agent's WS. Returns an error from the hub // (no-op if not connected — caller is expected to check first when it // matters). @@ -192,7 +192,7 @@ func (s *Server) pushRepoCredsToAgent(ctx context.Context, hostID string, blob r // uses this to pre-fill the edit form. func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -234,7 +234,7 @@ func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp. 
func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) { user, ok := s.requireUser(r) if !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -319,7 +319,7 @@ func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp. func (s *Server) handleDeleteAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) { user, ok := s.requireUser(r) if !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/hosts.go b/internal/server/http/hosts.go index e1d5ca3..59f913b 100644 --- a/internal/server/http/hosts.go +++ b/internal/server/http/hosts.go @@ -34,7 +34,7 @@ type hostView struct { // see the same projection. func (s *Server) handleListHosts(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hosts, err := s.deps.Store.ListHosts(r.Context()) @@ -55,7 +55,7 @@ func (s *Server) handleListHosts(w stdhttp.ResponseWriter, r *stdhttp.Request) { // handleFleetSummary returns the dashboard tile aggregate. 
func (s *Server) handleFleetSummary(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } fs, err := s.deps.Store.FleetSummary(r.Context()) diff --git a/internal/server/http/job_download.go b/internal/server/http/job_download.go new file mode 100644 index 0000000..03ad7a7 --- /dev/null +++ b/internal/server/http/job_download.go @@ -0,0 +1,135 @@ +package http + +import ( + "bufio" + "encoding/json" + "fmt" + stdhttp "net/http" + "strings" + + "github.com/go-chi/chi/v5" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// handleJobLogDownload is GET /api/jobs/{id}/log{.txt,.ndjson}. +// +// Source of truth is the persisted job_logs table — works any time, +// regardless of whether the job is running or already finished. The +// download is "everything the server has up to right now"; the live +// stream is unaffected (no pause needed). If the operator wants a +// fuller snapshot of a still-running job, they hit Download again. +// +// Format is picked from the URL suffix (.txt | .ndjson) for a +// sensible filename in the browser, or the ?format= query param for +// REST callers. Default is txt. +func (s *Server) handleJobLogDownload(w stdhttp.ResponseWriter, r *stdhttp.Request) { + if _, ok := s.requireUser(r); !ok { + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") + return + } + jobID := chi.URLParam(r, "id") + if jobID == "" { + writeJSONError(w, stdhttp.StatusBadRequest, "missing_job_id", "") + return + } + job, err := s.deps.Store.GetJob(r.Context(), jobID) + if err != nil { + writeJSONError(w, stdhttp.StatusNotFound, "job_not_found", "") + return + } + + format := r.URL.Query().Get("format") + if format == "" { + // Sniff the URL — chi routes both /log.txt and /log.ndjson here + // (or .log if a future route adds it) via the {format} matcher. 
+ fmtParam := chi.URLParam(r, "format") + switch fmtParam { + case "ndjson": + format = "ndjson" + default: + format = "txt" + } + } + + logs, err := s.deps.Store.ListJobLogs(r.Context(), jobID, 0, 0) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error()) + return + } + + short := jobID + if len(short) > 8 { + short = short[:8] + } + filename := "job-" + job.Kind + "-" + short + switch format { + case "ndjson": + w.Header().Set("Content-Type", "application/x-ndjson; charset=utf-8") + w.Header().Set("Content-Disposition", + `attachment; filename="`+filename+`.ndjson"`) + writeLogsNDJSON(w, logs) + default: + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("Content-Disposition", + `attachment; filename="`+filename+`.txt"`) + writeLogsText(w, job, logs) + } +} + +// writeLogsText renders the logs in the same shape the live page shows: +// "HH:MM:SS.mmm TAG payload". Adds a small header so the file is +// useful as a standalone artefact (operator pastes it into a ticket). +func writeLogsText(w stdhttp.ResponseWriter, job *store.Job, logs []store.JobLogLine) { + bw := bufio.NewWriter(w) + defer func() { _ = bw.Flush() }() + _, _ = fmt.Fprintf(bw, "# job %s · kind %s · status %s\n", + job.ID, job.Kind, job.Status) + if job.StartedAt != nil { + _, _ = fmt.Fprintf(bw, "# started %s\n", job.StartedAt.UTC().Format("2006-01-02T15:04:05.000Z")) + } + if job.FinishedAt != nil { + _, _ = fmt.Fprintf(bw, "# finished %s\n", job.FinishedAt.UTC().Format("2006-01-02T15:04:05.000Z")) + } + _, _ = fmt.Fprintf(bw, "# %d log lines\n\n", len(logs)) + for _, l := range logs { + tag := streamTag(l.Stream) + ts := l.TS.UTC().Format("15:04:05.000") + // Strip embedded newlines from payload — log lines should be + // single-line, but defensive: a stray '\n' in stderr would + // break grep -n. 
+ payload := strings.ReplaceAll(l.Payload, "\n", " ") + _, _ = fmt.Fprintf(bw, "%s %s %s\n", ts, tag, payload) + } +} + +// writeLogsNDJSON emits one JSON object per line. Each object stands +// alone — appending to the file remains valid NDJSON. +func writeLogsNDJSON(w stdhttp.ResponseWriter, logs []store.JobLogLine) { + enc := json.NewEncoder(w) + for _, l := range logs { + _ = enc.Encode(struct { + Seq int64 `json:"seq"` + TS string `json:"ts"` + Stream string `json:"stream"` + Payload string `json:"payload"` + }{ + Seq: l.Seq, + TS: l.TS.UTC().Format("2006-01-02T15:04:05.000Z"), + Stream: l.Stream, + Payload: l.Payload, + }) + } +} + +func streamTag(s string) string { + switch s { + case "stdout": + return "OUT" + case "stderr": + return "ERR" + case "event": + return "EVENT" + } + return strings.ToUpper(s) +} diff --git a/internal/server/http/job_download_test.go b/internal/server/http/job_download_test.go new file mode 100644 index 0000000..429f76c --- /dev/null +++ b/internal/server/http/job_download_test.go @@ -0,0 +1,181 @@ +// job_download_test.go — covers GET /api/jobs/{id}/log.{txt,ndjson}. +package http + +import ( + "context" + "encoding/json" + stdhttp "net/http" + "strings" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// seedJobWithLogs creates a job + a few log lines for it. Returns the +// job ID. Caller is responsible for the test server + auth. 
+func seedJobWithLogs(t *testing.T, st *store.Store, hostID string, lineCount int) string { + t.Helper() + jobID := ulid.Make().String() + now := time.Now().UTC() + if err := st.CreateJob(context.Background(), store.Job{ + ID: jobID, HostID: hostID, Kind: "diff", + ActorKind: "user", CreatedAt: now, + }); err != nil { + t.Fatalf("create job: %v", err) + } + if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil { + t.Fatalf("mark started: %v", err) + } + for i := 0; i < lineCount; i++ { + stream := "stdout" + if i%5 == 0 { + stream = "stderr" + } + payload := `{"message_type":"change","path":"/etc/file` + + ulid.Make().String()[:6] + `","modifier":"M"}` + if err := st.AppendJobLog(context.Background(), jobID, int64(i+1), + now.Add(time.Duration(i)*time.Millisecond), + stream, payload); err != nil { + t.Fatalf("append log: %v", err) + } + } + if err := st.MarkJobFinished(context.Background(), jobID, "succeeded", 0, nil, "", now); err != nil { + t.Fatalf("mark finished: %v", err) + } + return jobID +} + +// TestJobLogDownloadTxt: plain-text format includes a header + one +// line per log row in the expected shape. 
+func TestJobLogDownloadTxt(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, _ := enrolHostForWS(t, srv, st, "dl-txt-host") + jobID := seedJobWithLogs(t, st, hostID, 12) + cookie := loginAsAdmin(t, st) + + req, _ := stdhttp.NewRequest("GET", + ts.URL+"/api/jobs/"+jobID+"/log.txt", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusOK { + t.Fatalf("status: got %d, want 200", res.StatusCode) + } + if ct := res.Header.Get("Content-Type"); !strings.HasPrefix(ct, "text/plain") { + t.Errorf("content-type: got %q", ct) + } + if cd := res.Header.Get("Content-Disposition"); !strings.Contains(cd, ".txt") { + t.Errorf("content-disposition: got %q", cd) + } + body := readBody(t, res.Body) + // Header lines. + if !strings.HasPrefix(body, "# job ") { + t.Errorf("expected '# job ...' header line; got %q", short(body)) + } + if !strings.Contains(body, "12 log lines") { + t.Errorf("expected '12 log lines'; got %q", short(body)) + } + // One body line per log row — count non-comment, non-empty lines. + var rows int + for _, line := range strings.Split(body, "\n") { + l := strings.TrimSpace(line) + if l == "" || strings.HasPrefix(l, "#") { + continue + } + rows++ + } + if rows != 12 { + t.Errorf("expected 12 body rows, got %d", rows) + } + // Tag check: at least one ERR row (every 5th was stderr). + if !strings.Contains(body, " ERR ") { + t.Errorf("expected at least one ERR row") + } +} + +// TestJobLogDownloadNDJSON: each line is a self-contained JSON object. 
+func TestJobLogDownloadNDJSON(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, _ := enrolHostForWS(t, srv, st, "dl-ndjson-host") + jobID := seedJobWithLogs(t, st, hostID, 5) + cookie := loginAsAdmin(t, st) + + req, _ := stdhttp.NewRequest("GET", + ts.URL+"/api/jobs/"+jobID+"/log.ndjson", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusOK { + t.Fatalf("status: got %d, want 200", res.StatusCode) + } + if ct := res.Header.Get("Content-Type"); !strings.HasPrefix(ct, "application/x-ndjson") { + t.Errorf("content-type: got %q", ct) + } + body := readBody(t, res.Body) + // Each non-empty line should parse as an object with seq/ts/stream/payload. + var seen int + for _, line := range strings.Split(body, "\n") { + if strings.TrimSpace(line) == "" { + continue + } + var obj struct { + Seq int64 `json:"seq"` + TS string `json:"ts"` + Stream string `json:"stream"` + Payload string `json:"payload"` + } + if err := json.Unmarshal([]byte(line), &obj); err != nil { + t.Fatalf("parse line %q: %v", line, err) + } + if obj.Seq == 0 || obj.TS == "" || obj.Stream == "" || obj.Payload == "" { + t.Errorf("incomplete object: %+v", obj) + } + seen++ + } + if seen != 5 { + t.Errorf("parsed %d objects, want 5", seen) + } +} + +// TestJobLogDownloadNotFound: 404 for an unknown job id. +func TestJobLogDownloadNotFound(t *testing.T) { + t.Parallel() + _, ts, st := rawTestServer(t) + cookie := loginAsAdmin(t, st) + req, _ := stdhttp.NewRequest("GET", + ts.URL+"/api/jobs/"+ulid.Make().String()+"/log.txt", nil) + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusNotFound { + t.Fatalf("status: got %d, want 404", res.StatusCode) + } +} + +// TestJobLogDownloadUnauthenticated: without a session cookie, 401. 
+func TestJobLogDownloadUnauthenticated(t *testing.T) { + t.Parallel() + _, ts, _ := rawTestServer(t) + res, err := stdhttp.Get(ts.URL + "/api/jobs/x/log.txt") + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusUnauthorized { + t.Fatalf("status: got %d, want 401", res.StatusCode) + } +} diff --git a/internal/server/http/jobs.go b/internal/server/http/jobs.go index d4efa63..8740abe 100644 --- a/internal/server/http/jobs.go +++ b/internal/server/http/jobs.go @@ -31,7 +31,7 @@ type runNowResponse struct { func (s *Server) handleRunNow(w stdhttp.ResponseWriter, r *stdhttp.Request) { user, ok := s.requireUser(r) if !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -152,7 +152,8 @@ func (s *Server) requireUser(r *stdhttp.Request) (*store.User, bool) { func validJobKind(k api.JobKind) bool { switch k { - case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune, api.JobCheck, api.JobUnlock: + case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune, + api.JobCheck, api.JobUnlock, api.JobRestore, api.JobDiff: return true } return false diff --git a/internal/server/http/p2r01_ws_test.go b/internal/server/http/p2r01_ws_test.go index 23bb9a0..1cbe810 100644 --- a/internal/server/http/p2r01_ws_test.go +++ b/internal/server/http/p2r01_ws_test.go @@ -81,7 +81,7 @@ func drainUntil(t *testing.T, c *websocket.Conn, wantType api.MessageType) api.E return api.Envelope{} } -// enrolHostForWS pre-enrolls a host with bound repo creds so the server +// enrolHostForWS pre-enrols a host with bound repo creds so the server // will treat it as ready to receive command.run. 
func enrolHostForWS(t *testing.T, srv *Server, st *store.Store, name string) (hostID, token string) { t.Helper() diff --git a/internal/server/http/pending_drain_test.go b/internal/server/http/pending_drain_test.go index 0cec822..a1714a9 100644 --- a/internal/server/http/pending_drain_test.go +++ b/internal/server/http/pending_drain_test.go @@ -506,12 +506,12 @@ func TestEnqueueOnDispatchFailure(t *testing.T) { func TestDrainPendingSerializesPerHost(t *testing.T) { t.Parallel() srv, ts, st := rawTestServer(t) - hostID, token := enrolHostForWS(t, srv, st, "serialize-host") + hostID, token := enrolHostForWS(t, srv, st, "serialise-host") gid, sid := seedSchedAndGroup(t, st, hostID, 10) // Connect the agent so DrainPending can dispatch. c := agentDial(t, srv, ts, hostID, token) - sendHello(t, c, "serialize-host") + sendHello(t, c, "serialise-host") // Drain the on-hello goroutine's pass first (no pending rows yet), // then wait for the schedule.set so the connection is fully settled. _ = drainUntil(t, c, api.MsgScheduleSet) diff --git a/internal/server/http/pending_ws.go b/internal/server/http/pending_ws.go index 9373928..edd0abb 100644 --- a/internal/server/http/pending_ws.go +++ b/internal/server/http/pending_ws.go @@ -214,7 +214,7 @@ type acceptForm struct { func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) { user, ok := s.requireUser(r) if !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } pendingID := chi.URLParam(r, "id") @@ -315,7 +315,7 @@ func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Re func (s *Server) handleRejectPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) { user, ok := s.requireUser(r) if !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } pendingID := chi.URLParam(r, "id") diff 
--git a/internal/server/http/repo_maintenance.go b/internal/server/http/repo_maintenance.go index 364024b..6122352 100644 --- a/internal/server/http/repo_maintenance.go +++ b/internal/server/http/repo_maintenance.go @@ -41,7 +41,7 @@ func toRepoMaintenanceView(m store.HostRepoMaintenance) repoMaintenanceView { func (s *Server) handleGetRepoMaintenance(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -84,7 +84,7 @@ type repoMaintenanceWriteRequest struct { func (s *Server) handleUpdateRepoMaintenance(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/repo_ops.go b/internal/server/http/repo_ops.go index 920677d..00c8078 100644 --- a/internal/server/http/repo_ops.go +++ b/internal/server/http/repo_ops.go @@ -26,7 +26,7 @@ func (s *Server) handleRunRepoPrune(w stdhttp.ResponseWriter, r *stdhttp.Request stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther) return } - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -72,7 +72,7 @@ func (s *Server) handleRunRepoCheck(w stdhttp.ResponseWriter, r *stdhttp.Request stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther) return } - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -125,7 +125,7 @@ func (s *Server) handleRunRepoUnlock(w stdhttp.ResponseWriter, r *stdhttp.Reques stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther) return } - 
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/run_group.go b/internal/server/http/run_group.go index 1ac33db..d47ecf3 100644 --- a/internal/server/http/run_group.go +++ b/internal/server/http/run_group.go @@ -53,7 +53,7 @@ func (s *Server) handleRunSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther) return } - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/schedules.go b/internal/server/http/schedules.go index 1e33e9d..6e7d3a1 100644 --- a/internal/server/http/schedules.go +++ b/internal/server/http/schedules.go @@ -61,7 +61,7 @@ var cronParser = cron.NewParser( func (s *Server) handleListSchedules(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -89,7 +89,7 @@ func (s *Server) handleListSchedules(w stdhttp.ResponseWriter, r *stdhttp.Reques func (s *Server) handleCreateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -126,7 +126,7 @@ func (s *Server) handleCreateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Reque func (s *Server) handleUpdateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := 
chi.URLParam(r, "id") @@ -173,7 +173,7 @@ func (s *Server) handleUpdateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Reque func (s *Server) handleDeleteSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/server.go b/internal/server/http/server.go index 5fd6539..3a20733 100644 --- a/internal/server/http/server.go +++ b/internal/server/http/server.go @@ -43,7 +43,7 @@ type Server struct { srv *stdhttp.Server deps Deps - // drainLocks serializes DrainPending per host. The on-hello + // drainLocks serialises DrainPending per host. The on-hello // goroutine and the 30s ticker can otherwise race for the same // host, double-dispatching every pending row. Map of hostID → // sync.Mutex; checked-and-locked atomically via drainLocksMu. @@ -58,6 +58,11 @@ type Server struct { // pending_id so the accept/reject handlers can push the bearer // or close cleanly (P2-18b). pendingHub *pendingHub + + // treeCache holds per-wizard-session listings of snapshot + // directories (P3-X2). Pre-allocated in New so the lazy-init + // race is impossible. + treeCache *treeCache } // New builds a configured but not-yet-started server. @@ -81,6 +86,7 @@ func New(deps Deps) *Server { drainLocks: make(map[string]*sync.Mutex), announceRL: newAnnounceLimiter(), pendingHub: newPendingHub(), + treeCache: newTreeCache(), } s.routes(r) @@ -178,8 +184,22 @@ func (s *Server) routes(r chi.Router) { r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune) r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck) r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock) + + // Cancel a running job. Operator-driven, sends command.cancel + // to the agent which kills the restic subprocess; the agent's + // resulting job.finished (status=canceled) is what flips the + // job row. 
+ r.Post("/jobs/{id}/cancel", s.handleCancelJob) + + // Snapshot diff (P3-09). Dispatches a JobDiff against two + // snapshots; output streams to the standard live job page. + r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff) }) + // HTMX form variant of diff (mounted outside /api so HTMX forms + // can post against it without the api/ prefix). + r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff) + // Per-source-group Run-now (HTMX form action). Available even // when the server is started without UI templates so REST callers // against the non-/api path also work. @@ -237,7 +257,7 @@ func (s *Server) routes(r chi.Router) { // Durable post-Add-host page (operator can refresh / come // back; password decrypted from the token row each render). // Polled fragment under /awaiting flips to "connected" once - // the agent enrolls. + // the agent enrols. r.Get("/hosts/pending/{token}", s.handleUIPendingHost) r.Get("/hosts/pending/{token}/awaiting", s.handleUIPendingAwaiting) // Host detail (Snapshots tab is the default). @@ -270,6 +290,12 @@ func (s *Server) routes(r chi.Router) { r.Post("/hosts/{id}/schedules/{sid}/run", s.handleUIScheduleRun) // Live job log. r.Get("/jobs/{id}", s.handleUIJobDetail) + // Restore wizard (P3-01/P3-02). Two GET variants land on the + // same handler; the second deep-links a chosen snapshot. + r.Get("/hosts/{id}/restore", s.handleUIRestoreGet) + r.Get("/hosts/{id}/snapshots/{sid}/restore", s.handleUIRestoreGet) + r.Post("/hosts/{id}/restore", s.handleUIRestorePost) + r.Get("/hosts/{id}/restore/tree", s.handleUIRestoreTree) } // Browser job-log stream (separate from /ws/agent so the auth @@ -278,6 +304,11 @@ func (s *Server) routes(r chi.Router) { if s.deps.JobHub != nil { r.Get("/api/jobs/{id}/stream", s.handleJobStream) } + + // Job log download (txt + ndjson). Source of truth is the + // persisted job_logs table; safe to call any time, no pause + // needed against the live stream. 
+ r.Get("/api/jobs/{id}/log.{format:txt|ndjson}", s.handleJobLogDownload) } // Start begins listening. Blocks until ListenAndServe returns diff --git a/internal/server/http/snapshots.go b/internal/server/http/snapshots.go index 138ce84..d88e18b 100644 --- a/internal/server/http/snapshots.go +++ b/internal/server/http/snapshots.go @@ -35,7 +35,7 @@ type listSnapshotsResponse struct { // onto whatever the server most recently received. func (s *Server) handleListHostSnapshots(w stdhttp.ResponseWriter, r *stdhttp.Request) { if _, ok := s.requireUser(r); !ok { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } diff --git a/internal/server/http/source_groups.go b/internal/server/http/source_groups.go index 7fcb5a5..88c2688 100644 --- a/internal/server/http/source_groups.go +++ b/internal/server/http/source_groups.go @@ -66,7 +66,7 @@ type sourceGroupWriteRequest struct { func (s *Server) handleListSourceGroups(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -90,7 +90,7 @@ func (s *Server) handleListSourceGroups(w stdhttp.ResponseWriter, r *stdhttp.Req func (s *Server) handleGetSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -109,7 +109,7 @@ func (s *Server) handleGetSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque func (s *Server) handleCreateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return 
} hostID := chi.URLParam(r, "id") @@ -152,7 +152,7 @@ func (s *Server) handleCreateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Re func (s *Server) handleUpdateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") @@ -207,7 +207,7 @@ func (s *Server) handleUpdateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Re // the UI can offer "remove from these schedules first." func (s *Server) handleDeleteSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) { if !s.authedUser(r) { - writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "") return } hostID := chi.URLParam(r, "id") diff --git a/internal/server/http/tree_cache.go b/internal/server/http/tree_cache.go new file mode 100644 index 0000000..9d7c077 --- /dev/null +++ b/internal/server/http/tree_cache.go @@ -0,0 +1,112 @@ +package http + +import ( + "context" + "sync" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// treeCacheTTL is how long a per-session cached directory listing +// stays valid. The whole point of the cache is to make re-expanding +// nodes within the same wizard session snappy; 30 minutes covers a +// generous wizard interaction window without holding stale data +// indefinitely. +const treeCacheTTL = 30 * time.Minute + +// treeCacheKey identifies one cached listing. session_id scopes +// entries to a single browser session so two operators don't share +// view state; snapshot_id + path identify the directory inside the +// snapshot. 
+type treeCacheKey struct { + SessionID string + HostID string + SnapshotID string + Path string +} + +type treeCacheEntry struct { + Result api.TreeListResultPayload + ExpiresAt time.Time +} + +// treeCache is a per-process map of synchronously fetched directory +// listings. Concurrency is light (a few entries per active wizard +// session) so a single mutex is fine. +type treeCache struct { + mu sync.Mutex + entries map[treeCacheKey]treeCacheEntry +} + +func newTreeCache() *treeCache { + return &treeCache{entries: make(map[treeCacheKey]treeCacheEntry)} +} + +// Get returns a cached entry if one exists and hasn't expired. +func (c *treeCache) Get(k treeCacheKey, now time.Time) (api.TreeListResultPayload, bool) { + c.mu.Lock() + defer c.mu.Unlock() + e, ok := c.entries[k] + if !ok { + return api.TreeListResultPayload{}, false + } + if now.After(e.ExpiresAt) { + delete(c.entries, k) + return api.TreeListResultPayload{}, false + } + return e.Result, true +} + +// Put records a fresh listing under k. Caller is responsible for +// having validated the result first (Error == ""). +func (c *treeCache) Put(k treeCacheKey, result api.TreeListResultPayload, now time.Time) { + c.mu.Lock() + c.entries[k] = treeCacheEntry{ + Result: result, + ExpiresAt: now.Add(treeCacheTTL), + } + c.mu.Unlock() +} + +// Sweep deletes expired entries. Called opportunistically from the +// wizard handler — no separate goroutine needed; cache size is small. +func (c *treeCache) Sweep(now time.Time) { + c.mu.Lock() + for k, e := range c.entries { + if now.After(e.ExpiresAt) { + delete(c.entries, k) + } + } + c.mu.Unlock() +} + +// fetchTreeWithCache returns a directory listing — cache hit, or a +// synchronous tree.list RPC against the agent on miss. On agent error +// (not transport error), the result is returned as-is with Error set +// rather than cached, so a transient failure doesn't poison subsequent +// requests for the same path. 
+// +//nolint:unused // wired in by the wizard handler in the next slice +func (s *Server) fetchTreeWithCache(ctx context.Context, sessionID, hostID, snapshotID, path string) (api.TreeListResultPayload, error) { + now := time.Now() + k := treeCacheKey{SessionID: sessionID, HostID: hostID, SnapshotID: snapshotID, Path: path} + if cached, ok := s.treeCache.Get(k, now); ok { + return cached, nil + } + + reply, err := s.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList, + api.TreeListRequestPayload{SnapshotID: snapshotID, Path: path}, + 30*time.Second) + if err != nil { + return api.TreeListResultPayload{}, err + } + var result api.TreeListResultPayload + if perr := reply.UnmarshalPayload(&result); perr != nil { + return api.TreeListResultPayload{}, perr + } + if result.Error == "" { + s.treeCache.Put(k, result, now) + } + return result, nil +} diff --git a/internal/server/http/tree_rpc_test.go b/internal/server/http/tree_rpc_test.go new file mode 100644 index 0000000..e627235 --- /dev/null +++ b/internal/server/http/tree_rpc_test.go @@ -0,0 +1,146 @@ +// tree_rpc_test.go — full round-trip test for the tree.list synchronous +// RPC (P3-X2). A fake agent reads the inbound tree.list, replies with a +// canned tree.list.result, and we assert the server's SendRPC returned +// the expected payload. +package http + +import ( + "context" + "encoding/json" + "testing" + "time" + + "github.com/coder/websocket" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +func TestSendRPCTreeListRoundTrip(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, token := enrolHostForWS(t, srv, st, "rpc-host") + c := agentDial(t, srv, ts, hostID, token) + sendHello(t, c, "rpc-host") + _ = drainUntil(t, c, api.MsgScheduleSet) + + // Fake agent: read inbound envelopes, mirror tree.list with a + // canned result. Other inbound envelopes (config.update etc) are + // already drained above. 
+ done := make(chan error, 1) + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + for { + mt, raw, err := c.Read(ctx) + if err != nil { + done <- err + return + } + if mt != websocket.MessageText { + continue + } + var env api.Envelope + if err := json.Unmarshal(raw, &env); err != nil { + done <- err + return + } + if env.Type != api.MsgTreeList { + continue + } + var req api.TreeListRequestPayload + if err := env.UnmarshalPayload(&req); err != nil { + done <- err + return + } + result := api.TreeListResultPayload{ + SnapshotID: req.SnapshotID, + Path: req.Path, + Entries: []api.TreeListEntry{ + {Name: "etc", Type: "dir"}, + {Name: "var", Type: "dir"}, + }, + } + out, err := api.Marshal(api.MsgTreeListResult, env.ID, result) + if err != nil { + done <- err + return + } + rawOut, _ := json.Marshal(out) + if err := c.Write(ctx, websocket.MessageText, rawOut); err != nil { + done <- err + return + } + done <- nil + return + } + }() + + // Server-side SendRPC. + ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second) + defer cancel() + reply, err := srv.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList, + api.TreeListRequestPayload{SnapshotID: "f3a7b2c1", Path: "/"}, + 3*time.Second) + if err != nil { + t.Fatalf("SendRPC: %v", err) + } + if reply.Type != api.MsgTreeListResult { + t.Fatalf("reply type: got %q want %q", reply.Type, api.MsgTreeListResult) + } + var result api.TreeListResultPayload + if err := reply.UnmarshalPayload(&result); err != nil { + t.Fatalf("unmarshal reply: %v", err) + } + if result.SnapshotID != "f3a7b2c1" || result.Path != "/" { + t.Fatalf("payload: got %+v", result) + } + if len(result.Entries) != 2 || result.Entries[0].Name != "etc" { + t.Fatalf("entries: %+v", result.Entries) + } + + // Make sure the fake agent didn't error out. 
+ select { + case err := <-done: + if err != nil { + t.Fatalf("fake agent: %v", err) + } + case <-time.After(2 * time.Second): + t.Fatal("fake agent didn't finish") + } +} + +// TestSendRPCTimeoutNoReply: SendRPC times out cleanly when the agent +// never replies; the registry entry is released so a stray late reply +// wouldn't deadlock anything. +func TestSendRPCTimeoutNoReply(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServer(t) + hostID, token := enrolHostForWS(t, srv, st, "rpc-timeout-host") + c := agentDial(t, srv, ts, hostID, token) + sendHello(t, c, "rpc-timeout-host") + _ = drainUntil(t, c, api.MsgScheduleSet) + + // Fake agent reads but never replies. + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + for { + if _, _, err := c.Read(ctx); err != nil { + return + } + } + }() + + ctx := context.Background() + t0 := time.Now() + _, err := srv.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList, + api.TreeListRequestPayload{SnapshotID: "x", Path: "/"}, + 300*time.Millisecond) + if err == nil { + t.Fatal("expected timeout error") + } + elapsed := time.Since(t0) + if elapsed < 250*time.Millisecond || elapsed > 2*time.Second { + t.Fatalf("timeout took %s, expected ~300ms", elapsed) + } +} diff --git a/internal/server/http/ui_handlers.go b/internal/server/http/ui_handlers.go index f5d9594..bd76a2a 100644 --- a/internal/server/http/ui_handlers.go +++ b/internal/server/http/ui_handlers.go @@ -16,6 +16,7 @@ import ( "gitea.dcglab.co.uk/steve/restic-manager/internal/api" "gitea.dcglab.co.uk/steve/restic-manager/internal/auth" + "gitea.dcglab.co.uk/steve/restic-manager/internal/restic" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui" "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws" "gitea.dcglab.co.uk/steve/restic-manager/internal/store" @@ -276,7 +277,7 @@ type addHostPage struct { } // pendingHostPage is the GET /hosts/pending/{token} view. 
Lives -// for as long as the token does (1h ttl); once the agent enrolls, +// for as long as the token does (1h ttl); once the agent enrols, // the handler redirects to /hosts/{host_id} and this page is gone. type pendingHostPage struct { Token string @@ -377,7 +378,7 @@ func (s *Server) handleUIAddHostPost(w stdhttp.ResponseWriter, r *stdhttp.Reques // handleUIPendingHost serves the durable Add-host result page — // shown after a successful POST /hosts/new and reachable until the -// agent enrolls (the page redirects to /hosts/{id} once that +// agent enrols (the page redirects to /hosts/{id} once that // happens) or the token expires (1h ttl). The password is // re-decrypted from the encrypted token row on every render so // the operator can refresh, bookmark, navigate away and come back. @@ -512,6 +513,14 @@ type hostChromeData struct { InitStatus string InitAt *time.Time // started_at if non-nil else created_at InitJobID string + + // Latest 'restore' job — surfaced as a small line below the + // init-status one so the operator has at-a-glance visibility into + // recent destructive activity. Empty status means no restore has + // ever run on this host. + RestoreStatus string + RestoreAt *time.Time + RestoreJobID string } // loadHostChrome fetches the per-tab counts that every host-detail tab @@ -542,6 +551,15 @@ func (s *Server) loadHostChrome(r *stdhttp.Request, host store.Host, subtab, cru } d.InitAt = &t } + if j, err := s.deps.Store.LatestJobByKind(r.Context(), host.ID, "restore"); err == nil && j != nil { + d.RestoreStatus = j.Status + d.RestoreJobID = j.ID + t := j.CreatedAt + if j.StartedAt != nil { + t = *j.StartedAt + } + d.RestoreAt = &t + } return d } @@ -552,6 +570,12 @@ type hostDetailPage struct { // SnapshotsShown is the number rendered (we cap at ~50 for the // first slice; pagination lands when it matters). 
SnapshotsShown int + // LegacyRestic is true when the host's restic version predates + // 0.17, in which case `restic snapshots --json` doesn't embed the + // per-snapshot summary block and the Size/Files columns render + // blank. The template uses this to attach a tooltip to those + // column headers explaining the version requirement. + LegacyRestic bool } // handleUIHostDetail is the host detail page (snapshots tab by default). @@ -594,6 +618,7 @@ func (s *Server) handleUIHostDetail(w stdhttp.ResponseWriter, r *stdhttp.Request hostChromeData: s.loadHostChrome(r, *host, "snapshots", "snapshots"), Snapshots: shown, SnapshotsShown: len(shown), + LegacyRestic: !restic.Env{Version: host.ResticVersion}.AtLeastVersion(0, 17), } if err := s.deps.UI.Render(w, "host_detail", view); err != nil { slog.Error("ui: render host_detail", "err", err) @@ -713,7 +738,7 @@ func (s *Server) handleUIJobDetail(w stdhttp.ResponseWriter, r *stdhttp.Request) // same way our Go code does. func (s *Server) handleJobStream(w stdhttp.ResponseWriter, r *stdhttp.Request) { if u, _ := s.sessionUser(r); u == nil { - stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized) + stdhttp.Error(w, "unauthorised", stdhttp.StatusUnauthorized) return } jobID := chi.URLParam(r, "id") diff --git a/internal/server/http/ui_repo_reinit.go b/internal/server/http/ui_repo_reinit.go index c817df0..ab71bdf 100644 --- a/internal/server/http/ui_repo_reinit.go +++ b/internal/server/http/ui_repo_reinit.go @@ -49,7 +49,7 @@ func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request } if !s.deps.Hub.Connected(host.ID) { s.renderRepoPage(w, r, u, host, - "Host is offline — bring the agent back up before re-initializing.", + "Host is offline — bring the agent back up before re-initialising.", "", "", "") return } @@ -58,7 +58,7 @@ func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request if _, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo); 
err != nil { if errors.Is(err, store.ErrNotFound) { s.renderRepoPage(w, r, u, host, - "Bind repo credentials before re-initializing.", + "Bind repo credentials before re-initialising.", "", "", "") return } diff --git a/internal/server/http/ui_restore.go b/internal/server/http/ui_restore.go new file mode 100644 index 0000000..c43fa31 --- /dev/null +++ b/internal/server/http/ui_restore.go @@ -0,0 +1,447 @@ +package http + +import ( + "context" + "errors" + "log/slog" + stdhttp "net/http" + "sort" + "strings" + "time" + + "github.com/go-chi/chi/v5" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" + "gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// ui_restore.go — restore wizard backend (P3-01). +// +// GET /hosts/{id}/restore wizard step 1 (snapshot picker) +// GET /hosts/{id}/snapshots/{sid}/restore wizard with snapshot pre-selected +// GET /hosts/{id}/restore/tree HTMX partial: one tree node + children +// POST /hosts/{id}/restore dispatch the restore job + +// hostRestorePage is the model for the wizard template. +type hostRestorePage struct { + hostChromeData + + // Snapshot picker rows; rendered by the template into the step-1 + // table. Limited to most-recent N (the operator can refine on + // snapshot ID if they need an older one — out of scope for v1). + Snapshots []store.Snapshot + + // Selected is non-nil iff a snapshot has been chosen — either via + // the deep-link path /hosts/{id}/snapshots/{sid}/restore or by a + // previous form submission that the wizard re-rendered. + Selected *store.Snapshot + + // Default target dir — surfaced in the step-3 radio card. + DefaultTargetDir string + + // Online mirrors Hub.Connected so the dispatch button can be + // disabled at render time when the agent is offline. + Online bool + + // Error is shown as a banner above the wizard. 
Re-render-friendly: + // the operator's snapshot/path/target choices survive the round-trip. + Error string + + // Form fields preserved on validation re-render. The template + // reads these to pre-tick checkboxes etc; the names match the + // POST form keys. + FormPaths []string // "/etc/nginx/sites-available/alfa.conf" + FormInPlace bool + FormTargetDir string + FormConfirmHN string // typed-confirm input value +} + +// handleUIRestoreGet renders the wizard. URL variants: +// - /hosts/{id}/restore — step 1 = pick snapshot +// - /hosts/{id}/snapshots/{sid}/restore — snapshot pre-selected +func (s *Server) handleUIRestoreGet(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + hostID := chi.URLParam(r, "id") + host, err := s.deps.Store.GetHost(r.Context(), hostID) + if err != nil { + if errors.Is(err, store.ErrNotFound) { + stdhttp.NotFound(w, r) + return + } + slog.Error("ui restore: get host", "host_id", hostID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + page := hostRestorePage{ + hostChromeData: s.loadHostChrome(r, *host, "snapshots", "restore"), + DefaultTargetDir: defaultRestoreTargetDir(), + Online: s.deps.Hub.Connected(host.ID), + } + snaps, err := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID) + if err != nil { + slog.Error("ui restore: list snapshots", "host_id", hostID, "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + if len(snaps) > 100 { + snaps = snaps[:100] + } + page.Snapshots = snaps + + // Snapshot deep-link variant — if the URL carries a sid, prefill it. 
+ if sid := chi.URLParam(r, "sid"); sid != "" { + for i := range snaps { + if snaps[i].ID == sid || snaps[i].ShortID == sid { + p := snaps[i] + page.Selected = &p + break + } + } + } + + view := s.baseView(u) + view.Title = "Restore · " + host.Name + view.Page = page + if err := s.deps.UI.Render(w, "host_restore", view); err != nil { + slog.Error("ui restore: render", "err", err) + } +} + +// handleUIRestorePost validates the form and dispatches the restore +// job. On validation error re-renders the wizard with the error +// banner + the operator's input intact. +func (s *Server) handleUIRestorePost(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + hostID := chi.URLParam(r, "id") + host, err := s.deps.Store.GetHost(r.Context(), hostID) + if err != nil { + stdhttp.NotFound(w, r) + return + } + if err := r.ParseForm(); err != nil { + stdhttp.Error(w, "bad form", stdhttp.StatusBadRequest) + return + } + + snapshotID := strings.TrimSpace(r.PostForm.Get("snapshot_id")) + paths := r.PostForm["paths"] // multiple checkbox values + inPlace := r.PostForm.Get("target_mode") == "in_place" + targetDir := strings.TrimSpace(r.PostForm.Get("target_dir")) + confirmHN := strings.TrimSpace(r.PostForm.Get("confirm_hostname")) + + rerender := func(errMsg string, status int) { + page := hostRestorePage{ + hostChromeData: s.loadHostChrome(r, *host, "snapshots", "restore"), + DefaultTargetDir: defaultRestoreTargetDir(), + Online: s.deps.Hub.Connected(host.ID), + Error: errMsg, + FormPaths: paths, + FormInPlace: inPlace, + FormTargetDir: targetDir, + FormConfirmHN: confirmHN, + } + snaps, _ := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID) + if len(snaps) > 100 { + snaps = snaps[:100] + } + page.Snapshots = snaps + for i := range snaps { + if snaps[i].ID == snapshotID || snaps[i].ShortID == snapshotID { + ss := snaps[i] + page.Selected = &ss + break + } + } + view := s.baseView(u) + view.Title = "Restore · " + host.Name + 
view.Page = page + w.WriteHeader(status) + _ = s.deps.UI.Render(w, "host_restore", view) + } + + if snapshotID == "" { + rerender("Pick a snapshot first.", stdhttp.StatusUnprocessableEntity) + return + } + cleanPaths := make([]string, 0, len(paths)) + for _, p := range paths { + p = strings.TrimSpace(p) + if p == "" { + continue + } + if !strings.HasPrefix(p, "/") { + rerender("Paths must be absolute (start with /).", stdhttp.StatusUnprocessableEntity) + return + } + cleanPaths = append(cleanPaths, p) + } + if len(cleanPaths) == 0 { + rerender("Pick at least one file or directory to restore.", stdhttp.StatusUnprocessableEntity) + return + } + + if inPlace { + if confirmHN != host.Name { + rerender("Type the host name exactly to confirm an in-place (overwrite) restore.", + stdhttp.StatusUnprocessableEntity) + return + } + } else { + // New-directory mode: trust the operator's chosen target. + // Empty falls back to the default. Validate it's either + // absolute or starts with $HOME / ~/ (the agent expands + // these at run time). + if targetDir == "" { + targetDir = defaultRestoreTargetDir() + } + if !looksLikeRestoreTarget(targetDir) { + rerender("Target must be an absolute path, or start with $HOME or ~/.", + stdhttp.StatusUnprocessableEntity) + return + } + } + + if !s.deps.Hub.Connected(host.ID) { + rerender("Agent is offline. Try again when it reconnects.", + stdhttp.StatusServiceUnavailable) + return + } + + // Build a new job id up-front so we can substitute it into the + // new-directory target path. The agent will additionally expand + // $HOME / ~/ before invoking restic. 
+ jobID := ulid.Make().String() + finalTarget := "" + if !inPlace { + finalTarget = strings.ReplaceAll(targetDir, "", jobID) + } + + now := time.Now().UTC() + if err := s.deps.Store.CreateJob(r.Context(), store.Job{ + ID: jobID, + HostID: host.ID, + Kind: string(api.JobRestore), + ActorKind: "user", + ActorID: &u.ID, + CreatedAt: now, + }); err != nil { + slog.Error("ui restore: create job", "err", err) + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + + payload := api.CommandRunPayload{ + JobID: jobID, + Kind: api.JobRestore, + Restore: &api.RestorePayload{ + SnapshotID: snapshotID, + Paths: cleanPaths, + InPlace: inPlace, + TargetDir: finalTarget, + }, + } + env, err := api.Marshal(api.MsgCommandRun, jobID, payload) + if err != nil { + stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError) + return + } + if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil { + slog.Warn("ui restore: dispatch failed", "err", err) + rerender("Couldn't deliver the restore command (agent went offline).", + stdhttp.StatusServiceUnavailable) + return + } + _ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{ + ID: ulid.Make().String(), + UserID: &u.ID, + Actor: "user", + Action: "host.restore", + TargetKind: ptr("host"), + TargetID: &host.ID, + TS: now, + }) + + // HTMX redirect (or vanilla redirect) to the live job log. + jobURL := "/jobs/" + jobID + if r.Header.Get("HX-Request") == "true" { + w.Header().Set("HX-Redirect", jobURL) + w.WriteHeader(stdhttp.StatusNoContent) + return + } + stdhttp.Redirect(w, r, jobURL, stdhttp.StatusSeeOther) +} + +// hostRestoreTreePage is the data shape for the tree-node HTMX partial. +type hostRestoreTreePage struct { + HostID string + SnapshotID string + Path string + Children []treeChildView + Error string +} + +// treeChildView is one row of the tree (a direct child of Path). 
+type treeChildView struct { + Name string + Type string // dir | file | symlink + Path string // full path, used in the checkbox value + Size int64 + IsDir bool +} + +// handleUIRestoreTree is the HTMX-served partial that loads one +// directory's children. Called when the operator clicks an expand +// chevron in the wizard's tree browser. Caches via fetchTreeWithCache. +func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Request) { + u := s.requireUIUser(w, r) + if u == nil { + return + } + hostID := chi.URLParam(r, "id") + host, err := s.deps.Store.GetHost(r.Context(), hostID) + if err != nil { + stdhttp.NotFound(w, r) + return + } + q := r.URL.Query() + snapshotID := strings.TrimSpace(q.Get("snapshot")) + pathArg := strings.TrimSpace(q.Get("path")) + if pathArg == "" { + pathArg = "/" + } + if snapshotID == "" { + stdhttp.Error(w, "snapshot required", stdhttp.StatusBadRequest) + return + } + if !s.deps.Hub.Connected(host.ID) { + // Render the partial with an error message rather than 503ing + // — the wizard renders the error inline next to the failed node. 
+ page := hostRestoreTreePage{ + HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, + Error: "agent offline", + } + view := s.baseView(u) + view.Page = page + _ = s.deps.UI.RenderPartial(w, "tree_node", view) + return + } + + sessionID := sessionIDFromCookie(r) + ctx, cancel := context.WithTimeout(r.Context(), 35*time.Second) + defer cancel() + + result, err := s.fetchTreeWithCache(ctx, sessionID, host.ID, snapshotID, pathArg) + if err != nil { + page := hostRestoreTreePage{ + HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, + Error: err.Error(), + } + view := s.baseView(u) + view.Page = page + _ = s.deps.UI.RenderPartial(w, "tree_node", view) + return + } + if result.Error != "" { + page := hostRestoreTreePage{ + HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, + Error: result.Error, + } + view := s.baseView(u) + view.Page = page + _ = s.deps.UI.RenderPartial(w, "tree_node", view) + return + } + + children := make([]treeChildView, 0, len(result.Entries)) + for _, e := range result.Entries { + full := joinTreePath(pathArg, e.Name) + children = append(children, treeChildView{ + Name: e.Name, Type: e.Type, Path: full, + Size: e.Size, + IsDir: e.Type == "dir", + }) + } + // Stable order: dirs first, then files, alphabetically. + sort.SliceStable(children, func(i, j int) bool { + if children[i].IsDir != children[j].IsDir { + return children[i].IsDir + } + return children[i].Name < children[j].Name + }) + + page := hostRestoreTreePage{ + HostID: host.ID, SnapshotID: snapshotID, Path: pathArg, + Children: children, + } + view := s.baseView(u) + view.Page = page + if err := s.deps.UI.RenderPartial(w, "tree_node", view); err != nil { + slog.Warn("ui restore tree: render partial", "err", err) + } +} + +// defaultRestoreTargetDir is the placeholder shown on the step-3 +// New-directory radio card and the value used when the operator +// leaves the field blank. 
$HOME resolves agent-side (typically /root +// for the systemd-as-root unit); is substituted at dispatch. +// The systemd unit pins ReadWritePaths to include the agent user's +// home/rm-restore subdir so this default actually works under the +// sandbox. +func defaultRestoreTargetDir() string { + return "$HOME/rm-restore//" +} + +// looksLikeRestoreTarget validates the operator-supplied target dir +// is a shape the agent can sensibly resolve. We accept absolute +// paths and a couple of agent-side expansions ($HOME, ~/). Other env +// vars are deliberately rejected — operator-supplied paths shouldn't +// be able to pick up arbitrary agent env values. +func looksLikeRestoreTarget(p string) bool { + if p == "" { + return false + } + switch { + case strings.HasPrefix(p, "/"): + return true + case strings.HasPrefix(p, "$HOME/"), p == "$HOME": + return true + case strings.HasPrefix(p, "${HOME}/"), p == "${HOME}": + return true + case strings.HasPrefix(p, "~/"), p == "~": + return true + } + return false +} + +// sessionIDFromCookie returns the operator's session cookie value, +// used as the cache key scope for the tree-list cache. Unauthenticated +// requests don't reach this point, so an empty cookie value would +// only happen if requireUIUser is bypassed in tests — fall back to +// the request remote addr for those cases. +func sessionIDFromCookie(r *stdhttp.Request) string { + if c, err := r.Cookie(sessionCookieName); err == nil && c.Value != "" { + return c.Value + } + return r.RemoteAddr +} + +// joinTreePath combines a directory path and a child name into an +// absolute snapshot-relative path, normalising any duplicate slashes. +func joinTreePath(dir, name string) string { + if dir == "" || dir == "/" { + return "/" + name + } + return strings.TrimRight(dir, "/") + "/" + name +} + +// satisfy unused-import if compile order shifts. 
+var _ = ui.User{} diff --git a/internal/server/http/ui_restore_test.go b/internal/server/http/ui_restore_test.go new file mode 100644 index 0000000..16a04b8 --- /dev/null +++ b/internal/server/http/ui_restore_test.go @@ -0,0 +1,380 @@ +// ui_restore_test.go — covers the restore wizard backend (P3-01). +package http + +import ( + "context" + "encoding/json" + stdhttp "net/http" + "net/url" + "strings" + "testing" + "time" + + "github.com/coder/websocket" + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" + "gitea.dcglab.co.uk/steve/restic-manager/internal/store" +) + +// seedSnapshot creates a snapshot row directly via ReplaceHostSnapshots. +// Returns the snapshot ID. +func seedSnapshot(t *testing.T, st *store.Store, hostID, hostname string) string { + t.Helper() + id := strings.ReplaceAll(ulid.Make().String(), "-", "") + short := id[:8] + if err := st.ReplaceHostSnapshots(context.Background(), hostID, []store.Snapshot{{ + ID: id, ShortID: short, Time: time.Now().UTC().Add(-2 * time.Hour), + Hostname: hostname, Paths: []string{"/etc"}, Tags: []string{"system-config"}, + SizeBytes: 612 * 1024 * 1024, FileCount: 100, + }}, time.Now().UTC()); err != nil { + t.Fatalf("seed snapshot: %v", err) + } + return id +} + +// seedTwoSnapshots seeds two snapshots in one ReplaceHostSnapshots call +// so both end up in the host's list. ReplaceHostSnapshots is atomic- +// swap, so calling seedSnapshot twice would only leave the second. 
func seedTwoSnapshots(t *testing.T, st *store.Store, hostID, hostname string) (string, string) {
	t.Helper()
	// NOTE(review): ULID strings normally contain no '-', so these
	// ReplaceAll calls look like no-ops — confirm and simplify.
	a := strings.ReplaceAll(ulid.Make().String(), "-", "")
	b := strings.ReplaceAll(ulid.Make().String(), "-", "")
	// NOTE(review): the leading characters of a ULID encode its
	// timestamp, so a[:8] and b[:8] will usually be equal when both IDs
	// are minted back to back — confirm nothing relies on the two short
	// IDs being distinct.
	if err := st.ReplaceHostSnapshots(context.Background(), hostID, []store.Snapshot{
		{
			ID: a, ShortID: a[:8], Time: time.Now().UTC().Add(-3 * time.Hour),
			Hostname: hostname, Paths: []string{"/etc"}, Tags: []string{"system-config"},
		},
		{
			ID: b, ShortID: b[:8], Time: time.Now().UTC().Add(-1 * time.Hour),
			Hostname: hostname, Paths: []string{"/etc"}, Tags: []string{"system-config"},
		},
	}, time.Now().UTC()); err != nil {
		t.Fatalf("seed snapshots: %v", err)
	}
	// a is the older snapshot (-3h), b the newer one (-1h).
	return a, b
}

// TestRestoreWizardGetRendersStep1 verifies the snapshot picker is on
// the page when no snapshot is pre-selected.
func TestRestoreWizardGetRendersStep1(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "rstore-host-1")
	_ = seedSnapshot(t, st, hostID, "rstore-host-1")
	cookie := loginAsAdmin(t, st)

	// Plain GET on the wizard URL without a snapshot ID in the path.
	req, _ := stdhttp.NewRequest("GET", ts.URL+"/hosts/"+hostID+"/restore", nil)
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusOK {
		t.Fatalf("status: got %d, want 200", res.StatusCode)
	}
	body := readBody(t, res.Body)
	if !strings.Contains(body, "Restore from snapshot") {
		t.Errorf("expected wizard heading; body: %s", short(body))
	}
	// Either wording of the step-1 prompt is accepted.
	if !strings.Contains(body, "Pick a snapshot first") &&
		!strings.Contains(body, "Pick the point-in-time you want to restore from") {
		t.Errorf("expected step-1 prompt")
	}
}

// TestRestoreWizardGetWithSnapshotPreselected verifies the deep-link
// path puts the snapshot summary card on the page.
func TestRestoreWizardGetWithSnapshotPreselected(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "rstore-host-2")
	sid := seedSnapshot(t, st, hostID, "rstore-host-2")
	cookie := loginAsAdmin(t, st)

	// Deep-link URL variant: the snapshot ID travels in the path.
	req, _ := stdhttp.NewRequest("GET",
		ts.URL+"/hosts/"+hostID+"/snapshots/"+sid+"/restore", nil)
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusOK {
		t.Fatalf("status: got %d", res.StatusCode)
	}
	body := readBody(t, res.Body)
	// The selected summary card should reference the snapshot's short ID.
	if !strings.Contains(body, sid[:8]) {
		t.Errorf("expected snapshot short id in body")
	}
	if !strings.Contains(body, "picked from") {
		t.Errorf("expected 'picked from N snapshots' summary line")
	}
}

// TestRestorePostRequiresSnapshot: form without snapshot_id re-renders
// with an error.
func TestRestorePostRequiresSnapshot(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "rstore-no-snap")
	cookie := loginAsAdmin(t, st)

	// Otherwise valid form; only snapshot_id is blank.
	form := url.Values{
		"snapshot_id": {""},
		"target_mode": {"new_dir"},
		"paths":       {"/etc/foo"},
	}
	req, _ := stdhttp.NewRequest("POST",
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", res.StatusCode)
	}
	body := readBody(t, res.Body)
	if !strings.Contains(body, "Pick a snapshot") {
		t.Errorf("expected 'Pick a snapshot' error in body")
	}
}

// TestRestorePostRequiresPaths: form with snapshot but no paths is rejected.
func TestRestorePostRequiresPaths(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, _ := enrolHostForUI(t, srv, st, "rstore-no-paths")
	sid := seedSnapshot(t, st, hostID, "rstore-no-paths")
	cookie := loginAsAdmin(t, st)

	// Valid snapshot and target mode, but no "paths" key at all.
	form := url.Values{
		"snapshot_id": {sid},
		"target_mode": {"new_dir"},
	}
	req, _ := stdhttp.NewRequest("POST",
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", res.StatusCode)
	}
	body := readBody(t, res.Body)
	if !strings.Contains(body, "at least one file") {
		t.Errorf("expected paths-required error")
	}
}

// TestRestorePostInPlaceRequiresHostnameMatch: in-place mode with the
// wrong hostname typed re-renders + does not dispatch.
func TestRestorePostInPlaceRequiresHostnameMatch(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, srv, st, "rstore-inplace")
	sid := seedSnapshot(t, st, hostID, "rstore-inplace")
	// Connect a fake agent so the handler's own offline check cannot be
	// what rejects the request.
	c := agentDial(t, srv, ts, hostID, token)
	sendHello(t, c, "rstore-inplace")
	_ = drainUntil(t, c, api.MsgScheduleSet)
	cookie := loginAsAdmin(t, st)

	// confirm_hostname deliberately differs from the host name.
	form := url.Values{
		"snapshot_id":      {sid},
		"target_mode":      {"in_place"},
		"paths":            {"/etc/nginx/nginx.conf"},
		"confirm_hostname": {"WRONG"},
	}
	req, _ := stdhttp.NewRequest("POST",
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422", res.StatusCode)
	}

	// No restore command should arrive at the agent.
	// Read until the 200ms context expires (Read then returns an error
	// and the loop ends); any restore command.run seen before that is a
	// failure.
	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()
	for {
		mt, raw, rerr := c.Read(ctx)
		if rerr != nil {
			break
		}
		if mt == websocket.MessageText && strings.Contains(string(raw), `"command.run"`) &&
			strings.Contains(string(raw), `"kind":"restore"`) {
			t.Fatal("unexpected restore command.run after wrong-hostname rejection")
		}
	}
}

// TestRestorePostHappyPathDispatches: well-formed new-directory form
// dispatches a JobRestore command.run with the expected payload + writes
// an audit row + redirects.
func TestRestorePostHappyPathDispatches(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, srv, st, "rstore-happy")
	sid := seedSnapshot(t, st, hostID, "rstore-happy")
	// A connected fake agent is required: the handler refuses to
	// dispatch to an offline host.
	c := agentDial(t, srv, ts, hostID, token)
	sendHello(t, c, "rstore-happy")
	_ = drainUntil(t, c, api.MsgScheduleSet)
	cookie := loginAsAdmin(t, st)

	// No target_dir is sent, so the handler falls back to its default
	// new-directory target.
	form := url.Values{
		"snapshot_id": {sid},
		"target_mode": {"new_dir"},
		"paths":       {"/etc/nginx/nginx.conf", "/etc/nginx/sites-available/alfa.conf"},
	}
	req, _ := stdhttp.NewRequest("POST",
		ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("HX-Request", "true")
	req.AddCookie(cookie)
	// Don't follow redirects — we want to inspect the HX-Redirect header.
	client := &stdhttp.Client{
		CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
			return stdhttp.ErrUseLastResponse
		},
	}
	res, err := client.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusNoContent {
		t.Fatalf("status: got %d, want 204", res.StatusCode)
	}
	if res.Header.Get("HX-Redirect") == "" {
		t.Fatal("expected HX-Redirect header pointing at the live job page")
	}

	// Find the dispatched command.run on the agent socket.
	// Poll for up to 2s, skipping frames that are not a restore
	// command.run; a read error ends the search early.
	deadline := time.Now().Add(2 * time.Second)
	var got api.Envelope
	for time.Now().Before(deadline) {
		ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
		mt, raw, rerr := c.Read(ctx)
		cancel()
		if rerr != nil {
			break
		}
		if mt != websocket.MessageText {
			continue
		}
		if !strings.Contains(string(raw), `"command.run"`) || !strings.Contains(string(raw), `"kind":"restore"`) {
			continue
		}
		if err := json.Unmarshal(raw, &got); err != nil {
			t.Fatalf("unmarshal: %v", err)
		}
		break
	}
	// got stays zero-valued when no matching frame was seen.
	if got.Type != api.MsgCommandRun {
		t.Fatal("never received restore command.run")
	}
	var cp api.CommandRunPayload
	if err := got.UnmarshalPayload(&cp); err != nil {
		t.Fatalf("unmarshal payload: %v", err)
	}
	if cp.Kind != api.JobRestore {
		t.Fatalf("kind: got %q", cp.Kind)
	}
	if cp.Restore == nil {
		t.Fatal("restore payload is nil")
	}
	if cp.Restore.SnapshotID != sid {
		t.Fatalf("snapshot id: got %q want %q", cp.Restore.SnapshotID, sid)
	}
	if cp.Restore.InPlace {
		t.Fatal("expected new-directory mode (in_place=false)")
	}
	if !strings.HasPrefix(cp.Restore.TargetDir, "$HOME/rm-restore/") {
		t.Fatalf("target_dir: got %q, want prefix $HOME/rm-restore/", cp.Restore.TargetDir)
	}
	// The job-id placeholder in the default target should have been
	// substituted with the dispatched job_id. The "/01" probe presumably
	// relies on freshly minted ULIDs starting with "01" — TODO confirm.
	if !strings.Contains(cp.Restore.TargetDir, "/01") {
		t.Errorf("target_dir: expected job_id substituted into the path; got %q", cp.Restore.TargetDir)
	}
	if len(cp.Restore.Paths) != 2 {
		t.Fatalf("paths: got %d, want 2", len(cp.Restore.Paths))
	}

	// Audit row.
	var n int
	if err := st.DB().QueryRow(
		`SELECT COUNT(*) FROM audit_log WHERE action = 'host.restore' AND target_id = ?`,
		hostID).Scan(&n); err != nil {
		t.Fatalf("audit count: %v", err)
	}
	if n != 1 {
		t.Fatalf("audit rows: got %d, want 1", n)
	}
}

// TestRestorePostOfflineHostRejected: agent not connected → 503 +
// no command.run.
+func TestRestorePostOfflineHostRejected(t *testing.T) { + t.Parallel() + srv, ts, st := rawTestServerWithUI(t) + hostID, _ := enrolHostForUI(t, srv, st, "rstore-offline") + sid := seedSnapshot(t, st, hostID, "rstore-offline") + cookie := loginAsAdmin(t, st) + + form := url.Values{ + "snapshot_id": {sid}, + "target_mode": {"new_dir"}, + "paths": {"/etc/foo"}, + } + req, _ := stdhttp.NewRequest("POST", + ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode())) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.AddCookie(cookie) + res, err := stdhttp.DefaultClient.Do(req) + if err != nil { + t.Fatalf("do: %v", err) + } + defer res.Body.Close() + if res.StatusCode != stdhttp.StatusServiceUnavailable { + t.Fatalf("status: got %d, want 503", res.StatusCode) + } + _ = srv +} + +// helpers -------------------------------------------------------------- + +func readBody(t *testing.T, body interface{ Read(p []byte) (int, error) }) string { + t.Helper() + buf := make([]byte, 0, 16*1024) + tmp := make([]byte, 4096) + for { + n, err := body.Read(tmp) + if n > 0 { + buf = append(buf, tmp[:n]...) 
+ } + if err != nil { + break + } + } + return string(buf) +} + +func short(s string) string { + if len(s) > 400 { + return s[:400] + "…" + } + return s +} diff --git a/internal/server/ui/ui.go b/internal/server/ui/ui.go index 905fe7f..8c5e52b 100644 --- a/internal/server/ui/ui.go +++ b/internal/server/ui/ui.go @@ -92,6 +92,7 @@ func New() (*Renderer, error) { "templates/partials/toast.html", "templates/partials/awaiting_agent.html", "templates/partials/host_chrome.html", + "templates/partials/tree_node.html", } pageEntries, err := fs.Glob(web.FS, "templates/pages/*.html") diff --git a/internal/server/ws/handler.go b/internal/server/ws/handler.go index 5706693..b488095 100644 --- a/internal/server/ws/handler.go +++ b/internal/server/ws/handler.go @@ -54,7 +54,7 @@ func AgentHandler(deps HandlerDeps) stdhttp.Handler { return stdhttp.HandlerFunc(func(w stdhttp.ResponseWriter, r *stdhttp.Request) { host, ok := authenticateAgent(r, deps.Store) if !ok { - stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized) + stdhttp.Error(w, "unauthorised", stdhttp.StatusUnauthorized) return } @@ -204,7 +204,7 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E string(p.Status), p.ExitCode, p.Stats, errMsg, p.FinishedAt); err != nil { slog.Warn("ws: mark job finished", "job_id", p.JobID, "err", err) } - // repo_initialized_at projection has been removed — auto-init + // repo_initialised_at projection has been removed — auto-init // at host enrolment makes "is the repo init'd" derivable from // the latest init job's status, no separate column needed. if deps.JobHub != nil { @@ -297,6 +297,20 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E // (job.started → job.finished) is sufficient signal. slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID) + case api.MsgTreeListResult: + // Reply to a synchronous tree.list RPC. 
Route to the waiter + // registered against the request envelope's ID; if none is + // registered the caller already gave up (ctx expired) — drop + // the stray reply quietly. + if env.ID == "" { + slog.Warn("ws: tree.list.result missing envelope ID", "host_id", hostID) + break + } + if !deps.Hub.rpcs.resolve(env.ID, env) { + slog.Debug("ws: tree.list.result with no waiter (timeout?)", + "id", env.ID, "host_id", hostID) + } + case api.MsgError: var ep api.ErrorPayload _ = env.UnmarshalPayload(&ep) diff --git a/internal/server/ws/hub.go b/internal/server/ws/hub.go index 8ad732f..9085fc5 100644 --- a/internal/server/ws/hub.go +++ b/internal/server/ws/hub.go @@ -21,6 +21,11 @@ import ( type Hub struct { mu sync.RWMutex conns map[string]*Conn // hostID → conn + + // rpcs tracks in-flight synchronous RPC calls (e.g. tree.list). + // See rpc.go for details. Lazy-initialised via the registry's + // own register() so callers don't have to juggle a constructor. + rpcs rpcRegistry } // NewHub returns an empty hub. @@ -100,7 +105,7 @@ func NewConn(hostID string, c *websocket.Conn) *Conn { } // Send writes an envelope as a JSON text message. Concurrent calls -// are serialized; the underlying socket is not safe for parallel +// are serialised; the underlying socket is not safe for parallel // writers. func (c *Conn) Send(ctx context.Context, env api.Envelope) error { c.writeMu.Lock() diff --git a/internal/server/ws/rpc.go b/internal/server/ws/rpc.go new file mode 100644 index 0000000..e4da8c3 --- /dev/null +++ b/internal/server/ws/rpc.go @@ -0,0 +1,112 @@ +package ws + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// rpcRegistry holds in-flight synchronous RPC calls. 
SendRPC registers +// a channel keyed by the request envelope's ID; the WS read loop's +// dispatcher routes incoming reply envelopes to the matching channel +// when their type is one of the known reply types (currently just +// tree.list.result). +// +// A single global registry keyed by envelope ID is fine because IDs +// are ULIDs — globally unique without coordinating across hubs. +type rpcRegistry struct { + mu sync.Mutex + pending map[string]chan api.Envelope +} + +// register reserves a channel for the given request ID. The channel +// is buffered (cap 1) so a slow waiter doesn't block the read loop's +// dispatcher when the reply lands. +func (r *rpcRegistry) register(id string) chan api.Envelope { + ch := make(chan api.Envelope, 1) + r.mu.Lock() + if r.pending == nil { + r.pending = make(map[string]chan api.Envelope) + } + r.pending[id] = ch + r.mu.Unlock() + return ch +} + +// resolve delivers an envelope to its waiter and removes the entry. +// Returns whether a waiter was actually present (the dispatcher uses +// this to decide whether to log a stray-reply warning). +func (r *rpcRegistry) resolve(id string, env api.Envelope) bool { + r.mu.Lock() + ch, ok := r.pending[id] + if ok { + delete(r.pending, id) + } + r.mu.Unlock() + if !ok { + return false + } + // Buffered chan cap 1 — non-blocking send. The waiter goroutine + // owns the receive side so this is the only sender. + ch <- env + close(ch) + return true +} + +// release abandons the entry without delivering a value. Used when +// the caller's context expires before a reply arrives — the next +// stray reply (if any) will hit the no-waiter case in resolve and +// just be dropped. +func (r *rpcRegistry) release(id string) { + r.mu.Lock() + delete(r.pending, id) + r.mu.Unlock() +} + +// SendRPC sends a request envelope to the host and blocks until a +// matching reply lands or the context expires. The hub picks a fresh +// envelope ID, marshals the payload, registers a waiter, and sends. 
+// +// timeout caps the wait; a value that is too short for the expected +// restic-side latency makes the call fail early. The registry entry is +// released on timeout, so a reply that arrives later finds no waiter +// and is dropped. A non-positive timeout falls back to 30s, to cover +// a slow round-trip plus a restic ls against a remote rest-server. +// +// If the host disconnects mid-flight, the read loop ends and no reply +// will ever come — the caller's ctx.Done()/timeout is the only path +// out. We could pre-fail by tracking conn lifetime, but the bound +// keeps the code simple and the worst case is a 30s wait. +func (h *Hub) SendRPC(ctx context.Context, hostID string, reqType api.MessageType, payload any, timeout time.Duration) (api.Envelope, error) { + if timeout <= 0 { + timeout = 30 * time.Second + } + id := ulid.Make().String() + env, err := api.Marshal(reqType, id, payload) + if err != nil { + return api.Envelope{}, err + } + + ch := h.rpcs.register(id) + + if err := h.Send(ctx, hostID, env); err != nil { + h.rpcs.release(id) + return api.Envelope{}, err + } + + select { + case reply := <-ch: + return reply, nil + case <-ctx.Done(): + h.rpcs.release(id) + return api.Envelope{}, ctx.Err() + case <-time.After(timeout): + h.rpcs.release(id) + return api.Envelope{}, errors.New("ws rpc: timed out waiting for reply") + } +} diff --git a/internal/server/ws/rpc_test.go b/internal/server/ws/rpc_test.go new file mode 100644 index 0000000..7e9e290 --- /dev/null +++ b/internal/server/ws/rpc_test.go @@ -0,0 +1,122 @@ +package ws + +import ( + "context" + "encoding/json" + "sync" + "testing" + "time" + + "github.com/oklog/ulid/v2" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// TestRPCRegistryRoundTrip: register a waiter, resolve it, get the +// envelope back. Cover the no-waiter and double-resolve cases too. 
+func TestRPCRegistryRoundTrip(t *testing.T) { + t.Parallel() + var r rpcRegistry + id := ulid.Make().String() + ch := r.register(id) + + want := api.Envelope{Type: api.MsgTreeListResult, ID: id, Payload: json.RawMessage(`{"path":"/"}`)} + if !r.resolve(id, want) { + t.Fatal("resolve: returned false for registered id") + } + got := <-ch + if got.ID != id { + t.Fatalf("id mismatch: got %q want %q", got.ID, id) + } + + // A second resolve for the same id has no waiter and should not panic. + if r.resolve(id, want) { + t.Fatal("resolve: returned true for already-resolved id") + } +} + +// TestRPCRegistryRelease: release abandons the waiter; a subsequent +// resolve is a no-op (no goroutine leak, no panic). +func TestRPCRegistryRelease(t *testing.T) { + t.Parallel() + var r rpcRegistry + id := ulid.Make().String() + _ = r.register(id) + r.release(id) + if r.resolve(id, api.Envelope{ID: id}) { + t.Fatal("resolve after release: should be no-op") + } +} + +// TestRPCRegistryConcurrent: many waiters in flight concurrently get +// only their own reply. This catches buggy keying/locking. +func TestRPCRegistryConcurrent(t *testing.T) { + t.Parallel() + var r rpcRegistry + const n = 64 + + ids := make([]string, n) + chs := make([]chan api.Envelope, n) + for i := 0; i < n; i++ { + ids[i] = ulid.Make().String() + chs[i] = r.register(ids[i]) + } + + // Resolve in random-ish order from many goroutines. 
+ var wg sync.WaitGroup + for i := 0; i < n; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + r.resolve(ids[idx], api.Envelope{ID: ids[idx], Type: api.MsgTreeListResult}) + }(i) + } + wg.Wait() + + for i := 0; i < n; i++ { + select { + case got := <-chs[i]: + if got.ID != ids[i] { + t.Fatalf("waiter %d: got id %q want %q", i, got.ID, ids[i]) + } + case <-time.After(2 * time.Second): + t.Fatalf("waiter %d: timed out", i) + } + } +} + +// TestSendRPCContextCancelReleases ensures that cancelling the caller's +// ctx releases the registry entry so a stray late reply is harmlessly +// dropped. It does not call SendRPC itself; it mirrors SendRPC's select +// against the registry directly, so it is purely a unit test on that path. +func TestSendRPCContextCancelReleases(t *testing.T) { + t.Parallel() + h := NewHub() + + // No host registered, so Hub.Send returns "host offline" and + // SendRPC bails without ever waiting. We test the timeout/ctx + // path by going through register() directly. + id := ulid.Make().String() + ch := h.rpcs.register(id) + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(20 * time.Millisecond) + cancel() + }() + + // Simulate the SendRPC select: ctx wins. + select { + case <-ch: + t.Fatal("unexpected reply") + case <-ctx.Done(): + h.rpcs.release(id) + } + + // A late reply must not block or panic: release removed the entry, so + // resolve finds no waiter, sends nothing and reports false. 
+ resolved := h.rpcs.resolve(id, api.Envelope{ID: id}) + if resolved { + t.Fatal("resolve after release should return false") + } +} diff --git a/internal/store/host_repo_stats.go b/internal/store/host_repo_stats.go index 1952f68..9889b29 100644 --- a/internal/store/host_repo_stats.go +++ b/internal/store/host_repo_stats.go @@ -38,7 +38,7 @@ func (s *Store) GetHostRepoStats(ctx context.Context, hostID string) (*HostRepoS // getHostRepoStatsTx is identical to GetHostRepoStats but runs on an // existing transaction so the fetch-merge-upsert in UpsertHostRepoStats -// is fully serialized. +// is fully serialised. func getHostRepoStatsTx(ctx context.Context, tx *sql.Tx, hostID string) (*HostRepoStats, error) { row := tx.QueryRowContext(ctx, `SELECT host_id, total_size_bytes, raw_size_bytes, unique_files, diff --git a/internal/store/migrations/0012_jobs_restore_diff_kind.sql b/internal/store/migrations/0012_jobs_restore_diff_kind.sql new file mode 100644 index 0000000..7c4673c --- /dev/null +++ b/internal/store/migrations/0012_jobs_restore_diff_kind.sql @@ -0,0 +1,61 @@ +-- 0012_jobs_restore_diff_kind.sql +-- +-- Add 'restore' and 'diff' to the jobs.kind CHECK constraint so the +-- restore wizard (P3-01) and the snapshot-diff endpoint (P3-09) can +-- persist their job rows. SQLite can't ALTER a CHECK in place, so we +-- rebuild the table. +-- +-- Rebuild safety: jobs has an inbound FK from job_logs (ON DELETE +-- CASCADE) and itself references schedules(id) via scheduled_id. +-- CLAUDE.md flags DROP TABLE on a parent as risky under +-- foreign_keys=ON; we mitigate two ways: +-- +-- 1. Stash job_logs into a temp table BEFORE rebuilding jobs, then +-- restore the rows after the rebuild settles. If a cascade +-- misbehaves we can still recover. +-- 2. Use the safe rebuild order from 0005: create jobs_new with the +-- wider CHECK → copy data → DROP jobs → RENAME jobs_new TO jobs. 
+-- Do NOT rename the original first (the dangling-FK trap that +-- 0005's first draft hit and 0006 cleaned up). + +CREATE TEMPORARY TABLE _job_logs_backup AS + SELECT job_id, seq, ts, stream, payload FROM job_logs; + +CREATE TABLE jobs_new ( + id TEXT PRIMARY KEY, + host_id TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE, + kind TEXT NOT NULL CHECK (kind IN + ('backup','init','forget','prune','check','unlock','restore','diff')), + status TEXT NOT NULL CHECK (status IN ('queued','running','succeeded','failed','cancelled')), + scheduled_id TEXT REFERENCES schedules(id) ON DELETE SET NULL, + actor_kind TEXT NOT NULL CHECK (actor_kind IN ('user','schedule','system')), + actor_id TEXT, + started_at TEXT, + finished_at TEXT, + exit_code INTEGER, + stats TEXT, + error TEXT, + created_at TEXT NOT NULL +); + +INSERT INTO jobs_new + SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id, + started_at, finished_at, exit_code, stats, error, created_at + FROM jobs; + +DROP TABLE jobs; + +ALTER TABLE jobs_new RENAME TO jobs; + +CREATE INDEX jobs_host_id ON jobs(host_id); +CREATE INDEX jobs_status ON jobs(status); +CREATE INDEX jobs_created_at ON jobs(created_at); + +-- Defensive: if cascade-on-DROP wiped job_logs (it shouldn't with the +-- foreign_keys behaviour SQLite documents, but the codebase has hit +-- "lost rows" before during rebuilds), restore from the temp backup. +-- INSERT OR IGNORE so re-running is harmless. +INSERT OR IGNORE INTO job_logs (job_id, seq, ts, stream, payload) + SELECT job_id, seq, ts, stream, payload FROM _job_logs_backup; + +DROP TABLE _job_logs_backup; diff --git a/tasks.md b/tasks.md index 7a65c72..1d37642 100644 --- a/tasks.md +++ b/tasks.md @@ -233,19 +233,58 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days. 
## Phase 3 — Restore, alerts, audit -- [ ] **P3-01** (L) Restore wizard backend: snapshot tree browse via `restic ls --json`, path picker, target selection -- [ ] **P3-02** (L) Restore wizard UI (multi-step: host → snapshot → paths → target → confirm) -- [ ] **P3-03** (M) Restore execution: `restic restore` invocation, progress streaming -- [ ] **P3-04** (L) Cross-host restore: target agent receives a temporary scoped read credential for source host's repo (single-job, auto-revoked); UI supports source→target path remapping; warns when source paths need root and target service user is non-root +> Phase 3 is split into three independently-shippable sub-phases: +> **Restore** (P3-01..03 + P3-09 + P3-X1 cancel + P3-X2 tree-list RPC), +> **Alerts** (P3-05..07), **Audit UI** (P3-08). Each sub-phase has its own +> spec → plan → implement cycle; we hand back at sub-phase boundaries. +> +> P3-04 (cross-host restore) was de-scoped during the Phase-3 brainstorm +> on 2026-05-04: disaster recovery is already covered by re-enrolling a +> replacement host with the same repo creds (snapshots reappear, restore +> is same-host). The remaining "pull a file from host A onto host C +> without giving C permanent access" use case is genuinely different and +> doesn't have a confirmed need yet, so it's moved to the **Future / +> unscheduled** section at the end of this file. + +### Phase 3 — Restore ✅ + +> Spec: `docs/superpowers/specs/2026-05-04-p3-restore-design.md`. +> Wireframe: `_diag/p3-restore-wizard/wireframe.html`. +> Sweep screenshots: `_diag/p3-restore-sweep/`. +> Shipped on branch `p3-restore`. + +- [x] **P3-X1** (S) Cancel-job feature. `command.cancel` WS envelope; agent tracks per-job ctx.CancelFunc and kills the running `restic` subprocess via context cancel (SIGTERM, SIGKILL after 5s grace via `cmd.Cancel` + `cmd.WaitDelay`); server endpoint `POST /api/jobs/{id}/cancel` bridges UI → WS; the existing UI Cancel button on `/jobs/{id}` is now real for any running kind. 
Sandbox-aware: `internal/restic/cancel_{unix,windows}.go` build-tags pick SIGTERM on POSIX vs `os.Kill` on Windows (which can't deliver SIGTERM). Tests: cancel mid-run via 'sleep 30' fake-restic returns JobCancelled with exit 130 in <200ms. +- [x] **P3-X2** (S) Tree-list synchronous WS RPC. `MsgTreeList` ↔ `MsgTreeListResult` with `Envelope.ID` correlation; generic `Hub.SendRPC` helper (registry of buffered channels keyed by ULID, ctx-cancel + timeout aware). `internal/restic.ListTreeChildren` wraps `restic ls --json` and filters its recursive output to direct children. Server-side `treeCache` is per-wizard-session (keyed by session cookie + host + snapshot + path) with a 30-min TTL and lazy sweep. +- [x] **P3-01** (L) Restore wizard backend (`internal/server/http/ui_restore.go`). GET handlers render the four-step wizard against the wireframe. HTMX/fetch tree partial endpoint hits `fetchTreeWithCache`. POST validates: snapshot_id, ≥1 absolute path, in-place ⇒ confirm_hostname == host name, agent online; on error re-renders with operator's input intact. Happy path mints job_id, target = `/var/lib/restic-manager/restore/` (server-picked, agent's writable dir under the systemd sandbox's `ReadWritePaths`), creates job row, ships `command.run` with `RestorePayload`, writes `host.restore` audit row, returns HX-Redirect (or 303) to the live job page. +- [x] **P3-02** (L) Wizard UI templates (`web/templates/pages/host_restore.html` + `partials/tree_node.html`). Single-page progressively-enabled four-step form. Form-state-driven JS computes a running tally + step-4 confirm summary client-side. Tree expansion uses plain fetch (not HTMX) for simpler target lookup; loaded-state cached per node. Top-level Restore button on host detail right rail + per-snapshot Restore action on snapshot rows. New `.snap-row` token in `web/styles/input.css`. +- [x] **P3-03** (M) Restore execution. `restic.RunRestore` builds `restore --target <dir> <snapshot-id> [--include p]...` with --json; new `pumpRestoreStdout` parses status + summary objects. `--no-ownership` is gated on the agent's restic version via `Env.AtLeastVersion(0, 17)` — the flag was added in 0.17 and 0.16 rejects it. Restic version is threaded through `runner.Config.ResticVersion` from the agent's sysinfo snapshot. New-dir target is operator-editable (default `$HOME/rm-restore/<job-id>/`); agent expands `$HOME` / `${HOME}` / `~/` at run time and calls `os.MkdirAll` on the target chain so the operator never has to pre-create the per-job subdir. `runner.RunRestore` translates `RestoreStatus` into `job.progress` (mapping FilesRestored → FilesDone, etc.); agent dispatcher case `JobRestore` reuses the `spawn()` helper from P3-X1 so cancel works. Restore-shaped job-detail variant with current-file display under the progress bar. +- [x] **P3-09** (S) `diff` between two snapshots. `JobDiff` JobKind + `restic.RunDiff` + `runner.RunDiff`; `POST /api/hosts/{id}/snapshots/diff` (and HTMX-form variant on the unprefixed path) dispatcher with two-snapshot guard + per-host snapshot-list validation; UI panel on host detail right rail (visible when 2+ snapshots) with two short-id inputs + Diff button. Output streams as log.stream to the standard live job log page. +- [x] **P3-X3** (S) Recent-restores line on host detail. `hostChromeData` grows `RestoreStatus` / `RestoreAt` / `RestoreJobID` populated via `store.LatestJobByKind(host_id, 'restore')` (already exists from P2R). `host_chrome.html` renders a small line below the init-status one with status-coloured copy + a link to the job log. Hidden when no restore has ever run on this host. +- [x] **P3-X4** (S) Job log download (txt + ndjson). New `GET /api/jobs/{id}/log.{txt|ndjson}` endpoint backed by the persisted `job_logs` table — works any time (running or finished) without pausing the live WS stream because the source is the DB, not the live socket. 
Plain-text format mirrors the on-screen "HH:MM:SS.mmm TAG payload" shape with a small `# job ... · kind ... · status ...` header; ndjson emits one self-contained `{seq,ts,stream,payload}` JSON object per line for `jq` / tooling. Surfaced as a single header dropdown on the live job page (`details/summary`-driven, native keyboard support, click-outside-to-close). New reusable `.dropdown` / `.dropdown-menu` / `.dropdown-item` tokens in `web/styles/input.css`. +- [x] **P3-X5** (S) UK lint locale + sweep. `.golangci.yml` misspell locale switched US → UK and the codebase swept (~73 corrections — behaviour, serialise, recognise, honour, initialise, enrol, unauthorised, etc.). Wire `ErrorCode` value `"unauthorized"` → `"unauthorised"` is a tiny contract change but the agent doesn't parse those codes today and no external clients exist yet. +- [x] **P3-X6** (S) Snapshot SIZE/FILES tooltip on host detail. The per-snapshot summary block was added by restic 0.17 (the source comment in `internal/restic/snapshots.go` incorrectly said 0.16+); on 0.16 hosts the columns render `—`. `hostDetailPage.LegacyRestic` (computed via `Env.AtLeastVersion(0, 17)`) drives a `title="Needs restic 0.17+ on the agent host. This host runs <version>."` + `cursor: help` on the column headers, hidden once the host upgrades. + +> **Migration 0012** widens the `jobs.kind` CHECK constraint to include `restore` and `diff`. Rebuild required (SQLite can't ALTER CHECK in place); follows the safe pattern from 0005, with a defensive temp-table backup of `job_logs` so the cascade-trap that bit migration 0007 wouldn't take the log history with it. + +> **install.sh + systemd unit:** the install script now pre-creates `/root/rm-restore` (root-owned 0700) so the default new-dir restore target works under the sandbox out of the box; the unit's `ReadWritePaths` gains `-/root/rm-restore` (soft-fail prefix). 
Existing installs need a re-run of `install.sh` to pick up the new dir; new operator-typed targets are auto-created by the agent at job time. + +> **As shipped (Playwright sweep against the live smoke env, 2026-05-04):** login → host detail → Restore button → wizard step 1 picks snapshot a1ac4006 (most recent) → tree drill-down `/home/steve/test` (3 lazy loads) → tick `file1` + `file2` → step 4 confirm summary populated → dispatch → live job page with running progress widget → restore succeeds, files land on disk at `/root/rm-restore/<job-id>/home/steve/test/file{1,2}` (default `$HOME/rm-restore/<job-id>/` after agent-side expansion). Custom-target restore to `/tmp/custom-restore/<job-id>/` lands inside the agent's `PrivateTmp` namespace. Snapshot diff between `a1ac4006` and `5f78c788` → diff job page, statistics output streamed (738 bytes added, 0 removed). Recent-restores line on host detail reads "last restore · succeeded 28s ago · job log →". Download dropdown serves both `.txt` and `.ndjson` with correct `Content-Type` + `Content-Disposition`. SIZE/FILES tooltip "Needs restic 0.17+ on the agent host. This host runs 0.16.4." renders on column hover. + +### Phase 3 — Alerts (not started) + - [ ] **P3-05** (M) Alert engine: rule evaluation loop (failed backup, stale schedule, agent offline, check failed) - [ ] **P3-06** (M) Notification channels: webhook, ntfy, SMTP email - [ ] **P3-07** (S) Alert UI: list, acknowledge, resolve + +### Phase 3 — Audit log UI (not started) + - [ ] **P3-08** (S) Audit log UI with filters (user, action, target, time range) -- [ ] **P3-09** (S) `diff` between two snapshots in UI ### Phase 3 acceptance -- A file deleted on a host can be restored from the UI in under 2 minutes. A failed backup raises an alert via the configured channel within 60s. +- A file deleted on a host can be restored from the UI in under 2 minutes via the wizard at `/hosts/{id}/restore`; the operator can cancel a running restore (or any other running job) from the live job page. 
Snapshot diff between two snapshots renders as a normal job page. +- A failed backup raises an alert via the configured channel within 60s. +- The audit-log UI lets an admin filter by user / action / target / time range. --- @@ -290,3 +329,14 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days. - [ ] **X-03** Periodic dependency updates (`dependabot` or `renovate`) - [ ] **X-04** Threat-model review at end of each phase - [ ] **X-05** Proper first-run onboarding UI: admin shouldn't need to `curl` `/api/bootstrap` by hand. Render the bootstrap form on the same login page (extra "setup token" field shown only while no admin user exists, hidden after); on submit POST to `/api/bootstrap`, then drop straight into a session. Surface the one-time token from the server log somewhere copy-able (or print a clickable URL with the token in the query string at first-run). Also: relax the 12-char password floor for the first-run path or document it in the form so `admin` doesn't silently fail validation. + +--- + +## Future / unscheduled + +> Items here have a plausible use case but no confirmed need. They live +> outside numbered phases until a concrete trigger (a user request, a +> security review finding, a real disaster-recovery exercise) bumps them +> back into a phase. + +- [ ] **F-01** ~~P3-04~~ Cross-host restore. De-scoped from Phase 3 on 2026-05-04. Disaster recovery is already covered: stand up a replacement host, paste the original repo creds at enrolment, snapshots reappear, restore is same-host. The remaining "pull a file from host A onto host C without granting C permanent access" use case is genuinely different (file sharing / migration, not DR) and hasn't been requested. Original spec language was: "target agent receives a temporary scoped read credential for source host's repo (single-job, auto-revoked); UI supports source→target path remapping; warns when source paths need root and target service user is non-root". 
Re-promote when there's a real ask. diff --git a/web/static/css/styles.css b/web/static/css/styles.css index 4f00775..a584aa2 100644 --- a/web/static/css/styles.css +++ b/web/static/css/styles.css @@ -1,3 +1,3 @@ *,:after,:before{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 
#0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: } -/*! tailwindcss v3.4.17 | MIT License | https://tailwindcss.com*/*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}:host,html{line-height:1.5;-webkit-text-size-adjust:100%;font-family:Inter,system-ui,-apple-system,sans-serif;font-feature-settings:normal;font-variation-settings:normal;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-tap-highlight-color:transparent}body{line-height:inherit;margin:0}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:JetBrains 
Mono,ui-monospace,monospace;font-feature-settings:normal;font-size:1em;font-variation-settings:normal}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{color:inherit;font-family:inherit;font-feature-settings:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;letter-spacing:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]:where(:not([hidden=until-found])){display:none}:root{--bg:oklch(0.17 0.006 250);--panel:oklch(0.20 0.007 250);--panel-hi:oklch(0.23 0.008 250);--line:oklch(0.27 0.010 250);--line-soft:oklch(0.23 0.008 250);--ink:oklch(0.96 0.005 250);--ink-mid:oklch(0.78 0.005 250);--ink-mute:oklch(0.58 0.006 250);--ink-fade:oklch(0.42 0.006 250);--ok:oklch(0.78 0.14 155);--warn:oklch(0.82 
0.13 80);--bad:oklch(0.70 0.20 25);--off:oklch(0.50 0.005 250);--accent:oklch(0.82 0.12 195)}body,html{background:var(--bg);color:var(--ink);font-family:Inter,system-ui,-apple-system,sans-serif;-webkit-font-smoothing:antialiased}body{font-feature-settings:"cv11","ss01","ss03"}::-moz-selection{background:color-mix(in oklch,var(--accent),transparent 70%)}::selection{background:color-mix(in oklch,var(--accent),transparent 70%)}.\!container{width:100%!important}.container{width:100%}@media (min-width:640px){.\!container{max-width:640px!important}.container{max-width:640px}}@media (min-width:768px){.\!container{max-width:768px!important}.container{max-width:768px}}@media (min-width:1024px){.\!container{max-width:1024px!important}.container{max-width:1024px}}@media (min-width:1280px){.\!container{max-width:1280px!important}.container{max-width:1280px}}@media (min-width:1536px){.\!container{max-width:1536px!important}.container{max-width:1536px}}.mono{font-family:JetBrains Mono,ui-monospace,monospace;font-variant-numeric:tabular-nums}.panel{background:var(--panel);border:1px solid var(--line-soft)}.hairline{box-shadow:inset 0 -1px 0 var(--line-soft)}.dot{border-radius:9999px;display:inline-block;height:7px;width:7px}.dot-online{background:var(--ok);box-shadow:0 0 0 3px color-mix(in oklch,var(--ok),transparent 80%)}.dot-degraded{background:var(--warn);box-shadow:0 0 0 3px color-mix(in oklch,var(--warn),transparent 80%)}.dot-offline{background:var(--off)}.dot-failed{background:var(--bad);box-shadow:0 0 0 3px color-mix(in oklch,var(--bad),transparent 80%)}.pulse{animation:rm-pulse 2.4s ease-in-out infinite}@keyframes rm-pulse{0%,to{box-shadow:0 0 0 3px color-mix(in oklch,var(--accent),transparent 80%)}50%{box-shadow:0 0 0 6px color-mix(in oklch,var(--accent),transparent 92%)}}.btn{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;padding:6px 
11px;text-decoration:none;transition:all .12s ease}.btn:hover{background:var(--panel-hi);color:var(--ink)}.btn:disabled,.btn[disabled]{cursor:not-allowed;opacity:.4;pointer-events:none}.btn-primary{background:var(--accent);border-color:var(--accent);color:oklch(.18 .01 195)}.btn-primary:hover{filter:brightness(1.08)}.btn-ghost,.btn-ghost:hover{border-color:transparent}.btn-ghost:hover{background:var(--panel-hi)}.btn-danger{border-color:color-mix(in oklch,var(--bad),transparent 70%);color:var(--bad)}.btn-danger:hover{background:color-mix(in oklch,var(--bad),transparent 88%);border-color:color-mix(in oklch,var(--bad),transparent 50%);color:oklch(.85 .1 25)}.btn-lg{font-size:13px;padding:9px 14px}.btn-block{justify-content:center;width:100%}.nav-tab{border-bottom:2px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:28px;padding:18px 0;text-decoration:none}.nav-tab.active{border-color:var(--accent)}.nav-tab.active,.nav-tab:hover{color:var(--ink)}.sub-tab{border-bottom:1.5px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:24px;padding:12px 0;text-decoration:none}.sub-tab.active{border-color:var(--ink);color:var(--ink)}.tag{align-items:center;border:1px solid var(--line);border-radius:3px;display:inline-flex;font-size:11px;gap:5px;letter-spacing:.01em;line-height:1;padding:4px 7px}.field-label,.tag{color:var(--ink-mid)}.field-label{display:block;font-size:12px;margin-bottom:6px}.field-help{color:var(--ink-mute);font-size:12px;line-height:1.55;margin-top:6px}.field{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;color:var(--ink);font-family:inherit;font-size:13px;outline:none;padding:9px 12px;transition:border-color .12s ease;width:100%}.field:focus{border-color:var(--accent)}.field.invalid{border-color:color-mix(in oklch,var(--bad),transparent 50%)}.field.mono{font-family:JetBrains 
Mono,monospace;font-size:12px}.field.with-prefix{padding-left:64px}.host-row{align-items:center;border-left:3px solid transparent;-moz-column-gap:18px;column-gap:18px;display:grid;font-size:13px;grid-template-columns:24px 1.4fr .95fr 1.5fr .75fr .7fr .7fr 1.1fr 92px;padding:11px 16px}.host-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.host-row.degraded{border-left-color:color-mix(in oklch,var(--warn),transparent 50%)}.host-row.failed{border-left-color:color-mix(in oklch,var(--bad),transparent 50%)}.host-row.offline{border-left-color:color-mix(in oklch,var(--off),transparent 70%)}.host-row:hover{background:var(--panel-hi)}.host-row.clickable{position:relative}.host-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.host-row.clickable:hover{cursor:pointer}.host-row.clickable>*{pointer-events:none;position:relative;z-index:1}.host-row.clickable>.row-action,.host-row.clickable>.row-link{pointer-events:auto}.src-row{align-items:center;-moz-column-gap:18px;column-gap:18px;display:grid;grid-template-columns:1fr auto;padding:14px 18px}.src-row.clickable{position:relative}.src-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.src-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.src-row.clickable>*{pointer-events:none;position:relative;z-index:1}.src-row.clickable>.row-action,.src-row.clickable>.row-link{pointer-events:auto}.schd-row{align-items:center;-moz-column-gap:14px;column-gap:14px;display:grid;font-size:13px;grid-template-columns:78px 1fr 1.6fr 100px 110px auto;padding:12px 18px}.schd-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.schd-row.clickable{position:relative}.schd-row.clickable 
.row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.schd-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.schd-row.clickable>*{pointer-events:none;position:relative;z-index:1}.schd-row.clickable>.row-action,.schd-row.clickable>.row-link{pointer-events:auto}.preset-chip{background:var(--bg);border:1px solid var(--line-soft);border-radius:4px;color:var(--ink-mid);cursor:pointer;font-family:JetBrains Mono,monospace;font-size:11.5px;padding:4px 9px;transition:border-color .1s ease,color .1s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.preset-chip:hover{border-color:var(--accent);color:var(--ink)}.picker{align-items:center;background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;cursor:pointer;display:flex;font-size:13px;gap:12px;padding:10px 12px;transition:border-color .1s ease,background .1s ease}.picker:hover{border-color:var(--ink-mute)}.picker .check{border:1px solid var(--line);border-radius:3px;display:inline-block;flex-shrink:0;height:14px;position:relative;width:14px}.picker.checked{background:color-mix(in oklch,var(--accent),transparent 92%);border-color:color-mix(in oklch,var(--accent),transparent 50%)}.picker.checked .check{background:var(--accent);border-color:var(--accent)}.picker.checked .check:after{border:solid oklch(.18 .01 195);border-width:0 1.5px 1.5px 0;content:"";height:8px;left:4px;position:absolute;top:1px;transform:rotate(45deg);width:4px}.picker input[type=checkbox]{opacity:0;pointer-events:none;position:absolute}.keep-cell{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;display:flex;flex-direction:column;gap:4px;padding:9px 11px}.keep-cell label{color:var(--ink-fade);font-size:10.5px;letter-spacing:.08em;text-transform:uppercase}.keep-cell input{background:transparent;border:none;color:var(--ink);font-size:14px;outline:none;padding:0;width:100%}.keep-cell input,.log{font-family:JetBrains 
Mono,monospace}.log{background:var(--bg);border:1px solid var(--line-soft);border-radius:7px;font-size:12px;line-height:1.7;overflow:hidden}.log-line{align-items:baseline;-moz-column-gap:14px;column-gap:14px;display:grid;grid-template-columns:14ch 8ch 1fr;padding:1px 16px}.log-line:first-child{padding-top:12px}.log-line:last-child{padding-bottom:12px}.log-tag,.log-ts{color:var(--ink-fade)}.log-tag{font-size:10px;letter-spacing:.08em;text-transform:uppercase}.progress-track{background:var(--bg);border:1px solid var(--line-soft);border-radius:9999px;height:6px;overflow:hidden}.progress-fill{background:var(--accent);border-radius:9999px;height:100%;transition:width .25s ease}.progress-fill.ok{background:var(--ok)}.progress-fill.bad{background:var(--bad)}.crumbs{font-size:12px}.crumbs,.crumbs a{color:var(--ink-mute)}.crumbs a{text-decoration:underline;text-decoration-color:var(--line);text-underline-offset:3px}.crumbs .sep{color:var(--ink-fade);margin:0 8px}.snippet{border:1px solid var(--line-soft);border-radius:6px;overflow:hidden}.snippet-head{align-items:center;border-bottom:1px solid var(--line-soft);color:var(--ink-fade);display:flex;font-size:11px;justify-content:space-between;letter-spacing:.1em;padding:10px 14px;text-transform:uppercase}.snippet pre{color:var(--ink-mid);font-family:JetBrains Mono,monospace;font-size:12px;line-height:1.7;margin:0;padding:14px;white-space:pre-wrap;word-break:break-all}.snippet pre .var{color:var(--accent)}.empty-state{background:radial-gradient(ellipse at top,color-mix(in oklch,var(--accent),transparent 95%),transparent 60%),var(--panel);border:1px dashed var(--line);border-radius:8px;padding:60px 40px;text-align:center}.pointer-events-none{pointer-events:none}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.bottom-5{bottom:1.25rem}.left-0{left:0}.right-5{right:1.25rem}.top-0{top:0}.z-50{z-index:50}.col-span-2{grid-column:span 2/span 2}.col-span-3{grid-column:span 3/span 
3}.col-span-4{grid-column:span 4/span 4}.col-span-5{grid-column:span 5/span 5}.col-span-7{grid-column:span 7/span 7}.col-span-8{grid-column:span 8/span 8}.col-span-9{grid-column:span 9/span 9}.m-0{margin:0}.mx-auto{margin-left:auto;margin-right:auto}.mb-1\.5{margin-bottom:.375rem}.mb-10{margin-bottom:2.5rem}.mb-2{margin-bottom:.5rem}.mb-2\.5{margin-bottom:.625rem}.mb-3{margin-bottom:.75rem}.mb-3\.5{margin-bottom:.875rem}.mb-4{margin-bottom:1rem}.mb-5{margin-bottom:1.25rem}.mb-7{margin-bottom:1.75rem}.ml-1{margin-left:.25rem}.ml-1\.5{margin-left:.375rem}.ml-2{margin-left:.5rem}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-1\.5{margin-top:.375rem}.mt-2{margin-top:.5rem}.mt-2\.5{margin-top:.625rem}.mt-20{margin-top:5rem}.mt-3{margin-top:.75rem}.mt-3\.5{margin-top:.875rem}.mt-4{margin-top:1rem}.mt-5{margin-top:1.25rem}.mt-6{margin-top:1.5rem}.mt-7{margin-top:1.75rem}.mt-8{margin-top:2rem}.mt-9{margin-top:2.25rem}.block{display:block}.inline-block{display:inline-block}.inline{display:inline}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.h-3\.5{height:.875rem}.h-\[22px\]{height:22px}.min-h-screen{min-height:100vh}.w-16{width:4rem}.w-3\.5{width:.875rem}.w-\[22px\]{width:22px}.w-\[360px\]{width:360px}.w-full{width:100%}.min-w-0{min-width:0}.max-w-\[1280px\]{max-width:1280px}.max-w-\[440px\]{max-width:440px}.max-w-\[480px\]{max-width:480px}.max-w-\[520px\]{max-width:520px}.max-w-\[580px\]{max-width:580px}.max-w-\[640px\]{max-width:640px}.max-w-\[680px\]{max-width:680px}.max-w-\[720px\]{max-width:720px}.max-w-\[760px\]{max-width:760px}.flex-1{flex:1 1 0%}.flex-none{flex:none}.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) 
scaleY(var(--tw-scale-y))}.cursor-pointer{cursor:pointer}.select-none{-webkit-user-select:none;-moz-user-select:none;user-select:none}.select-all{-webkit-user-select:all;-moz-user-select:all;user-select:all}.resize{resize:both}.list-none{list-style-type:none}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-12{grid-template-columns:repeat(12,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.items-baseline{align-items:baseline}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-2\.5{gap:.625rem}.gap-3{gap:.75rem}.gap-3\.5{gap:.875rem}.gap-4{gap:1rem}.gap-5{gap:1.25rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.gap-x-4{-moz-column-gap:1rem;column-gap:1rem}.gap-y-2{row-gap:.5rem}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(1rem*var(--tw-space-y-reverse));margin-top:calc(1rem*(1 - var(--tw-space-y-reverse)))}.overflow-hidden,.truncate{overflow:hidden}.truncate{text-overflow:ellipsis}.truncate,.whitespace-nowrap{white-space:nowrap}.text-pretty{text-wrap:pretty}.break-all{word-break:break-all}.rounded-\[3px\]{border-radius:3px}.rounded-\[5px\]{border-radius:5px}.rounded-\[6px\]{border-radius:6px}.rounded-\[7px\]{border-radius:7px}.rounded-full{border-radius:9999px}.border{border-width:1px}.border-y{border-top-width:1px}.border-b,.border-y{border-bottom-width:1px}.border-l{border-left-width:1px}.border-t{border-top-width:1px}.border-line{border-color:oklch(.27 .01 250)}.border-line-soft{border-color:oklch(.23 .008 
250)}.p-0{padding:0}.p-4{padding:1rem}.p-5{padding:1.25rem}.p-7{padding:1.75rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-2\.5{padding-left:.625rem;padding-right:.625rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-3\.5{padding-left:.875rem;padding-right:.875rem}.px-4{padding-left:1rem;padding-right:1rem}.px-7{padding-left:1.75rem;padding-right:1.75rem}.px-8{padding-left:2rem;padding-right:2rem}.py-0\.5{padding-bottom:.125rem;padding-top:.125rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-12{padding-bottom:3rem;padding-top:3rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.py-2\.5{padding-bottom:.625rem;padding-top:.625rem}.py-3{padding-bottom:.75rem;padding-top:.75rem}.py-3\.5{padding-bottom:.875rem;padding-top:.875rem}.py-4{padding-bottom:1rem;padding-top:1rem}.py-5{padding-bottom:1.25rem;padding-top:1.25rem}.py-6{padding-bottom:1.5rem;padding-top:1.5rem}.py-7{padding-bottom:1.75rem;padding-top:1.75rem}.pb-14{padding-bottom:3.5rem}.pb-2{padding-bottom:.5rem}.pb-24{padding-bottom:6rem}.pb-3{padding-bottom:.75rem}.pb-4{padding-bottom:1rem}.pl-6{padding-left:1.5rem}.pl-9{padding-left:2.25rem}.pt-1{padding-top:.25rem}.pt-14{padding-top:3.5rem}.pt-4{padding-top:1rem}.pt-5{padding-top:1.25rem}.pt-6{padding-top:1.5rem}.pt-7{padding-top:1.75rem}.pt-9{padding-top:2.25rem}.pt-\[1px\]{padding-top:1px}.text-center{text-align:center}.text-right{text-align:right}.text-2xl{font-size:1.5rem;line-height:2rem}.text-\[10\.5px\]{font-size:10.5px}.text-\[11\.5px\]{font-size:11.5px}.text-\[11px\]{font-size:11px}.text-\[12\.5px\]{font-size:12.5px}.text-\[12px\]{font-size:12px}.text-\[13px\]{font-size:13px}.text-\[14px\]{font-size:14px}.text-\[16px\]{font-size:16px}.text-\[18px\]{font-size:18px}.text-\[20px\]{font-size:20px}.text-\[22px\]{font-size:22px}.text-\[26px\]{font-size:26px}.text-\[28px\]{font-size:28px}.text-base{font-size:1rem;line-height:1.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size
:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-medium{font-weight:500}.font-normal{font-weight:400}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.normal-case{text-transform:none}.italic{font-style:italic}.leading-\[1\.55\]{line-height:1.55}.leading-\[1\.5\]{line-height:1.5}.leading-\[1\.65\]{line-height:1.65}.leading-\[1\.6\]{line-height:1.6}.leading-\[1\.7\]{line-height:1.7}.leading-\[20px\]{line-height:20px}.leading-none{line-height:1}.tracking-\[-0\.005em\]{letter-spacing:-.005em}.tracking-\[-0\.012em\]{letter-spacing:-.012em}.tracking-\[-0\.01em\]{letter-spacing:-.01em}.tracking-\[-0\.02em\]{letter-spacing:-.02em}.tracking-\[0\.005em\]{letter-spacing:.005em}.tracking-\[0\.01em\]{letter-spacing:.01em}.tracking-\[0\.02em\]{letter-spacing:.02em}.tracking-\[0\.08em\]{letter-spacing:.08em}.tracking-\[0\.1em\]{letter-spacing:.1em}.text-accent{color:oklch(.82 .12 195)}.text-bad{color:oklch(.7 .2 25)}.text-ink{color:oklch(.96 .005 250)}.text-ink-fade{color:oklch(.42 .006 250)}.text-ink-mid{color:oklch(.78 .005 250)}.text-ink-mute{color:oklch(.58 .006 250)}.text-ok{color:oklch(.78 .14 155)}.text-warn{color:oklch(.82 .13 80)}.underline{text-decoration-line:underline}.no-underline{text-decoration-line:none}.decoration-line{text-decoration-color:oklch(.27 .01 250)}.underline-offset-4{text-underline-offset:4px}.transition{transition-duration:.15s;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,-webkit-backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter,-webkit-backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1)}.hover\:text-ink-mid:hover{color:oklch(.78 .005 250)} +/*! 
tailwindcss v3.4.17 | MIT License | https://tailwindcss.com*/*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}:host,html{line-height:1.5;-webkit-text-size-adjust:100%;font-family:Inter,system-ui,-apple-system,sans-serif;font-feature-settings:normal;font-variation-settings:normal;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-tap-highlight-color:transparent}body{line-height:inherit;margin:0}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:JetBrains Mono,ui-monospace,monospace;font-feature-settings:normal;font-size:1em;font-variation-settings:normal}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{color:inherit;font-family:inherit;font-feature-settings:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;letter-spacing:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{
padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]:where(:not([hidden=until-found])){display:none}:root{--bg:oklch(0.17 0.006 250);--panel:oklch(0.20 0.007 250);--panel-hi:oklch(0.23 0.008 250);--line:oklch(0.27 0.010 250);--line-soft:oklch(0.23 0.008 250);--ink:oklch(0.96 0.005 250);--ink-mid:oklch(0.78 0.005 250);--ink-mute:oklch(0.58 0.006 250);--ink-fade:oklch(0.42 0.006 250);--ok:oklch(0.78 0.14 155);--warn:oklch(0.82 0.13 80);--bad:oklch(0.70 0.20 25);--off:oklch(0.50 0.005 250);--accent:oklch(0.82 0.12 195)}body,html{background:var(--bg);color:var(--ink);font-family:Inter,system-ui,-apple-system,sans-serif;-webkit-font-smoothing:antialiased}body{font-feature-settings:"cv11","ss01","ss03"}::-moz-selection{background:color-mix(in oklch,var(--accent),transparent 70%)}::selection{background:color-mix(in oklch,var(--accent),transparent 70%)}.\!container{width:100%!important}.container{width:100%}@media (min-width:640px){.\!container{max-width:640px!important}.container{max-width:640px}}@media (min-width:768px){.\!container{max-width:768px!important}.container{max-width:768px}}@media (min-width:1024px){.\!container{max-width:1024px!important}.container{max-width:1024px}}@media (min-width:1280px){.\!container{max-width:1280px!important}.container{max-width:1280px}}@media (min-width:1536px){.\!container{max-width:1536px!important}.container{max-width:1536px}}.mono{font-family:JetBrains Mono,ui-monospace,monospace;font-variant-numeric:tabular-nums}.panel{background:var(--panel);border:1px solid var(--line-soft)}.hairline{box-shadow:inset 0 -1px 0 
var(--line-soft)}.dot{border-radius:9999px;display:inline-block;height:7px;width:7px}.dot-online{background:var(--ok);box-shadow:0 0 0 3px color-mix(in oklch,var(--ok),transparent 80%)}.dot-degraded{background:var(--warn);box-shadow:0 0 0 3px color-mix(in oklch,var(--warn),transparent 80%)}.dot-offline{background:var(--off)}.dot-failed{background:var(--bad);box-shadow:0 0 0 3px color-mix(in oklch,var(--bad),transparent 80%)}.pulse{animation:rm-pulse 2.4s ease-in-out infinite}@keyframes rm-pulse{0%,to{box-shadow:0 0 0 3px color-mix(in oklch,var(--accent),transparent 80%)}50%{box-shadow:0 0 0 6px color-mix(in oklch,var(--accent),transparent 92%)}}.btn{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;padding:6px 11px;text-decoration:none;transition:all .12s ease}.btn:hover{background:var(--panel-hi);color:var(--ink)}.btn:disabled,.btn[disabled]{cursor:not-allowed;opacity:.4;pointer-events:none}.btn-primary{background:var(--accent);border-color:var(--accent);color:oklch(.18 .01 195)}.btn-primary:hover{filter:brightness(1.08)}.btn-ghost,.btn-ghost:hover{border-color:transparent}.btn-ghost:hover{background:var(--panel-hi)}.btn-danger{border-color:color-mix(in oklch,var(--bad),transparent 70%);color:var(--bad)}.btn-danger:hover{background:color-mix(in oklch,var(--bad),transparent 88%);border-color:color-mix(in oklch,var(--bad),transparent 50%);color:oklch(.85 .1 25)}.btn-lg{font-size:13px;padding:9px 14px}.btn-block{justify-content:center;width:100%}.nav-tab{border-bottom:2px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:28px;padding:18px 0;text-decoration:none}.nav-tab.active{border-color:var(--accent)}.nav-tab.active,.nav-tab:hover{color:var(--ink)}.sub-tab{border-bottom:1.5px solid transparent;color:var(--ink-mute);cursor:pointer;font-size:13px;margin-right:24px;padding:12px 
0;text-decoration:none}.sub-tab.active{border-color:var(--ink);color:var(--ink)}.tag{align-items:center;border:1px solid var(--line);border-radius:3px;display:inline-flex;font-size:11px;gap:5px;letter-spacing:.01em;line-height:1;padding:4px 7px}.field-label,.tag{color:var(--ink-mid)}.field-label{display:block;font-size:12px;margin-bottom:6px}.field-help{color:var(--ink-mute);font-size:12px;line-height:1.55;margin-top:6px}.field{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;color:var(--ink);font-family:inherit;font-size:13px;outline:none;padding:9px 12px;transition:border-color .12s ease;width:100%}.field:focus{border-color:var(--accent)}.field.invalid{border-color:color-mix(in oklch,var(--bad),transparent 50%)}.field.mono{font-family:JetBrains Mono,monospace;font-size:12px}.field.with-prefix{padding-left:64px}.host-row{align-items:center;border-left:3px solid transparent;-moz-column-gap:18px;column-gap:18px;display:grid;font-size:13px;grid-template-columns:24px 1.4fr .95fr 1.5fr .75fr .7fr .7fr 1.1fr 92px;padding:11px 16px}.host-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.host-row.degraded{border-left-color:color-mix(in oklch,var(--warn),transparent 50%)}.host-row.failed{border-left-color:color-mix(in oklch,var(--bad),transparent 50%)}.host-row.offline{border-left-color:color-mix(in oklch,var(--off),transparent 70%)}.host-row:hover{background:var(--panel-hi)}.host-row.clickable{position:relative}.host-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.host-row.clickable:hover{cursor:pointer}.host-row.clickable>*{pointer-events:none;position:relative;z-index:1}.host-row.clickable>.row-action,.host-row.clickable>.row-link{pointer-events:auto}.src-row{align-items:center;-moz-column-gap:18px;column-gap:18px;display:grid;grid-template-columns:1fr auto;padding:14px 
18px}.src-row.clickable{position:relative}.src-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.src-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.src-row.clickable>*{pointer-events:none;position:relative;z-index:1}.src-row.clickable>.row-action,.src-row.clickable>.row-link{pointer-events:auto}.dropdown{display:inline-block;position:relative}.dropdown summary{align-items:center;background:transparent;border:1px solid var(--line);border-radius:5px;color:var(--ink-mid);cursor:pointer;display:inline-flex;font-size:12px;font-weight:500;gap:6px;list-style:none;padding:6px 11px;transition:all .12s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.dropdown summary::-webkit-details-marker{display:none}.dropdown summary::marker{content:""}.dropdown summary:hover{background:var(--panel-hi);color:var(--ink)}.dropdown summary .chev{color:var(--ink-fade);font-size:9px;transition:transform .12s ease}.dropdown[open] summary .chev{transform:rotate(180deg)}.dropdown[open] summary{background:var(--panel-hi);color:var(--ink)}.dropdown-menu{background:var(--panel);border:1px solid var(--line);border-radius:6px;box-shadow:0 6px 24px -8px rgba(0,0,0,.55);min-width:220px;padding:4px;position:absolute;right:0;top:calc(100% + 4px);z-index:30}.dropdown-item{border-radius:4px;color:var(--ink-mid);display:block;font-size:12.5px;line-height:1.35;padding:8px 11px;text-decoration:none}.dropdown-item:hover{background:var(--panel-hi);color:var(--ink)}.dropdown-item .label{color:var(--ink);display:block;font-weight:500}.dropdown-item .hint{color:var(--ink-mute);display:block;font-family:JetBrains Mono,ui-monospace,monospace;font-size:11px;margin-top:2px}.snap-row{align-items:center;border-bottom:1px solid var(--line-soft);-moz-column-gap:16px;column-gap:16px;cursor:pointer;display:grid;font-size:13px;grid-template-columns:150px 130px 1fr 90px 130px 80px;padding:11px 14px;transition:background .1s 
ease}.snap-row:last-child{border-bottom:0}.snap-row:hover{background:var(--panel-hi)}.snap-row.head{color:var(--ink-fade);cursor:default;font-size:11px;letter-spacing:.08em;padding-bottom:9px;padding-top:9px;text-transform:uppercase}.snap-row.head:hover{background:transparent}.schd-row{align-items:center;-moz-column-gap:14px;column-gap:14px;display:grid;font-size:13px;grid-template-columns:78px 1fr 1.6fr 100px 110px auto;padding:12px 18px}.schd-row.head{color:var(--ink-fade);font-size:11px;letter-spacing:.08em;padding-bottom:10px;padding-top:10px;text-transform:uppercase}.schd-row.clickable{position:relative}.schd-row.clickable .row-link{inset:0;overflow:hidden;position:absolute;text-indent:-9999px;z-index:0}.schd-row.clickable:hover{background:var(--panel-hi);cursor:pointer}.schd-row.clickable>*{pointer-events:none;position:relative;z-index:1}.schd-row.clickable>.row-action,.schd-row.clickable>.row-link{pointer-events:auto}.preset-chip{background:var(--bg);border:1px solid var(--line-soft);border-radius:4px;color:var(--ink-mid);cursor:pointer;font-family:JetBrains Mono,monospace;font-size:11.5px;padding:4px 9px;transition:border-color .1s ease,color .1s ease;-webkit-user-select:none;-moz-user-select:none;user-select:none}.preset-chip:hover{border-color:var(--accent);color:var(--ink)}.picker{align-items:center;background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;cursor:pointer;display:flex;font-size:13px;gap:12px;padding:10px 12px;transition:border-color .1s ease,background .1s ease}.picker:hover{border-color:var(--ink-mute)}.picker .check{border:1px solid var(--line);border-radius:3px;display:inline-block;flex-shrink:0;height:14px;position:relative;width:14px}.picker.checked{background:color-mix(in oklch,var(--accent),transparent 92%);border-color:color-mix(in oklch,var(--accent),transparent 50%)}.picker.checked .check{background:var(--accent);border-color:var(--accent)}.picker.checked .check:after{border:solid oklch(.18 .01 195);border-width:0 
1.5px 1.5px 0;content:"";height:8px;left:4px;position:absolute;top:1px;transform:rotate(45deg);width:4px}.picker input[type=checkbox]{opacity:0;pointer-events:none;position:absolute}.keep-cell{background:var(--bg);border:1px solid var(--line-soft);border-radius:5px;display:flex;flex-direction:column;gap:4px;padding:9px 11px}.keep-cell label{color:var(--ink-fade);font-size:10.5px;letter-spacing:.08em;text-transform:uppercase}.keep-cell input{background:transparent;border:none;color:var(--ink);font-size:14px;outline:none;padding:0;width:100%}.keep-cell input,.log{font-family:JetBrains Mono,monospace}.log{background:var(--bg);border:1px solid var(--line-soft);border-radius:7px;font-size:12px;line-height:1.7;overflow:hidden}.log-line{align-items:baseline;-moz-column-gap:14px;column-gap:14px;display:grid;grid-template-columns:14ch 8ch 1fr;padding:1px 16px}.log-line:first-child{padding-top:12px}.log-line:last-child{padding-bottom:12px}.log-tag,.log-ts{color:var(--ink-fade)}.log-tag{font-size:10px;letter-spacing:.08em;text-transform:uppercase}.progress-track{background:var(--bg);border:1px solid var(--line-soft);border-radius:9999px;height:6px;overflow:hidden}.progress-fill{background:var(--accent);border-radius:9999px;height:100%;transition:width .25s ease}.progress-fill.ok{background:var(--ok)}.progress-fill.bad{background:var(--bad)}.crumbs{font-size:12px}.crumbs,.crumbs a{color:var(--ink-mute)}.crumbs a{text-decoration:underline;text-decoration-color:var(--line);text-underline-offset:3px}.crumbs .sep{color:var(--ink-fade);margin:0 8px}.snippet{border:1px solid var(--line-soft);border-radius:6px;overflow:hidden}.snippet-head{align-items:center;border-bottom:1px solid var(--line-soft);color:var(--ink-fade);display:flex;font-size:11px;justify-content:space-between;letter-spacing:.1em;padding:10px 14px;text-transform:uppercase}.snippet pre{color:var(--ink-mid);font-family:JetBrains 
Mono,monospace;font-size:12px;line-height:1.7;margin:0;padding:14px;white-space:pre-wrap;word-break:break-all}.snippet pre .var{color:var(--accent)}.empty-state{background:radial-gradient(ellipse at top,color-mix(in oklch,var(--accent),transparent 95%),transparent 60%),var(--panel);border:1px dashed var(--line);border-radius:8px;padding:60px 40px;text-align:center}.pointer-events-none{pointer-events:none}.fixed{position:fixed}.absolute{position:absolute}.relative{position:relative}.bottom-5{bottom:1.25rem}.left-0{left:0}.right-5{right:1.25rem}.top-0{top:0}.z-50{z-index:50}.col-span-2{grid-column:span 2/span 2}.col-span-3{grid-column:span 3/span 3}.col-span-4{grid-column:span 4/span 4}.col-span-5{grid-column:span 5/span 5}.col-span-7{grid-column:span 7/span 7}.col-span-8{grid-column:span 8/span 8}.col-span-9{grid-column:span 9/span 9}.m-0{margin:0}.mx-2{margin-left:.5rem;margin-right:.5rem}.mx-auto{margin-left:auto;margin-right:auto}.mb-1\.5{margin-bottom:.375rem}.mb-10{margin-bottom:2.5rem}.mb-2{margin-bottom:.5rem}.mb-2\.5{margin-bottom:.625rem}.mb-3{margin-bottom:.75rem}.mb-3\.5{margin-bottom:.875rem}.mb-4{margin-bottom:1rem}.mb-5{margin-bottom:1.25rem}.mb-7{margin-bottom:1.75rem}.ml-1{margin-left:.25rem}.ml-1\.5{margin-left:.375rem}.ml-2{margin-left:.5rem}.ml-2\.5{margin-left:.625rem}.ml-5{margin-left:1.25rem}.ml-auto{margin-left:auto}.mr-1\.5{margin-right:.375rem}.mt-0\.5{margin-top:.125rem}.mt-1{margin-top:.25rem}.mt-1\.5{margin-top:.375rem}.mt-2{margin-top:.5rem}.mt-2\.5{margin-top:.625rem}.mt-20{margin-top:5rem}.mt-3{margin-top:.75rem}.mt-3\.5{margin-top:.875rem}.mt-4{margin-top:1rem}.mt-5{margin-top:1.25rem}.mt-6{margin-top:1.5rem}.mt-7{margin-top:1.75rem}.mt-8{margin-top:2rem}.mt-9{margin-top:2.25rem}.block{display:block}.inline-block{display:inline-block}.inline{display:inline}.flex{display:flex}.inline-flex{display:inline-flex}.table{display:table}.grid{display:grid}.hidden{display:none}.h-3\.5{height:.875rem}.h-\[13px\]{height:13px}.h-\[22px\]{height:22p
x}.min-h-screen{min-height:100vh}.w-16{width:4rem}.w-3\.5{width:.875rem}.w-\[13px\]{width:13px}.w-\[22px\]{width:22px}.w-\[360px\]{width:360px}.w-full{width:100%}.min-w-0{min-width:0}.max-w-\[1280px\]{max-width:1280px}.max-w-\[440px\]{max-width:440px}.max-w-\[480px\]{max-width:480px}.max-w-\[520px\]{max-width:520px}.max-w-\[580px\]{max-width:580px}.max-w-\[640px\]{max-width:640px}.max-w-\[680px\]{max-width:680px}.max-w-\[720px\]{max-width:720px}.max-w-\[760px\]{max-width:760px}.flex-1{flex:1 1 0%}.flex-none{flex:none}.transform{transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.cursor-help{cursor:help}.cursor-pointer{cursor:pointer}.select-none{-webkit-user-select:none;-moz-user-select:none;user-select:none}.select-all{-webkit-user-select:all;-moz-user-select:all;user-select:all}.resize{resize:both}.list-none{list-style-type:none}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-12{grid-template-columns:repeat(12,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.flex-col{flex-direction:column}.flex-wrap{flex-wrap:wrap}.items-start{align-items:flex-start}.items-end{align-items:flex-end}.items-center{align-items:center}.items-baseline{align-items:baseline}.justify-end{justify-content:flex-end}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.gap-1\.5{gap:.375rem}.gap-2{gap:.5rem}.gap-2\.5{gap:.625rem}.gap-3{gap:.75rem}.gap-3\.5{gap:.875rem}.gap-4{gap:1rem}.gap-5{gap:1.25rem}.gap-6{gap:1.5rem}.gap-8{gap:2rem}.gap-x-4{-moz-column-gap:1rem;column-gap:1rem}.gap-y-2{row-gap:.5rem}.gap-y-2\.5{row-gap:.625rem}.space-y-2>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(.5rem*var(--tw-space-y-reverse));margin-top:calc(.5rem*(1 - 
var(--tw-space-y-reverse)))}.space-y-4>:not([hidden])~:not([hidden]){--tw-space-y-reverse:0;margin-bottom:calc(1rem*var(--tw-space-y-reverse));margin-top:calc(1rem*(1 - var(--tw-space-y-reverse)))}.overflow-hidden,.truncate{overflow:hidden}.truncate{text-overflow:ellipsis}.truncate,.whitespace-nowrap{white-space:nowrap}.text-pretty{text-wrap:pretty}.break-all{word-break:break-all}.rounded-\[3px\]{border-radius:3px}.rounded-\[5px\]{border-radius:5px}.rounded-\[6px\]{border-radius:6px}.rounded-\[7px\]{border-radius:7px}.rounded-\[8px\]{border-radius:8px}.rounded-full{border-radius:9999px}.border{border-width:1px}.border-y{border-top-width:1px}.border-b,.border-y{border-bottom-width:1px}.border-l{border-left-width:1px}.border-t{border-top-width:1px}.border-line{border-color:oklch(.27 .01 250)}.border-line-soft{border-color:oklch(.23 .008 250)}.bg-bg{background-color:oklch(.17 .006 250)}.bg-panel{background-color:oklch(.2 .007 250)}.p-0{padding:0}.p-2{padding:.5rem}.p-3\.5{padding:.875rem}.p-4{padding:1rem}.p-5{padding:1.25rem}.p-7{padding:1.75rem}.p-\[18px\]{padding:18px}.px-1{padding-left:.25rem;padding-right:.25rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-2\.5{padding-left:.625rem;padding-right:.625rem}.px-3{padding-left:.75rem;padding-right:.75rem}.px-3\.5{padding-left:.875rem;padding-right:.875rem}.px-4{padding-left:1rem;padding-right:1rem}.px-7{padding-left:1.75rem;padding-right:1.75rem}.px-8{padding-left:2rem;padding-right:2rem}.px-\[18px\]{padding-left:18px;padding-right:18px}.py-0\.5{padding-bottom:.125rem;padding-top:.125rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-1\.5{padding-bottom:.375rem;padding-top:.375rem}.py-12{padding-bottom:3rem;padding-top:3rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.py-2\.5{padding-bottom:.625rem;padding-top:.625rem}.py-3{padding-bottom:.75rem;padding-top:.75rem}.py-3\.5{padding-bottom:.875rem;padding-top:.875rem}.py-4{padding-bottom:1rem;padding-top:1rem}.py-5{padding-bottom:1.25rem;padding-top:1.25rem}.py-
6{padding-bottom:1.5rem;padding-top:1.5rem}.py-7{padding-bottom:1.75rem;padding-top:1.75rem}.py-8{padding-bottom:2rem;padding-top:2rem}.py-\[14px\]{padding-bottom:14px;padding-top:14px}.py-\[5px\]{padding-bottom:5px;padding-top:5px}.pb-14{padding-bottom:3.5rem}.pb-2{padding-bottom:.5rem}.pb-24{padding-bottom:6rem}.pb-3{padding-bottom:.75rem}.pb-4{padding-bottom:1rem}.pb-\[18px\]{padding-bottom:18px}.pl-5{padding-left:1.25rem}.pl-6{padding-left:1.5rem}.pl-9{padding-left:2.25rem}.pt-0\.5{padding-top:.125rem}.pt-1{padding-top:.25rem}.pt-14{padding-top:3.5rem}.pt-4{padding-top:1rem}.pt-5{padding-top:1.25rem}.pt-6{padding-top:1.5rem}.pt-7{padding-top:1.75rem}.pt-9{padding-top:2.25rem}.pt-\[1px\]{padding-top:1px}.text-center{text-align:center}.text-right{text-align:right}.text-2xl{font-size:1.5rem;line-height:2rem}.text-\[10\.5px\]{font-size:10.5px}.text-\[10px\]{font-size:10px}.text-\[11\.5px\]{font-size:11.5px}.text-\[11px\]{font-size:11px}.text-\[12\.5px\]{font-size:12.5px}.text-\[12px\]{font-size:12px}.text-\[13px\]{font-size:13px}.text-\[14px\]{font-size:14px}.text-\[16px\]{font-size:16px}.text-\[18px\]{font-size:18px}.text-\[19px\]{font-size:19px}.text-\[20px\]{font-size:20px}.text-\[22px\]{font-size:22px}.text-\[26px\]{font-size:26px}.text-\[28px\]{font-size:28px}.text-base{font-size:1rem;line-height:1.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-medium{font-weight:500}.font-normal{font-weight:400}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.normal-case{text-transform:none}.italic{font-style:italic}.leading-\[1\.55\]{line-height:1.55}.leading-\[1\.5\]{line-height:1.5}.leading-\[1\.65\]{line-height:1.65}.leading-\[1\.6\]{line-height:1.6}.leading-\[1\.7\]{line-height:1.7}.leading-\[20px\]{line-height:20px}.leading-none{line-height:1}.tracking-\[-0\.005em\]{letter-spacing:-.005em}.tracking-\[-0\.012em\]{letter-spacing:-.012em}.tracking-\[-0\.0
1em\]{letter-spacing:-.01em}.tracking-\[-0\.02em\]{letter-spacing:-.02em}.tracking-\[0\.005em\]{letter-spacing:.005em}.tracking-\[0\.01em\]{letter-spacing:.01em}.tracking-\[0\.02em\]{letter-spacing:.02em}.tracking-\[0\.08em\]{letter-spacing:.08em}.tracking-\[0\.1em\]{letter-spacing:.1em}.text-accent{color:oklch(.82 .12 195)}.text-bad{color:oklch(.7 .2 25)}.text-ink{color:oklch(.96 .005 250)}.text-ink-fade{color:oklch(.42 .006 250)}.text-ink-mid{color:oklch(.78 .005 250)}.text-ink-mute{color:oklch(.58 .006 250)}.text-ok{color:oklch(.78 .14 155)}.text-warn{color:oklch(.82 .13 80)}.underline{text-decoration-line:underline}.no-underline{text-decoration-line:none}.decoration-line{text-decoration-color:oklch(.27 .01 250)}.underline-offset-4{text-underline-offset:4px}.opacity-40{opacity:.4}.transition{transition-duration:.15s;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,-webkit-backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter;transition-property:color,background-color,border-color,text-decoration-color,fill,stroke,opacity,box-shadow,transform,filter,backdrop-filter,-webkit-backdrop-filter;transition-timing-function:cubic-bezier(.4,0,.2,1)}.hover\:text-ink-mid:hover{color:oklch(.78 .005 250)} diff --git a/web/styles/input.css b/web/styles/input.css index 7f0ff24..dfa34f6 100644 --- a/web/styles/input.css +++ b/web/styles/input.css @@ -206,6 +206,78 @@ .src-row.clickable > .row-link { pointer-events: auto; } .src-row.clickable > .row-action { pointer-events: auto; } + /* ---------- dropdown menu (header actions) ---------- + * Uses native
<details> for keyboard + no-JS support. + * The summary is styled like a .btn, the panel sits absolute below. + * Click-outside-to-close handled by CSS via :has() — no JS. + */ + .dropdown { position: relative; display: inline-block; } + .dropdown summary { + list-style: none; cursor: pointer; + /* match .btn shape */ + font-size: 12px; font-weight: 500; + padding: 6px 11px; border-radius: 5px; + background: transparent; + border: 1px solid var(--line); + color: var(--ink-mid); + transition: all 120ms ease; + display: inline-flex; align-items: center; gap: 6px; + user-select: none; + } + .dropdown summary::-webkit-details-marker { display: none; } + .dropdown summary::marker { content: ""; } + .dropdown summary:hover { background: var(--panel-hi); color: var(--ink); } + .dropdown summary .chev { + font-size: 9px; color: var(--ink-fade); + transition: transform 120ms ease; + } + .dropdown[open] summary .chev { transform: rotate(180deg); } + .dropdown[open] summary { background: var(--panel-hi); color: var(--ink); } + .dropdown-menu { + position: absolute; top: calc(100% + 4px); right: 0; + z-index: 30; + min-width: 220px; + background: var(--panel); + border: 1px solid var(--line); + border-radius: 6px; + box-shadow: 0 6px 24px -8px rgba(0,0,0,0.55); + padding: 4px; + } + .dropdown-item { + display: block; + padding: 8px 11px; + border-radius: 4px; + text-decoration: none; + color: var(--ink-mid); + font-size: 12.5px; + line-height: 1.35; + } + .dropdown-item:hover { background: var(--panel-hi); color: var(--ink); } + .dropdown-item .label { display: block; color: var(--ink); font-weight: 500; } + .dropdown-item .hint { + display: block; font-size: 11px; color: var(--ink-mute); margin-top: 2px; + font-family: 'JetBrains Mono', ui-monospace, monospace; + } + + /* ---------- snapshot picker rows (Restore wizard step 1) ---------- */ + .snap-row { + display: grid; align-items: center; + grid-template-columns: 150px 130px 1fr 90px 130px 80px; + column-gap: 16px; + padding: 11px
14px; font-size: 13px; + border-bottom: 1px solid var(--line-soft); + cursor: pointer; + transition: background 100ms ease; + } + .snap-row:last-child { border-bottom: 0; } + .snap-row:hover { background: var(--panel-hi); } + .snap-row.head { + font-size: 11px; color: var(--ink-fade); + text-transform: uppercase; letter-spacing: 0.08em; + padding-top: 9px; padding-bottom: 9px; cursor: default; + } + .snap-row.head:hover { background: transparent; } + /* ---------- schedule rows (Schedules tab) ---------- */ .schd-row { display: grid; align-items: center; diff --git a/web/templates/pages/host_detail.html b/web/templates/pages/host_detail.html index 1ccf63b..9d87f47 100644 --- a/web/templates/pages/host_detail.html +++ b/web/templates/pages/host_detail.html @@ -35,8 +35,10 @@
Snapshot id
Time
Paths
-
Size
-
Files
+
Size
+
Files
@@ -51,7 +53,7 @@ {{if eq $s.FileCount 0}}{{else}}{{comma $s.FileCount}}{{end}}
{{end}} @@ -76,6 +78,35 @@

+
+
Restore
+

+ Pick a snapshot, choose paths, dispatch. Live progress streams once the + agent starts. +

+ Restore from snapshot… +
+ + {{if gt $host.SnapshotCount 1}} +
+
Compare snapshots
+

+ Diff two snapshots to see what changed. Output streams to a live + job page like a regular run. +

+
+ + + +
+
+ {{end}} +
Danger zone

diff --git a/web/templates/pages/host_restore.html b/web/templates/pages/host_restore.html new file mode 100644 index 0000000..0f6fed6 --- /dev/null +++ b/web/templates/pages/host_restore.html @@ -0,0 +1,380 @@ +{{define "title"}}{{.Title}}{{end}} + +{{define "content"}} +{{template "host_chrome" .}} +{{$page := .Page}} +{{$host := $page.Host}} +

+ +
+
+

Restore from snapshot

+
+ Pick a snapshot, choose paths, decide where files go, then dispatch. + Live progress streams to a job page once you start. +
+
+
+ Cancel +
+
+ + {{if $page.Error}} +
+ {{$page.Error}} +
+ {{end}} + +
+ + {{/* ============ STEP 1 — snapshot picker ============ */}} +
+
+
+ {{if $page.Selected}} + + {{else}} + 1 + {{end}} +
+
Snapshot
+
Pick the point-in-time you want to restore from.
+
+
+ step 1 of 4 +
+
+ {{if $page.Selected}} + {{/* selected summary card */}} +
+ {{$page.Selected.ShortID}} +
+
{{$page.Selected.Time.Format "2006-01-02 15:04 MST"}} ·{{relTime $page.Selected.Time}}
+
+ {{range $page.Selected.Tags}}{{.}}{{end}} + paths: + {{range $i, $p := $page.Selected.Paths}}{{if $i}}, {{end}}{{$p}}{{end}} + {{if $page.Selected.SizeBytes}} · {{bytes $page.Selected.SizeBytes}}{{end}} +
+
+ picked from {{len $page.Snapshots}} snapshots + Change +
+ + {{else}} + {{/* full picker table */}} +
+
+
Time
+
Tag
+
Paths
+
Size
+
Snapshot ID
+
+
+ {{if not $page.Snapshots}} +
No snapshots yet. Run a backup first.
+ {{end}} + {{range $page.Snapshots}} + +
{{relTime .Time}}
+
{{range .Tags}}{{.}}{{end}}
+
+ {{range $i, $p := .Paths}}{{if $i}}, {{end}}{{$p}}{{end}} +
+
{{if .SizeBytes}}{{bytes .SizeBytes}}{{else}}—{{end}}
+
{{.ShortID}}
+
+
+ {{end}} +
+ {{end}} +
+
+ + {{/* ============ STEP 2 — paths (tree browser) ============ */}} +
+
+
+ 2 +
+
Paths
+
Tick files and directories to restore. Folders restore recursively.
+
+
+ step 2 of 4 +
+
+ {{if $page.Selected}} +
+ {{/* Root tree node — fetched on first wizard render; child + expansions reuse the same tree.list cache server-side. */}} +
+
loading…
+
+
+ +
+ 0 files selected + · + tick a file or directory above +
+ {{else}} +
Pick a snapshot above to load its paths.
+ {{end}} +
+
+ + {{/* ============ STEP 3 — target ============ */}} +
+
+
+ 3 +
+
Target
+
Where should the files land? Defaults to a fresh, isolated directory.
+
+
+ step 3 of 4 +
+
+
+ + + +
+
+
+ + {{/* ============ STEP 4 — confirm ============ */}} +
+
+
+ 4 +
+
Confirm & start
+
Final review. Logs and progress will stream live.
+
+
+ step 4 of 4 +
+
+
A summary will appear here once you've made your selections.
+
+
+ + {{/* sticky-style action bar */}} +
+
+ Audit row host.restore will be written on dispatch. +
+
+ Back + +
+
+
+
+ +{{/* Lightweight JS to drive the live tally + summary card + tree toggle. + The tree-toggle is plain fetch (not HTMX) so its target lookup is + trivial — the .tree-children div is always the next sibling + inside the same .tree-pair wrapper. */}} + + +{{end}} diff --git a/web/templates/pages/job_detail.html b/web/templates/pages/job_detail.html index 8ba18af..3caa637 100644 --- a/web/templates/pages/job_detail.html +++ b/web/templates/pages/job_detail.html @@ -63,6 +63,22 @@
+ {{if $page.IsActive}}
+ {{/* ---------- progress (running only) ---------- */}} {{if $page.IsActive}} -
+
-
- +
+
@@ -86,6 +112,12 @@
+ {{if eq (printf "%s" $job.Kind) "restore"}} +
+ Current + +
+ {{end}}
{{end}} @@ -194,6 +226,18 @@ return (i === 0 ? n.toFixed(0) : n.toFixed(1)) + ' ' + u[i]; } + const currentFileEl = document.getElementById('restore-current-file'); + function maybeUpdateCurrent(p) { + // Restore-specific: surface the most recent stdout path in the + // "Current" slot. Restic restore --json prints per-file lines on + // stdout (no JSON wrapper) so any line starting with "/" is a + // good candidate. + if (!currentFileEl || p.stream !== 'stdout') return; + const v = (p.payload || '').trim(); + if (v.startsWith('/') && v.length < 400) { + currentFileEl.textContent = v; + } + } function appendLine(p) { // Drop the "awaiting" placeholder once real lines arrive. if (stream.children.length === 1 && stream.firstElementChild.textContent.includes('awaiting agent')) { @@ -208,6 +252,7 @@ `${escapeHtml(p.payload)}`; stream.appendChild(line); if (autoScroll) container.scrollTop = container.scrollHeight; + maybeUpdateCurrent(p); } ws.onmessage = (ev) => { diff --git a/web/templates/partials/host_chrome.html b/web/templates/partials/host_chrome.html index 9e3f741..41785e1 100644 --- a/web/templates/partials/host_chrome.html +++ b/web/templates/partials/host_chrome.html @@ -121,6 +121,26 @@
{{end}} + {{/* ---------- latest restore line (P3-X3) ---------- */}} + {{if $page.RestoreStatus}} +
+ {{if eq $page.RestoreStatus "succeeded"}} + last restore · succeeded {{relTime $page.RestoreAt}} · + job log → + {{else if eq $page.RestoreStatus "failed"}} + last restore · failed {{relTime $page.RestoreAt}} · + job log → + {{else if eq $page.RestoreStatus "running"}} + restore running… · live log → + {{else if eq $page.RestoreStatus "cancelled"}} + last restore · cancelled {{relTime $page.RestoreAt}} · + job log → + {{else if eq $page.RestoreStatus "queued"}} + restore queued · job {{$page.RestoreJobID}} + {{end}} +
+ {{end}} + {{/* ---------- secondary tabs ---------- */}}
Snapshots {{comma $host.SnapshotCount}} diff --git a/web/templates/partials/tree_node.html b/web/templates/partials/tree_node.html new file mode 100644 index 0000000..a3fb45a --- /dev/null +++ b/web/templates/partials/tree_node.html @@ -0,0 +1,39 @@ +{{define "tree_node"}} +{{$page := .Page}} +{{if $page.Error}} +
error: {{$page.Error}}
+{{else}} +
+ {{$page.Path}} + {{if not $page.Children}} + empty directory + {{end}} +
+ {{range $page.Children}} +
+
+ {{if .IsDir}} + + {{else}} + · + {{end}} + + {{.Name}}{{if .IsDir}}/{{end}} + + {{if not .IsDir}}{{if .Size}}{{bytes .Size}}{{else}}—{{end}}{{end}} +
+ {{if .IsDir}} + + {{end}} +
+ {{end}} +{{end}} +{{end}}