Merge pull request 'Phase 3 — Restore (P3-X1, X2, 01, 02, 03, 09, X3-X6)' (#6) from p3-restore into main
Reviewed-on: #6
This commit was merged in pull request #6.
This commit is contained in:
+1
-1
@@ -26,7 +26,7 @@ linters:
|
||||
- name: exported
|
||||
arguments: ["disableStutteringCheck"]
|
||||
misspell:
|
||||
locale: US
|
||||
locale: UK
|
||||
exclusions:
|
||||
rules:
|
||||
- path: _test\.go
|
||||
|
||||
+196
-38
@@ -136,6 +136,7 @@ func run() error {
|
||||
|
||||
d := &dispatcher{
|
||||
resticBin: resticBin,
|
||||
resticVer: snap.ResticVersion,
|
||||
secrets: sec,
|
||||
scheduler: scheduler.New(),
|
||||
}
|
||||
@@ -200,6 +201,7 @@ func openSecretsStore(cfg *config.Config) (*secrets.Store, error) {
|
||||
// so a job dispatched in the same session sees the latest values.
|
||||
type dispatcher struct {
|
||||
resticBin string
|
||||
resticVer string // e.g. "0.17.1"; empty if restic isn't installed yet
|
||||
secrets *secrets.Store
|
||||
scheduler *scheduler.Scheduler
|
||||
|
||||
@@ -210,6 +212,45 @@ type dispatcher struct {
|
||||
bwMu sync.Mutex
|
||||
bwUpKBps int
|
||||
bwDownKBps int
|
||||
|
||||
// Per-running-job cancellation handles. Populated when runJob
|
||||
// spawns the goroutine, removed when it returns. Looked up by
|
||||
// the command.cancel handler (server → agent) to abort an
|
||||
// in-flight restic invocation.
|
||||
cancelMu sync.Mutex
|
||||
cancels map[string]context.CancelFunc
|
||||
}
|
||||
|
||||
// trackJob registers a cancel func for an in-flight job and returns a
|
||||
// cleanup that removes it. Call cleanup when the job goroutine exits
|
||||
// regardless of outcome — runs even on panic.
|
||||
func (d *dispatcher) trackJob(jobID string, cancel context.CancelFunc) func() {
|
||||
d.cancelMu.Lock()
|
||||
if d.cancels == nil {
|
||||
d.cancels = make(map[string]context.CancelFunc)
|
||||
}
|
||||
d.cancels[jobID] = cancel
|
||||
d.cancelMu.Unlock()
|
||||
return func() {
|
||||
d.cancelMu.Lock()
|
||||
delete(d.cancels, jobID)
|
||||
d.cancelMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// cancelJob fires the cancel func for jobID if there is one and
|
||||
// returns whether the job was actually known. The runner is expected
|
||||
// to surface the resulting context.Canceled as a JobCancelled status
|
||||
// in its job.finished envelope (see runner.sendFinished).
|
||||
func (d *dispatcher) cancelJob(jobID string) bool {
|
||||
d.cancelMu.Lock()
|
||||
cancel, ok := d.cancels[jobID]
|
||||
d.cancelMu.Unlock()
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
cancel()
|
||||
return true
|
||||
}
|
||||
|
||||
func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.Sender) error {
|
||||
@@ -222,8 +263,29 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
|
||||
return d.runJob(ctx, p, tx)
|
||||
|
||||
case api.MsgCommandCancel:
|
||||
// TODO(P2): cancellation requires keeping a job→cancelFunc map.
|
||||
slog.Info("ws agent: command.cancel received (cancellation lands in P2)", "id", env.ID)
|
||||
var p api.CommandCancelPayload
|
||||
if err := env.UnmarshalPayload(&p); err != nil {
|
||||
return fmt.Errorf("command.cancel: %w", err)
|
||||
}
|
||||
if d.cancelJob(p.JobID) {
|
||||
slog.Info("ws agent: command.cancel applied", "job_id", p.JobID)
|
||||
} else {
|
||||
// Job already finished or was never seen on this agent.
|
||||
// Not an error — operator may have raced cancel against
|
||||
// natural completion. Server-side state is authoritative.
|
||||
slog.Info("ws agent: command.cancel for unknown job (already finished?)", "job_id", p.JobID)
|
||||
}
|
||||
|
||||
case api.MsgTreeList:
|
||||
// Synchronous RPC for the restore wizard's tree browser. The
|
||||
// server has serialised access; we just run restic ls and reply
|
||||
// with the same envelope ID. Run in a goroutine so the WS read
|
||||
// loop keeps draining.
|
||||
var p api.TreeListRequestPayload
|
||||
if err := env.UnmarshalPayload(&p); err != nil {
|
||||
return fmt.Errorf("tree.list: %w", err)
|
||||
}
|
||||
go d.handleTreeList(ctx, env.ID, p, tx)
|
||||
|
||||
case api.MsgScheduleSet:
|
||||
var p api.ScheduleSetPayload
|
||||
@@ -332,6 +394,72 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleTreeList runs `restic ls --json <snapshot> <path>` and ships
|
||||
// the matching tree.list.result envelope back, correlated by the
|
||||
// request envelope's ID. Errors (missing creds, restic failure)
|
||||
// surface in the result's Error field rather than as transport-level
|
||||
// failures so the server-side waiter can render a sensible message.
|
||||
func (d *dispatcher) handleTreeList(ctx context.Context, reqID string, p api.TreeListRequestPayload, tx wsclient.Sender) {
|
||||
reply := func(result api.TreeListResultPayload) {
|
||||
result.SnapshotID = p.SnapshotID
|
||||
result.Path = p.Path
|
||||
env, err := api.Marshal(api.MsgTreeListResult, reqID, result)
|
||||
if err != nil {
|
||||
slog.Warn("ws agent: marshal tree.list.result", "err", err)
|
||||
return
|
||||
}
|
||||
_ = tx.Send(env)
|
||||
}
|
||||
|
||||
if d.resticBin == "" {
|
||||
reply(api.TreeListResultPayload{Error: "restic binary not located on this agent"})
|
||||
return
|
||||
}
|
||||
creds, err := d.secrets.Load()
|
||||
if err != nil {
|
||||
reply(api.TreeListResultPayload{Error: "load credentials: " + err.Error()})
|
||||
return
|
||||
}
|
||||
if creds.Empty() {
|
||||
reply(api.TreeListResultPayload{Error: "repo credentials not configured"})
|
||||
return
|
||||
}
|
||||
|
||||
d.bwMu.Lock()
|
||||
upKBps, downKBps := d.bwUpKBps, d.bwDownKBps
|
||||
d.bwMu.Unlock()
|
||||
|
||||
env := restic.Env{
|
||||
Bin: d.resticBin,
|
||||
RepoURL: creds.URL,
|
||||
RepoUsername: creds.Username,
|
||||
RepoPassword: creds.Password,
|
||||
LimitUploadKBps: upKBps,
|
||||
LimitDownloadKBps: downKBps,
|
||||
}
|
||||
|
||||
// 60s ceiling matches snapshots/stats — restic ls on a single
|
||||
// directory is normally sub-second; if the repo is unreachable we
|
||||
// want to surface the failure rather than block the wizard.
|
||||
listCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
entries, err := env.ListTreeChildren(listCtx, p.SnapshotID, p.Path)
|
||||
if err != nil {
|
||||
reply(api.TreeListResultPayload{Error: err.Error()})
|
||||
return
|
||||
}
|
||||
apiEntries := make([]api.TreeListEntry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
apiEntries = append(apiEntries, api.TreeListEntry{
|
||||
Name: e.Name,
|
||||
Type: e.Type,
|
||||
Size: e.Size,
|
||||
})
|
||||
}
|
||||
reply(api.TreeListResultPayload{Entries: apiEntries})
|
||||
}
|
||||
|
||||
// runJob spawns a runner for one job. We launch a goroutine so the
|
||||
// WS read loop keeps draining messages while restic chugs along.
|
||||
func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsclient.Sender) error {
|
||||
@@ -367,6 +495,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
|
||||
|
||||
r := runner.New(runner.Config{
|
||||
ResticBin: d.resticBin,
|
||||
ResticVersion: d.resticVer,
|
||||
RepoURL: creds.URL,
|
||||
RepoUsername: creds.Username,
|
||||
RepoPassword: creds.Password,
|
||||
@@ -374,6 +503,25 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
|
||||
LimitDownloadKBps: downKBps,
|
||||
}, tx, time.Second)
|
||||
|
||||
// spawn wraps the kind-specific goroutine: derives a per-job
|
||||
// cancellable context from the connection-scoped ctx, registers
|
||||
// the cancel func so command.cancel can fire it, deregisters on
|
||||
// completion. Per-job ctx means canceling one job doesn't kill
|
||||
// any other in-flight invocations.
|
||||
spawn := func(name string, fn func(ctx context.Context) error) {
|
||||
jobCtx, cancel := context.WithCancel(ctx)
|
||||
cleanup := d.trackJob(p.JobID, cancel)
|
||||
go func() {
|
||||
defer cleanup()
|
||||
defer cancel() // release ctx resources on goroutine exit
|
||||
if err := fn(jobCtx); err != nil {
|
||||
slog.Warn("agent: "+name+" job failed", "job_id", p.JobID, "err", err)
|
||||
return
|
||||
}
|
||||
slog.Info("agent: "+name+" job complete", "job_id", p.JobID)
|
||||
}()
|
||||
}
|
||||
|
||||
switch p.Kind {
|
||||
case api.JobBackup:
|
||||
// Includes/Excludes/Tag come from the source group resolved
|
||||
@@ -391,22 +539,14 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
|
||||
slog.Info("agent: accepting backup job",
|
||||
"job_id", p.JobID, "paths", paths, "excludes", p.Excludes, "tag", p.Tag)
|
||||
hooks := runner.BackupHooks{Pre: p.PreHook, Post: p.PostHook}
|
||||
go func() {
|
||||
if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags, hooks); err != nil {
|
||||
slog.Warn("agent: backup job failed", "job_id", p.JobID, "err", err)
|
||||
return
|
||||
}
|
||||
slog.Info("agent: backup job complete", "job_id", p.JobID)
|
||||
}()
|
||||
spawn("backup", func(jobCtx context.Context) error {
|
||||
return r.RunBackup(jobCtx, p.JobID, paths, p.Excludes, tags, hooks)
|
||||
})
|
||||
case api.JobInit:
|
||||
slog.Info("agent: accepting init job", "job_id", p.JobID)
|
||||
go func() {
|
||||
if err := r.RunInit(ctx, p.JobID); err != nil {
|
||||
slog.Warn("agent: init job failed", "job_id", p.JobID, "err", err)
|
||||
return
|
||||
}
|
||||
slog.Info("agent: init job complete", "job_id", p.JobID)
|
||||
}()
|
||||
spawn("init", func(jobCtx context.Context) error {
|
||||
return r.RunInit(jobCtx, p.JobID)
|
||||
})
|
||||
case api.JobForget:
|
||||
if len(p.ForgetGroups) == 0 {
|
||||
// Hard-error rather than fall back to a single-policy form:
|
||||
@@ -433,13 +573,9 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
|
||||
})
|
||||
}
|
||||
slog.Info("agent: accepting forget job", "job_id", p.JobID, "groups", len(groups))
|
||||
go func() {
|
||||
if err := r.RunForget(ctx, p.JobID, groups); err != nil {
|
||||
slog.Warn("agent: forget job failed", "job_id", p.JobID, "err", err)
|
||||
return
|
||||
}
|
||||
slog.Info("agent: forget job complete", "job_id", p.JobID)
|
||||
}()
|
||||
spawn("forget", func(jobCtx context.Context) error {
|
||||
return r.RunForget(jobCtx, p.JobID, groups)
|
||||
})
|
||||
case api.JobPrune:
|
||||
// Prune may require admin creds (delete authority on rest-server).
|
||||
runCreds := creds
|
||||
@@ -455,6 +591,7 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
|
||||
}
|
||||
prr := runner.New(runner.Config{
|
||||
ResticBin: d.resticBin,
|
||||
ResticVersion: d.resticVer,
|
||||
RepoURL: runCreds.URL,
|
||||
RepoUsername: runCreds.Username,
|
||||
RepoPassword: runCreds.Password,
|
||||
@@ -462,29 +599,50 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
|
||||
LimitDownloadKBps: downKBps,
|
||||
}, tx, time.Second)
|
||||
slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
|
||||
go func() {
|
||||
if err := prr.RunPrune(ctx, p.JobID); err != nil {
|
||||
slog.Warn("agent: prune job failed", "job_id", p.JobID, "err", err)
|
||||
}
|
||||
}()
|
||||
spawn("prune", func(jobCtx context.Context) error {
|
||||
return prr.RunPrune(jobCtx, p.JobID)
|
||||
})
|
||||
case api.JobCheck:
|
||||
subset := 0
|
||||
if len(p.Args) > 0 {
|
||||
subset, _ = strconv.Atoi(p.Args[0])
|
||||
}
|
||||
slog.Info("agent: accepting check job", "job_id", p.JobID, "subset_pct", subset)
|
||||
go func() {
|
||||
if err := r.RunCheck(ctx, p.JobID, subset); err != nil {
|
||||
slog.Warn("agent: check job failed", "job_id", p.JobID, "err", err)
|
||||
}
|
||||
}()
|
||||
spawn("check", func(jobCtx context.Context) error {
|
||||
return r.RunCheck(jobCtx, p.JobID, subset)
|
||||
})
|
||||
case api.JobUnlock:
|
||||
slog.Info("agent: accepting unlock job", "job_id", p.JobID)
|
||||
go func() {
|
||||
if err := r.RunUnlock(ctx, p.JobID); err != nil {
|
||||
slog.Warn("agent: unlock job failed", "job_id", p.JobID, "err", err)
|
||||
}
|
||||
}()
|
||||
spawn("unlock", func(jobCtx context.Context) error {
|
||||
return r.RunUnlock(jobCtx, p.JobID)
|
||||
})
|
||||
case api.JobRestore:
|
||||
if p.Restore == nil {
|
||||
return fmt.Errorf("restore: command.run carried no restore payload")
|
||||
}
|
||||
rp := *p.Restore
|
||||
if rp.SnapshotID == "" {
|
||||
return fmt.Errorf("restore: snapshot_id is required")
|
||||
}
|
||||
if !rp.InPlace && rp.TargetDir == "" {
|
||||
return fmt.Errorf("restore: target_dir required for non-in-place restore")
|
||||
}
|
||||
slog.Info("agent: accepting restore job",
|
||||
"job_id", p.JobID, "snapshot_id", rp.SnapshotID,
|
||||
"paths", rp.Paths, "in_place", rp.InPlace, "target", rp.TargetDir)
|
||||
spawn("restore", func(jobCtx context.Context) error {
|
||||
return r.RunRestore(jobCtx, p.JobID, rp.SnapshotID, rp.Paths, rp.InPlace, rp.TargetDir)
|
||||
})
|
||||
case api.JobDiff:
|
||||
if p.Diff == nil || p.Diff.SnapshotA == "" || p.Diff.SnapshotB == "" {
|
||||
return fmt.Errorf("diff: command.run carried incomplete diff payload")
|
||||
}
|
||||
dp := *p.Diff
|
||||
slog.Info("agent: accepting diff job",
|
||||
"job_id", p.JobID, "a", dp.SnapshotA, "b", dp.SnapshotB)
|
||||
spawn("diff", func(jobCtx context.Context) error {
|
||||
return r.RunDiff(jobCtx, p.JobID, dp.SnapshotA, dp.SnapshotB)
|
||||
})
|
||||
default:
|
||||
return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind)
|
||||
}
|
||||
|
||||
@@ -49,6 +49,13 @@ detect_arch() {
|
||||
ensure_dirs() {
    # Create (or fix up) each directory as root:root, mode 0700:
    #   $RM_CONFIG_DIR, $RM_STATE_DIR, and /root/rm-restore.
    #
    # /root/rm-restore is the default new-directory restore target
    # ($HOME/rm-restore). It is pre-created so the systemd unit's
    # ReadWritePaths bind-mount applies cleanly: a path that doesn't
    # exist when systemd starts only soft-fails because of the '-'
    # prefix, but the agent then can't mkdir into the read-only /root.
    # Mode 0700 + root-owned matches the threat model — files restored
    # here are operator-readable as root.
    for _rm_dir in "$RM_CONFIG_DIR" "$RM_STATE_DIR" /root/rm-restore; do
        install -d -m 0700 -o root -g root "$_rm_dir"
    done
}
|
||||
|
||||
detect_existing_schedulers() {
|
||||
|
||||
@@ -37,7 +37,12 @@ AmbientCapabilities=CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_CHOWN
|
||||
# needs. Filesystem reads stay open: that's the whole job.
|
||||
NoNewPrivileges=true
|
||||
ProtectSystem=strict
|
||||
ReadWritePaths=/etc/restic-manager /var/lib/restic-manager
|
||||
# /etc/restic-manager: agent.yaml + secrets.enc.
|
||||
# /var/lib/restic-manager: agent state (currently unused but reserved).
|
||||
# /root/rm-restore: default target for new-directory restores
|
||||
# ($HOME/rm-restore/<job-id>/ resolves here for User=root).
|
||||
# ReadWritePaths overrides ProtectHome=read-only on this subdir only.
|
||||
ReadWritePaths=/etc/restic-manager /var/lib/restic-manager -/root/rm-restore
|
||||
ProtectHome=read-only
|
||||
ProtectHostname=true
|
||||
ProtectKernelTunables=true
|
||||
|
||||
@@ -0,0 +1,342 @@
|
||||
# P3 — Restore (design)
|
||||
|
||||
> Phase 3 sub-spec covering single-host restore (P3-01, P3-02, P3-03, P3-09).
|
||||
> P3-04 (cross-host restore) is deferred to a new "Future / unscheduled"
|
||||
> section in `tasks.md` — disaster recovery is already covered by re-enrolling
|
||||
> a replacement host with the same repo credentials.
|
||||
>
|
||||
> Wireframe: `_diag/p3-restore-wizard/wireframe.html`. Screenshot:
|
||||
> `_diag/p3-restore-wizard/01-full-wizard.png`.
|
||||
|
||||
## Scope locked
|
||||
|
||||
Brainstorm decisions (in order asked):
|
||||
|
||||
1. **In-place vs new-directory.** Default is a new directory under
|
||||
`/var/restic-restore/<job-id>/`. A "Restore in place (overwrite original
|
||||
paths)" toggle is gated by typed-confirmation of the host name, mirroring
|
||||
the repo re-init pattern.
|
||||
2. **Path-selection granularity.** Tree browser as the path selector, lazy-
|
||||
loaded via `restic ls --json <snapshot> <path>` per directory expansion.
|
||||
3. **Cross-host restore (P3-04).** Out of scope this phase. Move to
|
||||
"Future / unscheduled" in `tasks.md`. The disaster-recovery case is covered
|
||||
by the standard enrolment flow: stand up a replacement host, paste the
|
||||
original repo creds at enrolment, snapshots reappear, restore is
|
||||
same-host.
|
||||
4. **Snapshot diff (P3-09).** Diff-as-a-job. New `JobDiff` JobKind dispatched
|
||||
like every other agent operation. Output streams as `log.stream` and
|
||||
renders on the live job log page.
|
||||
5. **Wizard entry points.** Top-level "Restore" button on host detail
|
||||
(`/hosts/{id}/restore`, opens wizard at step 1) plus a per-snapshot
|
||||
Restore action on snapshot rows (`/hosts/{id}/snapshots/{sid}/restore`,
|
||||
skips step 1).
|
||||
6. **Wizard interaction model.** Single-page, sections progressively enable;
|
||||
tree-browser nodes lazy-load via HTMX partials. No `restore_drafts` table.
|
||||
7. **Tree-browser data path.** Synchronous WS RPC (`tree.list` ↔
|
||||
`tree.list.result`, correlation-ID) plus a per-wizard-session in-memory
|
||||
cache keyed by `{snapshot_id, path}` with ~30-min TTL.
|
||||
8. **Restore progress UI.** Restore-specific job-page variant: files-restored
|
||||
/ bytes-restored / throughput / ETA / current-file display, driven by
|
||||
restic restore's JSON status events surfaced through `job.progress`.
|
||||
9. **Permissions/ownership.** Policy, not toggle. In-place restore preserves
|
||||
original ownership; new-directory restore drops ownership
|
||||
(`--no-ownership`).
|
||||
10. **Concurrency.** Single-flight per host (one job at a time across all
|
||||
kinds). Plus a real cancel-job feature: `command.cancel` envelope, agent
|
||||
kills the `restic` subprocess via context cancel (SIGTERM, SIGKILL after
|
||||
grace), server transitions the job to `cancelled`. The "Cancel" button
|
||||
already in the `job_detail` template becomes real for any running job
|
||||
kind.
|
||||
11. **Audit + safety.** Audit row on every restore dispatch (`host.restore`
|
||||
with snapshot ID, paths, target, in-place flag). Recent-restores panel
|
||||
on the host page surfacing the latest restore job alongside last-backup
|
||||
and last-init signals. Role gate deferred to P4-03.
|
||||
|
||||
## Architecture
|
||||
|
||||
Restore composes from existing primitives plus three new pieces:
|
||||
|
||||
- **New JobKind values**: `JobRestore`, `JobDiff`. Dispatcher cases mirror
|
||||
the prune/check pattern. Agent-side handlers wrap `restic.RunRestore` and
|
||||
`restic.RunDiff` (new methods on the `restic` package).
|
||||
- **New WS RPC**: `tree.list` request (`{snapshot_id, path}`) ↔
|
||||
`tree.list.result` reply (`{entries: [{name, type, size}], ...}` or
|
||||
`{error}`). Reuses existing correlation-ID infrastructure from P1-09. No
|
||||
`jobs` row.
|
||||
- **New cancel surface**: `command.cancel` request (`{job_id}`), agent
|
||||
cancels the running subprocess context, returns `command.ack` + `job.finished`
|
||||
with status `cancelled`. Server endpoint `POST /api/jobs/{id}/cancel`
|
||||
bridges UI button → WS envelope.
|
||||
|
||||
Everything else (job lifecycle, log streaming, progress envelope, snapshot
|
||||
listing, audit log writer, host_chrome partial, danger-zone typed-confirmation)
|
||||
already exists and is reused verbatim.
|
||||
|
||||
### Component boundaries
|
||||
|
||||
| Component | Purpose | Depends on |
|
||||
| ---------------------------------- | ---------------------------------------------------- | ----------------------------------------- |
|
||||
| `internal/restic.RunRestore` | Run `restic restore` with paths + target + ownership | `restic.Env` |
|
||||
| `internal/restic.RunDiff` | Run `restic diff --json a b` | `restic.Env` |
|
||||
| `internal/agent/runner` cases | Dispatch `JobRestore` / `JobDiff` jobs | `restic.Run*`, hooks (skipped: backup-only) |
|
||||
| `internal/agent/runner` cancel hook | Wire WS `command.cancel` → ctx.CancelFunc per job | runner job map |
|
||||
| `internal/agent/runner` tree-list | Sync RPC handler: `restic ls --json` for one path | `restic.Env` |
|
||||
| `internal/server/ws/cancel.go` | Validate + send `command.cancel` envelope | hub.Send, store.UpdateJobStatus |
|
||||
| `internal/server/ws/tree.go` | RPC mediator: `tree.list` request → reply, with cache | hub.SendRPC, in-memory cache |
|
||||
| `internal/server/http/restore.go` | Wizard routes + dispatch endpoint | store, ws, audit |
|
||||
| `internal/server/http/diff.go` | Snapshot-diff dispatch endpoint | store, ws |
|
||||
| `internal/server/http/cancel.go` | `POST /api/jobs/{id}/cancel` | ws |
|
||||
| `web/templates/pages/host_restore.html` | Wizard page | host_chrome partial |
|
||||
| `web/templates/partials/tree_node.html` | Lazy-loaded tree node fragment for HTMX swap | — |
|
||||
| `web/templates/pages/job_detail.html` | Restore-kind progress widget (variant) | existing job_detail |
|
||||
|
||||
### Data flow — wizard happy path
|
||||
|
||||
```
|
||||
operator
|
||||
├─ GET /hosts/{id}/restore
|
||||
│ server renders wizard shell, snapshot table from store.ListSnapshotsByHost
|
||||
│
|
||||
├─ click snapshot row (or arrives via /hosts/{id}/snapshots/{sid}/restore)
|
||||
│ wizard advances to step 2, snapshot summary card rendered
|
||||
│
|
||||
├─ expand a tree node (chevron click)
|
||||
│ HTMX GET /hosts/{id}/restore/tree?snapshot={sid}&path=/etc
|
||||
│ server checks per-session cache (keyed by sid+path)
|
||||
│ hit → render tree_node fragment from cache
|
||||
│ miss → hub.SendRPC(host_id, "tree.list", {sid, path}) → wait reply
|
||||
│ cache result, render tree_node fragment
|
||||
│
|
||||
├─ tick file/dir checkboxes (form state, no round-trip)
|
||||
│
|
||||
├─ pick target radio (and optionally type host name to unlock in-place)
|
||||
│
|
||||
└─ POST /hosts/{id}/restore (form submit)
|
||||
server validates: ≥1 path, target mode, in-place ⇒ host name match
|
||||
write audit row host.restore
|
||||
store.CreateJob{kind=restore, payload={snapshot_id, paths, target, in_place}}
|
||||
hub.Send(host_id, "command.run", {job_id, kind=restore, payload})
|
||||
HX-Redirect: /jobs/{job_id}
|
||||
```
|
||||
|
||||
### Data flow — agent restore execution
|
||||
|
||||
```
|
||||
agent.runner receives command.run kind=restore
|
||||
├─ check single-flight: if r.activeJobID != "" → reply busy
|
||||
│ (server queues to pending_runs only for kind=backup; restore returns busy)
|
||||
├─ allocate ctx, ctxCancel — store cancelFunc against job_id in r.cancels
|
||||
├─ sendStarted(job_id, JobRestore, now)
|
||||
├─ build target path: if in_place → "/" else "/var/restic-restore/<job_id>/"
|
||||
├─ build flags: paths from payload, --no-ownership when !in_place
|
||||
├─ restic.RunRestore(ctx, env, snapshot_id, paths, target, in_place):
|
||||
│ restic restore <sid> --target <path> [--no-ownership] -- <p1> <p2> ...
|
||||
│ parse stdout JSON: forward "status" → job.progress (1Hz throttle), "summary" → final
|
||||
├─ on success: sendFinished(job_id, succeeded, exit=0)
|
||||
├─ on ctx.Err() == context.Canceled: sendFinished(job_id, cancelled, exit=130)
|
||||
└─ delete cancel func from r.cancels
|
||||
```
|
||||
|
||||
### Data flow — cancel
|
||||
|
||||
```
|
||||
operator clicks Cancel on /jobs/{id} (running)
|
||||
POST /api/jobs/{id}/cancel
|
||||
server: lookup job, ensure status=running, find host
|
||||
hub.Send(host_id, "command.cancel", {job_id})
|
||||
→ agent.runner receives command.cancel
|
||||
cancelFunc, ok := r.cancels[job_id]
|
||||
ok && cancelFunc()
|
||||
→ restic subprocess context done → exec.Cmd kills via SIGTERM
|
||||
→ if still alive after 5s grace → SIGKILL
|
||||
→ runner sendFinished(job_id, cancelled, exit=130)
|
||||
→ server receives job.finished status=cancelled, persists, broadcasts
|
||||
→ browser refresh shows cancelled state
|
||||
```
|
||||
|
||||
The cancel surface is independently useful for any kind (prune/check/backup) —
|
||||
not gated to restore. The button already in `job_detail.html` becomes real.
|
||||
|
||||
### Tree-list RPC details
|
||||
|
||||
New WS message types (added to `internal/api/messages.go`):
|
||||
|
||||
```
|
||||
type TreeListRequestPayload struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
type TreeListEntry struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"` // "dir" | "file" | "symlink"
|
||||
Size int64 `json:"size,omitempty"`
|
||||
}
|
||||
|
||||
type TreeListResultPayload struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Path string `json:"path"`
|
||||
Entries []TreeListEntry `json:"entries,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
```
|
||||
|
||||
Server-side mediator (`ws.SendRPC`) takes a request envelope, registers the
|
||||
correlation ID in a pending map, sends, blocks on a per-call channel until
|
||||
the matching reply arrives (or 30s timeout). The pattern is small enough
|
||||
to inline in `internal/server/ws/rpc.go` as a generic helper — future
|
||||
synchronous RPCs reuse it.
|
||||
|
||||
In-memory cache: `map[sessionID]map[cacheKey]TreeListResultPayload` with
|
||||
`cacheKey = snapshot_id + "\x00" + path`. Session ID minted per wizard
|
||||
load (HTTP-only cookie scoped to `/hosts/{id}/restore/tree`, lifetime 30
|
||||
min). On wizard close (browser navigation away) the entry expires
|
||||
naturally. No persistence, no migration.
|
||||
|
||||
Agent handler runs `restic ls --json <sid> <path>` (non-recursive — restic
|
||||
defaults to recursive but `restic ls` accepts `--long` and a path filter;
|
||||
parse output line-by-line and emit only direct children of `path`). 60s
|
||||
context timeout, mirroring existing `restic snapshots` invocation.
|
||||
|
||||
### Restore payload
|
||||
|
||||
`api.CommandRunPayload` gains a nested optional `restore` field:
|
||||
|
||||
```
|
||||
type RestorePayload struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Paths []string `json:"paths"` // absolute paths inside the snapshot
|
||||
InPlace bool `json:"in_place"`
|
||||
TargetDir string `json:"target_dir"` // empty when in_place=true
|
||||
PreserveOwner bool `json:"preserve_owner"` // mirrors policy: in_place=>true, else=>false
|
||||
}
|
||||
```
|
||||
|
||||
The payload is set by the server when dispatching `JobRestore` and ignored
|
||||
on every other kind. Wire-shape test pinned in `wire_test.go`.
|
||||
|
||||
### Diff payload
|
||||
|
||||
`api.CommandRunPayload` gains:
|
||||
|
||||
```
|
||||
type DiffPayload struct {
|
||||
SnapshotA string `json:"snapshot_a"`
|
||||
SnapshotB string `json:"snapshot_b"`
|
||||
}
|
||||
```
|
||||
|
||||
Set on `JobDiff`. Output is plain `restic diff --json <a> <b>` forwarded as
|
||||
`log.stream` lines. Job page renders unchanged — operator reads the diff
|
||||
output directly.
|
||||
|
||||
### Recent-restores panel
|
||||
|
||||
A small panel rendered on the host detail page below the existing init-status
|
||||
line:
|
||||
|
||||
```
|
||||
last restore: succeeded 2h ago · job f73ab4c1… · 3 files to /var/restic-restore/...
|
||||
```
|
||||
|
||||
Backed by `store.LatestJobByKind(host_id, JobRestore)` (reusing
|
||||
the existing `store.LatestJobByKind` already used for init/forget/prune/check
|
||||
in P2R-06). One template addition in `host_chrome.html` next to the
|
||||
`InitStatus` block.
|
||||
|
||||
## Routes added
|
||||
|
||||
| Method | Path | Purpose |
|
||||
| ------- | --------------------------------------------------------- | ----------------------------------------------------------- |
|
||||
| GET | `/hosts/{id}/restore` | Wizard shell (step 1 = snapshot picker) |
|
||||
| GET | `/hosts/{id}/snapshots/{sid}/restore` | Wizard shell with snapshot pre-selected (skips step 1) |
|
||||
| GET | `/hosts/{id}/restore/tree` | HTMX partial: tree node listing for `?snapshot=&path=` |
|
||||
| POST | `/hosts/{id}/restore` | Validate + dispatch restore job, redirect to live job page |
|
||||
| POST | `/api/hosts/{id}/snapshots/diff` | Dispatch a diff job for `{snapshot_a, snapshot_b}` |
|
||||
| POST | `/api/jobs/{id}/cancel` | Send `command.cancel` to host, transition job → cancelled |
|
||||
|
||||
## Migrations
|
||||
|
||||
None. Restore + diff piggyback on the existing `jobs` table (their `kind` is
|
||||
new but the schema already accepts arbitrary kind strings — there's no
|
||||
CHECK constraint on `kind`). The cancel feature uses the existing
|
||||
`JobCancelled` terminal status. The tree-list cache lives in process memory.
|
||||
|
||||
## Tests (target coverage)
|
||||
|
||||
- `internal/restic/restore_test.go` — `RunRestore` invocation builds the
|
||||
expected argv (paths, --target, --no-ownership flag presence, in-place
|
||||
variant); JSON status parsing → `BackupStatus`-shaped progress envelopes.
|
||||
- `internal/restic/diff_test.go` — `RunDiff` argv shape and JSON forwarding.
|
||||
- `internal/agent/runner/restore_test.go` — happy path, cancel mid-run
|
||||
produces `cancelled` finished, in-place vs new-directory dispatch,
|
||||
single-flight rejects when another job is running.
|
||||
- `internal/agent/runner/tree_test.go` — `tree.list` handler returns
|
||||
direct children for a synthetic restic ls output, surfaces error on
|
||||
missing snapshot.
|
||||
- `internal/server/ws/rpc_test.go` — `SendRPC` correlation matching,
|
||||
timeout, concurrent calls.
|
||||
- `internal/server/http/restore_test.go` — wizard renders with snapshots,
|
||||
POST validates ≥1 path + in-place host-name match, audit row written,
|
||||
job dispatched with correct payload, in-place without typed-confirm
|
||||
re-renders form with input intact and an error.
|
||||
- `internal/server/http/diff_test.go` — POST dispatches `JobDiff`,
|
||||
snapshot IDs validated against the host's snapshot list.
|
||||
- `internal/server/http/cancel_test.go` — POST cancel happy path
|
||||
(running → cancelled), 4xx for non-running jobs, 4xx when host offline.
|
||||
- `internal/server/http/restore_e2e_test.go` — happy path: GET wizard,
|
||||
expand `/etc` (HTMX call returns expected fragment), submit, follow
|
||||
HX-Redirect to job page, see status.
|
||||
- `web/templates/pages/host_restore_test.go` (template-render test) —
|
||||
wizard renders all four sections; in-place card disabled until typed
|
||||
confirm.
|
||||
|
||||
## Playwright iteration / sweep
|
||||
|
||||
A Playwright sweep at the end (mirroring P2R-02 Slice 6) runs against the
|
||||
local smoke server with a real agent enrolled. Steps:
|
||||
|
||||
1. Login → navigate to alfa-01 host → click Restore.
|
||||
2. Wizard step 1: pick the most recent snapshot.
|
||||
3. Wizard step 2: expand a directory two levels, tick three files,
|
||||
verify tally updates.
|
||||
4. Wizard step 3: leave default new-directory.
|
||||
5. Wizard step 4: dispatch.
|
||||
6. Land on live job page, see progress widget animating, see log lines.
|
||||
7. Click Cancel mid-flight, verify status transitions to cancelled and
|
||||
the agent's subprocess actually died (log line `signal: killed` or exit
|
||||
130).
|
||||
8. Repeat with in-place mode: type host name, dispatch, verify red
|
||||
primary button, verify files actually overwritten on host.
|
||||
9. Snapshot diff: navigate to snapshots, pick two, dispatch diff, see
|
||||
diff output streamed.
|
||||
10. Screenshots into `_diag/p3-restore-sweep/`.
|
||||
|
||||
End-to-end clean, zero console errors, before handing back.
|
||||
|
||||
## What does NOT change
|
||||
|
||||
- `host_chrome.html` only grows the recent-restores line; sub-tab list
|
||||
unchanged (Restore is a top-level button on the host page, not a sub-tab).
|
||||
- `enrollment.go`, schedule reconciliation, source-group CRUD, repo
|
||||
maintenance ticker, hook execution — none of these are touched.
|
||||
- The CLAUDE.md restage block applies as-is when the agent binary changes
|
||||
(it does — runner gains restore/diff/cancel/tree handlers). The unit
|
||||
file changes only to add `-/root/rm-restore` to `ReadWritePaths`.
|
||||
|
||||
## Open questions / explicit non-goals
|
||||
|
||||
- **Restore preview / dry-run.** Restic doesn't have a dry-run for restore.
|
||||
Out of scope.
|
||||
- **Resumable restore.** Restic restore is idempotent per-file but not
|
||||
resumable mid-stream from where it left off. If a restore is cancelled,
|
||||
the operator re-runs (files already written are overwritten). No state
|
||||
to track.
|
||||
- **Restore to a glob/pattern (e.g. `*.conf`).** Out of scope; the tree
|
||||
picker requires explicit ticks. Power users can edit the URL or use the
|
||||
CLI.
|
||||
- **Bandwidth caps for restore.** Honoured automatically — restic's
|
||||
`--limit-download` is part of `restic.Env` already (P2R-13) and applies
|
||||
to restore unchanged.
|
||||
- **Pre/post hooks for restore.** Hooks today gate only `kind=backup`
|
||||
(P2R-11). Out of scope.
|
||||
@@ -0,0 +1,81 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// (fakeSender is defined in runner_test.go; it's already lock-protected
|
||||
// because the runner's stdout + stderr pump goroutines call Send
|
||||
// concurrently. The original local 'safeSender' here was a workaround
|
||||
// from before fakeSender itself grew the mutex.)
|
||||
|
||||
// TestRunBackupCanceledMidRunReportsCanceled spawns a backup against
|
||||
// a fake restic that sleeps for 30 seconds, cancels the context after
|
||||
// a short delay, and confirms the resulting job.finished envelope
|
||||
// reports status=canceled (not failed).
|
||||
func TestRunBackupCanceledMidRunReportsCanceled(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Fake restic: replace the shell with a long sleep via `exec` so the
|
||||
// process tree is one process — SIGTERM goes directly to sleep and
|
||||
// it exits. Without `exec`, the shell stays in the foreground while
|
||||
// sleep is its child; SIGTERM-to-shell may or may not propagate to
|
||||
// sleep depending on the shell, leading to the WaitDelay-then-
|
||||
// SIGKILL fallback path firing — slower and noisier.
|
||||
bin := setupScript(t, `exec sleep 30`)
|
||||
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- r.RunBackup(ctx, "job-cancel", []string{"/tmp/x"}, nil, nil, BackupHooks{})
|
||||
}()
|
||||
|
||||
// Wait long enough for the subprocess to actually start before
|
||||
// canceling. Without this, exec.CommandContext can race the
|
||||
// kill against Start and produce a different error path.
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
cancel()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(15 * time.Second):
|
||||
t.Fatal("RunBackup did not return within 15s of cancel")
|
||||
}
|
||||
|
||||
// Locate the job.finished envelope and check its status.
|
||||
envs := tx.snapshot()
|
||||
var finEnv api.Envelope
|
||||
var found bool
|
||||
for _, e := range envs {
|
||||
if e.Type == api.MsgJobFinished {
|
||||
finEnv = e
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatal("no job.finished envelope was sent")
|
||||
}
|
||||
var fin api.JobFinishedPayload
|
||||
if err := finEnv.UnmarshalPayload(&fin); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if fin.Status != api.JobCancelled {
|
||||
t.Fatalf("status: got %q, want %q", fin.Status, api.JobCancelled)
|
||||
}
|
||||
if fin.ExitCode != 130 {
|
||||
t.Errorf("exit_code: got %d, want 130 (POSIX cancel convention)", fin.ExitCode)
|
||||
}
|
||||
// The error message should be empty for canceled jobs (see runner.sendFinished).
|
||||
if !strings.HasPrefix(fin.Error, "") || fin.Error != "" {
|
||||
t.Errorf("error: got %q, want empty for canceled jobs", fin.Error)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,266 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// TestRunRestoreShipsExpectedEnvelopes: a fake restic emits a couple
|
||||
// of restore status lines and a summary; the runner translates them
|
||||
// into job.progress envelopes and finishes the job successfully.
|
||||
func TestRunRestoreShipsExpectedEnvelopes(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
bin := setupScript(t, `
|
||||
case "$1" in
|
||||
restore)
|
||||
echo '{"message_type":"status","seconds_elapsed":1,"percent_done":0.5,"total_files":10,"files_restored":5,"total_bytes":1000,"bytes_restored":500}'
|
||||
echo '{"message_type":"status","seconds_elapsed":2,"percent_done":1.0,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}'
|
||||
echo '{"message_type":"summary","seconds_elapsed":2,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}'
|
||||
;;
|
||||
*)
|
||||
echo "unknown: $*" ;;
|
||||
esac
|
||||
`)
|
||||
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
|
||||
if err := r.RunRestore(context.Background(), "job-r1", "f3a7b2c1",
|
||||
[]string{"/etc/nginx/sites-available/alfa.conf"},
|
||||
false, "/tmp/restore-out"); err != nil {
|
||||
t.Fatalf("RunRestore: %v", err)
|
||||
}
|
||||
|
||||
// Confirm landmarks: started → progress → finished.
|
||||
order := envelopeOrder(tx.envs)
|
||||
wants := []api.MessageType{api.MsgJobStarted, api.MsgJobProgress, api.MsgJobFinished}
|
||||
positions := map[api.MessageType]int{}
|
||||
for i, mt := range order {
|
||||
if _, seen := positions[mt]; !seen {
|
||||
positions[mt] = i
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(wants)-1; i++ {
|
||||
a, b := wants[i], wants[i+1]
|
||||
pa, aOK := positions[a]
|
||||
pb, bOK := positions[b]
|
||||
if !aOK {
|
||||
t.Fatalf("envelope %q not found in %v", a, order)
|
||||
}
|
||||
if !bOK {
|
||||
t.Fatalf("envelope %q not found in %v", b, order)
|
||||
}
|
||||
if pa >= pb {
|
||||
t.Fatalf("expected %q before %q (positions %d, %d)", a, b, pa, pb)
|
||||
}
|
||||
}
|
||||
|
||||
// Started carries the right kind.
|
||||
startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted)
|
||||
var startP api.JobStartedPayload
|
||||
if err := startEnv.UnmarshalPayload(&startP); err != nil {
|
||||
t.Fatalf("unmarshal started: %v", err)
|
||||
}
|
||||
if startP.Kind != api.JobRestore {
|
||||
t.Fatalf("kind: got %q want %q", startP.Kind, api.JobRestore)
|
||||
}
|
||||
|
||||
// Finished is succeeded.
|
||||
finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
|
||||
var finP api.JobFinishedPayload
|
||||
if err := finEnv.UnmarshalPayload(&finP); err != nil {
|
||||
t.Fatalf("unmarshal finished: %v", err)
|
||||
}
|
||||
if finP.Status != api.JobSucceeded {
|
||||
t.Fatalf("status: got %q want %q", finP.Status, api.JobSucceeded)
|
||||
}
|
||||
// Progress envelope reflects the last status line: 100% with 10 files.
|
||||
progEnv := firstEnvOfType(t, tx.envs, api.MsgJobProgress)
|
||||
var progP api.JobProgressPayload
|
||||
if err := progEnv.UnmarshalPayload(&progP); err != nil {
|
||||
t.Fatalf("unmarshal progress: %v", err)
|
||||
}
|
||||
// First progress will be from line 1 (50%) since we send first status
|
||||
// immediately. Verify we at least see a sensible value.
|
||||
if progP.PercentDone <= 0 {
|
||||
t.Fatalf("expected non-zero progress, got %v", progP.PercentDone)
|
||||
}
|
||||
if progP.FilesDone <= 0 || progP.TotalFiles <= 0 {
|
||||
t.Fatalf("expected file counters set, got %+v", progP)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunRestoreInPlaceArgvHasNoNoOwnership: indirectly verifies that
|
||||
// in-place mode doesn't pass --no-ownership. We can't see the actual
|
||||
// argv without a custom test harness, so we use a fake restic that
|
||||
// echoes its args and check the captured log.stream.
|
||||
func TestRunRestoreInPlaceArgvHasNoNoOwnership(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
bin := setupScript(t, `
|
||||
case "$1" in
|
||||
restore)
|
||||
# Print all args on stderr so they're forwarded as log.stream.
|
||||
echo "argv: $*" 1>&2
|
||||
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
|
||||
;;
|
||||
esac
|
||||
`)
|
||||
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
if err := r.RunRestore(context.Background(), "job-r2", "abc",
|
||||
nil, true, ""); err != nil {
|
||||
t.Fatalf("RunRestore: %v", err)
|
||||
}
|
||||
|
||||
// Reconstruct the argv from the captured stderr log line.
|
||||
var argv string
|
||||
for _, e := range tx.envs {
|
||||
if e.Type == api.MsgLogStream {
|
||||
var p api.LogStreamLine
|
||||
_ = e.UnmarshalPayload(&p)
|
||||
if p.Stream == api.LogStderr && strings.HasPrefix(p.Payload, "argv:") {
|
||||
argv = p.Payload
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if argv == "" {
|
||||
t.Fatal("never captured argv echo from fake restic")
|
||||
}
|
||||
if strings.Contains(argv, "--no-ownership") {
|
||||
t.Errorf("in-place restore should NOT pass --no-ownership; got argv=%q", argv)
|
||||
}
|
||||
if !strings.Contains(argv, "--target /") {
|
||||
t.Errorf("in-place restore should pass --target /; got argv=%q", argv)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunRestoreNewDirArgvShape: non-in-place restore passes --target
|
||||
// to the operator-chosen new directory and includes the path filters.
|
||||
// We deliberately do NOT pass --no-ownership (added in restic 0.17;
|
||||
// older versions error out — the comment in restore.go explains why).
|
||||
func TestRunRestoreNewDirArgvShape(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
bin := setupScript(t, `
|
||||
case "$1" in
|
||||
restore)
|
||||
echo "argv: $*" 1>&2
|
||||
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
|
||||
;;
|
||||
esac
|
||||
`)
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
if err := r.RunRestore(context.Background(), "job-r3", "abc",
|
||||
[]string{"/etc/foo"}, false, "/tmp/restore-out"); err != nil {
|
||||
t.Fatalf("RunRestore: %v", err)
|
||||
}
|
||||
|
||||
var argv string
|
||||
for _, e := range tx.envs {
|
||||
if e.Type == api.MsgLogStream {
|
||||
var p api.LogStreamLine
|
||||
_ = e.UnmarshalPayload(&p)
|
||||
if p.Stream == api.LogStderr && strings.HasPrefix(p.Payload, "argv:") {
|
||||
argv = p.Payload
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if argv == "" {
|
||||
t.Fatal("no argv echo")
|
||||
}
|
||||
if strings.Contains(argv, "--no-ownership") {
|
||||
t.Errorf("restic 0.16 doesn't accept --no-ownership; got argv=%q", argv)
|
||||
}
|
||||
if !strings.Contains(argv, "--target /tmp/restore-out") {
|
||||
t.Errorf("expected --target /tmp/restore-out; got argv=%q", argv)
|
||||
}
|
||||
if !strings.Contains(argv, "--include /etc/foo") {
|
||||
t.Errorf("expected --include /etc/foo; got argv=%q", argv)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunRestoreNewDirAutoCreatesTarget: a new-directory restore
|
||||
// should mkdir the requested target chain before invoking restic, so
|
||||
// operators don't have to pre-create the per-job subdir.
|
||||
func TestRunRestoreNewDirAutoCreatesTarget(t *testing.T) {
|
||||
t.Parallel()
|
||||
bin := setupScript(t, `
|
||||
case "$1" in
|
||||
restore)
|
||||
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
|
||||
;;
|
||||
esac
|
||||
`)
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
|
||||
// Multi-level path the operator hasn't created yet.
|
||||
target := filepath.Join(t.TempDir(), "deep", "deeper", "deepest")
|
||||
if err := r.RunRestore(context.Background(), "job-rmkdir", "abc",
|
||||
[]string{"/etc/foo"}, false, target); err != nil {
|
||||
t.Fatalf("RunRestore: %v", err)
|
||||
}
|
||||
|
||||
if st, err := os.Stat(target); err != nil {
|
||||
t.Fatalf("expected target dir to exist: %v", err)
|
||||
} else if !st.IsDir() {
|
||||
t.Fatalf("expected directory, got %v", st.Mode())
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunDiffShipsLogLines: diff output is forwarded as log.stream.
|
||||
func TestRunDiffShipsLogLines(t *testing.T) {
|
||||
t.Parallel()
|
||||
bin := setupScript(t, `
|
||||
case "$1" in
|
||||
diff)
|
||||
echo '{"message_type":"change","path":"/etc/nginx/nginx.conf","modifier":"M"}'
|
||||
echo '{"message_type":"statistics","added":{"files":0,"dirs":0}}'
|
||||
;;
|
||||
esac
|
||||
`)
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
if err := r.RunDiff(context.Background(), "job-d1", "snap-a", "snap-b"); err != nil {
|
||||
t.Fatalf("RunDiff: %v", err)
|
||||
}
|
||||
|
||||
startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted)
|
||||
var startP api.JobStartedPayload
|
||||
_ = startEnv.UnmarshalPayload(&startP)
|
||||
if startP.Kind != api.JobDiff {
|
||||
t.Fatalf("kind: got %q want %q", startP.Kind, api.JobDiff)
|
||||
}
|
||||
finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
|
||||
var finP api.JobFinishedPayload
|
||||
_ = finEnv.UnmarshalPayload(&finP)
|
||||
if finP.Status != api.JobSucceeded {
|
||||
t.Fatalf("status: %q", finP.Status)
|
||||
}
|
||||
// At least one log line should carry the change payload.
|
||||
var sawChange bool
|
||||
for _, e := range tx.envs {
|
||||
if e.Type != api.MsgLogStream {
|
||||
continue
|
||||
}
|
||||
var p api.LogStreamLine
|
||||
_ = e.UnmarshalPayload(&p)
|
||||
if strings.Contains(p.Payload, `"message_type":"change"`) {
|
||||
sawChange = true
|
||||
}
|
||||
}
|
||||
if !sawChange {
|
||||
t.Fatal("never saw a change log line in diff output")
|
||||
}
|
||||
}
|
||||
+124
-14
@@ -26,10 +26,11 @@ type Sender interface {
|
||||
// from the agent's config file (server-pushed config.update payloads
|
||||
// override these in memory).
|
||||
type Config struct {
|
||||
ResticBin string
|
||||
RepoURL string
|
||||
RepoUsername string
|
||||
RepoPassword string
|
||||
ResticBin string
|
||||
ResticVersion string // e.g. "0.17.1" — empty if unknown
|
||||
RepoURL string
|
||||
RepoUsername string
|
||||
RepoPassword string
|
||||
|
||||
// Bandwidth caps in KB/s applied to every restic invocation.
|
||||
// <=0 means "no cap". Per-job override: callers that build a
|
||||
@@ -61,6 +62,7 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner {
|
||||
func (r *Runner) resticEnv() restic.Env {
|
||||
return restic.Env{
|
||||
Bin: r.cfg.ResticBin,
|
||||
Version: r.cfg.ResticVersion,
|
||||
RepoURL: r.cfg.RepoURL,
|
||||
RepoUsername: r.cfg.RepoUsername,
|
||||
RepoPassword: r.cfg.RepoPassword,
|
||||
@@ -95,8 +97,10 @@ func (r *Runner) streamHandler(jobID string, seq *atomic.Int64) restic.LineHandl
|
||||
}
|
||||
|
||||
// sendFinished ships a job.finished envelope. err==nil → succeeded;
|
||||
// otherwise failed. statsBlob is forwarded as JobFinishedPayload.Stats.
|
||||
func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) {
|
||||
// otherwise failed (or canceled if ctx was canceled — operator
|
||||
// hit the Cancel button or the agent is shutting down).
|
||||
// statsBlob is forwarded as JobFinishedPayload.Stats.
|
||||
func (r *Runner) sendFinished(ctx context.Context, jobID string, finishedAt time.Time, err error, statsBlob json.RawMessage) {
|
||||
status := api.JobSucceeded
|
||||
exit := 0
|
||||
errMsg := ""
|
||||
@@ -104,6 +108,16 @@ func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, sta
|
||||
status = api.JobFailed
|
||||
exit = -1
|
||||
errMsg = err.Error()
|
||||
// If the context was canceled, the failure is operator-driven
|
||||
// (or shutdown). Surface as JobCancelled so the UI shows a
|
||||
// neutral "canceled" state rather than a red "failed" one.
|
||||
// exec.CommandContext returns the process's exit error on
|
||||
// ctx-cancel, which we'd otherwise rebadge as failed.
|
||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||
status = api.JobCancelled
|
||||
exit = 130 // POSIX convention for SIGINT/SIGTERM-killed
|
||||
errMsg = "" // no need to surface the underlying restic error
|
||||
}
|
||||
}
|
||||
finEnv, _ := api.Marshal(api.MsgJobFinished, jobID, api.JobFinishedPayload{
|
||||
JobID: jobID,
|
||||
@@ -138,13 +152,13 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
|
||||
if hooks.Pre != "" {
|
||||
if err := r.runHook(ctx, jobID, "pre", hooks.Pre, "", &seq); err != nil {
|
||||
finishedAt := time.Now().UTC()
|
||||
r.sendFinished(jobID, finishedAt, err, nil)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
return fmt.Errorf("pre_hook failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
env := r.resticEnv()
|
||||
lastProgress := time.Now()
|
||||
lastProgress := time.Time{} // zero time → first status event always emits
|
||||
|
||||
handle := func(stream string, line string, ev any) {
|
||||
// Throttled progress events come from restic's `status` JSON.
|
||||
@@ -206,7 +220,7 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
|
||||
}
|
||||
}
|
||||
|
||||
r.sendFinished(jobID, finishedAt, err, statsBlob)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, statsBlob)
|
||||
|
||||
// On a successful backup, refresh the server's snapshot projection.
|
||||
// We do this *after* job.finished so the UI sees the job land first;
|
||||
@@ -240,7 +254,7 @@ func (r *Runner) RunInit(ctx context.Context, jobID string) error {
|
||||
var seq atomic.Int64
|
||||
err := env.RunInit(ctx, r.streamHandler(jobID, &seq))
|
||||
finishedAt := time.Now().UTC()
|
||||
r.sendFinished(jobID, finishedAt, err, nil)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner init: %w", err)
|
||||
}
|
||||
@@ -262,7 +276,7 @@ func (r *Runner) RunForget(ctx context.Context, jobID string, groups []restic.Fo
|
||||
var seq atomic.Int64
|
||||
err := env.RunForget(ctx, groups, r.streamHandler(jobID, &seq))
|
||||
finishedAt := time.Now().UTC()
|
||||
r.sendFinished(jobID, finishedAt, err, nil)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
|
||||
// Refresh the server's snapshot projection — forget rewrites the
|
||||
// index so the host's snapshot list almost certainly shrunk.
|
||||
@@ -300,7 +314,7 @@ func (r *Runner) RunPrune(ctx context.Context, jobID string) error {
|
||||
}
|
||||
}
|
||||
|
||||
r.sendFinished(jobID, finishedAt, err, nil)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner prune: %w", err)
|
||||
@@ -339,7 +353,7 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro
|
||||
slog.Warn("runner: stats.report after check failed", "job_id", jobID, "err", rerr)
|
||||
}
|
||||
|
||||
r.sendFinished(jobID, finishedAt, err, nil)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner check: %w", err)
|
||||
@@ -347,6 +361,102 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunRestore executes a restic restore job and reports back via the
|
||||
// sender. paths is the operator-selected file/dir list to restore.
|
||||
// inPlace=true preserves uid/gid/mode and writes at "/"; inPlace=false
|
||||
// writes at targetDir with --no-ownership.
|
||||
//
|
||||
// Status events from restic are throttled into job.progress in the
|
||||
// same shape as backup; raw status lines are dropped from log.stream
|
||||
// (they would drown the log on a fast restore — the progress widget
|
||||
// already covers them).
|
||||
func (r *Runner) RunRestore(ctx context.Context, jobID, snapshotID string, paths []string, inPlace bool, targetDir string) error {
|
||||
startedAt := time.Now().UTC()
|
||||
r.sendStarted(jobID, api.JobRestore, startedAt)
|
||||
|
||||
env := r.resticEnv()
|
||||
var seq atomic.Int64
|
||||
lastProgress := time.Time{} // zero time → first status event always emits
|
||||
|
||||
handle := func(stream string, line string, ev any) {
|
||||
status, isStatus := ev.(restic.RestoreStatus)
|
||||
if !isStatus {
|
||||
now := time.Now().UTC()
|
||||
logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
|
||||
JobID: jobID,
|
||||
Seq: seq.Add(1),
|
||||
TS: now,
|
||||
Stream: api.LogStream(stream),
|
||||
Payload: line,
|
||||
})
|
||||
_ = r.tx.Send(logEnv)
|
||||
}
|
||||
if isStatus {
|
||||
if time.Since(lastProgress) < r.progressMinPeriod {
|
||||
return
|
||||
}
|
||||
lastProgress = time.Now()
|
||||
progEnv, _ := api.Marshal(api.MsgJobProgress, jobID, api.JobProgressPayload{
|
||||
JobID: jobID,
|
||||
PercentDone: status.PercentDone,
|
||||
FilesDone: status.FilesRestored,
|
||||
TotalFiles: status.TotalFiles,
|
||||
BytesDone: status.BytesRestored,
|
||||
TotalBytes: status.TotalBytes,
|
||||
ETASeconds: estimateETA(status.BytesRestored, status.TotalBytes, status.SecondsElapsed),
|
||||
ThroughputBps: throughput(status.BytesRestored, status.SecondsElapsed),
|
||||
})
|
||||
_ = r.tx.Send(progEnv)
|
||||
}
|
||||
}
|
||||
|
||||
summary, err := env.RunRestore(ctx, snapshotID, paths, inPlace, targetDir, handle)
|
||||
finishedAt := time.Now().UTC()
|
||||
|
||||
var statsBlob json.RawMessage
|
||||
if summary != nil {
|
||||
statsBlob, _ = json.Marshal(summary)
|
||||
}
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, statsBlob)
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner restore: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// estimateETA linearly extrapolates the remaining time, in whole
// seconds (truncated), from the bytes processed so far and the seconds
// elapsed: remaining bytes divided by the average rate to date. It is
// used for restore progress because restic restore's --json status
// carries no ETA field of its own (unlike backup).
//
// It returns 0 whenever an estimate is not meaningful: any argument is
// non-positive, or bytesDone has already reached totalBytes.
func estimateETA(bytesDone, totalBytes, secondsElapsed int64) int64 {
	switch {
	case bytesDone <= 0, totalBytes <= 0, secondsElapsed <= 0:
		return 0 // not enough data yet
	case bytesDone >= totalBytes:
		return 0 // already complete
	}

	bytesPerSecond := float64(bytesDone) / float64(secondsElapsed)
	if bytesPerSecond <= 0 {
		return 0
	}
	remaining := float64(totalBytes - bytesDone)
	return int64(remaining / bytesPerSecond)
}
|
||||
|
||||
// RunDiff executes `restic diff --json <a> <b>` and forwards output
|
||||
// as log.stream lines. No snapshot-list refresh, no stats update —
|
||||
// diff is purely informational.
|
||||
func (r *Runner) RunDiff(ctx context.Context, jobID, snapshotA, snapshotB string) error {
|
||||
startedAt := time.Now().UTC()
|
||||
r.sendStarted(jobID, api.JobDiff, startedAt)
|
||||
|
||||
env := r.resticEnv()
|
||||
var seq atomic.Int64
|
||||
err := env.RunDiff(ctx, snapshotA, snapshotB, r.streamHandler(jobID, &seq))
|
||||
finishedAt := time.Now().UTC()
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner diff: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunUnlock executes a `restic unlock` job. On success it ships a
|
||||
// repo.stats envelope with LockPresent=false so the UI banner clears.
|
||||
func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {
|
||||
@@ -366,7 +476,7 @@ func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {
|
||||
}
|
||||
}
|
||||
|
||||
r.sendFinished(jobID, finishedAt, err, nil)
|
||||
r.sendFinished(ctx, jobID, finishedAt, err, nil)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner unlock: %w", err)
|
||||
|
||||
@@ -4,20 +4,42 @@ import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
|
||||
)
|
||||
|
||||
// fakeSender collects sent envelopes for assertions.
|
||||
type fakeSender struct{ envs []api.Envelope }
|
||||
// fakeSender collects sent envelopes for assertions. Lock-protected
|
||||
// because the runner's pumpStdout / pumpStderr goroutines call Send
|
||||
// concurrently — without the mutex, -race in CI flags every test
|
||||
// that exercises a Run* method with both pumps active.
|
||||
type fakeSender struct {
|
||||
mu sync.Mutex
|
||||
envs []api.Envelope
|
||||
}
|
||||
|
||||
func (s *fakeSender) Send(e api.Envelope) error {
|
||||
s.mu.Lock()
|
||||
s.envs = append(s.envs, e)
|
||||
s.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshot returns a copy of the captured envelopes safe to read
|
||||
// without holding the lock. Tests use this when iterating envs while
|
||||
// other goroutines may still be writing — though in practice all
|
||||
// runner Run* methods join their pumps before returning, so callers
|
||||
// can also read .envs directly post-return.
|
||||
func (s *fakeSender) snapshot() []api.Envelope {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
out := make([]api.Envelope, len(s.envs))
|
||||
copy(out, s.envs)
|
||||
return out
|
||||
}
|
||||
|
||||
// setupScript writes a shell script (without shebang) to a temp dir,
|
||||
// names it "restic", makes it executable, and returns the path.
|
||||
//
|
||||
@@ -320,7 +342,7 @@ esac
|
||||
// still produces job.started and job.finished envelopes.
|
||||
func TestRunInitShipsStartedAndFinished(t *testing.T) {
|
||||
t.Parallel()
|
||||
bin := setupScript(t, `echo "initialized repository"`)
|
||||
bin := setupScript(t, `echo "initialised repository"`)
|
||||
tx := &fakeSender{}
|
||||
r := New(Config{ResticBin: bin}, tx, 0)
|
||||
if err := r.RunInit(context.Background(), "job-init"); err != nil {
|
||||
|
||||
@@ -110,7 +110,7 @@ func (s *Scheduler) Apply(payload api.ScheduleSetPayload, tx Sender) {
|
||||
"received", len(payload.Schedules), "active", added)
|
||||
|
||||
// Ack outside the lock — Send() shouldn't take long, but holding
|
||||
// s.mu across an external call would needlessly serialize other
|
||||
// s.mu across an external call would needlessly serialise other
|
||||
// callers (e.g. a future Status() inspection from the UI).
|
||||
ackEnv, err := api.Marshal(api.MsgScheduleAck, "", api.ScheduleAckPayload{
|
||||
Version: payload.Version,
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
|
||||
// additionalData binds ciphertexts to the agent-secrets context, so a
|
||||
// blob lifted from one role's file can't be replayed into another's
|
||||
// row in some unrelated table that uses the same key. (Defense in
|
||||
// row in some unrelated table that uses the same key. (Defence in
|
||||
// depth — the key is per-host today, but cheap to be careful.)
|
||||
const additionalData = "rm-agent-repo-creds-v1"
|
||||
|
||||
|
||||
@@ -76,5 +76,5 @@ func detectResticVersion(ctx context.Context, override string) (string, error) {
|
||||
if len(parts) >= 2 && parts[0] == "restic" {
|
||||
return parts[1], nil
|
||||
}
|
||||
return "", fmt.Errorf("sysinfo: unrecognized restic version output: %q", first)
|
||||
return "", fmt.Errorf("sysinfo: unrecognised restic version output: %q", first)
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ type Config struct {
|
||||
// Sender is what handlers use to push agent → server messages
|
||||
// (job.progress, job.finished, log.stream, command.result, …).
|
||||
// Returned by the WS client to the dispatch handler. Write operations
|
||||
// serialize behind a single mutex on the conn; concurrent calls are
|
||||
// serialise behind a single mutex on the conn; concurrent calls are
|
||||
// safe.
|
||||
type Sender interface {
|
||||
Send(env api.Envelope) error
|
||||
|
||||
@@ -52,14 +52,17 @@ type JobKind string
|
||||
|
||||
// Allowed JobKind values. backup is operator/cron driven; init runs
|
||||
// once per host on first connect; forget/prune/check fire from the
|
||||
// server-side maintenance ticker; unlock is operator-only.
|
||||
// server-side maintenance ticker; unlock and restore are operator-
|
||||
// only; diff is operator-only and read-only.
|
||||
const (
|
||||
JobBackup JobKind = "backup"
|
||||
JobInit JobKind = "init"
|
||||
JobForget JobKind = "forget"
|
||||
JobPrune JobKind = "prune"
|
||||
JobCheck JobKind = "check"
|
||||
JobUnlock JobKind = "unlock"
|
||||
JobBackup JobKind = "backup"
|
||||
JobInit JobKind = "init"
|
||||
JobForget JobKind = "forget"
|
||||
JobPrune JobKind = "prune"
|
||||
JobCheck JobKind = "check"
|
||||
JobUnlock JobKind = "unlock"
|
||||
JobRestore JobKind = "restore"
|
||||
JobDiff JobKind = "diff"
|
||||
)
|
||||
|
||||
// JobStatus is the lifecycle state of a job.
|
||||
@@ -143,6 +146,35 @@ type CommandRunPayload struct {
|
||||
// just executes whatever is here.
|
||||
PreHook string `json:"pre_hook,omitempty"`
|
||||
PostHook string `json:"post_hook,omitempty"`
|
||||
|
||||
// Restore is populated only for kind=restore. See RestorePayload
|
||||
// for the shape; nil for every other kind.
|
||||
Restore *RestorePayload `json:"restore,omitempty"`
|
||||
|
||||
// Diff is populated only for kind=diff. See DiffPayload for
|
||||
// shape; nil for every other kind.
|
||||
Diff *DiffPayload `json:"diff,omitempty"`
|
||||
}
|
||||
|
||||
// RestorePayload carries restore-specific arguments on a JobRestore
|
||||
// command.run. Paths are absolute paths inside the snapshot (same
|
||||
// shape restic accepts as positional args). When InPlace is true the
|
||||
// agent restores at root (`--target /`) and preserves uid/gid/mode;
|
||||
// otherwise it restores into TargetDir with --no-ownership so the
|
||||
// operator can inspect the files as the agent user.
|
||||
type RestorePayload struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Paths []string `json:"paths"`
|
||||
InPlace bool `json:"in_place"`
|
||||
TargetDir string `json:"target_dir,omitempty"` // ignored when in_place=true
|
||||
}
|
||||
|
||||
// DiffPayload carries snapshot-diff arguments on a JobDiff command.run.
|
||||
// SnapshotA / SnapshotB may be either short or long IDs; restic
|
||||
// accepts both.
|
||||
type DiffPayload struct {
|
||||
SnapshotA string `json:"snapshot_a"`
|
||||
SnapshotB string `json:"snapshot_b"`
|
||||
}
|
||||
|
||||
// CommandCancelPayload is the server → agent cancel signal.
|
||||
@@ -337,3 +369,37 @@ type AgentUpdateAvailablePayload struct {
|
||||
PackageURL string `json:"package_url"` // apt repo / choco source
|
||||
Changelog string `json:"changelog,omitempty"`
|
||||
}
|
||||
|
||||
// TreeListRequestPayload is the body of a tree.list RPC. Used by the
|
||||
// restore wizard to lazy-load directory contents from a snapshot.
|
||||
//
|
||||
// The exchange is synchronous: the server marshals MsgTreeList with a
|
||||
// fresh Envelope.ID, sends to the agent, blocks on a channel keyed by
|
||||
// that ID. The agent runs `restic ls --json <SnapshotID> <Path>`,
|
||||
// emits direct children, and replies with MsgTreeListResult carrying
|
||||
// the same ID. The server-side handler matches on ID and forwards to
|
||||
// the waiting channel. See internal/server/ws/rpc.go for the helper.
|
||||
type TreeListRequestPayload struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Path string `json:"path"` // absolute path inside the snapshot, "/" for root
|
||||
}
|
||||
|
||||
// TreeListEntry is one direct child returned by a tree.list call.
|
||||
// Type is "dir" | "file" | "symlink"; size is best-effort (zero on
|
||||
// directories and symlinks).
|
||||
type TreeListEntry struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Size int64 `json:"size,omitempty"`
|
||||
}
|
||||
|
||||
// TreeListResultPayload is the reply to a tree.list. Error is set
|
||||
// when the agent couldn't fulfil the request (missing snapshot,
|
||||
// path doesn't exist, restic invocation failed); Entries is empty in
|
||||
// that case. A successful empty directory has Error="" + nil Entries.
|
||||
type TreeListResultPayload struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Path string `json:"path"`
|
||||
Entries []TreeListEntry `json:"entries,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
+15
-13
@@ -12,18 +12,19 @@ type MessageType string
|
||||
|
||||
// Agent → server message types.
|
||||
const (
|
||||
MsgHello MessageType = "hello"
|
||||
MsgHeartbeat MessageType = "heartbeat"
|
||||
MsgJobStarted MessageType = "job.started"
|
||||
MsgJobProgress MessageType = "job.progress"
|
||||
MsgJobFinished MessageType = "job.finished"
|
||||
MsgSnapshotsRpt MessageType = "snapshots.report"
|
||||
MsgRepoStats MessageType = "repo.stats"
|
||||
MsgLogStream MessageType = "log.stream"
|
||||
MsgScheduleAck MessageType = "schedule.ack"
|
||||
MsgScheduleFire MessageType = "schedule.fire" // agent: a local cron entry fired, please dispatch a job
|
||||
MsgCommandResult MessageType = "command.result" // ack for command.run
|
||||
MsgError MessageType = "error"
|
||||
MsgHello MessageType = "hello"
|
||||
MsgHeartbeat MessageType = "heartbeat"
|
||||
MsgJobStarted MessageType = "job.started"
|
||||
MsgJobProgress MessageType = "job.progress"
|
||||
MsgJobFinished MessageType = "job.finished"
|
||||
MsgSnapshotsRpt MessageType = "snapshots.report"
|
||||
MsgRepoStats MessageType = "repo.stats"
|
||||
MsgLogStream MessageType = "log.stream"
|
||||
MsgScheduleAck MessageType = "schedule.ack"
|
||||
MsgScheduleFire MessageType = "schedule.fire" // agent: a local cron entry fired, please dispatch a job
|
||||
MsgCommandResult MessageType = "command.result" // ack for command.run
|
||||
MsgTreeListResult MessageType = "tree.list.result" // reply to a server-driven tree.list
|
||||
MsgError MessageType = "error"
|
||||
)
|
||||
|
||||
// Server → agent message types.
|
||||
@@ -33,6 +34,7 @@ const (
|
||||
MsgScheduleSet MessageType = "schedule.set"
|
||||
MsgConfigUpdate MessageType = "config.update"
|
||||
MsgAgentUpdateAvail MessageType = "agent.update.available"
|
||||
MsgTreeList MessageType = "tree.list" // sync RPC: list a snapshot's children
|
||||
)
|
||||
|
||||
// Envelope is the framing for every WS message in either direction.
|
||||
@@ -76,7 +78,7 @@ type ErrorCode string
|
||||
const (
|
||||
ErrProtocolTooOld ErrorCode = "protocol_too_old"
|
||||
ErrProtocolTooNew ErrorCode = "protocol_too_new"
|
||||
ErrUnauthorized ErrorCode = "unauthorized"
|
||||
ErrUnauthorized ErrorCode = "unauthorised"
|
||||
ErrBadRequest ErrorCode = "bad_request"
|
||||
ErrInternal ErrorCode = "internal"
|
||||
)
|
||||
|
||||
@@ -56,7 +56,7 @@ func VerifyPassword(encoded, password string) error {
|
||||
parts := strings.Split(encoded, "$")
|
||||
// "$argon2id$v=...$m=...,t=...,p=...$<salt>$<hash>" → 6 parts (leading empty)
|
||||
if len(parts) != 6 || parts[1] != "argon2id" {
|
||||
return errors.New("auth: unrecognized hash format")
|
||||
return errors.New("auth: unrecognised hash format")
|
||||
}
|
||||
var version int
|
||||
if _, err := fmt.Sscanf(parts[2], "v=%d", &version); err != nil {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
// passwords, REST-server credentials, hook bodies, and any other
|
||||
// secret that lands in the SQLite store.
|
||||
//
|
||||
// The threat model is "defense in depth against a stolen DB file" —
|
||||
// The threat model is "defence in depth against a stolen DB file" —
|
||||
// not "an attacker with code execution can't read secrets at runtime."
|
||||
// We need the encryption key at runtime to do any actual work, so
|
||||
// anyone with a memory dump of the running server can extract it.
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
//go:build !windows
|
||||
|
||||
package restic
|
||||
|
||||
import "syscall"
|
||||
|
||||
// sigterm is the signal used to ask a child process to stop. On
// non-Windows builds (see the build constraint on this file) that is
// SIGTERM.
var sigterm = syscall.SIGTERM
|
||||
@@ -0,0 +1,12 @@
|
||||
//go:build windows
|
||||
|
||||
package restic
|
||||
|
||||
import "os"
|
||||
|
||||
// sigterm is the Windows stand-in for SIGTERM, which does not exist on
// this platform. os.Interrupt would be the closest equivalent, but on
// Windows Process.Signal only supports os.Kill: any other signal
// returns an error and nothing is delivered to the child. We therefore
// fall back to os.Kill so that Cancel still works, at the cost of it
// being an immediate force-kill rather than a graceful stop. WaitDelay
// goes unused in that case, which is harmless.
var sigterm = os.Kill
|
||||
@@ -0,0 +1,140 @@
|
||||
package restic
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// LsEntry is a single record decoded from the line-delimited JSON that
// `restic ls --json` prints. Only the fields the restore wizard needs
// are kept; anything else in the record is ignored by the decoder.
type LsEntry struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Path string `json:"path"`
	Size int64  `json:"size,omitempty"`
	// Struct holds restic's struct_type discriminator. The parser in
	// this package uses it to recognise the "snapshot" preamble record.
	Struct string `json:"struct_type,omitempty"`
}
|
||||
|
||||
// ListTreeChildren runs `restic ls --json <snapshot> <dirPath>` and
|
||||
// returns only the direct children of dirPath. Restic ls is recursive
|
||||
// by default, so we filter post-hoc — for a typical interactive
|
||||
// drill-down ("expand /etc/nginx") the subtree is small (a few KB of
|
||||
// JSON); for huge subtrees this is suboptimal but correct.
|
||||
//
|
||||
// The first emitted line is restic's "snapshot" preamble (struct_type
|
||||
// = "snapshot") which we discard. Subsequent lines are nodes; we
|
||||
// match on path equal to dirPath + "/" + name (with normalisation so
|
||||
// trailing slashes don't break the comparison).
|
||||
//
|
||||
// dirPath="" or "/" lists the snapshot root.
|
||||
func (e Env) ListTreeChildren(ctx context.Context, snapshotID, dirPath string) ([]LsEntry, error) {
|
||||
if snapshotID == "" {
|
||||
return nil, fmt.Errorf("restic ls: snapshot id required")
|
||||
}
|
||||
parent := normalizeTreePath(dirPath)
|
||||
|
||||
args := []string{"ls", "--json", snapshotID}
|
||||
if parent != "/" {
|
||||
args = append(args, parent)
|
||||
}
|
||||
cmd := e.resticCmd(ctx, args...)
|
||||
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("restic ls: stdout pipe: %w", err)
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("restic ls: start: %w", err)
|
||||
}
|
||||
|
||||
out, parseErr := parseLsChildren(stdout, parent)
|
||||
|
||||
werr := cmd.Wait()
|
||||
if werr != nil {
|
||||
var ee *exec.ExitError
|
||||
if errors.As(werr, &ee) {
|
||||
return nil, fmt.Errorf("restic ls: exit %d: %s",
|
||||
ee.ExitCode(), strings.TrimSpace(stderr.String()))
|
||||
}
|
||||
return nil, fmt.Errorf("restic ls: %w", werr)
|
||||
}
|
||||
if parseErr != nil {
|
||||
return nil, parseErr
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// parseLsChildren reads line-delimited JSON from r and returns nodes
|
||||
// whose Path is a direct child of parent. Exposed for testing.
|
||||
func parseLsChildren(r io.Reader, parent string) ([]LsEntry, error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
var out []LsEntry
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
var entry LsEntry
|
||||
if err := json.Unmarshal(line, &entry); err != nil {
|
||||
return nil, fmt.Errorf("restic ls: parse line: %w", err)
|
||||
}
|
||||
// Skip the snapshot preamble and any future struct_type
|
||||
// entries we don't care about.
|
||||
if entry.Struct == "snapshot" || entry.Path == "" {
|
||||
continue
|
||||
}
|
||||
if isDirectChild(entry.Path, parent) {
|
||||
out = append(out, entry)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("restic ls: read output: %w", err)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// normalizeTreePath canonicalises a snapshot path: surrounding
// whitespace is trimmed, a leading slash is added when missing, and
// the result is cleaned so it has no trailing slash, no doubled
// slashes and no "." segments. Both "" and "/" map to the root, "/".
func normalizeTreePath(p string) string {
	trimmed := strings.TrimSpace(p)
	switch {
	case trimmed == "", trimmed == "/":
		return "/"
	case trimmed[0] != '/':
		trimmed = "/" + trimmed
	}
	return path.Clean(trimmed)
}
|
||||
|
||||
// isDirectChild reports whether childPath is a direct child of parent.
|
||||
// "/etc/nginx" is a direct child of "/etc"; "/etc/nginx/conf" is not.
|
||||
// "/etc" is a direct child of "/".
|
||||
func isDirectChild(childPath, parent string) bool {
|
||||
cp := normalizeTreePath(childPath)
|
||||
pp := normalizeTreePath(parent)
|
||||
if pp == "/" {
|
||||
// Direct children of root: exactly one slash-delimited segment.
|
||||
return cp != "/" && strings.Count(cp, "/") == 1
|
||||
}
|
||||
// Must start with parent + "/" and have no further slashes.
|
||||
prefix := pp + "/"
|
||||
if !strings.HasPrefix(cp, prefix) {
|
||||
return false
|
||||
}
|
||||
rest := cp[len(prefix):]
|
||||
return rest != "" && !strings.Contains(rest, "/")
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
package restic
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// sampleLsOutput is a trimmed, realistic sample of what
// `restic ls --json` prints: the snapshot preamble on the first line,
// then one node per line. The handful of entries is nested deeply
// enough to exercise the depth filtering.
const sampleLsOutput = `{"struct_type":"snapshot","time":"2026-05-04T09:14:00Z","id":"f3a7b2c1"}
{"name":"etc","type":"dir","path":"/etc","permissions":"drwxr-xr-x","struct_type":"node"}
{"name":"nginx","type":"dir","path":"/etc/nginx","permissions":"drwxr-xr-x","struct_type":"node"}
{"name":"nginx.conf","type":"file","path":"/etc/nginx/nginx.conf","size":2400,"struct_type":"node"}
{"name":"sites-available","type":"dir","path":"/etc/nginx/sites-available","struct_type":"node"}
{"name":"alfa.conf","type":"file","path":"/etc/nginx/sites-available/alfa.conf","size":3100,"struct_type":"node"}
{"name":"default.conf","type":"file","path":"/etc/nginx/sites-available/default.conf","size":2900,"struct_type":"node"}
`
|
||||
|
||||
func TestParseLsChildrenAtRoot(t *testing.T) {
|
||||
t.Parallel()
|
||||
entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/")
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("entries: got %d (%+v), want 1", len(entries), entries)
|
||||
}
|
||||
if entries[0].Name != "etc" || entries[0].Path != "/etc" || entries[0].Type != "dir" {
|
||||
t.Fatalf("entry: %+v", entries[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLsChildrenAtEtc(t *testing.T) {
|
||||
t.Parallel()
|
||||
entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc")
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("entries: got %d, want 1 (just nginx, not nested children)", len(entries))
|
||||
}
|
||||
if entries[0].Name != "nginx" {
|
||||
t.Fatalf("entry: %+v", entries[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLsChildrenAtNginx(t *testing.T) {
|
||||
t.Parallel()
|
||||
entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc/nginx")
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("entries: got %d (%+v), want 2 (nginx.conf + sites-available, not nested)",
|
||||
len(entries), entries)
|
||||
}
|
||||
gotNames := []string{entries[0].Name, entries[1].Name}
|
||||
want := map[string]bool{"nginx.conf": true, "sites-available": true}
|
||||
for _, n := range gotNames {
|
||||
if !want[n] {
|
||||
t.Errorf("unexpected name %q in result", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLsChildrenAtSitesAvailable(t *testing.T) {
|
||||
t.Parallel()
|
||||
entries, err := parseLsChildren(strings.NewReader(sampleLsOutput), "/etc/nginx/sites-available")
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("entries: got %d, want 2", len(entries))
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.Type != "file" {
|
||||
t.Errorf("expected file type, got %q on %q", e.Type, e.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeTreePath(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct{ in, want string }{
|
||||
{"", "/"},
|
||||
{"/", "/"},
|
||||
{"/etc", "/etc"},
|
||||
{"/etc/", "/etc"},
|
||||
{"etc/nginx", "/etc/nginx"},
|
||||
{"/etc//nginx", "/etc/nginx"},
|
||||
{"/etc/./nginx", "/etc/nginx"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := normalizeTreePath(c.in)
|
||||
if got != c.want {
|
||||
t.Errorf("normalizeTreePath(%q): got %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDirectChild(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
child, parent string
|
||||
want bool
|
||||
}{
|
||||
{"/etc", "/", true},
|
||||
{"/etc/nginx", "/", false},
|
||||
{"/etc/nginx", "/etc", true},
|
||||
{"/etc/nginx/conf", "/etc", false},
|
||||
{"/etc/nginx/conf", "/etc/nginx", true},
|
||||
{"/etc", "/etc", false},
|
||||
{"/etcc", "/etc", false}, // prefix match guard
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := isDirectChild(c.child, c.parent)
|
||||
if got != c.want {
|
||||
t.Errorf("isDirectChild(%q, %q): got %v, want %v",
|
||||
c.child, c.parent, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,271 @@
|
||||
package restic
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RestoreStatus is the decoded form of the `status` lines that
// `restic restore --json` emits while a restore is running. The JSON
// keys follow restic's wire format; only a subset of the fields is
// projected here (the rest are cosmetic).
type RestoreStatus struct {
	MessageType    string  `json:"message_type"`
	SecondsElapsed int64   `json:"seconds_elapsed"`
	PercentDone    float64 `json:"percent_done"`

	// File counters.
	TotalFiles    int64 `json:"total_files"`
	FilesRestored int64 `json:"files_restored"`
	FilesSkipped  int64 `json:"files_skipped"`

	// Byte counters.
	TotalBytes    int64 `json:"total_bytes"`
	BytesRestored int64 `json:"bytes_restored"`
	BytesSkipped  int64 `json:"bytes_skipped"`
}
|
||||
|
||||
// RestoreSummary is the decoded form of the final `summary` line that
// restic prints after a successful restore. Newer restic versions emit
// it; older clients don't, in which case there is no summary, the
// agent skips the stats and the live UI simply sees the percentage
// reach 100%.
type RestoreSummary struct {
	MessageType    string `json:"message_type"`
	SecondsElapsed int64  `json:"seconds_elapsed"`

	// File counters.
	TotalFiles    int64 `json:"total_files"`
	FilesRestored int64 `json:"files_restored"`
	FilesSkipped  int64 `json:"files_skipped"`

	// Byte counters.
	TotalBytes    int64 `json:"total_bytes"`
	BytesRestored int64 `json:"bytes_restored"`
	BytesSkipped  int64 `json:"bytes_skipped"`
}
|
||||
|
||||
// RunRestore executes `restic restore <snapshotID> --target <dir>
|
||||
// [--include <p>...]` with --json and pumps progress events into
|
||||
// handle. paths is the operator-selected list (each becomes an
|
||||
// `--include` flag); preserveOwner controls --no-ownership.
|
||||
//
|
||||
// inPlace toggles target semantics:
|
||||
// - true → target is "/" and ownership is preserved
|
||||
// - false → target is targetDir and --no-ownership is passed
|
||||
//
|
||||
// targetDir is created on demand by restic itself.
|
||||
func (e Env) RunRestore(ctx context.Context, snapshotID string, paths []string, inPlace bool, targetDir string, handle LineHandler) (*RestoreSummary, error) {
|
||||
if snapshotID == "" {
|
||||
return nil, fmt.Errorf("restic restore: snapshot id required")
|
||||
}
|
||||
if !inPlace && targetDir == "" {
|
||||
return nil, fmt.Errorf("restic restore: target dir required for non-in-place restore")
|
||||
}
|
||||
|
||||
args := []string{"restore", "--json", snapshotID}
|
||||
target := targetDir
|
||||
if inPlace {
|
||||
target = "/"
|
||||
} else {
|
||||
// Expand $HOME / ${HOME} / leading ~/ in the operator-supplied
|
||||
// path, using the agent's own HOME (typically /root for the
|
||||
// User=root unit). The expansion runs agent-side so the
|
||||
// operator can specify a portable default like
|
||||
// $HOME/rm-restore/<job-id>/ in the wizard without the server
|
||||
// needing to know which user the agent runs as.
|
||||
target = expandHome(target)
|
||||
// Ensure the target directory exists. Restic itself creates
|
||||
// missing leaves but won't traverse multiple missing levels
|
||||
// (and we don't want the operator to have to pre-create the
|
||||
// per-job subdir). 0700 keeps the data root-only — the agent
|
||||
// runs as root, and operators who want a different mode can
|
||||
// chmod after the fact. If MkdirAll fails (operator typed a
|
||||
// path inside a read-only sandbox mount, ENOSPC, etc.) we
|
||||
// surface a clean error rather than letting restic fail with
|
||||
// something cryptic.
|
||||
if err := os.MkdirAll(target, 0o700); err != nil {
|
||||
return nil, fmt.Errorf("restic restore: prepare target %q: %w", target, err)
|
||||
}
|
||||
}
|
||||
args = append(args, "--target", target)
|
||||
// --no-ownership was added in restic 0.17. Older versions reject
|
||||
// the flag with "unknown flag: --no-ownership". For new-dir
|
||||
// restores we want the files owned by the agent user (operator
|
||||
// can cp them without juggling chown), so pass the flag iff the
|
||||
// running restic supports it. In-place restores always preserve
|
||||
// ownership — that's the whole point of in-place.
|
||||
if !inPlace && e.AtLeastVersion(0, 17) {
|
||||
args = append(args, "--no-ownership")
|
||||
}
|
||||
for _, p := range paths {
|
||||
args = append(args, "--include", p)
|
||||
}
|
||||
|
||||
cmd := e.resticCmd(ctx, args...)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("restic restore: stdout pipe: %w", err)
|
||||
}
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("restic restore: stderr pipe: %w", err)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("restic restore: start: %w", err)
|
||||
}
|
||||
|
||||
var summary *RestoreSummary
|
||||
done := make(chan error, 2)
|
||||
go func() { done <- pumpRestoreStdout(stdout, handle, &summary) }()
|
||||
go func() { done <- pumpStderr(stderr, handle) }()
|
||||
for i := 0; i < 2; i++ {
|
||||
if err := <-done; err != nil && handle != nil {
|
||||
handle("event", fmt.Sprintf("pump error: %v", err), nil)
|
||||
}
|
||||
}
|
||||
werr := cmd.Wait()
|
||||
if werr != nil {
|
||||
var ee *exec.ExitError
|
||||
if errors.As(werr, &ee) {
|
||||
return summary, fmt.Errorf("restic restore: exit %d", ee.ExitCode())
|
||||
}
|
||||
return summary, fmt.Errorf("restic restore: %w", werr)
|
||||
}
|
||||
return summary, nil
|
||||
}
|
||||
|
||||
// pumpRestoreStdout is the restore variant of pumpStdout: it emits
|
||||
// `event` lines for the parsed status/summary objects (so the runner
|
||||
// can shape them into job.progress) and forwards everything else as
|
||||
// stdout — but unlike backup we include the raw status JSON in
|
||||
// log.stream too because restore is short and the live log audience
|
||||
// genuinely benefits from the per-file traffic. Actually — we mirror
|
||||
// backup's behaviour and DROP raw status lines from log.stream
|
||||
// (they'd drown the log on a fast restore); the progress envelope
|
||||
// covers them.
|
||||
func pumpRestoreStdout(r io.Reader, handle LineHandler, summary **RestoreSummary) error {
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if handle == nil {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(line, "{") {
|
||||
handle("stdout", line, nil)
|
||||
continue
|
||||
}
|
||||
var probe struct {
|
||||
MessageType string `json:"message_type"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(line), &probe); err != nil {
|
||||
handle("stdout", line, nil)
|
||||
continue
|
||||
}
|
||||
switch probe.MessageType {
|
||||
case "status":
|
||||
var ev RestoreStatus
|
||||
if json.Unmarshal([]byte(line), &ev) == nil {
|
||||
// Don't tee status lines to log.stream — too chatty.
|
||||
handle("event", line, ev)
|
||||
continue
|
||||
}
|
||||
case "summary":
|
||||
var ev RestoreSummary
|
||||
if json.Unmarshal([]byte(line), &ev) == nil {
|
||||
if summary != nil {
|
||||
s := ev
|
||||
*summary = &s
|
||||
}
|
||||
handle("event", line, ev)
|
||||
continue
|
||||
}
|
||||
case "verbose_status":
|
||||
handle("event", line, nil)
|
||||
continue
|
||||
}
|
||||
handle("stdout", line, nil)
|
||||
}
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
// expandHome replaces a leading $HOME, ${HOME} or ~ in p with the home
// directory of the agent process. The marker must be the whole string
// or be followed by "/"; the joined result is cleaned by
// filepath.Join. No other environment variable is expanded, on
// purpose: an operator-supplied path must not be able to pick up
// arbitrary agent environment values such as $PATH or
// $RESTIC_PASSWORD. p is returned unchanged when it is empty, when it
// has no such marker, or when the home directory can't be resolved.
func expandHome(p string) string {
	if p == "" {
		return p
	}
	home, err := os.UserHomeDir()
	if err != nil || home == "" {
		return p
	}
	for _, marker := range []string{"$HOME", "${HOME}", "~"} {
		if p == marker {
			return home
		}
		if strings.HasPrefix(p, marker+"/") {
			return filepath.Join(home, p[len(marker)+1:])
		}
	}
	return p
}
|
||||
|
||||
// RunDiff executes `restic diff --json <a> <b>` and forwards every
|
||||
// line to handle as stdout. Restic emits per-line "change" objects
|
||||
// plus a final "statistics" object; we don't parse them server-side —
|
||||
// the operator reads the raw output on the live job log page.
|
||||
func (e Env) RunDiff(ctx context.Context, snapshotA, snapshotB string, handle LineHandler) error {
|
||||
if snapshotA == "" || snapshotB == "" {
|
||||
return fmt.Errorf("restic diff: two snapshot ids required")
|
||||
}
|
||||
cmd := e.resticCmd(ctx, "diff", "--json", snapshotA, snapshotB)
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("restic diff: stdout pipe: %w", err)
|
||||
}
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("restic diff: stderr pipe: %w", err)
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("restic diff: start: %w", err)
|
||||
}
|
||||
done := make(chan error, 2)
|
||||
// diff output isn't huge; pumpStderr-ish line-by-line forwarding
|
||||
// is fine.
|
||||
go func() {
|
||||
s := bufio.NewScanner(stdout)
|
||||
s.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
for s.Scan() {
|
||||
if handle != nil {
|
||||
handle("stdout", s.Text(), nil)
|
||||
}
|
||||
}
|
||||
done <- s.Err()
|
||||
}()
|
||||
go func() { done <- pumpStderr(stderr, handle) }()
|
||||
for i := 0; i < 2; i++ {
|
||||
if err := <-done; err != nil && handle != nil {
|
||||
handle("event", fmt.Sprintf("pump error: %v", err), nil)
|
||||
}
|
||||
}
|
||||
werr := cmd.Wait()
|
||||
if werr != nil {
|
||||
var ee *exec.ExitError
|
||||
if errors.As(werr, &ee) {
|
||||
return fmt.Errorf("restic diff: exit %d", ee.ExitCode())
|
||||
}
|
||||
return fmt.Errorf("restic diff: %w", werr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// Locate resolves the path to the restic binary. Honor an explicit
|
||||
// Locate resolves the path to the restic binary. Honour an explicit
|
||||
// override if provided, else fall back to PATH.
|
||||
func Locate(override string) (string, error) {
|
||||
if override != "" {
|
||||
@@ -42,6 +42,7 @@ func Locate(override string) (string, error) {
|
||||
// in this package ever needs to *log* a URL, use RedactURL.
|
||||
type Env struct {
|
||||
Bin string // path to restic binary
|
||||
Version string // e.g. "0.17.1"; empty if unknown
|
||||
RepoURL string // RESTIC_REPOSITORY (no embedded creds)
|
||||
RepoUsername string // optional HTTP basic-auth user for rest: URLs
|
||||
RepoPassword string // doubles as RESTIC_PASSWORD and (for rest:) HTTP basic-auth password
|
||||
@@ -55,6 +56,45 @@ type Env struct {
|
||||
LimitDownloadKBps int
|
||||
}
|
||||
|
||||
// AtLeastVersion reports whether e.Version >= the given major/minor.
|
||||
// Comparison is best-effort: empty / unparseable versions return false
|
||||
// (callers stay on the conservative path). Patch level is ignored.
|
||||
func (e Env) AtLeastVersion(major, minor int) bool {
|
||||
v := strings.TrimSpace(e.Version)
|
||||
if v == "" {
|
||||
return false
|
||||
}
|
||||
parts := strings.SplitN(v, ".", 3)
|
||||
if len(parts) < 2 {
|
||||
return false
|
||||
}
|
||||
maj, err1 := atoi(parts[0])
|
||||
min, err2 := atoi(parts[1])
|
||||
if err1 != nil || err2 != nil {
|
||||
return false
|
||||
}
|
||||
if maj != major {
|
||||
return maj > major
|
||||
}
|
||||
return min >= minor
|
||||
}
|
||||
|
||||
// atoi parses s as a non-negative decimal integer. It is a minimal
// stand-in for strconv.Atoi that avoids pulling the import into a file
// which needs it for one helper only. Unlike strconv.Atoi it accepts
// nothing but the digits 0-9: no sign, no whitespace.
//
// An error is returned for an empty string, for any non-digit rune,
// and for a value that does not fit in an int (instead of silently
// wrapping around).
func atoi(s string) (int, error) {
	if len(s) == 0 {
		return 0, fmt.Errorf("empty")
	}
	// Largest value an int can hold on this platform.
	const maxInt = int(^uint(0) >> 1)
	n := 0
	for _, r := range s {
		if r < '0' || r > '9' {
			return 0, fmt.Errorf("not a digit: %q", r)
		}
		d := int(r - '0')
		// n*10 + d would overflow exactly when n > (maxInt-d)/10.
		if n > (maxInt-d)/10 {
			return 0, fmt.Errorf("out of range: %q", s)
		}
		n = n*10 + d
	}
	return n, nil
}
|
||||
|
||||
// globalArgs returns restic's pre-subcommand global flags derived
|
||||
// from the Env. Currently just bandwidth caps.
|
||||
func (e Env) globalArgs() []string {
|
||||
@@ -69,14 +109,33 @@ func (e Env) globalArgs() []string {
|
||||
}
|
||||
|
||||
// resticCmd builds an exec.Cmd with bandwidth-limit globals prefixed
|
||||
// before the supplied subcommand args. Centralizing this so every
|
||||
// command (backup/forget/prune/check/unlock/init/stats) honors
|
||||
// before the supplied subcommand args. Centralising this so every
|
||||
// command (backup/forget/prune/check/unlock/init/stats) honours
|
||||
// the caps without each call site having to remember.
|
||||
//
|
||||
// Cancellation: by default exec.CommandContext sends SIGKILL when
|
||||
// ctx is canceled, which leaves restic no chance to clean up its
|
||||
// repository lock. Override Cmd.Cancel to send SIGTERM first, and
|
||||
// set Cmd.WaitDelay so the process is force-killed if it doesn't
|
||||
// exit within five seconds. Restic responds to SIGTERM by removing
|
||||
// its lock file before exiting, which is what we want when an
|
||||
// operator cancels a long-running backup/restore from the UI.
|
||||
func (e Env) resticCmd(ctx context.Context, sub ...string) *exec.Cmd {
|
||||
args := append(e.globalArgs(), sub...)
|
||||
cmd := exec.CommandContext(ctx, e.Bin, args...)
|
||||
cmd.Env = e.envSlice()
|
||||
cmd.Dir = e.WorkDir
|
||||
cmd.Cancel = func() error {
|
||||
// Cmd.Process is set after Start; Cancel only fires post-Start
|
||||
// so the nil check is defensive against the documented but
|
||||
// unlikely race. Signal returns ErrProcessDone if the process
|
||||
// already exited; that's not a problem here either.
|
||||
if cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
return cmd.Process.Signal(sigterm)
|
||||
}
|
||||
cmd.WaitDelay = 5 * time.Second
|
||||
return cmd
|
||||
}
|
||||
|
||||
@@ -123,7 +182,7 @@ type BackupSummary struct {
|
||||
}
|
||||
|
||||
// LineHandler receives every stdout/stderr line. event is non-nil
|
||||
// when the line is a recognized JSON status; raw always carries the
|
||||
// when the line is a recognised JSON status; raw always carries the
|
||||
// original text (so we can also tee to job_logs as `stdout`).
|
||||
type LineHandler func(stream string, raw string, event any)
|
||||
|
||||
@@ -263,7 +322,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
|
||||
|
||||
// Sniff for "config file already exists" on stderr; if we see it
|
||||
// we'll treat the non-zero exit as a soft success — running init
|
||||
// against an already-initialized repo is a no-op semantically,
|
||||
// against an already-initialised repo is a no-op semantically,
|
||||
// not a failure. Wraps the caller's handle so the line still
|
||||
// gets streamed verbatim to the operator-facing log.
|
||||
alreadyInited := false
|
||||
@@ -279,7 +338,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
|
||||
if err := runWithPump(cmd, sniff); err != nil {
|
||||
if alreadyInited {
|
||||
if handle != nil {
|
||||
handle("event", "repo already initialized — treating as success", nil)
|
||||
handle("event", "repo already initialised — treating as success", nil)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -375,7 +434,7 @@ func (e Env) RunStats(ctx context.Context, handle LineHandler) (*RepoStats, erro
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// CheckResult summarizes a `restic check` invocation. LockPresent is
|
||||
// CheckResult summarises a `restic check` invocation. LockPresent is
|
||||
// true if the stderr stream contained a stale-lock signal (caller is
|
||||
// expected to surface this in the UI so the operator can run unlock).
|
||||
// ErrorsFound is true if check exited with a non-zero status (errors
|
||||
@@ -387,7 +446,7 @@ type CheckResult struct {
|
||||
|
||||
// RunCheck executes `restic check` with optional --read-data-subset.
|
||||
// subsetPct of 0 omits the flag (full data check); >0 passes
|
||||
// --read-data-subset N%. Returns a CheckResult summarizing what was
|
||||
// --read-data-subset N%. Returns a CheckResult summarising what was
|
||||
// sniffed from stderr; the result is set even if check itself
|
||||
// returns an error (so the caller can persist last_check_status).
|
||||
func (e Env) RunCheck(ctx context.Context, subsetPct int, handle LineHandler) (CheckResult, error) {
|
||||
|
||||
@@ -13,9 +13,11 @@ import (
|
||||
// decode only the fields we project to the server; restic's full
|
||||
// shape has more (parent, tree, program version) that we don't need.
|
||||
//
|
||||
// Summary is only populated by restic 0.16+ (which embeds the backup
|
||||
// summary inside each snapshot). Older clients leave it nil and the
|
||||
// agent reports zero size/file-count — the UI degrades to "—".
|
||||
// Summary is only populated by restic 0.17+ (which embeds the backup
|
||||
// summary inside each snapshot record). Older clients leave it nil
|
||||
// and the agent reports zero size/file-count — the UI degrades to
|
||||
// "—" and the column headers carry a tooltip explaining the version
|
||||
// requirement (see web/templates/pages/host_detail.html).
|
||||
type Snapshot struct {
|
||||
ID string `json:"id"`
|
||||
ShortID string `json:"short_id"`
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
package restic
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestEnvAtLeastVersion(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
ver string
|
||||
major int
|
||||
minor int
|
||||
want bool
|
||||
shortDesc string
|
||||
}{
|
||||
{"0.17.0", 0, 17, true, "exact match"},
|
||||
{"0.17.1", 0, 17, true, "patch above"},
|
||||
{"0.18.0", 0, 17, true, "minor above"},
|
||||
{"1.0.0", 0, 17, true, "major above"},
|
||||
{"0.16.4", 0, 17, false, "minor below"},
|
||||
{"0.16", 0, 17, false, "two-part minor below"},
|
||||
{"", 0, 17, false, "empty"},
|
||||
{"v0.17", 0, 17, false, "prefixed v rejected"},
|
||||
{"unknown", 0, 17, false, "non-numeric rejected"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := Env{Version: c.ver}.AtLeastVersion(c.major, c.minor)
|
||||
if got != c.want {
|
||||
t.Errorf("AtLeastVersion(%q, %d, %d): got %v want %v · %s",
|
||||
c.ver, c.major, c.minor, got, c.want, c.shortDesc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpandHome(t *testing.T) {
|
||||
// Not parallel — t.Setenv on HOME would race with sibling tests.
|
||||
tmp := t.TempDir()
|
||||
t.Setenv("HOME", tmp)
|
||||
|
||||
cases := []struct {
|
||||
in, want string
|
||||
}{
|
||||
{"$HOME/rm-restore/job-1/", filepath.Join(tmp, "rm-restore/job-1")},
|
||||
{"${HOME}/rm-restore/job-2/", filepath.Join(tmp, "rm-restore/job-2")},
|
||||
{"~/rm-restore/job-3/", filepath.Join(tmp, "rm-restore/job-3")},
|
||||
{"$HOME", tmp},
|
||||
{"~", tmp},
|
||||
{"/var/lib/x/y", "/var/lib/x/y"}, // absolute path passes through
|
||||
{"", ""},
|
||||
{"$PATH/foo", "$PATH/foo"}, // other env vars not expanded
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := expandHome(c.in)
|
||||
if got != c.want {
|
||||
t.Errorf("expandHome(%q): got %q want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
|
||||
// Sanity: an absolute path always passes through regardless of HOME.
|
||||
if got := expandHome("/abs"); got != "/abs" {
|
||||
t.Errorf("expandHome(/abs): got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -57,7 +57,7 @@ func (s *Server) handleAgentBinary(w stdhttp.ResponseWriter, r *stdhttp.Request)
|
||||
}
|
||||
|
||||
func (s *Server) handleInstallAsset(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
// chi's TrimPrefix-like behavior: r.URL.Path is "/install/<file>".
|
||||
// chi's TrimPrefix-like behaviour: r.URL.Path is "/install/<file>".
|
||||
rel := strings.TrimPrefix(r.URL.Path, "/install/")
|
||||
// Reject any path traversal — must be a flat filename.
|
||||
if rel == "" || strings.ContainsAny(rel, "/\\") {
|
||||
|
||||
@@ -133,7 +133,7 @@ func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
|
||||
keyBytes, err := base64.StdEncoding.DecodeString(req.PublicKey)
|
||||
if err != nil {
|
||||
// Try URL-safe / no-padding flavors before giving up.
|
||||
// Try URL-safe / no-padding flavours before giving up.
|
||||
if k2, e2 := base64.RawStdEncoding.DecodeString(req.PublicKey); e2 == nil {
|
||||
keyBytes = k2
|
||||
} else {
|
||||
@@ -195,7 +195,7 @@ func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
// remoteIP returns r.RemoteAddr stripped of any :port suffix, plus
|
||||
// the X-Forwarded-For chain's first hop when behind a trusted proxy
|
||||
// (RM_TRUSTED_PROXY in the deployment doc). Trust-proxy lookup
|
||||
// matches the framework's existing behavior elsewhere.
|
||||
// matches the framework's existing behaviour elsewhere.
|
||||
func remoteIP(r *stdhttp.Request) string {
|
||||
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
|
||||
// Take the first IP in the chain (closest to the original
|
||||
|
||||
@@ -137,7 +137,7 @@ func (s *Server) handleBootstrap(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
return
|
||||
}
|
||||
if n > 0 {
|
||||
writeJSONError(w, stdhttp.StatusConflict, "already_initialized",
|
||||
writeJSONError(w, stdhttp.StatusConflict, "already_initialised",
|
||||
"a user already exists; bootstrap is disabled")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
stdhttp "net/http"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// handleCancelJob is POST /api/jobs/{id}/cancel. Sends a command.cancel
|
||||
// envelope to the host that owns the job; the agent kills the running
|
||||
// restic subprocess, and the resulting job.finished envelope (status =
|
||||
// canceled) is what actually transitions the job row — this handler
|
||||
// does not touch the jobs table directly. Returning 202 makes that
|
||||
// asynchronicity explicit.
|
||||
//
|
||||
// 4xx cases:
|
||||
// - job not found (404)
|
||||
// - job already in a terminal state (409 — nothing to cancel)
|
||||
// - host offline (503 — same code path the run-now endpoint uses)
|
||||
//
|
||||
// Audit-logged as job.cancel with the job ID as target.
|
||||
func (s *Server) handleCancelJob(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
jobID := chi.URLParam(r, "id")
|
||||
if jobID == "" {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "missing_job_id", "")
|
||||
return
|
||||
}
|
||||
|
||||
job, err := s.deps.Store.GetJob(r.Context(), jobID)
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusNotFound, "job_not_found", "")
|
||||
return
|
||||
}
|
||||
switch api.JobStatus(job.Status) {
|
||||
case api.JobSucceeded, api.JobFailed, api.JobCancelled:
|
||||
writeJSONError(w, stdhttp.StatusConflict, "job_terminal",
|
||||
"job is already in a terminal state ("+job.Status+")")
|
||||
return
|
||||
}
|
||||
|
||||
if !s.deps.Hub.Connected(job.HostID) {
|
||||
writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline",
|
||||
"agent is not connected; can't deliver cancel signal")
|
||||
return
|
||||
}
|
||||
|
||||
env, err := api.Marshal(api.MsgCommandCancel, jobID, api.CommandCancelPayload{
|
||||
JobID: jobID,
|
||||
})
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
if err := s.deps.Hub.Send(r.Context(), job.HostID, env); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var actorID *string
|
||||
actor := "system"
|
||||
if user != nil {
|
||||
actor = "user"
|
||||
actorID = &user.ID
|
||||
}
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
UserID: actorID,
|
||||
Actor: actor,
|
||||
Action: "job.cancel",
|
||||
TargetKind: ptr("job"),
|
||||
TargetID: &jobID,
|
||||
TS: time.Now().UTC(),
|
||||
})
|
||||
|
||||
w.WriteHeader(stdhttp.StatusAccepted)
|
||||
}
|
||||
@@ -0,0 +1,204 @@
|
||||
// cancel_test.go — covers POST /api/jobs/{id}/cancel.
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/coder/websocket"
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// TestCancelJobRunningHappyPath: a running job's cancel endpoint sends
|
||||
// a command.cancel envelope with the right job id, returns 202, and
|
||||
// writes a job.cancel audit row.
|
||||
func TestCancelJobRunningHappyPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, token := enrolHostForWS(t, srv, st, "cancel-host")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "cancel-host")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
|
||||
// Seed a running job we can target.
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := st.CreateJob(context.Background(), store.Job{
|
||||
ID: jobID, HostID: hostID, Kind: "backup",
|
||||
ActorKind: "user", CreatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("create job: %v", err)
|
||||
}
|
||||
if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil {
|
||||
t.Fatalf("mark started: %v", err)
|
||||
}
|
||||
|
||||
cookie := loginAsAdmin(t, st)
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/api/jobs/"+jobID+"/cancel", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusAccepted {
|
||||
t.Fatalf("status: got %d, want 202", res.StatusCode)
|
||||
}
|
||||
|
||||
// Read the dispatched command.cancel envelope.
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
var got api.Envelope
|
||||
for time.Now().Before(deadline) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||
mt, raw, rerr := c.Read(ctx)
|
||||
cancel()
|
||||
if rerr != nil {
|
||||
break
|
||||
}
|
||||
if mt != websocket.MessageText {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(string(raw), `"command.cancel"`) {
|
||||
continue
|
||||
}
|
||||
if err := json.Unmarshal(raw, &got); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
break
|
||||
}
|
||||
if got.Type != api.MsgCommandCancel {
|
||||
t.Fatalf("never received command.cancel envelope")
|
||||
}
|
||||
var cp api.CommandCancelPayload
|
||||
if err := got.UnmarshalPayload(&cp); err != nil {
|
||||
t.Fatalf("unmarshal payload: %v", err)
|
||||
}
|
||||
if cp.JobID != jobID {
|
||||
t.Fatalf("payload job_id: got %q want %q", cp.JobID, jobID)
|
||||
}
|
||||
|
||||
// Audit row exists.
|
||||
var n int
|
||||
if err := st.DB().QueryRow(
|
||||
`SELECT COUNT(*) FROM audit_log WHERE action = 'job.cancel' AND target_id = ?`,
|
||||
jobID).Scan(&n); err != nil {
|
||||
t.Fatalf("audit count: %v", err)
|
||||
}
|
||||
if n != 1 {
|
||||
t.Fatalf("audit rows: got %d, want 1", n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCancelJobAlreadyTerminal: a job in succeeded/failed/canceled
|
||||
// state returns 409 and does NOT send a WS envelope.
|
||||
func TestCancelJobAlreadyTerminal(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, token := enrolHostForWS(t, srv, st, "term-host")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "term-host")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := st.CreateJob(context.Background(), store.Job{
|
||||
ID: jobID, HostID: hostID, Kind: "backup",
|
||||
ActorKind: "user", CreatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("create job: %v", err)
|
||||
}
|
||||
if err := st.MarkJobFinished(context.Background(), jobID, "succeeded", 0, nil, "", now); err != nil {
|
||||
t.Fatalf("mark finished: %v", err)
|
||||
}
|
||||
|
||||
cookie := loginAsAdmin(t, st)
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/api/jobs/"+jobID+"/cancel", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusConflict {
|
||||
t.Fatalf("status: got %d, want 409", res.StatusCode)
|
||||
}
|
||||
|
||||
// Drain — no command.cancel should arrive.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
|
||||
defer cancel()
|
||||
for {
|
||||
mt, raw, rerr := c.Read(ctx)
|
||||
if rerr != nil {
|
||||
break
|
||||
}
|
||||
if mt == websocket.MessageText && strings.Contains(string(raw), `"command.cancel"`) {
|
||||
t.Fatalf("unexpected command.cancel envelope for terminal job")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestCancelJobNotFound: 404 for a job id that doesn't exist.
|
||||
func TestCancelJobNotFound(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, ts, st := rawTestServer(t)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/api/jobs/"+ulid.Make().String()+"/cancel", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusNotFound {
|
||||
t.Fatalf("status: got %d, want 404", res.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestCancelJobHostOffline: a queued/running job whose host has no
|
||||
// active WS connection returns 503.
|
||||
func TestCancelJobHostOffline(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, ts, st := rawTestServer(t)
|
||||
// Create a host but don't connect a WS for it.
|
||||
hostID := ulid.Make().String()
|
||||
if err := st.CreateHost(context.Background(), store.Host{
|
||||
ID: hostID, Name: "offline-host", OS: "linux", Arch: "amd64",
|
||||
EnrolledAt: time.Now().UTC(),
|
||||
}, "deadbeef", ""); err != nil {
|
||||
t.Fatalf("create host: %v", err)
|
||||
}
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := st.CreateJob(context.Background(), store.Job{
|
||||
ID: jobID, HostID: hostID, Kind: "backup",
|
||||
ActorKind: "user", CreatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("create job: %v", err)
|
||||
}
|
||||
if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil {
|
||||
t.Fatalf("mark started: %v", err)
|
||||
}
|
||||
|
||||
cookie := loginAsAdmin(t, st)
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/api/jobs/"+jobID+"/cancel", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusServiceUnavailable {
|
||||
t.Fatalf("status: got %d, want 503", res.StatusCode)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// snapshotDiffRequest carries the two snapshot identifiers posted to
// the snapshot diff endpoint. The JSON keys below are also the names
// of the form fields the handler reads. Each value may be a long or a
// short snapshot ID.
type snapshotDiffRequest struct {
	// SnapshotA is the first snapshot of the pair.
	SnapshotA string `json:"snapshot_a"`
	// SnapshotB is the second snapshot of the pair.
	SnapshotB string `json:"snapshot_b"`
}
|
||||
|
||||
// handleSnapshotDiff dispatches a JobDiff. Output streams as
|
||||
// log.stream lines to the standard live job page; the operator reads
|
||||
// the diff text directly there. Behaves like the run-now endpoints:
|
||||
// 503 if the host is offline, 400 if the IDs are missing, 422 if
|
||||
// they're not in the host's snapshot list (we don't want operators
|
||||
// running diffs against arbitrary snapshot strings).
|
||||
func (s *Server) handleSnapshotDiff(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
host, err := s.deps.Store.GetHost(r.Context(), hostID)
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
|
||||
return
|
||||
}
|
||||
|
||||
var req snapshotDiffRequest
|
||||
// HTMX form posts arrive as application/x-www-form-urlencoded;
|
||||
// the JSON shape is also accepted for REST callers.
|
||||
ct := r.Header.Get("Content-Type")
|
||||
if strings.HasPrefix(ct, "application/x-www-form-urlencoded") {
|
||||
if err := r.ParseForm(); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_form", err.Error())
|
||||
return
|
||||
}
|
||||
req.SnapshotA = strings.TrimSpace(r.PostForm.Get("snapshot_a"))
|
||||
req.SnapshotB = strings.TrimSpace(r.PostForm.Get("snapshot_b"))
|
||||
} else {
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
||||
return
|
||||
}
|
||||
req.SnapshotA = strings.TrimSpace(req.SnapshotA)
|
||||
req.SnapshotB = strings.TrimSpace(req.SnapshotB)
|
||||
}
|
||||
if req.SnapshotA == "" || req.SnapshotB == "" {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "missing_snapshot",
|
||||
"snapshot_a and snapshot_b are both required")
|
||||
return
|
||||
}
|
||||
if req.SnapshotA == req.SnapshotB {
|
||||
writeJSONError(w, stdhttp.StatusUnprocessableEntity, "same_snapshot",
|
||||
"diff requires two different snapshots")
|
||||
return
|
||||
}
|
||||
|
||||
// Validate the IDs are known to this host. Match on long ID, short
|
||||
// ID, or any prefix match — operators sometimes paste a 6-char
|
||||
// shortened form.
|
||||
snaps, err := s.deps.Store.ListSnapshotsByHost(r.Context(), host.ID)
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
resolveID := func(idOrShort string) string {
|
||||
for _, s := range snaps {
|
||||
if s.ID == idOrShort || s.ShortID == idOrShort {
|
||||
return s.ID
|
||||
}
|
||||
}
|
||||
// Prefix fallback (operator pasted 6 chars of a long id).
|
||||
for _, s := range snaps {
|
||||
if strings.HasPrefix(s.ID, idOrShort) {
|
||||
return s.ID
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
a := resolveID(req.SnapshotA)
|
||||
b := resolveID(req.SnapshotB)
|
||||
if a == "" || b == "" {
|
||||
writeJSONError(w, stdhttp.StatusUnprocessableEntity, "snapshot_not_found",
|
||||
"one or both snapshot ids are not in this host's snapshot list")
|
||||
return
|
||||
}
|
||||
|
||||
if !s.deps.Hub.Connected(host.ID) {
|
||||
writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline",
|
||||
"agent is not connected; try again when it reconnects")
|
||||
return
|
||||
}
|
||||
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := s.deps.Store.CreateJob(r.Context(), store.Job{
|
||||
ID: jobID, HostID: host.ID, Kind: string(api.JobDiff),
|
||||
ActorKind: "user", ActorID: &user.ID, CreatedAt: now,
|
||||
}); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
|
||||
JobID: jobID, Kind: api.JobDiff,
|
||||
Diff: &api.DiffPayload{SnapshotA: a, SnapshotB: b},
|
||||
})
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", err.Error())
|
||||
return
|
||||
}
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
UserID: &user.ID,
|
||||
Actor: "user",
|
||||
Action: "host.snapshot_diff",
|
||||
TargetKind: ptr("host"),
|
||||
TargetID: &host.ID,
|
||||
TS: now,
|
||||
})
|
||||
|
||||
jobURL := "/jobs/" + jobID
|
||||
if r.Header.Get("HX-Request") == "true" {
|
||||
w.Header().Set("HX-Redirect", jobURL)
|
||||
w.WriteHeader(stdhttp.StatusNoContent)
|
||||
return
|
||||
}
|
||||
writeJSON(w, stdhttp.StatusAccepted, map[string]string{
|
||||
"job_id": jobID,
|
||||
"job_url": jobURL,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,136 @@
|
||||
// diff_test.go — covers POST /api/hosts/{id}/snapshots/diff (P3-09).
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/coder/websocket"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// TestSnapshotDiffHappyPath verifies a valid two-snapshot form ships
|
||||
// a JobDiff command.run with the right payload.
|
||||
func TestSnapshotDiffHappyPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, token := enrolHostForUI(t, srv, st, "diff-host")
|
||||
a, b := seedTwoSnapshots(t, st, hostID, "diff-host")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "diff-host")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{
|
||||
"snapshot_a": {a},
|
||||
"snapshot_b": {b},
|
||||
}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/snapshots/diff",
|
||||
strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.Header.Set("HX-Request", "true")
|
||||
req.AddCookie(cookie)
|
||||
client := &stdhttp.Client{
|
||||
CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
|
||||
return stdhttp.ErrUseLastResponse
|
||||
},
|
||||
}
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusNoContent {
|
||||
t.Fatalf("status: got %d, want 204", res.StatusCode)
|
||||
}
|
||||
if res.Header.Get("HX-Redirect") == "" {
|
||||
t.Fatal("expected HX-Redirect to live job page")
|
||||
}
|
||||
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
var got api.Envelope
|
||||
for time.Now().Before(deadline) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||
mt, raw, rerr := c.Read(ctx)
|
||||
cancel()
|
||||
if rerr != nil {
|
||||
break
|
||||
}
|
||||
if mt != websocket.MessageText {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(string(raw), `"kind":"diff"`) {
|
||||
continue
|
||||
}
|
||||
_ = json.Unmarshal(raw, &got)
|
||||
break
|
||||
}
|
||||
if got.Type != api.MsgCommandRun {
|
||||
t.Fatal("never received diff command.run")
|
||||
}
|
||||
var cp api.CommandRunPayload
|
||||
_ = got.UnmarshalPayload(&cp)
|
||||
if cp.Diff == nil {
|
||||
t.Fatal("diff payload nil")
|
||||
}
|
||||
if cp.Diff.SnapshotA != a || cp.Diff.SnapshotB != b {
|
||||
t.Fatalf("diff payload: got %+v want a=%s b=%s", cp.Diff, a, b)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSnapshotDiffSameID rejects diff(a,a) with 422.
|
||||
func TestSnapshotDiffSameID(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "diff-same")
|
||||
a := seedSnapshot(t, st, hostID, "diff-same")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{"snapshot_a": {a}, "snapshot_b": {a}}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/snapshots/diff",
|
||||
strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnprocessableEntity {
|
||||
t.Fatalf("status: got %d, want 422", res.StatusCode)
|
||||
}
|
||||
_ = srv
|
||||
}
|
||||
|
||||
// TestSnapshotDiffUnknownID rejects ids not in the host's snapshot list.
|
||||
func TestSnapshotDiffUnknownID(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "diff-unknown")
|
||||
_ = seedSnapshot(t, st, hostID, "diff-unknown")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{"snapshot_a": {"deadbeef"}, "snapshot_b": {"cafebabe"}}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/snapshots/diff",
|
||||
strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnprocessableEntity {
|
||||
t.Fatalf("status: got %d, want 422", res.StatusCode)
|
||||
}
|
||||
_ = srv
|
||||
}
|
||||
@@ -213,7 +213,7 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
|
||||
// session cookie and trust it, validating the cookie via store.
|
||||
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ type hostBandwidthView struct {
|
||||
|
||||
func (s *Server) handleUpdateHostBandwidth(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -58,7 +58,7 @@ func (s *Server) pushBandwidthToAgent(ctx context.Context, hostID string, up, do
|
||||
// bandwidthPayload builds a ConfigUpdatePayload with only the
|
||||
// bandwidth fields populated. Pointers are passed through verbatim;
|
||||
// callers wanting to clear a cap should pass a non-nil pointer to 0.
|
||||
// On the on-hello path we materialize zero-valued pointers when the
|
||||
// On the on-hello path we materialise zero-valued pointers when the
|
||||
// host record has no cap set, so the agent's stored state is always
|
||||
// in sync (rather than retaining whatever value it last received).
|
||||
func bandwidthPayload(up, down *int) api.ConfigUpdatePayload {
|
||||
|
||||
@@ -32,7 +32,7 @@ type hostRepoCredsView struct {
|
||||
// creds for UI display. 404 if no credential has ever been set.
|
||||
func (s *Server) handleGetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -88,7 +88,7 @@ type hostRepoCredsRequest struct {
|
||||
func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -165,7 +165,7 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
|
||||
w.WriteHeader(stdhttp.StatusNoContent)
|
||||
}
|
||||
|
||||
// pushRepoCredsToAgent serializes blob into a config.update envelope
|
||||
// pushRepoCredsToAgent serialises blob into a config.update envelope
|
||||
// and ships it down the agent's WS. Returns an error from the hub
|
||||
// (no-op if not connected — caller is expected to check first when it
|
||||
// matters).
|
||||
@@ -192,7 +192,7 @@ func (s *Server) pushRepoCredsToAgent(ctx context.Context, hostID string, blob r
|
||||
// uses this to pre-fill the edit form.
|
||||
func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -234,7 +234,7 @@ func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.
|
||||
func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -319,7 +319,7 @@ func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.
|
||||
func (s *Server) handleDeleteAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -34,7 +34,7 @@ type hostView struct {
|
||||
// see the same projection.
|
||||
func (s *Server) handleListHosts(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hosts, err := s.deps.Store.ListHosts(r.Context())
|
||||
@@ -55,7 +55,7 @@ func (s *Server) handleListHosts(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
// handleFleetSummary returns the dashboard tile aggregate.
|
||||
func (s *Server) handleFleetSummary(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
fs, err := s.deps.Store.FleetSummary(r.Context())
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
stdhttp "net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// handleJobLogDownload is GET /api/jobs/{id}/log{.txt,.ndjson}.
|
||||
//
|
||||
// Source of truth is the persisted job_logs table — works any time,
|
||||
// regardless of whether the job is running or already finished. The
|
||||
// download is "everything the server has up to right now"; the live
|
||||
// stream is unaffected (no pause needed). If the operator wants a
|
||||
// fuller snapshot of a still-running job, they hit Download again.
|
||||
//
|
||||
// Format is picked from the URL suffix (.txt | .ndjson) for a
|
||||
// sensible filename in the browser, or the ?format= query param for
|
||||
// REST callers. Default is txt.
|
||||
func (s *Server) handleJobLogDownload(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if _, ok := s.requireUser(r); !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
jobID := chi.URLParam(r, "id")
|
||||
if jobID == "" {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "missing_job_id", "")
|
||||
return
|
||||
}
|
||||
job, err := s.deps.Store.GetJob(r.Context(), jobID)
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusNotFound, "job_not_found", "")
|
||||
return
|
||||
}
|
||||
|
||||
format := r.URL.Query().Get("format")
|
||||
if format == "" {
|
||||
// Sniff the URL — chi routes both /log.txt and /log.ndjson here
|
||||
// (or .log if a future route adds it) via the {format} matcher.
|
||||
fmtParam := chi.URLParam(r, "format")
|
||||
switch fmtParam {
|
||||
case "ndjson":
|
||||
format = "ndjson"
|
||||
default:
|
||||
format = "txt"
|
||||
}
|
||||
}
|
||||
|
||||
logs, err := s.deps.Store.ListJobLogs(r.Context(), jobID, 0, 0)
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
short := jobID
|
||||
if len(short) > 8 {
|
||||
short = short[:8]
|
||||
}
|
||||
filename := "job-" + job.Kind + "-" + short
|
||||
switch format {
|
||||
case "ndjson":
|
||||
w.Header().Set("Content-Type", "application/x-ndjson; charset=utf-8")
|
||||
w.Header().Set("Content-Disposition",
|
||||
`attachment; filename="`+filename+`.ndjson"`)
|
||||
writeLogsNDJSON(w, logs)
|
||||
default:
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
w.Header().Set("Content-Disposition",
|
||||
`attachment; filename="`+filename+`.txt"`)
|
||||
writeLogsText(w, job, logs)
|
||||
}
|
||||
}
|
||||
|
||||
// writeLogsText renders the logs in the same shape the live page shows:
|
||||
// "HH:MM:SS.mmm TAG payload". Adds a small header so the file is
|
||||
// useful as a standalone artefact (operator pastes it into a ticket).
|
||||
func writeLogsText(w stdhttp.ResponseWriter, job *store.Job, logs []store.JobLogLine) {
|
||||
bw := bufio.NewWriter(w)
|
||||
defer func() { _ = bw.Flush() }()
|
||||
_, _ = fmt.Fprintf(bw, "# job %s · kind %s · status %s\n",
|
||||
job.ID, job.Kind, job.Status)
|
||||
if job.StartedAt != nil {
|
||||
_, _ = fmt.Fprintf(bw, "# started %s\n", job.StartedAt.UTC().Format("2006-01-02T15:04:05.000Z"))
|
||||
}
|
||||
if job.FinishedAt != nil {
|
||||
_, _ = fmt.Fprintf(bw, "# finished %s\n", job.FinishedAt.UTC().Format("2006-01-02T15:04:05.000Z"))
|
||||
}
|
||||
_, _ = fmt.Fprintf(bw, "# %d log lines\n\n", len(logs))
|
||||
for _, l := range logs {
|
||||
tag := streamTag(l.Stream)
|
||||
ts := l.TS.UTC().Format("15:04:05.000")
|
||||
// Strip embedded newlines from payload — log lines should be
|
||||
// single-line, but defensive: a stray '\n' in stderr would
|
||||
// break grep -n.
|
||||
payload := strings.ReplaceAll(l.Payload, "\n", " ")
|
||||
_, _ = fmt.Fprintf(bw, "%s %s %s\n", ts, tag, payload)
|
||||
}
|
||||
}
|
||||
|
||||
// writeLogsNDJSON emits one JSON object per line. Each object stands
|
||||
// alone — appending to the file remains valid NDJSON.
|
||||
func writeLogsNDJSON(w stdhttp.ResponseWriter, logs []store.JobLogLine) {
|
||||
enc := json.NewEncoder(w)
|
||||
for _, l := range logs {
|
||||
_ = enc.Encode(struct {
|
||||
Seq int64 `json:"seq"`
|
||||
TS string `json:"ts"`
|
||||
Stream string `json:"stream"`
|
||||
Payload string `json:"payload"`
|
||||
}{
|
||||
Seq: l.Seq,
|
||||
TS: l.TS.UTC().Format("2006-01-02T15:04:05.000Z"),
|
||||
Stream: l.Stream,
|
||||
Payload: l.Payload,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// streamTag maps a log stream name to the short upper-case tag used in
// the plain-text log rendering. "stdout" and "stderr" get three-letter
// abbreviations; every other name is simply upper-cased.
func streamTag(s string) string {
	if s == "stdout" {
		return "OUT"
	}
	if s == "stderr" {
		return "ERR"
	}
	if s == "event" {
		return "EVENT"
	}
	return strings.ToUpper(s)
}
|
||||
@@ -0,0 +1,181 @@
|
||||
// job_download_test.go — covers GET /api/jobs/{id}/log.{txt,ndjson}.
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// seedJobWithLogs creates a job + a few log lines for it. Returns the
|
||||
// job ID. Caller is responsible for the test server + auth.
|
||||
func seedJobWithLogs(t *testing.T, st *store.Store, hostID string, lineCount int) string {
|
||||
t.Helper()
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := st.CreateJob(context.Background(), store.Job{
|
||||
ID: jobID, HostID: hostID, Kind: "diff",
|
||||
ActorKind: "user", CreatedAt: now,
|
||||
}); err != nil {
|
||||
t.Fatalf("create job: %v", err)
|
||||
}
|
||||
if err := st.MarkJobStarted(context.Background(), jobID, now); err != nil {
|
||||
t.Fatalf("mark started: %v", err)
|
||||
}
|
||||
for i := 0; i < lineCount; i++ {
|
||||
stream := "stdout"
|
||||
if i%5 == 0 {
|
||||
stream = "stderr"
|
||||
}
|
||||
payload := `{"message_type":"change","path":"/etc/file` +
|
||||
ulid.Make().String()[:6] + `","modifier":"M"}`
|
||||
if err := st.AppendJobLog(context.Background(), jobID, int64(i+1),
|
||||
now.Add(time.Duration(i)*time.Millisecond),
|
||||
stream, payload); err != nil {
|
||||
t.Fatalf("append log: %v", err)
|
||||
}
|
||||
}
|
||||
if err := st.MarkJobFinished(context.Background(), jobID, "succeeded", 0, nil, "", now); err != nil {
|
||||
t.Fatalf("mark finished: %v", err)
|
||||
}
|
||||
return jobID
|
||||
}
|
||||
|
||||
// TestJobLogDownloadTxt: plain-text format includes a header + one
|
||||
// line per log row in the expected shape.
|
||||
func TestJobLogDownloadTxt(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, _ := enrolHostForWS(t, srv, st, "dl-txt-host")
|
||||
jobID := seedJobWithLogs(t, st, hostID, 12)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
req, _ := stdhttp.NewRequest("GET",
|
||||
ts.URL+"/api/jobs/"+jobID+"/log.txt", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusOK {
|
||||
t.Fatalf("status: got %d, want 200", res.StatusCode)
|
||||
}
|
||||
if ct := res.Header.Get("Content-Type"); !strings.HasPrefix(ct, "text/plain") {
|
||||
t.Errorf("content-type: got %q", ct)
|
||||
}
|
||||
if cd := res.Header.Get("Content-Disposition"); !strings.Contains(cd, ".txt") {
|
||||
t.Errorf("content-disposition: got %q", cd)
|
||||
}
|
||||
body := readBody(t, res.Body)
|
||||
// Header lines.
|
||||
if !strings.HasPrefix(body, "# job ") {
|
||||
t.Errorf("expected '# job ...' header line; got %q", short(body))
|
||||
}
|
||||
if !strings.Contains(body, "12 log lines") {
|
||||
t.Errorf("expected '12 log lines'; got %q", short(body))
|
||||
}
|
||||
// One body line per log row — count non-comment, non-empty lines.
|
||||
var rows int
|
||||
for _, line := range strings.Split(body, "\n") {
|
||||
l := strings.TrimSpace(line)
|
||||
if l == "" || strings.HasPrefix(l, "#") {
|
||||
continue
|
||||
}
|
||||
rows++
|
||||
}
|
||||
if rows != 12 {
|
||||
t.Errorf("expected 12 body rows, got %d", rows)
|
||||
}
|
||||
// Tag check: at least one ERR row (every 5th was stderr).
|
||||
if !strings.Contains(body, " ERR ") {
|
||||
t.Errorf("expected at least one ERR row")
|
||||
}
|
||||
}
|
||||
|
||||
// TestJobLogDownloadNDJSON: each line is a self-contained JSON object.
|
||||
func TestJobLogDownloadNDJSON(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, _ := enrolHostForWS(t, srv, st, "dl-ndjson-host")
|
||||
jobID := seedJobWithLogs(t, st, hostID, 5)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
req, _ := stdhttp.NewRequest("GET",
|
||||
ts.URL+"/api/jobs/"+jobID+"/log.ndjson", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusOK {
|
||||
t.Fatalf("status: got %d, want 200", res.StatusCode)
|
||||
}
|
||||
if ct := res.Header.Get("Content-Type"); !strings.HasPrefix(ct, "application/x-ndjson") {
|
||||
t.Errorf("content-type: got %q", ct)
|
||||
}
|
||||
body := readBody(t, res.Body)
|
||||
// Each non-empty line should parse as an object with seq/ts/stream/payload.
|
||||
var seen int
|
||||
for _, line := range strings.Split(body, "\n") {
|
||||
if strings.TrimSpace(line) == "" {
|
||||
continue
|
||||
}
|
||||
var obj struct {
|
||||
Seq int64 `json:"seq"`
|
||||
TS string `json:"ts"`
|
||||
Stream string `json:"stream"`
|
||||
Payload string `json:"payload"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(line), &obj); err != nil {
|
||||
t.Fatalf("parse line %q: %v", line, err)
|
||||
}
|
||||
if obj.Seq == 0 || obj.TS == "" || obj.Stream == "" || obj.Payload == "" {
|
||||
t.Errorf("incomplete object: %+v", obj)
|
||||
}
|
||||
seen++
|
||||
}
|
||||
if seen != 5 {
|
||||
t.Errorf("parsed %d objects, want 5", seen)
|
||||
}
|
||||
}
|
||||
|
||||
// TestJobLogDownloadNotFound: 404 for an unknown job id.
|
||||
func TestJobLogDownloadNotFound(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, ts, st := rawTestServer(t)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
req, _ := stdhttp.NewRequest("GET",
|
||||
ts.URL+"/api/jobs/"+ulid.Make().String()+"/log.txt", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusNotFound {
|
||||
t.Fatalf("status: got %d, want 404", res.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestJobLogDownloadUnauthenticated: without a session cookie, 401.
|
||||
func TestJobLogDownloadUnauthenticated(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, ts, _ := rawTestServer(t)
|
||||
res, err := stdhttp.Get(ts.URL + "/api/jobs/x/log.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnauthorized {
|
||||
t.Fatalf("status: got %d, want 401", res.StatusCode)
|
||||
}
|
||||
}
|
||||
@@ -31,7 +31,7 @@ type runNowResponse struct {
|
||||
func (s *Server) handleRunNow(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -152,7 +152,8 @@ func (s *Server) requireUser(r *stdhttp.Request) (*store.User, bool) {
|
||||
|
||||
func validJobKind(k api.JobKind) bool {
|
||||
switch k {
|
||||
case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune, api.JobCheck, api.JobUnlock:
|
||||
case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune,
|
||||
api.JobCheck, api.JobUnlock, api.JobRestore, api.JobDiff:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
@@ -81,7 +81,7 @@ func drainUntil(t *testing.T, c *websocket.Conn, wantType api.MessageType) api.E
|
||||
return api.Envelope{}
|
||||
}
|
||||
|
||||
// enrolHostForWS pre-enrolls a host with bound repo creds so the server
|
||||
// enrolHostForWS pre-enrols a host with bound repo creds so the server
|
||||
// will treat it as ready to receive command.run.
|
||||
func enrolHostForWS(t *testing.T, srv *Server, st *store.Store, name string) (hostID, token string) {
|
||||
t.Helper()
|
||||
|
||||
@@ -506,12 +506,12 @@ func TestEnqueueOnDispatchFailure(t *testing.T) {
|
||||
func TestDrainPendingSerializesPerHost(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, token := enrolHostForWS(t, srv, st, "serialize-host")
|
||||
hostID, token := enrolHostForWS(t, srv, st, "serialise-host")
|
||||
gid, sid := seedSchedAndGroup(t, st, hostID, 10)
|
||||
|
||||
// Connect the agent so DrainPending can dispatch.
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "serialize-host")
|
||||
sendHello(t, c, "serialise-host")
|
||||
// Drain the on-hello goroutine's pass first (no pending rows yet),
|
||||
// then wait for the schedule.set so the connection is fully settled.
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
|
||||
@@ -214,7 +214,7 @@ type acceptForm struct {
|
||||
func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
pendingID := chi.URLParam(r, "id")
|
||||
@@ -315,7 +315,7 @@ func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Re
|
||||
func (s *Server) handleRejectPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
user, ok := s.requireUser(r)
|
||||
if !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
pendingID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -41,7 +41,7 @@ func toRepoMaintenanceView(m store.HostRepoMaintenance) repoMaintenanceView {
|
||||
|
||||
func (s *Server) handleGetRepoMaintenance(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -84,7 +84,7 @@ type repoMaintenanceWriteRequest struct {
|
||||
|
||||
func (s *Server) handleUpdateRepoMaintenance(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -26,7 +26,7 @@ func (s *Server) handleRunRepoPrune(w stdhttp.ResponseWriter, r *stdhttp.Request
|
||||
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -72,7 +72,7 @@ func (s *Server) handleRunRepoCheck(w stdhttp.ResponseWriter, r *stdhttp.Request
|
||||
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -125,7 +125,7 @@ func (s *Server) handleRunRepoUnlock(w stdhttp.ResponseWriter, r *stdhttp.Reques
|
||||
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -53,7 +53,7 @@ func (s *Server) handleRunSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque
|
||||
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -61,7 +61,7 @@ var cronParser = cron.NewParser(
|
||||
|
||||
func (s *Server) handleListSchedules(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -89,7 +89,7 @@ func (s *Server) handleListSchedules(w stdhttp.ResponseWriter, r *stdhttp.Reques
|
||||
|
||||
func (s *Server) handleCreateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -126,7 +126,7 @@ func (s *Server) handleCreateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Reque
|
||||
|
||||
func (s *Server) handleUpdateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -173,7 +173,7 @@ func (s *Server) handleUpdateSchedule(w stdhttp.ResponseWriter, r *stdhttp.Reque
|
||||
|
||||
func (s *Server) handleDeleteSchedule(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -43,7 +43,7 @@ type Server struct {
|
||||
srv *stdhttp.Server
|
||||
deps Deps
|
||||
|
||||
// drainLocks serializes DrainPending per host. The on-hello
|
||||
// drainLocks serialises DrainPending per host. The on-hello
|
||||
// goroutine and the 30s ticker can otherwise race for the same
|
||||
// host, double-dispatching every pending row. Map of hostID →
|
||||
// sync.Mutex; checked-and-locked atomically via drainLocksMu.
|
||||
@@ -58,6 +58,11 @@ type Server struct {
|
||||
// pending_id so the accept/reject handlers can push the bearer
|
||||
// or close cleanly (P2-18b).
|
||||
pendingHub *pendingHub
|
||||
|
||||
// treeCache holds per-wizard-session listings of snapshot
|
||||
// directories (P3-X2). Pre-allocated in New so the lazy-init
|
||||
// race is impossible.
|
||||
treeCache *treeCache
|
||||
}
|
||||
|
||||
// New builds a configured but not-yet-started server.
|
||||
@@ -81,6 +86,7 @@ func New(deps Deps) *Server {
|
||||
drainLocks: make(map[string]*sync.Mutex),
|
||||
announceRL: newAnnounceLimiter(),
|
||||
pendingHub: newPendingHub(),
|
||||
treeCache: newTreeCache(),
|
||||
}
|
||||
s.routes(r)
|
||||
|
||||
@@ -178,8 +184,22 @@ func (s *Server) routes(r chi.Router) {
|
||||
r.Post("/hosts/{id}/repo/prune", s.handleRunRepoPrune)
|
||||
r.Post("/hosts/{id}/repo/check", s.handleRunRepoCheck)
|
||||
r.Post("/hosts/{id}/repo/unlock", s.handleRunRepoUnlock)
|
||||
|
||||
// Cancel a running job. Operator-driven, sends command.cancel
|
||||
// to the agent which kills the restic subprocess; the agent's
|
||||
// resulting job.finished (status=canceled) is what flips the
|
||||
// job row.
|
||||
r.Post("/jobs/{id}/cancel", s.handleCancelJob)
|
||||
|
||||
// Snapshot diff (P3-09). Dispatches a JobDiff against two
|
||||
// snapshots; output streams to the standard live job page.
|
||||
r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff)
|
||||
})
|
||||
|
||||
// HTMX form variant of diff (mounted outside /api so HTMX forms
|
||||
// can post against it without the api/ prefix).
|
||||
r.Post("/hosts/{id}/snapshots/diff", s.handleSnapshotDiff)
|
||||
|
||||
// Per-source-group Run-now (HTMX form action). Available even
|
||||
// when the server is started without UI templates so REST callers
|
||||
// against the non-/api path also work.
|
||||
@@ -237,7 +257,7 @@ func (s *Server) routes(r chi.Router) {
|
||||
// Durable post-Add-host page (operator can refresh / come
|
||||
// back; password decrypted from the token row each render).
|
||||
// Polled fragment under /awaiting flips to "connected" once
|
||||
// the agent enrolls.
|
||||
// the agent enrols.
|
||||
r.Get("/hosts/pending/{token}", s.handleUIPendingHost)
|
||||
r.Get("/hosts/pending/{token}/awaiting", s.handleUIPendingAwaiting)
|
||||
// Host detail (Snapshots tab is the default).
|
||||
@@ -270,6 +290,12 @@ func (s *Server) routes(r chi.Router) {
|
||||
r.Post("/hosts/{id}/schedules/{sid}/run", s.handleUIScheduleRun)
|
||||
// Live job log.
|
||||
r.Get("/jobs/{id}", s.handleUIJobDetail)
|
||||
// Restore wizard (P3-01/P3-02). Two GET variants land on the
|
||||
// same handler; the second deep-links a chosen snapshot.
|
||||
r.Get("/hosts/{id}/restore", s.handleUIRestoreGet)
|
||||
r.Get("/hosts/{id}/snapshots/{sid}/restore", s.handleUIRestoreGet)
|
||||
r.Post("/hosts/{id}/restore", s.handleUIRestorePost)
|
||||
r.Get("/hosts/{id}/restore/tree", s.handleUIRestoreTree)
|
||||
}
|
||||
|
||||
// Browser job-log stream (separate from /ws/agent so the auth
|
||||
@@ -278,6 +304,11 @@ func (s *Server) routes(r chi.Router) {
|
||||
if s.deps.JobHub != nil {
|
||||
r.Get("/api/jobs/{id}/stream", s.handleJobStream)
|
||||
}
|
||||
|
||||
// Job log download (txt + ndjson). Source of truth is the
|
||||
// persisted job_logs table; safe to call any time, no pause
|
||||
// needed against the live stream.
|
||||
r.Get("/api/jobs/{id}/log.{format:txt|ndjson}", s.handleJobLogDownload)
|
||||
}
|
||||
|
||||
// Start begins listening. Blocks until ListenAndServe returns
|
||||
|
||||
@@ -35,7 +35,7 @@ type listSnapshotsResponse struct {
|
||||
// onto whatever the server most recently received.
|
||||
func (s *Server) handleListHostSnapshots(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if _, ok := s.requireUser(r); !ok {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ type sourceGroupWriteRequest struct {
|
||||
|
||||
func (s *Server) handleListSourceGroups(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -90,7 +90,7 @@ func (s *Server) handleListSourceGroups(w stdhttp.ResponseWriter, r *stdhttp.Req
|
||||
|
||||
func (s *Server) handleGetSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -109,7 +109,7 @@ func (s *Server) handleGetSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque
|
||||
|
||||
func (s *Server) handleCreateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -152,7 +152,7 @@ func (s *Server) handleCreateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Re
|
||||
|
||||
func (s *Server) handleUpdateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
@@ -207,7 +207,7 @@ func (s *Server) handleUpdateSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Re
|
||||
// the UI can offer "remove from these schedules first."
|
||||
func (s *Server) handleDeleteSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// treeCacheTTL is how long a per-session cached directory listing
|
||||
// stays valid. The whole point of the cache is to make re-expanding
|
||||
// nodes within the same wizard session snappy; 30 minutes covers a
|
||||
// generous wizard interaction window without holding stale data
|
||||
// indefinitely.
|
||||
const treeCacheTTL = 30 * time.Minute
|
||||
|
||||
// treeCacheKey identifies one cached listing. SessionID scopes entries
// to a single browser session so two operators don't share view state;
// HostID, SnapshotID and Path together pin down the directory being
// listed. All four fields are plain strings, so the struct is
// comparable and can be used directly as a map key.
type treeCacheKey struct {
	SessionID, HostID, SnapshotID, Path string
}
|
||||
|
||||
type treeCacheEntry struct {
|
||||
Result api.TreeListResultPayload
|
||||
ExpiresAt time.Time
|
||||
}
|
||||
|
||||
// treeCache is a per-process map of synchronously fetched directory
|
||||
// listings. Concurrency is light (a few entries per active wizard
|
||||
// session) so a single mutex is fine.
|
||||
type treeCache struct {
|
||||
mu sync.Mutex
|
||||
entries map[treeCacheKey]treeCacheEntry
|
||||
}
|
||||
|
||||
func newTreeCache() *treeCache {
|
||||
return &treeCache{entries: make(map[treeCacheKey]treeCacheEntry)}
|
||||
}
|
||||
|
||||
// Get returns a cached entry if one exists and hasn't expired.
|
||||
func (c *treeCache) Get(k treeCacheKey, now time.Time) (api.TreeListResultPayload, bool) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
e, ok := c.entries[k]
|
||||
if !ok {
|
||||
return api.TreeListResultPayload{}, false
|
||||
}
|
||||
if now.After(e.ExpiresAt) {
|
||||
delete(c.entries, k)
|
||||
return api.TreeListResultPayload{}, false
|
||||
}
|
||||
return e.Result, true
|
||||
}
|
||||
|
||||
// Put records a fresh listing under k. Caller is responsible for
|
||||
// having validated the result first (Error == "").
|
||||
func (c *treeCache) Put(k treeCacheKey, result api.TreeListResultPayload, now time.Time) {
|
||||
c.mu.Lock()
|
||||
c.entries[k] = treeCacheEntry{
|
||||
Result: result,
|
||||
ExpiresAt: now.Add(treeCacheTTL),
|
||||
}
|
||||
c.mu.Unlock()
|
||||
}
|
||||
|
||||
// Sweep deletes expired entries. Called opportunistically from the
|
||||
// wizard handler — no separate goroutine needed; cache size is small.
|
||||
func (c *treeCache) Sweep(now time.Time) {
|
||||
c.mu.Lock()
|
||||
for k, e := range c.entries {
|
||||
if now.After(e.ExpiresAt) {
|
||||
delete(c.entries, k)
|
||||
}
|
||||
}
|
||||
c.mu.Unlock()
|
||||
}
|
||||
|
||||
// fetchTreeWithCache returns a directory listing — cache hit, or a
|
||||
// synchronous tree.list RPC against the agent on miss. On agent error
|
||||
// (not transport error), the result is returned as-is with Error set
|
||||
// rather than cached, so a transient failure doesn't poison subsequent
|
||||
// requests for the same path.
|
||||
//
|
||||
//nolint:unused // wired in by the wizard handler in the next slice
|
||||
func (s *Server) fetchTreeWithCache(ctx context.Context, sessionID, hostID, snapshotID, path string) (api.TreeListResultPayload, error) {
|
||||
now := time.Now()
|
||||
k := treeCacheKey{SessionID: sessionID, HostID: hostID, SnapshotID: snapshotID, Path: path}
|
||||
if cached, ok := s.treeCache.Get(k, now); ok {
|
||||
return cached, nil
|
||||
}
|
||||
|
||||
reply, err := s.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList,
|
||||
api.TreeListRequestPayload{SnapshotID: snapshotID, Path: path},
|
||||
30*time.Second)
|
||||
if err != nil {
|
||||
return api.TreeListResultPayload{}, err
|
||||
}
|
||||
var result api.TreeListResultPayload
|
||||
if perr := reply.UnmarshalPayload(&result); perr != nil {
|
||||
return api.TreeListResultPayload{}, perr
|
||||
}
|
||||
if result.Error == "" {
|
||||
s.treeCache.Put(k, result, now)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -0,0 +1,146 @@
|
||||
// tree_rpc_test.go — full round-trip test for the tree.list synchronous
|
||||
// RPC (P3-X2). A fake agent reads the inbound tree.list, replies with a
|
||||
// canned tree.list.result, and we assert the server's SendRPC returned
|
||||
// the expected payload.
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/coder/websocket"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
func TestSendRPCTreeListRoundTrip(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, token := enrolHostForWS(t, srv, st, "rpc-host")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "rpc-host")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
|
||||
// Fake agent: read inbound envelopes, mirror tree.list with a
|
||||
// canned result. Other inbound envelopes (config.update etc) are
|
||||
// already drained above.
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
for {
|
||||
mt, raw, err := c.Read(ctx)
|
||||
if err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
if mt != websocket.MessageText {
|
||||
continue
|
||||
}
|
||||
var env api.Envelope
|
||||
if err := json.Unmarshal(raw, &env); err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
if env.Type != api.MsgTreeList {
|
||||
continue
|
||||
}
|
||||
var req api.TreeListRequestPayload
|
||||
if err := env.UnmarshalPayload(&req); err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
result := api.TreeListResultPayload{
|
||||
SnapshotID: req.SnapshotID,
|
||||
Path: req.Path,
|
||||
Entries: []api.TreeListEntry{
|
||||
{Name: "etc", Type: "dir"},
|
||||
{Name: "var", Type: "dir"},
|
||||
},
|
||||
}
|
||||
out, err := api.Marshal(api.MsgTreeListResult, env.ID, result)
|
||||
if err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
rawOut, _ := json.Marshal(out)
|
||||
if err := c.Write(ctx, websocket.MessageText, rawOut); err != nil {
|
||||
done <- err
|
||||
return
|
||||
}
|
||||
done <- nil
|
||||
return
|
||||
}
|
||||
}()
|
||||
|
||||
// Server-side SendRPC.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
reply, err := srv.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList,
|
||||
api.TreeListRequestPayload{SnapshotID: "f3a7b2c1", Path: "/"},
|
||||
3*time.Second)
|
||||
if err != nil {
|
||||
t.Fatalf("SendRPC: %v", err)
|
||||
}
|
||||
if reply.Type != api.MsgTreeListResult {
|
||||
t.Fatalf("reply type: got %q want %q", reply.Type, api.MsgTreeListResult)
|
||||
}
|
||||
var result api.TreeListResultPayload
|
||||
if err := reply.UnmarshalPayload(&result); err != nil {
|
||||
t.Fatalf("unmarshal reply: %v", err)
|
||||
}
|
||||
if result.SnapshotID != "f3a7b2c1" || result.Path != "/" {
|
||||
t.Fatalf("payload: got %+v", result)
|
||||
}
|
||||
if len(result.Entries) != 2 || result.Entries[0].Name != "etc" {
|
||||
t.Fatalf("entries: %+v", result.Entries)
|
||||
}
|
||||
|
||||
// Make sure the fake agent didn't error out.
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
t.Fatalf("fake agent: %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("fake agent didn't finish")
|
||||
}
|
||||
}
|
||||
|
||||
// TestSendRPCTimeoutNoReply: SendRPC times out cleanly when the agent
|
||||
// never replies; the registry entry is released so a stray late reply
|
||||
// wouldn't deadlock anything.
|
||||
func TestSendRPCTimeoutNoReply(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServer(t)
|
||||
hostID, token := enrolHostForWS(t, srv, st, "rpc-timeout-host")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "rpc-timeout-host")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
|
||||
// Fake agent reads but never replies.
|
||||
go func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
for {
|
||||
if _, _, err := c.Read(ctx); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
ctx := context.Background()
|
||||
t0 := time.Now()
|
||||
_, err := srv.deps.Hub.SendRPC(ctx, hostID, api.MsgTreeList,
|
||||
api.TreeListRequestPayload{SnapshotID: "x", Path: "/"},
|
||||
300*time.Millisecond)
|
||||
if err == nil {
|
||||
t.Fatal("expected timeout error")
|
||||
}
|
||||
elapsed := time.Since(t0)
|
||||
if elapsed < 250*time.Millisecond || elapsed > 2*time.Second {
|
||||
t.Fatalf("timeout took %s, expected ~300ms", elapsed)
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
@@ -276,7 +277,7 @@ type addHostPage struct {
|
||||
}
|
||||
|
||||
// pendingHostPage is the GET /hosts/pending/{token} view. Lives
|
||||
// for as long as the token does (1h ttl); once the agent enrolls,
|
||||
// for as long as the token does (1h ttl); once the agent enrols,
|
||||
// the handler redirects to /hosts/{host_id} and this page is gone.
|
||||
type pendingHostPage struct {
|
||||
Token string
|
||||
@@ -377,7 +378,7 @@ func (s *Server) handleUIAddHostPost(w stdhttp.ResponseWriter, r *stdhttp.Reques
|
||||
|
||||
// handleUIPendingHost serves the durable Add-host result page —
|
||||
// shown after a successful POST /hosts/new and reachable until the
|
||||
// agent enrolls (the page redirects to /hosts/{id} once that
|
||||
// agent enrols (the page redirects to /hosts/{id} once that
|
||||
// happens) or the token expires (1h ttl). The password is
|
||||
// re-decrypted from the encrypted token row on every render so
|
||||
// the operator can refresh, bookmark, navigate away and come back.
|
||||
@@ -512,6 +513,14 @@ type hostChromeData struct {
|
||||
InitStatus string
|
||||
InitAt *time.Time // started_at if non-nil else created_at
|
||||
InitJobID string
|
||||
|
||||
// Latest 'restore' job — surfaced as a small line below the
|
||||
// init-status one so the operator has at-a-glance visibility into
|
||||
// recent destructive activity. Empty status means no restore has
|
||||
// ever run on this host.
|
||||
RestoreStatus string
|
||||
RestoreAt *time.Time
|
||||
RestoreJobID string
|
||||
}
|
||||
|
||||
// loadHostChrome fetches the per-tab counts that every host-detail tab
|
||||
@@ -542,6 +551,15 @@ func (s *Server) loadHostChrome(r *stdhttp.Request, host store.Host, subtab, cru
|
||||
}
|
||||
d.InitAt = &t
|
||||
}
|
||||
if j, err := s.deps.Store.LatestJobByKind(r.Context(), host.ID, "restore"); err == nil && j != nil {
|
||||
d.RestoreStatus = j.Status
|
||||
d.RestoreJobID = j.ID
|
||||
t := j.CreatedAt
|
||||
if j.StartedAt != nil {
|
||||
t = *j.StartedAt
|
||||
}
|
||||
d.RestoreAt = &t
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
@@ -552,6 +570,12 @@ type hostDetailPage struct {
|
||||
// SnapshotsShown is the number rendered (we cap at ~50 for the
|
||||
// first slice; pagination lands when it matters).
|
||||
SnapshotsShown int
|
||||
// LegacyRestic is true when the host's restic version predates
|
||||
// 0.17, in which case `restic snapshots --json` doesn't embed the
|
||||
// per-snapshot summary block and the Size/Files columns render
|
||||
// blank. The template uses this to attach a tooltip to those
|
||||
// column headers explaining the version requirement.
|
||||
LegacyRestic bool
|
||||
}
|
||||
|
||||
// handleUIHostDetail is the host detail page (snapshots tab by default).
|
||||
@@ -594,6 +618,7 @@ func (s *Server) handleUIHostDetail(w stdhttp.ResponseWriter, r *stdhttp.Request
|
||||
hostChromeData: s.loadHostChrome(r, *host, "snapshots", "snapshots"),
|
||||
Snapshots: shown,
|
||||
SnapshotsShown: len(shown),
|
||||
LegacyRestic: !restic.Env{Version: host.ResticVersion}.AtLeastVersion(0, 17),
|
||||
}
|
||||
if err := s.deps.UI.Render(w, "host_detail", view); err != nil {
|
||||
slog.Error("ui: render host_detail", "err", err)
|
||||
@@ -713,7 +738,7 @@ func (s *Server) handleUIJobDetail(w stdhttp.ResponseWriter, r *stdhttp.Request)
|
||||
// same way our Go code does.
|
||||
func (s *Server) handleJobStream(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if u, _ := s.sessionUser(r); u == nil {
|
||||
stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized)
|
||||
stdhttp.Error(w, "unauthorised", stdhttp.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
jobID := chi.URLParam(r, "id")
|
||||
|
||||
@@ -49,7 +49,7 @@ func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request
|
||||
}
|
||||
if !s.deps.Hub.Connected(host.ID) {
|
||||
s.renderRepoPage(w, r, u, host,
|
||||
"Host is offline — bring the agent back up before re-initializing.",
|
||||
"Host is offline — bring the agent back up before re-initialising.",
|
||||
"", "", "")
|
||||
return
|
||||
}
|
||||
@@ -58,7 +58,7 @@ func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request
|
||||
if _, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
s.renderRepoPage(w, r, u, host,
|
||||
"Bind repo credentials before re-initializing.",
|
||||
"Bind repo credentials before re-initialising.",
|
||||
"", "", "")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -0,0 +1,447 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log/slog"
|
||||
stdhttp "net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// ui_restore.go — restore wizard backend (P3-01).
|
||||
//
|
||||
// GET /hosts/{id}/restore wizard step 1 (snapshot picker)
|
||||
// GET /hosts/{id}/snapshots/{sid}/restore wizard with snapshot pre-selected
|
||||
// GET /hosts/{id}/restore/tree HTMX partial: one tree node + children
|
||||
// POST /hosts/{id}/restore dispatch the restore job
|
||||
|
||||
// hostRestorePage is the model for the wizard template.
|
||||
type hostRestorePage struct {
|
||||
hostChromeData
|
||||
|
||||
// Snapshot picker rows; rendered by the template into the step-1
|
||||
// table. Limited to most-recent N (the operator can refine on
|
||||
// snapshot ID if they need an older one — out of scope for v1).
|
||||
Snapshots []store.Snapshot
|
||||
|
||||
// Selected is non-nil iff a snapshot has been chosen — either via
|
||||
// the deep-link path /hosts/{id}/snapshots/{sid}/restore or by a
|
||||
// previous form submission that the wizard re-rendered.
|
||||
Selected *store.Snapshot
|
||||
|
||||
// Default target dir — surfaced in the step-3 radio card.
|
||||
DefaultTargetDir string
|
||||
|
||||
// Online mirrors Hub.Connected so the dispatch button can be
|
||||
// disabled at render time when the agent is offline.
|
||||
Online bool
|
||||
|
||||
// Error is shown as a banner above the wizard. Re-render-friendly:
|
||||
// the operator's snapshot/path/target choices survive the round-trip.
|
||||
Error string
|
||||
|
||||
// Form fields preserved on validation re-render. The template
|
||||
// reads these to pre-tick checkboxes etc; the names match the
|
||||
// POST form keys.
|
||||
FormPaths []string // "/etc/nginx/sites-available/alfa.conf"
|
||||
FormInPlace bool
|
||||
FormTargetDir string
|
||||
FormConfirmHN string // typed-confirm input value
|
||||
}
|
||||
|
||||
// handleUIRestoreGet renders the wizard. URL variants:
|
||||
// - /hosts/{id}/restore — step 1 = pick snapshot
|
||||
// - /hosts/{id}/snapshots/{sid}/restore — snapshot pre-selected
|
||||
func (s *Server) handleUIRestoreGet(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
u := s.requireUIUser(w, r)
|
||||
if u == nil {
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
host, err := s.deps.Store.GetHost(r.Context(), hostID)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
stdhttp.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
slog.Error("ui restore: get host", "host_id", hostID, "err", err)
|
||||
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
page := hostRestorePage{
|
||||
hostChromeData: s.loadHostChrome(r, *host, "snapshots", "restore"),
|
||||
DefaultTargetDir: defaultRestoreTargetDir(),
|
||||
Online: s.deps.Hub.Connected(host.ID),
|
||||
}
|
||||
snaps, err := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID)
|
||||
if err != nil {
|
||||
slog.Error("ui restore: list snapshots", "host_id", hostID, "err", err)
|
||||
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if len(snaps) > 100 {
|
||||
snaps = snaps[:100]
|
||||
}
|
||||
page.Snapshots = snaps
|
||||
|
||||
// Snapshot deep-link variant — if the URL carries a sid, prefill it.
|
||||
if sid := chi.URLParam(r, "sid"); sid != "" {
|
||||
for i := range snaps {
|
||||
if snaps[i].ID == sid || snaps[i].ShortID == sid {
|
||||
p := snaps[i]
|
||||
page.Selected = &p
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
view := s.baseView(u)
|
||||
view.Title = "Restore · " + host.Name
|
||||
view.Page = page
|
||||
if err := s.deps.UI.Render(w, "host_restore", view); err != nil {
|
||||
slog.Error("ui restore: render", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// handleUIRestorePost validates the form and dispatches the restore
|
||||
// job. On validation error re-renders the wizard with the error
|
||||
// banner + the operator's input intact.
|
||||
func (s *Server) handleUIRestorePost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
u := s.requireUIUser(w, r)
|
||||
if u == nil {
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
host, err := s.deps.Store.GetHost(r.Context(), hostID)
|
||||
if err != nil {
|
||||
stdhttp.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if err := r.ParseForm(); err != nil {
|
||||
stdhttp.Error(w, "bad form", stdhttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
snapshotID := strings.TrimSpace(r.PostForm.Get("snapshot_id"))
|
||||
paths := r.PostForm["paths"] // multiple checkbox values
|
||||
inPlace := r.PostForm.Get("target_mode") == "in_place"
|
||||
targetDir := strings.TrimSpace(r.PostForm.Get("target_dir"))
|
||||
confirmHN := strings.TrimSpace(r.PostForm.Get("confirm_hostname"))
|
||||
|
||||
rerender := func(errMsg string, status int) {
|
||||
page := hostRestorePage{
|
||||
hostChromeData: s.loadHostChrome(r, *host, "snapshots", "restore"),
|
||||
DefaultTargetDir: defaultRestoreTargetDir(),
|
||||
Online: s.deps.Hub.Connected(host.ID),
|
||||
Error: errMsg,
|
||||
FormPaths: paths,
|
||||
FormInPlace: inPlace,
|
||||
FormTargetDir: targetDir,
|
||||
FormConfirmHN: confirmHN,
|
||||
}
|
||||
snaps, _ := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID)
|
||||
if len(snaps) > 100 {
|
||||
snaps = snaps[:100]
|
||||
}
|
||||
page.Snapshots = snaps
|
||||
for i := range snaps {
|
||||
if snaps[i].ID == snapshotID || snaps[i].ShortID == snapshotID {
|
||||
ss := snaps[i]
|
||||
page.Selected = &ss
|
||||
break
|
||||
}
|
||||
}
|
||||
view := s.baseView(u)
|
||||
view.Title = "Restore · " + host.Name
|
||||
view.Page = page
|
||||
w.WriteHeader(status)
|
||||
_ = s.deps.UI.Render(w, "host_restore", view)
|
||||
}
|
||||
|
||||
if snapshotID == "" {
|
||||
rerender("Pick a snapshot first.", stdhttp.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
cleanPaths := make([]string, 0, len(paths))
|
||||
for _, p := range paths {
|
||||
p = strings.TrimSpace(p)
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(p, "/") {
|
||||
rerender("Paths must be absolute (start with /).", stdhttp.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
cleanPaths = append(cleanPaths, p)
|
||||
}
|
||||
if len(cleanPaths) == 0 {
|
||||
rerender("Pick at least one file or directory to restore.", stdhttp.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
|
||||
if inPlace {
|
||||
if confirmHN != host.Name {
|
||||
rerender("Type the host name exactly to confirm an in-place (overwrite) restore.",
|
||||
stdhttp.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
// New-directory mode: trust the operator's chosen target.
|
||||
// Empty falls back to the default. Validate it's either
|
||||
// absolute or starts with $HOME / ~/ (the agent expands
|
||||
// these at run time).
|
||||
if targetDir == "" {
|
||||
targetDir = defaultRestoreTargetDir()
|
||||
}
|
||||
if !looksLikeRestoreTarget(targetDir) {
|
||||
rerender("Target must be an absolute path, or start with $HOME or ~/.",
|
||||
stdhttp.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if !s.deps.Hub.Connected(host.ID) {
|
||||
rerender("Agent is offline. Try again when it reconnects.",
|
||||
stdhttp.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
|
||||
// Build a new job id up-front so we can substitute it into the
|
||||
// new-directory target path. The agent will additionally expand
|
||||
// $HOME / ~/ before invoking restic.
|
||||
jobID := ulid.Make().String()
|
||||
finalTarget := ""
|
||||
if !inPlace {
|
||||
finalTarget = strings.ReplaceAll(targetDir, "<job-id>", jobID)
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
if err := s.deps.Store.CreateJob(r.Context(), store.Job{
|
||||
ID: jobID,
|
||||
HostID: host.ID,
|
||||
Kind: string(api.JobRestore),
|
||||
ActorKind: "user",
|
||||
ActorID: &u.ID,
|
||||
CreatedAt: now,
|
||||
}); err != nil {
|
||||
slog.Error("ui restore: create job", "err", err)
|
||||
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
payload := api.CommandRunPayload{
|
||||
JobID: jobID,
|
||||
Kind: api.JobRestore,
|
||||
Restore: &api.RestorePayload{
|
||||
SnapshotID: snapshotID,
|
||||
Paths: cleanPaths,
|
||||
InPlace: inPlace,
|
||||
TargetDir: finalTarget,
|
||||
},
|
||||
}
|
||||
env, err := api.Marshal(api.MsgCommandRun, jobID, payload)
|
||||
if err != nil {
|
||||
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil {
|
||||
slog.Warn("ui restore: dispatch failed", "err", err)
|
||||
rerender("Couldn't deliver the restore command (agent went offline).",
|
||||
stdhttp.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
UserID: &u.ID,
|
||||
Actor: "user",
|
||||
Action: "host.restore",
|
||||
TargetKind: ptr("host"),
|
||||
TargetID: &host.ID,
|
||||
TS: now,
|
||||
})
|
||||
|
||||
// HTMX redirect (or vanilla redirect) to the live job log.
|
||||
jobURL := "/jobs/" + jobID
|
||||
if r.Header.Get("HX-Request") == "true" {
|
||||
w.Header().Set("HX-Redirect", jobURL)
|
||||
w.WriteHeader(stdhttp.StatusNoContent)
|
||||
return
|
||||
}
|
||||
stdhttp.Redirect(w, r, jobURL, stdhttp.StatusSeeOther)
|
||||
}
|
||||
|
||||
// hostRestoreTreePage is the data shape for the tree-node HTMX partial.
|
||||
type hostRestoreTreePage struct {
|
||||
HostID string
|
||||
SnapshotID string
|
||||
Path string
|
||||
Children []treeChildView
|
||||
Error string
|
||||
}
|
||||
|
||||
// treeChildView describes a single tree row, i.e. one direct child of
// the directory being listed.
//
//   - Name:  the entry's own name
//   - Type:  dir | file | symlink
//   - Path:  full path of the entry; used as the checkbox value
//   - Size:  entry size as reported in the listing
//   - IsDir: true when Type is "dir"
type treeChildView struct {
	Name, Type, Path string

	Size  int64
	IsDir bool
}
|
||||
|
||||
// handleUIRestoreTree is the HTMX-served partial that loads one
|
||||
// directory's children. Called when the operator clicks an expand
|
||||
// chevron in the wizard's tree browser. Caches via fetchTreeWithCache.
|
||||
func (s *Server) handleUIRestoreTree(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
u := s.requireUIUser(w, r)
|
||||
if u == nil {
|
||||
return
|
||||
}
|
||||
hostID := chi.URLParam(r, "id")
|
||||
host, err := s.deps.Store.GetHost(r.Context(), hostID)
|
||||
if err != nil {
|
||||
stdhttp.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
q := r.URL.Query()
|
||||
snapshotID := strings.TrimSpace(q.Get("snapshot"))
|
||||
pathArg := strings.TrimSpace(q.Get("path"))
|
||||
if pathArg == "" {
|
||||
pathArg = "/"
|
||||
}
|
||||
if snapshotID == "" {
|
||||
stdhttp.Error(w, "snapshot required", stdhttp.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if !s.deps.Hub.Connected(host.ID) {
|
||||
// Render the partial with an error message rather than 503ing
|
||||
// — the wizard renders the error inline next to the failed node.
|
||||
page := hostRestoreTreePage{
|
||||
HostID: host.ID, SnapshotID: snapshotID, Path: pathArg,
|
||||
Error: "agent offline",
|
||||
}
|
||||
view := s.baseView(u)
|
||||
view.Page = page
|
||||
_ = s.deps.UI.RenderPartial(w, "tree_node", view)
|
||||
return
|
||||
}
|
||||
|
||||
sessionID := sessionIDFromCookie(r)
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 35*time.Second)
|
||||
defer cancel()
|
||||
|
||||
result, err := s.fetchTreeWithCache(ctx, sessionID, host.ID, snapshotID, pathArg)
|
||||
if err != nil {
|
||||
page := hostRestoreTreePage{
|
||||
HostID: host.ID, SnapshotID: snapshotID, Path: pathArg,
|
||||
Error: err.Error(),
|
||||
}
|
||||
view := s.baseView(u)
|
||||
view.Page = page
|
||||
_ = s.deps.UI.RenderPartial(w, "tree_node", view)
|
||||
return
|
||||
}
|
||||
if result.Error != "" {
|
||||
page := hostRestoreTreePage{
|
||||
HostID: host.ID, SnapshotID: snapshotID, Path: pathArg,
|
||||
Error: result.Error,
|
||||
}
|
||||
view := s.baseView(u)
|
||||
view.Page = page
|
||||
_ = s.deps.UI.RenderPartial(w, "tree_node", view)
|
||||
return
|
||||
}
|
||||
|
||||
children := make([]treeChildView, 0, len(result.Entries))
|
||||
for _, e := range result.Entries {
|
||||
full := joinTreePath(pathArg, e.Name)
|
||||
children = append(children, treeChildView{
|
||||
Name: e.Name, Type: e.Type, Path: full,
|
||||
Size: e.Size,
|
||||
IsDir: e.Type == "dir",
|
||||
})
|
||||
}
|
||||
// Stable order: dirs first, then files, alphabetically.
|
||||
sort.SliceStable(children, func(i, j int) bool {
|
||||
if children[i].IsDir != children[j].IsDir {
|
||||
return children[i].IsDir
|
||||
}
|
||||
return children[i].Name < children[j].Name
|
||||
})
|
||||
|
||||
page := hostRestoreTreePage{
|
||||
HostID: host.ID, SnapshotID: snapshotID, Path: pathArg,
|
||||
Children: children,
|
||||
}
|
||||
view := s.baseView(u)
|
||||
view.Page = page
|
||||
if err := s.deps.UI.RenderPartial(w, "tree_node", view); err != nil {
|
||||
slog.Warn("ui restore tree: render partial", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// defaultRestoreTargetDir returns the new-directory target used as the
// placeholder on the step-3 radio card and as the fallback when the
// operator leaves the target field blank.
//
// The value contains two placeholders: $HOME, which is left for the
// agent to resolve (typically /root for the systemd-as-root unit), and
// <job-id>, which the POST handler substitutes at dispatch. Per the
// original author's note, the systemd unit's ReadWritePaths include the
// agent user's home/rm-restore subdir so this default works under the
// sandbox.
func defaultRestoreTargetDir() string {
	const target = "$HOME/rm-restore/<job-id>/"
	return target
}
|
||||
|
||||
// looksLikeRestoreTarget reports whether the operator-supplied target
// dir p has a shape the agent can resolve: an absolute path (leading
// "/"), or one of the home-directory spellings "$HOME", "${HOME}" or
// "~", either on its own or followed by "/". The empty string and
// everything else are rejected; other environment variables are refused
// on purpose so operator input cannot pick up arbitrary agent env
// values.
func looksLikeRestoreTarget(p string) bool {
	if strings.HasPrefix(p, "/") {
		return true
	}
	for _, home := range [...]string{"$HOME", "${HOME}", "~"} {
		if p == home || strings.HasPrefix(p, home+"/") {
			return true
		}
	}
	return false
}
|
||||
|
||||
// sessionIDFromCookie returns the operator's session cookie value,
|
||||
// used as the cache key scope for the tree-list cache. Unauthenticated
|
||||
// requests don't reach this point, so an empty cookie value would
|
||||
// only happen if requireUIUser is bypassed in tests — fall back to
|
||||
// the request remote addr for those cases.
|
||||
func sessionIDFromCookie(r *stdhttp.Request) string {
|
||||
if c, err := r.Cookie(sessionCookieName); err == nil && c.Value != "" {
|
||||
return c.Value
|
||||
}
|
||||
return r.RemoteAddr
|
||||
}
|
||||
|
||||
// joinTreePath appends the child name to the directory path dir with
// exactly one "/" between them. Trailing slashes on dir are stripped
// first, so "", "/" and "//" all yield "/"+name. Slashes elsewhere in
// dir, and name itself, are left untouched.
func joinTreePath(dir, name string) string {
	parent := strings.TrimRight(dir, "/")
	return parent + "/" + name
}
|
||||
|
||||
// satisfy unused-import if compile order shifts.
|
||||
var _ = ui.User{}
|
||||
@@ -0,0 +1,380 @@
|
||||
// ui_restore_test.go — covers the restore wizard backend (P3-01).
|
||||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/coder/websocket"
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// seedSnapshot creates a snapshot row directly via ReplaceHostSnapshots.
|
||||
// Returns the snapshot ID.
|
||||
func seedSnapshot(t *testing.T, st *store.Store, hostID, hostname string) string {
|
||||
t.Helper()
|
||||
id := strings.ReplaceAll(ulid.Make().String(), "-", "")
|
||||
short := id[:8]
|
||||
if err := st.ReplaceHostSnapshots(context.Background(), hostID, []store.Snapshot{{
|
||||
ID: id, ShortID: short, Time: time.Now().UTC().Add(-2 * time.Hour),
|
||||
Hostname: hostname, Paths: []string{"/etc"}, Tags: []string{"system-config"},
|
||||
SizeBytes: 612 * 1024 * 1024, FileCount: 100,
|
||||
}}, time.Now().UTC()); err != nil {
|
||||
t.Fatalf("seed snapshot: %v", err)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// seedTwoSnapshots seeds two snapshots in one ReplaceHostSnapshots call
|
||||
// so both end up in the host's list. ReplaceHostSnapshots is atomic-
|
||||
// swap, so calling seedSnapshot twice would only leave the second.
|
||||
func seedTwoSnapshots(t *testing.T, st *store.Store, hostID, hostname string) (string, string) {
|
||||
t.Helper()
|
||||
a := strings.ReplaceAll(ulid.Make().String(), "-", "")
|
||||
b := strings.ReplaceAll(ulid.Make().String(), "-", "")
|
||||
if err := st.ReplaceHostSnapshots(context.Background(), hostID, []store.Snapshot{
|
||||
{
|
||||
ID: a, ShortID: a[:8], Time: time.Now().UTC().Add(-3 * time.Hour),
|
||||
Hostname: hostname, Paths: []string{"/etc"}, Tags: []string{"system-config"},
|
||||
},
|
||||
{
|
||||
ID: b, ShortID: b[:8], Time: time.Now().UTC().Add(-1 * time.Hour),
|
||||
Hostname: hostname, Paths: []string{"/etc"}, Tags: []string{"system-config"},
|
||||
},
|
||||
}, time.Now().UTC()); err != nil {
|
||||
t.Fatalf("seed snapshots: %v", err)
|
||||
}
|
||||
return a, b
|
||||
}
|
||||
|
||||
// TestRestoreWizardGetRendersStep1 verifies the snapshot picker is on
|
||||
// the page when no snapshot is pre-selected.
|
||||
func TestRestoreWizardGetRendersStep1(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "rstore-host-1")
|
||||
_ = seedSnapshot(t, st, hostID, "rstore-host-1")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
req, _ := stdhttp.NewRequest("GET", ts.URL+"/hosts/"+hostID+"/restore", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusOK {
|
||||
t.Fatalf("status: got %d, want 200", res.StatusCode)
|
||||
}
|
||||
body := readBody(t, res.Body)
|
||||
if !strings.Contains(body, "Restore from snapshot") {
|
||||
t.Errorf("expected wizard heading; body: %s", short(body))
|
||||
}
|
||||
if !strings.Contains(body, "Pick a snapshot first") &&
|
||||
!strings.Contains(body, "Pick the point-in-time you want to restore from") {
|
||||
t.Errorf("expected step-1 prompt")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRestoreWizardGetWithSnapshotPreselected verifies the deep-link
|
||||
// path puts the snapshot summary card on the page.
|
||||
func TestRestoreWizardGetWithSnapshotPreselected(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "rstore-host-2")
|
||||
sid := seedSnapshot(t, st, hostID, "rstore-host-2")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
req, _ := stdhttp.NewRequest("GET",
|
||||
ts.URL+"/hosts/"+hostID+"/snapshots/"+sid+"/restore", nil)
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusOK {
|
||||
t.Fatalf("status: got %d", res.StatusCode)
|
||||
}
|
||||
body := readBody(t, res.Body)
|
||||
// The selected summary card should reference the snapshot's short ID.
|
||||
if !strings.Contains(body, sid[:8]) {
|
||||
t.Errorf("expected snapshot short id in body")
|
||||
}
|
||||
if !strings.Contains(body, "picked from") {
|
||||
t.Errorf("expected 'picked from N snapshots' summary line")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRestorePostRequiresSnapshot: form without snapshot_id re-renders
|
||||
// with an error.
|
||||
func TestRestorePostRequiresSnapshot(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "rstore-no-snap")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{
|
||||
"snapshot_id": {""},
|
||||
"target_mode": {"new_dir"},
|
||||
"paths": {"/etc/foo"},
|
||||
}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnprocessableEntity {
|
||||
t.Fatalf("status: got %d, want 422", res.StatusCode)
|
||||
}
|
||||
body := readBody(t, res.Body)
|
||||
if !strings.Contains(body, "Pick a snapshot") {
|
||||
t.Errorf("expected 'Pick a snapshot' error in body")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRestorePostRequiresPaths: form with snapshot but no paths is rejected.
|
||||
func TestRestorePostRequiresPaths(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "rstore-no-paths")
|
||||
sid := seedSnapshot(t, st, hostID, "rstore-no-paths")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{
|
||||
"snapshot_id": {sid},
|
||||
"target_mode": {"new_dir"},
|
||||
}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnprocessableEntity {
|
||||
t.Fatalf("status: got %d, want 422", res.StatusCode)
|
||||
}
|
||||
body := readBody(t, res.Body)
|
||||
if !strings.Contains(body, "at least one file") {
|
||||
t.Errorf("expected paths-required error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRestorePostInPlaceRequiresHostnameMatch: in-place mode with the
|
||||
// wrong hostname typed re-renders + does not dispatch.
|
||||
func TestRestorePostInPlaceRequiresHostnameMatch(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, token := enrolHostForUI(t, srv, st, "rstore-inplace")
|
||||
sid := seedSnapshot(t, st, hostID, "rstore-inplace")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "rstore-inplace")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{
|
||||
"snapshot_id": {sid},
|
||||
"target_mode": {"in_place"},
|
||||
"paths": {"/etc/nginx/nginx.conf"},
|
||||
"confirm_hostname": {"WRONG"},
|
||||
}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnprocessableEntity {
|
||||
t.Fatalf("status: got %d, want 422", res.StatusCode)
|
||||
}
|
||||
|
||||
// No restore command should arrive at the agent.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
for {
|
||||
mt, raw, rerr := c.Read(ctx)
|
||||
if rerr != nil {
|
||||
break
|
||||
}
|
||||
if mt == websocket.MessageText && strings.Contains(string(raw), `"command.run"`) &&
|
||||
strings.Contains(string(raw), `"kind":"restore"`) {
|
||||
t.Fatal("unexpected restore command.run after wrong-hostname rejection")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRestorePostHappyPathDispatches: well-formed new-directory form
|
||||
// dispatches a JobRestore command.run with the expected payload + writes
|
||||
// an audit row + redirects.
|
||||
func TestRestorePostHappyPathDispatches(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, token := enrolHostForUI(t, srv, st, "rstore-happy")
|
||||
sid := seedSnapshot(t, st, hostID, "rstore-happy")
|
||||
c := agentDial(t, srv, ts, hostID, token)
|
||||
sendHello(t, c, "rstore-happy")
|
||||
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{
|
||||
"snapshot_id": {sid},
|
||||
"target_mode": {"new_dir"},
|
||||
"paths": {"/etc/nginx/nginx.conf", "/etc/nginx/sites-available/alfa.conf"},
|
||||
}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.Header.Set("HX-Request", "true")
|
||||
req.AddCookie(cookie)
|
||||
// Don't follow redirects — we want to inspect the HX-Redirect header.
|
||||
client := &stdhttp.Client{
|
||||
CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
|
||||
return stdhttp.ErrUseLastResponse
|
||||
},
|
||||
}
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusNoContent {
|
||||
t.Fatalf("status: got %d, want 204", res.StatusCode)
|
||||
}
|
||||
if res.Header.Get("HX-Redirect") == "" {
|
||||
t.Fatal("expected HX-Redirect header pointing at the live job page")
|
||||
}
|
||||
|
||||
// Find the dispatched command.run on the agent socket.
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
var got api.Envelope
|
||||
for time.Now().Before(deadline) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
|
||||
mt, raw, rerr := c.Read(ctx)
|
||||
cancel()
|
||||
if rerr != nil {
|
||||
break
|
||||
}
|
||||
if mt != websocket.MessageText {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(string(raw), `"command.run"`) || !strings.Contains(string(raw), `"kind":"restore"`) {
|
||||
continue
|
||||
}
|
||||
if err := json.Unmarshal(raw, &got); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
break
|
||||
}
|
||||
if got.Type != api.MsgCommandRun {
|
||||
t.Fatal("never received restore command.run")
|
||||
}
|
||||
var cp api.CommandRunPayload
|
||||
if err := got.UnmarshalPayload(&cp); err != nil {
|
||||
t.Fatalf("unmarshal payload: %v", err)
|
||||
}
|
||||
if cp.Kind != api.JobRestore {
|
||||
t.Fatalf("kind: got %q", cp.Kind)
|
||||
}
|
||||
if cp.Restore == nil {
|
||||
t.Fatal("restore payload is nil")
|
||||
}
|
||||
if cp.Restore.SnapshotID != sid {
|
||||
t.Fatalf("snapshot id: got %q want %q", cp.Restore.SnapshotID, sid)
|
||||
}
|
||||
if cp.Restore.InPlace {
|
||||
t.Fatal("expected new-directory mode (in_place=false)")
|
||||
}
|
||||
if !strings.HasPrefix(cp.Restore.TargetDir, "$HOME/rm-restore/") {
|
||||
t.Fatalf("target_dir: got %q, want prefix $HOME/rm-restore/", cp.Restore.TargetDir)
|
||||
}
|
||||
// <job-id> placeholder substituted with the dispatched job_id.
|
||||
if !strings.Contains(cp.Restore.TargetDir, "/01") {
|
||||
t.Errorf("target_dir: expected job_id substituted into the path; got %q", cp.Restore.TargetDir)
|
||||
}
|
||||
if len(cp.Restore.Paths) != 2 {
|
||||
t.Fatalf("paths: got %d, want 2", len(cp.Restore.Paths))
|
||||
}
|
||||
|
||||
// Audit row.
|
||||
var n int
|
||||
if err := st.DB().QueryRow(
|
||||
`SELECT COUNT(*) FROM audit_log WHERE action = 'host.restore' AND target_id = ?`,
|
||||
hostID).Scan(&n); err != nil {
|
||||
t.Fatalf("audit count: %v", err)
|
||||
}
|
||||
if n != 1 {
|
||||
t.Fatalf("audit rows: got %d, want 1", n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRestorePostOfflineHostRejected: agent not connected → 503 +
|
||||
// no command.run.
|
||||
func TestRestorePostOfflineHostRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
srv, ts, st := rawTestServerWithUI(t)
|
||||
hostID, _ := enrolHostForUI(t, srv, st, "rstore-offline")
|
||||
sid := seedSnapshot(t, st, hostID, "rstore-offline")
|
||||
cookie := loginAsAdmin(t, st)
|
||||
|
||||
form := url.Values{
|
||||
"snapshot_id": {sid},
|
||||
"target_mode": {"new_dir"},
|
||||
"paths": {"/etc/foo"},
|
||||
}
|
||||
req, _ := stdhttp.NewRequest("POST",
|
||||
ts.URL+"/hosts/"+hostID+"/restore", strings.NewReader(form.Encode()))
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
req.AddCookie(cookie)
|
||||
res, err := stdhttp.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("do: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusServiceUnavailable {
|
||||
t.Fatalf("status: got %d, want 503", res.StatusCode)
|
||||
}
|
||||
_ = srv
|
||||
}
|
||||
|
||||
// helpers --------------------------------------------------------------
|
||||
|
||||
// readBody drains body and returns everything it produced as a string.
// Reading stops at the first error of any kind (end-of-stream included);
// the error value itself is discarded, so a failed read simply yields
// whatever bytes arrived before it.
func readBody(t *testing.T, body interface{ Read(p []byte) (int, error) }) string {
	t.Helper()
	var (
		collected = make([]byte, 0, 16*1024)
		chunk     = make([]byte, 4096)
	)
	for {
		got, readErr := body.Read(chunk)
		if got > 0 {
			collected = append(collected, chunk[:got]...)
		}
		if readErr != nil {
			return string(collected)
		}
	}
}
|
||||
|
||||
// short trims s to roughly 400 bytes for readable test-failure output,
// appending "…" when anything was cut. Strings of 400 bytes or fewer
// are returned unchanged. The cut point is moved back (by at most three
// bytes) to a rune boundary so a multi-byte UTF-8 sequence is never
// split in half.
func short(s string) string {
	const limit = 400
	if len(s) <= limit {
		return s
	}
	cut := limit
	// A byte of the form 10xxxxxx is a UTF-8 continuation byte: if the
	// byte at the cut is one, the rune it belongs to started earlier and
	// slicing here would sever it. UTF-8 runes are at most four bytes,
	// so three steps back is always enough for valid input.
	for back := 0; back < 3 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "…"
}
|
||||
@@ -92,6 +92,7 @@ func New() (*Renderer, error) {
|
||||
"templates/partials/toast.html",
|
||||
"templates/partials/awaiting_agent.html",
|
||||
"templates/partials/host_chrome.html",
|
||||
"templates/partials/tree_node.html",
|
||||
}
|
||||
|
||||
pageEntries, err := fs.Glob(web.FS, "templates/pages/*.html")
|
||||
|
||||
@@ -54,7 +54,7 @@ func AgentHandler(deps HandlerDeps) stdhttp.Handler {
|
||||
return stdhttp.HandlerFunc(func(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
host, ok := authenticateAgent(r, deps.Store)
|
||||
if !ok {
|
||||
stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized)
|
||||
stdhttp.Error(w, "unauthorised", stdhttp.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -204,7 +204,7 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
|
||||
string(p.Status), p.ExitCode, p.Stats, errMsg, p.FinishedAt); err != nil {
|
||||
slog.Warn("ws: mark job finished", "job_id", p.JobID, "err", err)
|
||||
}
|
||||
// repo_initialized_at projection has been removed — auto-init
|
||||
// repo_initialised_at projection has been removed — auto-init
|
||||
// at host enrolment makes "is the repo init'd" derivable from
|
||||
// the latest init job's status, no separate column needed.
|
||||
if deps.JobHub != nil {
|
||||
@@ -297,6 +297,20 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
|
||||
// (job.started → job.finished) is sufficient signal.
|
||||
slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID)
|
||||
|
||||
case api.MsgTreeListResult:
|
||||
// Reply to a synchronous tree.list RPC. Route to the waiter
|
||||
// registered against the request envelope's ID; if none is
|
||||
// registered the caller already gave up (ctx expired) — drop
|
||||
// the stray reply quietly.
|
||||
if env.ID == "" {
|
||||
slog.Warn("ws: tree.list.result missing envelope ID", "host_id", hostID)
|
||||
break
|
||||
}
|
||||
if !deps.Hub.rpcs.resolve(env.ID, env) {
|
||||
slog.Debug("ws: tree.list.result with no waiter (timeout?)",
|
||||
"id", env.ID, "host_id", hostID)
|
||||
}
|
||||
|
||||
case api.MsgError:
|
||||
var ep api.ErrorPayload
|
||||
_ = env.UnmarshalPayload(&ep)
|
||||
|
||||
@@ -21,6 +21,11 @@ import (
|
||||
type Hub struct {
|
||||
mu sync.RWMutex
|
||||
conns map[string]*Conn // hostID → conn
|
||||
|
||||
// rpcs tracks in-flight synchronous RPC calls (e.g. tree.list).
|
||||
// See rpc.go for details. Lazy-initialised via the registry's
|
||||
// own register() so callers don't have to juggle a constructor.
|
||||
rpcs rpcRegistry
|
||||
}
|
||||
|
||||
// NewHub returns an empty hub.
|
||||
@@ -100,7 +105,7 @@ func NewConn(hostID string, c *websocket.Conn) *Conn {
|
||||
}
|
||||
|
||||
// Send writes an envelope as a JSON text message. Concurrent calls
|
||||
// are serialized; the underlying socket is not safe for parallel
|
||||
// are serialised; the underlying socket is not safe for parallel
|
||||
// writers.
|
||||
func (c *Conn) Send(ctx context.Context, env api.Envelope) error {
|
||||
c.writeMu.Lock()
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
package ws
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// rpcRegistry holds in-flight synchronous RPC calls. SendRPC registers
|
||||
// a channel keyed by the request envelope's ID; the WS read loop's
|
||||
// dispatcher routes incoming reply envelopes to the matching channel
|
||||
// when their type is one of the known reply types (currently just
|
||||
// tree.list.result).
|
||||
//
|
||||
// A single global registry keyed by envelope ID is fine because IDs
|
||||
// are ULIDs — globally unique without coordinating across hubs.
|
||||
type rpcRegistry struct {
|
||||
// mu guards every read and write of pending.
mu sync.Mutex
|
||||
// pending maps a request envelope ID to the channel its waiter receives on; it stays nil until the first register call creates it.
pending map[string]chan api.Envelope
|
||||
}
|
||||
|
||||
// register reserves a channel for the given request ID. The channel
|
||||
// is buffered (cap 1) so a slow waiter doesn't block the read loop's
|
||||
// dispatcher when the reply lands.
|
||||
func (r *rpcRegistry) register(id string) chan api.Envelope {
|
||||
ch := make(chan api.Envelope, 1)
|
||||
r.mu.Lock()
|
||||
if r.pending == nil {
|
||||
r.pending = make(map[string]chan api.Envelope)
|
||||
}
|
||||
r.pending[id] = ch
|
||||
r.mu.Unlock()
|
||||
return ch
|
||||
}
|
||||
|
||||
// resolve delivers an envelope to its waiter and removes the entry.
|
||||
// Returns whether a waiter was actually present (the dispatcher uses
|
||||
// this to decide whether to log a stray-reply warning).
|
||||
func (r *rpcRegistry) resolve(id string, env api.Envelope) bool {
|
||||
r.mu.Lock()
|
||||
ch, ok := r.pending[id]
|
||||
if ok {
|
||||
delete(r.pending, id)
|
||||
}
|
||||
r.mu.Unlock()
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
// Buffered chan cap 1 — non-blocking send. The waiter goroutine
|
||||
// owns the receive side so this is the only sender.
|
||||
ch <- env
|
||||
close(ch)
|
||||
return true
|
||||
}
|
||||
|
||||
// release abandons the entry without delivering a value. Used when
|
||||
// the caller's context expires before a reply arrives — the next
|
||||
// stray reply (if any) will hit the no-waiter case in resolve and
|
||||
// just be dropped.
|
||||
func (r *rpcRegistry) release(id string) {
|
||||
r.mu.Lock()
|
||||
delete(r.pending, id)
|
||||
r.mu.Unlock()
|
||||
}
|
||||
|
||||
// SendRPC sends a request envelope to the host and blocks until a
|
||||
// matching reply lands or the context expires. The hub picks a fresh
|
||||
// envelope ID, marshals the payload, registers a waiter, and sends.
|
||||
//
|
||||
// timeout caps the wait; a too-aggressive value relative to the
|
||||
// expected restic-side latency will leak the registry entry until the
|
||||
// reply finally arrives (which is then silently dropped). The default
|
||||
// callers use is 30s, which covers a slow network round-trip plus a
|
||||
// restic ls invocation against a remote rest-server.
|
||||
//
|
||||
// If the host disconnects mid-flight, the read loop ends and no reply
|
||||
// will ever come — the caller's ctx.Done()/timeout is the only path
|
||||
// out. We could pre-fail by tracking conn lifetime, but the bound
|
||||
// keeps the code simple and the worst case is a 30s wait.
|
||||
func (h *Hub) SendRPC(ctx context.Context, hostID string, reqType api.MessageType, payload any, timeout time.Duration) (api.Envelope, error) {
|
||||
if timeout <= 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
id := ulid.Make().String()
|
||||
env, err := api.Marshal(reqType, id, payload)
|
||||
if err != nil {
|
||||
return api.Envelope{}, err
|
||||
}
|
||||
|
||||
ch := h.rpcs.register(id)
|
||||
|
||||
if err := h.Send(ctx, hostID, env); err != nil {
|
||||
h.rpcs.release(id)
|
||||
return api.Envelope{}, err
|
||||
}
|
||||
|
||||
select {
|
||||
case reply := <-ch:
|
||||
return reply, nil
|
||||
case <-ctx.Done():
|
||||
h.rpcs.release(id)
|
||||
return api.Envelope{}, ctx.Err()
|
||||
case <-time.After(timeout):
|
||||
h.rpcs.release(id)
|
||||
return api.Envelope{}, errors.New("ws rpc: timed out waiting for reply")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
package ws
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// TestRPCRegistryRoundTrip: register a waiter, resolve it, get the
|
||||
// envelope back. Cover the no-waiter and double-resolve cases too.
|
||||
func TestRPCRegistryRoundTrip(t *testing.T) {
|
||||
t.Parallel()
|
||||
var r rpcRegistry
|
||||
id := ulid.Make().String()
|
||||
ch := r.register(id)
|
||||
|
||||
want := api.Envelope{Type: api.MsgTreeListResult, ID: id, Payload: json.RawMessage(`{"path":"/"}`)}
|
||||
if !r.resolve(id, want) {
|
||||
t.Fatal("resolve: returned false for registered id")
|
||||
}
|
||||
got := <-ch
|
||||
if got.ID != id {
|
||||
t.Fatalf("id mismatch: got %q want %q", got.ID, id)
|
||||
}
|
||||
|
||||
// A second resolve for the same id has no waiter and should not panic.
|
||||
if r.resolve(id, want) {
|
||||
t.Fatal("resolve: returned true for already-resolved id")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRPCRegistryRelease: release abandons the waiter; a subsequent
|
||||
// resolve is a no-op (no goroutine leak, no panic).
|
||||
func TestRPCRegistryRelease(t *testing.T) {
|
||||
t.Parallel()
|
||||
var r rpcRegistry
|
||||
id := ulid.Make().String()
|
||||
_ = r.register(id)
|
||||
r.release(id)
|
||||
if r.resolve(id, api.Envelope{ID: id}) {
|
||||
t.Fatal("resolve after release: should be no-op")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRPCRegistryConcurrent: many waiters in flight concurrently get
|
||||
// only their own reply. This catches buggy keying/locking.
|
||||
func TestRPCRegistryConcurrent(t *testing.T) {
|
||||
t.Parallel()
|
||||
var r rpcRegistry
|
||||
const n = 64
|
||||
|
||||
ids := make([]string, n)
|
||||
chs := make([]chan api.Envelope, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ids[i] = ulid.Make().String()
|
||||
chs[i] = r.register(ids[i])
|
||||
}
|
||||
|
||||
// Resolve in random-ish order from many goroutines.
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < n; i++ {
|
||||
wg.Add(1)
|
||||
go func(idx int) {
|
||||
defer wg.Done()
|
||||
r.resolve(ids[idx], api.Envelope{ID: ids[idx], Type: api.MsgTreeListResult})
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
select {
|
||||
case got := <-chs[i]:
|
||||
if got.ID != ids[i] {
|
||||
t.Fatalf("waiter %d: got id %q want %q", i, got.ID, ids[i])
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatalf("waiter %d: timed out", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestSendRPCContextCancelReleases ensures that canceling the caller's
|
||||
// ctx releases the registry entry so a stray late reply is harmlessly
|
||||
// dropped. Skips if the hub isn't reachable for direct access — this
|
||||
// is purely a unit test on the registry path inside SendRPC.
|
||||
func TestSendRPCContextCancelReleases(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := NewHub()
|
||||
|
||||
// No host registered, so Hub.Send returns "host offline" and
|
||||
// SendRPC bails without ever waiting. We test the timeout/ctx
|
||||
// path by going through register() directly.
|
||||
id := ulid.Make().String()
|
||||
ch := h.rpcs.register(id)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go func() {
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
cancel()
|
||||
}()
|
||||
|
||||
// Simulate the SendRPC select: ctx wins.
|
||||
select {
|
||||
case <-ch:
|
||||
t.Fatal("unexpected reply")
|
||||
case <-ctx.Done():
|
||||
h.rpcs.release(id)
|
||||
}
|
||||
|
||||
// Now a late reply should not block (ch is still open but no
|
||||
// receiver — buffered size 1 absorbs it).
|
||||
resolved := h.rpcs.resolve(id, api.Envelope{ID: id})
|
||||
if resolved {
|
||||
t.Fatal("resolve after release should return false")
|
||||
}
|
||||
}
|
||||
@@ -38,7 +38,7 @@ func (s *Store) GetHostRepoStats(ctx context.Context, hostID string) (*HostRepoS
|
||||
|
||||
// getHostRepoStatsTx is identical to GetHostRepoStats but runs on an
|
||||
// existing transaction so the fetch-merge-upsert in UpsertHostRepoStats
|
||||
// is fully serialized.
|
||||
// is fully serialised.
|
||||
func getHostRepoStatsTx(ctx context.Context, tx *sql.Tx, hostID string) (*HostRepoStats, error) {
|
||||
row := tx.QueryRowContext(ctx,
|
||||
`SELECT host_id, total_size_bytes, raw_size_bytes, unique_files,
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
-- 0012_jobs_restore_diff_kind.sql
--
-- Add 'restore' and 'diff' to the jobs.kind CHECK constraint so the
-- restore wizard (P3-01) and the snapshot-diff endpoint (P3-09) can
-- persist their job rows. SQLite can't ALTER a CHECK in place, so we
-- rebuild the table.
--
-- Rebuild safety: jobs has an inbound FK from job_logs (ON DELETE
-- CASCADE) and an outbound FK to schedules via scheduled_id.
-- CLAUDE.md flags DROP TABLE on a parent as risky under
-- foreign_keys=ON: SQLite documents that DROP TABLE then performs an
-- implicit DELETE of the table's rows, and that DELETE may invoke
-- foreign key actions such as the cascade into job_logs. We mitigate
-- two ways:
--
--   1. Stash job_logs into a temp table BEFORE rebuilding jobs, then
--      restore the rows after the rebuild settles, so a cascade cannot
--      take the log history with it.
--   2. Use the safe rebuild order from 0005: create jobs_new with the
--      wider CHECK → copy data → DROP jobs → RENAME jobs_new TO jobs.
--      Do NOT rename the original first (the dangling-FK trap that
--      0005's first draft hit and 0006 cleaned up).

CREATE TEMPORARY TABLE _job_logs_backup AS
  SELECT job_id, seq, ts, stream, payload FROM job_logs;

CREATE TABLE jobs_new (
  id           TEXT PRIMARY KEY,
  host_id      TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
  kind         TEXT NOT NULL CHECK (kind IN
                 ('backup','init','forget','prune','check','unlock','restore','diff')),
  status       TEXT NOT NULL CHECK (status IN ('queued','running','succeeded','failed','cancelled')),
  scheduled_id TEXT REFERENCES schedules(id) ON DELETE SET NULL,
  actor_kind   TEXT NOT NULL CHECK (actor_kind IN ('user','schedule','system')),
  actor_id     TEXT,
  started_at   TEXT,
  finished_at  TEXT,
  exit_code    INTEGER,
  stats        TEXT,
  error        TEXT,
  created_at   TEXT NOT NULL
);

-- Name the target columns explicitly so the copy does not depend on the
-- two tables declaring their columns in the same order.
INSERT INTO jobs_new
  (id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
   started_at, finished_at, exit_code, stats, error, created_at)
  SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
         started_at, finished_at, exit_code, stats, error, created_at
  FROM jobs;

DROP TABLE jobs;

ALTER TABLE jobs_new RENAME TO jobs;

CREATE INDEX jobs_host_id ON jobs(host_id);
CREATE INDEX jobs_status ON jobs(status);
CREATE INDEX jobs_created_at ON jobs(created_at);

-- If the DROP above cascaded into job_logs, put the rows back from the
-- temp backup. INSERT OR IGNORE skips rows that are still present
-- (assumes job_logs carries a uniqueness constraint that identifies a
-- row, e.g. on (job_id, seq) — TODO confirm against its schema).
INSERT OR IGNORE INTO job_logs (job_id, seq, ts, stream, payload)
  SELECT job_id, seq, ts, stream, payload FROM _job_logs_backup;

DROP TABLE _job_logs_backup;
|
||||
@@ -233,19 +233,58 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
|
||||
|
||||
## Phase 3 — Restore, alerts, audit
|
||||
|
||||
- [ ] **P3-01** (L) Restore wizard backend: snapshot tree browse via `restic ls --json`, path picker, target selection
|
||||
- [ ] **P3-02** (L) Restore wizard UI (multi-step: host → snapshot → paths → target → confirm)
|
||||
- [ ] **P3-03** (M) Restore execution: `restic restore` invocation, progress streaming
|
||||
- [ ] **P3-04** (L) Cross-host restore: target agent receives a temporary scoped read credential for source host's repo (single-job, auto-revoked); UI supports source→target path remapping; warns when source paths need root and target service user is non-root
|
||||
> Phase 3 is split into three independently-shippable sub-phases:
|
||||
> **Restore** (P3-01..03 + P3-09 + P3-X1 cancel + P3-X2 tree-list RPC),
|
||||
> **Alerts** (P3-05..07), **Audit UI** (P3-08). Each sub-phase has its own
|
||||
> spec → plan → implement cycle; we hand back at sub-phase boundaries.
|
||||
>
|
||||
> P3-04 (cross-host restore) was de-scoped during the Phase-3 brainstorm
|
||||
> on 2026-05-04: disaster recovery is already covered by re-enrolling a
|
||||
> replacement host with the same repo creds (snapshots reappear, restore
|
||||
> is same-host). The remaining "pull a file from host A onto host C
|
||||
> without giving C permanent access" use case is genuinely different and
|
||||
> doesn't have a confirmed need yet, so it's moved to the **Future /
|
||||
> unscheduled** section at the end of this file.
|
||||
|
||||
### Phase 3 — Restore ✅
|
||||
|
||||
> Spec: `docs/superpowers/specs/2026-05-04-p3-restore-design.md`.
|
||||
> Wireframe: `_diag/p3-restore-wizard/wireframe.html`.
|
||||
> Sweep screenshots: `_diag/p3-restore-sweep/`.
|
||||
> Shipped on branch `p3-restore`.
|
||||
|
||||
- [x] **P3-X1** (S) Cancel-job feature. `command.cancel` WS envelope; agent tracks per-job ctx.CancelFunc and kills the running `restic` subprocess via context cancel (SIGTERM, SIGKILL after 5s grace via `cmd.Cancel` + `cmd.WaitDelay`); server endpoint `POST /api/jobs/{id}/cancel` bridges UI → WS; the existing UI Cancel button on `/jobs/{id}` is now real for any running kind. Sandbox-aware: `internal/restic/cancel_{unix,windows}.go` build-tags pick SIGTERM on POSIX vs `os.Kill` on Windows (which can't deliver SIGTERM). Tests: cancel mid-run via 'sleep 30' fake-restic returns JobCancelled with exit 130 in <200ms.
|
||||
- [x] **P3-X2** (S) Tree-list synchronous WS RPC. `MsgTreeList` ↔ `MsgTreeListResult` with `Envelope.ID` correlation; generic `Hub.SendRPC` helper (registry of buffered channels keyed by ULID, ctx-cancel + timeout aware). `internal/restic.ListTreeChildren` wraps `restic ls --json` and filters its recursive output to direct children. Server-side `treeCache` is per-wizard-session (keyed by session cookie + host + snapshot + path) with a 30-min TTL and lazy sweep.
|
||||
- [x] **P3-01** (L) Restore wizard backend (`internal/server/http/ui_restore.go`). GET handlers render the four-step wizard against the wireframe. HTMX/fetch tree partial endpoint hits `fetchTreeWithCache`. POST validates: snapshot_id, ≥1 absolute path, in-place ⇒ confirm_hostname == host name, agent online; on error re-renders with operator's input intact. Happy path mints job_id, target = `/var/lib/restic-manager/restore/<job-id>` (server-picked, agent's writable dir under the systemd sandbox's `ReadWritePaths`), creates job row, ships `command.run` with `RestorePayload`, writes `host.restore` audit row, returns HX-Redirect (or 303) to the live job page.
|
||||
- [x] **P3-02** (L) Wizard UI templates (`web/templates/pages/host_restore.html` + `partials/tree_node.html`). Single-page progressively-enabled four-step form. Form-state-driven JS computes a running tally + step-4 confirm summary client-side. Tree expansion uses plain fetch (not HTMX) for simpler target lookup; loaded-state cached per node. Top-level Restore button on host detail right rail + per-snapshot Restore action on snapshot rows. New `.snap-row` token in `web/styles/input.css`.
|
||||
- [x] **P3-03** (M) Restore execution. `restic.RunRestore` builds `restore <sid> --target <dir> [--include p]...` with --json; new `pumpRestoreStdout` parses status + summary objects. `--no-ownership` is gated on the agent's restic version via `Env.AtLeastVersion(0, 17)` — the flag was added in 0.17 and 0.16 rejects it. Restic version is threaded through `runner.Config.ResticVersion` from the agent's sysinfo snapshot. New-dir target is operator-editable (default `$HOME/rm-restore/<job-id>/`); agent expands `$HOME` / `${HOME}` / `~/` at run time and calls `os.MkdirAll` on the target chain so the operator never has to pre-create the per-job subdir. `runner.RunRestore` translates `RestoreStatus` into `job.progress` (mapping FilesRestored → FilesDone, etc.); agent dispatcher case `JobRestore` reuses the `spawn()` helper from P3-X1 so cancel works. Restore-shaped job-detail variant with current-file display under the progress bar.
|
||||
- [x] **P3-09** (S) `diff` between two snapshots. `JobDiff` JobKind + `restic.RunDiff` + `runner.RunDiff`; `POST /api/hosts/{id}/snapshots/diff` (and HTMX-form variant on the unprefixed path) dispatcher with two-snapshot guard + per-host snapshot-list validation; UI panel on host detail right rail (visible when 2+ snapshots) with two short-id inputs + Diff button. Output streams as log.stream to the standard live job log page.
|
||||
- [x] **P3-X3** (S) Recent-restores line on host detail. `hostChromeData` grows `RestoreStatus` / `RestoreAt` / `RestoreJobID` populated via `store.LatestJobByKind(host_id, 'restore')` (already exists from P2R). `host_chrome.html` renders a small line below the init-status one with status-coloured copy + a link to the job log. Hidden when no restore has ever run on this host.
|
||||
- [x] **P3-X4** (S) Job log download (txt + ndjson). New `GET /api/jobs/{id}/log.{txt|ndjson}` endpoint backed by the persisted `job_logs` table — works any time (running or finished) without pausing the live WS stream because the source is the DB, not the live socket. Plain-text format mirrors the on-screen "HH:MM:SS.mmm TAG payload" shape with a small `# job ... · kind ... · status ...` header; ndjson emits one self-contained `{seq,ts,stream,payload}` JSON object per line for `jq` / tooling. Surfaced as a single header dropdown on the live job page (`details/summary`-driven, native keyboard support, click-outside-to-close). New reusable `.dropdown` / `.dropdown-menu` / `.dropdown-item` tokens in `web/styles/input.css`.
|
||||
- [x] **P3-X5** (S) UK lint locale + sweep. `.golangci.yml` misspell locale switched US → UK and the codebase swept (~73 corrections — behaviour, serialise, recognise, honour, initialise, enrol, unauthorised, etc.). Wire `ErrorCode` value `"unauthorized"` → `"unauthorised"` is a tiny contract change but the agent doesn't parse those codes today and no external clients exist yet.
|
||||
- [x] **P3-X6** (S) Snapshot SIZE/FILES tooltip on host detail. The per-snapshot summary block was added by restic 0.17 (the source comment in `internal/restic/snapshots.go` incorrectly said 0.16+); on 0.16 hosts the columns render `—`. `hostDetailPage.LegacyRestic` (computed via `Env.AtLeastVersion(0, 17)`) drives a `title="Needs restic 0.17+ on the agent host. This host runs <ver>."` + `cursor: help` on the column headers, hidden once the host upgrades.
|
||||
|
||||
> **Migration 0012** widens the `jobs.kind` CHECK constraint to include `restore` and `diff`. Rebuild required (SQLite can't ALTER CHECK in place); follows the safe pattern from 0005, with a defensive temp-table backup of `job_logs` so the cascade-trap that bit migration 0007 wouldn't take the log history with it.
|
||||
|
||||
> **install.sh + systemd unit:** the install script now pre-creates `/root/rm-restore` (root-owned 0700) so the default new-dir restore target works under the sandbox out of the box; the unit's `ReadWritePaths` gains `-/root/rm-restore` (soft-fail prefix). Existing installs need a re-run of `install.sh` to pick up the new dir; new operator-typed targets are auto-created by the agent at job time.
|
||||
|
||||
> **As shipped (Playwright sweep against the live smoke env, 2026-05-04):** login → host detail → Restore button → wizard step 1 picks snapshot a1ac4006 (most recent) → tree drill-down `/home/steve/test` (3 lazy loads) → tick `file1` + `file2` → step 4 confirm summary populated → dispatch → live job page with running progress widget → restore succeeds, files land on disk at `/root/rm-restore/<job-id>/home/steve/test/file{1,2}` (default `$HOME/rm-restore/<job-id>/` after agent-side expansion). Custom-target restore to `/tmp/custom-restore/<job-id>/` lands inside the agent's `PrivateTmp` namespace. Snapshot diff between `a1ac4006` and `5f78c788` → diff job page, statistics output streamed (738 bytes added, 0 removed). Recent-restores line on host detail reads "last restore · succeeded 28s ago · job log →". Download dropdown serves both `.txt` and `.ndjson` with correct `Content-Type` + `Content-Disposition`. SIZE/FILES tooltip "Needs restic 0.17+ on the agent host. This host runs 0.16.4." renders on column hover.
|
||||
|
||||
### Phase 3 — Alerts (not started)
|
||||
|
||||
- [ ] **P3-05** (M) Alert engine: rule evaluation loop (failed backup, stale schedule, agent offline, check failed)
|
||||
- [ ] **P3-06** (M) Notification channels: webhook, ntfy, SMTP email
|
||||
- [ ] **P3-07** (S) Alert UI: list, acknowledge, resolve
|
||||
|
||||
### Phase 3 — Audit log UI (not started)
|
||||
|
||||
- [ ] **P3-08** (S) Audit log UI with filters (user, action, target, time range)
|
||||
- [ ] **P3-09** (S) `diff` between two snapshots in UI
|
||||
|
||||
### Phase 3 acceptance
|
||||
|
||||
- A file deleted on a host can be restored from the UI in under 2 minutes. A failed backup raises an alert via the configured channel within 60s.
|
||||
- A file deleted on a host can be restored from the UI in under 2 minutes via the wizard at `/hosts/{id}/restore`; the operator can cancel a running restore (or any other running job) from the live job page. Snapshot diff between two snapshots renders as a normal job page.
|
||||
- A failed backup raises an alert via the configured channel within 60s.
|
||||
- The audit-log UI lets an admin filter by user / action / target / time range.
|
||||
|
||||
---
|
||||
|
||||
@@ -290,3 +329,14 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
|
||||
- [ ] **X-03** Periodic dependency updates (`dependabot` or `renovate`)
|
||||
- [ ] **X-04** Threat-model review at end of each phase
|
||||
- [ ] **X-05** Proper first-run onboarding UI: admin shouldn't need to `curl` `/api/bootstrap` by hand. Render the bootstrap form on the same login page (extra "setup token" field shown only while no admin user exists, hidden after); on submit POST to `/api/bootstrap`, then drop straight into a session. Surface the one-time token from the server log somewhere copy-able (or print a clickable URL with the token in the query string at first-run). Also: relax the 12-char password floor for the first-run path or document it in the form so `admin` doesn't silently fail validation.
|
||||
|
||||
---
|
||||
|
||||
## Future / unscheduled
|
||||
|
||||
> Items here have a plausible use case but no confirmed need. They live
|
||||
> outside numbered phases until a concrete trigger (a user request, a
|
||||
> security review finding, a real disaster-recovery exercise) bumps them
|
||||
> back into a phase.
|
||||
|
||||
- [ ] **F-01** ~~P3-04~~ Cross-host restore. De-scoped from Phase 3 on 2026-05-04. Disaster recovery is already covered: stand up a replacement host, paste the original repo creds at enrolment, snapshots reappear, restore is same-host. The remaining "pull a file from host A onto host C without granting C permanent access" use case is genuinely different (file sharing / migration, not DR) and hasn't been requested. Original spec language was: "target agent receives a temporary scoped read credential for source host's repo (single-job, auto-revoked); UI supports source→target path remapping; warns when source paths need root and target service user is non-root". Re-promote when there's a real ask.
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -206,6 +206,78 @@
|
||||
.src-row.clickable > .row-link { pointer-events: auto; }
|
||||
.src-row.clickable > .row-action { pointer-events: auto; }
|
||||
|
||||
/* ---------- dropdown menu (header actions) ----------
|
||||
* Uses native <details><summary> for keyboard + no-JS support.
|
||||
* The summary is styled like a .btn, the panel sits absolute below.
|
||||
* Click-outside-to-close handled by CSS via :has() — no JS.
|
||||
*/
|
||||
.dropdown { position: relative; display: inline-block; }
|
||||
.dropdown summary {
|
||||
list-style: none; cursor: pointer;
|
||||
/* match .btn shape */
|
||||
font-size: 12px; font-weight: 500;
|
||||
padding: 6px 11px; border-radius: 5px;
|
||||
background: transparent;
|
||||
border: 1px solid var(--line);
|
||||
color: var(--ink-mid);
|
||||
transition: all 120ms ease;
|
||||
display: inline-flex; align-items: center; gap: 6px;
|
||||
user-select: none;
|
||||
}
|
||||
.dropdown summary::-webkit-details-marker { display: none; }
|
||||
.dropdown summary::marker { content: ""; }
|
||||
.dropdown summary:hover { background: var(--panel-hi); color: var(--ink); }
|
||||
.dropdown summary .chev {
|
||||
font-size: 9px; color: var(--ink-fade);
|
||||
transition: transform 120ms ease;
|
||||
}
|
||||
.dropdown[open] summary .chev { transform: rotate(180deg); }
|
||||
.dropdown[open] summary { background: var(--panel-hi); color: var(--ink); }
|
||||
.dropdown-menu {
|
||||
position: absolute; top: calc(100% + 4px); right: 0;
|
||||
z-index: 30;
|
||||
min-width: 220px;
|
||||
background: var(--panel);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 6px;
|
||||
box-shadow: 0 6px 24px -8px rgba(0,0,0,0.55);
|
||||
padding: 4px;
|
||||
}
|
||||
.dropdown-item {
|
||||
display: block;
|
||||
padding: 8px 11px;
|
||||
border-radius: 4px;
|
||||
text-decoration: none;
|
||||
color: var(--ink-mid);
|
||||
font-size: 12.5px;
|
||||
line-height: 1.35;
|
||||
}
|
||||
.dropdown-item:hover { background: var(--panel-hi); color: var(--ink); }
|
||||
.dropdown-item .label { display: block; color: var(--ink); font-weight: 500; }
|
||||
.dropdown-item .hint {
|
||||
display: block; font-size: 11px; color: var(--ink-mute); margin-top: 2px;
|
||||
font-family: 'JetBrains Mono', ui-monospace, monospace;
|
||||
}
|
||||
|
||||
/* ---------- snapshot picker rows (Restore wizard step 1) ---------- */
|
||||
.snap-row {
|
||||
display: grid; align-items: center;
|
||||
grid-template-columns: 150px 130px 1fr 90px 130px 80px;
|
||||
column-gap: 16px;
|
||||
padding: 11px 14px; font-size: 13px;
|
||||
border-bottom: 1px solid var(--line-soft);
|
||||
cursor: pointer;
|
||||
transition: background 100ms ease;
|
||||
}
|
||||
.snap-row:last-child { border-bottom: 0; }
|
||||
.snap-row:hover { background: var(--panel-hi); }
|
||||
.snap-row.head {
|
||||
font-size: 11px; color: var(--ink-fade);
|
||||
text-transform: uppercase; letter-spacing: 0.08em;
|
||||
padding-top: 9px; padding-bottom: 9px; cursor: default;
|
||||
}
|
||||
.snap-row.head:hover { background: transparent; }
|
||||
|
||||
/* ---------- schedule rows (Schedules tab) ---------- */
|
||||
.schd-row {
|
||||
display: grid; align-items: center;
|
||||
|
||||
@@ -35,8 +35,10 @@
|
||||
<div>Snapshot id</div>
|
||||
<div>Time</div>
|
||||
<div>Paths</div>
|
||||
<div class="text-right">Size</div>
|
||||
<div class="text-right">Files</div>
|
||||
<div class="text-right{{if $page.LegacyRestic}} cursor-help{{end}}"
|
||||
{{if $page.LegacyRestic}}title="Needs restic 0.17+ on the agent host. This host runs {{$host.ResticVersion}}."{{end}}>Size</div>
|
||||
<div class="text-right{{if $page.LegacyRestic}} cursor-help{{end}}"
|
||||
{{if $page.LegacyRestic}}title="Needs restic 0.17+ on the agent host. This host runs {{$host.ResticVersion}}."{{end}}>Files</div>
|
||||
<div></div>
|
||||
</div>
|
||||
|
||||
@@ -51,7 +53,7 @@
|
||||
{{if eq $s.FileCount 0}}<span class="text-ink-fade">—</span>{{else}}{{comma $s.FileCount}}{{end}}
|
||||
</div>
|
||||
<div class="text-right">
|
||||
<button class="btn btn-ghost" disabled title="restore wizard lands in P3">Restore →</button>
|
||||
<a href="/hosts/{{$host.ID}}/snapshots/{{$s.ID}}/restore" class="btn">Restore →</a>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
@@ -76,6 +78,35 @@
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="panel rounded-[7px] px-4 py-3.5">
|
||||
<div class="text-[11px] text-ink-fade uppercase tracking-[0.1em] mb-2.5">Restore</div>
|
||||
<p class="text-[12px] text-ink-mute leading-[1.55] mb-3">
|
||||
Pick a snapshot, choose paths, dispatch. Live progress streams once the
|
||||
agent starts.
|
||||
</p>
|
||||
<a href="/hosts/{{$host.ID}}/restore"
|
||||
class="btn btn-block">Restore from snapshot…</a>
|
||||
</div>
|
||||
|
||||
{{if gt $host.SnapshotCount 1}}
|
||||
<div class="panel rounded-[7px] px-4 py-3.5">
|
||||
<div class="text-[11px] text-ink-fade uppercase tracking-[0.1em] mb-2.5">Compare snapshots</div>
|
||||
<p class="text-[12px] text-ink-mute leading-[1.55] mb-3">
|
||||
Diff two snapshots to see what changed. Output streams to a live
|
||||
job page like a regular run.
|
||||
</p>
|
||||
<form method="post" action="/hosts/{{$host.ID}}/snapshots/diff"
|
||||
hx-post="/hosts/{{$host.ID}}/snapshots/diff" hx-swap="none"
|
||||
class="space-y-2">
|
||||
<input type="text" name="snapshot_a" placeholder="snapshot A id"
|
||||
class="field mono text-[11.5px]" />
|
||||
<input type="text" name="snapshot_b" placeholder="snapshot B id"
|
||||
class="field mono text-[11.5px]" />
|
||||
<button type="submit" class="btn btn-block">Diff →</button>
|
||||
</form>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
<div class="panel rounded-[7px] px-4 py-3.5">
|
||||
<div class="text-[11px] text-bad uppercase tracking-[0.1em] font-semibold mb-2.5">Danger zone</div>
|
||||
<p class="text-pretty text-[12px] text-ink-mute leading-[1.55] mb-3">
|
||||
|
||||
@@ -0,0 +1,380 @@
|
||||
{{define "title"}}{{.Title}}{{end}}
|
||||
|
||||
{{define "content"}}
|
||||
{{template "host_chrome" .}}
|
||||
{{$page := .Page}}
|
||||
{{$host := $page.Host}}
|
||||
<div class="max-w-[1280px] mx-auto px-8 pt-6 pb-14">
|
||||
|
||||
<div class="flex items-baseline justify-between mb-4">
|
||||
<div>
|
||||
<h2 class="text-[19px] font-medium tracking-[-0.005em]">Restore from snapshot</h2>
|
||||
<div class="text-[12.5px] text-ink-mute mt-1">
|
||||
Pick a snapshot, choose paths, decide where files go, then dispatch.
|
||||
Live progress streams to a job page once you start.
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex gap-2">
|
||||
<a href="/hosts/{{$host.ID}}" class="btn">Cancel</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{if $page.Error}}
|
||||
<div class="rounded-[6px] px-3.5 py-3 text-[13px] mb-4"
|
||||
style="border: 1px solid color-mix(in oklch, var(--bad), transparent 60%); background: color-mix(in oklch, var(--bad), transparent 92%);">
|
||||
{{$page.Error}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
<form method="post" action="/hosts/{{$host.ID}}/restore" id="restore-form" class="space-y-4">
|
||||
|
||||
{{/* ============ STEP 1 — snapshot picker ============ */}}
|
||||
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden">
|
||||
<header class="flex items-center justify-between px-[18px] py-[14px] border-b border-line-soft"
|
||||
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
|
||||
<div class="flex items-center gap-3">
|
||||
{{if $page.Selected}}
|
||||
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
|
||||
style="background: color-mix(in oklch, var(--ok), transparent 86%); color: var(--ok); border: 1px solid color-mix(in oklch, var(--ok), transparent 60%);">✓</span>
|
||||
{{else}}
|
||||
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
|
||||
style="background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);">1</span>
|
||||
{{end}}
|
||||
<div>
|
||||
<div class="text-[14px] font-medium">Snapshot</div>
|
||||
<div class="text-[12px] text-ink-mute mt-0.5">Pick the point-in-time you want to restore from.</div>
|
||||
</div>
|
||||
</div>
|
||||
<span class="mono text-[11px] text-ink-fade">step 1 of 4</span>
|
||||
</header>
|
||||
<div class="p-[18px]">
|
||||
{{if $page.Selected}}
|
||||
{{/* selected summary card */}}
|
||||
<div class="grid items-center gap-4 px-3.5 py-3 rounded-[6px] bg-bg border border-line-soft"
|
||||
style="grid-template-columns: auto 1fr auto auto;">
|
||||
<span class="mono text-[12px] text-accent">{{$page.Selected.ShortID}}</span>
|
||||
<div>
|
||||
<div class="text-[13px] text-ink">{{$page.Selected.Time.Format "2006-01-02 15:04 MST"}} <span class="text-ink-fade mx-2">·</span><span class="text-ink-mute">{{relTime $page.Selected.Time}}</span></div>
|
||||
<div class="mt-1 text-[12px] text-ink-mute">
|
||||
{{range $page.Selected.Tags}}<span class="tag mr-1.5">{{.}}</span>{{end}}
|
||||
paths:
|
||||
{{range $i, $p := $page.Selected.Paths}}{{if $i}}, {{end}}<span class="mono text-ink-mid">{{$p}}</span>{{end}}
|
||||
{{if $page.Selected.SizeBytes}} · {{bytes $page.Selected.SizeBytes}}{{end}}
|
||||
</div>
|
||||
</div>
|
||||
<span class="text-ink-fade text-[12px]">picked from {{len $page.Snapshots}} snapshots</span>
|
||||
<a href="/hosts/{{$host.ID}}/restore" class="btn">Change</a>
|
||||
</div>
|
||||
<input type="hidden" name="snapshot_id" value="{{$page.Selected.ID}}" />
|
||||
{{else}}
|
||||
{{/* full picker table */}}
|
||||
<div class="rounded-[6px] border border-line-soft bg-bg overflow-hidden">
|
||||
<div class="snap-row head">
|
||||
<div>Time</div>
|
||||
<div>Tag</div>
|
||||
<div>Paths</div>
|
||||
<div>Size</div>
|
||||
<div>Snapshot ID</div>
|
||||
<div></div>
|
||||
</div>
|
||||
{{if not $page.Snapshots}}
|
||||
<div class="px-4 py-8 text-center text-ink-mute text-[13px]">No snapshots yet. Run a backup first.</div>
|
||||
{{end}}
|
||||
{{range $page.Snapshots}}
|
||||
<a href="/hosts/{{$host.ID}}/snapshots/{{.ID}}/restore" class="snap-row" style="text-decoration: none; color: inherit;">
|
||||
<div class="mono text-ink-mid">{{relTime .Time}}</div>
|
||||
<div>{{range .Tags}}<span class="tag">{{.}}</span>{{end}}</div>
|
||||
<div class="text-ink-mute" style="overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">
|
||||
{{range $i, $p := .Paths}}{{if $i}}, {{end}}<span class="mono text-ink-mid">{{$p}}</span>{{end}}
|
||||
</div>
|
||||
<div class="mono text-ink-mid">{{if .SizeBytes}}{{bytes .SizeBytes}}{{else}}—{{end}}</div>
|
||||
<div class="mono text-ink-mid">{{.ShortID}}</div>
|
||||
<div></div>
|
||||
</a>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
{{/* ============ STEP 2 — paths (tree browser) ============ */}}
|
||||
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden {{if not $page.Selected}}opacity-40 pointer-events-none{{end}}">
|
||||
<header class="flex items-center justify-between px-[18px] py-[14px] border-b border-line-soft"
|
||||
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
|
||||
<div class="flex items-center gap-3">
|
||||
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
|
||||
style="{{if $page.Selected}}background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);{{else}}background: var(--bg); color: var(--ink-mute); border: 1px solid var(--line);{{end}}">2</span>
|
||||
<div>
|
||||
<div class="text-[14px] font-medium">Paths</div>
|
||||
<div class="text-[12px] text-ink-mute mt-0.5">Tick files and directories to restore. Folders restore recursively.</div>
|
||||
</div>
|
||||
</div>
|
||||
<span class="mono text-[11px] text-ink-fade">step 2 of 4</span>
|
||||
</header>
|
||||
<div class="p-[18px]">
|
||||
{{if $page.Selected}}
|
||||
<div class="rounded-[6px] border border-line-soft bg-bg overflow-hidden p-2">
|
||||
{{/* Root tree node — fetched on first wizard render; child
|
||||
expansions reuse the same tree.list cache server-side. */}}
|
||||
<div id="tree-root">
|
||||
<div class="text-ink-mute text-[12.5px] mono px-3 py-2">loading…</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
// Load the root level of the snapshot tree into #tree-root as soon as the
// wizard renders. The server answers with an HTML fragment, which is swapped
// in verbatim; a "tree:loaded" event is then raised on <body> so listeners
// can refresh any state derived from the tree.
(function() {
  var root = document.getElementById('tree-root');
  if (!root) return;
  fetch('/hosts/{{$host.ID}}/restore/tree?snapshot={{$page.Selected.ID}}&path=/', { credentials: 'same-origin' })
    .then(function(r) { return r.text(); })
    .then(function(html) {
      root.innerHTML = html;
      document.body.dispatchEvent(new CustomEvent('tree:loaded'));
    })
    .catch(function(e) {
      // Without this handler a network failure left the "loading…"
      // placeholder on screen indefinitely. The message is attached via
      // textContent so the error text is never parsed as markup.
      var msg = document.createElement('div');
      msg.className = 'px-3 py-2 mono text-[12px] text-bad';
      msg.textContent = 'load failed: ' + e;
      root.innerHTML = '';
      root.appendChild(msg);
    });
})();
|
||||
</script>
|
||||
<div class="mt-3 px-3.5 py-2.5 rounded-[6px] text-[12.5px]"
|
||||
style="border: 1px solid color-mix(in oklch, var(--accent), transparent 70%); background: color-mix(in oklch, var(--accent), transparent 92%);">
|
||||
<span class="text-accent" id="tally-count">0 files selected</span>
|
||||
<span class="text-ink-fade mx-2">·</span>
|
||||
<span class="text-ink-mute mono" id="tally-paths">tick a file or directory above</span>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="text-ink-mute text-[13px]">Pick a snapshot above to load its paths.</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
{{/* ============ STEP 3 — target ============ */}}
|
||||
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden {{if not $page.Selected}}opacity-40 pointer-events-none{{end}}">
|
||||
<header class="flex items-center justify-between px-[18px] py-[14px] border-b border-line-soft"
|
||||
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
|
||||
<div class="flex items-center gap-3">
|
||||
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
|
||||
style="background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);">3</span>
|
||||
<div>
|
||||
<div class="text-[14px] font-medium">Target</div>
|
||||
<div class="text-[12px] text-ink-mute mt-0.5">Where should the files land? Defaults to a fresh, isolated directory.</div>
|
||||
</div>
|
||||
</div>
|
||||
<span class="mono text-[11px] text-ink-fade">step 3 of 4</span>
|
||||
</header>
|
||||
<div class="p-[18px]">
|
||||
<div class="grid grid-cols-2 gap-3.5">
|
||||
<label class="block rounded-[7px] p-4 cursor-pointer transition border target-card-new"
|
||||
id="target-new-card"
|
||||
style="border-color: color-mix(in oklch, var(--accent), transparent 50%); background: color-mix(in oklch, var(--accent), transparent 95%);">
|
||||
<div class="flex items-start gap-3">
|
||||
<input type="radio" name="target_mode" value="new_dir" class="mt-1" {{if not $page.FormInPlace}}checked{{end}} />
|
||||
<div class="flex-1">
|
||||
<div class="text-[14px] font-medium text-ink">New directory</div>
|
||||
<div class="text-[12px] text-ink-mute mt-1 leading-[1.55]">
|
||||
Files restore into a fresh path on the host. Original files untouched.
|
||||
Restored as the agent user — original uid/gid is dropped (restic ≥ 0.17;
|
||||
older versions preserve it).
|
||||
</div>
|
||||
<div class="mt-3 flex items-center gap-2.5">
|
||||
<span class="text-ink-fade mono text-[12px]">→</span>
|
||||
<input type="text" name="target_dir" id="target-dir-input"
|
||||
class="field mono text-[12px] flex-1"
|
||||
value="{{if $page.FormTargetDir}}{{$page.FormTargetDir}}{{else}}{{$page.DefaultTargetDir}}{{end}}"
|
||||
placeholder="$HOME/rm-restore/<job-id>/" />
|
||||
</div>
|
||||
<div class="text-[11.5px] text-ink-fade mt-1.5">
|
||||
<span class="mono">$HOME</span> resolves to the agent user's home;
|
||||
<span class="mono"><job-id></span> is substituted on dispatch.
|
||||
Edit if you want a specific directory.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</label>
|
||||
|
||||
<label class="block rounded-[7px] p-4 cursor-pointer transition border target-card-inplace"
|
||||
id="target-inplace-card"
|
||||
style="border-color: color-mix(in oklch, var(--bad), transparent 70%); background: color-mix(in oklch, var(--bad), transparent 96%);">
|
||||
<div class="flex items-start gap-3">
|
||||
<input type="radio" name="target_mode" value="in_place" class="mt-1" {{if $page.FormInPlace}}checked{{end}} />
|
||||
<div class="flex-1">
|
||||
<div class="text-[14px] font-medium">
|
||||
<span class="text-bad">In place</span>
|
||||
<span class="text-ink-mute font-normal">— overwrite original paths</span>
|
||||
</div>
|
||||
<div class="text-[12px] text-ink-mute mt-1 leading-[1.55]">
|
||||
Files replace whatever is at their original paths.
|
||||
Original ownership and permissions are preserved.
|
||||
<span class="text-bad">Destructive — cannot be undone.</span>
|
||||
</div>
|
||||
<div class="mt-3 px-3 py-3 rounded-[5px]"
|
||||
style="background: color-mix(in oklch, var(--bad), transparent 92%); border: 1px solid color-mix(in oklch, var(--bad), transparent 60%);">
|
||||
<div class="text-[11px] text-bad uppercase tracking-[0.08em] font-medium">Confirm host name</div>
|
||||
<div class="text-[11.5px] text-ink-mute mt-1 leading-[1.55]">
|
||||
Type <span class="mono text-ink">{{$host.Name}}</span> to enable this option.
|
||||
</div>
|
||||
<input type="text" name="confirm_hostname" class="field mono mt-2"
|
||||
placeholder="{{$host.Name}}"
|
||||
value="{{$page.FormConfirmHN}}" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
{{/* ============ STEP 4 — confirm ============ */}}
|
||||
<section class="rounded-[8px] border border-line-soft bg-panel overflow-hidden {{if not $page.Selected}}opacity-40 pointer-events-none{{end}}">
|
||||
<header class="flex items-center justify-between px-[18px] py-[14px]"
|
||||
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
|
||||
<div class="flex items-center gap-3">
|
||||
<span class="inline-flex items-center justify-center w-[22px] h-[22px] rounded-full mono text-[11px] font-medium"
|
||||
style="background: color-mix(in oklch, var(--accent), transparent 84%); color: var(--accent); border: 1px solid color-mix(in oklch, var(--accent), transparent 50%);">4</span>
|
||||
<div>
|
||||
<div class="text-[14px] font-medium">Confirm & start</div>
|
||||
<div class="text-[12px] text-ink-mute mt-0.5">Final review. Logs and progress will stream live.</div>
|
||||
</div>
|
||||
</div>
|
||||
<span class="mono text-[11px] text-ink-fade">step 4 of 4</span>
|
||||
</header>
|
||||
<div class="px-[18px] pb-[18px]" id="confirm-summary">
|
||||
<div class="text-[12px] text-ink-mute py-2">A summary will appear here once you've made your selections.</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
{{/* sticky-style action bar */}}
|
||||
<div class="rounded-[8px] border border-line-soft px-[18px] py-[14px] flex items-center justify-between"
|
||||
style="background: color-mix(in oklch, var(--panel), var(--panel-hi) 30%);">
|
||||
<div class="text-[12.5px] text-ink-mute">
|
||||
Audit row <span class="mono text-ink-mid">host.restore</span> will be written on dispatch.
|
||||
</div>
|
||||
<div class="flex items-center gap-2.5">
|
||||
<a href="/hosts/{{$host.ID}}" class="btn">Back</a>
|
||||
<button type="submit" id="dispatch-btn" class="btn btn-primary btn-lg" {{if not $page.Online}}disabled title="agent is offline"{{end}}>
|
||||
Start restore →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
{{/* Lightweight JS to drive the live tally + summary card + tree toggle.
|
||||
The tree-toggle is plain fetch (not HTMX) so its target lookup is
|
||||
trivial — the .tree-children div is always the next sibling
|
||||
inside the same .tree-pair wrapper. */}}
|
||||
<script>
|
||||
// Click handler for a directory row's expand/collapse button.
//
// btn is the .tree-toggle button. Its children container is the
// .tree-children element that is a direct child of the same .tree-pair
// wrapper. On the first expansion the children are fetched from the URL in
// data-tree-url and swapped in as HTML; afterwards the button only shows or
// hides the already-loaded container.
window.__rmTreeToggle = function(btn) {
  var pair = btn.closest('.tree-pair');
  if (!pair) return;
  var kids = pair.querySelector(':scope > .tree-children');
  if (!kids) return;

  // Already loaded: plain show/hide.
  if (btn.getAttribute('data-loaded') === 'true') {
    kids.classList.toggle('hidden');
    btn.textContent = kids.classList.contains('hidden') ? '▸' : '▾';
    return;
  }

  var url = btn.getAttribute('data-tree-url');
  if (!url) return; // nothing to fetch; avoids requesting the literal "null"

  btn.disabled = true;
  fetch(url, { credentials: 'same-origin' })
    .then(function(r) {
      // Carry the status along with the body so the next step can decide
      // whether this response may be cached as "loaded".
      return r.text().then(function(html) { return { ok: r.ok, html: html }; });
    })
    .then(function(res) {
      kids.innerHTML = res.html;
      kids.classList.remove('hidden');
      btn.textContent = '▾';
      // Only a successful response is marked loaded. An error fragment is
      // still displayed, but the next click retries instead of merely
      // toggling a stale error.
      if (res.ok) btn.setAttribute('data-loaded', 'true');
      btn.disabled = false;
      // Tell listeners on <body> that new tree rows exist.
      document.body.dispatchEvent(new CustomEvent('tree:loaded'));
    })
    .catch(function(e) {
      // Build the message as a text node rather than concatenating it into
      // innerHTML, so the error string is never parsed as markup.
      var msg = document.createElement('div');
      msg.className = 'px-3 py-2 mono text-[12px] text-bad';
      msg.textContent = 'load failed: ' + e;
      kids.innerHTML = '';
      kids.appendChild(msg);
      kids.classList.remove('hidden');
      btn.textContent = '▾';
      btn.disabled = false;
    });
};
|
||||
(function() {
|
||||
const form = document.getElementById('restore-form');
|
||||
if (!form) return;
|
||||
const tallyCount = document.getElementById('tally-count');
|
||||
const tallyPaths = document.getElementById('tally-paths');
|
||||
const dispatchBtn = document.getElementById('dispatch-btn');
|
||||
const summary = document.getElementById('confirm-summary');
|
||||
const inplaceRadio = document.querySelector('input[name="target_mode"][value="in_place"]');
|
||||
const newRadio = document.querySelector('input[name="target_mode"][value="new_dir"]');
|
||||
const newCard = document.getElementById('target-new-card');
|
||||
const inplaceCard = document.getElementById('target-inplace-card');
|
||||
const confirmInput = document.querySelector('input[name="confirm_hostname"]');
|
||||
const hostName = {{$host.Name | js}};
|
||||
const defaultTarget = {{$page.DefaultTargetDir | js}};
|
||||
const selectedSnapID = {{if $page.Selected}}{{$page.Selected.ShortID | js}}{{else}}""{{end}};
|
||||
const selectedSnapTime = {{if $page.Selected}}{{$page.Selected.Time.Format "2006-01-02 15:04 MST" | js}}{{else}}""{{end}};
|
||||
|
||||
function getCheckedPaths() {
|
||||
return Array.from(form.querySelectorAll('input[name="paths"]:checked')).map(i => i.value);
|
||||
}
|
||||
function recompute() {
|
||||
const paths = getCheckedPaths();
|
||||
const count = paths.length;
|
||||
if (tallyCount) tallyCount.textContent = count + ' file' + (count === 1 ? '' : 's') + ' selected';
|
||||
if (tallyPaths) {
|
||||
tallyPaths.textContent = count === 0 ? 'tick a file or directory above'
|
||||
: paths.slice(0, 4).join(' · ') + (count > 4 ? ' …' : '');
|
||||
}
|
||||
// Card emphasis on radio change
|
||||
if (newCard && inplaceCard && inplaceRadio && newRadio) {
|
||||
const isInPlace = inplaceRadio.checked;
|
||||
newCard.style.borderColor = isInPlace ? 'var(--line-soft)' : 'color-mix(in oklch, var(--accent), transparent 50%)';
|
||||
newCard.style.background = isInPlace ? 'var(--bg)' : 'color-mix(in oklch, var(--accent), transparent 95%)';
|
||||
inplaceCard.style.borderColor = isInPlace ? 'color-mix(in oklch, var(--bad), transparent 35%)' : 'color-mix(in oklch, var(--bad), transparent 70%)';
|
||||
inplaceCard.style.background = isInPlace ? 'color-mix(in oklch, var(--bad), transparent 90%)' : 'color-mix(in oklch, var(--bad), transparent 96%)';
|
||||
}
|
||||
// Dispatch button state
|
||||
if (dispatchBtn) {
|
||||
const inPlace = inplaceRadio && inplaceRadio.checked;
|
||||
const okConfirm = !inPlace || (confirmInput && confirmInput.value.trim() === hostName);
|
||||
const enabled = count > 0 && okConfirm;
|
||||
dispatchBtn.disabled = !enabled || !{{if $page.Online}}true{{else}}false{{end}};
|
||||
dispatchBtn.textContent = inPlace ? 'Start restore (overwrite) →' : 'Start restore →';
|
||||
if (inPlace) dispatchBtn.classList.add('btn-danger'); else dispatchBtn.classList.remove('btn-danger');
|
||||
}
|
||||
// Summary card
|
||||
if (summary) {
|
||||
if (count === 0) {
|
||||
summary.innerHTML = '<div class="text-[12px] text-ink-mute py-2">A summary will appear here once you\'ve made your selections.</div>';
|
||||
} else {
|
||||
const inPlace = inplaceRadio && inplaceRadio.checked;
|
||||
const escTarget = defaultTarget
|
||||
.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>');
|
||||
const targetLine = inPlace
|
||||
? '<span class="text-bad">in place · originals will be overwritten</span>'
|
||||
: '<span class="text-ink">New directory</span> <span class="text-ink-fade mx-2">·</span> <span class="mono text-ink-mid">' + escTarget + '</span>';
|
||||
const ownLine = 'preserved (uid/gid/mode/mtime)';
|
||||
const pathLines = paths.slice(0, 12).map(p => '<div>' + p + '</div>').join('');
|
||||
const more = paths.length > 12 ? ('<div class="text-ink-fade">… and ' + (paths.length - 12) + ' more</div>') : '';
|
||||
summary.innerHTML = `
|
||||
<div class="rounded-[6px] border border-line-soft p-3.5 bg-bg">
|
||||
<div class="grid gap-y-2.5" style="grid-template-columns: 140px 1fr; column-gap: 18px; font-size: 13px;">
|
||||
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Source</span>
|
||||
<div>snapshot <span class="mono text-accent">${selectedSnapID}</span> · <span class="text-ink-mid">${selectedSnapTime}</span></div>
|
||||
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Paths</span>
|
||||
<div>
|
||||
<span class="text-ink">${count} file${count === 1 ? '' : 's'}</span>
|
||||
<div class="mono text-[11.5px] text-ink-mute mt-1.5 leading-[1.7]">${pathLines}${more}</div>
|
||||
</div>
|
||||
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Target</span>
|
||||
<div>${targetLine}</div>
|
||||
<span class="text-[11px] text-ink-fade uppercase tracking-[0.08em] pt-0.5">Ownership</span>
|
||||
<div class="text-ink-mute">${ownLine}</div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Recompute on any change in the form (path checks, radio swap, typed-confirm).
|
||||
form.addEventListener('change', recompute);
|
||||
form.addEventListener('input', recompute);
|
||||
// Also after HTMX swaps in tree fragments (so initial state is right).
|
||||
document.body.addEventListener('htmx:afterSwap', recompute);
|
||||
recompute();
|
||||
})();
|
||||
</script>
|
||||
|
||||
{{end}}
|
||||
@@ -63,6 +63,22 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex items-center gap-2">
|
||||
<details class="dropdown" id="download-menu">
|
||||
<summary>
|
||||
Download log
|
||||
<span class="chev">▾</span>
|
||||
</summary>
|
||||
<div class="dropdown-menu">
|
||||
<a class="dropdown-item" href="/api/jobs/{{$job.ID}}/log.txt">
|
||||
<span class="label">Plain text</span>
|
||||
<span class="hint">.txt · for humans / paste into a ticket</span>
|
||||
</a>
|
||||
<a class="dropdown-item" href="/api/jobs/{{$job.ID}}/log.ndjson">
|
||||
<span class="label">JSON Lines</span>
|
||||
<span class="hint">.ndjson · pipe into jq / tooling</span>
|
||||
</a>
|
||||
</div>
|
||||
</details>
|
||||
{{if $page.IsActive}}
|
||||
<button class="btn btn-danger" id="cancel-btn"
|
||||
hx-post="/api/jobs/{{$job.ID}}/cancel"
|
||||
@@ -71,14 +87,24 @@
|
||||
<a href="/hosts/{{$host.ID}}" class="btn">Back to host</a>
|
||||
{{end}}
|
||||
</div>
|
||||
<script>
|
||||
// Collapse the "Download log" <details> menu whenever a click lands
// anywhere outside of it.
(function() {
  var menu = document.getElementById('download-menu');
  if (menu === null) return;

  function closeOnOutsideClick(evt) {
    if (!menu.open) return;                  // already closed
    if (menu.contains(evt.target)) return;   // click was inside the menu
    menu.open = false;
  }

  document.addEventListener('click', closeOnOutsideClick);
})();
|
||||
</script>
|
||||
</div>
|
||||
|
||||
{{/* ---------- progress (running only) ---------- */}}
|
||||
{{if $page.IsActive}}
|
||||
<div class="mt-7" id="progress-block">
|
||||
<div class="mt-7 panel rounded-[8px] p-[18px]" id="progress-block">
|
||||
<div class="flex items-center justify-between mb-2.5">
|
||||
<div class="flex items-center gap-3 text-sm">
|
||||
<span class="mono text-ink font-medium" id="progress-pct">—</span>
|
||||
<div class="flex items-center gap-3.5 text-sm">
|
||||
<span class="mono text-ink font-medium" id="progress-pct" style="font-size: 18px;">—</span>
|
||||
<span class="text-ink-mute" id="progress-bytes"></span>
|
||||
</div>
|
||||
<div class="text-sm text-ink-mute" id="progress-rate"></div>
|
||||
@@ -86,6 +112,12 @@
|
||||
<div class="progress-track">
|
||||
<div class="progress-fill" id="progress-fill" style="width: 0%;"></div>
|
||||
</div>
|
||||
{{if eq (printf "%s" $job.Kind) "restore"}}
|
||||
<div class="mt-3 text-[12px] text-ink-mute" id="restore-current-block">
|
||||
<span class="text-ink-fade uppercase tracking-[0.08em] text-[10.5px]">Current</span>
|
||||
<span class="mono text-ink-mid ml-2.5" id="restore-current-file">—</span>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
@@ -194,6 +226,18 @@
|
||||
return (i === 0 ? n.toFixed(0) : n.toFixed(1)) + ' ' + u[i];
|
||||
}
|
||||
|
||||
const currentFileEl = document.getElementById('restore-current-file');

// Restore jobs only: mirror the latest path-looking stdout line into the
// "Current" slot of the progress panel. A line qualifies when, after
// trimming, it begins with "/" and is shorter than 400 characters; anything
// else leaves the slot untouched. Does nothing when the slot is absent from
// the page.
function maybeUpdateCurrent(p) {
  if (!currentFileEl) return;
  if (p.stream !== 'stdout') return;

  const text = (p.payload || '').trim();
  const looksLikePath = text.startsWith('/') && text.length < 400;
  if (looksLikePath) {
    currentFileEl.textContent = text;
  }
}
|
||||
function appendLine(p) {
|
||||
// Drop the "awaiting" placeholder once real lines arrive.
|
||||
if (stream.children.length === 1 && stream.firstElementChild.textContent.includes('awaiting agent')) {
|
||||
@@ -208,6 +252,7 @@
|
||||
`<span class="log-stream-${p.stream}">${escapeHtml(p.payload)}</span>`;
|
||||
stream.appendChild(line);
|
||||
if (autoScroll) container.scrollTop = container.scrollHeight;
|
||||
maybeUpdateCurrent(p);
|
||||
}
|
||||
|
||||
ws.onmessage = (ev) => {
|
||||
|
||||
@@ -121,6 +121,26 @@
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{/* ---------- latest restore line (P3-X3) ---------- */}}
|
||||
{{if $page.RestoreStatus}}
|
||||
<div class="text-[11.5px] text-ink-mute mt-1 leading-[1.5]">
|
||||
{{if eq $page.RestoreStatus "succeeded"}}
|
||||
last restore · <span class="text-ok">succeeded</span> <span class="mono text-ink-mid">{{relTime $page.RestoreAt}}</span> ·
|
||||
<a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job log →</a>
|
||||
{{else if eq $page.RestoreStatus "failed"}}
|
||||
last restore · <span class="text-bad font-medium">failed</span> <span class="mono text-ink-mid">{{relTime $page.RestoreAt}}</span> ·
|
||||
<a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job log →</a>
|
||||
{{else if eq $page.RestoreStatus "running"}}
|
||||
<span class="text-accent">restore running…</span> · <a href="/jobs/{{$page.RestoreJobID}}" class="link mono">live log →</a>
|
||||
{{else if eq $page.RestoreStatus "cancelled"}}
|
||||
last restore · <span class="text-warn">cancelled</span> <span class="mono text-ink-mid">{{relTime $page.RestoreAt}}</span> ·
|
||||
<a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job log →</a>
|
||||
{{else if eq $page.RestoreStatus "queued"}}
|
||||
<span class="text-ink-fade">restore queued</span> · <a href="/jobs/{{$page.RestoreJobID}}" class="link mono">job {{$page.RestoreJobID}}</a>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{/* ---------- secondary tabs ---------- */}}
|
||||
<div class="flex items-end mt-1.5">
|
||||
<a class="sub-tab {{if eq $page.SubTab "snapshots"}}active{{end}}" href="/hosts/{{$host.ID}}">Snapshots <span class="mono text-ink-fade text-[11px] ml-1">{{comma $host.SnapshotCount}}</span></a>
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
{{define "tree_node"}}
{{/* One level of the restore tree, returned as an HTML fragment.
     Renders either the error carried in .Page.Error, or a header showing
     .Page.Path followed by one .tree-pair per entry in .Page.Children.
     Directory entries get a toggle button plus an initially hidden
     .tree-children container, which must stay a direct child of .tree-pair
     because the toggle script looks it up that way. */}}
{{$page := .Page}}
{{if $page.Error}}
<div class="px-3 py-2 mono text-[12px] text-bad">error: {{$page.Error}}</div>
{{else}}
<div class="flex items-center gap-2 px-3 py-1.5 text-[12px] text-ink-mute border-b border-line-soft">
  <span class="mono text-ink-mid">{{$page.Path}}</span>
  {{if not $page.Children}}
  <span class="text-ink-fade ml-auto mono text-[11px]">empty directory</span>
  {{end}}
</div>
{{range $page.Children}}
<div class="tree-pair">
  <div class="grid items-center gap-2 px-3 py-[5px] mono text-[12.5px] border-b border-line-soft"
       style="grid-template-columns: 14px 16px auto 1fr auto;">
    {{if .IsDir}}
    {{/* The button's only content is an arrow glyph, so it carries an
         explicit accessible name. */}}
    <button type="button"
            class="tree-toggle text-ink-mute text-[10px] cursor-pointer"
            aria-label="Expand or collapse {{.Name}}"
            data-tree-url="/hosts/{{$page.HostID}}/restore/tree?snapshot={{$page.SnapshotID}}&path={{.Path}}"
            data-loaded="false"
            onclick="window.__rmTreeToggle(this)">▸</button>
    {{else}}
    <span class="text-ink-fade text-center">·</span>
    {{end}}
    {{/* The label wraps only the checkbox; the visible name sits in a
         separate grid cell, so the input is named via aria-label. */}}
    <label class="cursor-pointer flex items-center justify-center">
      <input type="checkbox" name="paths" value="{{.Path}}"
             aria-label="Select {{.Path}}"
             class="w-[13px] h-[13px] cursor-pointer" />
    </label>
    <span class="{{if .IsDir}}text-ink{{else}}text-ink-mid{{end}}">{{.Name}}{{if .IsDir}}/{{end}}</span>
    <span></span>
    <span class="text-[11px] text-ink-fade">{{if not .IsDir}}{{if .Size}}{{bytes .Size}}{{else}}—{{end}}{{end}}</span>
  </div>
  {{if .IsDir}}
  <div class="tree-children hidden pl-5 border-l border-line-soft ml-5"></div>
  {{end}}
</div>
{{end}}
{{end}}
{{end}}
|
||||
Reference in New Issue
Block a user