P3-X2: tree.list synchronous WS RPC + per-session cache
Foundational for the restore wizard's tree browser. The wizard needs to lazy-load directory contents from a snapshot as the operator drills down; this lands the transport.

- internal/api adds MsgTreeList (server → agent) + MsgTreeListResult (agent → server) with TreeListRequestPayload / TreeListEntry / TreeListResultPayload types. Reply correlates by Envelope.ID.
- internal/restic.ListTreeChildren wraps 'restic ls --json' and filters its recursive output to direct children of the requested path. Parser + path-normalisation + isDirectChild are unit-tested.
- internal/server/ws/rpc.go introduces a generic SendRPC helper on Hub: register a buffered channel keyed by ULID, send the request, block on ctx.Done()/timeout/reply. Reply routing piggybacks on the existing dispatchAgentMessage by adding a MsgTreeListResult case that forwards to the registered waiter; if no waiter is registered (caller already gave up) the stray reply is dropped quietly.
- cmd/agent gains a tree.list handler that runs ListTreeChildren on a fresh per-call context (60s ceiling) and ships the matching tree.list.result envelope. Errors surface in result.Error rather than as transport failures so the server-side waiter can render a sensible UI message.
- internal/server/http/tree_cache.go is the per-wizard-session cache layer (~30min TTL, sweep-on-access) that fetchTreeWithCache uses before falling through to SendRPC. Cached on success only; agent errors aren't cached so a transient failure doesn't poison the session.

Tests:

- internal/restic/ls_test.go covers parseLsChildren at root / mid-tree / leaf, plus normalizeTreePath and isDirectChild edge cases.
- internal/server/ws/rpc_test.go unit-tests the registry: round-trip, release semantics, concurrent waiters, ctx-cancel.
- internal/server/http/tree_rpc_test.go is the full round-trip: server SendRPC → fake-agent over a real WS → reply → server gets the payload. Plus a timeout test that confirms ~300ms timeouts terminate in ~300ms rather than waiting forever.

The cache is plumbed but no UI handler hits fetchTreeWithCache yet — that lands with P3-01 (wizard backend). The unused-linter is suppressed via nolint until the wizard wires it in.
This commit is contained in:
@@ -274,6 +274,17 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
|
||||
slog.Info("ws agent: command.cancel for unknown job (already finished?)", "job_id", p.JobID)
|
||||
}
|
||||
|
||||
case api.MsgTreeList:
|
||||
// Synchronous RPC for the restore wizard's tree browser. The
|
||||
// server has serialized access; we just run restic ls and reply
|
||||
// with the same envelope ID. Run in a goroutine so the WS read
|
||||
// loop keeps draining.
|
||||
var p api.TreeListRequestPayload
|
||||
if err := env.UnmarshalPayload(&p); err != nil {
|
||||
return fmt.Errorf("tree.list: %w", err)
|
||||
}
|
||||
go d.handleTreeList(ctx, env.ID, p, tx)
|
||||
|
||||
case api.MsgScheduleSet:
|
||||
var p api.ScheduleSetPayload
|
||||
if err := env.UnmarshalPayload(&p); err != nil {
|
||||
@@ -381,6 +392,72 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleTreeList runs `restic ls --json <snapshot> <path>` and ships
|
||||
// the matching tree.list.result envelope back, correlated by the
|
||||
// request envelope's ID. Errors (missing creds, restic failure)
|
||||
// surface in the result's Error field rather than as transport-level
|
||||
// failures so the server-side waiter can render a sensible message.
|
||||
func (d *dispatcher) handleTreeList(ctx context.Context, reqID string, p api.TreeListRequestPayload, tx wsclient.Sender) {
|
||||
reply := func(result api.TreeListResultPayload) {
|
||||
result.SnapshotID = p.SnapshotID
|
||||
result.Path = p.Path
|
||||
env, err := api.Marshal(api.MsgTreeListResult, reqID, result)
|
||||
if err != nil {
|
||||
slog.Warn("ws agent: marshal tree.list.result", "err", err)
|
||||
return
|
||||
}
|
||||
_ = tx.Send(env)
|
||||
}
|
||||
|
||||
if d.resticBin == "" {
|
||||
reply(api.TreeListResultPayload{Error: "restic binary not located on this agent"})
|
||||
return
|
||||
}
|
||||
creds, err := d.secrets.Load()
|
||||
if err != nil {
|
||||
reply(api.TreeListResultPayload{Error: "load credentials: " + err.Error()})
|
||||
return
|
||||
}
|
||||
if creds.Empty() {
|
||||
reply(api.TreeListResultPayload{Error: "repo credentials not configured"})
|
||||
return
|
||||
}
|
||||
|
||||
d.bwMu.Lock()
|
||||
upKBps, downKBps := d.bwUpKBps, d.bwDownKBps
|
||||
d.bwMu.Unlock()
|
||||
|
||||
env := restic.Env{
|
||||
Bin: d.resticBin,
|
||||
RepoURL: creds.URL,
|
||||
RepoUsername: creds.Username,
|
||||
RepoPassword: creds.Password,
|
||||
LimitUploadKBps: upKBps,
|
||||
LimitDownloadKBps: downKBps,
|
||||
}
|
||||
|
||||
// 60s ceiling matches snapshots/stats — restic ls on a single
|
||||
// directory is normally sub-second; if the repo is unreachable we
|
||||
// want to surface the failure rather than block the wizard.
|
||||
listCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
entries, err := env.ListTreeChildren(listCtx, p.SnapshotID, p.Path)
|
||||
if err != nil {
|
||||
reply(api.TreeListResultPayload{Error: err.Error()})
|
||||
return
|
||||
}
|
||||
apiEntries := make([]api.TreeListEntry, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
apiEntries = append(apiEntries, api.TreeListEntry{
|
||||
Name: e.Name,
|
||||
Type: e.Type,
|
||||
Size: e.Size,
|
||||
})
|
||||
}
|
||||
reply(api.TreeListResultPayload{Entries: apiEntries})
|
||||
}
|
||||
|
||||
// runJob spawns a runner for one job. We launch a goroutine so the
|
||||
// WS read loop keeps draining messages while restic chugs along.
|
||||
func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsclient.Sender) error {
|
||||
|
||||
Reference in New Issue
Block a user