Files
restic-manager/internal/server/ws/hub.go
T
steve 13f58bd052 P3-X2: tree.list synchronous WS RPC + per-session cache
Foundational for the restore wizard's tree browser. The wizard needs to
lazy-load directory contents from a snapshot as the operator drills
down; this lands the transport.

- internal/api adds MsgTreeList (server → agent) + MsgTreeListResult
  (agent → server) with TreeListRequestPayload / TreeListEntry /
  TreeListResultPayload types. Reply correlates by Envelope.ID.
- internal/restic.ListTreeChildren wraps 'restic ls --json' and
  filters its recursive output to direct children of the requested
  path. Parser + path-normalisation + isDirectChild are unit-tested.
- internal/server/ws/rpc.go introduces a generic SendRPC helper on
  Hub: register a buffered channel keyed by ULID, send the request,
  block on ctx.Done()/timeout/reply. Reply routing piggybacks on the
  existing dispatchAgentMessage by adding a MsgTreeListResult case
  that forwards to the registered waiter; if no waiter is registered
  (caller already gave up) the stray reply is dropped quietly.
- cmd/agent gains a tree.list handler that runs ListTreeChildren on a
  fresh per-call context (60s ceiling) and ships the matching
  tree.list.result envelope. Errors surface in result.Error rather
  than as transport failures so the server-side waiter can render a
  sensible UI message.
- internal/server/http/tree_cache.go is the per-wizard-session cache
  layer (~30min TTL, sweep-on-access) that fetchTreeWithCache uses
  before falling through to SendRPC. Cached on success only; agent
  errors aren't cached so a transient failure doesn't poison the
  session.

Tests:
- internal/restic/ls_test.go covers parseLsChildren at root / mid-tree
  / leaf, plus normalizeTreePath and isDirectChild edge cases.
- internal/server/ws/rpc_test.go unit-tests the registry: round-trip,
  release semantics, concurrent waiters, ctx-cancel.
- internal/server/http/tree_rpc_test.go is the full round-trip: server
  SendRPC → fake-agent over a real WS → reply → server gets the
  payload. Plus a timeout test that confirms ~300ms timeouts terminate
  in ~300ms rather than waiting forever.

The cache is plumbed but no UI handler hits fetchTreeWithCache yet —
that lands with P3-01 (wizard backend). The unused-linter is suppressed
via nolint until the wizard wires it in.
2026-05-04 15:19:22 +01:00

162 lines
4.7 KiB
Go

// Package ws hosts the WebSocket transport for agent ↔ server. The
// Hub tracks one active connection per host id; subsequent connections
// from the same host evict the prior one (last-write-wins).
package ws
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// Hub owns the live agent connections and routes messages. It keeps at
// most one canonical *Conn per host id in conns; Register replaces the
// entry for a host and closes the conn it displaced.
type Hub struct {
// mu guards conns. Register/Unregister take the write lock; Send,
// Connected and Conn only take the read lock for the map lookup.
mu sync.RWMutex
conns map[string]*Conn // hostID → conn
// rpcs tracks in-flight synchronous RPC calls (e.g. tree.list).
// See rpc.go for details. Lazy-initialized via the registry's
// own register() so callers don't have to juggle a constructor.
// NewHub leaves this field at its zero value.
rpcs rpcRegistry
}
// NewHub constructs a Hub with an empty connection table. No hosts are
// registered yet, and every other field is left at its zero value.
func NewHub() *Hub {
	h := new(Hub)
	h.conns = map[string]*Conn{}
	return h
}
// Conn is one agent WS connection. Send is safe for concurrent use;
// Read is single-reader (the connection's run loop).
type Conn struct {
// HostID is the host id this connection was created for (see NewConn);
// it is also what LogValue reports.
HostID string
// c is the wrapped websocket that Send, SendError, Close and Read
// operate on.
c *websocket.Conn
// writeMu is held for the whole of Send so that writes issued by
// concurrent callers are serialized.
writeMu sync.Mutex
}
// Register installs c as the canonical connection for hostID. If a
// different connection was previously registered for that host it is
// closed with StatusPolicyViolation and the reason "superseded".
//
// Registering the conn that is already canonical leaves it open: it is
// not a "previous" connection, and closing it would leave a dead socket
// in the table. A nil entry is likewise skipped instead of being
// dereferenced in the close goroutine.
func (h *Hub) Register(hostID string, c *Conn) {
	h.mu.Lock()
	defer h.mu.Unlock()

	if prev := h.conns[hostID]; prev != nil && prev != c {
		// Best-effort close on its own goroutine — a stuck old socket
		// must not delay installing the new one or hold the hub lock.
		go func(old *Conn) {
			_ = old.c.Close(websocket.StatusPolicyViolation, "superseded")
		}(prev)
	}
	h.conns[hostID] = c
}
// Unregister drops the table entry for hostID, but only when that entry
// is still c. If a newer connection has already taken over the slot
// (the supersede race), the call is a no-op so the replacement stays
// registered.
func (h *Hub) Unregister(hostID string, c *Conn) {
	h.mu.Lock()
	defer h.mu.Unlock()

	current, present := h.conns[hostID]
	if !present || current != c {
		return
	}
	delete(h.conns, hostID)
}
// Send forwards env to the connection currently registered for hostID.
// When no connection is registered it returns an "offline" error and
// writes nothing; the caller may queue the message for later. The hub
// lock is held only for the lookup, not for the network write.
func (h *Hub) Send(ctx context.Context, hostID string, env api.Envelope) error {
	conn, online := func() (*Conn, bool) {
		h.mu.RLock()
		defer h.mu.RUnlock()
		found, ok := h.conns[hostID]
		return found, ok
	}()

	if online {
		return conn.Send(ctx, env)
	}
	return fmt.Errorf("ws: host %q is offline", hostID)
}
// Connected reports whether the hub currently has a connection
// registered under hostID.
func (h *Hub) Connected(hostID string) bool {
	h.mu.RLock()
	defer h.mu.RUnlock()

	_, present := h.conns[hostID]
	return present
}
// Conn looks up the canonical connection for hostID and returns it, or
// nil when the host has no registered connection. It exists mainly so
// tests can get hold of a *Conn to pass to handlers directly.
// Production code should prefer Send: a pointer obtained here can go
// stale if a later Register supersedes the connection.
func (h *Hub) Conn(hostID string) *Conn {
	h.mu.RLock()
	conn := h.conns[hostID]
	h.mu.RUnlock()
	return conn
}
// ----- Conn methods --------------------------------------------------
// NewConn builds the Conn wrapper for a websocket that has just been
// accepted on behalf of hostID. The write mutex starts unlocked.
func NewConn(hostID string, c *websocket.Conn) *Conn {
	conn := new(Conn)
	conn.HostID = hostID
	conn.c = c
	return conn
}
// Send writes env to the socket as a single JSON text message.
//
// The envelope is encoded before writeMu is taken, so the CPU-only
// marshalling step does not extend the critical section and an
// encoding failure returns immediately instead of queueing behind
// another caller's in-flight write. Only the socket write itself is
// serialized across concurrent callers.
//
// It returns a wrapped error if env cannot be marshalled; otherwise it
// returns whatever the underlying websocket Write returns for ctx.
func (c *Conn) Send(ctx context.Context, env api.Envelope) error {
	raw, err := json.Marshal(env)
	if err != nil {
		return fmt.Errorf("ws: marshal envelope: %w", err)
	}

	c.writeMu.Lock()
	defer c.writeMu.Unlock()
	return c.c.Write(ctx, websocket.MessageText, raw)
}
// SendError reports a rejection to the agent and then closes the
// socket. It is used by the hello handshake when an agent is refused.
//
// The error envelope is sent best-effort with a 2-second write budget
// derived from ctx; a failure to build or deliver it is ignored. The
// socket is closed with StatusPolicyViolation in every case, using the
// error code as the close reason.
func (c *Conn) SendError(ctx context.Context, code api.ErrorCode, msg, helpURL string) {
	payload := api.ErrorPayload{
		Code:    code,
		Message: msg,
		HelpURL: helpURL,
	}
	if env, err := api.Marshal(api.MsgError, "", payload); err == nil {
		sendCtx, stop := context.WithTimeout(ctx, 2*time.Second)
		defer stop()
		_ = c.Send(sendCtx, env)
	}

	reason := string(code)
	_ = c.c.Close(websocket.StatusPolicyViolation, reason)
}
// Close closes the underlying websocket with StatusNormalClosure and
// an empty reason, returning the websocket's own error, if any.
func (c *Conn) Close() error {
	const reason = ""
	return c.c.Close(websocket.StatusNormalClosure, reason)
}
// Read receives the next message from the socket and decodes it as a
// JSON envelope. Cancellation and deadlines come from ctx.
//
// A zero Envelope and a non-nil error are returned when the socket
// read fails (error passed through unchanged), when the frame is not a
// text frame, or when the body is not valid envelope JSON (wrapped).
func (c *Conn) Read(ctx context.Context) (api.Envelope, error) {
	var zero api.Envelope

	kind, data, err := c.c.Read(ctx)
	switch {
	case err != nil:
		return zero, err
	case kind != websocket.MessageText:
		return zero, errors.New("ws: expected text frame")
	}

	var out api.Envelope
	if decodeErr := json.Unmarshal(data, &out); decodeErr != nil {
		return zero, fmt.Errorf("ws: unmarshal envelope: %w", decodeErr)
	}
	return out, nil
}
// ----- helpers -------------------------------------------------------
// LogValue renders the Conn for structured logging as a group holding
// a single "host_id" attribute.
func (c *Conn) LogValue() slog.Value {
	hostAttr := slog.String("host_id", c.HostID)
	return slog.GroupValue(hostAttr)
}