phase 1: WS transport, enrollment, agent that hellos and heartbeats

Lands the protocol layer end-to-end: an agent can be enrolled
through the operator UI, store credentials, dial back to the server
over WS, complete the protocol_version handshake, and stay
connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction,
  json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage
  (10s deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth)
  issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks
  hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field;
  the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional
  TLS cert pinning (sha-256 of leaf), exponential backoff with
  jitter (1s → 60s cap), heartbeat goroutine, fatal handling for
  ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic detected by `restic version` parse; absent
  restic doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to
  systemd to run the persistent service).

End-to-end smoke verified: bootstrap → login → issue token →
enroll → run agent → server logs `ws agent connected` with the
right host_id and protocol_version 1.

All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 00:39:00 +01:00
parent df2c584b23
commit 9cc0caff1e
18 changed files with 1670 additions and 14 deletions
+165
View File
@@ -0,0 +1,165 @@
package http
import (
"encoding/json"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// enrollRequest is the body posted by the agent installer. The token
// was issued by the operator via the UI ("Add host" → P1-27); the
// host metadata comes from the agent's own sysinfo collection.
//
// handleAgentEnroll rejects the request unless Token, HostName, OS
// and Arch are all non-empty; the two version fields are optional and
// are copied onto the host row as-is.
type enrollRequest struct {
// Token is the raw one-time enrollment token; the server only ever
// looks it up by hash.
Token string `json:"token"`
// HostName is the name recorded on the host row (whitespace-trimmed
// by the handler before storage).
HostName string `json:"hostname"`
OS api.HostOS `json:"os"`
Arch api.HostArch `json:"arch"`
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
}
// enrollResponse hands the agent the credentials it'll use forever.
// AgentToken is shown exactly once; the server stores its hash.
// CertPinSHA256 is the SHA-256 of the server's certificate, base64;
// the agent pins this on every reconnect so a stolen DB at the
// control plane can't be replayed against an attacker's TLS endpoint.
//
// NOTE: handleAgentEnroll currently never sets CertPinSHA256, so with
// omitempty the field is absent from the JSON response.
type enrollResponse struct {
// HostID is the ULID the server generated for the new host row.
HostID string `json:"host_id"`
// AgentToken is the raw bearer token; it is not recoverable from
// the server afterwards because only its hash is persisted.
AgentToken string `json:"agent_token"`
CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
// enrollOperatorRequest creates a one-time enrollment token for an
// operator who is about to install an agent. Authenticated UI route.
//
// NOTE(review): handleCreateEnrollmentToken decodes this body but does
// not read HostName or Tags; the issued token is not bound to either.
// Confirm whether that is intentional for this phase.
type enrollOperatorRequest struct {
HostName string `json:"hostname"`
Tags []string `json:"tags,omitempty"`
}
// enrollOperatorResponse is the body returned by
// handleCreateEnrollmentToken.
type enrollOperatorResponse struct {
// Token is the raw one-time enrollment token; only its hash is
// passed to the store, so this response is the only place it appears.
Token string `json:"token"`
// ExpiresAt is computed by the handler as "now + TTL" (UTC) when the
// response is built, after the store call has returned.
ExpiresAt time.Time `json:"expires_at"`
}
// handleAgentEnroll consumes a one-time enrollment token, persists a
// Host row, and returns the persistent agent credentials. Open
// endpoint (no session) — the token is the credential.
//
// Responses:
//   - 400 invalid_json: body is not valid JSON (or exceeds the size cap).
//   - 400 missing_field: token, hostname, os or arch is empty.
//   - 401 invalid_token: the token could not be consumed.
//   - 409 host_exists: the host row could not be created.
//   - 500 internal: the agent bearer could not be generated.
//   - 201: enrollResponse carrying host_id and the raw agent token.
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	// Unauthenticated endpoint: cap the body so an anonymous client
	// cannot make the server read an arbitrarily large payload. The
	// real request is a few hundred bytes.
	const maxEnrollBody = 1 << 20

	var req enrollRequest
	if err := json.NewDecoder(stdhttp.MaxBytesReader(w, r.Body, maxEnrollBody)).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}

	// Trim before validating so a whitespace-only hostname is rejected
	// instead of being stored as an empty name.
	hostName := strings.TrimSpace(req.HostName)
	if req.Token == "" || hostName == "" || req.OS == "" || req.Arch == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"token, hostname, os, arch all required")
		return
	}

	// Mint the persistent agent bearer before touching the one-time
	// token, so a failure here does not burn the operator's token.
	agentToken, err := auth.NewToken()
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	hostID := ulid.Make().String()

	// Consume-token and create-host are two separate statements, not
	// one transaction: if create-host fails, the token is already
	// burned. That's acceptable — operator just regens.
	tokHash := auth.HashToken(req.Token)
	if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
		writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
			"token unknown, expired, or already used")
		return
	}

	host := store.Host{
		ID:            hostID,
		Name:          hostName,
		OS:            string(req.OS),
		Arch:          string(req.Arch),
		AgentVersion:  req.AgentVersion,
		ResticVersion: req.ResticVersion,
		EnrolledAt:    time.Now().UTC(),
	}
	if err := s.deps.Store.CreateHost(r.Context(), host,
		auth.HashToken(agentToken), ""); err != nil {
		writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
		return
	}

	// Audit is best-effort: the host is already enrolled at this point,
	// so a failed audit write must not fail the request.
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		Actor:      "system",
		Action:     "host.enrolled",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         host.EnrolledAt,
	})

	writeJSON(w, stdhttp.StatusCreated, enrollResponse{
		HostID:     hostID,
		AgentToken: agentToken,
		// CertPinSHA256 is populated by a TLS-aware future revision.
		// For now (HTTP-or-TLS-by-Caddy) we leave it empty and rely
		// on the agent trusting its OS root store.
	})
}
// handleCreateEnrollmentToken is the operator-facing endpoint that
// issues a one-time enrollment token valid for one hour. The raw
// token is returned to the caller; only its hash goes to the store.
//
// Access control is currently just "request carries a valid session
// cookie" (see authedUser); there is no role check yet.
//
// TODO: gate by authn middleware once login session lookup lands.
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if authed := s.authedUser(r); !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
		return
	}

	// The body must parse as JSON even though its fields are not
	// consulted below.
	var body enrollOperatorRequest
	decodeErr := json.NewDecoder(r.Body).Decode(&body)
	if decodeErr != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", decodeErr.Error())
		return
	}

	raw, err := auth.NewToken()
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	const lifetime = time.Hour
	err = s.deps.Store.CreateEnrollmentToken(r.Context(), auth.HashToken(raw), lifetime)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	resp := enrollOperatorResponse{
		Token:     raw,
		ExpiresAt: time.Now().Add(lifetime).UTC(),
	}
	writeJSON(w, stdhttp.StatusCreated, resp)
}
// authedUser reports whether the request carries a session cookie
// whose hashed value resolves to a session in the store. It performs
// no role check. Minimal stub for now; full RBAC middleware lands
// with P4-03.
func (s *Server) authedUser(r *stdhttp.Request) bool {
	cookie, cookieErr := r.Cookie(sessionCookieName)
	if cookieErr != nil {
		return false
	}
	if _, lookupErr := s.deps.Store.LookupSession(r.Context(), auth.HashToken(cookie.Value)); lookupErr != nil {
		return false
	}
	return true
}
// ptr returns a pointer to a fresh copy of s, for populating optional
// *string fields from a literal.
func ptr(s string) *string {
	v := s
	return &v
}
+118
View File
@@ -0,0 +1,118 @@
package http
import (
"bytes"
"context"
"encoding/json"
"io"
stdhttp "net/http"
"net/http/httptest"
"path/filepath"
"testing"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// newTestServerWithHub mirrors newTestServer but plugs in a real
// ws.Hub so /ws/agent is available. It returns the Server, the base
// URL of an httptest server wrapping its handler, and the backing
// store. The store and the HTTP server are torn down via t.Cleanup.
//
// Any setup failure (store, key file, AEAD) aborts the test
// immediately rather than surfacing later as a confusing nil
// dereference inside a handler.
func newTestServerWithHub(t *testing.T) (*Server, string, *store.Store) {
	t.Helper()
	dir := t.TempDir()

	st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
	if err != nil {
		t.Fatalf("store: %v", err)
	}
	t.Cleanup(func() { _ = st.Close() })

	keyPath := filepath.Join(dir, "secret.key")
	if err := crypto.GenerateKeyFile(keyPath); err != nil {
		t.Fatalf("generate key: %v", err)
	}
	key, err := crypto.LoadKeyFromFile(keyPath)
	if err != nil {
		t.Fatalf("load key: %v", err)
	}
	aead, err := crypto.NewAEAD(key)
	if err != nil {
		t.Fatalf("aead: %v", err)
	}

	deps := Deps{
		Cfg:   config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath},
		Store: st,
		AEAD:  aead,
		Hub:   ws.NewHub(),
	}
	s := New(deps)
	ts := httptest.NewServer(s.srv.Handler)
	t.Cleanup(ts.Close)
	return s, ts.URL, st
}
// TestEnrollmentBadToken posts an otherwise well-formed enrollment
// request carrying a token the store has never seen and expects 401.
func TestEnrollmentBadToken(t *testing.T) {
	t.Parallel()
	_, baseURL, _ := newTestServerWithHub(t)

	req := enrollRequest{
		Token:         "no-such-token",
		HostName:      "host1",
		OS:            api.OSLinux,
		Arch:          api.ArchAmd64,
		AgentVersion:  "0.1",
		ResticVersion: "0.17",
	}
	payload, _ := json.Marshal(req)

	resp, postErr := stdhttp.Post(baseURL+"/api/agents/enroll", "application/json", bytes.NewReader(payload))
	if postErr != nil {
		t.Fatalf("post: %v", postErr)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != stdhttp.StatusUnauthorized {
		t.Errorf("status: %d", got)
	}
}
// TestEnrollmentHappyPath issues an enrollment token straight through
// the store, enrolls with it over HTTP, and then checks that:
//   - the response carries a host id and an agent token,
//   - replaying the same enrollment token is rejected with 401,
//   - the host row can be found by the hash of the returned agent token.
func TestEnrollmentHappyPath(t *testing.T) {
	t.Parallel()
	_, url, st := newTestServerWithHub(t)

	// Issue a token directly via the store (skipping the operator UI).
	rawToken, err := auth.NewToken()
	if err != nil {
		t.Fatalf("new token: %v", err)
	}
	if err := st.CreateEnrollmentToken(context.Background(),
		auth.HashToken(rawToken), 5*time.Minute); err != nil {
		t.Fatalf("issue: %v", err)
	}

	body, err := json.Marshal(enrollRequest{
		Token: rawToken, HostName: "test-host",
		OS: api.OSLinux, Arch: api.ArchAmd64,
		AgentVersion: "0.1", ResticVersion: "0.17",
	})
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	res, err := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
	if err != nil {
		t.Fatalf("post: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusCreated {
		buf, _ := io.ReadAll(res.Body)
		t.Fatalf("status %d: %s", res.StatusCode, buf)
	}
	var er enrollResponse
	if err := json.NewDecoder(res.Body).Decode(&er); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if er.HostID == "" || er.AgentToken == "" {
		t.Errorf("missing fields in response: %+v", er)
	}

	// Token must not be reusable. Check the transport error first:
	// on failure the response is nil and closing its body would panic.
	res2, err := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
	if err != nil {
		t.Fatalf("re-post: %v", err)
	}
	defer res2.Body.Close()
	if res2.StatusCode != stdhttp.StatusUnauthorized {
		t.Errorf("re-enrollment with same token should fail, got %d", res2.StatusCode)
	}

	// Host row exists with matching agent_token_hash.
	got, err := st.LookupHostByAgentToken(context.Background(), auth.HashToken(er.AgentToken))
	if err != nil {
		t.Fatalf("lookup by token: %v", err)
	}
	if got.Name != "test-host" || got.OS != "linux" {
		t.Errorf("host fields: %+v", got)
	}
}
+18
View File
@@ -15,6 +15,7 @@ import (
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
@@ -24,6 +25,7 @@ type Deps struct {
Cfg config.Config
Store *store.Store
AEAD *crypto.AEAD
Hub *ws.Hub
// BootstrapToken (optional, populated only on first run) is the raw
// admin-bootstrap token printed in the server logs. While set, the
// /bootstrap endpoint accepts it to create the first admin user.
@@ -73,8 +75,24 @@ func (s *Server) routes(r chi.Router) {
r.Post("/auth/login", s.handleLogin)
r.Post("/auth/logout", s.handleLogout)
r.Post("/bootstrap", s.handleBootstrap)
// Agent enrollment (open endpoint — token is the credential).
r.Post("/agents/enroll", s.handleAgentEnroll)
// Operator → server (authenticated). Spec.md §6.1's
// /hosts/{id}/enrollment-token (regenerate) lands when the
// host page can call it; for now just the create endpoint.
r.Post("/enrollment-tokens", s.handleCreateEnrollmentToken)
})
// Agent ↔ server WebSocket. Bearer-authenticated inside the handler.
if s.deps.Hub != nil {
r.Mount("/ws/agent", ws.AgentHandler(ws.HandlerDeps{
Hub: s.deps.Hub,
Store: s.deps.Store,
}))
}
// UI handlers will hang off / — Phase 1 will add them.
r.Get("/", func(w stdhttp.ResponseWriter, _ *stdhttp.Request) {
_, _ = fmt.Fprint(w, "restic-manager — UI not yet implemented")
-3
View File
@@ -1,3 +0,0 @@
// Package ws hosts the WebSocket transport for agent ↔ server and the
// browser-facing live job log stream.
package ws
+183
View File
@@ -0,0 +1,183 @@
package ws
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
stdhttp "net/http"
"strings"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// HandlerDeps is the set of collaborators the agent WS handler needs.
type HandlerDeps struct {
// Hub receives the connection once the hello handshake is accepted
// and drops it again when the per-connection loop exits.
Hub *Hub
// Store resolves bearer tokens to hosts and records hello/heartbeat
// updates on the host row.
Store *store.Store
}
// AgentHandler is the http.Handler that owns /ws/agent. Agents
// authenticate with `Authorization: Bearer <token>` (issued at
// enrollment) before the WS upgrade.
//
// Lifecycle:
//  1. Bearer token resolves to a Host row; otherwise plain 401.
//  2. Upgrade to WebSocket.
//  3. Hand the connection to runAgentLoop, which requires `hello` as
//     the first message, checks protocol_version, registers with the
//     hub, and then reads and dispatches messages until the socket
//     or the request context ends.
//
// This handler and runAgentLoop do not themselves mark the host
// offline on disconnect; they only unregister it from the hub.
func AgentHandler(deps HandlerDeps) stdhttp.Handler {
	serve := func(w stdhttp.ResponseWriter, r *stdhttp.Request) {
		host, authed := authenticateAgent(r, deps.Store)
		if !authed {
			stdhttp.Error(w, "unauthorized", stdhttp.StatusUnauthorized)
			return
		}

		// Origin checks are pointless for an agent CLI, so they are
		// switched off for the upgrade.
		opts := websocket.AcceptOptions{InsecureSkipVerify: true}
		sock, acceptErr := websocket.Accept(w, r, &opts)
		if acceptErr != nil {
			slog.Warn("ws accept failed", "err", acceptErr, "host_id", host.ID)
			return
		}

		// NOTE(review): no explicit ping loop is started here. Confirm
		// whether coder/websocket sends keepalive pings on its own, or
		// whether the agent's ~30s heartbeats are the only traffic
		// keeping NAT mappings alive.
		runAgentLoop(r.Context(), NewConn(host.ID, sock), host.ID, deps)
	}
	return stdhttp.HandlerFunc(serve)
}
// authenticateAgent resolves the request's bearer token to its host.
// It returns (nil, false) when the Authorization header is missing,
// does not start with "Bearer ", carries an empty token, or the token
// hash is unknown to the store. All failures look the same to the
// caller so the response leaks nothing about which check failed.
func authenticateAgent(r *stdhttp.Request, st *store.Store) (*store.Host, bool) {
	bearer, found := strings.CutPrefix(r.Header.Get("Authorization"), "Bearer ")
	if !found || bearer == "" {
		return nil, false
	}
	host, lookupErr := st.LookupHostByAgentToken(r.Context(), auth.HashToken(bearer))
	if lookupErr != nil {
		return nil, false
	}
	return host, true
}
// runAgentLoop is the per-connection driver. Returns when the socket
// is closed for any reason. It owns the hub registration: register on
// hello acceptance, unregister on exit.
//
// Stage 1 reads exactly one message with a 10s deadline and rejects
// the connection unless it is a well-formed hello whose
// protocol_version is at least api.MinAgentProtocolVersion. Stage 2
// reads envelopes until Read fails and hands each one to
// dispatchAgentMessage.
//
// Every return path leaves the socket closed: rejection paths go
// through SendError (which closes after writing the error envelope),
// the hello-read failure closes explicitly, and the main loop relies
// on the deferred Close.
func runAgentLoop(ctx context.Context, c *Conn, hostID string, deps HandlerDeps) {
// Stage 1: hello (with a tight deadline).
helloCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
hello, err := c.Read(helloCtx)
// Release the timeout's resources right away; the deadline only
// applies to this single read.
cancel()
if err != nil {
slog.Info("ws hello read failed", "host_id", hostID, "err", err)
_ = c.Close()
return
}
if hello.Type != api.MsgHello {
// SendError closes the socket, so no separate Close is needed.
c.SendError(ctx, api.ErrBadRequest, "first message must be hello", "")
return
}
var helloPayload api.HelloPayload
if err := hello.UnmarshalPayload(&helloPayload); err != nil {
c.SendError(ctx, api.ErrBadRequest, "malformed hello payload", "")
return
}
if helloPayload.ProtocolVersion < api.MinAgentProtocolVersion {
// The last argument is the help URL carried in the error payload.
c.SendError(ctx, api.ErrProtocolTooOld,
fmt.Sprintf("agent protocol_version %d below minimum %d",
helloPayload.ProtocolVersion, api.MinAgentProtocolVersion),
"https://restic-manager.example/docs/upgrade")
return
}
if helloPayload.ProtocolVersion > api.CurrentProtocolVersion {
// Forward-compat is fine — newer agents talking to older
// servers should accept their lower version. Just log it.
slog.Info("ws agent newer than server",
"host_id", hostID,
"agent_proto", helloPayload.ProtocolVersion,
"server_proto", api.CurrentProtocolVersion)
}
now := time.Now().UTC()
// A failure to record the hello is logged but does not reject the
// connection: the agent is still registered and served below.
if err := deps.Store.MarkHostHello(ctx, hostID,
helloPayload.AgentVersion, helloPayload.ResticVersion,
helloPayload.ProtocolVersion, now); err != nil {
slog.Error("ws mark host hello failed", "host_id", hostID, "err", err)
}
deps.Hub.Register(hostID, c)
// Defers run last-in-first-out: the socket is closed first, then the
// hub entry is removed (Unregister is a no-op if a newer connection
// has already replaced this one).
defer deps.Hub.Unregister(hostID, c)
defer func() { _ = c.Close() }()
slog.Info("ws agent connected",
"host_id", hostID,
"agent_version", helloPayload.AgentVersion,
"protocol_version", helloPayload.ProtocolVersion)
// Stage 2: main read loop.
for {
env, err := c.Read(ctx)
if err != nil {
// Context cancellation is the quiet shutdown path; anything
// else (peer close, protocol error, bad JSON) is logged.
if !errors.Is(err, context.Canceled) {
slog.Info("ws agent read loop ended", "host_id", hostID, "err", err)
}
return
}
dispatchAgentMessage(ctx, c, hostID, env, deps)
}
}
// dispatchAgentMessage routes a single envelope to its handler. Only
// heartbeat and error are acted on in Phase 1's first slice; job,
// log, snapshot, repo, schedule and command-result messages are
// recognised but only logged at debug level until P1-18+ (jobs) and
// P2 (schedules) land. Unknown types are logged as warnings.
//
// Failures are logged rather than returned: one bad message must not
// tear down the connection. c is accepted for the handlers that will
// need to reply; nothing in this slice writes to it.
func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.Envelope, deps HandlerDeps) {
	switch env.Type {
	case api.MsgHeartbeat:
		// A heartbeat that cannot be recorded leaves last_seen_at
		// stale, so surface the failure instead of dropping it.
		if err := deps.Store.TouchHost(ctx, hostID, time.Now().UTC()); err != nil {
			slog.Warn("ws heartbeat touch failed", "host_id", hostID, "err", err)
		}
	case api.MsgJobStarted, api.MsgJobProgress, api.MsgJobFinished,
		api.MsgLogStream, api.MsgSnapshotsRpt, api.MsgRepoStats,
		api.MsgScheduleAck, api.MsgCommandResult:
		// TODO(P1-18+): persist + fan out to subscribed browsers.
		slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID)
	case api.MsgError:
		var ep api.ErrorPayload
		if err := env.UnmarshalPayload(&ep); err != nil {
			// Without a decodable payload there is no code or message
			// to report; log the decode failure itself.
			slog.Warn("ws agent error payload malformed", "host_id", hostID, "err", err)
			return
		}
		slog.Warn("ws agent reported error", "host_id", hostID,
			"code", string(ep.Code), "message", ep.Message)
	default:
		slog.Warn("ws unknown message type from agent",
			"type", env.Type, "host_id", hostID)
	}
}
// MinHeartbeatInterval is a sanity floor — any agent reporting
// heartbeats more often than this is misbehaving. The expected
// cadence per the spec is 30s; this 5s value is the lower bound, not
// the cadence.
//
// NOTE(review): nothing in this file reads the constant yet, so the
// floor is not enforced by the heartbeat handling here.
const MinHeartbeatInterval = 5 * time.Second
// Keeps the encoding/json import referenced: nothing else in this
// file uses the package, and Go rejects unused imports. Remove this
// line together with the import once neither is needed.
var _ = json.Marshal
+145
View File
@@ -0,0 +1,145 @@
// Package ws hosts the WebSocket transport for agent ↔ server. The
// Hub tracks one active connection per host id; subsequent connections
// from the same host evict the prior one (last-write-wins).
package ws
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// Hub owns the live agent connections and routes messages. It holds
// at most one Conn per host id.
type Hub struct {
// mu guards conns; lookups take the read lock, Register and
// Unregister take the write lock.
mu sync.RWMutex
conns map[string]*Conn // hostID → conn
}
// NewHub returns a hub with no registered connections, ready for use.
func NewHub() *Hub {
	h := new(Hub)
	h.conns = map[string]*Conn{}
	return h
}
// Conn is one agent WS connection. Send is safe for concurrent use;
// Read is single-reader (the connection's run loop).
type Conn struct {
// HostID is the host this connection was authenticated as.
HostID string
// c is the underlying websocket.
c *websocket.Conn
// writeMu serialises Send calls on c.
writeMu sync.Mutex
}
// Register installs c as the canonical connection for hostID. Any
// different connection previously registered for that host is closed
// with a policy-violation status and the reason "superseded".
//
// Registering the connection that is already canonical is a no-op:
// it must not be treated as its own predecessor and closed.
func (h *Hub) Register(hostID string, c *Conn) {
	h.mu.Lock()
	defer h.mu.Unlock()

	if prev, ok := h.conns[hostID]; ok && prev != c {
		// Best-effort close off the lock — a stuck old socket
		// shouldn't block the new one (or other hub users).
		go func(old *Conn) {
			_ = old.c.Close(websocket.StatusPolicyViolation, "superseded")
		}(prev)
	}
	h.conns[hostID] = c
}
// Unregister drops the hub entry for hostID only when that entry is
// still c. If a newer connection has already taken the slot, the
// call leaves it alone so a late exit of the old loop cannot knock
// the replacement out of the hub.
func (h *Hub) Unregister(hostID string, c *Conn) {
	h.mu.Lock()
	defer h.mu.Unlock()

	if h.conns[hostID] != c {
		return
	}
	delete(h.conns, hostID)
}
// Send writes env to the connection currently registered for hostID.
// It returns an error when no connection is registered (the caller
// may queue the message for later) or when the write itself fails.
// The hub lock is released before the write, so a slow socket does
// not block other hub operations.
func (h *Hub) Send(ctx context.Context, hostID string, env api.Envelope) error {
	h.mu.RLock()
	target, online := h.conns[hostID]
	h.mu.RUnlock()

	if online {
		return target.Send(ctx, env)
	}
	return fmt.Errorf("ws: host %q is offline", hostID)
}
// Connected reports whether a connection is currently registered for
// hostID.
func (h *Hub) Connected(hostID string) bool {
	h.mu.RLock()
	defer h.mu.RUnlock()

	_, present := h.conns[hostID]
	return present
}
// ----- Conn methods --------------------------------------------------
// NewConn binds an accepted websocket to the host id it was
// authenticated as.
func NewConn(hostID string, c *websocket.Conn) *Conn {
	wrapped := new(Conn)
	wrapped.HostID = hostID
	wrapped.c = c
	return wrapped
}
// Send encodes env as JSON and writes it as a single text message.
// Writes are serialised through writeMu because the underlying
// socket is not safe for parallel writers. The encoding happens
// before the lock is taken, so only the actual write is serialised.
func (c *Conn) Send(ctx context.Context, env api.Envelope) error {
	encoded, marshalErr := json.Marshal(env)
	if marshalErr != nil {
		return fmt.Errorf("ws: marshal envelope: %w", marshalErr)
	}

	c.writeMu.Lock()
	defer c.writeMu.Unlock()
	return c.c.Write(ctx, websocket.MessageText, encoded)
}
// SendError makes a best-effort attempt to deliver an error envelope
// (code, message, help URL) and then always closes the socket with a
// policy-violation status whose reason is the error code. The write
// is bounded to two seconds so a stalled peer cannot hold up the
// rejection. Used by the hello handshake when an agent is rejected.
func (c *Conn) SendError(ctx context.Context, code api.ErrorCode, msg, helpURL string) {
	shut := func() {
		_ = c.c.Close(websocket.StatusPolicyViolation, string(code))
	}

	payload := api.ErrorPayload{Code: code, Message: msg, HelpURL: helpURL}
	env, err := api.Marshal(api.MsgError, "", payload)
	if err != nil {
		// Nothing to send; still close.
		shut()
		return
	}

	writeCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()
	_ = c.Send(writeCtx, env)
	shut()
}
// Close ends the connection with the normal-closure status and an
// empty reason, returning whatever the underlying close reports.
func (c *Conn) Close() error {
	const reason = ""
	return c.c.Close(websocket.StatusNormalClosure, reason)
}
// Read blocks for the next message and decodes it as an envelope.
// It fails if the underlying read fails (including ctx cancellation
// or deadline), if the frame is not a text frame, or if the payload
// is not valid envelope JSON. On any failure the returned envelope
// is the zero value.
func (c *Conn) Read(ctx context.Context) (api.Envelope, error) {
	kind, payload, err := c.c.Read(ctx)
	switch {
	case err != nil:
		return api.Envelope{}, err
	case kind != websocket.MessageText:
		return api.Envelope{}, errors.New("ws: expected text frame")
	}

	var decoded api.Envelope
	if decodeErr := json.Unmarshal(payload, &decoded); decodeErr != nil {
		return api.Envelope{}, fmt.Errorf("ws: unmarshal envelope: %w", decodeErr)
	}
	return decoded, nil
}
// ----- helpers -------------------------------------------------------
// LogValue renders the connection for structured logging as a group
// holding only its host_id, so logging a Conn never dumps the socket.
func (c *Conn) LogValue() slog.Value {
	hostAttr := slog.String("host_id", c.HostID)
	return slog.GroupValue(hostAttr)
}
+181
View File
@@ -0,0 +1,181 @@
package ws
import (
"context"
"encoding/json"
stdhttp "net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// setupTestHub spins up an httptest server that exposes only
// /ws/agent against a fresh sqlite store with one pre-enrolled host.
// It returns the ws:// URL of the endpoint, the agent's raw bearer
// token, the host ID, the store, and the hub. The store and server
// are torn down via t.Cleanup; any setup failure aborts the test.
func setupTestHub(t *testing.T) (url string, token string, hostID string, st *store.Store, hub *Hub) {
	t.Helper()
	dir := t.TempDir()

	var err error
	st, err = store.Open(context.Background(), filepath.Join(dir, "rm.db"))
	if err != nil {
		t.Fatalf("store: %v", err)
	}
	t.Cleanup(func() { _ = st.Close() })

	hub = NewHub()
	mux := stdhttp.NewServeMux()
	mux.Handle("/ws/agent", AgentHandler(HandlerDeps{Hub: hub, Store: st}))
	srv := httptest.NewServer(mux)
	t.Cleanup(srv.Close)

	// Pre-enroll a host directly via store (skipping HTTP).
	hostID = "01HJ8K70000000000000000000"
	token, err = auth.NewToken()
	if err != nil {
		t.Fatalf("new token: %v", err)
	}
	now := time.Now().UTC()
	if err := st.CreateHost(context.Background(), store.Host{
		ID: hostID, Name: "h1", OS: "linux", Arch: "amd64",
		EnrolledAt: now,
	}, auth.HashToken(token), ""); err != nil {
		t.Fatalf("enroll: %v", err)
	}

	// httptest hands back an http:// URL; swap the scheme for ws://.
	url = "ws" + strings.TrimPrefix(srv.URL, "http") + "/ws/agent"
	return url, token, hostID, st, hub
}
// TestWSHelloAndHeartbeat drives the happy path over a real socket:
// dial with a valid bearer, send hello, wait for hub registration,
// check the host row was marked online with the reported agent
// version, then send a heartbeat and wait for last_seen_at to move.
func TestWSHelloAndHeartbeat(t *testing.T) {
	t.Parallel()
	url, token, hostID, st, hub := setupTestHub(t)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	c, _, err := websocket.Dial(ctx, url, &websocket.DialOptions{
		HTTPHeader: stdhttp.Header{"Authorization": []string{"Bearer " + token}},
	})
	if err != nil {
		t.Fatalf("dial: %v", err)
	}
	defer c.CloseNow()

	// Send hello.
	hello := api.HelloPayload{
		ProtocolVersion: api.CurrentProtocolVersion,
		AgentVersion:    "0.1.0",
		ResticVersion:   "0.17.1",
		Hostname:        "h1",
		OS:              api.OSLinux,
		Arch:            api.ArchAmd64,
	}
	env, err := api.Marshal(api.MsgHello, "", hello)
	if err != nil {
		t.Fatalf("marshal hello: %v", err)
	}
	raw, err := json.Marshal(env)
	if err != nil {
		t.Fatalf("encode hello: %v", err)
	}
	if err := c.Write(ctx, websocket.MessageText, raw); err != nil {
		t.Fatalf("write hello: %v", err)
	}

	// The server registers the connection asynchronously once it has
	// processed the hello; poll for up to 1s.
	deadline := time.Now().Add(time.Second)
	for !hub.Connected(hostID) && time.Now().Before(deadline) {
		time.Sleep(20 * time.Millisecond)
	}
	if !hub.Connected(hostID) {
		t.Fatal("host did not register on hub after hello")
	}

	// Verify host row was marked online + has populated metadata.
	h, err := st.GetHost(context.Background(), hostID)
	if err != nil {
		t.Fatalf("get host: %v", err)
	}
	if h.Status != "online" || h.AgentVersion != "0.1.0" {
		t.Errorf("host after hello: %+v", h)
	}

	// Send a heartbeat — server should touch last_seen.
	hb := api.HeartbeatPayload{SentAt: time.Now().UTC()}
	env, err = api.Marshal(api.MsgHeartbeat, "", hb)
	if err != nil {
		t.Fatalf("marshal heartbeat: %v", err)
	}
	raw, err = json.Marshal(env)
	if err != nil {
		t.Fatalf("encode heartbeat: %v", err)
	}
	preTouch := h.LastSeenAt
	if err := c.Write(ctx, websocket.MessageText, raw); err != nil {
		t.Fatalf("write heartbeat: %v", err)
	}

	// Wait briefly for server to process. A failed lookup is fatal:
	// continuing would dereference an unusable result.
	deadline = time.Now().Add(time.Second)
	for time.Now().Before(deadline) {
		h2, err := st.GetHost(context.Background(), hostID)
		if err != nil {
			t.Fatalf("get host after heartbeat: %v", err)
		}
		if h2.LastSeenAt != nil && (preTouch == nil || h2.LastSeenAt.After(*preTouch)) {
			return
		}
		time.Sleep(20 * time.Millisecond)
	}
	t.Error("heartbeat did not update last_seen_at")
}
// TestWSRejectsOldProtocol sends a hello with protocol_version 0 and
// expects the server to answer with an error envelope whose code is
// api.ErrProtocolTooOld. (Assumes api.MinAgentProtocolVersion is at
// least 1.)
func TestWSRejectsOldProtocol(t *testing.T) {
	t.Parallel()
	url, token, _, _, _ := setupTestHub(t)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	c, _, err := websocket.Dial(ctx, url, &websocket.DialOptions{
		HTTPHeader: stdhttp.Header{"Authorization": []string{"Bearer " + token}},
	})
	if err != nil {
		t.Fatalf("dial: %v", err)
	}
	defer c.CloseNow()

	hello := api.HelloPayload{ProtocolVersion: 0} // below minimum
	env, err := api.Marshal(api.MsgHello, "", hello)
	if err != nil {
		t.Fatalf("marshal hello: %v", err)
	}
	raw, err := json.Marshal(env)
	if err != nil {
		t.Fatalf("encode hello: %v", err)
	}
	if err := c.Write(ctx, websocket.MessageText, raw); err != nil {
		t.Fatalf("write hello: %v", err)
	}

	// Server should send an error envelope, then close.
	mt, body, err := c.Read(ctx)
	if err != nil {
		t.Fatalf("read: %v", err)
	}
	if mt != websocket.MessageText {
		t.Fatalf("frame type: %v", mt)
	}
	var got api.Envelope
	if err := json.Unmarshal(body, &got); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if got.Type != api.MsgError {
		t.Errorf("expected error envelope, got %q", got.Type)
	}
	var ep api.ErrorPayload
	if err := got.UnmarshalPayload(&ep); err != nil {
		t.Fatalf("unmarshal error payload: %v", err)
	}
	if ep.Code != api.ErrProtocolTooOld {
		t.Errorf("error code: %q", ep.Code)
	}
}
// TestWSRejectsBadToken dials with a bearer token the store does not
// know and expects the handshake to be refused with HTTP 401.
func TestWSRejectsBadToken(t *testing.T) {
	t.Parallel()
	url, _, _, _, _ := setupTestHub(t)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	_, res, err := websocket.Dial(ctx, url, &websocket.DialOptions{
		HTTPHeader: stdhttp.Header{"Authorization": []string{"Bearer wrong"}},
	})
	if err == nil {
		t.Fatal("dial should fail")
	}
	// A dial that failed without any HTTP response (e.g. connection
	// refused) proves nothing about auth, so it must fail the test
	// rather than pass silently.
	if res == nil {
		t.Fatalf("dial failed without an HTTP response: %v", err)
	}
	if res.StatusCode != stdhttp.StatusUnauthorized {
		t.Errorf("status: %d", res.StatusCode)
	}
}