phase 1: WS transport, enrollment, agent that hellos and heartbeats

Lands the protocol layer end-to-end: an agent can be enrolled
through the operator UI, store credentials, dial back to the server
over WS, complete the protocol_version handshake, and stay
connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction,
  json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage
  (10s deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth)
  issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks
  hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field;
  the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional
  TLS cert pinning (sha-256 of leaf), exponential backoff with
  jitter (1s → 60s cap), heartbeat goroutine, fatal handling for
  ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic detected by `restic version` parse; absent
  restic doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to
  systemd to run the persistent service).

End-to-end smoke verified: bootstrap → login → issue token →
enroll → run agent → server logs `ws agent connected` with the
right host_id and protocol_version 1.

All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 00:39:00 +01:00
parent df2c584b23
commit 9cc0caff1e
18 changed files with 1670 additions and 14 deletions
+165
View File
@@ -0,0 +1,165 @@
package http
import (
"encoding/json"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// enrollRequest is the JSON body posted by the agent installer to
// POST /api/agents/enroll. The token was issued by the operator via
// the UI ("Add host" → P1-27); the host metadata comes from the
// agent's own sysinfo collection.
//
// handleAgentEnroll rejects the request unless Token, HostName, OS
// and Arch are all non-empty; the two version fields are optional.
type enrollRequest struct {
// Token is the raw one-time enrollment token. The handler hashes it
// before handing it to the store to be consumed.
Token string `json:"token"`
// HostName is stored, whitespace-trimmed, as the host's Name.
HostName string `json:"hostname"`
// OS and Arch are stored on the host row as plain strings.
OS api.HostOS `json:"os"`
Arch api.HostArch `json:"arch"`
// AgentVersion and ResticVersion are copied onto the host row as-is.
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
}
// enrollResponse hands the agent the credentials it'll use forever.
// AgentToken is shown exactly once; the server stores its hash.
// CertPinSHA256 is the SHA-256 of the server's certificate, base64;
// the agent pins this on every reconnect so a stolen DB at the
// control plane can't be replayed against an attacker's TLS endpoint.
//
// NOTE: handleAgentEnroll does not populate CertPinSHA256 yet, so with
// omitempty the key is absent from the response it currently sends.
type enrollResponse struct {
// HostID is the ULID string generated for the new host row.
HostID string `json:"host_id"`
// AgentToken is the raw bearer token; only its hash is passed to the store.
AgentToken string `json:"agent_token"`
// CertPinSHA256 is omitted from the JSON when empty.
CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
// enrollOperatorRequest is the JSON body an operator posts to request
// a one-time enrollment token before installing an agent.
// Authenticated UI route.
//
// NOTE: handleCreateEnrollmentToken decodes this body (so it must be
// valid JSON) but does not read either field yet; the issued token is
// not tied to a hostname or tags.
type enrollOperatorRequest struct {
// HostName is presumably the intended name of the host to be enrolled — TODO confirm once it is consumed.
HostName string `json:"hostname"`
// Tags is optional and omitted from the JSON when empty.
Tags []string `json:"tags,omitempty"`
}
// enrollOperatorResponse is returned by handleCreateEnrollmentToken.
// It carries the raw one-time enrollment token (only its hash is
// handed to the store) and the UTC time at which it expires.
type enrollOperatorResponse struct {
// Token is the raw enrollment token to pass to the agent installer.
Token string `json:"token"`
// ExpiresAt is computed by the handler as time.Now() plus its TTL, in
// UTC, when the response is built; it is not read back from the store.
ExpiresAt time.Time `json:"expires_at"`
}
// handleAgentEnroll consumes a one-time enrollment token, persists a
// Host row, and returns the persistent agent credentials. Open
// endpoint (no session) — the enrollment token is the credential.
//
// Responses:
//   - 201 with an enrollResponse body on success.
//   - 400 invalid_json when the body cannot be decoded (including a
//     body larger than the size cap).
//   - 400 missing_field when token, hostname, os or arch is empty.
//   - 401 invalid_token when the store refuses to consume the token.
//   - 409 host_exists when the host row cannot be created.
//   - 500 internal when the agent bearer cannot be generated.
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	// The endpoint is unauthenticated, so cap the body: a legitimate
	// payload is a handful of short strings, and an anonymous caller
	// must not be able to make the server read an unbounded stream.
	const maxEnrollBodyBytes = 64 << 10
	r.Body = stdhttp.MaxBytesReader(w, r.Body, maxEnrollBodyBytes)

	var req enrollRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}

	// Trim before validating so a whitespace-only hostname is rejected
	// here instead of being stored as an empty host name.
	hostName := strings.TrimSpace(req.HostName)
	if req.Token == "" || hostName == "" || req.OS == "" || req.Arch == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"token, hostname, os, arch all required")
		return
	}

	// Mint the persistent agent bearer before touching the enrollment
	// token, so a failure here does not burn the operator's token.
	agentToken, err := auth.NewToken()
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	hostID := ulid.Make().String()

	// Consume the token, then create the host. These are two separate
	// statements, not one transaction: if create-host fails, the token
	// is already burned. That's acceptable — operator just regens.
	tokHash := auth.HashToken(req.Token)
	if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
		writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
			"token unknown, expired, or already used")
		return
	}

	host := store.Host{
		ID:            hostID,
		Name:          hostName,
		OS:            string(req.OS),
		Arch:          string(req.Arch),
		AgentVersion:  req.AgentVersion,
		ResticVersion: req.ResticVersion,
		EnrolledAt:    time.Now().UTC(),
	}
	// Only the hash of the agent bearer is handed to the store.
	if err := s.deps.Store.CreateHost(r.Context(), host,
		auth.HashToken(agentToken), ""); err != nil {
		writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
		return
	}

	// Best-effort audit entry: the error is deliberately discarded so
	// that an audit write failure does not fail an enrollment whose
	// host row has already been created.
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		Actor:      "system",
		Action:     "host.enrolled",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         host.EnrolledAt,
	})

	writeJSON(w, stdhttp.StatusCreated, enrollResponse{
		HostID:     hostID,
		AgentToken: agentToken,
		// CertPinSHA256 is populated by a TLS-aware future revision.
		// For now (HTTP-or-TLS-by-Caddy) we leave it empty and rely
		// on the agent trusting its OS root store.
	})
}
// handleCreateEnrollmentToken is the operator-facing endpoint that
// issues a one-time enrollment token valid for one hour.
//
// The caller must present a session cookie that authedUser accepts;
// no role check is performed here yet, so any valid session may issue
// a token. The body must decode as an enrollOperatorRequest, although
// its fields are not used.
//
// Responses: 201 with the raw token and its expiry, 401 without a
// valid session, 400 for undecodable JSON, 500 if the token cannot be
// generated or stored.
//
// TODO: gate by authn middleware once login session lookup lands.
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	const ttl = time.Hour

	if authed := s.authedUser(r); !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
		return
	}

	var body enrollOperatorRequest
	decodeErr := json.NewDecoder(r.Body).Decode(&body)
	if decodeErr != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", decodeErr.Error())
		return
	}

	rawToken, mintErr := auth.NewToken()
	if mintErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// Only the hash is persisted; the raw token goes back to the caller.
	hashed := auth.HashToken(rawToken)
	if storeErr := s.deps.Store.CreateEnrollmentToken(r.Context(), hashed, ttl); storeErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	resp := enrollOperatorResponse{
		Token:     rawToken,
		ExpiresAt: time.Now().Add(ttl).UTC(),
	}
	writeJSON(w, stdhttp.StatusCreated, resp)
}
// authedUser reports whether the request carries a session cookie
// whose hashed value the store recognises as a session. It is a
// minimal stub: no role or permission is checked. Full RBAC
// middleware lands with P4-03.
func (s *Server) authedUser(r *stdhttp.Request) bool {
	cookie, err := r.Cookie(sessionCookieName)
	if err != nil {
		// No session cookie on the request.
		return false
	}
	if _, lookupErr := s.deps.Store.LookupSession(r.Context(), auth.HashToken(cookie.Value)); lookupErr != nil {
		return false
	}
	return true
}
// ptr returns a pointer to a fresh copy of s, which is convenient for
// filling optional *string fields from a literal.
func ptr(s string) *string {
	out := new(string)
	*out = s
	return out
}
+118
View File
@@ -0,0 +1,118 @@
package http
import (
"bytes"
"context"
"encoding/json"
"io"
stdhttp "net/http"
"net/http/httptest"
"path/filepath"
"testing"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// newTestServerWithHub mirrors newTestServer but plugs in a real
// ws.Hub so /ws/agent is available.
//
// It opens a store and generates a secret key inside t.TempDir(),
// builds a Server around them, and serves its handler from an
// httptest.Server. It returns the Server, the test server's base URL,
// and the store. Any setup failure aborts the test immediately. The
// HTTP server and the store are closed via t.Cleanup.
func newTestServerWithHub(t *testing.T) (*Server, string, *store.Store) {
	t.Helper()
	dir := t.TempDir()

	st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
	if err != nil {
		t.Fatalf("store: %v", err)
	}
	t.Cleanup(func() { _ = st.Close() })

	// Fail fast on key setup problems rather than handing the server a
	// nil AEAD that only misbehaves later, far from the real cause.
	keyPath := filepath.Join(dir, "secret.key")
	if err := crypto.GenerateKeyFile(keyPath); err != nil {
		t.Fatalf("generate key file: %v", err)
	}
	key, err := crypto.LoadKeyFromFile(keyPath)
	if err != nil {
		t.Fatalf("load key file: %v", err)
	}
	aead, err := crypto.NewAEAD(key)
	if err != nil {
		t.Fatalf("new aead: %v", err)
	}

	deps := Deps{
		Cfg:   config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath},
		Store: st,
		AEAD:  aead,
		Hub:   ws.NewHub(),
	}
	s := New(deps)

	// Registered after the store cleanup, so the HTTP server is shut
	// down first (cleanups run last-in, first-out).
	ts := httptest.NewServer(s.srv.Handler)
	t.Cleanup(ts.Close)
	return s, ts.URL, st
}
// TestEnrollmentBadToken checks that enrolling with a token that was
// never issued is answered with 401 Unauthorized.
func TestEnrollmentBadToken(t *testing.T) {
	t.Parallel()
	_, baseURL, _ := newTestServerWithHub(t)

	enroll := enrollRequest{
		Token:         "no-such-token",
		HostName:      "host1",
		OS:            api.OSLinux,
		Arch:          api.ArchAmd64,
		AgentVersion:  "0.1",
		ResticVersion: "0.17",
	}
	payload, _ := json.Marshal(enroll)

	resp, err := stdhttp.Post(baseURL+"/api/agents/enroll", "application/json", bytes.NewReader(payload))
	if err != nil {
		t.Fatalf("post: %v", err)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != stdhttp.StatusUnauthorized {
		t.Errorf("status: %d", got)
	}
}
// TestEnrollmentHappyPath drives a full enrollment: it issues a
// one-time token directly through the store, enrolls with it over
// HTTP, and then verifies that
//   - the response is 201 with a host ID and an agent token,
//   - replaying the same enrollment token is rejected with 401, and
//   - the store can look the host up by the hash of the agent token.
func TestEnrollmentHappyPath(t *testing.T) {
	t.Parallel()
	_, url, st := newTestServerWithHub(t)

	// Issue a token directly via the store (skipping the operator UI).
	rawToken, err := auth.NewToken()
	if err != nil {
		t.Fatalf("new token: %v", err)
	}
	if err := st.CreateEnrollmentToken(context.Background(),
		auth.HashToken(rawToken), 5*time.Minute); err != nil {
		t.Fatalf("issue: %v", err)
	}

	body, err := json.Marshal(enrollRequest{
		Token: rawToken, HostName: "test-host",
		OS: api.OSLinux, Arch: api.ArchAmd64,
		AgentVersion: "0.1", ResticVersion: "0.17",
	})
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	res, err := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
	if err != nil {
		t.Fatalf("post: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != stdhttp.StatusCreated {
		buf, _ := io.ReadAll(res.Body)
		t.Fatalf("status %d: %s", res.StatusCode, buf)
	}
	var er enrollResponse
	if err := json.NewDecoder(res.Body).Decode(&er); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if er.HostID == "" || er.AgentToken == "" {
		t.Errorf("missing fields in response: %+v", er)
	}

	// Token must not be reusable. Check the error before deferring the
	// close: on a failed request the response is nil and touching its
	// Body would panic instead of reporting the failure.
	res2, err := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
	if err != nil {
		t.Fatalf("second post: %v", err)
	}
	defer res2.Body.Close()
	if res2.StatusCode != stdhttp.StatusUnauthorized {
		t.Errorf("re-enrollment with same token should fail, got %d", res2.StatusCode)
	}

	// Host row exists with matching agent_token_hash.
	got, err := st.LookupHostByAgentToken(context.Background(), auth.HashToken(er.AgentToken))
	if err != nil {
		t.Fatalf("lookup by token: %v", err)
	}
	if got.Name != "test-host" || got.OS != "linux" {
		t.Errorf("host fields: %+v", got)
	}
}
+18
View File
@@ -15,6 +15,7 @@ import (
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
@@ -24,6 +25,7 @@ type Deps struct {
Cfg config.Config
Store *store.Store
AEAD *crypto.AEAD
Hub *ws.Hub
// BootstrapToken (optional, populated only on first run) is the raw
// admin-bootstrap token printed in the server logs. While set, the
// /bootstrap endpoint accepts it to create the first admin user.
@@ -73,8 +75,24 @@ func (s *Server) routes(r chi.Router) {
r.Post("/auth/login", s.handleLogin)
r.Post("/auth/logout", s.handleLogout)
r.Post("/bootstrap", s.handleBootstrap)
// Agent enrollment (open endpoint — token is the credential).
r.Post("/agents/enroll", s.handleAgentEnroll)
// Operator → server (authenticated). Spec.md §6.1's
// /hosts/{id}/enrollment-token (regenerate) lands when the
// host page can call it; for now just the create endpoint.
r.Post("/enrollment-tokens", s.handleCreateEnrollmentToken)
})
// Agent ↔ server WebSocket. Bearer-authenticated inside the handler.
if s.deps.Hub != nil {
r.Mount("/ws/agent", ws.AgentHandler(ws.HandlerDeps{
Hub: s.deps.Hub,
Store: s.deps.Store,
}))
}
// UI handlers will hang off / — Phase 1 will add them.
r.Get("/", func(w stdhttp.ResponseWriter, _ *stdhttp.Request) {
_, _ = fmt.Fprint(w, "restic-manager — UI not yet implemented")