Files
restic-manager/internal/server/http/enrollment.go
T
steve f34773b505 phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled
through the operator UI, store credentials, dial back to the server
over WS, complete the protocol_version handshake, and stay
connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction,
  json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage
  (10s deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth)
  issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks
  hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field;
  the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional
  TLS cert pinning (sha-256 of leaf), exponential backoff with
  jitter (1s → 60s cap), heartbeat goroutine, fatal handling for
  ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic detected by `restic version` parse; absent
  restic doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to
  systemd to run the persistent service).

End-to-end smoke verified: bootstrap → login → issue token →
enroll → run agent → server logs `ws agent connected` with the
right host_id and protocol_version 1.

All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 00:39:00 +01:00

166 lines
5.4 KiB
Go

package http
import (
"encoding/json"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// enrollRequest is the body posted by the agent installer. The token
// was issued by the operator via the UI ("Add host" → P1-27); the
// host metadata comes from the agent's own sysinfo collection.
type enrollRequest struct {
Token string `json:"token"`
HostName string `json:"hostname"`
OS api.HostOS `json:"os"`
Arch api.HostArch `json:"arch"`
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
}
// enrollResponse hands the agent the credentials it'll use forever.
// AgentToken is shown exactly once; the server stores its hash.
// CertPinSHA256 is the SHA-256 of the server's certificate, base64;
// the agent pins this on every reconnect so a stolen DB at the
// control plane can't be replayed against an attacker's TLS endpoint.
type enrollResponse struct {
HostID string `json:"host_id"`
AgentToken string `json:"agent_token"`
CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
// enrollOperatorRequest creates a one-time enrollment token for an
// operator who is about to install an agent. Authenticated UI route.
type enrollOperatorRequest struct {
HostName string `json:"hostname"`
Tags []string `json:"tags,omitempty"`
}
type enrollOperatorResponse struct {
Token string `json:"token"`
ExpiresAt time.Time `json:"expires_at"`
}
// handleAgentEnroll consumes a one-time token, persists a Host row,
// and returns persistent agent credentials. Open endpoint (no
// session) — the token is the credential.
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
var req enrollRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
return
}
if req.Token == "" || req.HostName == "" || req.OS == "" || req.Arch == "" {
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
"token, hostname, os, arch all required")
return
}
hostID := ulid.Make().String()
// Atomically: validate + consume token, then create the host.
// We do these in two statements; if create-host fails, the token
// is already burned. That's acceptable — operator just regens.
tokHash := auth.HashToken(req.Token)
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
"token unknown, expired, or already used")
return
}
// Mint the persistent agent bearer.
agentToken, err := auth.NewToken()
if err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
host := store.Host{
ID: hostID,
Name: strings.TrimSpace(req.HostName),
OS: string(req.OS),
Arch: string(req.Arch),
AgentVersion: req.AgentVersion,
ResticVersion: req.ResticVersion,
EnrolledAt: time.Now().UTC(),
}
if err := s.deps.Store.CreateHost(r.Context(), host,
auth.HashToken(agentToken), ""); err != nil {
writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
return
}
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
ID: ulid.Make().String(),
Actor: "system",
Action: "host.enrolled",
TargetKind: ptr("host"),
TargetID: &hostID,
TS: host.EnrolledAt,
})
writeJSON(w, stdhttp.StatusCreated, enrollResponse{
HostID: hostID,
AgentToken: agentToken,
// CertPinSHA256 is populated by a TLS-aware future revision.
// For now (HTTP-or-TLS-by-Caddy) we leave it empty and rely
// on the agent trusting its OS root store.
})
}
// handleCreateEnrollmentToken (operator-facing) — generates a
// short-lived token for a new host. Authenticated; admin/operator only.
//
// TODO: gate by authn middleware once login session lookup lands.
// For Phase 1's first slice, we accept the bootstrap-shipped admin
// session cookie and trust it, validating the cookie via store.
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
return
}
var req enrollOperatorRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
return
}
token, err := auth.NewToken()
if err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
const ttl = time.Hour
if err := s.deps.Store.CreateEnrollmentToken(r.Context(), auth.HashToken(token), ttl); err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{
Token: token,
ExpiresAt: time.Now().Add(ttl).UTC(),
})
}
// authedUser returns true iff the request carries a valid session
// cookie. Minimal stub for now; full RBAC middleware lands with
// P4-03.
func (s *Server) authedUser(r *stdhttp.Request) bool {
c, err := r.Cookie(sessionCookieName)
if err != nil {
return false
}
_, err = s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
return err == nil
}
func ptr(s string) *string { return &s }