phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled through the operator UI, store credentials, dial back to the server over WS, complete the protocol_version handshake, and stay connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction, json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage (10s deadline, protocol_version checked against api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth) issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version + protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version, enrollment_tokens FK on consumed_host dropped (audit-only field; the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml, atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional TLS cert pinning (sha-256 of leaf), exponential backoff with jitter (1s → 60s cap), heartbeat goroutine, fatal handling for ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version collection. restic detected by `restic version` parse; absent restic doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run enrollment then exit (so the install script can hand off to systemd to run the persistent service).
End-to-end smoke verified: bootstrap → login → issue token → enroll → run agent → server logs `ws agent connected` with the right host_id and protocol_version 1. All tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// enrollRequest is the body posted by the agent installer. The token
|
||||
// was issued by the operator via the UI ("Add host" → P1-27); the
|
||||
// host metadata comes from the agent's own sysinfo collection.
|
||||
type enrollRequest struct {
|
||||
Token string `json:"token"`
|
||||
HostName string `json:"hostname"`
|
||||
OS api.HostOS `json:"os"`
|
||||
Arch api.HostArch `json:"arch"`
|
||||
AgentVersion string `json:"agent_version"`
|
||||
ResticVersion string `json:"restic_version"`
|
||||
}
|
||||
|
||||
// enrollResponse carries the long-lived credentials back to a freshly
// enrolled agent.
type enrollResponse struct {
	// HostID identifies the host row created for this agent.
	HostID string `json:"host_id"`

	// AgentToken is the raw persistent bearer. It is shown exactly once;
	// the server keeps only its hash.
	AgentToken string `json:"agent_token"`

	// CertPinSHA256 is the base64 SHA-256 of the server's certificate.
	// The agent pins it on every reconnect so a stolen control-plane DB
	// can't be replayed against an attacker's TLS endpoint. Omitted from
	// the JSON when empty.
	CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
|
||||
|
||||
// enrollOperatorRequest is the JSON body of the authenticated UI route
// an operator calls to obtain a one-time enrollment token just before
// installing an agent.
type enrollOperatorRequest struct {
	// HostName is the name of the host the operator is about to add.
	HostName string `json:"hostname"`

	// Tags are optional labels for the host; omitted from JSON when empty.
	Tags []string `json:"tags,omitempty"`
}
|
||||
|
||||
// enrollOperatorResponse is returned to the operator after a one-time
// enrollment token has been created.
type enrollOperatorResponse struct {
	// Token is the raw enrollment token to hand to the agent installer.
	Token string `json:"token"`

	// ExpiresAt is the instant after which the token is reported as expired.
	ExpiresAt time.Time `json:"expires_at"`
}
|
||||
|
||||
// handleAgentEnroll consumes a one-time token, persists a Host row,
|
||||
// and returns persistent agent credentials. Open endpoint (no
|
||||
// session) — the token is the credential.
|
||||
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
var req enrollRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
||||
return
|
||||
}
|
||||
if req.Token == "" || req.HostName == "" || req.OS == "" || req.Arch == "" {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
|
||||
"token, hostname, os, arch all required")
|
||||
return
|
||||
}
|
||||
|
||||
hostID := ulid.Make().String()
|
||||
|
||||
// Atomically: validate + consume token, then create the host.
|
||||
// We do these in two statements; if create-host fails, the token
|
||||
// is already burned. That's acceptable — operator just regens.
|
||||
tokHash := auth.HashToken(req.Token)
|
||||
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
||||
"token unknown, expired, or already used")
|
||||
return
|
||||
}
|
||||
|
||||
// Mint the persistent agent bearer.
|
||||
agentToken, err := auth.NewToken()
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
|
||||
host := store.Host{
|
||||
ID: hostID,
|
||||
Name: strings.TrimSpace(req.HostName),
|
||||
OS: string(req.OS),
|
||||
Arch: string(req.Arch),
|
||||
AgentVersion: req.AgentVersion,
|
||||
ResticVersion: req.ResticVersion,
|
||||
EnrolledAt: time.Now().UTC(),
|
||||
}
|
||||
if err := s.deps.Store.CreateHost(r.Context(), host,
|
||||
auth.HashToken(agentToken), ""); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
Actor: "system",
|
||||
Action: "host.enrolled",
|
||||
TargetKind: ptr("host"),
|
||||
TargetID: &hostID,
|
||||
TS: host.EnrolledAt,
|
||||
})
|
||||
|
||||
writeJSON(w, stdhttp.StatusCreated, enrollResponse{
|
||||
HostID: hostID,
|
||||
AgentToken: agentToken,
|
||||
// CertPinSHA256 is populated by a TLS-aware future revision.
|
||||
// For now (HTTP-or-TLS-by-Caddy) we leave it empty and rely
|
||||
// on the agent trusting its OS root store.
|
||||
})
|
||||
}
|
||||
|
||||
// handleCreateEnrollmentToken (operator-facing) — generates a
|
||||
// short-lived token for a new host. Authenticated; admin/operator only.
|
||||
//
|
||||
// TODO: gate by authn middleware once login session lookup lands.
|
||||
// For Phase 1's first slice, we accept the bootstrap-shipped admin
|
||||
// session cookie and trust it, validating the cookie via store.
|
||||
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
return
|
||||
}
|
||||
|
||||
var req enrollOperatorRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
token, err := auth.NewToken()
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
const ttl = time.Hour
|
||||
if err := s.deps.Store.CreateEnrollmentToken(r.Context(), auth.HashToken(token), ttl); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{
|
||||
Token: token,
|
||||
ExpiresAt: time.Now().Add(ttl).UTC(),
|
||||
})
|
||||
}
|
||||
|
||||
// authedUser returns true iff the request carries a valid session
|
||||
// cookie. Minimal stub for now; full RBAC middleware lands with
|
||||
// P4-03.
|
||||
func (s *Server) authedUser(r *stdhttp.Request) bool {
|
||||
c, err := r.Cookie(sessionCookieName)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, err = s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// ptr returns a pointer to a copy of s, which is handy for filling
// optional *string fields from string literals.
func ptr(s string) *string {
	out := new(string)
	*out = s
	return out
}
|
||||
@@ -0,0 +1,118 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
stdhttp "net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// newTestServerWithHub mirrors newTestServer but plugs in a real
|
||||
// ws.Hub so /ws/agent is available.
|
||||
func newTestServerWithHub(t *testing.T) (*Server, string, *store.Store) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("store: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = st.Close() })
|
||||
|
||||
keyPath := filepath.Join(dir, "secret.key")
|
||||
_ = crypto.GenerateKeyFile(keyPath)
|
||||
key, _ := crypto.LoadKeyFromFile(keyPath)
|
||||
aead, _ := crypto.NewAEAD(key)
|
||||
|
||||
deps := Deps{
|
||||
Cfg: config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath},
|
||||
Store: st,
|
||||
AEAD: aead,
|
||||
Hub: ws.NewHub(),
|
||||
}
|
||||
s := New(deps)
|
||||
ts := httptest.NewServer(s.srv.Handler)
|
||||
t.Cleanup(ts.Close)
|
||||
return s, ts.URL, st
|
||||
}
|
||||
|
||||
func TestEnrollmentBadToken(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, url, _ := newTestServerWithHub(t)
|
||||
|
||||
body, _ := json.Marshal(enrollRequest{
|
||||
Token: "no-such-token", HostName: "host1",
|
||||
OS: api.OSLinux, Arch: api.ArchAmd64,
|
||||
AgentVersion: "0.1", ResticVersion: "0.17",
|
||||
})
|
||||
res, err := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
t.Fatalf("post: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusUnauthorized {
|
||||
t.Errorf("status: %d", res.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnrollmentHappyPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
_, url, st := newTestServerWithHub(t)
|
||||
|
||||
// Issue a token directly via the store (skipping the operator UI).
|
||||
rawToken, _ := auth.NewToken()
|
||||
if err := st.CreateEnrollmentToken(context.Background(),
|
||||
auth.HashToken(rawToken), 5*time.Minute); err != nil {
|
||||
t.Fatalf("issue: %v", err)
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(enrollRequest{
|
||||
Token: rawToken, HostName: "test-host",
|
||||
OS: api.OSLinux, Arch: api.ArchAmd64,
|
||||
AgentVersion: "0.1", ResticVersion: "0.17",
|
||||
})
|
||||
res, err := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
t.Fatalf("post: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != stdhttp.StatusCreated {
|
||||
buf, _ := io.ReadAll(res.Body)
|
||||
t.Fatalf("status %d: %s", res.StatusCode, buf)
|
||||
}
|
||||
|
||||
var er enrollResponse
|
||||
if err := json.NewDecoder(res.Body).Decode(&er); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if er.HostID == "" || er.AgentToken == "" {
|
||||
t.Errorf("missing fields in response: %+v", er)
|
||||
}
|
||||
|
||||
// Token must not be reusable.
|
||||
res2, _ := stdhttp.Post(url+"/api/agents/enroll", "application/json", bytes.NewReader(body))
|
||||
defer res2.Body.Close()
|
||||
if res2.StatusCode != stdhttp.StatusUnauthorized {
|
||||
t.Errorf("re-enrollment with same token should fail, got %d", res2.StatusCode)
|
||||
}
|
||||
|
||||
// Host row exists with matching agent_token_hash.
|
||||
got, err := st.LookupHostByAgentToken(context.Background(), auth.HashToken(er.AgentToken))
|
||||
if err != nil {
|
||||
t.Fatalf("lookup by token: %v", err)
|
||||
}
|
||||
if got.Name != "test-host" || got.OS != "linux" {
|
||||
t.Errorf("host fields: %+v", got)
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
@@ -24,6 +25,7 @@ type Deps struct {
|
||||
Cfg config.Config
|
||||
Store *store.Store
|
||||
AEAD *crypto.AEAD
|
||||
Hub *ws.Hub
|
||||
// BootstrapToken (optional, populated only on first run) is the raw
|
||||
// admin-bootstrap token printed in the server logs. While set, the
|
||||
// /bootstrap endpoint accepts it to create the first admin user.
|
||||
@@ -73,8 +75,24 @@ func (s *Server) routes(r chi.Router) {
|
||||
r.Post("/auth/login", s.handleLogin)
|
||||
r.Post("/auth/logout", s.handleLogout)
|
||||
r.Post("/bootstrap", s.handleBootstrap)
|
||||
|
||||
// Agent enrollment (open endpoint — token is the credential).
|
||||
r.Post("/agents/enroll", s.handleAgentEnroll)
|
||||
|
||||
// Operator → server (authenticated). Spec.md §6.1's
|
||||
// /hosts/{id}/enrollment-token (regenerate) lands when the
|
||||
// host page can call it; for now just the create endpoint.
|
||||
r.Post("/enrollment-tokens", s.handleCreateEnrollmentToken)
|
||||
})
|
||||
|
||||
// Agent ↔ server WebSocket. Bearer-authenticated inside the handler.
|
||||
if s.deps.Hub != nil {
|
||||
r.Mount("/ws/agent", ws.AgentHandler(ws.HandlerDeps{
|
||||
Hub: s.deps.Hub,
|
||||
Store: s.deps.Store,
|
||||
}))
|
||||
}
|
||||
|
||||
// UI handlers will hang off / — Phase 1 will add them.
|
||||
r.Get("/", func(w stdhttp.ResponseWriter, _ *stdhttp.Request) {
|
||||
_, _ = fmt.Fprint(w, "restic-manager — UI not yet implemented")
|
||||
|
||||
Reference in New Issue
Block a user