phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled through
the operator UI, store credentials, dial back to the server over WS,
complete the protocol_version handshake, and stay connected with
periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction, json
  envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage (10s
  deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth) issues a
  1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks hosts
  offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field; the
  token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional TLS cert
  pinning (sha-256 of leaf), exponential backoff with jitter (1s → 60s
  cap), heartbeat goroutine, fatal handling for ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic detected by `restic version` parse; absent restic
  doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to systemd
  to run the persistent service).
End-to-end smoke verified: bootstrap → login → issue token → enroll →
run agent → server logs `ws agent connected` with the right host_id and
protocol_version 1. All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
stdhttp "net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/oklog/ulid/v2"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
// enrollRequest is the body posted by the agent installer. The token
|
||||
// was issued by the operator via the UI ("Add host" → P1-27); the
|
||||
// host metadata comes from the agent's own sysinfo collection.
|
||||
type enrollRequest struct {
|
||||
Token string `json:"token"`
|
||||
HostName string `json:"hostname"`
|
||||
OS api.HostOS `json:"os"`
|
||||
Arch api.HostArch `json:"arch"`
|
||||
AgentVersion string `json:"agent_version"`
|
||||
ResticVersion string `json:"restic_version"`
|
||||
}
|
||||
|
||||
// enrollResponse carries the long-lived credentials back to a freshly
// enrolled agent.
//
// AgentToken is the plaintext bearer; it is returned exactly once and
// the server keeps only its hash.
//
// CertPinSHA256 is meant to hold the base64-encoded SHA-256 of the
// server's certificate so the agent can pin it on every reconnect.
// handleAgentEnroll does not populate it yet, so omitempty drops the
// key from the response.
type enrollResponse struct {
	// HostID is the identifier of the host row created by enrollment.
	HostID string `json:"host_id"`

	// AgentToken is the plaintext persistent agent bearer.
	AgentToken string `json:"agent_token"`

	// CertPinSHA256 is the optional certificate pin (see type comment).
	CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
|
||||
|
||||
// enrollOperatorRequest is the JSON body an operator sends when asking
// for a one-time enrollment token ahead of installing an agent. It is
// decoded by the authenticated UI route.
type enrollOperatorRequest struct {
	// HostName is the name the operator supplies for the new host.
	HostName string `json:"hostname"`

	// Tags is an optional list of labels; omitted from JSON when empty.
	Tags []string `json:"tags,omitempty"`
}
|
||||
|
||||
// enrollOperatorResponse is returned to the operator after a one-time
// enrollment token has been created.
type enrollOperatorResponse struct {
	// Token is the plaintext enrollment token to hand to the installer.
	Token string `json:"token"`

	// ExpiresAt is the instant after which the token is reported as expired.
	ExpiresAt time.Time `json:"expires_at"`
}
|
||||
|
||||
// handleAgentEnroll consumes a one-time token, persists a Host row,
|
||||
// and returns persistent agent credentials. Open endpoint (no
|
||||
// session) — the token is the credential.
|
||||
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
var req enrollRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
||||
return
|
||||
}
|
||||
if req.Token == "" || req.HostName == "" || req.OS == "" || req.Arch == "" {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
|
||||
"token, hostname, os, arch all required")
|
||||
return
|
||||
}
|
||||
|
||||
hostID := ulid.Make().String()
|
||||
|
||||
// Atomically: validate + consume token, then create the host.
|
||||
// We do these in two statements; if create-host fails, the token
|
||||
// is already burned. That's acceptable — operator just regens.
|
||||
tokHash := auth.HashToken(req.Token)
|
||||
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
||||
"token unknown, expired, or already used")
|
||||
return
|
||||
}
|
||||
|
||||
// Mint the persistent agent bearer.
|
||||
agentToken, err := auth.NewToken()
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
|
||||
host := store.Host{
|
||||
ID: hostID,
|
||||
Name: strings.TrimSpace(req.HostName),
|
||||
OS: string(req.OS),
|
||||
Arch: string(req.Arch),
|
||||
AgentVersion: req.AgentVersion,
|
||||
ResticVersion: req.ResticVersion,
|
||||
EnrolledAt: time.Now().UTC(),
|
||||
}
|
||||
if err := s.deps.Store.CreateHost(r.Context(), host,
|
||||
auth.HashToken(agentToken), ""); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
Actor: "system",
|
||||
Action: "host.enrolled",
|
||||
TargetKind: ptr("host"),
|
||||
TargetID: &hostID,
|
||||
TS: host.EnrolledAt,
|
||||
})
|
||||
|
||||
writeJSON(w, stdhttp.StatusCreated, enrollResponse{
|
||||
HostID: hostID,
|
||||
AgentToken: agentToken,
|
||||
// CertPinSHA256 is populated by a TLS-aware future revision.
|
||||
// For now (HTTP-or-TLS-by-Caddy) we leave it empty and rely
|
||||
// on the agent trusting its OS root store.
|
||||
})
|
||||
}
|
||||
|
||||
// handleCreateEnrollmentToken (operator-facing) — generates a
|
||||
// short-lived token for a new host. Authenticated; admin/operator only.
|
||||
//
|
||||
// TODO: gate by authn middleware once login session lookup lands.
|
||||
// For Phase 1's first slice, we accept the bootstrap-shipped admin
|
||||
// session cookie and trust it, validating the cookie via store.
|
||||
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||
if !s.authedUser(r) {
|
||||
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
||||
return
|
||||
}
|
||||
|
||||
var req enrollOperatorRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
token, err := auth.NewToken()
|
||||
if err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
const ttl = time.Hour
|
||||
if err := s.deps.Store.CreateEnrollmentToken(r.Context(), auth.HashToken(token), ttl); err != nil {
|
||||
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{
|
||||
Token: token,
|
||||
ExpiresAt: time.Now().Add(ttl).UTC(),
|
||||
})
|
||||
}
|
||||
|
||||
// authedUser returns true iff the request carries a valid session
|
||||
// cookie. Minimal stub for now; full RBAC middleware lands with
|
||||
// P4-03.
|
||||
func (s *Server) authedUser(r *stdhttp.Request) bool {
|
||||
c, err := r.Cookie(sessionCookieName)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, err = s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// ptr returns a pointer to a fresh copy of s, for populating optional
// *string fields from a literal.
func ptr(s string) *string {
	v := s
	return &v
}
|
||||
Reference in New Issue
Block a user