9cc0caff1e
Lands the protocol layer end-to-end: an agent can be enrolled through the operator UI, store credentials, dial back to the server over WS, complete the protocol_version handshake, and stay connected with periodic heartbeats. Server side: - P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction, json envelope writer with a write mutex, reader, error envelopes. - P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage (10s deadline, protocol_version checked against api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on reject), main read loop, defer hub register/unregister. - P1-10 POST /api/agents/enroll consumes a one-time token, mints a persistent agent bearer (sha-256 stored), creates a host row. - P1-10 POST /api/enrollment-tokens (operator, session-auth) issues a 1h one-time token. - P1-11 hello upserts agent_version + restic_version + protocol_version on the host row, flips status to online. - P1-12 heartbeat touches last_seen_at; background sweeper marks hosts offline after 90s without one. - store: hosts table accessors, host_schedule_version, enrollment_tokens FK on consumed_host dropped (audit-only field; the token gets burned before the host row exists). Agent side: - P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml, atomic Save (tmp+fsync+rename), Enrolled() helper. - P1-15 internal/agent/wsclient: dial with bearer + optional TLS cert pinning (sha-256 of leaf), exponential backoff with jitter (1s → 60s cap), heartbeat goroutine, fatal handling for ErrProtocolTooOld. - P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo. - P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version collection. restic detected by `restic version` parse; absent restic doesn't block startup. - cmd/agent: -enroll-server / -enroll-token flags drive first-run enrollment then exit (so the install script can hand off to systemd to run the persistent service). 
End-to-end smoke verified: bootstrap → login → issue token → enroll → run agent → server logs `ws agent connected` with the right host_id and protocol_version 1. All tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
166 lines
5.4 KiB
Go
166 lines
5.4 KiB
Go
package http
|
|
|
|
import (
|
|
"encoding/json"
|
|
stdhttp "net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/oklog/ulid/v2"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// enrollRequest is the body posted by the agent installer. The token
|
|
// was issued by the operator via the UI ("Add host" → P1-27); the
|
|
// host metadata comes from the agent's own sysinfo collection.
|
|
type enrollRequest struct {
|
|
Token string `json:"token"`
|
|
HostName string `json:"hostname"`
|
|
OS api.HostOS `json:"os"`
|
|
Arch api.HostArch `json:"arch"`
|
|
AgentVersion string `json:"agent_version"`
|
|
ResticVersion string `json:"restic_version"`
|
|
}
|
|
|
|
// enrollResponse carries the long-lived credentials back to a freshly
// enrolled agent.
//
// AgentToken is shown exactly once; the server stores only its hash.
// CertPinSHA256 is the SHA-256 of the server's certificate, base64;
// the agent pins this on every reconnect so a stolen DB at the
// control plane can't be replayed against an attacker's TLS endpoint.
type enrollResponse struct {
	// HostID identifies the host row created for this agent.
	HostID string `json:"host_id"`
	// AgentToken is the plaintext persistent bearer for the agent.
	AgentToken string `json:"agent_token"`
	// CertPinSHA256 is left out of the JSON entirely when empty.
	CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
|
|
|
|
// enrollOperatorRequest is the JSON body an operator sends to obtain a
// one-time enrollment token shortly before installing an agent. It is
// only accepted on the authenticated UI route.
type enrollOperatorRequest struct {
	// HostName is the name of the host the operator is about to add.
	HostName string `json:"hostname"`
	// Tags are optional labels; omitted from the JSON when empty.
	Tags []string `json:"tags,omitempty"`
}
|
|
|
|
// enrollOperatorResponse is returned to the operator after an
// enrollment token has been issued.
type enrollOperatorResponse struct {
	// Token is the plaintext enrollment token to hand to the installer.
	Token string `json:"token"`
	// ExpiresAt is the instant after which the token is reported as expired.
	ExpiresAt time.Time `json:"expires_at"`
}
|
|
|
|
// handleAgentEnroll consumes a one-time token, persists a Host row,
|
|
// and returns persistent agent credentials. Open endpoint (no
|
|
// session) — the token is the credential.
|
|
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
var req enrollRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
|
return
|
|
}
|
|
if req.Token == "" || req.HostName == "" || req.OS == "" || req.Arch == "" {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
|
|
"token, hostname, os, arch all required")
|
|
return
|
|
}
|
|
|
|
hostID := ulid.Make().String()
|
|
|
|
// Atomically: validate + consume token, then create the host.
|
|
// We do these in two statements; if create-host fails, the token
|
|
// is already burned. That's acceptable — operator just regens.
|
|
tokHash := auth.HashToken(req.Token)
|
|
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
|
"token unknown, expired, or already used")
|
|
return
|
|
}
|
|
|
|
// Mint the persistent agent bearer.
|
|
agentToken, err := auth.NewToken()
|
|
if err != nil {
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
|
return
|
|
}
|
|
|
|
host := store.Host{
|
|
ID: hostID,
|
|
Name: strings.TrimSpace(req.HostName),
|
|
OS: string(req.OS),
|
|
Arch: string(req.Arch),
|
|
AgentVersion: req.AgentVersion,
|
|
ResticVersion: req.ResticVersion,
|
|
EnrolledAt: time.Now().UTC(),
|
|
}
|
|
if err := s.deps.Store.CreateHost(r.Context(), host,
|
|
auth.HashToken(agentToken), ""); err != nil {
|
|
writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
|
|
return
|
|
}
|
|
|
|
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
|
ID: ulid.Make().String(),
|
|
Actor: "system",
|
|
Action: "host.enrolled",
|
|
TargetKind: ptr("host"),
|
|
TargetID: &hostID,
|
|
TS: host.EnrolledAt,
|
|
})
|
|
|
|
writeJSON(w, stdhttp.StatusCreated, enrollResponse{
|
|
HostID: hostID,
|
|
AgentToken: agentToken,
|
|
// CertPinSHA256 is populated by a TLS-aware future revision.
|
|
// For now (HTTP-or-TLS-by-Caddy) we leave it empty and rely
|
|
// on the agent trusting its OS root store.
|
|
})
|
|
}
|
|
|
|
// handleCreateEnrollmentToken (operator-facing) — generates a
|
|
// short-lived token for a new host. Authenticated; admin/operator only.
|
|
//
|
|
// TODO: gate by authn middleware once login session lookup lands.
|
|
// For Phase 1's first slice, we accept the bootstrap-shipped admin
|
|
// session cookie and trust it, validating the cookie via store.
|
|
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
if !s.authedUser(r) {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
|
return
|
|
}
|
|
|
|
var req enrollOperatorRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
|
return
|
|
}
|
|
|
|
token, err := auth.NewToken()
|
|
if err != nil {
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
|
return
|
|
}
|
|
const ttl = time.Hour
|
|
if err := s.deps.Store.CreateEnrollmentToken(r.Context(), auth.HashToken(token), ttl); err != nil {
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
|
return
|
|
}
|
|
|
|
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{
|
|
Token: token,
|
|
ExpiresAt: time.Now().Add(ttl).UTC(),
|
|
})
|
|
}
|
|
|
|
// authedUser returns true iff the request carries a valid session
|
|
// cookie. Minimal stub for now; full RBAC middleware lands with
|
|
// P4-03.
|
|
func (s *Server) authedUser(r *stdhttp.Request) bool {
|
|
c, err := r.Cookie(sessionCookieName)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
_, err = s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
|
|
return err == nil
|
|
}
|
|
|
|
// ptr returns a pointer to a fresh copy of s, for populating optional
// *string fields from a literal.
func ptr(s string) *string {
	v := s
	return &v
}
|