Files
restic-manager/internal/server/http/enrollment.go
T
steve 44feb708bc fix: enrollment FK race + log-when-rejected; runbook fixes from dry-run
The smoke runbook caught a real bug: ConsumeEnrollmentToken was
inserting into host_credentials (FK -> hosts) inside the same tx as
the token burn, but the host row didn't exist yet — CreateHost
runs in the *next* statement. The agent saw a generic 401 with no
clue why.

Fix: drop the host_credentials insert from ConsumeEnrollmentToken;
the HTTP handler now does Consume -> CreateHost ->
SetHostCredentials. SetHostCredentials failure is logged loudly
but doesn't fail the enrol — operator recovers via PUT
/api/hosts/{id}/repo-credentials.

Adds slog.Warn lines on both 401 paths in handleAgentEnroll so the
underlying cause is visible in server logs (the wire response stays
generic to avoid leaking which step failed).

Test: TestEnrollmentTransfersRepoCreds rewritten to mirror the new
order (consume -> create host -> SetHostCredentials).

Runbook (docs/e2e-smoke.md): rest-server moved off 8000 (commonly
in use); URLs use trailing slash on the rest path; clarified that
secrets_key is minted on first agent start, not at enrol time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:01:59 +01:00

265 lines
9.1 KiB
Go

package http
import (
"context"
"encoding/json"
"fmt"
"log/slog"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// enrollRequest is the body posted by the agent installer. The token
// was issued by the operator via the UI ("Add host" → P1-27); the
// host metadata comes from the agent's own sysinfo collection.
type enrollRequest struct {
Token string `json:"token"`
HostName string `json:"hostname"`
OS api.HostOS `json:"os"`
Arch api.HostArch `json:"arch"`
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
}
// enrollResponse hands the agent the credentials it'll use forever.
// AgentToken is shown exactly once; the server stores its hash.
//
// CertPinSHA256 is reserved for future use. The server is HTTP-only
// and sits behind a reverse proxy that owns the TLS cert; pinning is
// configured at the agent install step (`-cert-pin`) by the operator
// pasting in the proxy's cert hash. The field stays in the response
// shape so we can populate it later if the topology changes.
type enrollResponse struct {
HostID string `json:"host_id"`
AgentToken string `json:"agent_token"`
CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
// enrollOperatorRequest creates a one-time enrollment token for an
// operator who is about to install an agent. Authenticated UI route.
//
// Repo creds are required at token-mint time so the agent can run a
// backup the moment it comes online. The trio is JSON-encoded,
// AEAD-encrypted with token_hash as additional data, and stashed on
// the token row. ConsumeEnrollmentToken re-encrypts under host_id
// and writes the host_credentials row in the same tx as token-burn.
type enrollOperatorRequest struct {
HostName string `json:"hostname"`
Tags []string `json:"tags,omitempty"`
RepoURL string `json:"repo_url"`
RepoUsername string `json:"repo_username"`
RepoPassword string `json:"repo_password"`
}
type enrollOperatorResponse struct {
Token string `json:"token"`
ExpiresAt time.Time `json:"expires_at"`
}
// repoCredsBlob is the JSON shape of the encrypted repo-creds blob.
// Lives only inside AEAD ciphertext — never on the wire as plaintext
// outside the WS config.update push.
type repoCredsBlob struct {
RepoURL string `json:"repo_url"`
RepoUsername string `json:"repo_username"`
RepoPassword string `json:"repo_password"`
}
// handleAgentEnroll consumes a one-time token, persists a Host row,
// and returns persistent agent credentials. Open endpoint (no
// session) — the token is the credential.
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
var req enrollRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
return
}
if req.Token == "" || req.HostName == "" || req.OS == "" || req.Arch == "" {
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
"token, hostname, os, arch all required")
return
}
hostID := ulid.Make().String()
// Atomically: validate + consume token, then create the host.
// We do these in two statements; if create-host fails, the token
// is already burned. That's acceptable — operator just regens.
tokHash := auth.HashToken(req.Token)
// If the token carries repo creds, re-encrypt them under the new
// host_id so the host_credentials row is bound to the host (not
// the token, which is about to disappear).
encForHost, err := s.rebindTokenCreds(r.Context(), tokHash, hostID)
if err != nil {
slog.Warn("enrollment: rebind token creds failed", "err", err)
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
"token unknown, expired, or already used")
return
}
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
slog.Warn("enrollment: consume token failed", "err", err)
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
"token unknown, expired, or already used")
return
}
// Mint the persistent agent bearer.
agentToken, err := auth.NewToken()
if err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
host := store.Host{
ID: hostID,
Name: strings.TrimSpace(req.HostName),
OS: string(req.OS),
Arch: string(req.Arch),
AgentVersion: req.AgentVersion,
ResticVersion: req.ResticVersion,
EnrolledAt: time.Now().UTC(),
}
if err := s.deps.Store.CreateHost(r.Context(), host,
auth.HashToken(agentToken), ""); err != nil {
writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
return
}
// Promote the encrypted repo creds onto the freshly-created host
// row. If this fails for any reason we log loudly but still
// return the bearer — the operator recovers via PUT
// /api/hosts/{id}/repo-credentials. Failing the whole enrolment
// here would leave a half-burned token + an orphan host.
if encForHost != "" {
if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, encForHost); err != nil {
slog.Error("enrollment: set host credentials failed",
"host_id", hostID, "err", err)
}
}
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
ID: ulid.Make().String(),
Actor: "system",
Action: "host.enrolled",
TargetKind: ptr("host"),
TargetID: &hostID,
TS: host.EnrolledAt,
})
writeJSON(w, stdhttp.StatusCreated, enrollResponse{
HostID: hostID,
AgentToken: agentToken,
// CertPinSHA256: the server is HTTP-only and sits behind a
// reverse proxy that owns the cert. The operator pastes the
// proxy's cert hash into the install command (`-cert-pin`)
// when they want pinning; the server cannot introspect a
// cert it doesn't terminate.
})
}
// handleCreateEnrollmentToken (operator-facing) — generates a
// short-lived token for a new host. Authenticated; admin/operator only.
//
// TODO: gate by authn middleware once login session lookup lands.
// For Phase 1's first slice, we accept the bootstrap-shipped admin
// session cookie and trust it, validating the cookie via store.
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
if !s.authedUser(r) {
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
return
}
var req enrollOperatorRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
return
}
if req.RepoURL == "" || req.RepoPassword == "" {
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
"repo_url and repo_password are required so the agent can run backups on first connect")
return
}
token, err := auth.NewToken()
if err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
tokHash := auth.HashToken(token)
enc, err := s.encryptRepoCreds(repoCredsBlob{
RepoURL: req.RepoURL, RepoUsername: req.RepoUsername, RepoPassword: req.RepoPassword,
}, []byte("token:"+tokHash))
if err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
const ttl = time.Hour
if err := s.deps.Store.CreateEnrollmentToken(r.Context(), tokHash, ttl, enc); err != nil {
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
return
}
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{
Token: token,
ExpiresAt: time.Now().Add(ttl).UTC(),
})
}
// rebindTokenCreds decrypts the creds attached to the token (if any),
// re-encrypts under the new host_id, and returns the new ciphertext.
// Empty return = the token had no creds attached, which we treat as
// a hard error today (the operator must supply creds at mint time).
func (s *Server) rebindTokenCreds(ctx context.Context, tokHash, hostID string) (string, error) {
enc, err := s.deps.Store.GetEnrollmentTokenCreds(ctx, tokHash)
if err != nil {
return "", err
}
if enc == "" {
return "", nil
}
plain, err := s.deps.AEAD.Decrypt(enc, []byte("token:"+tokHash))
if err != nil {
return "", fmt.Errorf("decrypt token creds: %w", err)
}
out, err := s.deps.AEAD.Encrypt(plain, []byte("host:"+hostID))
if err != nil {
return "", fmt.Errorf("re-encrypt for host: %w", err)
}
return out, nil
}
// encryptRepoCreds JSON-encodes blob and seals it with the given
// additional-data context.
func (s *Server) encryptRepoCreds(blob repoCredsBlob, ad []byte) (string, error) {
body, err := json.Marshal(blob)
if err != nil {
return "", fmt.Errorf("marshal repo creds: %w", err)
}
return s.deps.AEAD.Encrypt(body, ad)
}
// authedUser returns true iff the request carries a valid session
// cookie. Minimal stub for now; full RBAC middleware lands with
// P4-03.
func (s *Server) authedUser(r *stdhttp.Request) bool {
c, err := r.Cookie(sessionCookieName)
if err != nil {
return false
}
_, err = s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
return err == nil
}
func ptr(s string) *string { return &s }