Files
restic-manager/internal/server/http/enrollment.go
T
steve 8aa635f0c1 P1 polish: Host.default_paths interim + restic env hygiene + job_id JS quoting
Two fixes that close the loop on dashboard run-now and harden the
agent's restic invocation.

Default paths (interim until P2-01 schedules):
  - 0003 migration adds default_paths TEXT NOT NULL DEFAULT '[]'
    to hosts and to enrollment_tokens.
  - Operator types paths in the Add-host form (textarea, one per
    line). They ride on the enrollment_tokens row alongside the
    encrypted creds (paths aren't secret — plain JSON column).
  - On consume, ConsumeEnrollmentToken still just burns the token;
    the new GetEnrollmentTokenAttachments returns both the
    re-bindable creds and the path list in one round trip, and the
    handler then transfers them onto the new host row (paths inside
    CreateHost, creds via SetHostCredentials).
  - The dashboard's Run-now and host-detail's "Run backup now"
    button now read Host.DefaultPaths and pass them to dispatchJob.
    A host with no default paths returns 400 with a friendly
    "no paths set" message instead of dispatching a doomed
    `restic backup` with no positional args.
  - Doc comments explicitly call this out as a Phase 1 interim —
    schedules supersede.

Restic env hygiene:
  - envSlice() previously omitted HOME / XDG_CACHE_HOME, which
    bit the smoke runs whenever the agent was launched outside
    systemd (restic refused to start: "neither $XDG_CACHE_HOME
    nor $HOME are defined"). Now both are set explicitly: prefer
    Env.ExtraEnv overrides, fall back to the agent process's own
    HOME, and finally to /var/lib/restic-manager.
  - Comment makes the env policy explicit: parent's RESTIC_* /
    AWS_* / B2_* env is filtered out by design — control-plane
    is the unambiguous source of truth.

JS bug fix in the live log page:
  - {{$job.ID | printf "%q"}} produced a literal-quoted JS string,
    which then went into the WS URL as ".../jobs/"<ID>"/stream"
    → 404. Switched to '{{$job.ID}}' inside the literal so
    html/template's auto-escape does the right thing. Verified
    end-to-end: dashboard "Run now" → live progress + log lines
    arrive over the WS → succeeded pill renders.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 22:35:33 +01:00

309 lines
11 KiB
Go

package http
import (
"context"
"encoding/json"
"fmt"
"log/slog"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// enrollRequest is the JSON body the agent installer posts to the
// enrollment endpoint. The token was issued to the operator through
// the UI ("Add host" → P1-27); everything else is host metadata
// gathered by the agent's own sysinfo collection.
type enrollRequest struct {
	// Token is the raw one-time enrollment token. Required.
	Token string `json:"token"`

	// HostName, OS and Arch describe the machine being enrolled.
	// handleAgentEnroll rejects the request if any of them is empty.
	HostName string       `json:"hostname"`
	OS       api.HostOS   `json:"os"`
	Arch     api.HostArch `json:"arch"`

	// AgentVersion and ResticVersion are copied onto the host row
	// as reported by the agent.
	AgentVersion  string `json:"agent_version"`
	ResticVersion string `json:"restic_version"`
}
// enrollResponse is the success body of the agent enrollment call:
// the long-lived credentials the agent uses from then on.
//
// CertPinSHA256 is reserved for future use. The server is HTTP-only
// and sits behind a reverse proxy that owns the TLS cert; pinning is
// configured at the agent install step (`-cert-pin`) by the operator
// pasting in the proxy's cert hash. The field stays in the response
// shape so it can be populated later if the topology changes.
type enrollResponse struct {
	// HostID is the identifier of the newly created host row.
	HostID string `json:"host_id"`
	// AgentToken is the raw bearer, returned exactly once; only its
	// hash is handed to the store.
	AgentToken string `json:"agent_token"`
	// CertPinSHA256 is omitted from the JSON while empty.
	CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
// enrollOperatorRequest is the body an operator posts to mint a
// one-time enrollment token just before installing an agent.
// Authenticated UI route.
//
// Repo creds are required at token-mint time so the agent can run a
// backup the moment it comes online. The URL/username/password trio
// is JSON-encoded, AEAD-encrypted with the token hash as additional
// data, and stored on the token row. At enrollment the handler
// re-encrypts the blob under the new host ID and stores it with
// SetHostCredentials after the host row has been created.
type enrollOperatorRequest struct {
	// HostName and Tags are decoded from the request;
	// handleCreateEnrollmentToken does not forward them to
	// mintEnrollmentToken.
	HostName string   `json:"hostname"`
	Tags     []string `json:"tags,omitempty"`

	// Repository credentials. mintEnrollmentToken requires RepoURL
	// and RepoPassword to be non-empty; RepoUsername may be blank.
	RepoURL      string `json:"repo_url"`
	RepoUsername string `json:"repo_username"`
	RepoPassword string `json:"repo_password"`

	// DefaultPaths is copied onto the host row when the token is
	// consumed. The run-now buttons (the dashboard's per-row Run and
	// the host detail's "Run backup now") use it. Schedules (P2-01)
	// will supersede this; until then it is the only source of
	// paths for run-now jobs.
	DefaultPaths []string `json:"default_paths,omitempty"`
}
// enrollOperatorResponse is returned to the operator after a
// one-time enrollment token has been minted.
type enrollOperatorResponse struct {
	// Token is the raw enrollment token to paste into the agent
	// install command.
	Token string `json:"token"`
	// ExpiresAt is the time after which the token is reported as
	// no longer valid.
	ExpiresAt time.Time `json:"expires_at"`
}
// repoCredsBlob is the plaintext JSON layout of the repo-credentials
// blob before it is sealed. It is meant to exist only inside AEAD
// ciphertext — never on the wire as plaintext outside the WS
// config.update push.
type repoCredsBlob struct {
	RepoURL      string `json:"repo_url"`
	RepoUsername string `json:"repo_username"`
	RepoPassword string `json:"repo_password"`
}
// handleAgentEnroll consumes a one-time token, persists a Host row,
// and returns persistent agent credentials. Open endpoint (no
// session) — the token is the credential.
//
// Responses:
//   - 201 with an enrollResponse on success.
//   - 400 invalid_json when the body cannot be decoded.
//   - 400 missing_field when token, hostname, os or arch is empty
//     (hostname is checked after trimming whitespace).
//   - 401 invalid_token when the token attachments cannot be read or
//     the token cannot be consumed.
//   - 409 host_exists when CreateHost returns an error.
//   - 500 internal when the agent bearer cannot be generated.
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	// Open endpoint: cap the body so an unauthenticated caller cannot
	// stream an arbitrarily large payload into the decoder. The
	// request is a handful of short strings, so 1 MiB is generous.
	const maxEnrollBody = 1 << 20
	r.Body = stdhttp.MaxBytesReader(w, r.Body, maxEnrollBody)

	var req enrollRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}

	// Trim before validating so a whitespace-only hostname is
	// rejected instead of being stored as an empty host name.
	hostName := strings.TrimSpace(req.HostName)
	if req.Token == "" || hostName == "" || req.OS == "" || req.Arch == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"token, hostname, os, arch all required")
		return
	}

	hostID := ulid.Make().String()
	tokHash := auth.HashToken(req.Token)

	// Pull every operator-supplied attachment off the token row in one
	// query: encrypted repo creds (rebound under the new host_id) plus
	// the default-paths list. Both are transferred onto the new host
	// row after consume.
	attachments, encForHost, err := s.rebindTokenAttachments(r.Context(), tokHash, hostID)
	if err != nil {
		slog.Warn("enrollment: rebind token attachments failed", "err", err)
		writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
			"token unknown, expired, or already used")
		return
	}

	// Mint the persistent agent bearer before burning the enrollment
	// token, so a failure here does not cost the operator their token.
	agentToken, err := auth.NewToken()
	if err != nil {
		slog.Error("enrollment: mint agent token failed", "err", err)
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// Consume the token, then create the host. These are two separate
	// statements; if create-host fails, the token is already burned.
	// That's acceptable — the operator just regenerates one.
	if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
		slog.Warn("enrollment: consume token failed", "err", err)
		writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
			"token unknown, expired, or already used")
		return
	}

	host := store.Host{
		ID:            hostID,
		Name:          hostName,
		OS:            string(req.OS),
		Arch:          string(req.Arch),
		AgentVersion:  req.AgentVersion,
		ResticVersion: req.ResticVersion,
		EnrolledAt:    time.Now().UTC(),
		DefaultPaths:  attachments.DefaultPaths,
	}
	if err := s.deps.Store.CreateHost(r.Context(), host,
		auth.HashToken(agentToken), ""); err != nil {
		// NOTE(review): every CreateHost error is reported as 409 and
		// its text is echoed to the caller — confirm that the store
		// only fails here on a duplicate host.
		slog.Warn("enrollment: create host failed", "host_id", hostID, "err", err)
		writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
		return
	}

	// Promote the encrypted repo creds onto the freshly-created host
	// row. If this fails for any reason we log loudly but still
	// return the bearer — the operator recovers via PUT
	// /api/hosts/{id}/repo-credentials. Failing the whole enrolment
	// here would leave a half-burned token + an orphan host.
	if encForHost != "" {
		if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, encForHost); err != nil {
			slog.Error("enrollment: set host credentials failed",
				"host_id", hostID, "err", err)
		}
	}

	// Marshal cannot fail here: the payload is three strings and a
	// bool, so the error is deliberately ignored.
	auditPayload, _ := json.Marshal(struct {
		Hostname     string `json:"hostname"`
		OS           string `json:"os"`
		Arch         string `json:"arch"`
		HasRepoCreds bool   `json:"has_repo_creds"`
	}{
		Hostname:     host.Name,
		OS:           host.OS,
		Arch:         host.Arch,
		HasRepoCreds: encForHost != "",
	})
	// The audit write stays best-effort (enrollment has already
	// succeeded), but a failure is logged rather than dropped.
	if err := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		Actor:      "system",
		Action:     "host.enrolled",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         host.EnrolledAt,
		Payload:    auditPayload,
	}); err != nil {
		slog.Warn("enrollment: append audit failed", "host_id", hostID, "err", err)
	}

	writeJSON(w, stdhttp.StatusCreated, enrollResponse{
		HostID:     hostID,
		AgentToken: agentToken,
		// CertPinSHA256: the server is HTTP-only and sits behind a
		// reverse proxy that owns the cert. The operator pastes the
		// proxy's cert hash into the install command (`-cert-pin`)
		// when they want pinning; the server cannot introspect a
		// cert it doesn't terminate.
	})
}
// handleCreateEnrollmentToken (operator-facing) — generates a
// short-lived token for a new host. Authenticated; admin/operator only.
//
// Responses:
//   - 201 with an enrollOperatorResponse on success.
//   - 401 unauthorized when the request has no valid session cookie.
//   - 400 invalid_json when the body cannot be decoded.
//   - 400 missing_field when repo_url or repo_password is empty.
//   - 500 internal for any other minting failure (logged server-side).
//
// TODO: gate by authn middleware once login session lookup lands.
// For Phase 1's first slice, we accept the bootstrap-shipped admin
// session cookie and trust it, validating the cookie via store.
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if !s.authedUser(r) {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
		return
	}

	var req enrollOperatorRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}

	token, expiresAt, err := s.mintEnrollmentToken(r.Context(), req.RepoURL, req.RepoUsername, req.RepoPassword, req.DefaultPaths)
	switch err {
	case nil:
		writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{Token: token, ExpiresAt: expiresAt})
	case errMissingRepoCreds:
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"repo_url and repo_password are required so the agent can run backups on first connect")
	default:
		// The client only sees a bare "internal"; keep the real cause
		// in the server log so the failure can be diagnosed.
		slog.Error("enrollment: mint enrollment token failed", "err", err)
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
	}
}
// errMissingRepoCreds is the sentinel mintEnrollmentToken returns
// when the repo URL or the repo password is empty — the pair the
// agent needs to run a backup. handleCreateEnrollmentToken maps it
// to a 400 missing_field response; per the original note, the HTML
// "Add host" flow (not shown in this file) maps it to a form
// re-render with a banner.
var errMissingRepoCreds = errAuth("missing_repo_creds")
// mintEnrollmentToken generates a new one-time enrollment token and
// stores its hash together with the AEAD-encrypted repo creds and
// the JSON-encoded default paths.
//
// It returns the raw token (shown to the operator exactly once) and
// the expiry time. errMissingRepoCreds is returned when repoURL or
// repoPassword is empty; any other error comes from token
// generation, encryption, JSON encoding or the store.
//
// Shared by the JSON endpoint and the HTML "Add host" flow.
func (s *Server) mintEnrollmentToken(ctx context.Context, repoURL, repoUsername, repoPassword string, defaultPaths []string) (string, time.Time, error) {
	// Tokens live for one hour.
	const lifetime = time.Hour

	var noExpiry time.Time

	if repoURL == "" || repoPassword == "" {
		return "", noExpiry, errMissingRepoCreds
	}

	rawToken, err := auth.NewToken()
	if err != nil {
		return "", noExpiry, err
	}
	hashed := auth.HashToken(rawToken)

	// Bind the ciphertext to this token via the additional data.
	creds := repoCredsBlob{
		RepoURL:      repoURL,
		RepoUsername: repoUsername,
		RepoPassword: repoPassword,
	}
	sealed, err := s.encryptRepoCreds(creds, []byte("token:"+hashed))
	if err != nil {
		return "", noExpiry, err
	}

	// A nil slice would encode as JSON null; store an empty array
	// instead.
	paths := defaultPaths
	if paths == nil {
		paths = []string{}
	}
	encodedPaths, err := json.Marshal(paths)
	if err != nil {
		return "", noExpiry, fmt.Errorf("marshal default_paths: %w", err)
	}

	err = s.deps.Store.CreateEnrollmentToken(ctx, hashed, lifetime, sealed, string(encodedPaths))
	if err != nil {
		return "", noExpiry, err
	}

	expiresAt := time.Now().Add(lifetime).UTC()
	return rawToken, expiresAt, nil
}
// rebindTokenAttachments loads the operator-supplied attachments
// stored on the token row and, when repo creds are present,
// re-encrypts them with the new host ID as additional data. The
// binding moves with the cred so a token-row dump can't be replayed
// against a different host's row.
//
// It returns the attachments as read from the store, the ciphertext
// to hand to SetHostCredentials (empty when the token carried no
// creds), and any error from the store lookup, decryption or
// re-encryption.
func (s *Server) rebindTokenAttachments(ctx context.Context, tokHash, hostID string) (store.EnrollmentTokenAttachments, string, error) {
	attachments, err := s.deps.Store.GetEnrollmentTokenAttachments(ctx, tokHash)
	if err != nil {
		return store.EnrollmentTokenAttachments{}, "", err
	}

	// No creds attached to the token: nothing to rebind.
	if attachments.EncRepoCreds == "" {
		return attachments, "", nil
	}

	tokenAD := []byte("token:" + tokHash)
	hostAD := []byte("host:" + hostID)

	cleartext, err := s.deps.AEAD.Decrypt(attachments.EncRepoCreds, tokenAD)
	if err != nil {
		return attachments, "", fmt.Errorf("decrypt token creds: %w", err)
	}

	rebound, err := s.deps.AEAD.Encrypt(cleartext, hostAD)
	if err != nil {
		return attachments, "", fmt.Errorf("re-encrypt for host: %w", err)
	}
	return attachments, rebound, nil
}
// encryptRepoCreds serialises blob to JSON and seals the result with
// the server's AEAD, using ad as the additional-data context. It
// returns the ciphertext, or an error from JSON encoding or sealing.
func (s *Server) encryptRepoCreds(blob repoCredsBlob, ad []byte) (string, error) {
	encoded, marshalErr := json.Marshal(blob)
	if marshalErr != nil {
		return "", fmt.Errorf("marshal repo creds: %w", marshalErr)
	}

	return s.deps.AEAD.Encrypt(encoded, ad)
}
// authedUser reports whether the request carries a session cookie
// whose hashed value the store recognises as a session. Minimal stub
// for now; full RBAC middleware lands with P4-03.
func (s *Server) authedUser(r *stdhttp.Request) bool {
	cookie, cookieErr := r.Cookie(sessionCookieName)
	if cookieErr != nil {
		// No session cookie on the request.
		return false
	}

	sessionHash := auth.HashToken(cookie.Value)
	if _, lookupErr := s.deps.Store.LookupSession(r.Context(), sessionHash); lookupErr != nil {
		return false
	}
	return true
}
// ptr returns a pointer to a fresh copy of s. Handy for filling
// optional *string fields from a literal.
func ptr(s string) *string {
	p := new(string)
	*p = s
	return p
}