148e61b33b
Having two independent path lists for "what does this host back up?" was
a real divergence footgun — operator types one set at Add-host time
and a different set into a schedule, both end up in the same repo,
the snapshot history looks fine until restore. Resolution: drop
host.default_paths entirely; add a `manual` flag on schedules.
A manual schedule has paths/excludes/tags/retention like any other
but no cron — it fires only via per-schedule Run-now. Single source
of truth for what gets backed up.
Schema (migration 0007):
* schedules.manual INTEGER NOT NULL DEFAULT 0.
* For every host with non-empty default_paths, seed a manual
schedule with those paths and bump host_schedule_version.
* ALTER TABLE hosts DROP COLUMN default_paths.
* ALTER TABLE enrollment_tokens RENAME COLUMN default_paths
TO initial_paths.
Original draft of this migration rebuilt hosts via the
create-new + drop-old + rename-new pattern. With foreign_keys=ON
(set in the connection DSN), DROP TABLE on the parent fired
ON DELETE CASCADE on every child of hosts(id) — schedules /
jobs / snapshots / host_credentials all wiped on the smoke env
when I tried it. SQLite 3.35+ supports column-level ALTERs
directly, so we skip the rebuild dance and avoid the cascade
trap. Six lines of SQL instead of sixty, no FK risk.
Run-now rewiring:
* New `dispatchScheduleNow(hostID, scheduleID, conn?)` helper
unifies the agent-driven path (cron fire → schedule.fire →
OnScheduleFire callback) and the UI-driven path (operator
clicks Run-now on a schedule row). Conn arg is optional; nil
falls back to Hub.Send.
* New POST /hosts/{id}/schedules/{sid}/run endpoint — per-row
Run-now button on the schedules list.
* Dashboard's per-host Run-now (handleUIRunBackup) now picks the
host's only enabled manual schedule, falls back to the only
enabled schedule, else returns "pick one in Schedules tab".
Keeps one-click for the common case.
Agent:
* Scheduler skips manual schedules in cron build (silent — they're
a normal data shape, not an error).
* Wire Schedule struct gains Manual flag.
* Schedule.fire flow unchanged — the agent only ever fires
non-manual schedules anyway.
UI:
* Add-host form retitled "Initial schedule · manual" so the
operator knows the paths become an editable schedule under
the Schedules tab. Result page calls out the manual schedule
+ points at Host > Schedules.
* Schedule edit form: "Manual schedule" checkbox at the top of
the When section; toggling it hides/shows the cron field via
inline JS. Server-side validator skips the cron requirement
when manual=true.
* Schedule list shows a "manual" tag under the status pill and
renders the When column as "— run-now only —" for manual rows.
Each row gets a Run-now button when the schedule is enabled
and the host is online.
Tests + go test ./... green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
329 lines
12 KiB
Go
329 lines
12 KiB
Go
package http
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
stdhttp "net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/oklog/ulid/v2"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// enrollRequest is the body posted by the agent installer. The token
|
|
// was issued by the operator via the UI ("Add host" → P1-27); the
|
|
// host metadata comes from the agent's own sysinfo collection.
|
|
type enrollRequest struct {
|
|
Token string `json:"token"`
|
|
HostName string `json:"hostname"`
|
|
OS api.HostOS `json:"os"`
|
|
Arch api.HostArch `json:"arch"`
|
|
AgentVersion string `json:"agent_version"`
|
|
ResticVersion string `json:"restic_version"`
|
|
}
|
|
|
|
// enrollResponse carries the long-lived credentials back to the
// agent. AgentToken is shown exactly once; the server stores only
// its hash.
//
// CertPinSHA256 is reserved for future use. The server is HTTP-only
// and sits behind a reverse proxy that owns the TLS cert; pinning is
// configured at the agent install step (`-cert-pin`) by the operator
// pasting in the proxy's cert hash. The field stays in the response
// shape so it can be populated later if the topology changes.
type enrollResponse struct {
	// HostID is the identifier assigned to the newly enrolled host.
	HostID string `json:"host_id"`
	// AgentToken is the raw persistent bearer for the agent.
	AgentToken string `json:"agent_token"`
	// CertPinSHA256 is omitted from the JSON while empty.
	CertPinSHA256 string `json:"cert_pin_sha256,omitempty"`
}
|
|
|
|
// enrollOperatorRequest is the body of the authenticated UI/API call
// that mints a one-time enrollment token for an operator who is about
// to install an agent.
//
// Repo creds are required at token-mint time so the agent can run a
// backup the moment it comes online. The URL/username/password trio
// is JSON-encoded, AEAD-encrypted with the token hash bound in as
// additional data, and stashed on the token row. At enrolment the
// blob is re-encrypted under the new host ID and stored as that
// host's credentials.
type enrollOperatorRequest struct {
	// HostName and Tags are accepted in the request body.
	// NOTE(review): neither is forwarded to mintEnrollmentToken by
	// the JSON handler in this file — confirm whether that is intended.
	HostName string   `json:"hostname"`
	Tags     []string `json:"tags,omitempty"`

	// Repository location and credentials. RepoURL and RepoPassword
	// are mandatory; RepoUsername may be empty.
	RepoURL      string `json:"repo_url"`
	RepoUsername string `json:"repo_username"`
	RepoPassword string `json:"repo_password"`

	// InitialPaths seeds the host's initial manual schedule when the
	// token is consumed — the operator can edit/extend it from the
	// host's Schedules tab afterwards. An empty list means no initial
	// schedule (the operator must add one before backups can run).
	InitialPaths []string `json:"initial_paths,omitempty"`
}
|
|
|
|
// enrollOperatorResponse is returned to the operator after a token
// has been minted.
type enrollOperatorResponse struct {
	// Token is the raw one-time enrollment token.
	Token string `json:"token"`
	// ExpiresAt is the instant after which the token is no longer
	// accepted.
	ExpiresAt time.Time `json:"expires_at"`
}
|
|
|
|
// repoCredsBlob is the JSON shape of the repo-credentials payload
// that gets sealed with AEAD. It lives only inside ciphertext — never
// on the wire as plaintext outside the WS config.update push.
type repoCredsBlob struct {
	// RepoURL is the repository location.
	RepoURL string `json:"repo_url"`
	// RepoUsername is always serialised, even when empty.
	RepoUsername string `json:"repo_username"`
	// RepoPassword is the repository secret.
	RepoPassword string `json:"repo_password"`
}
|
|
|
|
// handleAgentEnroll consumes a one-time token, persists a Host row,
|
|
// and returns persistent agent credentials. Open endpoint (no
|
|
// session) — the token is the credential.
|
|
func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
var req enrollRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
|
return
|
|
}
|
|
if req.Token == "" || req.HostName == "" || req.OS == "" || req.Arch == "" {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
|
|
"token, hostname, os, arch all required")
|
|
return
|
|
}
|
|
|
|
hostID := ulid.Make().String()
|
|
|
|
// Atomically: validate + consume token, then create the host.
|
|
// We do these in two statements; if create-host fails, the token
|
|
// is already burned. That's acceptable — operator just regens.
|
|
tokHash := auth.HashToken(req.Token)
|
|
|
|
// Pull every operator-supplied attachment off the token row in one
|
|
// query: encrypted repo creds (rebound under the new host_id) plus
|
|
// the default-paths list. Both transferred onto the new host row
|
|
// after consume.
|
|
attachments, encForHost, err := s.rebindTokenAttachments(r.Context(), tokHash, hostID)
|
|
if err != nil {
|
|
slog.Warn("enrollment: rebind token attachments failed", "err", err)
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
|
"token unknown, expired, or already used")
|
|
return
|
|
}
|
|
|
|
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
|
|
slog.Warn("enrollment: consume token failed", "err", err)
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
|
"token unknown, expired, or already used")
|
|
return
|
|
}
|
|
|
|
// Mint the persistent agent bearer.
|
|
agentToken, err := auth.NewToken()
|
|
if err != nil {
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
|
return
|
|
}
|
|
|
|
host := store.Host{
|
|
ID: hostID,
|
|
Name: strings.TrimSpace(req.HostName),
|
|
OS: string(req.OS),
|
|
Arch: string(req.Arch),
|
|
AgentVersion: req.AgentVersion,
|
|
ResticVersion: req.ResticVersion,
|
|
EnrolledAt: time.Now().UTC(),
|
|
}
|
|
if err := s.deps.Store.CreateHost(r.Context(), host,
|
|
auth.HashToken(agentToken), ""); err != nil {
|
|
writeJSONError(w, stdhttp.StatusConflict, "host_exists", err.Error())
|
|
return
|
|
}
|
|
|
|
// Seed an initial manual schedule from whatever paths the
|
|
// operator typed into Add-host. The schedule is editable from
|
|
// the host's Schedules tab; the operator can add automated
|
|
// schedules alongside it later. We skip this when no paths
|
|
// were supplied — the host can still enrol; it just can't
|
|
// back up until the operator adds a schedule.
|
|
if len(attachments.InitialPaths) > 0 {
|
|
seed := store.Schedule{
|
|
ID: ulid.Make().String(),
|
|
HostID: hostID,
|
|
Kind: string(api.JobBackup),
|
|
CronExpr: "",
|
|
Paths: attachments.InitialPaths,
|
|
Enabled: true,
|
|
Manual: true,
|
|
}
|
|
if err := s.deps.Store.CreateSchedule(r.Context(), &seed); err != nil {
|
|
slog.Warn("enrollment: seed manual schedule failed",
|
|
"host_id", hostID, "err", err)
|
|
}
|
|
}
|
|
|
|
// Promote the encrypted repo creds onto the freshly-created host
|
|
// row. If this fails for any reason we log loudly but still
|
|
// return the bearer — the operator recovers via PUT
|
|
// /api/hosts/{id}/repo-credentials. Failing the whole enrolment
|
|
// here would leave a half-burned token + an orphan host.
|
|
if encForHost != "" {
|
|
if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, encForHost); err != nil {
|
|
slog.Error("enrollment: set host credentials failed",
|
|
"host_id", hostID, "err", err)
|
|
}
|
|
}
|
|
|
|
auditPayload, _ := json.Marshal(struct {
|
|
Hostname string `json:"hostname"`
|
|
OS string `json:"os"`
|
|
Arch string `json:"arch"`
|
|
HasRepoCreds bool `json:"has_repo_creds"`
|
|
}{
|
|
Hostname: host.Name,
|
|
OS: host.OS,
|
|
Arch: host.Arch,
|
|
HasRepoCreds: encForHost != "",
|
|
})
|
|
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
|
ID: ulid.Make().String(),
|
|
Actor: "system",
|
|
Action: "host.enrolled",
|
|
TargetKind: ptr("host"),
|
|
TargetID: &hostID,
|
|
TS: host.EnrolledAt,
|
|
Payload: auditPayload,
|
|
})
|
|
|
|
writeJSON(w, stdhttp.StatusCreated, enrollResponse{
|
|
HostID: hostID,
|
|
AgentToken: agentToken,
|
|
// CertPinSHA256: the server is HTTP-only and sits behind a
|
|
// reverse proxy that owns the cert. The operator pastes the
|
|
// proxy's cert hash into the install command (`-cert-pin`)
|
|
// when they want pinning; the server cannot introspect a
|
|
// cert it doesn't terminate.
|
|
})
|
|
}
|
|
|
|
// handleCreateEnrollmentToken (operator-facing) — generates a
|
|
// short-lived token for a new host. Authenticated; admin/operator only.
|
|
//
|
|
// TODO: gate by authn middleware once login session lookup lands.
|
|
// For Phase 1's first slice, we accept the bootstrap-shipped admin
|
|
// session cookie and trust it, validating the cookie via store.
|
|
func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
if !s.authedUser(r) {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
|
return
|
|
}
|
|
|
|
var req enrollOperatorRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
|
return
|
|
}
|
|
token, expiresAt, err := s.mintEnrollmentToken(r.Context(), req.RepoURL, req.RepoUsername, req.RepoPassword, req.InitialPaths)
|
|
switch err {
|
|
case nil:
|
|
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{Token: token, ExpiresAt: expiresAt})
|
|
case errMissingRepoCreds:
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
|
|
"repo_url and repo_password are required so the agent can run backups on first connect")
|
|
default:
|
|
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
|
|
}
|
|
}
|
|
|
|
// errMissingRepoCreds is returned by mintEnrollmentToken when the
|
|
// operator hasn't supplied the URL+password pair the agent needs.
|
|
// Sentinel error so HTML and JSON handlers can map it to their own
|
|
// surface (form re-render with banner / 400 with code).
|
|
// The JSON handler compares against this value by identity, so
// mintEnrollmentToken must return it unwrapped.
var errMissingRepoCreds = errAuth("missing_repo_creds")
|
|
|
|
// mintEnrollmentToken creates a fresh one-time enrollment token and
|
|
// stashes the AEAD-encrypted repo creds on its row. Returns the raw
|
|
// token (shown to the operator exactly once) and the expiry time.
|
|
//
|
|
// Shared by the JSON endpoint and the HTML "Add host" flow.
|
|
func (s *Server) mintEnrollmentToken(ctx context.Context, repoURL, repoUsername, repoPassword string, initialPaths []string) (string, time.Time, error) {
|
|
if repoURL == "" || repoPassword == "" {
|
|
return "", time.Time{}, errMissingRepoCreds
|
|
}
|
|
token, err := auth.NewToken()
|
|
if err != nil {
|
|
return "", time.Time{}, err
|
|
}
|
|
tokHash := auth.HashToken(token)
|
|
|
|
enc, err := s.encryptRepoCreds(repoCredsBlob{
|
|
RepoURL: repoURL, RepoUsername: repoUsername, RepoPassword: repoPassword,
|
|
}, []byte("token:"+tokHash))
|
|
if err != nil {
|
|
return "", time.Time{}, err
|
|
}
|
|
|
|
if initialPaths == nil {
|
|
initialPaths = []string{}
|
|
}
|
|
pathsJSON, err := json.Marshal(initialPaths)
|
|
if err != nil {
|
|
return "", time.Time{}, fmt.Errorf("marshal initial_paths: %w", err)
|
|
}
|
|
|
|
const ttl = time.Hour
|
|
if err := s.deps.Store.CreateEnrollmentToken(ctx, tokHash, ttl, enc, string(pathsJSON)); err != nil {
|
|
return "", time.Time{}, err
|
|
}
|
|
return token, time.Now().Add(ttl).UTC(), nil
|
|
}
|
|
|
|
// rebindTokenAttachments fetches every operator-supplied attachment
|
|
// off the token row, re-encrypting the repo-creds blob under the
|
|
// new host_id (the additional-data binding moves with the cred so
|
|
// a token-row dump can't be replayed against a different host's
|
|
// row). Returns the attachments (sans the rebind work), the
|
|
// re-encrypted ciphertext for SetHostCredentials, and any error.
|
|
func (s *Server) rebindTokenAttachments(ctx context.Context, tokHash, hostID string) (store.EnrollmentTokenAttachments, string, error) {
|
|
att, err := s.deps.Store.GetEnrollmentTokenAttachments(ctx, tokHash)
|
|
if err != nil {
|
|
return store.EnrollmentTokenAttachments{}, "", err
|
|
}
|
|
if att.EncRepoCreds == "" {
|
|
return att, "", nil
|
|
}
|
|
plain, err := s.deps.AEAD.Decrypt(att.EncRepoCreds, []byte("token:"+tokHash))
|
|
if err != nil {
|
|
return att, "", fmt.Errorf("decrypt token creds: %w", err)
|
|
}
|
|
out, err := s.deps.AEAD.Encrypt(plain, []byte("host:"+hostID))
|
|
if err != nil {
|
|
return att, "", fmt.Errorf("re-encrypt for host: %w", err)
|
|
}
|
|
return att, out, nil
|
|
}
|
|
|
|
// encryptRepoCreds JSON-encodes blob and seals it with the given
|
|
// additional-data context.
|
|
func (s *Server) encryptRepoCreds(blob repoCredsBlob, ad []byte) (string, error) {
|
|
body, err := json.Marshal(blob)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal repo creds: %w", err)
|
|
}
|
|
return s.deps.AEAD.Encrypt(body, ad)
|
|
}
|
|
|
|
// authedUser returns true iff the request carries a valid session
|
|
// cookie. Minimal stub for now; full RBAC middleware lands with
|
|
// P4-03.
|
|
func (s *Server) authedUser(r *stdhttp.Request) bool {
|
|
c, err := r.Cookie(sessionCookieName)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
_, err = s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
|
|
return err == nil
|
|
}
|
|
|
|
// ptr returns a pointer to a fresh copy of s, for populating optional
// string fields from a literal.
func ptr(s string) *string {
	v := s
	return &v
}
|