P2-04.5: kill host.default_paths in favour of manual schedules

Having two independent path lists for "what does this host back up?"
was a real divergence footgun — the operator types one set at Add-host
time and a different set into a schedule, both end up in the same
repo, and the snapshot history looks fine until restore. Resolution:
drop host.default_paths entirely; add a `manual` flag on schedules.
A manual schedule has paths/excludes/tags/retention like any other
but no cron — it fires only via per-schedule Run-now. Single source
of truth for what gets backed up.

Schema (migration 0007):
* schedules.manual INTEGER NOT NULL DEFAULT 0.
* For every host with non-empty default_paths, seed a manual
  schedule with those paths and bump host_schedule_version.
* ALTER TABLE hosts DROP COLUMN default_paths.
* ALTER TABLE enrollment_tokens RENAME COLUMN default_paths
  TO initial_paths.

Original draft of this migration rebuilt hosts via the
create-new + drop-old + rename-new pattern. With foreign_keys=ON
(set in the connection DSN), DROP TABLE on the parent fired
ON DELETE CASCADE on every child of hosts(id) — schedules /
jobs / snapshots / host_credentials all wiped on the smoke env
when I tried it. SQLite 3.35+ supports column-level ALTERs
directly, so we skip the rebuild dance and avoid the cascade
trap. Six lines of SQL instead of sixty, no FK risk.

Run-now rewiring:
* New `dispatchScheduleNow(hostID, scheduleID, conn?)` helper
  unifies the agent-driven path (cron fire → schedule.fire →
  OnScheduleFire callback) and the UI-driven path (operator
  clicks Run-now on a schedule row). Conn arg is optional; nil
  falls back to Hub.Send.
* New POST /hosts/{id}/schedules/{sid}/run endpoint — per-row
  Run-now button on the schedules list.
* Dashboard's per-host Run-now (handleUIRunBackup) now picks the
  host's only enabled manual schedule, falls back to the only
  enabled schedule, else returns "pick one in Schedules tab".
  Keeps one-click for the common case.

Agent:
* Scheduler skips manual schedules in cron build (silent — they're
  a normal data shape, not an error).
* Wire Schedule struct gains Manual flag.
* Schedule.fire flow unchanged — the agent only ever fires
  non-manual schedules anyway.

UI:
* Add-host form retitled "Initial schedule · manual" so the
  operator knows the paths become an editable schedule under
  the Schedules tab. Result page calls out the manual schedule
  + points at Host > Schedules.
* Schedule edit form: "Manual schedule" checkbox at the top of
  the When section; toggling it hides/shows the cron field via
  inline JS. Server-side validator skips the cron requirement
  when manual=true.
* Schedule list shows a "manual" tag under the status pill and
  renders the When column as "— run-now only —" for manual rows.
  Each row gets a Run-now button when the schedule is enabled
  and the host is online.

Tests + go test ./... green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-02 12:26:06 +01:00
parent c6237d4004
commit 8fb1c100fd
18 changed files with 327 additions and 132 deletions
+33 -13
View File
@@ -56,12 +56,11 @@ type enrollOperatorRequest struct {
RepoURL string `json:"repo_url"`
RepoUsername string `json:"repo_username"`
RepoPassword string `json:"repo_password"`
// DefaultPaths lands on the host row at consume time. Used by
// run-now buttons (the dashboard's per-row Run, the host
// detail's Run backup now). When schedules ship in P2-01 they
// supersede this — until then, this is the only source of paths
// for run-now jobs.
DefaultPaths []string `json:"default_paths,omitempty"`
// InitialPaths seeds the host's initial manual schedule on
// consume — operator can edit/extend from the host's Schedules
// tab afterwards. Empty list = no initial schedule (operator
// must add one before backups can run).
InitialPaths []string `json:"initial_paths,omitempty"`
}
type enrollOperatorResponse struct {
@@ -134,7 +133,6 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
AgentVersion: req.AgentVersion,
ResticVersion: req.ResticVersion,
EnrolledAt: time.Now().UTC(),
DefaultPaths: attachments.DefaultPaths,
}
if err := s.deps.Store.CreateHost(r.Context(), host,
auth.HashToken(agentToken), ""); err != nil {
@@ -142,6 +140,28 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
return
}
// Seed an initial manual schedule from whatever paths the
// operator typed into Add-host. The schedule is editable from
// the host's Schedules tab; the operator can add automated
// schedules alongside it later. We skip this when no paths
// were supplied — the host can still enrol; it just can't
// back up until the operator adds a schedule.
if len(attachments.InitialPaths) > 0 {
seed := store.Schedule{
ID: ulid.Make().String(),
HostID: hostID,
Kind: string(api.JobBackup),
CronExpr: "",
Paths: attachments.InitialPaths,
Enabled: true,
Manual: true,
}
if err := s.deps.Store.CreateSchedule(r.Context(), &seed); err != nil {
slog.Warn("enrollment: seed manual schedule failed",
"host_id", hostID, "err", err)
}
}
// Promote the encrypted repo creds onto the freshly-created host
// row. If this fails for any reason we log loudly but still
// return the bearer — the operator recovers via PUT
@@ -203,7 +223,7 @@ func (s *Server) handleCreateEnrollmentToken(w stdhttp.ResponseWriter, r *stdhtt
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
return
}
token, expiresAt, err := s.mintEnrollmentToken(r.Context(), req.RepoURL, req.RepoUsername, req.RepoPassword, req.DefaultPaths)
token, expiresAt, err := s.mintEnrollmentToken(r.Context(), req.RepoURL, req.RepoUsername, req.RepoPassword, req.InitialPaths)
switch err {
case nil:
writeJSON(w, stdhttp.StatusCreated, enrollOperatorResponse{Token: token, ExpiresAt: expiresAt})
@@ -226,7 +246,7 @@ var errMissingRepoCreds = errAuth("missing_repo_creds")
// token (shown to the operator exactly once) and the expiry time.
//
// Shared by the JSON endpoint and the HTML "Add host" flow.
func (s *Server) mintEnrollmentToken(ctx context.Context, repoURL, repoUsername, repoPassword string, defaultPaths []string) (string, time.Time, error) {
func (s *Server) mintEnrollmentToken(ctx context.Context, repoURL, repoUsername, repoPassword string, initialPaths []string) (string, time.Time, error) {
if repoURL == "" || repoPassword == "" {
return "", time.Time{}, errMissingRepoCreds
}
@@ -243,12 +263,12 @@ func (s *Server) mintEnrollmentToken(ctx context.Context, repoURL, repoUsername,
return "", time.Time{}, err
}
if defaultPaths == nil {
defaultPaths = []string{}
if initialPaths == nil {
initialPaths = []string{}
}
pathsJSON, err := json.Marshal(defaultPaths)
pathsJSON, err := json.Marshal(initialPaths)
if err != nil {
return "", time.Time{}, fmt.Errorf("marshal default_paths: %w", err)
return "", time.Time{}, fmt.Errorf("marshal initial_paths: %w", err)
}
const ttl = time.Hour
+42 -37
View File
@@ -3,6 +3,7 @@ package http
import (
"context"
"encoding/json"
"errors"
"log/slog"
"time"
@@ -46,6 +47,7 @@ func (s *Server) loadScheduleSetPayload(ctx context.Context, hostID string) (api
PreHook: r.PreHook,
PostHook: r.PostHook,
Enabled: r.Enabled,
Manual: r.Manual,
})
}
return out, nil
@@ -144,37 +146,42 @@ func (s *Server) applyScheduleAck(ctx context.Context, hostID string, version in
}
// dispatchScheduledJob is invoked when the agent reports a local
// cron fire via `schedule.fire`. We look up the schedule, build the
// CommandRunPayload from it, persist a job row (actor=schedule,
// linked back to scheduled_id), and write MsgCommandRun straight
// back on the same conn so the agent runs the job through its
// normal command dispatch path.
//
// On any error we log and bail — the agent's cron will fire again
// at the next tick. We deliberately don't try to retry: schedules
// are by definition repeating, and a missed tick is less bad than
// a confused operator-visible "phantom job" that never actually
// ran restic.
func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *ws.Conn, scheduleID string, scheduledAt time.Time) {
sched, err := s.deps.Store.GetSchedule(ctx, hostID, scheduleID)
// cron fire via `schedule.fire`. Thin wrapper around the shared
// dispatcher; logs and discards the return values since the agent
// can't usefully act on them.
func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, _ *ws.Conn, scheduleID string, scheduledAt time.Time) {
jobID, err := s.dispatchScheduleNow(ctx, hostID, scheduleID, nil)
if err != nil {
slog.Warn("schedule.fire: schedule not found",
slog.Warn("schedule.fire: dispatch failed",
"host_id", hostID, "schedule_id", scheduleID, "err", err)
return
}
slog.Info("schedule.fire: dispatched",
"host_id", hostID, "schedule_id", scheduleID,
"job_id", jobID, "scheduled_at", scheduledAt)
}
// dispatchScheduleNow looks up a schedule, builds a CommandRunPayload,
// persists a jobs row (actor_kind=schedule, scheduled_id linking
// back), and ships MsgCommandRun to the host. Used by both the
// agent-driven path (cron fire reaches us as schedule.fire) and the
// UI-driven path (operator clicks Run-now on a schedule row).
//
// conn is optional: when set we write directly through it (no race
// against an in-flight Register). When nil we fall back to Hub.Send.
// Returns the new job_id on success.
func (s *Server) dispatchScheduleNow(ctx context.Context, hostID, scheduleID string, conn *ws.Conn) (string, error) {
sched, err := s.deps.Store.GetSchedule(ctx, hostID, scheduleID)
if err != nil {
if errors.Is(err, store.ErrNotFound) {
return "", errFmtf("schedule not found")
}
return "", errFmtf("internal: %s", err)
}
if !sched.Enabled {
// The agent shouldn't be firing disabled schedules — its
// local cron is rebuilt from the canonical version after
// every push — but treat as belt-and-braces.
slog.Info("schedule.fire: ignoring disabled schedule",
"host_id", hostID, "schedule_id", scheduleID)
return
return "", errFmtf("schedule is disabled")
}
// Args differ by kind. For backup we ship the schedule's paths;
// other kinds are still arg-less in Phase 2 (forget/prune/check
// take their parameters from RetentionPolicy / Options at exec
// time on the agent — handled when those job kinds land).
var args []string
if sched.Kind == string(api.JobBackup) {
args = append(args, sched.Paths...)
@@ -191,9 +198,7 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
ActorID: &sched.ID,
CreatedAt: now,
}); err != nil {
slog.Warn("schedule.fire: create job",
"host_id", hostID, "schedule_id", scheduleID, "err", err)
return
return "", errFmtf("create job: %s", err)
}
env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
@@ -202,16 +207,18 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
Args: args,
})
if err != nil {
slog.Error("schedule.fire: marshal command.run",
"host_id", hostID, "schedule_id", scheduleID, "err", err)
return
return "", errFmtf("marshal command.run: %s", err)
}
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
if err := conn.Send(sendCtx, env); err != nil {
slog.Warn("schedule.fire: send command.run",
"host_id", hostID, "job_id", jobID, "err", err)
return
if conn != nil {
if err := conn.Send(sendCtx, env); err != nil {
return "", errFmtf("send command.run: %s", err)
}
} else {
if err := s.deps.Hub.Send(sendCtx, hostID, env); err != nil {
return "", errFmtf("send command.run: %s", err)
}
}
_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
@@ -222,9 +229,7 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
TargetID: &jobID,
TS: now,
})
slog.Info("schedule.fire: dispatched",
"host_id", hostID, "schedule_id", scheduleID,
"job_id", jobID, "kind", sched.Kind, "scheduled_at", scheduledAt)
return jobID, nil
}
// Compile-time guard that the store actually implements the methods
+11 -5
View File
@@ -30,6 +30,9 @@ type scheduleAPI struct {
PreHook string `json:"pre_hook,omitempty"`
PostHook string `json:"post_hook,omitempty"`
Enabled bool `json:"enabled"`
// Manual = no cron, fires only when the operator triggers a
// run-now. Cron expr is ignored when this is true.
Manual bool `json:"manual"`
CreatedAt string `json:"created_at,omitempty"`
UpdatedAt string `json:"updated_at,omitempty"`
}
@@ -254,11 +257,13 @@ func validateSchedule(s *scheduleAPI) (code, msg string) {
default:
return "invalid_kind", "kind must be one of backup|forget|prune|check"
}
if strings.TrimSpace(s.CronExpr) == "" {
return "missing_cron_expr", "cron_expr is required"
}
if _, err := cronParser.Parse(s.CronExpr); err != nil {
return "invalid_cron_expr", err.Error()
if !s.Manual {
if strings.TrimSpace(s.CronExpr) == "" {
return "missing_cron_expr", "cron_expr is required (or set manual=true)"
}
if _, err := cronParser.Parse(s.CronExpr); err != nil {
return "invalid_cron_expr", err.Error()
}
}
if s.Kind == api.JobBackup && len(s.Paths) == 0 {
return "missing_paths", "backup schedules require at least one path"
@@ -283,6 +288,7 @@ func toScheduleAPI(s store.Schedule) scheduleAPI {
PreHook: s.PreHook,
PostHook: s.PostHook,
Enabled: s.Enabled,
Manual: s.Manual,
CreatedAt: s.CreatedAt.Format("2006-01-02T15:04:05.999999999Z07:00"),
UpdatedAt: s.UpdatedAt.Format("2006-01-02T15:04:05.999999999Z07:00"),
}
+1
View File
@@ -160,6 +160,7 @@ func (s *Server) routes(r chi.Router) {
r.Get("/hosts/{id}/schedules/{sid}/edit", s.handleUIScheduleEditGet)
r.Post("/hosts/{id}/schedules/{sid}/edit", s.handleUIScheduleSave)
r.Post("/hosts/{id}/schedules/{sid}/delete", s.handleUIScheduleDelete)
r.Post("/hosts/{id}/schedules/{sid}/run", s.handleUIScheduleRun)
// Live job log.
r.Get("/jobs/{id}", s.handleUIJobDetail)
}
+49 -11
View File
@@ -1,6 +1,7 @@
package http
import (
"context"
"crypto/rand"
"encoding/base64"
"errors"
@@ -170,23 +171,18 @@ func (s *Server) handleUIRunBackup(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
return
}
if len(host.DefaultPaths) == 0 {
// Tell the user with HX-Redirect via a friendly toast — for
// now, just an HTTP error: HTMX surfaces the response body
// to the operator's console, and a future toast component
// will lift it into the UI.
stdhttp.Error(w,
"this host has no default backup paths set — edit the host or wait for schedules (P2)",
stdhttp.StatusBadRequest)
return
}
if host.RepoInitialisedAt == nil {
stdhttp.Error(w,
"this host's repo hasn't been initialised yet — click Initialise repo first",
stdhttp.StatusBadRequest)
return
}
res, status, code, msg := s.dispatchJob(r.Context(), storeUser, hostID, api.JobBackup, host.DefaultPaths)
pick, err := s.pickRunNowSchedule(r.Context(), hostID)
if err != nil {
stdhttp.Error(w, err.Error(), stdhttp.StatusBadRequest)
return
}
res, status, code, msg := s.dispatchJob(r.Context(), storeUser, hostID, api.JobBackup, pick.Paths)
if code != "" {
stdhttp.Error(w, msg, status)
return
@@ -205,6 +201,48 @@ func (s *Server) handleUIRunBackup(w stdhttp.ResponseWriter, r *stdhttp.Request)
stdhttp.Redirect(w, r, target, stdhttp.StatusSeeOther)
}
// pickRunNowSchedule chooses which schedule the generic per-host
// "Run now" button should dispatch when the operator hasn't picked
// one explicitly.
//
// Only enabled schedules of kind backup are candidates: the caller
// dispatches a backup job built from the chosen schedule's Paths, and
// forget/prune/check schedules are not required to carry any paths,
// so picking one of those would produce a backup with nothing to
// back up.
//
// Selection order:
//  1. the host's only enabled manual backup schedule;
//  2. otherwise, the host's only enabled backup schedule.
//
// Returns a pointer to the chosen row (taken from a slice local to
// this call). Returns an operator-facing error when the schedule list
// can't be loaded, when there is nothing to run, or when several
// candidates exist and the operator has to disambiguate from the
// Schedules tab.
func (s *Server) pickRunNowSchedule(ctx context.Context, hostID string) (*store.Schedule, error) {
	rows, err := s.deps.Store.ListSchedulesByHost(ctx, hostID)
	if err != nil {
		return nil, errFmt("internal: %s", err)
	}

	// Single pass: collect every runnable backup schedule and, alongside,
	// the subset flagged manual.
	candidates := make([]store.Schedule, 0, len(rows))
	var manuals []store.Schedule
	for _, row := range rows {
		if !row.Enabled || row.Kind != string(api.JobBackup) {
			continue
		}
		candidates = append(candidates, row)
		if row.Manual {
			manuals = append(manuals, row)
		}
	}

	switch {
	case len(candidates) == 0:
		return nil, errFmt("this host has no enabled backup schedules — add one in the Schedules tab")
	case len(manuals) == 1:
		// A lone manual schedule wins even when cron schedules exist too.
		return &manuals[0], nil
	case len(candidates) == 1:
		return &candidates[0], nil
	default:
		return nil, errFmt("this host has %d enabled backup schedules — pick one from the Schedules tab", len(candidates))
	}
}
// errFmt builds an error from a format string and arguments by
// forwarding both, unchanged, to errFmtf.
// NOTE(review): errFmtf is defined outside this view — presumably an
// fmt.Errorf-style constructor; confirm before relying on any
// particular wrapping semantics.
func errFmt(format string, args ...any) error {
return errFmtf(format, args...)
}
// handleUIInitRepo dispatches a one-shot `restic init` job for a
// host. Surfaced in the run-now panel as a red "Initialise repo"
// button when host.repo_initialised_at IS NULL. On success it
+52 -6
View File
@@ -26,9 +26,9 @@ type schedulesListPage struct {
// scheduleEditPage drives both the Create form (Schedule.ID empty)
// and the Edit form (Schedule populated). Errors come back via Error
// to be rendered as a banner; FormValues holds the just-submitted
// raw fields so a failed POST can re-render with the operator's
// typed input still in place.
// to be rendered as a banner; the rest of the fields hold the just-
// submitted raw values so a failed POST can re-render with the
// operator's typed input still in place.
type scheduleEditPage struct {
Host store.Host
IsNew bool
@@ -49,6 +49,7 @@ type scheduleEditPage struct {
LimitUpKBps string
LimitDownKBps string
Enabled bool
Manual bool
}
// handleUISchedulesList renders the Schedules sub-tab on a host.
@@ -151,6 +152,7 @@ func (s *Server) handleUIScheduleEditGet(w stdhttp.ResponseWriter, r *stdhttp.Re
ExcludesRaw: strings.Join(sched.Excludes, "\n"),
TagsRaw: strings.Join(sched.Tags, ", "),
Enabled: sched.Enabled,
Manual: sched.Manual,
}
page.KeepLast = intStringPtr(sched.RetentionPolicy.KeepLast)
page.KeepHourly = intStringPtr(sched.RetentionPolicy.KeepHourly)
@@ -213,6 +215,7 @@ func (s *Server) handleUIScheduleSave(w stdhttp.ResponseWriter, r *stdhttp.Reque
LimitUpKBps: strings.TrimSpace(r.PostForm.Get("limit_up_kbps")),
LimitDownKBps: strings.TrimSpace(r.PostForm.Get("limit_down_kbps")),
Enabled: r.PostForm.Get("enabled") == "on",
Manual: r.PostForm.Get("manual") == "on",
}
// Convert the raw form values into store-shape data, surfacing
@@ -234,13 +237,14 @@ func (s *Server) handleUIScheduleSave(w stdhttp.ResponseWriter, r *stdhttp.Reque
return
}
// Validate against the same rules the JSON API uses (cron, paths,
// hooks-on-non-backup) — the UI only handles backup kind today,
// so we hardcode kind=backup here.
// Validate against the same rules the JSON API uses. Manual
// schedules skip the cron-expr requirement; everything else
// applies the same.
apiShape := scheduleAPI{
Kind: api.JobBackup,
CronExpr: page.CronExpr,
Paths: paths,
Manual: page.Manual,
}
if code, msg := validateSchedule(&apiShape); code != "" {
page.Error = uiErrorMessage(code, msg)
@@ -260,6 +264,7 @@ func (s *Server) handleUIScheduleSave(w stdhttp.ResponseWriter, r *stdhttp.Reque
RetentionPolicy: retention,
Options: options,
Enabled: page.Enabled,
Manual: page.Manual,
}
if err := s.deps.Store.CreateSchedule(r.Context(), &row); err != nil {
page.Error = "Couldn't save schedule — see server log."
@@ -294,6 +299,7 @@ func (s *Server) handleUIScheduleSave(w stdhttp.ResponseWriter, r *stdhttp.Reque
existing.RetentionPolicy = retention
existing.Options = options
existing.Enabled = page.Enabled
existing.Manual = page.Manual
if err := s.deps.Store.UpdateSchedule(r.Context(), existing); err != nil {
page.Error = "Couldn't save schedule — see server log."
slog.Error("ui schedule update", "err", err)
@@ -315,6 +321,46 @@ func (s *Server) handleUIScheduleSave(w stdhttp.ResponseWriter, r *stdhttp.Reque
stdhttp.Redirect(w, r, "/hosts/"+hostID+"/schedules", stdhttp.StatusSeeOther)
}
// handleUIScheduleRun is the POST target of the per-schedule Run-now
// buttons. It dispatches through dispatchScheduleNow — the same
// helper dispatchScheduledJob uses for agent-reported cron fires — so
// manual and automated runs flow through an identical job lifecycle.
//
// Responses:
//   - 404 when the host doesn't exist, 500 when the host lookup fails
//     for any other reason;
//   - 400 "agent is offline" when the hub has no connection for the
//     host;
//   - 400 carrying the dispatcher's error text when dispatch fails;
//   - on success, 200 with an HX-Redirect header pointing at
//     /jobs/{jobID} for HTMX requests, otherwise a 303 redirect to
//     the same target.
//
// NOTE(review): unlike handleUIRunBackup, this handler does not check
// host.RepoInitialisedAt before dispatching — confirm whether that
// guard is wanted here as well.
func (s *Server) handleUIScheduleRun(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	// A nil user means the request is not authorised to continue;
	// requireUIUser is presumably responsible for the response then.
	if u := s.requireUIUser(w, r); u == nil {
		return
	}
	hostID := chi.URLParam(r, "id")
	scheduleID := chi.URLParam(r, "sid")

	// The host row is loaded purely as an existence check, so the
	// value itself is discarded.
	if _, err := s.deps.Store.GetHost(r.Context(), hostID); err != nil {
		if errors.Is(err, store.ErrNotFound) {
			stdhttp.NotFound(w, r)
			return
		}
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}
	if !s.deps.Hub.Connected(hostID) {
		stdhttp.Error(w, "agent is offline", stdhttp.StatusBadRequest)
		return
	}

	// No conn in hand on the UI path, so pass nil and let the
	// dispatcher fall back to Hub.Send.
	jobID, err := s.dispatchScheduleNow(r.Context(), hostID, scheduleID, nil)
	if err != nil {
		stdhttp.Error(w, err.Error(), stdhttp.StatusBadRequest)
		return
	}

	target := "/jobs/" + jobID
	if r.Header.Get("HX-Request") == "true" {
		// HTMX follows HX-Redirect client-side; a plain 3xx would be
		// swallowed by the XHR.
		w.Header().Set("HX-Redirect", target)
		w.WriteHeader(stdhttp.StatusOK)
		return
	}
	stdhttp.Redirect(w, r, target, stdhttp.StatusSeeOther)
}
// handleUIScheduleDelete is the POST target of the Delete buttons on
// the list view. Confirm-then-redirect; no AJAX.
func (s *Server) handleUIScheduleDelete(w stdhttp.ResponseWriter, r *stdhttp.Request) {