P2-04.5: kill host.default_paths in favour of manual schedules
Having two independent path lists for "what does this host back up?" was
a real divergence footgun — operator types one set at Add-host time
and a different set into a schedule, both end up in the same repo,
the snapshot history looks fine until restore. Resolution: drop
host.default_paths entirely; add a `manual` flag on schedules.
A manual schedule has paths/excludes/tags/retention like any other
but no cron — it fires only via per-schedule Run-now. Single source
of truth for what gets backed up.
Schema (migration 0007):
* schedules.manual INTEGER NOT NULL DEFAULT 0.
* For every host with non-empty default_paths, seed a manual
schedule with those paths and bump host_schedule_version.
* ALTER TABLE hosts DROP COLUMN default_paths.
* ALTER TABLE enrollment_tokens RENAME COLUMN default_paths
TO initial_paths.
Original draft of this migration rebuilt hosts via the
create-new + drop-old + rename-new pattern. With foreign_keys=ON
(set in the connection DSN), DROP TABLE on the parent fired
ON DELETE CASCADE on every child of hosts(id) — schedules /
jobs / snapshots / host_credentials all wiped on the smoke env
when I tried it. SQLite 3.35+ supports column-level ALTERs
directly, so we skip the rebuild dance and avoid the cascade
trap. Six lines of SQL instead of sixty, no FK risk.
Run-now rewiring:
* New `dispatchScheduleNow(hostID, scheduleID, conn?)` helper
unifies the agent-driven path (cron fire → schedule.fire →
OnScheduleFire callback) and the UI-driven path (operator
clicks Run-now on a schedule row). Conn arg is optional; nil
falls back to Hub.Send.
* New POST /hosts/{id}/schedules/{sid}/run endpoint — per-row
Run-now button on the schedules list.
* Dashboard's per-host Run-now (handleUIRunBackup) now picks the
host's only enabled manual schedule, falls back to the only
enabled schedule, else returns "pick one in Schedules tab".
Keeps one-click for the common case.
Agent:
* Scheduler skips manual schedules in cron build (silent — they're
a normal data shape, not an error).
* Wire Schedule struct gains Manual flag.
* Schedule.fire flow unchanged — the agent only ever fires
non-manual schedules anyway.
UI:
* Add-host form retitled "Initial schedule · manual" so the
operator knows the paths become an editable schedule under
the Schedules tab. Result page calls out the manual schedule
+ points at Host > Schedules.
* Schedule edit form: "Manual schedule" checkbox at the top of
the When section; toggling it hides/shows the cron field via
inline JS. Server-side validator skips the cron requirement
when manual=true.
* Schedule list shows a "manual" tag under the status pill and
renders the When column as "— run-now only —" for manual rows.
Each row gets a Run-now button when the schedule is enabled
and the host is online.
Tests + go test ./... green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ package http
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
@@ -46,6 +47,7 @@ func (s *Server) loadScheduleSetPayload(ctx context.Context, hostID string) (api
|
||||
PreHook: r.PreHook,
|
||||
PostHook: r.PostHook,
|
||||
Enabled: r.Enabled,
|
||||
Manual: r.Manual,
|
||||
})
|
||||
}
|
||||
return out, nil
|
||||
@@ -144,37 +146,42 @@ func (s *Server) applyScheduleAck(ctx context.Context, hostID string, version in
|
||||
}
|
||||
|
||||
// dispatchScheduledJob is invoked when the agent reports a local
|
||||
// cron fire via `schedule.fire`. We look up the schedule, build the
|
||||
// CommandRunPayload from it, persist a job row (actor=schedule,
|
||||
// linked back to scheduled_id), and write MsgCommandRun straight
|
||||
// back on the same conn so the agent runs the job through its
|
||||
// normal command dispatch path.
|
||||
//
|
||||
// On any error we log and bail — the agent's cron will fire again
|
||||
// at the next tick. We deliberately don't try to retry: schedules
|
||||
// are by definition repeating, and a missed tick is less bad than
|
||||
// a confused operator-visible "phantom job" that never actually
|
||||
// ran restic.
|
||||
func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *ws.Conn, scheduleID string, scheduledAt time.Time) {
|
||||
sched, err := s.deps.Store.GetSchedule(ctx, hostID, scheduleID)
|
||||
// cron fire via `schedule.fire`. Thin wrapper around the shared
|
||||
// dispatcher; logs and discards the return values since the agent
|
||||
// can't usefully act on them.
|
||||
func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, _ *ws.Conn, scheduleID string, scheduledAt time.Time) {
|
||||
jobID, err := s.dispatchScheduleNow(ctx, hostID, scheduleID, nil)
|
||||
if err != nil {
|
||||
slog.Warn("schedule.fire: schedule not found",
|
||||
slog.Warn("schedule.fire: dispatch failed",
|
||||
"host_id", hostID, "schedule_id", scheduleID, "err", err)
|
||||
return
|
||||
}
|
||||
slog.Info("schedule.fire: dispatched",
|
||||
"host_id", hostID, "schedule_id", scheduleID,
|
||||
"job_id", jobID, "scheduled_at", scheduledAt)
|
||||
}
|
||||
|
||||
// dispatchScheduleNow looks up a schedule, builds a CommandRunPayload,
|
||||
// persists a jobs row (actor_kind=schedule, scheduled_id linking
|
||||
// back), and ships MsgCommandRun to the host. Used by both the
|
||||
// agent-driven path (cron fire reaches us as schedule.fire) and the
|
||||
// UI-driven path (operator clicks Run-now on a schedule row).
|
||||
//
|
||||
// conn is optional: when set we write directly through it (no race
|
||||
// against an in-flight Register). When nil we fall back to Hub.Send.
|
||||
// Returns the new job_id on success.
|
||||
func (s *Server) dispatchScheduleNow(ctx context.Context, hostID, scheduleID string, conn *ws.Conn) (string, error) {
|
||||
sched, err := s.deps.Store.GetSchedule(ctx, hostID, scheduleID)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
return "", errFmtf("schedule not found")
|
||||
}
|
||||
return "", errFmtf("internal: %s", err)
|
||||
}
|
||||
if !sched.Enabled {
|
||||
// The agent shouldn't be firing disabled schedules — its
|
||||
// local cron is rebuilt from the canonical version after
|
||||
// every push — but treat as belt-and-braces.
|
||||
slog.Info("schedule.fire: ignoring disabled schedule",
|
||||
"host_id", hostID, "schedule_id", scheduleID)
|
||||
return
|
||||
return "", errFmtf("schedule is disabled")
|
||||
}
|
||||
|
||||
// Args differ by kind. For backup we ship the schedule's paths;
|
||||
// other kinds are still arg-less in Phase 2 (forget/prune/check
|
||||
// take their parameters from RetentionPolicy / Options at exec
|
||||
// time on the agent — handled when those job kinds land).
|
||||
var args []string
|
||||
if sched.Kind == string(api.JobBackup) {
|
||||
args = append(args, sched.Paths...)
|
||||
@@ -191,9 +198,7 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
|
||||
ActorID: &sched.ID,
|
||||
CreatedAt: now,
|
||||
}); err != nil {
|
||||
slog.Warn("schedule.fire: create job",
|
||||
"host_id", hostID, "schedule_id", scheduleID, "err", err)
|
||||
return
|
||||
return "", errFmtf("create job: %s", err)
|
||||
}
|
||||
|
||||
env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
|
||||
@@ -202,16 +207,18 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
|
||||
Args: args,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Error("schedule.fire: marshal command.run",
|
||||
"host_id", hostID, "schedule_id", scheduleID, "err", err)
|
||||
return
|
||||
return "", errFmtf("marshal command.run: %s", err)
|
||||
}
|
||||
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
if err := conn.Send(sendCtx, env); err != nil {
|
||||
slog.Warn("schedule.fire: send command.run",
|
||||
"host_id", hostID, "job_id", jobID, "err", err)
|
||||
return
|
||||
if conn != nil {
|
||||
if err := conn.Send(sendCtx, env); err != nil {
|
||||
return "", errFmtf("send command.run: %s", err)
|
||||
}
|
||||
} else {
|
||||
if err := s.deps.Hub.Send(sendCtx, hostID, env); err != nil {
|
||||
return "", errFmtf("send command.run: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
|
||||
@@ -222,9 +229,7 @@ func (s *Server) dispatchScheduledJob(ctx context.Context, hostID string, conn *
|
||||
TargetID: &jobID,
|
||||
TS: now,
|
||||
})
|
||||
slog.Info("schedule.fire: dispatched",
|
||||
"host_id", hostID, "schedule_id", scheduleID,
|
||||
"job_id", jobID, "kind", sched.Kind, "scheduled_at", scheduledAt)
|
||||
return jobID, nil
|
||||
}
|
||||
|
||||
// Compile-time guard that the store actually implements the methods
|
||||
|
||||
Reference in New Issue
Block a user