// Package maintenance owns the server-side scheduler that fires // forget/prune/check on the cadences operators set on // host_repo_maintenance rows. Independent of the agent's local cron // (which now only handles backup schedules). // // The ticker is intentionally side-effect-free at the package // boundary: it asks an injected Backend for current state and emits // a list of Decisions for the caller to act on. Easy to unit-test // without a running server. package maintenance import ( "context" "errors" "time" "github.com/robfig/cron/v3" "gitea.dcglab.co.uk/steve/restic-manager/internal/store" ) // Decision is one cadence-driven dispatch the ticker recommends. // SubsetPct is populated only when Kind == "check"; ignored for // "forget" and "prune". type Decision struct { HostID string Kind string // "forget" | "prune" | "check" SubsetPct int } // Backend is the subset of *store.Store the ticker depends on. // Constrained interface so tests can pass a fake. type Backend interface { ListAllMaintenance(ctx context.Context) ([]store.HostRepoMaintenance, error) LatestJobByKind(ctx context.Context, hostID, kind string) (*store.Job, error) } // Ticker decides which cadence-driven jobs are due to fire at a // given instant. Stateless — the only state lives in the Backend. type Ticker struct { backend Backend parser cron.Parser } // New builds a Ticker bound to the given Backend. func New(b Backend) *Ticker { return &Ticker{ backend: b, parser: cron.NewParser(cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow), } } // Decide returns the set of jobs the ticker would dispatch at `now`. // The caller is responsible for: checking host online state, // persisting the job row, and shipping command.run. Returns nil // (not an error) when the maintenance table is empty — a fresh // install is the most common case. func (t *Ticker) Decide(ctx context.Context, now time.Time) ([]Decision, error) { rows, err := t.backend.ListAllMaintenance(ctx) if err != nil { return nil, err } var out []Decision for _, m := range rows { if d, ok := t.dueFor(ctx, now, m.HostID, "forget", m.ForgetCron, m.ForgetEnabled, 0); ok { out = append(out, d) } if d, ok := t.dueFor(ctx, now, m.HostID, "prune", m.PruneCron, m.PruneEnabled, 0); ok { out = append(out, d) } if d, ok := t.dueFor(ctx, now, m.HostID, "check", m.CheckCron, m.CheckEnabled, m.CheckSubsetPct); ok { out = append(out, d) } } return out, nil } // dueFor returns true if the cron has a fire-instant strictly after // the latest persisted job's created_at and at-or-before now. // // Anchor selection: // - When LatestJobByKind returns a job: anchor = j.CreatedAt. // - When LatestJobByKind returns ErrNotFound: anchor = now - 24h // (first-run case — cap the lookback so a brand-new host doesn't // fire 30 days of missed monthly-checks on first tick). // - When LatestJobByKind returns a hard error: skip this kind for // this host on this tick. // // Disabled (`enabled == false`) or empty cron skips silently. // Cron parse failures skip silently — the schedule/maintenance // routes already validate cron at write time, so this is defensive. func (t *Ticker) dueFor(ctx context.Context, now time.Time, hostID, kind, expr string, enabled bool, subset int) (Decision, bool) { if !enabled || expr == "" { return Decision{}, false } sched, err := t.parser.Parse(expr) if err != nil { return Decision{}, false } j, err := t.backend.LatestJobByKind(ctx, hostID, kind) var anchor time.Time switch { case err == nil && j != nil: anchor = j.CreatedAt case errors.Is(err, store.ErrNotFound): anchor = now.Add(-24 * time.Hour) default: // Hard error — skip this kind on this tick. return Decision{}, false } next := sched.Next(anchor) if next.IsZero() || next.After(now) { return Decision{}, false } return Decision{HostID: hostID, Kind: kind, SubsetPct: subset}, true }