e283d70c27
Wires a 60s server-side ticker to the pure-logic maintenance.Decide
introduced in the previous commit. Decisions flow through a new
DispatchMaintenance method on *Server, which:

- skips offline hosts (no pending_runs queueing — maintenance is
  not a backup, so missed fires shouldn't pile up)
- skips prune (logging the skip, surfacing no error) when admin creds
  aren't bound
- pushes admin creds before prune, then dispatches with
  RequiresAdminCreds=true (same as operator-driven prune)
- persists job rows with actor_kind="system"

Reshapes the forget wire payload from a single RetentionPolicy to a
ForgetGroups list (one tag plus per-group keep-* values per source
group). The agent walks the groups and runs
`restic forget --tag <name> --keep-*` once per group. Dead code
removed: CommandRunPayload.RetentionPolicy, the old forget JSON-decode
in cmd/agent, and the single-policy form of restic.RunForget.
133 lines
4.9 KiB
Go
133 lines
4.9 KiB
Go
// maintenance_dispatch.go bridges the pure-logic maintenance.Ticker
// (internal/server/maintenance) to the side-effecting world: checks
// online state, builds the per-kind command.run payload, and calls
// dispatchJobWithPayload — the same path operator-triggered Run-now
// uses. Cadence-driven jobs are persisted with actor_kind="system"
// (dispatchJobWithPayload tags it that way when user==nil).
//
// Maintenance fires deliberately do NOT queue to pending_runs when
// the host is offline — five missed prunes on a laptop returning
// from a week away is not what the operator wants. Skip + log; the
// next 60s tick will re-evaluate.
package http
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"log/slog"
|
|
"strconv"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/maintenance"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// DispatchMaintenance acts on each Decision from the ticker. Offline
|
|
// hosts are skipped (logged); prune dispatches without admin creds
|
|
// are skipped silently (logged) — the operator hasn't completed the
|
|
// admin-creds setup yet, and re-trying every minute would just spam
|
|
// the logs. (Operator-triggered prune via the run-now endpoint
|
|
// returns a clear error instead — different path, different UX.)
|
|
func (s *Server) DispatchMaintenance(ctx context.Context, decisions []maintenance.Decision) {
|
|
for _, d := range decisions {
|
|
if !s.deps.Hub.Connected(d.HostID) {
|
|
slog.Info("maintenance: host offline, skipping",
|
|
"host_id", d.HostID, "kind", d.Kind)
|
|
continue
|
|
}
|
|
switch d.Kind {
|
|
case "forget":
|
|
payload, ok := s.buildForgetPayloadForHost(ctx, d.HostID)
|
|
if !ok {
|
|
slog.Info("maintenance: forget skipped — no source groups with retention",
|
|
"host_id", d.HostID)
|
|
continue
|
|
}
|
|
_, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobForget, payload)
|
|
if code != "" {
|
|
slog.Warn("maintenance: forget dispatch failed",
|
|
"host_id", d.HostID, "code", code, "msg", msg)
|
|
}
|
|
case "prune":
|
|
if _, err := s.deps.Store.GetHostCredentials(ctx, d.HostID, store.CredKindAdmin); err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
slog.Info("maintenance: prune skipped — no admin creds",
|
|
"host_id", d.HostID)
|
|
continue
|
|
}
|
|
slog.Warn("maintenance: prune skipped — admin creds error",
|
|
"host_id", d.HostID, "err", err)
|
|
continue
|
|
}
|
|
if err := s.pushAdminCredsToAgent(ctx, d.HostID); err != nil {
|
|
slog.Warn("maintenance: prune push admin creds failed",
|
|
"host_id", d.HostID, "err", err)
|
|
continue
|
|
}
|
|
payload := api.CommandRunPayload{RequiresAdminCreds: true}
|
|
_, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobPrune, payload)
|
|
if code != "" {
|
|
slog.Warn("maintenance: prune dispatch failed",
|
|
"host_id", d.HostID, "code", code, "msg", msg)
|
|
}
|
|
case "check":
|
|
payload := api.CommandRunPayload{Args: []string{strconv.Itoa(d.SubsetPct)}}
|
|
_, _, code, msg := s.dispatchJobWithPayload(ctx, nil, d.HostID, api.JobCheck, payload)
|
|
if code != "" {
|
|
slog.Warn("maintenance: check dispatch failed",
|
|
"host_id", d.HostID, "code", code, "msg", msg)
|
|
}
|
|
default:
|
|
slog.Warn("maintenance: unknown decision kind",
|
|
"host_id", d.HostID, "kind", d.Kind)
|
|
}
|
|
}
|
|
}
|
|
|
|
// buildForgetPayloadForHost collects every source group on the host
|
|
// that has a non-empty retention policy and builds a CommandRunPayload
|
|
// with ForgetGroups populated. Returns ok=false if the host has no
|
|
// such groups (the dispatcher then skips this kind).
|
|
func (s *Server) buildForgetPayloadForHost(ctx context.Context, hostID string) (api.CommandRunPayload, bool) {
|
|
groups, err := s.deps.Store.ListSourceGroupsByHost(ctx, hostID)
|
|
if err != nil {
|
|
slog.Warn("maintenance: list source groups failed", "host_id", hostID, "err", err)
|
|
return api.CommandRunPayload{}, false
|
|
}
|
|
fg := make([]api.ForgetGroup, 0, len(groups))
|
|
for _, g := range groups {
|
|
if isEmptyRetention(g.RetentionPolicy) {
|
|
continue
|
|
}
|
|
fg = append(fg, api.ForgetGroup{
|
|
Tag: g.Name,
|
|
Policy: forgetPolicyJSONFromStore(g.RetentionPolicy),
|
|
})
|
|
}
|
|
if len(fg) == 0 {
|
|
return api.CommandRunPayload{}, false
|
|
}
|
|
return api.CommandRunPayload{ForgetGroups: fg}, true
|
|
}
|
|
|
|
func isEmptyRetention(p store.RetentionPolicy) bool {
|
|
return p.KeepLast == nil && p.KeepHourly == nil &&
|
|
p.KeepDaily == nil && p.KeepWeekly == nil &&
|
|
p.KeepMonthly == nil && p.KeepYearly == nil
|
|
}
|
|
|
|
// forgetPolicyJSONFromStore copies retention pointers from the store
|
|
// view to the wire view. Both shapes are field-for-field identical;
|
|
// this avoids importing store from internal/api (which would invert
|
|
// the dependency direction).
|
|
func forgetPolicyJSONFromStore(p store.RetentionPolicy) api.ForgetPolicyJSON {
|
|
return api.ForgetPolicyJSON{
|
|
KeepLast: p.KeepLast,
|
|
KeepHourly: p.KeepHourly,
|
|
KeepDaily: p.KeepDaily,
|
|
KeepWeekly: p.KeepWeekly,
|
|
KeepMonthly: p.KeepMonthly,
|
|
KeepYearly: p.KeepYearly,
|
|
}
|
|
}
|