P2R-01: REST + WS rewire against the slim shape
Schedules CRUD now takes {cron, enabled, source_group_ids[]} with cron
parsed via robfig/cron/v3 and group membership scoped to the host.
New source-groups CRUD lives at /api/hosts/{id}/source-groups; delete
refuses with 409 if any schedule still references the group, returning
the schedule list so the UI can prompt 'remove from these schedules
first.' Repo-maintenance GET/PUT manages forget/prune/check cadences
on host_repo_maintenance — no version bump, the server-side ticker
(P2R-06) drives execution.
Per-source-group Run-now (POST /hosts/{id}/source-groups/{gid}/run)
resolves the group's includes/excludes/retention/tag and dispatches a
backup command.run with the new structured CommandRunPayload fields
(Includes/Excludes/Tag). Old per-host /hosts/{id}/run-backup and
/hosts/{id}/init-repo return 410 Gone with a redirect message.
schedule_push.go is rebuilt: buildScheduleSetPayload assembles the
slim wire shape, pushScheduleSetOnConn ships it during the on-hello
window, pushScheduleSetAsync fires after every CRUD mutation, and
dispatchScheduledJob handles agent schedule.fire by iterating the
schedule's source groups and dispatching one backup per group with
actor_kind=schedule and scheduled_id pointing at the schedule.
Auto-init at first WS connect: when the host has repo creds bound and
no init job in its history, the server dispatches restic init. Restic's
'config file already exists' soft-success means re-runs against an
existing repo are no-ops; we don't auto-retry on failure (operator triggers
re-init manually via the danger zone in P2R-09).
api.Schedule drops Kind/Paths/Excludes/Tags/RetentionPolicy/Manual etc.
in favour of {id, cron, enabled, source_groups: [...]}. The agent
scheduler stops checking sch.Manual; cmd/agent's backup dispatch reads
Includes/Excludes/Tag instead of Args.
Tests cover the new HTTP surface end-to-end: source-groups CRUD with
in-use refusal, schedule validation (bad cron / missing groups /
foreign group), repo-maintenance auto-seed and validation, the 410
route, and buildScheduleSetPayload's wire-shape correctness. Full
suite passes; smoke env exercises auto-init dispatch on hello,
async push after schedule create, and per-source-group Run-now
landing the right paths/excludes/tag at the agent.
This commit is contained in:
@@ -199,6 +199,67 @@ func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn)
|
||||
// drop any cron entries left over from a previous deployment.
|
||||
// Always runs, even when the host has no repo credentials yet.
|
||||
s.pushScheduleSetOnConn(ctx, hostID, conn)
|
||||
// Auto-init the repo if we've never landed a successful init job
|
||||
// against this host. Restic treats "config file already exists"
|
||||
// as a soft success, so re-enrolment against a populated repo
|
||||
// just no-ops. Skipped silently when the host has no creds yet —
|
||||
// the next hello after the operator binds creds will dispatch.
|
||||
s.maybeAutoInit(ctx, hostID, conn)
|
||||
}
|
||||
|
||||
// maybeAutoInit dispatches a `restic init` job iff the host has no
|
||||
// successful init in its history AND repo creds are bound (without
|
||||
// them the runner can't talk to the repo). We rely on Restic's
|
||||
// idempotent init for re-runs.
|
||||
func (s *Server) maybeAutoInit(ctx context.Context, hostID string, conn *ws.Conn) {
|
||||
if _, err := s.deps.Store.GetHostCredentials(ctx, hostID); err != nil {
|
||||
// No creds bound yet — operator hasn't supplied them. The next
|
||||
// hello after creds land will pick this up.
|
||||
return
|
||||
}
|
||||
already, err := s.deps.Store.HasJobOfKind(ctx, hostID, string(api.JobInit))
|
||||
if err != nil {
|
||||
slog.Warn("auto-init: check job history", "host_id", hostID, "err", err)
|
||||
return
|
||||
}
|
||||
if already {
|
||||
return
|
||||
}
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := s.deps.Store.CreateJob(ctx, store.Job{
|
||||
ID: jobID,
|
||||
HostID: hostID,
|
||||
Kind: string(api.JobInit),
|
||||
ActorKind: "system",
|
||||
CreatedAt: now,
|
||||
}); err != nil {
|
||||
slog.Warn("auto-init: persist job", "host_id", hostID, "err", err)
|
||||
return
|
||||
}
|
||||
env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
|
||||
JobID: jobID,
|
||||
Kind: api.JobInit,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Warn("auto-init: marshal command.run", "host_id", hostID, "err", err)
|
||||
return
|
||||
}
|
||||
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
if err := conn.Send(sendCtx, env); err != nil {
|
||||
slog.Warn("auto-init: send command.run", "host_id", hostID, "err", err)
|
||||
return
|
||||
}
|
||||
_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
Actor: "system",
|
||||
Action: "host.auto_init",
|
||||
TargetKind: ptr("host"),
|
||||
TargetID: &hostID,
|
||||
TS: now,
|
||||
})
|
||||
slog.Info("auto-init: dispatched", "host_id", hostID, "job_id", jobID)
|
||||
}
|
||||
|
||||
// pushRepoCredsOnHello loads + decrypts + sends the host's repo
|
||||
|
||||
Reference in New Issue
Block a user