6450bf1b88
Closes the schedule reconciliation loop end-to-end.
* New `internal/agent/scheduler` package wraps robfig/cron/v3 with
the lifecycle the agent needs:
- Apply(ScheduleSetPayload, Sender) stops the prior cron (waiting
for in-flight entries to return), rebuilds from scratch, starts,
and emits schedule.ack with the version we just applied.
- Disabled entries are skipped silently; bad cron expressions (which
shouldn't reach us, since the server validates them, but we stay
defensive) log a warning and are skipped.
- On each cron tick the entry sends a new schedule.fire envelope
to the server with {schedule_id, scheduled_at}. The scheduler
itself never builds CommandRunPayloads — server is the source
of truth for jobs.
- tx is swapped on every Apply, so reconnect is handled
naturally: cron entries that fire against a dropped tx log
"no active connection" and skip the tick.
- Stop() is idempotent and waits for the cron's in-flight
workers via cron.Stop().Done().
* New wire message api.MsgScheduleFire + api.ScheduleFirePayload
for the agent → server "I just fired locally" RPC.
* Server-side dispatch (schedule_push.go: dispatchScheduledJob):
looks up the schedule by id, validates ownership + that it's
enabled, builds args from kind (paths for backup; other kinds
are still arg-less in Phase 2 and grow as those job kinds land
in P2-05..08), persists a jobs row with actor_kind=schedule +
scheduled_id, and writes command.run back on the same conn so
the agent runs through its existing dispatch path.
* store.CreateJob now writes scheduled_id. This column was in the
schema since 0001 but never populated — the original P1 path
only had operator-driven jobs, so actor_kind was always 'user'
and scheduled_id was always nil.
* cmd/agent/main.go integration: dispatcher gains a
*scheduler.Scheduler; the MsgScheduleSet case now hands the
payload to scheduler.Apply (in a goroutine so the WS read loop
keeps draining other messages).
* WS dispatcher gains OnScheduleFire alongside OnScheduleAck.
* Tests:
- scheduler unit tests (4): ack-on-apply, cron tick fires
schedule.fire envelope, disabled entries don't fire, replace-
prior-state stops the old cron.
- Server-side end-to-end: schedule.fire → command.run with the
right job_id / kind / args, plus jobs row with actor_kind=
"schedule" and scheduled_id linking back to the schedule.
Persistence of next-fire times across agent restarts is
deliberately deferred. A missed fire window during downtime
simply fires once on reconnect — that's the desirable behaviour
(the operator wants the missed backup to run, not be silently
skipped because we lost track of when it was due).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
305 lines
9.6 KiB
Go
305 lines
9.6 KiB
Go
package http
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"io"
|
|
stdhttp "net/http"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/coder/websocket"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// makePushHost is like makeHTTPHost but mints a known agent token so
|
|
// the test can dial /ws/agent as the host. Returns (hostID, raw token).
|
|
func makePushHost(t *testing.T, st *store.Store) (string, string) {
|
|
t.Helper()
|
|
const id = "01HSCHEDPUSH00000000000000"
|
|
tok, _ := auth.NewToken()
|
|
if err := st.CreateHost(context.Background(), store.Host{
|
|
ID: id, Name: "ph", OS: "linux", Arch: "amd64",
|
|
AgentVersion: "dev", ResticVersion: "0.16.0", ProtocolVersion: 1,
|
|
EnrolledAt: time.Now().UTC(),
|
|
}, auth.HashToken(tok), ""); err != nil {
|
|
t.Fatalf("create host: %v", err)
|
|
}
|
|
return id, tok
|
|
}
|
|
|
|
// readUntilType pumps messages from the WS until one of the wanted
|
|
// types arrives or ctx times out. Returns the matched envelope.
|
|
// Useful because the on-hello path may push several messages
|
|
// (config.update first if creds exist, schedule.set, …).
|
|
func readUntilType(ctx context.Context, t *testing.T, c *websocket.Conn, want api.MessageType) api.Envelope {
|
|
t.Helper()
|
|
for {
|
|
_, raw, err := c.Read(ctx)
|
|
if err != nil {
|
|
t.Fatalf("ws read waiting for %s: %v", want, err)
|
|
}
|
|
var env api.Envelope
|
|
if err := json.Unmarshal(raw, &env); err != nil {
|
|
t.Fatalf("envelope: %v (raw=%s)", err, raw)
|
|
}
|
|
t.Logf("recv: type=%s payload=%s", env.Type, env.Payload)
|
|
if env.Type == want {
|
|
return env
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestSchedulePushOnHelloAndAckRoundtrip(t *testing.T) {
|
|
t.Parallel()
|
|
srv, url, st := newTestServerWithHub(t)
|
|
_ = srv
|
|
cookie := loginAndCookie(t, url)
|
|
hostID, agentToken := makePushHost(t, st)
|
|
|
|
// Pre-populate one schedule so we have something to push.
|
|
body, _ := json.Marshal(scheduleAPI{
|
|
Kind: "backup",
|
|
CronExpr: "@hourly",
|
|
Paths: []string{"/etc"},
|
|
Enabled: true,
|
|
})
|
|
req, _ := stdhttp.NewRequest("POST", url+"/api/hosts/"+hostID+"/schedules",
|
|
bytes.NewReader(body))
|
|
req.AddCookie(cookie)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
res, err := stdhttp.DefaultClient.Do(req)
|
|
if err != nil {
|
|
t.Fatalf("create schedule: %v", err)
|
|
}
|
|
got, _ := io.ReadAll(res.Body)
|
|
res.Body.Close()
|
|
if res.StatusCode != stdhttp.StatusCreated {
|
|
t.Fatalf("create schedule: %d %s", res.StatusCode, got)
|
|
}
|
|
var created scheduleAPI
|
|
_ = json.Unmarshal(got, &created)
|
|
|
|
// Dial the WS as the agent and send hello.
|
|
wsURL := "ws" + strings.TrimPrefix(url, "http") + "/ws/agent"
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
c, _, err := websocket.Dial(ctx, wsURL, &websocket.DialOptions{
|
|
HTTPHeader: stdhttp.Header{"Authorization": []string{"Bearer " + agentToken}},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("dial: %v", err)
|
|
}
|
|
defer c.CloseNow()
|
|
|
|
helloEnv, _ := api.Marshal(api.MsgHello, "", api.HelloPayload{
|
|
ProtocolVersion: api.CurrentProtocolVersion,
|
|
AgentVersion: "test", ResticVersion: "test",
|
|
Hostname: "ph", OS: api.OSLinux, Arch: api.ArchAmd64,
|
|
})
|
|
raw, _ := json.Marshal(helloEnv)
|
|
if err := c.Write(ctx, websocket.MessageText, raw); err != nil {
|
|
t.Fatalf("write hello: %v", err)
|
|
}
|
|
|
|
// Server should push schedule.set (our host has no creds, so the
|
|
// config.update branch is silently skipped).
|
|
pushedEnv := readUntilType(ctx, t, c, api.MsgScheduleSet)
|
|
var pushed api.ScheduleSetPayload
|
|
if err := pushedEnv.UnmarshalPayload(&pushed); err != nil {
|
|
t.Fatalf("decode payload: %v", err)
|
|
}
|
|
if pushed.Version != 1 {
|
|
t.Fatalf("pushed version: got %d, want 1", pushed.Version)
|
|
}
|
|
if len(pushed.Schedules) != 1 || pushed.Schedules[0].ID != created.ID {
|
|
t.Fatalf("pushed schedules: %+v", pushed.Schedules)
|
|
}
|
|
if pushed.Schedules[0].CronExpr != "@hourly" || len(pushed.Schedules[0].Paths) != 1 {
|
|
t.Fatalf("schedule contents: %+v", pushed.Schedules[0])
|
|
}
|
|
|
|
// Ack the version. Server should record it on the host row.
|
|
ackEnv, _ := api.Marshal(api.MsgScheduleAck, "", api.ScheduleAckPayload{
|
|
Version: pushed.Version,
|
|
AppliedAt: time.Now().UTC(),
|
|
})
|
|
raw, _ = json.Marshal(ackEnv)
|
|
if err := c.Write(ctx, websocket.MessageText, raw); err != nil {
|
|
t.Fatalf("write ack: %v", err)
|
|
}
|
|
|
|
// Wait for applied_schedule_version to flip.
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) {
|
|
h, err := st.GetHost(context.Background(), hostID)
|
|
if err == nil && h.AppliedScheduleVersion == pushed.Version {
|
|
return
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
}
|
|
h, _ := st.GetHost(context.Background(), hostID)
|
|
t.Fatalf("applied_schedule_version did not advance: got %d, want %d",
|
|
h.AppliedScheduleVersion, pushed.Version)
|
|
}
|
|
|
|
func TestScheduleFireDispatchesCommandRun(t *testing.T) {
|
|
t.Parallel()
|
|
srv, url, st := newTestServerWithHub(t)
|
|
_ = srv
|
|
cookie := loginAndCookie(t, url)
|
|
hostID, agentToken := makePushHost(t, st)
|
|
|
|
// Pre-create one backup schedule.
|
|
body, _ := json.Marshal(scheduleAPI{
|
|
Kind: "backup", CronExpr: "@hourly",
|
|
Paths: []string{"/etc/hostname"}, Enabled: true,
|
|
})
|
|
req, _ := stdhttp.NewRequest("POST",
|
|
url+"/api/hosts/"+hostID+"/schedules", bytes.NewReader(body))
|
|
req.AddCookie(cookie)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
res, err := stdhttp.DefaultClient.Do(req)
|
|
if err != nil {
|
|
t.Fatalf("create: %v", err)
|
|
}
|
|
got, _ := io.ReadAll(res.Body)
|
|
res.Body.Close()
|
|
var created scheduleAPI
|
|
_ = json.Unmarshal(got, &created)
|
|
|
|
// Connect as the agent.
|
|
wsURL := "ws" + strings.TrimPrefix(url, "http") + "/ws/agent"
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
c, _, err := websocket.Dial(ctx, wsURL, &websocket.DialOptions{
|
|
HTTPHeader: stdhttp.Header{"Authorization": []string{"Bearer " + agentToken}},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("dial: %v", err)
|
|
}
|
|
defer c.CloseNow()
|
|
|
|
helloEnv, _ := api.Marshal(api.MsgHello, "", api.HelloPayload{
|
|
ProtocolVersion: api.CurrentProtocolVersion,
|
|
AgentVersion: "test", ResticVersion: "test",
|
|
Hostname: "ph", OS: api.OSLinux, Arch: api.ArchAmd64,
|
|
})
|
|
raw, _ := json.Marshal(helloEnv)
|
|
_ = c.Write(ctx, websocket.MessageText, raw)
|
|
|
|
// Drain the on-hello schedule.set.
|
|
_ = readUntilType(ctx, t, c, api.MsgScheduleSet)
|
|
|
|
// Pretend our local cron just fired this schedule.
|
|
fireEnv, _ := api.Marshal(api.MsgScheduleFire, "", api.ScheduleFirePayload{
|
|
ScheduleID: created.ID,
|
|
ScheduledAt: time.Now().UTC(),
|
|
})
|
|
raw, _ = json.Marshal(fireEnv)
|
|
if err := c.Write(ctx, websocket.MessageText, raw); err != nil {
|
|
t.Fatalf("write fire: %v", err)
|
|
}
|
|
|
|
// Server should respond with command.run.
|
|
cmdEnv := readUntilType(ctx, t, c, api.MsgCommandRun)
|
|
var cmd api.CommandRunPayload
|
|
if err := cmdEnv.UnmarshalPayload(&cmd); err != nil {
|
|
t.Fatalf("decode command.run: %v", err)
|
|
}
|
|
if cmd.JobID == "" || cmd.Kind != api.JobBackup {
|
|
t.Fatalf("command.run: %+v", cmd)
|
|
}
|
|
if len(cmd.Args) != 1 || cmd.Args[0] != "/etc/hostname" {
|
|
t.Fatalf("command.run args: %+v", cmd.Args)
|
|
}
|
|
|
|
// Verify the job row landed with actor_kind=schedule.
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) {
|
|
var actorKind, scheduledID string
|
|
row := st.DB().QueryRowContext(context.Background(),
|
|
`SELECT actor_kind, COALESCE(scheduled_id,'') FROM jobs WHERE id = ?`,
|
|
cmd.JobID)
|
|
if err := row.Scan(&actorKind, &scheduledID); err == nil {
|
|
if actorKind != "schedule" {
|
|
t.Fatalf("job actor_kind: %q", actorKind)
|
|
}
|
|
if scheduledID != created.ID {
|
|
t.Fatalf("job scheduled_id: %q want %q", scheduledID, created.ID)
|
|
}
|
|
return
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
}
|
|
t.Fatalf("job row %s never landed", cmd.JobID)
|
|
}
|
|
|
|
func TestSchedulePushOnCRUD(t *testing.T) {
|
|
t.Parallel()
|
|
srv, url, st := newTestServerWithHub(t)
|
|
_ = srv
|
|
cookie := loginAndCookie(t, url)
|
|
hostID, agentToken := makePushHost(t, st)
|
|
|
|
// Connect first so the CRUD push has somewhere to land.
|
|
wsURL := "ws" + strings.TrimPrefix(url, "http") + "/ws/agent"
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
c, _, err := websocket.Dial(ctx, wsURL, &websocket.DialOptions{
|
|
HTTPHeader: stdhttp.Header{"Authorization": []string{"Bearer " + agentToken}},
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("dial: %v", err)
|
|
}
|
|
defer c.CloseNow()
|
|
|
|
helloEnv, _ := api.Marshal(api.MsgHello, "", api.HelloPayload{
|
|
ProtocolVersion: api.CurrentProtocolVersion,
|
|
AgentVersion: "test", ResticVersion: "test",
|
|
Hostname: "ph", OS: api.OSLinux, Arch: api.ArchAmd64,
|
|
})
|
|
raw, _ := json.Marshal(helloEnv)
|
|
_ = c.Write(ctx, websocket.MessageText, raw)
|
|
|
|
// Drain the on-hello schedule.set (will be version 0, empty list).
|
|
first := readUntilType(ctx, t, c, api.MsgScheduleSet)
|
|
var initial api.ScheduleSetPayload
|
|
_ = first.UnmarshalPayload(&initial)
|
|
if initial.Version != 0 || len(initial.Schedules) != 0 {
|
|
t.Fatalf("initial push: %+v", initial)
|
|
}
|
|
|
|
// Now create a schedule via REST. The handler should fire a
|
|
// schedule.set push asynchronously.
|
|
body, _ := json.Marshal(scheduleAPI{
|
|
Kind: "backup", CronExpr: "*/30 * * * *",
|
|
Paths: []string{"/var/lib"}, Enabled: true,
|
|
})
|
|
req, _ := stdhttp.NewRequest("POST",
|
|
url+"/api/hosts/"+hostID+"/schedules", bytes.NewReader(body))
|
|
req.AddCookie(cookie)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
res, err := stdhttp.DefaultClient.Do(req)
|
|
if err != nil {
|
|
t.Fatalf("create: %v", err)
|
|
}
|
|
res.Body.Close()
|
|
if res.StatusCode != stdhttp.StatusCreated {
|
|
t.Fatalf("create: %d", res.StatusCode)
|
|
}
|
|
|
|
// Wait for the pushed schedule.set with version 1.
|
|
pushed := readUntilType(ctx, t, c, api.MsgScheduleSet)
|
|
var pl api.ScheduleSetPayload
|
|
_ = pushed.UnmarshalPayload(&pl)
|
|
if pl.Version != 1 || len(pl.Schedules) != 1 {
|
|
t.Fatalf("push after create: %+v", pl)
|
|
}
|
|
}
|