store: LatestJobByKind includes in-flight jobs (avoid maintenance double-fire)

Widen the SQL query to consider all statuses (queued, running,
succeeded, failed, cancelled) rather than only terminal ones. Previously,
an in-flight prune that outlasted the 60s tick interval was excluded by
the status filter, so the lookup produced ErrNotFound, causing the ticker
to anchor at now-24h and fire a second prune concurrently with the first.

Update the doc comment and test: remove the "queued job filtered out"
case, and add assertions that a running job and a queued job are each
returned as the latest.
This commit is contained in:
2026-05-04 00:29:52 +01:00
parent 5bcb20dfce
commit 9ec69456fe
2 changed files with 31 additions and 12 deletions
+4 -6
View File
@@ -193,20 +193,18 @@ func (s *Store) GetJob(ctx context.Context, id string) (*Job, error) {
return &j, nil return &j, nil
} }
// LatestJobByKind returns the most recent terminal job (status in // LatestJobByKind returns the most recent job (any status, including
// 'succeeded','failed','cancelled' — UK spelling matches the wire/DB // queued and running) of the given kind for the host, or
// literal, see api.JobCancelled) of the given kind for the host, or
// (nil, ErrNotFound) if no such job exists. Used by the maintenance // (nil, ErrNotFound) if no such job exists. Used by the maintenance
// ticker to compute "last fire" anchors for the cron-due check; // ticker to compute "last fire" anchors for the cron-due check;
// queued and running jobs are excluded so an in-flight run doesn't // in-flight jobs MUST be considered or a long-running prune (>60s)
// suppress its own cron tick from firing. //nolint:misspell // wire format // would re-fire on the next tick while the first is still running.
func (s *Store) LatestJobByKind(ctx context.Context, hostID, kind string) (*Job, error) { func (s *Store) LatestJobByKind(ctx context.Context, hostID, kind string) (*Job, error) {
row := s.db.QueryRowContext(ctx, row := s.db.QueryRowContext(ctx,
`SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id, `SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
started_at, finished_at, exit_code, stats, error, created_at started_at, finished_at, exit_code, stats, error, created_at
FROM jobs FROM jobs
WHERE host_id = ? AND kind = ? WHERE host_id = ? AND kind = ?
AND status IN ('succeeded','failed','cancelled')
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT 1`, hostID, kind) LIMIT 1`, hostID, kind)
var ( var (
+27 -6
View File
@@ -49,20 +49,41 @@ func TestLatestJobByKind(t *testing.T) {
t.Errorf("want j-new, got %q", got.ID) t.Errorf("want j-new, got %q", got.ID)
} }
// A queued job should be ignored — terminal-status filter. // An in-flight running job must be returned — long-prune-suppresses-tick
queuedAt := time.Now().UTC() // scenario: if a prune runs >60s the next tick must not re-fire it.
runningAt := time.Now().UTC()
if err := s.CreateJob(ctx, Job{
ID: "j-running", HostID: hostID, Kind: "forget",
ActorKind: "system", CreatedAt: runningAt,
}); err != nil {
t.Fatalf("create running: %v", err)
}
if err := s.MarkJobStarted(ctx, "j-running", runningAt); err != nil {
t.Fatalf("mark started: %v", err)
}
got2, err := s.LatestJobByKind(ctx, hostID, "forget")
if err != nil {
t.Fatalf("LatestJobByKind 2: %v", err)
}
if got2.ID != "j-running" {
t.Errorf("in-flight running job must be returned; want j-running, got %q", got2.ID)
}
// A queued (not-yet-started) job is also returned (it is newer than
// j-running because CreatedAt is later).
queuedAt := runningAt.Add(time.Millisecond)
if err := s.CreateJob(ctx, Job{ if err := s.CreateJob(ctx, Job{
ID: "j-queued", HostID: hostID, Kind: "forget", ID: "j-queued", HostID: hostID, Kind: "forget",
ActorKind: "system", CreatedAt: queuedAt, ActorKind: "system", CreatedAt: queuedAt,
}); err != nil { }); err != nil {
t.Fatalf("create queued: %v", err) t.Fatalf("create queued: %v", err)
} }
got2, err := s.LatestJobByKind(ctx, hostID, "forget") got3, err := s.LatestJobByKind(ctx, hostID, "forget")
if err != nil { if err != nil {
t.Fatalf("LatestJobByKind 2: %v", err) t.Fatalf("LatestJobByKind 3: %v", err)
} }
if got2.ID != "j-new" { if got3.ID != "j-queued" {
t.Errorf("queued job should be ignored; want j-new, got %q", got2.ID) t.Errorf("queued job must be returned as newest; want j-queued, got %q", got3.ID)
} }
// Different kind → ErrNotFound. // Different kind → ErrNotFound.