Files
restic-manager/internal/store/pending.go
T
steve e7eea7afac P2 redesign · phase 2: store rewrite — sources, slim schedules, repo maintenance
Go-side data model rebuilt against migration 0008. The fat-Schedule
shape (paths/excludes/tags/retention/manual/kind/options/hooks) is
gone; that surface lives on source_groups now.

* store/types.go
  - Schedule slimmed to {id, host_id, cron, enabled, source_group_ids,
    timestamps}. SourceGroupIDs populated by Get/List, accepted on
    Create/Update so callers pass desired junction state in one shape.
  - SourceGroup added: name (= snapshot tag), includes/excludes,
    retention_policy, retry_max + retry_backoff_seconds, cached
    conflict_dimension.
  - HostRepoMaintenance added: forget/prune/check cadences + enabled.
  - PendingRun added: offline-retry queue.
  - Host loses RepoInitialisedAt; gains BandwidthUpKBps + BandwidthDownKBps.
  - RetentionPolicy moves home from "schedule field" to "source group
    field" but the type itself + Summary() method unchanged.

* store/sources.go (new) — CRUD + GetByName + ConflictDimension cache.
  Group writes bump host_schedule_version; conflict cache writes don't
  (server-internal projection, agent doesn't see it).
* store/maintenance.go (new) — CreateDefault is idempotent (INSERT OR
  IGNORE). UpdateRepoMaintenance doesn't bump schedule version because
  these run on the server's own ticker, not the agent's local cron.
* store/pending.go (new) — Enqueue / DuePendingRuns / Bump / Delete.
* store/schedules.go — rewritten for slim shape + junction CRUD.
  Update wipes the schedule_source_groups junction wholesale and
  re-inserts (simpler than diffing). Adds SchedulesUsingGroup for
  retention-conflict detection + UI labels.
* store/hosts.go — drops repo_initialised_at scan, adds bandwidth scan.
  New SetHostBandwidth helper.

* HTTP layer — temporarily stubbed during this rewrite (501 returns
  with redesign_in_progress error code). Phase 3 fills these in
  against the new shape:
    - schedules.go REST CRUD
    - schedule_push.go agent reconciliation
    - ui_schedules.go HTML form CRUD
  Run-now-per-host + Init-repo handlers in ui_handlers.go also stubbed
  — both go away in the new model (Run-now per source group; auto-init
  at host enrolment).

* enrollment.go — replaces "seed manual schedule from typed paths"
  with "seed default source group + repo-maintenance row." The default
  group gets the typed paths as its includes; operator edits later
  via Sources tab.

* ws/handler.go — drops the MarkHostRepoInitialised projection (column
  is gone; auto-init makes it derivable from latest init job's status).

Tests:
* store: existing schedule test rewritten for slim shape + junction;
  new sources_test.go covers source-group CRUD, name uniqueness,
  conflict cache, repo-maintenance defaults + idempotent seed,
  pending-runs queue lifecycle.
* http: schedules_test.go and schedule_push_test.go deleted — both
  exercised the obsolete fat-schedule API. Phase 3 rewrites them
  against the new endpoints.

go test ./... green. cmd/server + cmd/agent build. The UI is broken
end-to-end (schedules / sources / repo tabs all hit 501 stubs); Phase 3
restores REST + on-the-wire reconciliation; Phase 4 rewires the UI
templates against the new model.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 21:30:41 +01:00

104 lines
3.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package store
import (
"context"
"errors"
"fmt"
"time"
)
// EnqueuePendingRun queues a missed cron tick for the offline-retry
// ticker to dispatch later.
//
// p must be non-nil and carry ID, ScheduleID, SourceGroupID and HostID;
// otherwise an error is returned and nothing is written. Unset fields
// are defaulted in place on p before the insert: a zero Attempt becomes
// 1, and a zero NextAttemptAt or ScheduledAt becomes the current UTC
// time (a single clock reading is shared, so both defaults are equal).
// Timestamps are written as UTC strings in time.RFC3339Nano format.
//
// Caller (the schedule firing path) sets
// next_attempt_at = now + group.retry_backoff_seconds * 2^(attempt-1).
func (st *Store) EnqueuePendingRun(ctx context.Context, p *PendingRun) error {
	// Guard against a nil pointer so a caller bug surfaces as an error
	// rather than a panic inside the store layer.
	if p == nil {
		return errors.New("store: pending run required")
	}
	if p.ID == "" || p.ScheduleID == "" || p.SourceGroupID == "" || p.HostID == "" {
		return errors.New("store: pending run id, schedule_id, source_group_id, host_id required")
	}
	if p.Attempt == 0 {
		p.Attempt = 1
	}
	// Read the clock once so both defaulted timestamps agree exactly.
	now := time.Now().UTC()
	if p.NextAttemptAt.IsZero() {
		p.NextAttemptAt = now
	}
	if p.ScheduledAt.IsZero() {
		p.ScheduledAt = now
	}
	// NOTE(review): nullableString presumably maps "" to SQL NULL (the
	// reader side uses COALESCE(last_error, '')) — confirm.
	_, err := st.db.ExecContext(ctx,
		`INSERT INTO pending_runs (id, schedule_id, source_group_id, host_id,
attempt, next_attempt_at, scheduled_at, last_error)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
		p.ID, p.ScheduleID, p.SourceGroupID, p.HostID,
		p.Attempt,
		p.NextAttemptAt.UTC().Format(time.RFC3339Nano),
		p.ScheduledAt.UTC().Format(time.RFC3339Nano),
		nullableString(p.LastError))
	if err != nil {
		return fmt.Errorf("store: enqueue pending run: %w", err)
	}
	return nil
}
// DuePendingRuns returns rows whose next_attempt_at <= now, ordered by
// next_attempt_at (oldest first). Server-side ticker calls this every
// ~30s.
//
// now is converted to UTC and formatted with time.RFC3339Nano before
// being compared against the stored value; limit is passed straight to
// the SQL LIMIT clause. On success the result is a non-nil slice (empty
// when nothing is due). On any query, scan or iteration error the
// result is nil and the error is wrapped with store context.
//
// NOTE(review): if next_attempt_at is a TEXT column the comparison is
// lexical, and RFC3339Nano omits trailing fractional zeros, so ordering
// within the same second may be imprecise — confirm this is acceptable.
func (st *Store) DuePendingRuns(ctx context.Context, now time.Time, limit int) ([]PendingRun, error) {
	rows, err := st.db.QueryContext(ctx,
		`SELECT id, schedule_id, source_group_id, host_id, attempt,
next_attempt_at, scheduled_at, COALESCE(last_error, '')
FROM pending_runs
WHERE next_attempt_at <= ?
ORDER BY next_attempt_at
LIMIT ?`,
		now.UTC().Format(time.RFC3339Nano), limit)
	if err != nil {
		return nil, fmt.Errorf("store: due pending runs: %w", err)
	}
	defer rows.Close()

	out := []PendingRun{}
	for rows.Next() {
		var p PendingRun
		var nextAt, scheduledAt string
		if err := rows.Scan(&p.ID, &p.ScheduleID, &p.SourceGroupID, &p.HostID,
			&p.Attempt, &nextAt, &scheduledAt, &p.LastError); err != nil {
			return nil, fmt.Errorf("store: scan pending run: %w", err)
		}
		// Timestamp parsing is deliberately tolerant: a value that does
		// not parse leaves the field at its zero time instead of
		// failing the whole batch.
		if t, err := time.Parse(time.RFC3339Nano, nextAt); err == nil {
			p.NextAttemptAt = t
		}
		if t, err := time.Parse(time.RFC3339Nano, scheduledAt); err == nil {
			p.ScheduledAt = t
		}
		out = append(out, p)
	}
	// Surface iteration failures with the same context as the query
	// error, and do not hand back a partially filled slice.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("store: due pending runs: %w", err)
	}
	return out, nil
}
// DeletePendingRun deletes the pending_runs row with the given id.
// It is called once a run has been dispatched successfully, or when
// the run has exceeded retry_max. The number of rows affected is not
// inspected; only a failure of the statement itself is reported.
func (st *Store) DeletePendingRun(ctx context.Context, id string) error {
	const stmt = `DELETE FROM pending_runs WHERE id = ?`
	if _, execErr := st.db.ExecContext(ctx, stmt, id); execErr != nil {
		return fmt.Errorf("store: delete pending run: %w", execErr)
	}
	return nil
}
// BumpPendingRunAttempt records another retry for the row with the
// given id: attempt is incremented by one, next_attempt_at is replaced
// with nextAttemptAt (converted to UTC, time.RFC3339Nano format) and
// last_error is replaced with lastError (passed through
// nullableString). Used after a failed retry, once the caller has
// decided to try again. The number of rows affected is not inspected.
func (st *Store) BumpPendingRunAttempt(ctx context.Context, id string, nextAttemptAt time.Time, lastError string) error {
	const stmt = `UPDATE pending_runs SET
attempt = attempt + 1,
next_attempt_at = ?,
last_error = ?
WHERE id = ?`

	nextAt := nextAttemptAt.UTC().Format(time.RFC3339Nano)
	if _, execErr := st.db.ExecContext(ctx, stmt, nextAt, nullableString(lastError), id); execErr != nil {
		return fmt.Errorf("store: bump pending run: %w", execErr)
	}
	return nil
}