P2 redesign · phase 2: store rewrite — sources, slim schedules, repo maintenance
Go-side data model rebuilt against migration 0008. The fat-Schedule
shape (paths/excludes/tags/retention/manual/kind/options/hooks) is
gone; that surface lives on source_groups now.
* store/types.go
- Schedule slimmed to {id, host_id, cron, enabled, source_group_ids,
timestamps}. SourceGroupIDs populated by Get/List, accepted on
Create/Update so callers pass desired junction state in one shape.
- SourceGroup added: name (= snapshot tag), includes/excludes,
retention_policy, retry_max + retry_backoff_seconds, cached
conflict_dimension.
- HostRepoMaintenance added: forget/prune/check cadences + enabled.
- PendingRun added: offline-retry queue.
- Host loses RepoInitialisedAt; gains BandwidthUpKBps + BandwidthDownKBps.
- RetentionPolicy moves home from "schedule field" to "source group
field" but the type itself + Summary() method unchanged.
* store/sources.go (new) — CRUD + GetByName + ConflictDimension cache.
Group writes bump host_schedule_version; conflict cache writes don't
(server-internal projection, agent doesn't see it).
* store/maintenance.go (new) — CreateDefault is idempotent (INSERT OR
IGNORE). UpdateRepoMaintenance doesn't bump schedule version because
these run on the server's own ticker, not the agent's local cron.
* store/pending.go (new) — EnqueuePendingRun / DuePendingRuns / BumpPendingRunAttempt / DeletePendingRun.
* store/schedules.go — rewritten for slim shape + junction CRUD.
Update wipes the schedule_source_groups junction wholesale and
re-inserts (simpler than diffing). Adds SchedulesUsingGroup for
retention-conflict detection + UI labels.
* store/hosts.go — drops repo_initialised_at scan, adds bandwidth scan.
New SetHostBandwidth helper.
* HTTP layer — temporarily stubbed during this rewrite (501 returns
with redesign_in_progress error code). Phase 3 fills these in
against the new shape:
- schedules.go REST CRUD
- schedule_push.go agent reconciliation
- ui_schedules.go HTML form CRUD
Run-now-per-host + Init-repo handlers in ui_handlers.go also stubbed
— both go away in the new model (Run-now per source group; auto-init
at host enrolment).
* enrollment.go — replaces "seed manual schedule from typed paths"
with "seed default source group + repo-maintenance row." The default
group gets the typed paths as its includes; operator edits later
via Sources tab.
* ws/handler.go — drops the MarkHostRepoInitialised projection (column
is gone; auto-init makes it derivable from latest init job's status).
Tests:
* store: existing schedule test rewritten for slim shape + junction;
new sources_test.go covers source-group CRUD, name uniqueness,
conflict cache, repo-maintenance defaults + idempotent seed,
pending-runs queue lifecycle.
* http: schedules_test.go and schedule_push_test.go deleted — both
exercised the obsolete fat-schedule API. Phase 3 rewrites them
against the new endpoints.
go test ./... green. cmd/server + cmd/agent build. The UI is broken
end-to-end (schedules / sources / repo tabs all hit 501 stubs); Phase 3
restores REST + on-the-wire reconciliation; Phase 4 rewires the UI
templates against the new model.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,103 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// EnqueuePendingRun queues a missed cron tick for the offline-retry
|
||||
// ticker to dispatch later. Caller (the schedule firing path) sets
|
||||
// next_attempt_at = now + group.retry_backoff_seconds × 2^(attempt-1).
|
||||
func (st *Store) EnqueuePendingRun(ctx context.Context, p *PendingRun) error {
|
||||
if p.ID == "" || p.ScheduleID == "" || p.SourceGroupID == "" || p.HostID == "" {
|
||||
return errors.New("store: pending run id, schedule_id, source_group_id, host_id required")
|
||||
}
|
||||
if p.Attempt == 0 {
|
||||
p.Attempt = 1
|
||||
}
|
||||
if p.NextAttemptAt.IsZero() {
|
||||
p.NextAttemptAt = time.Now().UTC()
|
||||
}
|
||||
if p.ScheduledAt.IsZero() {
|
||||
p.ScheduledAt = time.Now().UTC()
|
||||
}
|
||||
_, err := st.db.ExecContext(ctx,
|
||||
`INSERT INTO pending_runs (id, schedule_id, source_group_id, host_id,
|
||||
attempt, next_attempt_at, scheduled_at, last_error)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
p.ID, p.ScheduleID, p.SourceGroupID, p.HostID,
|
||||
p.Attempt,
|
||||
p.NextAttemptAt.UTC().Format(time.RFC3339Nano),
|
||||
p.ScheduledAt.UTC().Format(time.RFC3339Nano),
|
||||
nullableString(p.LastError))
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: enqueue pending run: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DuePendingRuns returns rows whose next_attempt_at <= now, ordered
|
||||
// oldest first. Server-side ticker calls this every ~30s.
|
||||
func (st *Store) DuePendingRuns(ctx context.Context, now time.Time, limit int) ([]PendingRun, error) {
|
||||
rows, err := st.db.QueryContext(ctx,
|
||||
`SELECT id, schedule_id, source_group_id, host_id, attempt,
|
||||
next_attempt_at, scheduled_at, COALESCE(last_error, '')
|
||||
FROM pending_runs
|
||||
WHERE next_attempt_at <= ?
|
||||
ORDER BY next_attempt_at
|
||||
LIMIT ?`,
|
||||
now.UTC().Format(time.RFC3339Nano), limit)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("store: due pending runs: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
out := []PendingRun{}
|
||||
for rows.Next() {
|
||||
var p PendingRun
|
||||
var nextAt, scheduledAt string
|
||||
if err := rows.Scan(&p.ID, &p.ScheduleID, &p.SourceGroupID, &p.HostID,
|
||||
&p.Attempt, &nextAt, &scheduledAt, &p.LastError); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if t, err := time.Parse(time.RFC3339Nano, nextAt); err == nil {
|
||||
p.NextAttemptAt = t
|
||||
}
|
||||
if t, err := time.Parse(time.RFC3339Nano, scheduledAt); err == nil {
|
||||
p.ScheduledAt = t
|
||||
}
|
||||
out = append(out, p)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// DeletePendingRun removes a row by id. Called after successful
|
||||
// dispatch or after exceeding retry_max.
|
||||
func (st *Store) DeletePendingRun(ctx context.Context, id string) error {
|
||||
_, err := st.db.ExecContext(ctx,
|
||||
`DELETE FROM pending_runs WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: delete pending run: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// BumpPendingRunAttempt increments the attempt counter and updates
|
||||
// next_attempt_at + last_error. Used after a failed retry — caller
|
||||
// has decided to try again.
|
||||
func (st *Store) BumpPendingRunAttempt(ctx context.Context, id string, nextAttemptAt time.Time, lastError string) error {
|
||||
_, err := st.db.ExecContext(ctx,
|
||||
`UPDATE pending_runs SET
|
||||
attempt = attempt + 1,
|
||||
next_attempt_at = ?,
|
||||
last_error = ?
|
||||
WHERE id = ?`,
|
||||
nextAttemptAt.UTC().Format(time.RFC3339Nano),
|
||||
nullableString(lastError),
|
||||
id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: bump pending run: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user