P6-03 repo size trend + agent-update UI fix + dashboard polish #21

Merged
steve merged 17 commits from tidy-up-last-backup-projection into main 2026-05-07 23:00:04 +01:00
3 changed files with 108 additions and 6 deletions
Showing only changes of commit 7b390e9e5e - Show all commits
+1 -1
View File
@@ -92,7 +92,7 @@ func run() error {
notifHub := notification.NewHub(st, aead, cfg.BaseURL)
alertEngine := alert.NewEngine(st, notifHub)
updateWatcher := ws.NewUpdateWatcher(st, alertEngine)
updateWatcher := ws.NewUpdateWatcher(st, alertEngine, jobHub)
renderer, err := ui.New()
if err != nil {
+34 -1
View File
@@ -7,6 +7,7 @@ import (
"sync"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
@@ -29,6 +30,7 @@ type AlertRaiser interface {
type UpdateWatcher struct {
store *store.Store
alerts AlertRaiser
jobHub *JobHub // optional — if nil, no fan-out to browser streams
mu sync.Mutex
entries map[string]*updateEntry // hostID → entry
@@ -46,10 +48,11 @@ type updateEntry struct {
// NewUpdateWatcher builds an unstarted watcher. Call Run in a goroutine
// to start the periodic sweep.
func NewUpdateWatcher(st *store.Store, alerts AlertRaiser) *UpdateWatcher {
func NewUpdateWatcher(st *store.Store, alerts AlertRaiser, jobHub *JobHub) *UpdateWatcher {
return &UpdateWatcher{
store: st,
alerts: alerts,
jobHub: jobHub,
entries: make(map[string]*updateEntry),
tickPeriod: 5 * time.Second,
}
@@ -95,6 +98,7 @@ func (w *UpdateWatcher) OnHello(ctx context.Context, hostID, agentVersion, targe
if err := w.store.MarkJobFinished(ctx, jobID, "succeeded", 0, nil, "", now); err != nil {
slog.Warn("ws update watcher: mark succeeded", "job_id", jobID, "host_id", hostID, "err", err)
}
w.publishJobFinished(jobID, api.JobSucceeded, 0, "", now)
if w.alerts != nil {
w.alerts.ResolveUpdateFailed(ctx, hostID, now)
}
@@ -144,8 +148,37 @@ func (w *UpdateWatcher) sweep(ctx context.Context, now time.Time) {
if err := w.store.MarkJobFinished(ctx, x.jobID, "failed", -1, nil, errMsg, stamp); err != nil {
slog.Warn("ws update watcher: mark failed", "job_id", x.jobID, "host_id", x.hostID, "err", err)
}
w.publishJobFinished(x.jobID, api.JobFailed, -1, errMsg, stamp)
if w.alerts != nil {
w.alerts.RaiseUpdateFailed(ctx, x.hostID, x.jobID, reason, stamp)
}
}
}
// publishJobFinished pushes a synthetic job.finished envelope into the
// JobHub so any browser still streaming this job sees it terminate.
// The agent itself exits before it can send job.finished (it has to —
// it's about to relaunch into the new binary), so without this fan-out
// the /jobs/{id} page hangs until reload.
//
// Best-effort: if the hub is nil or the envelope can't be marshalled
// we log and move on — the DB-side state is already correct, this is
// purely a UI wake-up.
func (w *UpdateWatcher) publishJobFinished(jobID string, status api.JobStatus, exitCode int, errMsg string, finishedAt time.Time) {
if w.jobHub == nil {
return
}
payload := api.JobFinishedPayload{
JobID: jobID,
Status: status,
ExitCode: exitCode,
FinishedAt: finishedAt,
Error: errMsg,
}
env, err := api.Marshal(api.MsgJobFinished, "", payload)
if err != nil {
slog.Warn("ws update watcher: marshal synthetic job.finished", "job_id", jobID, "err", err)
return
}
w.jobHub.Broadcast(jobID, env)
}
+73 -4
View File
@@ -8,6 +8,7 @@ import (
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
@@ -50,7 +51,7 @@ func TestUpdateWatcherOnHelloSuccess(t *testing.T) {
jobID := seedJob(t, st, hostID)
a := &fakeAlerts{}
w := NewUpdateWatcher(st, a)
w := NewUpdateWatcher(st, a, nil)
w.Track(jobID, hostID)
w.OnHello(context.Background(), hostID, "v2", "v2")
@@ -83,7 +84,7 @@ func TestUpdateWatcherTimeout(t *testing.T) {
jobID := seedJob(t, st, hostID)
a := &fakeAlerts{}
w := NewUpdateWatcher(st, a)
w := NewUpdateWatcher(st, a, nil)
w.Track(jobID, hostID)
time.Sleep(80 * time.Millisecond)
@@ -113,7 +114,7 @@ func TestUpdateWatcherMismatchedVersionNoOp(t *testing.T) {
jobID := seedJob(t, st, hostID)
a := &fakeAlerts{}
w := NewUpdateWatcher(st, a)
w := NewUpdateWatcher(st, a, nil)
w.Track(jobID, hostID)
w.OnHello(context.Background(), hostID, "v1", "v2")
@@ -140,7 +141,7 @@ func TestUpdateWatcherHelloAfterTimeoutIsNoOp(t *testing.T) {
jobID := seedJob(t, st, hostID)
a := &fakeAlerts{}
w := NewUpdateWatcher(st, a)
w := NewUpdateWatcher(st, a, nil)
w.Track(jobID, hostID)
time.Sleep(80 * time.Millisecond)
@@ -159,3 +160,71 @@ func TestUpdateWatcherHelloAfterTimeoutIsNoOp(t *testing.T) {
t.Fatalf("late hello triggered ResolveUpdateFailed: %v", a.resolved)
}
}
func TestUpdateWatcherOnHelloBroadcastsJobFinished(t *testing.T) {
st := openWSTestStore(t)
hostID := ulid.Make().String()
seedHostWS(t, st, hostID)
jobID := seedJob(t, st, hostID)
hub := NewJobHub()
sub := hub.Register(jobID)
defer sub.unregister()
w := NewUpdateWatcher(st, &fakeAlerts{}, hub)
w.Track(jobID, hostID)
w.OnHello(context.Background(), hostID, "v2", "v2")
select {
case env := <-sub.ch:
if env.Type != api.MsgJobFinished {
t.Fatalf("envelope type: got %q want %q", env.Type, api.MsgJobFinished)
}
var p api.JobFinishedPayload
if err := env.UnmarshalPayload(&p); err != nil {
t.Fatalf("unmarshal payload: %v", err)
}
if p.JobID != jobID || p.Status != api.JobSucceeded {
t.Fatalf("payload: got %+v", p)
}
case <-time.After(time.Second):
t.Fatal("expected synthetic job.finished broadcast, got nothing")
}
}
func TestUpdateWatcherTimeoutBroadcastsJobFinished(t *testing.T) {
prev := updateTimeout
updateTimeout = 50 * time.Millisecond
t.Cleanup(func() { updateTimeout = prev })
st := openWSTestStore(t)
hostID := ulid.Make().String()
seedHostWS(t, st, hostID)
jobID := seedJob(t, st, hostID)
hub := NewJobHub()
sub := hub.Register(jobID)
defer sub.unregister()
w := NewUpdateWatcher(st, &fakeAlerts{}, hub)
w.Track(jobID, hostID)
time.Sleep(80 * time.Millisecond)
w.sweep(context.Background(), time.Now())
select {
case env := <-sub.ch:
if env.Type != api.MsgJobFinished {
t.Fatalf("envelope type: got %q want %q", env.Type, api.MsgJobFinished)
}
var p api.JobFinishedPayload
if err := env.UnmarshalPayload(&p); err != nil {
t.Fatalf("unmarshal payload: %v", err)
}
if p.JobID != jobID || p.Status != api.JobFailed {
t.Fatalf("payload: got %+v", p)
}
case <-time.After(time.Second):
t.Fatal("expected synthetic job.finished broadcast, got nothing")
}
}