feat(catchup): scheduleOverdue helper for missed-window detection

fix(store): SetHostAlwaysOn returns ErrNotFound; test agent-token lookup path
feat(store): add hosts.always_on flag (default on)
2026-06-15 20:58:17 +01:00 · 2026-06-15 20:56:59 +01:00 · 2026-06-15 20:53:13 +01:00 · 2026-06-15 20:48:16 +01:00 · 2026-06-15 20:42:00 +01:00 · 2026-06-15 20:37:45 +01:00
21 changed files with 1630 additions and 49 deletions
@@ -70,7 +70,11 @@ jobs:
    # one runner. The third shard ("rest") covers everything else.
    name: Test (${{ matrix.name }})
    runs-on: ubuntu-latest
-    container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
+    container:
+      image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
+      credentials:
+        username: ${{ secrets.ZOT_USERNAME }}
+        password: ${{ secrets.ZOT_PASSWORD }}
    strategy:
      fail-fast: false
      matrix:
@@ -105,7 +109,11 @@ jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
-    container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
+    container:
+      image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
+      credentials:
+        username: ${{ secrets.ZOT_USERNAME }}
+        password: ${{ secrets.ZOT_PASSWORD }}
    steps:
      - uses: actions/checkout@v4
      - uses: golangci/golangci-lint-action@v7
@@ -121,7 +129,11 @@ jobs:
  build:
    name: Build (${{ matrix.goos }}/${{ matrix.goarch }})
    runs-on: ubuntu-latest
-    container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
+    container:
+      image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
+      credentials:
+        username: ${{ secrets.ZOT_USERNAME }}
+        password: ${{ secrets.ZOT_PASSWORD }}
    strategy:
      fail-fast: false
      matrix:
@@ -12,18 +12,12 @@
 #     plus install.sh / install.ps1 / the systemd unit baked in under
 #     /opt/restic-manager/dist (the read-only fallback path the server
 #     handlers use when <DataDir>/... is empty).
-#   * Pushes to this Gitea instance's container registry under
-#     <gitea-host>/<owner>/restic-manager.
+#   * Pushes to the zot OCI registry (docker.dcglab.co.uk).
 #
 # Tag fan-out
 #   * tag push: :vX.Y.Z, :X.Y, :X
 #   * tag push and X >= 1: also :latest
 #   * workflow_dispatch: only :snapshot-<shortsha>; nothing else moves.
-#
-# Why no goreleaser
-#   The architecture already routes agent distribution through the
-#   server's /agent/binary endpoint. The image is the only deliverable;
-#   binary archives would just be a second source of truth.

 name: Release

@@ -34,8 +28,8 @@ on:
  workflow_dispatch:

 env:
-  REGISTRY: gitea.dcglab.co.uk
-  IMAGE_NAME: ${{ gitea.repository }}
+  REGISTRY: docker.dcglab.co.uk
+  IMAGE_NAME: restic-manager

 # Force bash as the default shell — see ci.yml header.
 defaults:
@@ -46,19 +40,23 @@ jobs:
  image:
    name: Build + push image
    runs-on: ubuntu-latest
-    container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
+    container:
+      image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
+      credentials:
+        username: ${{ secrets.ZOT_USERNAME }}
+        password: ${{ secrets.ZOT_PASSWORD }}
    steps:
      - uses: actions/checkout@v4

      - uses: docker/setup-qemu-action@v3
      - uses: docker/setup-buildx-action@v3

-      - name: Log in to Gitea registry
+      - name: Log in to zot registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
-          username: ${{ gitea.actor }}
-          password: ${{ secrets.DEV_TOKEN }}
+          username: ${{ secrets.ZOT_USERNAME }}
+          password: ${{ secrets.ZOT_PASSWORD }}

      - name: Compute tags + version
        id: meta
@@ -45,3 +45,7 @@ coverage.html
 # tooling already skips paths starting with _, but ignore explicitly
 # so an accidental `git add cmd/.` can't sneak them into a release.
 /cmd/_*/
+
+# Local-only planning / scratch — never committed.
+/ask.md
+/docs/superpowers/
@@ -8,8 +8,10 @@ VERSION        ?= $(shell git describe --tags --always --dirty 2>/dev/null || ec
 COMMIT         ?= $(shell git rev-parse HEAD 2>/dev/null || echo none)
 DATE           ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
 VERSION_PKG    := gitea.dcglab.co.uk/steve/restic-manager/internal/version
-LDFLAGS        := -s -w -X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.date=$(DATE) \
-                  -X $(VERSION_PKG).Version=$(VERSION) -X $(VERSION_PKG).Commit=$(COMMIT)
+LDFLAGS        := -s -w \
+                  -X $(VERSION_PKG).Version=$(VERSION) \
+                  -X $(VERSION_PKG).Commit=$(COMMIT) \
+                  -X $(VERSION_PKG).Date=$(DATE)
 GOFLAGS        := -trimpath
 DOCKER_IMAGE   ?= gitea.dcglab.co.uk/steve/restic-manager
 DOCKER_TAG     ?= dev
@@ -22,12 +22,7 @@ import (
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
-)
-
-var (
-	version = "dev"
-	commit  = "none"
-	date    = "unknown"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
 )

 func main() {
@@ -66,7 +61,7 @@ func run() error {
 	flag.Parse()

 	if *showVersion {
-		fmt.Printf("restic-manager-agent %s (commit %s, built %s)\n", version, commit, date)
+		fmt.Printf("restic-manager-agent %s (commit %s, built %s)\n", version.Version, version.Commit, version.Date)
 		return nil
 	}

@@ -82,14 +77,14 @@ func run() error {
 		if *enrollServer == "" {
 			return errors.New("enrollment: -enroll-server is required with -enroll-token")
 		}
-		return doEnroll(*enrollServer, *enrollToken, cfg, version)
+		return doEnroll(*enrollServer, *enrollToken, cfg, version.Version)
 	}

 	// Announce-and-approve: -enroll-server set, no token, agent not
 	// yet enrolled. Run the announce flow inline; on success the cfg
 	// has the bearer + host_id and we drop into the normal run loop.
 	if !cfg.Enrolled() && *enrollServer != "" {
-		if err := doAnnounce(*enrollServer, cfg, version); err != nil {
+		if err := doAnnounce(*enrollServer, cfg, version.Version); err != nil {
 			return fmt.Errorf("announce: %w", err)
 		}
 	}
@@ -106,7 +101,7 @@ func run() error {
 		return fmt.Errorf("sysinfo: %w", err)
 	}
 	slog.Info("agent starting",
-		"version", version,
+		"version", version.Version,
 		"host_id", cfg.HostID,
 		"server", cfg.ServerURL,
 		"restic_version", snap.ResticVersion,
@@ -136,7 +131,7 @@ func run() error {
 		CertPinSHA256: cfg.CertPinSHA256,
 		HelloPayload: api.HelloPayload{
 			ProtocolVersion: snap.ProtocolVersion,
-			AgentVersion:    version,
+			AgentVersion:    version.Version,
 			ResticVersion:   snap.ResticVersion,
 			Hostname:        snap.Hostname,
 			OS:              snap.OS,
@@ -26,12 +26,7 @@ import (
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
 	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
-)
-
-var (
-	version = "dev"
-	commit  = "none"
-	date    = "unknown"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
 )

 func main() {
@@ -47,7 +42,7 @@ func run() error {
 	flag.Parse()

 	if *showVersion {
-		fmt.Printf("restic-manager-server %s (commit %s, built %s)\n", version, commit, date)
+		fmt.Printf("restic-manager-server %s (commit %s, built %s)\n", version.Version, version.Commit, version.Date)
 		return nil
 	}

@@ -123,7 +118,7 @@ func run() error {
 		NotificationHub: notifHub,
 		UpdateWatcher:   updateWatcher,
 		UI:              renderer,
-		Version:         version,
+		Version:         version.Version,
 		OIDC:            oidcClient,
 		Metrics:         metricsRegistry,
 	}
@@ -177,7 +172,7 @@ func run() error {

 	errCh := make(chan error, 1)
 	go func() {
-		slog.Info("server listening", "addr", cfg.Listen, "version", version)
+		slog.Info("server listening", "addr", cfg.Listen, "version", version.Version)
 		errCh <- srv.Start()
 	}()

@@ -26,7 +26,11 @@ ARG DATE=unknown
 ARG TARGETOS
 ARG TARGETARCH

-ENV LDFLAGS="-s -w -X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=${DATE}"
+ENV VERSION_PKG="gitea.dcglab.co.uk/steve/restic-manager/internal/version"
+ENV LDFLAGS="-s -w \
+    -X ${VERSION_PKG}.Version=${VERSION} \
+    -X ${VERSION_PKG}.Commit=${COMMIT} \
+    -X ${VERSION_PKG}.Date=${DATE}"

 # Server: built for the image's runtime arch.
 RUN GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
@@ -0,0 +1,223 @@
+# Always-On vs Intermittent host mode
+
+**Date:** 2026-06-15
+**Branch:** `feat-laptop-host-mode`
+**Status:** Design — awaiting review
+
+## Problem
+
+The server currently assumes every host should be present 24×7. When an
+agent stops heartbeating for 90s it is flipped to `offline`, and after 15
+minutes that raises a `warning` alert. This is correct for a server, but
+wrong for a host that legitimately comes and goes — a workstation or
+laptop that sleeps overnight, travels, or is shut down on weekends. Such
+a host generates noise alerts every time it is closed, and — more
+importantly — there is **no mechanism to catch up a backup it missed
+while it was away.**
+
+Two distinct facts make the catch-up gap real:
+
+- **Backup cron runs on the agent, locally.** The agent fires
+  `MsgScheduleFire`; the server only dispatches in response. If the host
+  is asleep, the agent process is suspended, so the cron tick never
+  fires and no `MsgScheduleFire` is ever sent.
+- Therefore the existing `pending_runs` retry queue **does not** cover
+  this case. `pending_runs` only gets a row when a schedule *fired* but
+  the agent was momentarily disconnected at dispatch time. A window
+  missed entirely during sleep never enqueues anything.
+
+## Goal
+
+Let an operator mark a host as **not** always-on. Such a host:
+
+1. Does **not** raise offline/agent-down alerts when it is not visible.
+2. Renders a distinct, calm "asleep" state in the UI instead of the
+   alarming red "offline".
+3. When it reconnects, after a short settle delay, the server checks
+   whether it missed a scheduled backup and — if so — triggers a
+   catch-up backup automatically.
+4. Still raises a *staleness* alert if it has genuinely gone too long
+   without any backup (a host left in a drawer). This is the only
+   alert covering an asleep host: while the agent is offline no job
+   runs, so there is no failure to detect — staleness is the safety
+   net for "no backups are happening at all."
+5. Leaves normal job-failure alerting untouched: a backup that
+   actually runs (scheduled or catch-up) and fails alerts as it does
+   today. Failures can only occur while the agent is online and
+   executing restic.
+
+Default behaviour is unchanged for the entire existing fleet.
+
+## Decisions (from brainstorming)
+
+- **Setting shape:** a single boolean `Always On` checkbox per host,
+  **default ON**. Checked = today's 24×7 server semantics. Unchecked =
+  intermittent host. Opt-in only; zero behaviour change for current and
+  future hosts unless explicitly toggled.
+- **Overdue trigger:** evaluated on **reconnect + behind schedule**
+  (not a continuous always-evaluating sweep).
+- **Alert policy for intermittent hosts:** suppress offline alerts;
+  keep a long-threshold **staleness** alert; keep job-failure alerts.
+- **Staleness threshold:** **7 days**, a global constant for v1. May
+  become per-host configurable later — out of scope now.
+- **Catch-up granularity:** **per enabled schedule.** A host with a
+  daily and a weekly schedule catches up only whichever is actually
+  behind.
+- **UI vocabulary:** not-visible intermittent host shows a grey
+  `asleep` state; detail line reads
+  `asleep · last seen <relTime> · will catch up on return`.
+- **Chip:** chip and checkbox highlight the **same** truth (24×7). Show
+  a chip for **Always-On** hosts; **no** chip for intermittent.
+
+## Architecture
+
+The change is deliberately a thin policy + presentation layer over the
+existing online/offline state machine. We do **not** add a new `status`
+enum value or alter heartbeat / `last_seen_at` tracking. "Asleep" is a
+reinterpretation of `status='offline' AND NOT always_on`.
+
+### 1. Data model
+
+- **Migration `0024_hosts_always_on.sql`:**
+  ```sql
+  ALTER TABLE hosts ADD COLUMN always_on INTEGER NOT NULL DEFAULT 1;
+  ```
+  Column-level ALTER per the repo's migration rules. Default `1` means
+  every existing row is Always-On — no behaviour change on upgrade.
+- `store/types.go`: add `AlwaysOn bool` to the `Host` struct; thread it
+  through every host SELECT scan and the host insert/update paths.
+- New store helper `SetHostAlwaysOn(ctx, hostID, bool) error`.
+
+### 2. Online/offline mechanics — UNCHANGED
+
+The 30s offline sweeper (`cmd/server/main.go:220`) still flips an unseen
+host to `status='offline'` and still calls
+`alertEngine.NotifyHostOffline(id)`. `TouchHost` / `MarkHostHello`
+behaviour is untouched. The intermittent distinction is applied
+*downstream* of this state, in the alert engine and the templates.
+
+### 3. Alert behaviour
+
+All changes key off `host.AlwaysOn`, which the engine already has access
+to via the host row it loads.
+
+- **Suppress offline alert** (`alert/engine.go` `handleHostOffline()`
+  and the 60s `tick()`): when `!host.AlwaysOn`, do not raise
+  `agent_offline`.
+- **Resolve-on-toggle:** when a host is switched Always-On→intermittent
+  and has an open `agent_offline` alert, auto-resolve it. (Handled in the
+  mode-change handler, fanning through the normal resolve path so
+  channels/audit fire as usual.)
+- **Staleness alert** — wire up the currently-dead `KindStaleSchedule`
+  constant, **for intermittent hosts only.** On the 60s tick, for each
+  host where `!AlwaysOn` AND the host has ≥1 enabled schedule AND
+  `LastBackupAt != nil` AND `now - LastBackupAt > 7*24h`: raise a
+  `warning` `stale_schedule` alert (dedup key `""`, one per host).
+  Auto-resolves once `LastBackupAt` is back within the threshold, i.e.
+  `now - LastBackupAt <= 7*24h` (any new successful backup, including
+  the catch-up, achieves this). Always-On hosts' `stale_schedule`
+  remains a no-op (unchanged, out of scope).
+  - If `LastBackupAt == nil` (intermittent host enrolled but never
+    backed up): no staleness alert in v1 — there is no baseline to
+    measure against, and onboarding probe state (`repo_status`) already
+    covers "never successfully set up."
+- **Job-failure alerts:** untouched. A catch-up backup that runs and
+  fails alerts exactly like any other backup.
+
+### 4. Catch-up on reconnect
+
+A new small component — the **catch-up scheduler** — lives server-side
+alongside the existing ticks.
+
+- **Arm:** on agent hello (`server/ws/handler.go` hello path /
+  `onAgentHello`), if the host is `!AlwaysOn`, record
+  `catchupDueAt[hostID] = now + 60s` in an in-memory map. Re-arming on a
+  subsequent hello just overwrites the timestamp (debounce — rapid
+  flapping does not stack catch-ups). In-memory is acceptable: catch-up
+  is best-effort and a server restart simply re-arms on the next hello.
+- **Fire:** reuse the existing 30s server tick. For each due entry
+  (`catchupDueAt <= now`):
+  1. Re-verify the agent is still connected (`Hub.Connected(hostID)`).
+     If it bounced back offline within the settle window, drop the entry
+     (it will re-arm on the next hello).
+  2. Skip if a backup is already running or queued for the host
+     (`current_job_id` set, or a relevant `pending_runs` row exists) —
+     avoid double-firing alongside a normal dispatch or pending drain.
+  3. For each **enabled** schedule on the host, compute overdue:
+     ```
+     overdue := sched.Next(host.LastBackupAt) <= now
+     ```
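+     using `robfig/cron/v3` (already a dependency) to parse
+     `Schedule.CronExpr`. `Next(lastBackup)` is the first fire strictly
+     after the last successful backup; if that moment has already
+     passed, the window was missed → overdue. (If `LastBackupAt` is nil,
+     treat as overdue so a never-backed-up intermittent host with a
+     schedule gets its first run on connect.)
+     Note: `LastBackupAt` is a host-level timestamp, not a per-schedule
+     one, so a missed weekly window can be masked by a later daily
+     backup (e.g. weekly missed on Sunday, daily succeeded on Monday →
+     `Next(weekly, Monday)` is next Sunday → not overdue). Accepted for
+     v1 or to be revisited — confirm during review.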
+  4. For each overdue schedule, dispatch its source-groups via the
+     existing `dispatchBackupForGroupCore()`.
+  5. Clear the entry.
+
+Net latency is ~60–90s after wake (60s settle + up to one 30s tick).
+This path is independent of and complementary to the `pending_runs`
+drain, which continues to handle the fired-but-not-sent case.
+
+### 5. UI
+
+- **CSS:** new grey `dot-asleep` token in `web/styles/input.css`,
+  visually distinct from red `dot-offline`.
+- **`partials/host_row.html` and `partials/host_chrome.html`:** when
+  `!AlwaysOn && status=='offline'`, render the grey dot + label
+  `asleep`; the detail/last-seen line reads
+  `asleep · last seen <relTime> · will catch up on return`. All other
+  states unchanged.
+- **24×7 chip:** on the host detail header, render a small
+  `Always On` / `24×7` chip **only when `AlwaysOn` is true**. No chip
+  for intermittent hosts. (Chip and checkbox highlight the same fact.)
+- **Toggle:** an `Always On` checkbox (default checked) on the host edit
+  surface. Operator-band `POST` (mirrors existing host-edit handlers),
+  audited as `host.mode_updated`. On save, if switching to intermittent,
+  trigger the resolve-on-toggle path for any open `agent_offline` alert.
+
+## Error handling & edge cases
+
+- **Toggle Always-On→intermittent while offline+alerting:** open
+  `agent_offline` alert auto-resolved on save.
+- **Toggle intermittent→Always-On while asleep:** host resumes normal
+  offline/alert semantics; it will alert per the 15-minute floor once
+  the sweeper/tick next evaluates it.
+- **No enabled schedules:** no catch-up and no staleness alert — there
+  is no backup expectation to measure against.
+- **Catch-up vs in-flight work:** guarded by the running/queued check in
+  §4 "Catch-up on reconnect", *Fire* step 2, so catch-up never races a
+  normal dispatch or pending drain.
+- **Agent flaps during settle window:** entry dropped if not connected
+  at fire time; re-armed on the next hello.
+
+## Testing
+
+- **Alert engine (unit):**
+  - offline alert suppressed when `!AlwaysOn`.
+  - staleness alert raised when intermittent + schedule + last backup >
+    7d; not raised for Always-On hosts; not raised when last backup is
+    recent; not raised when no enabled schedule.
+  - staleness alert auto-resolves after a backup advances `LastBackupAt`.
+  - Always-On→intermittent toggle resolves an open `agent_offline` alert.
+- **Overdue computation (unit, table-driven):** `(cronExpr,
+  lastBackupAt, now) → overdue?` including nil-last-backup and
+  daily/weekly cases.
+- **Catch-up scheduler (unit):** fires only when still connected; skips
+  when a backup is running/queued; dispatches only overdue schedules.
+- **UI (render test):** asleep state + 24×7 chip render under the right
+  conditions; offline state for Always-On hosts unchanged.
+- `go vet ./...` and full `go test ./...` green before merge.
+
+## Out of scope
+
+- Per-host staleness thresholds (global 7d constant for v1).
+- Continuous (non-reconnect) overdue evaluation.
+- Agent-side catch-up cron — the server is the reliable arbiter.
+- Wiring `stale_schedule` for Always-On hosts (separate concern).
+
+## Task tracking
+
+Add an entry to `tasks.md` under "Next steps from testing" (or a new
+small section) once the plan is approved, per the repo's tasks.md
+source-of-truth rule.
@@ -0,0 +1,29 @@
+// catchup.go — server-side catch-up for intermittent (non-always-on)
+// hosts. When such a host reconnects we wait a short settle window,
+// then dispatch a backup for any schedule whose window elapsed while
+// the host was asleep. This is separate from pending_runs: a host that
+// was asleep never fired its local cron, so no pending row exists.
+package http
+
+import (
+	"time"
+)
+
+// scheduleOverdue reports whether a schedule's most recent expected
+// fire is newer than the host's last successful backup — i.e. a window
+// passed with no backup.
+//
+//   - A nil lastBackup means "never backed up" and is always overdue
+//     (provided the cron parses).
+//   - An unparseable cron is treated as not-overdue so a bad expression
+//     can never trigger a surprise dispatch.
+//   - A cron that parses but yields no next fire time is also treated
+//     as not-overdue. Assuming cronParser is a robfig/cron parser, its
+//     Schedule.Next returns the zero time.Time when no fire time can be
+//     found (e.g. "0 0 30 2 *"); without the guard below that zero
+//     value compares as "not after now" and would report overdue on
+//     every reconnect.
+//
+// Uses the same cronParser the agent's scheduler and schedule
+// validation use, so interpretation is identical.
+func scheduleOverdue(cronExpr string, lastBackup *time.Time, now time.Time) bool {
+	sched, err := cronParser.Parse(cronExpr)
+	if err != nil {
+		return false
+	}
+	if lastBackup == nil {
+		return true
+	}
+	// First fire strictly after the last successful backup.
+	next := sched.Next(*lastBackup)
+	if next.IsZero() {
+		// No fire time exists for this expression; nothing was missed.
+		return false
+	}
+	// Overdue once that fire time has been reached or passed.
+	return !next.After(now)
+}
@@ -0,0 +1,41 @@
+package http
+
+import (
+	"testing"
+	"time"
+)
+
+func TestScheduleOverdue(t *testing.T) {
+	mustParse := func(s string) time.Time {
+		t.Helper()
+		v, err := time.Parse(time.RFC3339, s)
+		if err != nil {
+			t.Fatalf("parse %q: %v", s, err)
+		}
+		return v
+	}
+	daily := "0 2 * * *" // 02:00 every day
+
+	cases := []struct {
+		name       string
+		cron       string
+		lastBackup *time.Time
+		now        time.Time
+		want       bool
+	}{
+		{name: "never backed up is overdue", cron: daily, lastBackup: nil, now: mustParse("2026-06-15T09:00:00Z"), want: true},
+		{name: "missed last nights window", cron: daily, lastBackup: ptrTime(mustParse("2026-06-13T02:05:00Z")), now: mustParse("2026-06-15T09:00:00Z"), want: true},
+		{name: "backed up after the most recent window", cron: daily, lastBackup: ptrTime(mustParse("2026-06-15T02:05:00Z")), now: mustParse("2026-06-15T09:00:00Z"), want: false},
+		{name: "unparseable cron is never overdue", cron: "not a cron", lastBackup: nil, now: mustParse("2026-06-15T09:00:00Z"), want: false},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got := scheduleOverdue(c.cron, c.lastBackup, c.now)
+			if got != c.want {
+				t.Fatalf("scheduleOverdue(%q, %v, %v) = %v, want %v", c.cron, c.lastBackup, c.now, got, c.want)
+			}
+		})
+	}
+}
+
+// ptrTime returns a pointer to a copy of t, for filling *time.Time
+// fields in test tables.
+func ptrTime(t time.Time) *time.Time {
+	v := t
+	return &v
+}
@@ -221,23 +221,40 @@ func formatBytes(n int64) template.HTML {
 // "in 5m"-style. Accepts *time.Time or time.Time so templates can
 // pass either without fighting Go's lack of an address-of operator.
 // Anything else returns "—".
-func formatRelTime(v any) string {
+//
+// The output is wrapped in a <time data-rel-ts="..."> element so a
+// small client-side ticker (see base.html) can refresh the label
+// without a full page reload — otherwise a long-open tab shows
+// timestamps frozen at render time.
+func formatRelTime(v any) template.HTML {
 	var t time.Time
 	switch x := v.(type) {
 	case time.Time:
 		t = x
 	case *time.Time:
 		if x == nil {
-			return "—"
+			return template.HTML("—")
 		}
 		t = *x
 	default:
-		return "—"
+		return template.HTML("—")
 	}
 	if t.IsZero() {
-		return "—"
+		return template.HTML("—")
 	}
-	d := time.Since(t)
+	label := relTimeLabel(time.Since(t))
+	return template.HTML(fmt.Sprintf(
+		`<time data-rel-ts="%s" title="%s">%s</time>`,
+		t.UTC().Format(time.RFC3339Nano),
+		t.UTC().Format("2006-01-02 15:04:05 UTC"),
+		label,
+	))
+}
+
+// relTimeLabel turns a duration-since-now into the short human label
+// used by formatRelTime (and mirrored verbatim by the JS ticker, so
+// keep the two in sync if you change the buckets).
+func relTimeLabel(d time.Duration) string {
 	suffix := "ago"
 	if d < 0 {
 		d = -d
@@ -0,0 +1,49 @@
+package ui
+
+import (
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestFormatRelTimeWrapsInTickableTimeElement(t *testing.T) {
+	// A long-open tab needs a stable anchor so the JS ticker can
+	// refresh the label — see base.html.
+	when := time.Now().Add(-3 * time.Hour)
+	got := string(formatRelTime(when))
+	if !strings.Contains(got, `<time data-rel-ts="`) {
+		t.Errorf("missing data-rel-ts anchor in %q", got)
+	}
+	if !strings.Contains(got, "3h ago</time>") {
+		t.Errorf("expected '3h ago' label, got %q", got)
+	}
+}
+
+// TestFormatRelTimeNilReturnsDash checks that both a nil *time.Time
+// and the zero time.Time render as a bare em-dash.
+func TestFormatRelTimeNilReturnsDash(t *testing.T) {
+	const dash = "—"
+
+	var nilTime *time.Time
+	if got := formatRelTime(nilTime); string(got) != dash {
+		t.Errorf("nil should render as em-dash, got %q", got)
+	}
+
+	var zero time.Time
+	if got := formatRelTime(zero); string(got) != dash {
+		t.Errorf("zero should render as em-dash")
+	}
+}
+
+func TestRelTimeLabelBuckets(t *testing.T) {
+	cases := []struct {
+		d    time.Duration
+		want string
+	}{
+		{30 * time.Second, "30s ago"},
+		{5 * time.Minute, "5m ago"},
+		{2 * time.Hour, "2h ago"},
+		{3 * 24 * time.Hour, "3d ago"},
+		{2 * 7 * 24 * time.Hour, "2w ago"},
+		{-5 * time.Minute, "5m from now"},
+	}
+	for _, c := range cases {
+		if got := relTimeLabel(c.d); got != c.want {
+			t.Errorf("relTimeLabel(%v) = %q, want %q", c.d, got, c.want)
+		}
+	}
+}
@@ -44,7 +44,7 @@ func (s *Store) LookupHostByAgentToken(ctx context.Context, tokenHash string) (*
 			repo_size_bytes, snapshot_count, open_alert_count,
 			applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
 			pre_hook_default, post_hook_default,
-			repo_status, repo_status_error
+			repo_status, repo_status_error, always_on
 		 FROM hosts WHERE agent_token_hash = ?`,
 		tokenHash)
 	return scanHost(row)
@@ -59,7 +59,7 @@ func (s *Store) GetHost(ctx context.Context, id string) (*Host, error) {
 			repo_size_bytes, snapshot_count, open_alert_count,
 			applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
 			pre_hook_default, post_hook_default,
-			repo_status, repo_status_error
+			repo_status, repo_status_error, always_on
 		 FROM hosts WHERE id = ?`, id)
 	return scanHost(row)
 }
@@ -227,7 +227,7 @@ func (s *Store) ListHosts(ctx context.Context) ([]Host, error) {
 			repo_size_bytes, snapshot_count, open_alert_count,
 			applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
 			pre_hook_default, post_hook_default,
-			repo_status, repo_status_error
+			repo_status, repo_status_error, always_on
 		 FROM hosts ORDER BY name`)
 	if err != nil {
 		return nil, fmt.Errorf("store: list hosts: %w", err)
@@ -267,6 +267,7 @@ func scanHostRow(s hostScanner) (*Host, error) {
 		tags                         string
 		bwUp, bwDown                 sql.NullInt64
 		preHook, postHook            sql.NullString
+		alwaysOn                     int
 	)
 	err := s.Scan(&h.ID, &h.Name, &h.OS, &h.Arch,
 		&h.AgentVersion, &h.ResticVersion, &h.ProtocolVersion,
@@ -275,7 +276,7 @@ func scanHostRow(s hostScanner) (*Host, error) {
 		&h.RepoSizeBytes, &h.SnapshotCount, &h.OpenAlertCount,
 		&h.AppliedScheduleVersion, &bwUp, &bwDown,
 		&preHook, &postHook,
-		&h.RepoStatus, &h.RepoStatusError)
+		&h.RepoStatus, &h.RepoStatusError, &alwaysOn)
 	if err != nil {
 		if errors.Is(err, sql.ErrNoRows) {
 			return nil, ErrNotFound
@@ -330,6 +331,7 @@ func scanHostRow(s hostScanner) (*Host, error) {
 	if postHook.Valid {
 		h.PostHookDefault = postHook.String
 	}
+	h.AlwaysOn = alwaysOn != 0
 	return &h, nil
 }

@@ -378,6 +380,25 @@ func (s *Store) SetHostTags(ctx context.Context, hostID string, tags []string) e
 	return nil
 }

+// SetHostAlwaysOn flips the host's always-on flag. true = 24x7 server
+// (default); false = intermittent host (laptop). See the
+// always-on-host-mode spec.
+//
+// It returns ErrNotFound when the UPDATE matches no row for hostID.
+// Any database error — including a failure to read the affected-row
+// count — is wrapped and returned, so a driver problem is never
+// misreported as a missing host.
+func (s *Store) SetHostAlwaysOn(ctx context.Context, hostID string, alwaysOn bool) error {
+	// The column is an INTEGER; encode the bool as 0/1.
+	v := 0
+	if alwaysOn {
+		v = 1
+	}
+	res, err := s.db.ExecContext(ctx,
+		`UPDATE hosts SET always_on = ? WHERE id = ?`, v, hostID)
+	if err != nil {
+		return fmt.Errorf("store: set host always_on: %w", err)
+	}
+	n, err := res.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("store: set host always_on: rows affected: %w", err)
+	}
+	if n == 0 {
+		return ErrNotFound
+	}
+	return nil
+}
+
 // DistinctHostTags returns the union of every tag in use across the
 // fleet, sorted. Powers the autocomplete on the host-tags editor and
 // the chip-row filter on the dashboard. Cheap at fleet sizes this
@@ -0,0 +1,55 @@
+package store
+
+import (
+	"context"
+	"testing"
+	"time"
+)
+
+func TestHostAlwaysOnDefaultAndToggle(t *testing.T) {
+	ctx := context.Background()
+	st := openTestStore(t)
+
+	h := Host{
+		ID: "h-always-on", Name: "lap", OS: "linux", Arch: "amd64",
+		ProtocolVersion: 1, EnrolledAt: time.Now().UTC(),
+	}
+	if err := st.CreateHost(ctx, h, "tok-hash", "pin"); err != nil {
+		t.Fatalf("create host: %v", err)
+	}
+	got, err := st.GetHost(ctx, h.ID)
+	if err != nil {
+		t.Fatalf("get host: %v", err)
+	}
+	if !got.AlwaysOn {
+		t.Fatalf("new host should default to always_on=true, got false")
+	}
+
+	if err := st.SetHostAlwaysOn(ctx, h.ID, false); err != nil {
+		t.Fatalf("set always_on: %v", err)
+	}
+	got, err = st.GetHost(ctx, h.ID)
+	if err != nil {
+		t.Fatalf("get host 2: %v", err)
+	}
+	if got.AlwaysOn {
+		t.Fatalf("expected always_on=false after toggle, got true")
+	}
+
+	hosts, err := st.ListHosts(ctx)
+	if err != nil {
+		t.Fatalf("list hosts: %v", err)
+	}
+	if len(hosts) != 1 || hosts[0].AlwaysOn {
+		t.Fatalf("ListHosts should report always_on=false, got %+v", hosts)
+	}
+
+	// Verify the agent hot-path (LookupHostByAgentToken) also reflects the toggle.
+	byToken, err := st.LookupHostByAgentToken(ctx, "tok-hash")
+	if err != nil {
+		t.Fatalf("lookup by agent token: %v", err)
+	}
+	if byToken.AlwaysOn {
+		t.Fatalf("LookupHostByAgentToken: expected always_on=false after toggle, got true")
+	}
+}
@@ -0,0 +1,6 @@
+-- 0024: distinguish always-on (24x7 server) hosts from intermittent
+-- hosts (laptops/workstations that legitimately sleep). Default 1 so
+-- every existing and future host keeps today's offline/alert
+-- semantics unless explicitly opted out. Column-level ALTER per the
+-- repo's migration rules (no table rebuild — hosts has inbound FKs).
+ALTER TABLE hosts ADD COLUMN always_on INTEGER NOT NULL DEFAULT 1;
@@ -99,6 +99,12 @@ type Host struct {
 	// agent-side message when RepoStatus == "init_failed".
 	RepoStatus      string
 	RepoStatusError string
+
+	// AlwaysOn is true for 24x7 server hosts (the default). When false
+	// the host is intermittent (laptop/workstation): offline alerts are
+	// suppressed, the UI shows an "asleep" state, and a missed backup is
+	// caught up ~1 min after reconnect. See the always-on-host-mode spec.
+	AlwaysOn bool
 }

 // Schedule is now intentionally slim: cron + which groups + enabled.
@@ -13,4 +13,8 @@ var (
 	// Commit is the short git SHA. Informational only; surfaced via
 	// /api/version but not used for any comparison.
 	Commit = ""
+
+	// Date is the RFC3339 build timestamp. Informational only; printed
+	// by `--version` but not used for any comparison.
+	Date = "unknown"
 )
@@ -310,7 +310,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
 > **Sweep verified (smoke env):** admin adds operator → setup link generated → curl-as-new-user fetches /setup (200, page shows username) → POSTs password → 303 to / + Set-Cookie → operator authenticated → 200 on /, 200 on /settings/account, **403 on /settings/users** (admin-only) → admin disables user → operator's next request is **401** + session row count drops to 0 → audit log shows `user.created` + `user.setup_completed` for the cycle. All 26 implementation tasks landed; full `go test ./...` green.
 - [x] **P4-05** (L) OIDC login (generic provider config, group → role mapping)

-> **As shipped (2026-05-05):** Authorization Code + PKCE (S256) against any OIDC IdP advertising standard discovery. Config is YAML+env (`oidc.issuer`, `oidc.client_id`, `oidc.client_secret`/`_file`, `oidc.role_claim` default `groups`, `oidc.role_mapping`, `oidc.display_name`, `oidc.redirect_url`); empty issuer → OIDC disabled, no routes mounted. Migration 0019 adds `users.auth_source`/`oidc_subject` (partial unique index on `oidc_subject`), `sessions.id_token`, and a small `oidc_state` table for state+verifier round-trip (cleaned up every alert tick, 5 min TTL). Login page renders **Sign in with `<display_name>`** above the local form when OIDC is enabled; the SSO button kicks off a 303 to the IdP with state + S256 code_challenge persisted server-side. Callback verifies ID token, fetches `/userinfo` to merge claims (Authelia / many IdPs only put `sub` in the ID token and surface `preferred_username`/`email`/`groups` from userinfo), maps the first matching group to a role; **no match → deny banner**, no row created, audit `user.oidc_login_blocked`. Username-collision with an existing local user → same deny path with `username_taken`. New user → JIT-provisioned with `auth_source='oidc'`, `oidc_subject=<sub>`, `password_hash=''`. Returning user → looked up by `oidc_subject` (stable when usernames change at the IdP), role + email refreshed on every login. Local password login is rejected for `auth_source='oidc'` users. Logout posts to `/logout` and, when the IdP advertised `end_session_endpoint`, follows up with RP-initiated logout (carries `id_token_hint` + `post_logout_redirect_uri=BaseURL`); when not advertised (Authelia in our smoke env), the local session is cleared and the browser lands on `/login`. Users list shows a small **oidc** chip beside enabled/disabled; the edit page disables username/email/role for OIDC users (server-side guard mirrors UI, returns 403). Force-logout, disable, and the last-admin guard from P4-04 all still apply. 
**Live Authelia sweep verified all four paths against `https://auth.example.invalid`:** rm-admin → admin role + JIT row + chip + readonly edit; rm-operator → operator JIT, 403 on `/settings/users`; rm-viewer → viewer JIT, 403 on `/hosts/new`; rm-other (group not in role_mapping) → no_role_match banner, no row created, audit logged. Returning rm-admin login resolved to the same row by sub. Screenshots in `_diag/p4-05-sweep/`. Out-of-scope and on Phase 6 candidate list: refresh tokens, back-channel logout, multiple providers, post-login PKCE for the cookie itself.
+> **As shipped (2026-05-05):** Authorization Code + PKCE (S256) against any OIDC IdP advertising standard discovery. Config is YAML+env (`oidc.issuer`, `oidc.client_id`, `oidc.client_secret`/`_file`, `oidc.role_claim` default `groups`, `oidc.role_mapping`, `oidc.display_name`, `oidc.redirect_url`); empty issuer → OIDC disabled, no routes mounted. Migration 0019 adds `users.auth_source`/`oidc_subject` (partial unique index on `oidc_subject`), `sessions.id_token`, and a small `oidc_state` table for state+verifier round-trip (cleaned up every alert tick, 5 min TTL). Login page renders **Sign in with `<display_name>`** above the local form when OIDC is enabled; the SSO button kicks off a 303 to the IdP with state + S256 code_challenge persisted server-side. Callback verifies ID token, fetches `/userinfo` to merge claims (Authelia / many IdPs only put `sub` in the ID token and surface `preferred_username`/`email`/`groups` from userinfo), maps the first matching group to a role; **no match → deny banner**, no row created, audit `user.oidc_login_blocked`. Username-collision with an existing local user → same deny path with `username_taken`. New user → JIT-provisioned with `auth_source='oidc'`, `oidc_subject=<sub>`, `password_hash=''`. Returning user → looked up by `oidc_subject` (stable when usernames change at the IdP), role + email refreshed on every login. Local password login is rejected for `auth_source='oidc'` users. Logout posts to `/logout` and, when the IdP advertised `end_session_endpoint`, follows up with RP-initiated logout (carries `id_token_hint` + `post_logout_redirect_uri=BaseURL`); when not advertised (Authelia in our smoke env), the local session is cleared and the browser lands on `/login`. Users list shows a small **oidc** chip beside enabled/disabled; the edit page disables username/email/role for OIDC users (server-side guard mirrors UI, returns 403). Force-logout, disable, and the last-admin guard from P4-04 all still apply. 
**Live Authelia sweep verified all four paths against local auth:** rm-admin → admin role + JIT row + chip + readonly edit; rm-operator → operator JIT, 403 on `/settings/users`; rm-viewer → viewer JIT, 403 on `/hosts/new`; rm-other (group not in role_mapping) → no_role_match banner, no row created, audit logged. Returning rm-admin login resolved to the same row by sub. Screenshots in `_diag/p4-05-sweep/`. Out-of-scope and on Phase 6 candidate list: refresh tokens, back-channel logout, multiple providers, post-login PKCE for the cookie itself.

 - [x] **P4-07** (S) Per-host tags + dashboard filtering by tag

@@ -498,6 +498,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
 - [x] **NS-03** Auto-init repo on first onboard, surface credential failures eagerly. ✅ Landed: migration 0020 adds `hosts.repo_status` (`unknown`/`ready`/`init_failed`) + `repo_status_error`; WS handler projects every init job's terminal state onto the host row (with idempotent "config file already exists" → ready); creds-save handlers (UI + JSON API) reset status to `unknown` and dispatch a fresh init when the agent is online; new `/hosts/{id}/repo/probe` retry endpoint and a status banner on the repo page. Remainder of original scope below. Today the operator types repo URL + creds during Add-host and the credentials are pushed to the agent on connect, but no `restic init`/probe runs until the first scheduled job — so a typo in the password or a wrong URL goes undetected for hours/days, manifesting as a silent missed-backup. Wanted behaviour: when the host completes enrolment (or when an admin saves new repo creds), the server dispatches a one-shot probe job that runs `restic cat config` (cheap, repo-existence + creds-validity in one call). On `Is there already a config file? unable to open config file` → run `restic init`. On success → mark the host's repo as ready. On any other error (network, auth, fingerprint) → surface a panel-level error on the host detail page and audit the failure, leaving the host in an "init pending" state with a "Retry" button. Needs: a new `JobKind` (or piggyback on an existing one) for the probe, server-side state on the host row (`repo_status` enum: `unknown`/`ready`/`init_pending`/`init_failed`), UI panel that shows the state, and clear copy on the Add-host page so the operator knows the save isn't fire-and-forget.
 - [x] **NS-05** Drop redundant `actions/setup-go` from `.gitea/workflows/ci.yml`. ✅ Already gone — verified `.gitea/workflows/ci.yml` has zero `actions/setup-go@v5` invocations and no `GO_VERSION` env; the file's header comment now documents that the runner image (`gitea.dcglab.co.uk/steve/ci-runner-go`) is the single source of truth for the Go version. Closing as done; no further code change needed.
 - [x] **NS-06** Remove the permanently-disabled "Run backup now" button from `web/templates/partials/host_chrome.html`. ✅ Landed: dropped the disabled tombstone button from the host header action row; only "Edit credentials" + the ⋯ menu remain. Per-source-group Run-now on `/hosts/{id}/sources` is the only path now. No e2e change needed — `smoke.spec.ts` does not assert on host_chrome's button row.
+- [x] **NS-07** Relative timestamps go stale on long-open tabs. ✅ Landed: `formatRelTime` now wraps its label in `<time data-rel-ts=…>` and both layouts (`base.html`, `chromeless.html`) carry a small ticker that re-renders every 30s, so a page rendered an hour ago no longer keeps showing "2h ago" when the wall-clock truth is "3h ago". Covered by `funcs_test.go`. The bug: every relative label was computed once at server render and never updated client-side, so a job-detail page left open drifted further from reality the longer it sat.
 - [x] **NS-04** Dashboard parity with the alerts screen: live refresh, column sorting, filters. ✅ Landed: `/` now parses `q`/`status`/`repo_status`/`tag`/`sort`/`dir` query params (round-trip durable for bookmarks); table is wrapped in an `id="hosts-table"` htmx live-poll matching the alerts cadence (5s, gated on `document.visibilityState` and `localStorage.rm-dashboard-live`); filter row above the table with hostname free-text + status + repo_status selects + tag chips + clear; column headers (Host / OS · arch / Last backup / Repo size / Snapshots) are clickable links that toggle direction on the active column; pure-Go sort+filter pipeline covered by `dashboard_filter_test.go`. Original scope below. The host list is currently a static render — operators have to reload to see new heartbeats / job state changes. Mirror the alerts pattern (`web/templates/pages/alerts.html` uses `hx-trigger="every 5s [document.visibilityState==='visible' && localStorage.getItem('rm-alerts-live')!=='off']"` plus a Live/Off toggle so background tabs and explicit-off don't burn server cycles). Add: server-side sort on every meaningful column (name, OS, last-backup time, last-backup status, agent online/offline, restic version, tags), and a small filter row above the table — at minimum free-text on hostname, status (online/offline/never-seen), and tag chips. Columns + filter state should round-trip through query string so a bookmarked / shared URL is durable. Re-use the `host_row` partial that already exists so the live-refresh swap is a clean OOB swap, not a full table re-render.

 ---
@@ -20,6 +20,37 @@

  {{template "toast" .}}

+  <script>
+  // Tick <time data-rel-ts> labels so long-open tabs don't freeze
+  // (e.g. a job page rendered an hour ago kept showing "2h ago" when
+  // the truth was "3h ago"). Buckets must match relTimeLabel in
+  // internal/server/ui/funcs.go.
+  (function () { // IIFE: keeps label/tick out of the global scope
+    function label(ms) { // ms = now - timestamp, in milliseconds; returns e.g. "5m ago"
+      var suffix = 'ago';
+      if (ms < 0) { ms = -ms; suffix = 'from now'; } // negative delta: timestamp lies in the future
+      var s = Math.floor(ms / 1000);
+      if (s < 60) return s + 's ' + suffix; // under a minute: whole seconds
+      var m = Math.floor(s / 60);
+      if (m < 60) return m + 'm ' + suffix; // under an hour: whole minutes
+      var h = Math.floor(m / 60);
+      if (h < 24) return h + 'h ' + suffix; // under a day: whole hours
+      var d = Math.floor(h / 24);
+      if (d < 7) return d + 'd ' + suffix; // under a week: whole days
+      return Math.floor(d / 7) + 'w ' + suffix; // weeks is the coarsest bucket (no months/years)
+    }
+    function tick() { // recompute the text of every <time data-rel-ts> currently in the document
+      var now = Date.now();
+      document.querySelectorAll('time[data-rel-ts]').forEach(function (el) { // queried on every tick, so elements added later are included
+        var t = Date.parse(el.getAttribute('data-rel-ts'));
+        if (!isNaN(t)) el.textContent = label(now - t); // unparseable timestamp: leave the existing text alone
+      });
+    }
+    tick(); // refresh once immediately when the script runs
+    setInterval(tick, 30000); // then again every 30 seconds
+  })();
+  </script>
+
 </body>
 </html>
 {{end}}
@@ -11,6 +11,34 @@
 </head>
 <body class="min-h-screen flex flex-col">
  {{block "content" .}}{{end}}
+  <script>
+  // See base.html for rationale; chromeless pages (e.g. pending host)
+  // also use the relTime helper, so they need the same ticker.
+  (function () { // IIFE: keeps label/tick out of the global scope
+    function label(ms) { // ms = now - timestamp, in milliseconds; returns e.g. "5m ago"
+      var suffix = 'ago';
+      if (ms < 0) { ms = -ms; suffix = 'from now'; } // negative delta: timestamp lies in the future
+      var s = Math.floor(ms / 1000);
+      if (s < 60) return s + 's ' + suffix; // under a minute: whole seconds
+      var m = Math.floor(s / 60);
+      if (m < 60) return m + 'm ' + suffix; // under an hour: whole minutes
+      var h = Math.floor(m / 60);
+      if (h < 24) return h + 'h ' + suffix; // under a day: whole hours
+      var d = Math.floor(h / 24);
+      if (d < 7) return d + 'd ' + suffix; // under a week: whole days
+      return Math.floor(d / 7) + 'w ' + suffix; // weeks is the coarsest bucket (no months/years)
+    }
+    function tick() { // recompute the text of every <time data-rel-ts> currently in the document
+      var now = Date.now();
+      document.querySelectorAll('time[data-rel-ts]').forEach(function (el) { // queried on every tick, so elements added later are included
+        var t = Date.parse(el.getAttribute('data-rel-ts'));
+        if (!isNaN(t)) el.textContent = label(now - t); // unparseable timestamp: leave the existing text alone
+      });
+    }
+    tick(); // refresh once immediately when the script runs
+    setInterval(tick, 30000); // then again every 30 seconds
+  })();
+  </script>
 </body>
 </html>
 {{end}}
Author	SHA1	Message	Date
steve	7aaafceab5	feat(catchup): scheduleOverdue helper for missed-window detection	2026-06-15 20:58:17 +01:00
steve	4c9641b6ed	fix(store): SetHostAlwaysOn returns ErrNotFound; test agent-token lookup path	2026-06-15 20:56:59 +01:00
steve	ff65d39f25	feat(store): add hosts.always_on flag (default on)	2026-06-15 20:53:13 +01:00
steve	9d16e3f7e3	docs(plan): always-on vs intermittent host mode implementation plan	2026-06-15 20:48:16 +01:00
steve	261b83ec26	docs(spec): clarify staleness vs job-failure alerting for asleep hosts	2026-06-15 20:42:00 +01:00
steve	0c3a0844e4	docs(spec): always-on vs intermittent host mode design	2026-06-15 20:37:45 +01:00
steve	2dae61f678	Merge pull request 'fix(ui): tick relative timestamps client-side so long-open tabs don't go stale' (#29 ) from fix-stale-reltime into main Reviewed-on: #29	2026-06-15 20:19:59 +01:00
steve	55cb8909c7	docs(tasks): record NS-07 client-side relTime ticker fix CI / Test (rest) (pull_request) Successful in 1m46s Details CI / Test (store) (pull_request) Successful in 2m4s Details CI / Lint (pull_request) Successful in 34s Details CI / Build (windows/amd64) (pull_request) Successful in 45s Details CI / Build (linux/amd64) (pull_request) Successful in 46s Details CI / Test (server-http) (pull_request) Failing after 3m32s Details CI / Build (linux/arm64) (pull_request) Successful in 47s Details e2e / Playwright vs docker-compose (pull_request) Successful in 2m43s Details	2026-06-15 20:19:32 +01:00
steve	06748f5582	Merge pull request 'ui(relTime): tick relative timestamps client-side' (#28 ) from fix-stale-reltime into main Release / Build + push image (push) Successful in 3m52s Details Reviewed-on: #28	2026-05-15 20:14:08 +00:00
steve	a4d705db6b	Merge branch 'main' into fix-stale-reltime CI / Test (store) (pull_request) Successful in 1m15s Details CI / Lint (pull_request) Successful in 19s Details CI / Build (windows/amd64) (pull_request) Successful in 25s Details CI / Test (server-http) (pull_request) Successful in 2m2s Details CI / Test (rest) (pull_request) Successful in 2m12s Details CI / Build (linux/amd64) (pull_request) Successful in 26s Details CI / Build (linux/arm64) (pull_request) Successful in 26s Details e2e / Playwright vs docker-compose (pull_request) Successful in 2m59s Details	2026-05-15 20:05:45 +00:00
steve	c6f73f790d	ci: pull ci-runner-go from zot registry	2026-05-15 19:51:02 +00:00
steve	068f08d96d	ci: migrate release workflow to zot registry	2026-05-15 19:50:50 +00:00
steve	28ef9750d3	ui(relTime): tick relative timestamps client-side so long-open tabs don't freeze CI / Test (rest) (pull_request) Successful in 9s Details CI / Test (store) (pull_request) Successful in 6s Details CI / Build (windows/amd64) (pull_request) Successful in 8s Details CI / Build (linux/amd64) (pull_request) Successful in 7s Details CI / Lint (pull_request) Successful in 19s Details CI / Build (linux/arm64) (pull_request) Successful in 7s Details e2e / Playwright vs docker-compose (pull_request) Successful in 1m26s Details CI / Test (server-http) (pull_request) Successful in 2m34s Details formatRelTime now wraps its label in <time data-rel-ts=...>, and both layouts include a small ticker that re-renders every 30s. Without this, a job-detail page rendered an hour ago kept showing '2h ago' when the wall-clock truth was '3h ago'.	2026-05-10 07:37:03 +01:00
steve	f4db0b17e8	Merge pull request 'fix(version): single-source internal/version, fix dockerfile ldflags' (#27 ) from fix-version-ldflags into main Release / Build + push image (push) Successful in 3m58s Details	2026-05-09 14:26:50 +00:00
steve	8afda7cd8c	fix(version): use internal/version as single source for build constants CI / Test (store) (pull_request) Successful in 5s Details CI / Test (rest) (pull_request) Successful in 9s Details CI / Build (windows/amd64) (pull_request) Successful in 7s Details CI / Test (server-http) (pull_request) Successful in 17s Details CI / Build (linux/amd64) (pull_request) Successful in 7s Details CI / Lint (pull_request) Successful in 19s Details CI / Build (linux/arm64) (pull_request) Successful in 14s Details e2e / Playwright vs docker-compose (pull_request) Successful in 1m27s Details The Dockerfile only set `-X main.version=...`, so docker-built binaries left `internal/version.Version` at its default "dev". The update logic (host_update.go:61, hosts.go:94, fleet_update.go:101 et al.) compares against `internal/version.Version`, so a v1.0.0 host always looked out-of-date to a v1.0.0 server, the chip never cleared, and pressing "update" re-downloaded the same bundled binary on a loop. Collapse the two version sources: drop the `var version/commit/date` locals in cmd/{server,agent}/main.go, route everything through internal/version (now also carrying Date), and have both the Dockerfile and the Makefile set the same single set of -X flags. Verified end-to-end: make build and docker build both emit binaries whose --version reflects the build VERSION.	2026-05-09 15:20:13 +01:00
steve	123e4f4915	scrub: remove docs/superpowers and ask.md; gitignore them These were never meant for the public repo. Wiped from history in the same change set via git-filter-repo.	2026-05-09 14:23:29 +01:00