Compare commits
32 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0fbacf9f98 | |||
| d8fd4110b0 | |||
| e17932d797 | |||
| 39030a3bbe | |||
| a30f824a3c | |||
| 239d55b65b | |||
| 74e5b75380 | |||
| 9371b7b777 | |||
| 10b2518323 | |||
| 6694dfdc3a | |||
| f88f2cc1f2 | |||
| 1a07fbb217 | |||
| 9e6524788f | |||
| 25c55e5e4d | |||
| e408de9610 | |||
| 5c4e0275d9 | |||
| 7aaafceab5 | |||
| 4c9641b6ed | |||
| ff65d39f25 | |||
| 9d16e3f7e3 | |||
| 261b83ec26 | |||
| 0c3a0844e4 | |||
| 2dae61f678 | |||
| 55cb8909c7 | |||
| 06748f5582 | |||
| a4d705db6b | |||
| c6f73f790d | |||
| 068f08d96d | |||
| 28ef9750d3 | |||
| f4db0b17e8 | |||
| 8afda7cd8c | |||
| 123e4f4915 |
+15
-3
@@ -70,7 +70,11 @@ jobs:
|
|||||||
# one runner. The third shard ("rest") covers everything else.
|
# one runner. The third shard ("rest") covers everything else.
|
||||||
name: Test (${{ matrix.name }})
|
name: Test (${{ matrix.name }})
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
|
container:
|
||||||
|
image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
|
||||||
|
credentials:
|
||||||
|
username: ${{ secrets.ZOT_USERNAME }}
|
||||||
|
password: ${{ secrets.ZOT_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
@@ -105,7 +109,11 @@ jobs:
|
|||||||
lint:
|
lint:
|
||||||
name: Lint
|
name: Lint
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
|
container:
|
||||||
|
image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
|
||||||
|
credentials:
|
||||||
|
username: ${{ secrets.ZOT_USERNAME }}
|
||||||
|
password: ${{ secrets.ZOT_PASSWORD }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: golangci/golangci-lint-action@v7
|
- uses: golangci/golangci-lint-action@v7
|
||||||
@@ -121,7 +129,11 @@ jobs:
|
|||||||
build:
|
build:
|
||||||
name: Build (${{ matrix.goos }}/${{ matrix.goarch }})
|
name: Build (${{ matrix.goos }}/${{ matrix.goarch }})
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
|
container:
|
||||||
|
image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
|
||||||
|
credentials:
|
||||||
|
username: ${{ secrets.ZOT_USERNAME }}
|
||||||
|
password: ${{ secrets.ZOT_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
|
|||||||
@@ -12,18 +12,12 @@
|
|||||||
# plus install.sh / install.ps1 / the systemd unit baked in under
|
# plus install.sh / install.ps1 / the systemd unit baked in under
|
||||||
# /opt/restic-manager/dist (the read-only fallback path the server
|
# /opt/restic-manager/dist (the read-only fallback path the server
|
||||||
# handlers use when <DataDir>/... is empty).
|
# handlers use when <DataDir>/... is empty).
|
||||||
# * Pushes to this Gitea instance's container registry under
|
# * Pushes to the zot OCI registry (docker.dcglab.co.uk).
|
||||||
# <gitea-host>/<owner>/restic-manager.
|
|
||||||
#
|
#
|
||||||
# Tag fan-out
|
# Tag fan-out
|
||||||
# * tag push: :vX.Y.Z, :X.Y, :X
|
# * tag push: :vX.Y.Z, :X.Y, :X
|
||||||
# * tag push and X >= 1: also :latest
|
# * tag push and X >= 1: also :latest
|
||||||
# * workflow_dispatch: only :snapshot-<shortsha>; nothing else moves.
|
# * workflow_dispatch: only :snapshot-<shortsha>; nothing else moves.
|
||||||
#
|
|
||||||
# Why no goreleaser
|
|
||||||
# The architecture already routes agent distribution through the
|
|
||||||
# server's /agent/binary endpoint. The image is the only deliverable;
|
|
||||||
# binary archives would just be a second source of truth.
|
|
||||||
|
|
||||||
name: Release
|
name: Release
|
||||||
|
|
||||||
@@ -34,8 +28,8 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
REGISTRY: gitea.dcglab.co.uk
|
REGISTRY: docker.dcglab.co.uk
|
||||||
IMAGE_NAME: ${{ gitea.repository }}
|
IMAGE_NAME: restic-manager
|
||||||
|
|
||||||
# Force bash as the default shell — see ci.yml header.
|
# Force bash as the default shell — see ci.yml header.
|
||||||
defaults:
|
defaults:
|
||||||
@@ -46,19 +40,23 @@ jobs:
|
|||||||
image:
|
image:
|
||||||
name: Build + push image
|
name: Build + push image
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container: gitea.dcglab.co.uk/steve/ci-runner-go:2026-05-08
|
container:
|
||||||
|
image: docker.dcglab.co.uk/ci-runner-go:2026-05-15
|
||||||
|
credentials:
|
||||||
|
username: ${{ secrets.ZOT_USERNAME }}
|
||||||
|
password: ${{ secrets.ZOT_PASSWORD }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: docker/setup-qemu-action@v3
|
- uses: docker/setup-qemu-action@v3
|
||||||
- uses: docker/setup-buildx-action@v3
|
- uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
- name: Log in to Gitea registry
|
- name: Log in to zot registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
registry: ${{ env.REGISTRY }}
|
registry: ${{ env.REGISTRY }}
|
||||||
username: ${{ gitea.actor }}
|
username: ${{ secrets.ZOT_USERNAME }}
|
||||||
password: ${{ secrets.DEV_TOKEN }}
|
password: ${{ secrets.ZOT_PASSWORD }}
|
||||||
|
|
||||||
- name: Compute tags + version
|
- name: Compute tags + version
|
||||||
id: meta
|
id: meta
|
||||||
|
|||||||
@@ -45,3 +45,10 @@ coverage.html
|
|||||||
# tooling already skips paths starting with _, but ignore explicitly
|
# tooling already skips paths starting with _, but ignore explicitly
|
||||||
# so an accidental `git add cmd/.` can't sneak them into a release.
|
# so an accidental `git add cmd/.` can't sneak them into a release.
|
||||||
/cmd/_*/
|
/cmd/_*/
|
||||||
|
|
||||||
|
# Local-only planning / scratch — never committed.
|
||||||
|
/ask.md
|
||||||
|
/docs/superpowers/
|
||||||
|
|
||||||
|
# Claude Code agent worktrees (transient, harness-created).
|
||||||
|
/.claude/worktrees/
|
||||||
|
|||||||
@@ -6,6 +6,44 @@ and the project follows [Semantic Versioning](https://semver.org/).
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [1.1.0] - 2026-06-15
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Always-On vs intermittent host mode.** A host can now be marked as
|
||||||
|
not always-on — for laptops/workstations that legitimately sleep,
|
||||||
|
travel, or shut down outside hours. An intermittent host no longer
|
||||||
|
raises "agent offline" alerts when it disappears; instead it shows a
|
||||||
|
calm "asleep" state in the UI ("asleep · last seen … · will catch up
|
||||||
|
on return") and is covered by a longer-horizon staleness alert (raised
|
||||||
|
only when it has an enabled schedule and no successful backup in 7
|
||||||
|
days). When such a host reconnects, the server waits a short settle
|
||||||
|
window and then automatically dispatches any scheduled backup whose
|
||||||
|
window elapsed while it was asleep. Toggle per host from the host
|
||||||
|
detail page (operator-band, audited as `host.mode_updated`). New and
|
||||||
|
existing hosts default to always-on, so current fleets are unaffected.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Host-detail header redesign: tags and presence are grouped into
|
||||||
|
labelled, boxed pills with click-to-edit; presence shows a `24x7` /
|
||||||
|
`Free` chip; the agent "out of date" indicator is simplified (the full
|
||||||
|
version detail remains in the Agent-update panel and on hover).
|
||||||
|
- Relative timestamps ("2h ago") now tick client-side, so a tab left
|
||||||
|
open no longer shows a stale value as wall-clock time moves on.
|
||||||
|
- Release and CI container images are now published to and pulled from
|
||||||
|
the zot OCI registry (`docker.dcglab.co.uk`).
|
||||||
|
|
||||||
|
## [1.0.1] - 2026-05-09
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Build version is now single-sourced from `internal/version`, and the
|
||||||
|
server Dockerfile's ldflags were corrected so docker-built binaries
|
||||||
|
report their real version. Previously `internal/version.Version` stayed
|
||||||
|
at its "dev" default in docker images, which made every host look
|
||||||
|
permanently out-of-date to the update logic.
|
||||||
|
|
||||||
## [1.0.0] - 2026-05-09
|
## [1.0.0] - 2026-05-09
|
||||||
|
|
||||||
First tagged release. Six development phases brought the project from
|
First tagged release. Six development phases brought the project from
|
||||||
|
|||||||
@@ -8,8 +8,10 @@ VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || ec
|
|||||||
COMMIT ?= $(shell git rev-parse HEAD 2>/dev/null || echo none)
|
COMMIT ?= $(shell git rev-parse HEAD 2>/dev/null || echo none)
|
||||||
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
|
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
|
||||||
VERSION_PKG := gitea.dcglab.co.uk/steve/restic-manager/internal/version
|
VERSION_PKG := gitea.dcglab.co.uk/steve/restic-manager/internal/version
|
||||||
LDFLAGS := -s -w -X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.date=$(DATE) \
|
LDFLAGS := -s -w \
|
||||||
-X $(VERSION_PKG).Version=$(VERSION) -X $(VERSION_PKG).Commit=$(COMMIT)
|
-X $(VERSION_PKG).Version=$(VERSION) \
|
||||||
|
-X $(VERSION_PKG).Commit=$(COMMIT) \
|
||||||
|
-X $(VERSION_PKG).Date=$(DATE)
|
||||||
GOFLAGS := -trimpath
|
GOFLAGS := -trimpath
|
||||||
DOCKER_IMAGE ?= gitea.dcglab.co.uk/steve/restic-manager
|
DOCKER_IMAGE ?= gitea.dcglab.co.uk/steve/restic-manager
|
||||||
DOCKER_TAG ?= dev
|
DOCKER_TAG ?= dev
|
||||||
|
|||||||
+6
-11
@@ -22,12 +22,7 @@ import (
|
|||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient"
|
||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
|
||||||
)
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
|
||||||
|
|
||||||
var (
|
|
||||||
version = "dev"
|
|
||||||
commit = "none"
|
|
||||||
date = "unknown"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -66,7 +61,7 @@ func run() error {
|
|||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if *showVersion {
|
if *showVersion {
|
||||||
fmt.Printf("restic-manager-agent %s (commit %s, built %s)\n", version, commit, date)
|
fmt.Printf("restic-manager-agent %s (commit %s, built %s)\n", version.Version, version.Commit, version.Date)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,14 +77,14 @@ func run() error {
|
|||||||
if *enrollServer == "" {
|
if *enrollServer == "" {
|
||||||
return errors.New("enrollment: -enroll-server is required with -enroll-token")
|
return errors.New("enrollment: -enroll-server is required with -enroll-token")
|
||||||
}
|
}
|
||||||
return doEnroll(*enrollServer, *enrollToken, cfg, version)
|
return doEnroll(*enrollServer, *enrollToken, cfg, version.Version)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Announce-and-approve: -enroll-server set, no token, agent not
|
// Announce-and-approve: -enroll-server set, no token, agent not
|
||||||
// yet enrolled. Run the announce flow inline; on success the cfg
|
// yet enrolled. Run the announce flow inline; on success the cfg
|
||||||
// has the bearer + host_id and we drop into the normal run loop.
|
// has the bearer + host_id and we drop into the normal run loop.
|
||||||
if !cfg.Enrolled() && *enrollServer != "" {
|
if !cfg.Enrolled() && *enrollServer != "" {
|
||||||
if err := doAnnounce(*enrollServer, cfg, version); err != nil {
|
if err := doAnnounce(*enrollServer, cfg, version.Version); err != nil {
|
||||||
return fmt.Errorf("announce: %w", err)
|
return fmt.Errorf("announce: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -106,7 +101,7 @@ func run() error {
|
|||||||
return fmt.Errorf("sysinfo: %w", err)
|
return fmt.Errorf("sysinfo: %w", err)
|
||||||
}
|
}
|
||||||
slog.Info("agent starting",
|
slog.Info("agent starting",
|
||||||
"version", version,
|
"version", version.Version,
|
||||||
"host_id", cfg.HostID,
|
"host_id", cfg.HostID,
|
||||||
"server", cfg.ServerURL,
|
"server", cfg.ServerURL,
|
||||||
"restic_version", snap.ResticVersion,
|
"restic_version", snap.ResticVersion,
|
||||||
@@ -136,7 +131,7 @@ func run() error {
|
|||||||
CertPinSHA256: cfg.CertPinSHA256,
|
CertPinSHA256: cfg.CertPinSHA256,
|
||||||
HelloPayload: api.HelloPayload{
|
HelloPayload: api.HelloPayload{
|
||||||
ProtocolVersion: snap.ProtocolVersion,
|
ProtocolVersion: snap.ProtocolVersion,
|
||||||
AgentVersion: version,
|
AgentVersion: version.Version,
|
||||||
ResticVersion: snap.ResticVersion,
|
ResticVersion: snap.ResticVersion,
|
||||||
Hostname: snap.Hostname,
|
Hostname: snap.Hostname,
|
||||||
OS: snap.OS,
|
OS: snap.OS,
|
||||||
|
|||||||
+5
-9
@@ -26,12 +26,7 @@ import (
|
|||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
|
||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
|
||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||||
)
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/version"
|
||||||
|
|
||||||
var (
|
|
||||||
version = "dev"
|
|
||||||
commit = "none"
|
|
||||||
date = "unknown"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -47,7 +42,7 @@ func run() error {
|
|||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if *showVersion {
|
if *showVersion {
|
||||||
fmt.Printf("restic-manager-server %s (commit %s, built %s)\n", version, commit, date)
|
fmt.Printf("restic-manager-server %s (commit %s, built %s)\n", version.Version, version.Commit, version.Date)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,7 +118,7 @@ func run() error {
|
|||||||
NotificationHub: notifHub,
|
NotificationHub: notifHub,
|
||||||
UpdateWatcher: updateWatcher,
|
UpdateWatcher: updateWatcher,
|
||||||
UI: renderer,
|
UI: renderer,
|
||||||
Version: version,
|
Version: version.Version,
|
||||||
OIDC: oidcClient,
|
OIDC: oidcClient,
|
||||||
Metrics: metricsRegistry,
|
Metrics: metricsRegistry,
|
||||||
}
|
}
|
||||||
@@ -177,7 +172,7 @@ func run() error {
|
|||||||
|
|
||||||
errCh := make(chan error, 1)
|
errCh := make(chan error, 1)
|
||||||
go func() {
|
go func() {
|
||||||
slog.Info("server listening", "addr", cfg.Listen, "version", version)
|
slog.Info("server listening", "addr", cfg.Listen, "version", version.Version)
|
||||||
errCh <- srv.Start()
|
errCh <- srv.Start()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@@ -232,6 +227,7 @@ func run() error {
|
|||||||
}
|
}
|
||||||
case <-pendingDrainTick.C:
|
case <-pendingDrainTick.C:
|
||||||
srv.DrainAllDue(ctx)
|
srv.DrainAllDue(ctx)
|
||||||
|
srv.RunCatchupsDue(ctx)
|
||||||
case <-pendingExpiryTick.C:
|
case <-pendingExpiryTick.C:
|
||||||
if n, err := st.DeleteExpiredPendingHosts(ctx, time.Now().UTC()); err == nil && n > 0 {
|
if n, err := st.DeleteExpiredPendingHosts(ctx, time.Now().UTC()); err == nil && n > 0 {
|
||||||
slog.Info("expired pending hosts swept", "n", n)
|
slog.Info("expired pending hosts swept", "n", n)
|
||||||
|
|||||||
@@ -26,7 +26,11 @@ ARG DATE=unknown
|
|||||||
ARG TARGETOS
|
ARG TARGETOS
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
|
|
||||||
ENV LDFLAGS="-s -w -X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=${DATE}"
|
ENV VERSION_PKG="gitea.dcglab.co.uk/steve/restic-manager/internal/version"
|
||||||
|
ENV LDFLAGS="-s -w \
|
||||||
|
-X ${VERSION_PKG}.Version=${VERSION} \
|
||||||
|
-X ${VERSION_PKG}.Commit=${COMMIT} \
|
||||||
|
-X ${VERSION_PKG}.Date=${DATE}"
|
||||||
|
|
||||||
# Server: built for the image's runtime arch.
|
# Server: built for the image's runtime arch.
|
||||||
RUN GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
|
RUN GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,223 @@
|
|||||||
|
# Always-On vs Intermittent host mode
|
||||||
|
|
||||||
|
**Date:** 2026-06-15
|
||||||
|
**Branch:** `feat-laptop-host-mode`
|
||||||
|
**Status:** Design — awaiting review
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
The server currently assumes every host should be present 24×7. When an
|
||||||
|
agent stops heartbeating for 90s it is flipped to `offline`, and after 15
|
||||||
|
minutes that raises a `warning` alert. This is correct for a server, but
|
||||||
|
wrong for a host that legitimately comes and goes — a workstation or
|
||||||
|
laptop that sleeps overnight, travels, or is shut down on weekends. Such
|
||||||
|
a host generates noise alerts every time it is closed, and — more
|
||||||
|
importantly — there is **no mechanism to catch up a backup it missed
|
||||||
|
while it was away.**
|
||||||
|
|
||||||
|
Two distinct facts make the catch-up gap real:
|
||||||
|
|
||||||
|
- **Backup cron runs on the agent, locally.** The agent fires
|
||||||
|
`MsgScheduleFire`; the server only dispatches in response. If the host
|
||||||
|
is asleep, the agent process is suspended, so the cron tick never
|
||||||
|
fires and no `MsgScheduleFire` is ever sent.
|
||||||
|
- Therefore the existing `pending_runs` retry queue **does not** cover
|
||||||
|
this case. `pending_runs` only gets a row when a schedule *fired* but
|
||||||
|
the agent was momentarily disconnected at dispatch time. A window
|
||||||
|
missed entirely during sleep never enqueues anything.
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Let an operator mark a host as **not** always-on. Such a host:
|
||||||
|
|
||||||
|
1. Does **not** raise offline/agent-down alerts when it is not visible.
|
||||||
|
2. Renders a distinct, calm "asleep" state in the UI instead of the
|
||||||
|
alarming red "offline".
|
||||||
|
3. When it reconnects, after a short settle delay, the server checks
|
||||||
|
whether it missed a scheduled backup and — if so — triggers a
|
||||||
|
catch-up backup automatically.
|
||||||
|
4. Still raises a *staleness* alert if it has genuinely gone too long
|
||||||
|
without any backup (a host left in a drawer). This is the only
|
||||||
|
alert covering an asleep host: while the agent is offline no job
|
||||||
|
runs, so there is no failure to detect — staleness is the safety
|
||||||
|
net for "no backups are happening at all."
|
||||||
|
5. Leaves normal job-failure alerting untouched: a backup that
|
||||||
|
actually runs (scheduled or catch-up) and fails alerts as it does
|
||||||
|
today. Failures can only occur while the agent is online and
|
||||||
|
executing restic.
|
||||||
|
|
||||||
|
Default behaviour is unchanged for the entire existing fleet.
|
||||||
|
|
||||||
|
## Decisions (from brainstorming)
|
||||||
|
|
||||||
|
- **Setting shape:** a single boolean `Always On` checkbox per host,
|
||||||
|
**default ON**. Checked = today's 24×7 server semantics. Unchecked =
|
||||||
|
intermittent host. Opt-in only; zero behaviour change for current and
|
||||||
|
future hosts unless explicitly toggled.
|
||||||
|
- **Overdue trigger:** evaluated on **reconnect + behind schedule**
|
||||||
|
(not a continuous always-evaluating sweep).
|
||||||
|
- **Alert policy for intermittent hosts:** suppress offline alerts;
|
||||||
|
keep a long-threshold **staleness** alert; keep job-failure alerts.
|
||||||
|
- **Staleness threshold:** **7 days**, a global constant for v1. May
|
||||||
|
become per-host configurable later — out of scope now.
|
||||||
|
- **Catch-up granularity:** **per enabled schedule.** A host with a
|
||||||
|
daily and a weekly schedule catches up only whichever is actually
|
||||||
|
behind.
|
||||||
|
- **UI vocabulary:** not-visible intermittent host shows a grey
|
||||||
|
`asleep` state; detail line reads
|
||||||
|
`asleep · last seen <relTime> · will catch up on return`.
|
||||||
|
- **Chip:** chip and checkbox highlight the **same** truth (24×7). Show
|
||||||
|
a chip for **Always-On** hosts; **no** chip for intermittent.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
The change is deliberately a thin policy + presentation layer over the
|
||||||
|
existing online/offline state machine. We do **not** add a new `status`
|
||||||
|
enum value or alter heartbeat / `last_seen_at` tracking. "Asleep" is a
|
||||||
|
reinterpretation of `status='offline' AND NOT always_on`.
|
||||||
|
|
||||||
|
### 1. Data model
|
||||||
|
|
||||||
|
- **Migration `0024_hosts_always_on.sql`:**
|
||||||
|
```sql
|
||||||
|
ALTER TABLE hosts ADD COLUMN always_on INTEGER NOT NULL DEFAULT 1;
|
||||||
|
```
|
||||||
|
Column-level ALTER per the repo's migration rules. Default `1` means
|
||||||
|
every existing row is Always-On — no behaviour change on upgrade.
|
||||||
|
- `store/types.go`: add `AlwaysOn bool` to the `Host` struct; thread it
|
||||||
|
through every host SELECT scan and the host insert/update paths.
|
||||||
|
- New store helper `SetHostAlwaysOn(ctx, hostID, bool) error`.
|
||||||
|
|
||||||
|
### 2. Online/offline mechanics — UNCHANGED
|
||||||
|
|
||||||
|
The 30s offline sweeper (`cmd/server/main.go:220`) still flips an unseen
|
||||||
|
host to `status='offline'` and still calls
|
||||||
|
`alertEngine.NotifyHostOffline(id)`. `TouchHost` / `MarkHostHello`
|
||||||
|
behaviour is untouched. The intermittent distinction is applied
|
||||||
|
*downstream* of this state, in the alert engine and the templates.
|
||||||
|
|
||||||
|
### 3. Alert behaviour
|
||||||
|
|
||||||
|
All changes key off `host.AlwaysOn`, which the engine already has access
|
||||||
|
to via the host row it loads.
|
||||||
|
|
||||||
|
- **Suppress offline alert** (`alert/engine.go` `handleHostOffline()`
|
||||||
|
and the 60s `tick()`): when `!host.AlwaysOn`, do not raise
|
||||||
|
`agent_offline`.
|
||||||
|
- **Resolve-on-toggle:** when a host is switched always-on→intermittent and
|
||||||
|
has an open `agent_offline` alert, auto-resolve it. (Handled in the
|
||||||
|
mode-change handler, fanning through the normal resolve path so
|
||||||
|
channels/audit fire as usual.)
|
||||||
|
- **Staleness alert** — wire up the currently-dead `KindStaleSchedule`
|
||||||
|
constant, **for intermittent hosts only.** On the 60s tick, for each
|
||||||
|
host where `!AlwaysOn` AND the host has ≥1 enabled schedule AND
|
||||||
|
`LastBackupAt != nil` AND `now - LastBackupAt > 7*24h`: raise a
|
||||||
|
`warning` `stale_schedule` alert (dedup key `""`, one per host).
|
||||||
|
Auto-resolves when `LastBackupAt` advances past the threshold (i.e.
|
||||||
|
any successful backup, including the catch-up). Always-On hosts'
|
||||||
|
`stale_schedule` remains a no-op (unchanged, out of scope).
|
||||||
|
- If `LastBackupAt == nil` (intermittent host enrolled but never
|
||||||
|
backed up): no staleness alert in v1 — there is no baseline to
|
||||||
|
measure against, and onboarding probe state (`repo_status`) already
|
||||||
|
covers "never successfully set up."
|
||||||
|
- **Job-failure alerts:** untouched. A catch-up backup that runs and
|
||||||
|
fails alerts exactly like any other backup.
|
||||||
|
|
||||||
|
### 4. Catch-up on reconnect
|
||||||
|
|
||||||
|
A new small component — the **catch-up scheduler** — lives server-side
|
||||||
|
alongside the existing ticks.
|
||||||
|
|
||||||
|
- **Arm:** on agent hello (`server/ws/handler.go` hello path /
|
||||||
|
`onAgentHello`), if the host is `!AlwaysOn`, record
|
||||||
|
`catchupDueAt[hostID] = now + 60s` in an in-memory map. Re-arming on a
|
||||||
|
subsequent hello just overwrites the timestamp (debounce — rapid
|
||||||
|
flapping does not stack catch-ups). In-memory is acceptable: catch-up
|
||||||
|
is best-effort and a server restart simply re-arms on the next hello.
|
||||||
|
- **Fire:** reuse the existing 30s server tick. For each due entry
|
||||||
|
(`catchupDueAt <= now`):
|
||||||
|
1. Re-verify the agent is still connected (`Hub.Connected(hostID)`).
|
||||||
|
If it bounced back offline within the settle window, drop the entry
|
||||||
|
(it will re-arm on the next hello).
|
||||||
|
2. Skip if a backup is already running or queued for the host
|
||||||
|
(`current_job_id` set, or a relevant `pending_runs` row exists) —
|
||||||
|
avoid double-firing alongside a normal dispatch or pending drain.
|
||||||
|
3. For each **enabled** schedule on the host, compute overdue:
|
||||||
|
```
|
||||||
|
overdue := sched.Next(host.LastBackupAt) <= now
|
||||||
|
```
|
||||||
|
using `robfig/cron/v3` (already a dependency) to parse
|
||||||
|
`Schedule.CronExpr`. `Next(lastBackup)` is the first fire strictly
|
||||||
|
after the last successful backup; if that moment has already
|
||||||
|
passed, the window was missed → overdue. (If `LastBackupAt` is nil,
|
||||||
|
treat as overdue so a never-backed-up intermittent host with a
|
||||||
|
schedule gets its first run on connect.)
|
||||||
|
4. For each overdue schedule, dispatch its source-groups via the
|
||||||
|
existing `dispatchBackupForGroupCore()`.
|
||||||
|
5. Clear the entry.
|
||||||
|
|
||||||
|
Net latency is ~60–90s after wake (60s settle + up to one 30s tick).
|
||||||
|
This path is independent of and complementary to the `pending_runs`
|
||||||
|
drain, which continues to handle the fired-but-not-sent case.
|
||||||
|
|
||||||
|
### 5. UI
|
||||||
|
|
||||||
|
- **CSS:** new grey `dot-asleep` token in `web/styles/input.css`,
|
||||||
|
visually distinct from red `dot-offline`.
|
||||||
|
- **`partials/host_row.html` and `partials/host_chrome.html`:** when
|
||||||
|
`!AlwaysOn && status=='offline'`, render the grey dot + label
|
||||||
|
`asleep`; the detail/last-seen line reads
|
||||||
|
`asleep · last seen <relTime> · will catch up on return`. All other
|
||||||
|
states unchanged.
|
||||||
|
- **24×7 chip:** on the host detail header, render a small
|
||||||
|
`Always On` / `24×7` chip **only when `AlwaysOn` is true**. No chip
|
||||||
|
for intermittent hosts. (Chip and checkbox highlight the same fact.)
|
||||||
|
- **Toggle:** an `Always On` checkbox (default checked) on the host edit
|
||||||
|
surface. Operator-band `POST` (mirrors existing host-edit handlers),
|
||||||
|
audited as `host.mode_updated`. On save, if switching to intermittent,
|
||||||
|
trigger the resolve-on-toggle path for any open `agent_offline` alert.
|
||||||
|
|
||||||
|
## Error handling & edge cases
|
||||||
|
|
||||||
|
- **Toggle always-on→intermittent while offline+alerting:** open
|
||||||
|
`agent_offline` alert auto-resolved on save.
|
||||||
|
- **Toggle intermittent→always-on while asleep:** host resumes normal
|
||||||
|
offline/alert semantics; it will alert per the 15-minute floor once
|
||||||
|
the sweeper/tick next evaluates it.
|
||||||
|
- **No enabled schedules:** no catch-up and no staleness alert — there
|
||||||
|
is no backup expectation to measure against.
|
||||||
|
- **Catch-up vs in-flight work:** guarded by the running/queued check in
|
||||||
|
step 2 of the "Fire" sequence in section 4 so catch-up never races a normal dispatch or pending drain.
|
||||||
|
- **Agent flaps during settle window:** entry dropped if not connected
|
||||||
|
at fire time; re-armed on the next hello.
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
- **Alert engine (unit):**
|
||||||
|
- offline alert suppressed when `!AlwaysOn`.
|
||||||
|
- staleness alert raised when intermittent + schedule + last backup >
|
||||||
|
7d; not raised for Always-On hosts; not raised when last backup is
|
||||||
|
recent; not raised when no enabled schedule.
|
||||||
|
- staleness alert auto-resolves after a backup advances `LastBackupAt`.
|
||||||
|
- always-on→intermittent toggle resolves an open `agent_offline` alert.
|
||||||
|
- **Overdue computation (unit, table-driven):** `(cronExpr,
|
||||||
|
lastBackupAt, now) → overdue?` including nil-last-backup and
|
||||||
|
daily/weekly cases.
|
||||||
|
- **Catch-up scheduler (unit):** fires only when still connected; skips
|
||||||
|
when a backup is running/queued; dispatches only overdue schedules.
|
||||||
|
- **UI (render test):** asleep state + 24×7 chip render under the right
|
||||||
|
conditions; offline state for Always-On hosts unchanged.
|
||||||
|
- `go vet ./...` and full `go test ./...` green before merge.
|
||||||
|
|
||||||
|
## Out of scope
|
||||||
|
|
||||||
|
- Per-host staleness thresholds (global 7d constant for v1).
|
||||||
|
- Continuous (non-reconnect) overdue evaluation.
|
||||||
|
- Agent-side catch-up cron — the server is the reliable arbiter.
|
||||||
|
- Wiring `stale_schedule` for Always-On hosts (separate concern).
|
||||||
|
|
||||||
|
## Task tracking
|
||||||
|
|
||||||
|
Add an entry to `tasks.md` under "Next steps from testing" (or a new
|
||||||
|
small section) once the plan is approved, per the repo's tasks.md
|
||||||
|
source-of-truth rule.
|
||||||
@@ -22,6 +22,12 @@ import (
|
|||||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// staleBackupThreshold is how long an intermittent host may go without
|
||||||
|
// a successful backup before we raise a stale_schedule alert. Global
|
||||||
|
// constant for v1 (may become per-host later). Only intermittent hosts
|
||||||
|
// are evaluated — always-on hosts' stale_schedule stays a no-op.
|
||||||
|
const staleBackupThreshold = 7 * 24 * time.Hour
|
||||||
|
|
||||||
// JobFinishedEvent carries everything the engine needs to evaluate
|
// JobFinishedEvent carries everything the engine needs to evaluate
|
||||||
// the failed-X rules. Pushed via Engine.NotifyJobFinished from the
|
// the failed-X rules. Pushed via Engine.NotifyJobFinished from the
|
||||||
// MarkJobFinished site.
|
// MarkJobFinished site.
|
||||||
@@ -149,6 +155,10 @@ func (e *Engine) handleJobFinished(ctx context.Context, ev JobFinishedEvent) {
|
|||||||
fmt.Sprintf("%s job %s failed", ev.Kind, ev.JobID), ev.When)
|
fmt.Sprintf("%s job %s failed", ev.Kind, ev.JobID), ev.When)
|
||||||
case "succeeded":
|
case "succeeded":
|
||||||
e.resolveAndNotify(ctx, ev.HostID, kind, dedupKey, ev.When)
|
e.resolveAndNotify(ctx, ev.HostID, kind, dedupKey, ev.When)
|
||||||
|
if ev.Kind == "backup" {
|
||||||
|
// A fresh backup clears staleness for intermittent hosts.
|
||||||
|
e.resolveAndNotify(ctx, ev.HostID, KindStaleSchedule, "", ev.When)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -157,6 +167,12 @@ func (e *Engine) handleHostOffline(ctx context.Context, hostID string) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// Intermittent hosts (laptops) legitimately disappear — never raise
|
||||||
|
// agent_offline for them. The stale_schedule sweep in tick() is the
|
||||||
|
// only staleness signal for these hosts.
|
||||||
|
if !host.AlwaysOn {
|
||||||
|
return
|
||||||
|
}
|
||||||
// Apply the 15-min floor — raise only when last_seen_at is older
|
// Apply the 15-min floor — raise only when last_seen_at is older
|
||||||
// than agentOfflineFloor. A nil last_seen_at (host enrolled but
|
// than agentOfflineFloor. A nil last_seen_at (host enrolled but
|
||||||
// never connected) is treated as "now" so we don't raise
|
// never connected) is treated as "now" so we don't raise
|
||||||
@@ -180,11 +196,9 @@ func (e *Engine) handleHostOnline(ctx context.Context, hostID string) {
|
|||||||
// tick is the 60-second sweep. Responsibilities:
|
// tick is the 60-second sweep. Responsibilities:
|
||||||
// 1. Re-evaluate agent_offline for every offline host that may have
|
// 1. Re-evaluate agent_offline for every offline host that may have
|
||||||
// crossed the floor between explicit events.
|
// crossed the floor between explicit events.
|
||||||
// 2. Stale-schedule detection — declared in the spec but intentionally
|
// 2. Stale-schedule detection for intermittent hosts — raises
|
||||||
// left as a no-op in v1. The precise "expected to have fired but
|
// stale_schedule when LastBackupAt is older than 7 days and the
|
||||||
// didn't" trigger requires a store helper that lands in a later
|
// host has an enabled schedule. Always-on hosts are excluded.
|
||||||
// task. The KindStaleSchedule constant is exported so UI code can
|
|
||||||
// reference the tag string today.
|
|
||||||
func (e *Engine) tick(ctx context.Context, now time.Time) {
|
func (e *Engine) tick(ctx context.Context, now time.Time) {
|
||||||
// User-management cleanup piggy-backed here for now. Setup tokens
|
// User-management cleanup piggy-backed here for now. Setup tokens
|
||||||
// have a 1h expiry; the alert engine tick is the cheapest existing
|
// have a 1h expiry; the alert engine tick is the cheapest existing
|
||||||
@@ -203,6 +217,35 @@ func (e *Engine) tick(ctx context.Context, now time.Time) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, h := range hosts {
|
for _, h := range hosts {
|
||||||
|
// Intermittent hosts: suppress agent_offline entirely; instead
|
||||||
|
// raise stale_schedule when they have gone too long with no
|
||||||
|
// successful backup AND they have at least one enabled schedule
|
||||||
|
// to be measured against. A nil LastBackupAt (never backed up)
|
||||||
|
// has no baseline — onboarding/repo_status covers that case.
|
||||||
|
if !h.AlwaysOn {
|
||||||
|
if h.LastBackupAt == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if now.Sub(*h.LastBackupAt) < staleBackupThreshold {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
hasEnabled, err := e.hostHasEnabledSchedule(ctx, h.ID)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("alert: tick list schedules", "host_id", h.ID, "err", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !hasEnabled {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
e.raiseAndNotify(ctx, h.ID, KindStaleSchedule, "", "warning",
|
||||||
|
fmt.Sprintf("No backup in %s (threshold %s)",
|
||||||
|
roundDur(now.Sub(*h.LastBackupAt)), staleBackupThreshold), now)
|
||||||
|
// Resolution is handled in handleJobFinished on a successful
|
||||||
|
// backup (and ResolveOnModeChange on toggle) — the tick only
|
||||||
|
// raises, it does not auto-resolve.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Always-on hosts: existing agent_offline re-evaluation.
|
||||||
if h.Status != "offline" || h.LastSeenAt == nil {
|
if h.Status != "offline" || h.LastSeenAt == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -212,7 +255,6 @@ func (e *Engine) tick(ctx context.Context, now time.Time) {
|
|||||||
roundDur(now.Sub(*h.LastSeenAt)), e.agentOfflineFloor), now)
|
roundDur(now.Sub(*h.LastSeenAt)), e.agentOfflineFloor), now)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Stale-schedule sweep — no-op in v1. See KindStaleSchedule doc comment.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// roundDur returns a human-readable duration string, rounding to the
|
// roundDur returns a human-readable duration string, rounding to the
|
||||||
@@ -224,3 +266,19 @@ func roundDur(d time.Duration) string {
|
|||||||
}
|
}
|
||||||
return d.Round(time.Minute).String()
|
return d.Round(time.Minute).String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hostHasEnabledSchedule reports whether the host has at least one
|
||||||
|
// enabled backup schedule — the precondition for a stale_schedule
|
||||||
|
// alert (no schedule = no backup expectation to measure against).
|
||||||
|
func (e *Engine) hostHasEnabledSchedule(ctx context.Context, hostID string) (bool, error) {
|
||||||
|
schedules, err := e.store.ListSchedulesByHost(ctx, hostID)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
for _, sc := range schedules {
|
||||||
|
if sc.Enabled {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,255 @@
|
|||||||
|
package alert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/oklog/ulid/v2"
|
||||||
|
|
||||||
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestIntermittentHostSuppressesOfflineAlert checks that handleHostOffline
|
||||||
|
// does NOT raise agent_offline for a host with AlwaysOn=false.
|
||||||
|
func TestIntermittentHostSuppressesOfflineAlert(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
eng, st, hostID := setupEngine(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Make the host intermittent.
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
||||||
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Give it a stale last_seen_at well past the floor.
|
||||||
|
if _, err := st.DB().Exec(
|
||||||
|
`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
|
||||||
|
time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano),
|
||||||
|
"offline",
|
||||||
|
hostID,
|
||||||
|
); err != nil {
|
||||||
|
t.Fatalf("update last_seen_at: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
eng.handleHostOffline(ctx, hostID)
|
||||||
|
|
||||||
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
if len(open) != 0 {
|
||||||
|
t.Fatalf("expected 0 open alerts for intermittent host; got %d: %+v", len(open), open)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestAlwaysOnHostStillRaisesOfflineAlert checks that always-on hosts still
|
||||||
|
// get an agent_offline alert when offline past the floor.
|
||||||
|
func TestAlwaysOnHostStillRaisesOfflineAlert(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
eng, st, hostID := setupEngine(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// always_on=true is the default, but be explicit.
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, hostID, true); err != nil {
|
||||||
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Give it a stale last_seen_at well past the 15m floor.
|
||||||
|
if _, err := st.DB().Exec(
|
||||||
|
`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
|
||||||
|
time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano),
|
||||||
|
"offline",
|
||||||
|
hostID,
|
||||||
|
); err != nil {
|
||||||
|
t.Fatalf("update last_seen_at: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
eng.handleHostOffline(ctx, hostID)
|
||||||
|
|
||||||
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
if len(open) != 1 || open[0].Kind != KindAgentOffline {
|
||||||
|
t.Fatalf("expected 1 agent_offline alert; got %d: %+v", len(open), open)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStalenessAlertForIntermittentHost checks that tick raises stale_schedule
|
||||||
|
// for an intermittent host whose last backup is older than 7 days AND has an
|
||||||
|
// enabled schedule. Also verifies that a succeeded backup clears the alert.
|
||||||
|
func TestStalenessAlertForIntermittentHost(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
eng, st, hostID := setupEngine(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Make intermittent.
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
||||||
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a source group to attach the schedule to.
|
||||||
|
sgID := ulid.Make().String()
|
||||||
|
if err := st.CreateSourceGroup(ctx, &store.SourceGroup{
|
||||||
|
ID: sgID,
|
||||||
|
HostID: hostID,
|
||||||
|
Name: "default",
|
||||||
|
Includes: []string{"/home"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("CreateSourceGroup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create an enabled schedule pointing at the source group.
|
||||||
|
schedID := ulid.Make().String()
|
||||||
|
if err := st.CreateSchedule(ctx, &store.Schedule{
|
||||||
|
ID: schedID,
|
||||||
|
HostID: hostID,
|
||||||
|
CronExpr: "0 2 * * *",
|
||||||
|
Enabled: true,
|
||||||
|
SourceGroupIDs: []string{sgID},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("CreateSchedule: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set last_backup_at to 8 days ago.
|
||||||
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
||||||
|
if err := st.SetHostLastBackup(ctx, hostID, "succeeded", eightDaysAgo); err != nil {
|
||||||
|
t.Fatalf("SetHostLastBackup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
eng.tick(ctx, time.Now().UTC())
|
||||||
|
|
||||||
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
var staleCount int
|
||||||
|
for _, a := range open {
|
||||||
|
if a.Kind == KindStaleSchedule {
|
||||||
|
staleCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if staleCount != 1 {
|
||||||
|
t.Fatalf("expected 1 stale_schedule alert after tick; got %d (all open: %+v)", staleCount, open)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A succeeded backup should clear the stale_schedule alert.
|
||||||
|
eng.handleJobFinished(ctx, JobFinishedEvent{
|
||||||
|
HostID: hostID,
|
||||||
|
JobID: ulid.Make().String(),
|
||||||
|
Kind: "backup",
|
||||||
|
Status: "succeeded",
|
||||||
|
SourceGroupID: sgID,
|
||||||
|
When: time.Now().UTC(),
|
||||||
|
})
|
||||||
|
|
||||||
|
open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
for _, a := range open {
|
||||||
|
if a.Kind == KindStaleSchedule {
|
||||||
|
t.Fatalf("expected stale_schedule to be resolved after backup succeeded; still open: %+v", a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNoStalenessWithoutEnabledSchedule checks that no stale_schedule is
|
||||||
|
// raised for an intermittent host with a stale backup but no enabled schedule.
|
||||||
|
func TestNoStalenessWithoutEnabledSchedule(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
eng, st, hostID := setupEngine(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Make intermittent.
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
||||||
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set last_backup_at to 8 days ago — stale — but no schedule.
|
||||||
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
||||||
|
if err := st.SetHostLastBackup(ctx, hostID, "succeeded", eightDaysAgo); err != nil {
|
||||||
|
t.Fatalf("SetHostLastBackup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
eng.tick(ctx, time.Now().UTC())
|
||||||
|
|
||||||
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
for _, a := range open {
|
||||||
|
if a.Kind == KindStaleSchedule {
|
||||||
|
t.Fatalf("expected no stale_schedule without an enabled schedule; got: %+v", a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestResolveOnModeChangeClearsOfflineAlert checks that ResolveOnModeChange
|
||||||
|
// clears an open agent_offline alert when a host's mode is toggled.
|
||||||
|
func TestResolveOnModeChangeClearsOfflineAlert(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
eng, st, hostID := setupEngine(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Make always-on and set it offline with a stale last_seen_at.
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, hostID, true); err != nil {
|
||||||
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := st.DB().Exec(
|
||||||
|
`UPDATE hosts SET last_seen_at = ?, status = ? WHERE id = ?`,
|
||||||
|
time.Now().UTC().Add(-2*time.Hour).Format(time.RFC3339Nano),
|
||||||
|
"offline",
|
||||||
|
hostID,
|
||||||
|
); err != nil {
|
||||||
|
t.Fatalf("update last_seen_at: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raise the offline alert.
|
||||||
|
eng.handleHostOffline(ctx, hostID)
|
||||||
|
|
||||||
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
if len(open) != 1 || open[0].Kind != KindAgentOffline {
|
||||||
|
t.Fatalf("expected 1 agent_offline alert before mode change; got %d: %+v", len(open), open)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Toggle mode — should clear the alert.
|
||||||
|
eng.ResolveOnModeChange(ctx, hostID, time.Now().UTC())
|
||||||
|
|
||||||
|
open, _ = st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
for _, a := range open {
|
||||||
|
if a.Kind == KindAgentOffline {
|
||||||
|
t.Fatalf("expected agent_offline to be resolved after mode change; still open: %+v", a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNoStalenessWhenNeverBackedUp checks that no stale_schedule alert is
|
||||||
|
// raised for an intermittent host that has never backed up (nil LastBackupAt).
|
||||||
|
func TestNoStalenessWhenNeverBackedUp(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
eng, st, hostID := setupEngine(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Make intermittent.
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, hostID, false); err != nil {
|
||||||
|
t.Fatalf("SetHostAlwaysOn: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a source group and an enabled schedule — but do NOT set LastBackupAt.
|
||||||
|
sgID := ulid.Make().String()
|
||||||
|
if err := st.CreateSourceGroup(ctx, &store.SourceGroup{
|
||||||
|
ID: sgID,
|
||||||
|
HostID: hostID,
|
||||||
|
Name: "default",
|
||||||
|
Includes: []string{"/home"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("CreateSourceGroup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
schedID := ulid.Make().String()
|
||||||
|
if err := st.CreateSchedule(ctx, &store.Schedule{
|
||||||
|
ID: schedID,
|
||||||
|
HostID: hostID,
|
||||||
|
CronExpr: "0 2 * * *",
|
||||||
|
Enabled: true,
|
||||||
|
SourceGroupIDs: []string{sgID},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("CreateSchedule: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
eng.tick(ctx, time.Now().UTC())
|
||||||
|
|
||||||
|
open, _ := st.ListAlerts(ctx, store.AlertFilter{Status: "open", HostID: hostID})
|
||||||
|
for _, a := range open {
|
||||||
|
if a.Kind == KindStaleSchedule {
|
||||||
|
t.Fatalf("expected no stale_schedule when never backed up; got: %+v", a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
+14
-4
@@ -27,10 +27,10 @@ const (
|
|||||||
// integrity is at risk) when a check job fails.
|
// integrity is at risk) when a check job fails.
|
||||||
KindCheckFailed = "check_failed"
|
KindCheckFailed = "check_failed"
|
||||||
|
|
||||||
// KindStaleSchedule is declared for completeness but intentionally
|
// KindStaleSchedule is raised for intermittent (non-always-on) hosts
|
||||||
// left as a no-op in v1. The precise "expected to have fired but
|
// when their last successful backup is older than staleBackupThreshold
|
||||||
// didn't" logic requires a store helper that lands in a follow-up
|
// (7 days) and they have at least one enabled schedule. Resolved on
|
||||||
// task. Ask the team before implementing.
|
// backup success or when the host is switched to always-on mode.
|
||||||
KindStaleSchedule = "stale_schedule"
|
KindStaleSchedule = "stale_schedule"
|
||||||
|
|
||||||
// KindAgentOffline is raised when a host's last_seen_at is older
|
// KindAgentOffline is raised when a host's last_seen_at is older
|
||||||
@@ -122,6 +122,16 @@ func alertPayload(ctx context.Context, st *store.Store, ev notification.Event, a
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ResolveOnModeChange clears any open agent_offline and stale_schedule
|
||||||
|
// alerts for a host whose always-on flag was just toggled. The next
|
||||||
|
// 60s tick re-raises whichever still applies under the new mode, so
|
||||||
|
// this is a self-correcting "wipe and let the sweep settle" call.
|
||||||
|
// Safe to invoke from the HTTP layer (it only touches the store + hub).
|
||||||
|
func (e *Engine) ResolveOnModeChange(ctx context.Context, hostID string, when time.Time) {
|
||||||
|
e.resolveAndNotify(ctx, hostID, KindAgentOffline, "", when)
|
||||||
|
e.resolveAndNotify(ctx, hostID, KindStaleSchedule, "", when)
|
||||||
|
}
|
||||||
|
|
||||||
// resolveAndNotify clears the open (or acknowledged) alert matching
|
// resolveAndNotify clears the open (or acknowledged) alert matching
|
||||||
// (host_id, kind, dedup_key) via store.AutoResolve, then fires
|
// (host_id, kind, dedup_key) via store.AutoResolve, then fires
|
||||||
// alert.resolved for the row(s) actually closed. Best-effort —
|
// alert.resolved for the row(s) actually closed. Best-effort —
|
||||||
|
|||||||
@@ -0,0 +1,141 @@
|
|||||||
|
// catchup.go — server-side catch-up for intermittent (non-always-on)
|
||||||
|
// hosts. When such a host reconnects we wait a short settle window,
|
||||||
|
// then dispatch a backup for any schedule whose window elapsed while
|
||||||
|
// the host was asleep. This is separate from pending_runs: a host that
|
||||||
|
// was asleep never fired its local cron, so no pending row exists.
|
||||||
|
package http
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// scheduleOverdue reports whether a schedule's most recent expected
|
||||||
|
// fire is newer than the host's last successful backup — i.e. a window
|
||||||
|
// passed with no backup. A nil lastBackup means "never backed up" and
|
||||||
|
// is always overdue (provided the cron parses). An unparseable cron is
|
||||||
|
// treated as not-overdue so a bad expression can never trigger a
|
||||||
|
// surprise dispatch. Uses the same cronParser the agent's scheduler
|
||||||
|
// and schedule validation use, so interpretation is identical.
|
||||||
|
func scheduleOverdue(cronExpr string, lastBackup *time.Time, now time.Time) bool {
|
||||||
|
sched, err := cronParser.Parse(cronExpr)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if lastBackup == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
next := sched.Next(*lastBackup)
|
||||||
|
return !next.After(now)
|
||||||
|
}
|
||||||
|
|
||||||
|
// catchupSettle is how long after a reconnect we wait before evaluating
|
||||||
|
// catch-up, so a laptop that wakes briefly and sleeps again doesn't
|
||||||
|
// trigger a backup it can't finish. ~1 minute per the spec.
|
||||||
|
const catchupSettle = 60 * time.Second
|
||||||
|
|
||||||
|
// ArmCatchup records that an intermittent host just reconnected and
|
||||||
|
// should be evaluated for a missed backup after the settle window.
|
||||||
|
// No-op for always-on hosts (caller passes only intermittent hosts).
|
||||||
|
// Re-arming overwrites the timer (debounce — flapping doesn't stack).
|
||||||
|
func (s *Server) ArmCatchup(hostID string, now time.Time) {
|
||||||
|
s.catchupMu.Lock()
|
||||||
|
defer s.catchupMu.Unlock()
|
||||||
|
s.catchupDueAt[hostID] = now.Add(catchupSettle)
|
||||||
|
}
|
||||||
|
|
||||||
|
// dueCatchups returns the hostIDs whose settle window has elapsed and
|
||||||
|
// removes them from the map. Caller evaluates each.
|
||||||
|
func (s *Server) dueCatchups(now time.Time) []string {
|
||||||
|
s.catchupMu.Lock()
|
||||||
|
defer s.catchupMu.Unlock()
|
||||||
|
var due []string
|
||||||
|
for id, at := range s.catchupDueAt {
|
||||||
|
if !now.Before(at) {
|
||||||
|
due = append(due, id)
|
||||||
|
delete(s.catchupDueAt, id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return due
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunCatchupsDue is the tick entrypoint. For each host past its settle
|
||||||
|
// window it dispatches a backup for every enabled schedule that is
|
||||||
|
// overdue. Skips hosts that bounced back offline, that are already
|
||||||
|
// running/queued a job, or that turned out to be always-on.
|
||||||
|
func (s *Server) RunCatchupsDue(ctx context.Context) {
|
||||||
|
if s.deps.Hub == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
now := time.Now().UTC()
|
||||||
|
for _, hostID := range s.dueCatchups(now) {
|
||||||
|
s.runCatchup(ctx, hostID, now)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// runCatchup evaluates and dispatches catch-up backups for a single
|
||||||
|
// host. Kept separate so RunCatchupsDue reads cleanly.
|
||||||
|
func (s *Server) runCatchup(ctx context.Context, hostID string, now time.Time) {
|
||||||
|
conn := s.deps.Hub.Conn(hostID)
|
||||||
|
if conn == nil {
|
||||||
|
return // bounced offline during the settle window; re-arms on next hello
|
||||||
|
}
|
||||||
|
host, err := s.deps.Store.GetHost(ctx, hostID)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("catchup: load host", "host_id", hostID, "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if host.AlwaysOn {
|
||||||
|
return // mode flipped during settle window
|
||||||
|
}
|
||||||
|
// Skip if a backup is already queued or running for this host —
|
||||||
|
// don't pile a catch-up on top of in-flight work. (hosts.current_job_id
|
||||||
|
// is not maintained, so we check the jobs table directly.)
|
||||||
|
active, err := s.deps.Store.HasActiveBackupJob(ctx, hostID)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("catchup: check active backup", "host_id", hostID, "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if active {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
schedules, err := s.deps.Store.ListSchedulesByHost(ctx, hostID)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("catchup: list schedules", "host_id", hostID, "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// NOTE: overdue is measured against host.LastBackupAt, which is the
|
||||||
|
// most recent *successful backup of any schedule* on this host — not
|
||||||
|
// a per-schedule timestamp. For the common intermittent host (a
|
||||||
|
// single backup schedule) this is exact. With multiple schedules of
|
||||||
|
// different cadences, a recent backup from one schedule can mask
|
||||||
|
// another schedule's missed window. Acceptable for v1; revisit with
|
||||||
|
// per-schedule last-success tracking if multi-cadence laptops appear.
|
||||||
|
for _, sc := range schedules {
|
||||||
|
if !sc.Enabled || len(sc.SourceGroupIDs) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !scheduleOverdue(sc.CronExpr, host.LastBackupAt, now) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, gid := range sc.SourceGroupIDs {
|
||||||
|
g, err := s.deps.Store.GetSourceGroup(ctx, hostID, gid)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("catchup: load source group",
|
||||||
|
"host_id", hostID, "schedule_id", sc.ID, "group_id", gid, "err", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, derr := s.dispatchBackupForGroupCore(ctx, conn, hostID, sc.ID, g, now); derr != nil {
|
||||||
|
// Send failed for this group — host may have dropped
|
||||||
|
// again. Earlier groups in this batch were already
|
||||||
|
// dispatched; re-arm so a later reconnect re-evaluates
|
||||||
|
// any still-overdue schedules.
|
||||||
|
s.ArmCatchup(hostID, now)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slog.Info("catchup: dispatched missed backup",
|
||||||
|
"host_id", hostID, "schedule_id", sc.ID, "group", g.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,246 @@
|
|||||||
|
// catchup_scheduler_test.go — integration tests for the catch-up scheduler.
|
||||||
|
package http
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/oklog/ulid/v2"
|
||||||
|
|
||||||
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||||
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestRunCatchupDispatchesOverdue verifies four properties of the
|
||||||
|
// catch-up scheduler in separate sub-tests sharing no state.
|
||||||
|
func TestRunCatchupDispatchesOverdue(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// --- 1. Overdue host with connected agent → backup dispatched -------
|
||||||
|
t.Run("overdue_dispatch", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
srv, ts, st := rawTestServer(t)
|
||||||
|
hostID, token := enrolHostForWS(t, srv, st, "catchup-overdue")
|
||||||
|
|
||||||
|
if err := st.SetHostAlwaysOn(context.Background(), hostID, false); err != nil {
|
||||||
|
t.Fatalf("set always_on: %v", err)
|
||||||
|
}
|
||||||
|
// Last backup ~8 days ago → schedule overdue.
|
||||||
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
||||||
|
if err := st.SetHostLastBackup(context.Background(), hostID, "succeeded", eightDaysAgo); err != nil {
|
||||||
|
t.Fatalf("set last backup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := st.CreateJob(context.Background(), store.Job{
|
||||||
|
ID: ulid.Make().String(), HostID: hostID, Kind: "init",
|
||||||
|
ActorKind: "system", CreatedAt: time.Now().UTC(),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed init: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
gid := ulid.Make().String()
|
||||||
|
if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
|
||||||
|
ID: gid, HostID: hostID, Name: "home", Includes: []string{"/home"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("source group: %v", err)
|
||||||
|
}
|
||||||
|
sid := ulid.Make().String()
|
||||||
|
if err := st.CreateSchedule(context.Background(), &store.Schedule{
|
||||||
|
ID: sid, HostID: hostID, CronExpr: "0 2 * * *", Enabled: true,
|
||||||
|
SourceGroupIDs: []string{gid},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("schedule: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := agentDial(t, srv, ts, hostID, token)
|
||||||
|
sendHello(t, c, "catchup-overdue")
|
||||||
|
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||||
|
|
||||||
|
// Arm with a past time so the settle window is already elapsed.
|
||||||
|
srv.ArmCatchup(hostID, time.Now().UTC().Add(-2*time.Minute))
|
||||||
|
srv.RunCatchupsDue(context.Background())
|
||||||
|
|
||||||
|
// Give the dispatch goroutine a moment to write the job row.
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
var n int
|
||||||
|
if err := st.DB().QueryRow(
|
||||||
|
`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup'`, hostID).Scan(&n); err != nil {
|
||||||
|
t.Fatalf("count: %v", err)
|
||||||
|
}
|
||||||
|
if n < 1 {
|
||||||
|
t.Errorf("overdue host: want ≥1 backup job, got %d", n)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// --- 2. Not overdue → no dispatch -----------------------------------
|
||||||
|
t.Run("not_overdue_no_dispatch", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
srv, ts, st := rawTestServer(t)
|
||||||
|
hostID, token := enrolHostForWS(t, srv, st, "catchup-notoverdue")
|
||||||
|
|
||||||
|
if err := st.SetHostAlwaysOn(context.Background(), hostID, false); err != nil {
|
||||||
|
t.Fatalf("set always_on: %v", err)
|
||||||
|
}
|
||||||
|
// Last backup just now → not overdue.
|
||||||
|
now := time.Now().UTC()
|
||||||
|
if err := st.SetHostLastBackup(context.Background(), hostID, "succeeded", now); err != nil {
|
||||||
|
t.Fatalf("set last backup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := st.CreateJob(context.Background(), store.Job{
|
||||||
|
ID: ulid.Make().String(), HostID: hostID, Kind: "init",
|
||||||
|
ActorKind: "system", CreatedAt: now,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed init: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
gid := ulid.Make().String()
|
||||||
|
if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
|
||||||
|
ID: gid, HostID: hostID, Name: "home", Includes: []string{"/home"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("source group: %v", err)
|
||||||
|
}
|
||||||
|
sid := ulid.Make().String()
|
||||||
|
if err := st.CreateSchedule(context.Background(), &store.Schedule{
|
||||||
|
ID: sid, HostID: hostID, CronExpr: "0 2 * * *", Enabled: true,
|
||||||
|
SourceGroupIDs: []string{gid},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("schedule: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := agentDial(t, srv, ts, hostID, token)
|
||||||
|
sendHello(t, c, "catchup-notoverdue")
|
||||||
|
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||||
|
|
||||||
|
srv.ArmCatchup(hostID, time.Now().UTC().Add(-2*time.Minute))
|
||||||
|
srv.RunCatchupsDue(context.Background())
|
||||||
|
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
var n int
|
||||||
|
if err := st.DB().QueryRow(
|
||||||
|
`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup'`, hostID).Scan(&n); err != nil {
|
||||||
|
t.Fatalf("count: %v", err)
|
||||||
|
}
|
||||||
|
if n != 0 {
|
||||||
|
t.Errorf("not-overdue host: want 0 backup jobs, got %d", n)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// --- 3. Active backup in flight → no new dispatch -------------------
|
||||||
|
t.Run("active_backup_blocks_dispatch", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
srv, ts, st := rawTestServer(t)
|
||||||
|
hostID, token := enrolHostForWS(t, srv, st, "catchup-active")
|
||||||
|
|
||||||
|
if err := st.SetHostAlwaysOn(context.Background(), hostID, false); err != nil {
|
||||||
|
t.Fatalf("set always_on: %v", err)
|
||||||
|
}
|
||||||
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
||||||
|
if err := st.SetHostLastBackup(context.Background(), hostID, "succeeded", eightDaysAgo); err != nil {
|
||||||
|
t.Fatalf("set last backup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := st.CreateJob(context.Background(), store.Job{
|
||||||
|
ID: ulid.Make().String(), HostID: hostID, Kind: "init",
|
||||||
|
ActorKind: "system", CreatedAt: time.Now().UTC(),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed init: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
gid := ulid.Make().String()
|
||||||
|
if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
|
||||||
|
ID: gid, HostID: hostID, Name: "home", Includes: []string{"/home"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("source group: %v", err)
|
||||||
|
}
|
||||||
|
sid := ulid.Make().String()
|
||||||
|
if err := st.CreateSchedule(context.Background(), &store.Schedule{
|
||||||
|
ID: sid, HostID: hostID, CronExpr: "0 2 * * *", Enabled: true,
|
||||||
|
SourceGroupIDs: []string{gid},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("schedule: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Seed a queued backup job — this is "already in flight".
|
||||||
|
if err := st.CreateJob(context.Background(), store.Job{
|
||||||
|
ID: ulid.Make().String(), HostID: hostID, Kind: "backup",
|
||||||
|
ActorKind: "schedule", CreatedAt: time.Now().UTC(),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed queued backup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := agentDial(t, srv, ts, hostID, token)
|
||||||
|
sendHello(t, c, "catchup-active")
|
||||||
|
_ = drainUntil(t, c, api.MsgScheduleSet)
|
||||||
|
|
||||||
|
srv.ArmCatchup(hostID, time.Now().UTC().Add(-2*time.Minute))
|
||||||
|
srv.RunCatchupsDue(context.Background())
|
||||||
|
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
var n int
|
||||||
|
if err := st.DB().QueryRow(
|
||||||
|
`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup'`, hostID).Scan(&n); err != nil {
|
||||||
|
t.Fatalf("count: %v", err)
|
||||||
|
}
|
||||||
|
// Count must still be exactly 1 — no second job added.
|
||||||
|
if n != 1 {
|
||||||
|
t.Errorf("active backup guard: want 1 job (the seeded one), got %d", n)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// --- 4. Disconnected host → no dispatch -----------------------------
|
||||||
|
t.Run("disconnected_no_dispatch", func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
srv, _, st := rawTestServer(t)
|
||||||
|
hostID, _ := enrolHostForWS(t, srv, st, "catchup-disconnected")
|
||||||
|
|
||||||
|
if err := st.SetHostAlwaysOn(context.Background(), hostID, false); err != nil {
|
||||||
|
t.Fatalf("set always_on: %v", err)
|
||||||
|
}
|
||||||
|
eightDaysAgo := time.Now().UTC().Add(-8 * 24 * time.Hour)
|
||||||
|
if err := st.SetHostLastBackup(context.Background(), hostID, "succeeded", eightDaysAgo); err != nil {
|
||||||
|
t.Fatalf("set last backup: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := st.CreateJob(context.Background(), store.Job{
|
||||||
|
ID: ulid.Make().String(), HostID: hostID, Kind: "init",
|
||||||
|
ActorKind: "system", CreatedAt: time.Now().UTC(),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed init: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
gid := ulid.Make().String()
|
||||||
|
if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
|
||||||
|
ID: gid, HostID: hostID, Name: "home", Includes: []string{"/home"},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("source group: %v", err)
|
||||||
|
}
|
||||||
|
sid := ulid.Make().String()
|
||||||
|
if err := st.CreateSchedule(context.Background(), &store.Schedule{
|
||||||
|
ID: sid, HostID: hostID, CronExpr: "0 2 * * *", Enabled: true,
|
||||||
|
SourceGroupIDs: []string{gid},
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("schedule: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Host is NOT connected — no agentDial.
|
||||||
|
|
||||||
|
srv.ArmCatchup(hostID, time.Now().UTC().Add(-2*time.Minute))
|
||||||
|
srv.RunCatchupsDue(context.Background())
|
||||||
|
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
|
||||||
|
var n int
|
||||||
|
if err := st.DB().QueryRow(
|
||||||
|
`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'backup'`, hostID).Scan(&n); err != nil {
|
||||||
|
t.Fatalf("count: %v", err)
|
||||||
|
}
|
||||||
|
if n != 0 {
|
||||||
|
t.Errorf("disconnected host: want 0 backup jobs, got %d", n)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
package http
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestScheduleOverdue(t *testing.T) {
|
||||||
|
mustParse := func(s string) time.Time {
|
||||||
|
t.Helper()
|
||||||
|
v, err := time.Parse(time.RFC3339, s)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse %q: %v", s, err)
|
||||||
|
}
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
daily := "0 2 * * *" // 02:00 every day
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
cron string
|
||||||
|
lastBackup *time.Time
|
||||||
|
now time.Time
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{name: "never backed up is overdue", cron: daily, lastBackup: nil, now: mustParse("2026-06-15T09:00:00Z"), want: true},
|
||||||
|
{name: "missed last nights window", cron: daily, lastBackup: ptrTime(mustParse("2026-06-13T02:05:00Z")), now: mustParse("2026-06-15T09:00:00Z"), want: true},
|
||||||
|
{name: "backed up after the most recent window", cron: daily, lastBackup: ptrTime(mustParse("2026-06-15T02:05:00Z")), now: mustParse("2026-06-15T09:00:00Z"), want: false},
|
||||||
|
{name: "unparseable cron is never overdue", cron: "not a cron", lastBackup: nil, now: mustParse("2026-06-15T09:00:00Z"), want: false},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
got := scheduleOverdue(c.cron, c.lastBackup, c.now)
|
||||||
|
if got != c.want {
|
||||||
|
t.Fatalf("scheduleOverdue(%q, %v, %v) = %v, want %v", c.cron, c.lastBackup, c.now, got, c.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ptrTime(t time.Time) *time.Time { return &t }
|
||||||
@@ -483,6 +483,12 @@ func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn)
|
|||||||
// and the drain may take seconds across many rows. A non-blocking
|
// and the drain may take seconds across many rows. A non-blocking
|
||||||
// goroutine keeps the hello path snappy.
|
// goroutine keeps the hello path snappy.
|
||||||
go s.DrainPending(context.Background(), hostID)
|
go s.DrainPending(context.Background(), hostID)
|
||||||
|
// Intermittent hosts that just reconnected may have slept through a
|
||||||
|
// backup window. Arm a catch-up evaluation after a settle delay; the
|
||||||
|
// pending-drain tick fires it. Always-on hosts never need this.
|
||||||
|
if host, err := s.deps.Store.GetHost(ctx, hostID); err == nil && !host.AlwaysOn {
|
||||||
|
s.ArmCatchup(hostID, time.Now().UTC())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// maybeAutoInit dispatches a `restic init` job iff the host has no
|
// maybeAutoInit dispatches a `restic init` job iff the host has no
|
||||||
|
|||||||
@@ -90,6 +90,13 @@ type Server struct {
|
|||||||
// directories (P3-X2). Pre-allocated in New so the lazy-init
|
// directories (P3-X2). Pre-allocated in New so the lazy-init
|
||||||
// race is impossible.
|
// race is impossible.
|
||||||
treeCache *treeCache
|
treeCache *treeCache
|
||||||
|
|
||||||
|
// catchupDueAt tracks intermittent hosts that reconnected and are
|
||||||
|
// in their settle window. Keyed hostID → earliest time to evaluate
|
||||||
|
// catch-up. Best-effort + in-memory: a server restart simply re-arms
|
||||||
|
// on the next hello. Guarded by catchupMu.
|
||||||
|
catchupMu sync.Mutex
|
||||||
|
catchupDueAt map[string]time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
// New builds a configured but not-yet-started server.
|
// New builds a configured but not-yet-started server.
|
||||||
@@ -104,11 +111,12 @@ func New(deps Deps) *Server {
|
|||||||
r.Use(requestLogger)
|
r.Use(requestLogger)
|
||||||
|
|
||||||
s := &Server{
|
s := &Server{
|
||||||
deps: deps,
|
deps: deps,
|
||||||
drainLocks: make(map[string]*sync.Mutex),
|
drainLocks: make(map[string]*sync.Mutex),
|
||||||
announceRL: newAnnounceLimiter(),
|
announceRL: newAnnounceLimiter(),
|
||||||
pendingHub: newPendingHub(),
|
pendingHub: newPendingHub(),
|
||||||
treeCache: newTreeCache(),
|
treeCache: newTreeCache(),
|
||||||
|
catchupDueAt: make(map[string]time.Time),
|
||||||
}
|
}
|
||||||
s.routes(r)
|
s.routes(r)
|
||||||
|
|
||||||
@@ -279,6 +287,7 @@ func (s *Server) routes(r chi.Router) {
|
|||||||
r.Post("/hosts/{id}/repo/probe", s.handleUIRepoProbe)
|
r.Post("/hosts/{id}/repo/probe", s.handleUIRepoProbe)
|
||||||
r.Post("/hosts/{id}/repo/hooks", s.handleUIRepoHooksSave)
|
r.Post("/hosts/{id}/repo/hooks", s.handleUIRepoHooksSave)
|
||||||
r.Post("/hosts/{id}/tags", s.handleUIHostTagsSave)
|
r.Post("/hosts/{id}/tags", s.handleUIHostTagsSave)
|
||||||
|
r.Post("/hosts/{id}/mode", s.handleUIHostModeSave)
|
||||||
r.Post("/hosts/{id}/admin-credentials", s.handleUIAdminCredentialsSave)
|
r.Post("/hosts/{id}/admin-credentials", s.handleUIAdminCredentialsSave)
|
||||||
r.Post("/hosts/{id}/admin-credentials/delete", s.handleUIAdminCredentialsDelete)
|
r.Post("/hosts/{id}/admin-credentials/delete", s.handleUIAdminCredentialsDelete)
|
||||||
r.Post("/hosts/{id}/schedules/new", s.handleUIScheduleSave)
|
r.Post("/hosts/{id}/schedules/new", s.handleUIScheduleSave)
|
||||||
|
|||||||
@@ -49,8 +49,14 @@ func TestDashboard_HostRowSparklineRendersWithHistory(t *testing.T) {
|
|||||||
hostID := makeHost(t, st, "h-spark")
|
hostID := makeHost(t, st, "h-spark")
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
// Two history points → polyline must render.
|
// Two history points → polyline must render. Use dates relative to
|
||||||
for i, day := range []string{"2026-05-05", "2026-05-06"} {
|
// now so the points always fall inside the dashboard's rolling
|
||||||
|
// 30-day window (ui_handlers.go: since = now-30d); hard-coded dates
|
||||||
|
// silently age out of the window and break this test over time.
|
||||||
|
for i, day := range []string{
|
||||||
|
time.Now().UTC().AddDate(0, 0, -2).Format("2006-01-02"),
|
||||||
|
time.Now().UTC().AddDate(0, 0, -1).Format("2006-01-02"),
|
||||||
|
} {
|
||||||
v := int64(100 + i*50)
|
v := int64(100 + i*50)
|
||||||
if err := st.UpsertHostRepoStatsHistory(ctx, hostID, day,
|
if err := st.UpsertHostRepoStatsHistory(ctx, hostID, day,
|
||||||
store.HostRepoStats{TotalSizeBytes: &v}, time.Now().UTC()); err != nil {
|
store.HostRepoStats{TotalSizeBytes: &v}, time.Now().UTC()); err != nil {
|
||||||
|
|||||||
@@ -983,6 +983,43 @@ func (s *Server) handleUIHostTagsSave(w stdhttp.ResponseWriter, r *stdhttp.Reque
|
|||||||
stdhttp.Redirect(w, r, "/hosts/"+hostID, stdhttp.StatusSeeOther)
|
stdhttp.Redirect(w, r, "/hosts/"+hostID, stdhttp.StatusSeeOther)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handleUIHostModeSave flips a host's always-on flag. Checkbox present
|
||||||
|
// in the form (value any) => always-on; absent => intermittent.
|
||||||
|
// Operator-band; mounted in server.go. On change we clear open
|
||||||
|
// offline/staleness alerts via the engine so the next sweep re-raises
|
||||||
|
// only what still applies under the new mode.
|
||||||
|
func (s *Server) handleUIHostModeSave(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
||||||
|
u := s.requireUIUser(w, r)
|
||||||
|
if u == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
hostID := chi.URLParam(r, "id")
|
||||||
|
if _, err := s.deps.Store.GetHost(r.Context(), hostID); err != nil {
|
||||||
|
stdhttp.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := r.ParseForm(); err != nil {
|
||||||
|
stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
alwaysOn := r.PostForm.Get("always_on") != ""
|
||||||
|
if err := s.deps.Store.SetHostAlwaysOn(r.Context(), hostID, alwaysOn); err != nil {
|
||||||
|
slog.Error("ui host mode: save", "host_id", hostID, "err", err)
|
||||||
|
stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if s.deps.AlertEngine != nil {
|
||||||
|
s.deps.AlertEngine.ResolveOnModeChange(r.Context(), hostID, time.Now().UTC())
|
||||||
|
}
|
||||||
|
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||||
|
ID: ulid.Make().String(), UserID: &u.ID, Actor: "user",
|
||||||
|
Action: "host.mode_updated",
|
||||||
|
TargetKind: ptr("host"), TargetID: &hostID,
|
||||||
|
TS: time.Now().UTC(),
|
||||||
|
})
|
||||||
|
stdhttp.Redirect(w, r, "/hosts/"+hostID, stdhttp.StatusSeeOther)
|
||||||
|
}
|
||||||
|
|
||||||
// normaliseTags splits a comma-separated string, lowercases each token,
|
// normaliseTags splits a comma-separated string, lowercases each token,
|
||||||
// trims whitespace, drops empties, and dedupes. Order is preserved
|
// trims whitespace, drops empties, and dedupes. Order is preserved
|
||||||
// from first occurrence (so the user's typing order shows on screen).
|
// from first occurrence (so the user's typing order shows on screen).
|
||||||
|
|||||||
@@ -0,0 +1,88 @@
|
|||||||
|
// ui_host_mode_test.go — covers handleUIHostModeSave: toggling a
|
||||||
|
// host's always-on flag via POST /hosts/{id}/mode.
|
||||||
|
package http
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
stdhttp "net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestHostModeSaveToggle verifies the checkbox-absent ⇒ intermittent
|
||||||
|
// and checkbox-present ⇒ always-on semantics, and that the audit row
|
||||||
|
// lands for each request.
|
||||||
|
func TestHostModeSaveToggle(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
_, ts, st := rawTestServerWithUI(t)
|
||||||
|
hostID, _ := enrolHostForUI(t, nil, st, "mode-toggle-host")
|
||||||
|
|
||||||
|
cookie := loginAsAdmin(t, st)
|
||||||
|
|
||||||
|
cli := &stdhttp.Client{
|
||||||
|
CheckRedirect: func(*stdhttp.Request, []*stdhttp.Request) error {
|
||||||
|
return stdhttp.ErrUseLastResponse
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- POST with no always_on field => intermittent ---
|
||||||
|
form := url.Values{}
|
||||||
|
req, _ := stdhttp.NewRequest("POST", ts.URL+"/hosts/"+hostID+"/mode",
|
||||||
|
strings.NewReader(form.Encode()))
|
||||||
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
req.AddCookie(cookie)
|
||||||
|
res, err := cli.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("do: %v", err)
|
||||||
|
}
|
||||||
|
_ = res.Body.Close()
|
||||||
|
if res.StatusCode != stdhttp.StatusSeeOther {
|
||||||
|
t.Fatalf("status: got %d, want 303", res.StatusCode)
|
||||||
|
}
|
||||||
|
if loc := res.Header.Get("Location"); loc != "/hosts/"+hostID {
|
||||||
|
t.Errorf("Location: got %q, want /hosts/%s", loc, hostID)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, err := st.GetHost(context.Background(), hostID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GetHost: %v", err)
|
||||||
|
}
|
||||||
|
if got.AlwaysOn {
|
||||||
|
t.Errorf("AlwaysOn after empty form: got true, want false")
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- POST with always_on=on => always-on ---
|
||||||
|
form2 := url.Values{"always_on": {"on"}}
|
||||||
|
req2, _ := stdhttp.NewRequest("POST", ts.URL+"/hosts/"+hostID+"/mode",
|
||||||
|
strings.NewReader(form2.Encode()))
|
||||||
|
req2.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||||
|
req2.AddCookie(cookie)
|
||||||
|
res2, err := cli.Do(req2)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("do: %v", err)
|
||||||
|
}
|
||||||
|
_ = res2.Body.Close()
|
||||||
|
if res2.StatusCode != stdhttp.StatusSeeOther {
|
||||||
|
t.Fatalf("status: got %d, want 303", res2.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
got2, err := st.GetHost(context.Background(), hostID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GetHost: %v", err)
|
||||||
|
}
|
||||||
|
if !got2.AlwaysOn {
|
||||||
|
t.Errorf("AlwaysOn after always_on=on: got false, want true")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audit rows must exist (one per request).
|
||||||
|
var n int
|
||||||
|
if err := st.DB().QueryRow(
|
||||||
|
`SELECT COUNT(*) FROM audit_log WHERE action = 'host.mode_updated' AND target_id = ?`,
|
||||||
|
hostID).Scan(&n); err != nil {
|
||||||
|
t.Fatalf("count audit: %v", err)
|
||||||
|
}
|
||||||
|
if n != 2 {
|
||||||
|
t.Errorf("audit rows: got %d, want 2", n)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -221,23 +221,40 @@ func formatBytes(n int64) template.HTML {
|
|||||||
// "in 5m"-style. Accepts *time.Time or time.Time so templates can
|
// "in 5m"-style. Accepts *time.Time or time.Time so templates can
|
||||||
// pass either without fighting Go's lack of an address-of operator.
|
// pass either without fighting Go's lack of an address-of operator.
|
||||||
// Anything else returns "—".
|
// Anything else returns "—".
|
||||||
func formatRelTime(v any) string {
|
//
|
||||||
|
// The output is wrapped in a <time data-rel-ts="..."> element so a
|
||||||
|
// small client-side ticker (see base.html) can refresh the label
|
||||||
|
// without a full page reload — otherwise a long-open tab shows
|
||||||
|
// timestamps frozen at render time.
|
||||||
|
func formatRelTime(v any) template.HTML {
|
||||||
var t time.Time
|
var t time.Time
|
||||||
switch x := v.(type) {
|
switch x := v.(type) {
|
||||||
case time.Time:
|
case time.Time:
|
||||||
t = x
|
t = x
|
||||||
case *time.Time:
|
case *time.Time:
|
||||||
if x == nil {
|
if x == nil {
|
||||||
return "—"
|
return template.HTML("—")
|
||||||
}
|
}
|
||||||
t = *x
|
t = *x
|
||||||
default:
|
default:
|
||||||
return "—"
|
return template.HTML("—")
|
||||||
}
|
}
|
||||||
if t.IsZero() {
|
if t.IsZero() {
|
||||||
return "—"
|
return template.HTML("—")
|
||||||
}
|
}
|
||||||
d := time.Since(t)
|
label := relTimeLabel(time.Since(t))
|
||||||
|
return template.HTML(fmt.Sprintf(
|
||||||
|
`<time data-rel-ts="%s" title="%s">%s</time>`,
|
||||||
|
t.UTC().Format(time.RFC3339Nano),
|
||||||
|
t.UTC().Format("2006-01-02 15:04:05 UTC"),
|
||||||
|
label,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// relTimeLabel turns a duration-since-now into the short human label
|
||||||
|
// used by formatRelTime (and mirrored verbatim by the JS ticker, so
|
||||||
|
// keep the two in sync if you change the buckets).
|
||||||
|
func relTimeLabel(d time.Duration) string {
|
||||||
suffix := "ago"
|
suffix := "ago"
|
||||||
if d < 0 {
|
if d < 0 {
|
||||||
d = -d
|
d = -d
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
package ui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFormatRelTimeWrapsInTickableTimeElement(t *testing.T) {
|
||||||
|
// A long-open tab needs a stable anchor so the JS ticker can
|
||||||
|
// refresh the label — see base.html.
|
||||||
|
when := time.Now().Add(-3 * time.Hour)
|
||||||
|
got := string(formatRelTime(when))
|
||||||
|
if !strings.Contains(got, `<time data-rel-ts="`) {
|
||||||
|
t.Errorf("missing data-rel-ts anchor in %q", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "3h ago</time>") {
|
||||||
|
t.Errorf("expected '3h ago' label, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatRelTimeNilReturnsDash(t *testing.T) {
|
||||||
|
var p *time.Time
|
||||||
|
if string(formatRelTime(p)) != "—" {
|
||||||
|
t.Errorf("nil should render as em-dash, got %q", formatRelTime(p))
|
||||||
|
}
|
||||||
|
if string(formatRelTime(time.Time{})) != "—" {
|
||||||
|
t.Errorf("zero should render as em-dash")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRelTimeLabelBuckets(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
d time.Duration
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{30 * time.Second, "30s ago"},
|
||||||
|
{5 * time.Minute, "5m ago"},
|
||||||
|
{2 * time.Hour, "2h ago"},
|
||||||
|
{3 * 24 * time.Hour, "3d ago"},
|
||||||
|
{2 * 7 * 24 * time.Hour, "2w ago"},
|
||||||
|
{-5 * time.Minute, "5m from now"},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
if got := relTimeLabel(c.d); got != c.want {
|
||||||
|
t.Errorf("relTimeLabel(%v) = %q, want %q", c.d, got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
+25
-4
@@ -44,7 +44,7 @@ func (s *Store) LookupHostByAgentToken(ctx context.Context, tokenHash string) (*
|
|||||||
repo_size_bytes, snapshot_count, open_alert_count,
|
repo_size_bytes, snapshot_count, open_alert_count,
|
||||||
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
||||||
pre_hook_default, post_hook_default,
|
pre_hook_default, post_hook_default,
|
||||||
repo_status, repo_status_error
|
repo_status, repo_status_error, always_on
|
||||||
FROM hosts WHERE agent_token_hash = ?`,
|
FROM hosts WHERE agent_token_hash = ?`,
|
||||||
tokenHash)
|
tokenHash)
|
||||||
return scanHost(row)
|
return scanHost(row)
|
||||||
@@ -59,7 +59,7 @@ func (s *Store) GetHost(ctx context.Context, id string) (*Host, error) {
|
|||||||
repo_size_bytes, snapshot_count, open_alert_count,
|
repo_size_bytes, snapshot_count, open_alert_count,
|
||||||
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
||||||
pre_hook_default, post_hook_default,
|
pre_hook_default, post_hook_default,
|
||||||
repo_status, repo_status_error
|
repo_status, repo_status_error, always_on
|
||||||
FROM hosts WHERE id = ?`, id)
|
FROM hosts WHERE id = ?`, id)
|
||||||
return scanHost(row)
|
return scanHost(row)
|
||||||
}
|
}
|
||||||
@@ -227,7 +227,7 @@ func (s *Store) ListHosts(ctx context.Context) ([]Host, error) {
|
|||||||
repo_size_bytes, snapshot_count, open_alert_count,
|
repo_size_bytes, snapshot_count, open_alert_count,
|
||||||
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
||||||
pre_hook_default, post_hook_default,
|
pre_hook_default, post_hook_default,
|
||||||
repo_status, repo_status_error
|
repo_status, repo_status_error, always_on
|
||||||
FROM hosts ORDER BY name`)
|
FROM hosts ORDER BY name`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("store: list hosts: %w", err)
|
return nil, fmt.Errorf("store: list hosts: %w", err)
|
||||||
@@ -267,6 +267,7 @@ func scanHostRow(s hostScanner) (*Host, error) {
|
|||||||
tags string
|
tags string
|
||||||
bwUp, bwDown sql.NullInt64
|
bwUp, bwDown sql.NullInt64
|
||||||
preHook, postHook sql.NullString
|
preHook, postHook sql.NullString
|
||||||
|
alwaysOn int
|
||||||
)
|
)
|
||||||
err := s.Scan(&h.ID, &h.Name, &h.OS, &h.Arch,
|
err := s.Scan(&h.ID, &h.Name, &h.OS, &h.Arch,
|
||||||
&h.AgentVersion, &h.ResticVersion, &h.ProtocolVersion,
|
&h.AgentVersion, &h.ResticVersion, &h.ProtocolVersion,
|
||||||
@@ -275,7 +276,7 @@ func scanHostRow(s hostScanner) (*Host, error) {
|
|||||||
&h.RepoSizeBytes, &h.SnapshotCount, &h.OpenAlertCount,
|
&h.RepoSizeBytes, &h.SnapshotCount, &h.OpenAlertCount,
|
||||||
&h.AppliedScheduleVersion, &bwUp, &bwDown,
|
&h.AppliedScheduleVersion, &bwUp, &bwDown,
|
||||||
&preHook, &postHook,
|
&preHook, &postHook,
|
||||||
&h.RepoStatus, &h.RepoStatusError)
|
&h.RepoStatus, &h.RepoStatusError, &alwaysOn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, sql.ErrNoRows) {
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
return nil, ErrNotFound
|
return nil, ErrNotFound
|
||||||
@@ -330,6 +331,7 @@ func scanHostRow(s hostScanner) (*Host, error) {
|
|||||||
if postHook.Valid {
|
if postHook.Valid {
|
||||||
h.PostHookDefault = postHook.String
|
h.PostHookDefault = postHook.String
|
||||||
}
|
}
|
||||||
|
h.AlwaysOn = alwaysOn != 0
|
||||||
return &h, nil
|
return &h, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -378,6 +380,25 @@ func (s *Store) SetHostTags(ctx context.Context, hostID string, tags []string) e
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetHostAlwaysOn flips the host's always-on flag. true = 24x7 server
|
||||||
|
// (default); false = intermittent host (laptop). See the
|
||||||
|
// always-on-host-mode spec.
|
||||||
|
func (s *Store) SetHostAlwaysOn(ctx context.Context, hostID string, alwaysOn bool) error {
|
||||||
|
v := 0
|
||||||
|
if alwaysOn {
|
||||||
|
v = 1
|
||||||
|
}
|
||||||
|
res, err := s.db.ExecContext(ctx,
|
||||||
|
`UPDATE hosts SET always_on = ? WHERE id = ?`, v, hostID)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("store: set host always_on: %w", err)
|
||||||
|
}
|
||||||
|
if n, _ := res.RowsAffected(); n == 0 {
|
||||||
|
return ErrNotFound
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// DistinctHostTags returns the union of every tag in use across the
|
// DistinctHostTags returns the union of every tag in use across the
|
||||||
// fleet, sorted. Powers the autocomplete on the host-tags editor and
|
// fleet, sorted. Powers the autocomplete on the host-tags editor and
|
||||||
// the chip-row filter on the dashboard. Cheap at fleet sizes this
|
// the chip-row filter on the dashboard. Cheap at fleet sizes this
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHostAlwaysOnDefaultAndToggle(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
st := openTestStore(t)
|
||||||
|
|
||||||
|
h := Host{
|
||||||
|
ID: "h-always-on", Name: "lap", OS: "linux", Arch: "amd64",
|
||||||
|
ProtocolVersion: 1, EnrolledAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
if err := st.CreateHost(ctx, h, "tok-hash", "pin"); err != nil {
|
||||||
|
t.Fatalf("create host: %v", err)
|
||||||
|
}
|
||||||
|
got, err := st.GetHost(ctx, h.ID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get host: %v", err)
|
||||||
|
}
|
||||||
|
if !got.AlwaysOn {
|
||||||
|
t.Fatalf("new host should default to always_on=true, got false")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := st.SetHostAlwaysOn(ctx, h.ID, false); err != nil {
|
||||||
|
t.Fatalf("set always_on: %v", err)
|
||||||
|
}
|
||||||
|
got, err = st.GetHost(ctx, h.ID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get host 2: %v", err)
|
||||||
|
}
|
||||||
|
if got.AlwaysOn {
|
||||||
|
t.Fatalf("expected always_on=false after toggle, got true")
|
||||||
|
}
|
||||||
|
|
||||||
|
hosts, err := st.ListHosts(ctx)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("list hosts: %v", err)
|
||||||
|
}
|
||||||
|
if len(hosts) != 1 || hosts[0].AlwaysOn {
|
||||||
|
t.Fatalf("ListHosts should report always_on=false, got %+v", hosts)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify the agent hot-path (LookupHostByAgentToken) also reflects the toggle.
|
||||||
|
byToken, err := st.LookupHostByAgentToken(ctx, "tok-hash")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("lookup by agent token: %v", err)
|
||||||
|
}
|
||||||
|
if byToken.AlwaysOn {
|
||||||
|
t.Fatalf("LookupHostByAgentToken: expected always_on=false after toggle, got true")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -270,6 +270,22 @@ func (s *Store) LatestJobByKind(ctx context.Context, hostID, kind string) (*Job,
|
|||||||
return &j, nil
|
return &j, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasActiveBackupJob reports whether the host has a backup job that is
|
||||||
|
// still queued or running. The catch-up scheduler uses this to avoid
|
||||||
|
// dispatching a duplicate backup alongside one already in flight
|
||||||
|
// (hosts.current_job_id is not maintained, so this is the authoritative
|
||||||
|
// in-flight check).
|
||||||
|
func (s *Store) HasActiveBackupJob(ctx context.Context, hostID string) (bool, error) {
|
||||||
|
var exists bool
|
||||||
|
err := s.db.QueryRowContext(ctx,
|
||||||
|
`SELECT EXISTS(SELECT 1 FROM jobs WHERE host_id = ? AND kind = 'backup' AND status IN ('queued','running'))`,
|
||||||
|
hostID).Scan(&exists)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("store: has active backup job: %w", err)
|
||||||
|
}
|
||||||
|
return exists, nil
|
||||||
|
}
|
||||||
|
|
||||||
// HasJobOfKind reports whether any job of the given kind exists for
|
// HasJobOfKind reports whether any job of the given kind exists for
|
||||||
// this host, regardless of status. Used by the auto-init path on
|
// this host, regardless of status. Used by the auto-init path on
|
||||||
// agent hello to decide whether to dispatch a fresh `restic init` —
|
// agent hello to decide whether to dispatch a fresh `restic init` —
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
-- 0024: distinguish always-on (24x7 server) hosts from intermittent
|
||||||
|
-- hosts (laptops/workstations that legitimately sleep). Default 1 so
|
||||||
|
-- every existing and future host keeps today's offline/alert
|
||||||
|
-- semantics unless explicitly opted out. Column-level ALTER per the
|
||||||
|
-- repo's migration rules (no table rebuild — hosts has inbound FKs).
|
||||||
|
-- Boolean stored as an integer: the Go store writes 0 or 1 and reads
-- any non-zero value as true.
ALTER TABLE hosts ADD COLUMN always_on INTEGER NOT NULL DEFAULT 1;
|
||||||
@@ -99,6 +99,12 @@ type Host struct {
|
|||||||
// agent-side message when RepoStatus == "init_failed".
|
// agent-side message when RepoStatus == "init_failed".
|
||||||
RepoStatus string
|
RepoStatus string
|
||||||
RepoStatusError string
|
RepoStatusError string
|
||||||
|
|
||||||
|
// AlwaysOn is true for 24x7 server hosts (the default). When false
|
||||||
|
// the host is intermittent (laptop/workstation): offline alerts are
|
||||||
|
// suppressed, the UI shows an "asleep" state, and a missed backup is
|
||||||
|
// caught up ~1 min after reconnect. See the always-on-host-mode spec.
|
||||||
|
AlwaysOn bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Schedule is now intentionally slim: cron + which groups + enabled.
|
// Schedule is now intentionally slim: cron + which groups + enabled.
|
||||||
|
|||||||
@@ -13,4 +13,8 @@ var (
|
|||||||
// Commit is the short git SHA. Informational only; surfaced via
|
// Commit is the short git SHA. Informational only; surfaced via
|
||||||
// /api/version but not used for any comparison.
|
// /api/version but not used for any comparison.
|
||||||
Commit = ""
|
Commit = ""
|
||||||
|
|
||||||
|
// Date is the RFC3339 build timestamp. Informational only; printed
|
||||||
|
// by `--version` but not used for any comparison.
|
||||||
|
Date = "unknown"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -310,7 +310,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
|
|||||||
> **Sweep verified (smoke env):** admin adds operator → setup link generated → curl-as-new-user fetches /setup (200, page shows username) → POSTs password → 303 to / + Set-Cookie → operator authenticated → 200 on /, 200 on /settings/account, **403 on /settings/users** (admin-only) → admin disables user → operator's next request is **401** + session row count drops to 0 → audit log shows `user.created` + `user.setup_completed` for the cycle. All 26 implementation tasks landed; full `go test ./...` green.
|
> **Sweep verified (smoke env):** admin adds operator → setup link generated → curl-as-new-user fetches /setup (200, page shows username) → POSTs password → 303 to / + Set-Cookie → operator authenticated → 200 on /, 200 on /settings/account, **403 on /settings/users** (admin-only) → admin disables user → operator's next request is **401** + session row count drops to 0 → audit log shows `user.created` + `user.setup_completed` for the cycle. All 26 implementation tasks landed; full `go test ./...` green.
|
||||||
- [x] **P4-05** (L) OIDC login (generic provider config, group → role mapping)
|
- [x] **P4-05** (L) OIDC login (generic provider config, group → role mapping)
|
||||||
|
|
||||||
> **As shipped (2026-05-05):** Authorization Code + PKCE (S256) against any OIDC IdP advertising standard discovery. Config is YAML+env (`oidc.issuer`, `oidc.client_id`, `oidc.client_secret`/`_file`, `oidc.role_claim` default `groups`, `oidc.role_mapping`, `oidc.display_name`, `oidc.redirect_url`); empty issuer → OIDC disabled, no routes mounted. Migration 0019 adds `users.auth_source`/`oidc_subject` (partial unique index on `oidc_subject`), `sessions.id_token`, and a small `oidc_state` table for state+verifier round-trip (cleaned up every alert tick, 5 min TTL). Login page renders **Sign in with `<display_name>`** above the local form when OIDC is enabled; the SSO button kicks off a 303 to the IdP with state + S256 code_challenge persisted server-side. Callback verifies ID token, fetches `/userinfo` to merge claims (Authelia / many IdPs only put `sub` in the ID token and surface `preferred_username`/`email`/`groups` from userinfo), maps the first matching group to a role; **no match → deny banner**, no row created, audit `user.oidc_login_blocked`. Username-collision with an existing local user → same deny path with `username_taken`. New user → JIT-provisioned with `auth_source='oidc'`, `oidc_subject=<sub>`, `password_hash=''`. Returning user → looked up by `oidc_subject` (stable when usernames change at the IdP), role + email refreshed on every login. Local password login is rejected for `auth_source='oidc'` users. Logout posts to `/logout` and, when the IdP advertised `end_session_endpoint`, follows up with RP-initiated logout (carries `id_token_hint` + `post_logout_redirect_uri=BaseURL`); when not advertised (Authelia in our smoke env), the local session is cleared and the browser lands on `/login`. Users list shows a small **oidc** chip beside enabled/disabled; the edit page disables username/email/role for OIDC users (server-side guard mirrors UI, returns 403). Force-logout, disable, and the last-admin guard from P4-04 all still apply. 
**Live Authelia sweep verified all four paths against `https://auth.example.invalid`:** rm-admin → admin role + JIT row + chip + readonly edit; rm-operator → operator JIT, 403 on `/settings/users`; rm-viewer → viewer JIT, 403 on `/hosts/new`; rm-other (group not in role_mapping) → no_role_match banner, no row created, audit logged. Returning rm-admin login resolved to the same row by sub. Screenshots in `_diag/p4-05-sweep/`. Out-of-scope and on Phase 6 candidate list: refresh tokens, back-channel logout, multiple providers, post-login PKCE for the cookie itself.
|
> **As shipped (2026-05-05):** Authorization Code + PKCE (S256) against any OIDC IdP advertising standard discovery. Config is YAML+env (`oidc.issuer`, `oidc.client_id`, `oidc.client_secret`/`_file`, `oidc.role_claim` default `groups`, `oidc.role_mapping`, `oidc.display_name`, `oidc.redirect_url`); empty issuer → OIDC disabled, no routes mounted. Migration 0019 adds `users.auth_source`/`oidc_subject` (partial unique index on `oidc_subject`), `sessions.id_token`, and a small `oidc_state` table for state+verifier round-trip (cleaned up every alert tick, 5 min TTL). Login page renders **Sign in with `<display_name>`** above the local form when OIDC is enabled; the SSO button kicks off a 303 to the IdP with state + S256 code_challenge persisted server-side. Callback verifies ID token, fetches `/userinfo` to merge claims (Authelia / many IdPs only put `sub` in the ID token and surface `preferred_username`/`email`/`groups` from userinfo), maps the first matching group to a role; **no match → deny banner**, no row created, audit `user.oidc_login_blocked`. Username-collision with an existing local user → same deny path with `username_taken`. New user → JIT-provisioned with `auth_source='oidc'`, `oidc_subject=<sub>`, `password_hash=''`. Returning user → looked up by `oidc_subject` (stable when usernames change at the IdP), role + email refreshed on every login. Local password login is rejected for `auth_source='oidc'` users. Logout posts to `/logout` and, when the IdP advertised `end_session_endpoint`, follows up with RP-initiated logout (carries `id_token_hint` + `post_logout_redirect_uri=BaseURL`); when not advertised (Authelia in our smoke env), the local session is cleared and the browser lands on `/login`. Users list shows a small **oidc** chip beside enabled/disabled; the edit page disables username/email/role for OIDC users (server-side guard mirrors UI, returns 403). Force-logout, disable, and the last-admin guard from P4-04 all still apply. 
**Live Authelia sweep verified all four paths against local auth:** rm-admin → admin role + JIT row + chip + readonly edit; rm-operator → operator JIT, 403 on `/settings/users`; rm-viewer → viewer JIT, 403 on `/hosts/new`; rm-other (group not in role_mapping) → no_role_match banner, no row created, audit logged. Returning rm-admin login resolved to the same row by sub. Screenshots in `_diag/p4-05-sweep/`. Out-of-scope and on Phase 6 candidate list: refresh tokens, back-channel logout, multiple providers, post-login PKCE for the cookie itself.
|
||||||
|
|
||||||
- [x] **P4-07** (S) Per-host tags + dashboard filtering by tag
|
- [x] **P4-07** (S) Per-host tags + dashboard filtering by tag
|
||||||
|
|
||||||
@@ -498,6 +498,8 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
|
|||||||
- [x] **NS-03** Auto-init repo on first onboard, surface credential failures eagerly. ✅ Landed: migration 0020 adds `hosts.repo_status` (`unknown`/`ready`/`init_failed`) + `repo_status_error`; WS handler projects every init job's terminal state onto the host row (with idempotent "config file already exists" → ready); creds-save handlers (UI + JSON API) reset status to `unknown` and dispatch a fresh init when the agent is online; new `/hosts/{id}/repo/probe` retry endpoint and a status banner on the repo page. Remainder of original scope below: surface credential failures eagerly. Today the operator types repo URL + creds during Add-host and the credentials are pushed to the agent on connect, but no `restic init`/probe runs until the first scheduled job — so a typo in the password or a wrong URL goes undetected for hours/days, manifesting as a silent missed-backup. Wanted behaviour: when the host completes enrolment (or when an admin saves new repo creds), the server dispatches a one-shot probe job that runs `restic cat config` (cheap, repo-existence + creds-validity in one call). On `Is there already a config file? unable to open config file` → run `restic init`. On success → mark the host's repo as ready. On any other error (network, auth, fingerprint) → surface a panel-level error on the host detail page and audit the failure, leaving the host in an "init pending" state with a "Retry" button. Needs: a new `JobKind` (or piggyback on an existing one) for the probe, server-side state on the host row (`repo_status` enum: `unknown`/`ready`/`init_pending`/`init_failed`), UI panel that shows the state, and clear copy on the Add-host page so the operator knows the save isn't fire-and-forget.
|
- [x] **NS-03** Auto-init repo on first onboard, surface credential failures eagerly. ✅ Landed: migration 0020 adds `hosts.repo_status` (`unknown`/`ready`/`init_failed`) + `repo_status_error`; WS handler projects every init job's terminal state onto the host row (with idempotent "config file already exists" → ready); creds-save handlers (UI + JSON API) reset status to `unknown` and dispatch a fresh init when the agent is online; new `/hosts/{id}/repo/probe` retry endpoint and a status banner on the repo page. Remainder of original scope below: surface credential failures eagerly. Today the operator types repo URL + creds during Add-host and the credentials are pushed to the agent on connect, but no `restic init`/probe runs until the first scheduled job — so a typo in the password or a wrong URL goes undetected for hours/days, manifesting as a silent missed-backup. Wanted behaviour: when the host completes enrolment (or when an admin saves new repo creds), the server dispatches a one-shot probe job that runs `restic cat config` (cheap, repo-existence + creds-validity in one call). On `Is there already a config file? unable to open config file` → run `restic init`. On success → mark the host's repo as ready. On any other error (network, auth, fingerprint) → surface a panel-level error on the host detail page and audit the failure, leaving the host in an "init pending" state with a "Retry" button. Needs: a new `JobKind` (or piggyback on an existing one) for the probe, server-side state on the host row (`repo_status` enum: `unknown`/`ready`/`init_pending`/`init_failed`), UI panel that shows the state, and clear copy on the Add-host page so the operator knows the save isn't fire-and-forget.
|
||||||
- [x] **NS-05** Drop redundant `actions/setup-go` from `.gitea/workflows/ci.yml`. ✅ Already gone — verified `.gitea/workflows/ci.yml` has zero `actions/setup-go@v5` invocations and no `GO_VERSION` env; the file's header comment now documents that the runner image (`gitea.dcglab.co.uk/steve/ci-runner-go`) is the single source of truth for the Go version. Closing as done; no further code change needed.
|
- [x] **NS-05** Drop redundant `actions/setup-go` from `.gitea/workflows/ci.yml`. ✅ Already gone — verified `.gitea/workflows/ci.yml` has zero `actions/setup-go@v5` invocations and no `GO_VERSION` env; the file's header comment now documents that the runner image (`gitea.dcglab.co.uk/steve/ci-runner-go`) is the single source of truth for the Go version. Closing as done; no further code change needed.
|
||||||
- [x] **NS-06** Remove the permanently-disabled "Run backup now" button from `web/templates/partials/host_chrome.html`. ✅ Landed: dropped the disabled tombstone button from the host header action row; only "Edit credentials" + the ⋯ menu remain. Per-source-group Run-now on `/hosts/{id}/sources` is the only path now. No e2e change needed — `smoke.spec.ts` does not assert on host_chrome's button row.
|
- [x] **NS-06** Remove the permanently-disabled "Run backup now" button from `web/templates/partials/host_chrome.html`. ✅ Landed: dropped the disabled tombstone button from the host header action row; only "Edit credentials" + the ⋯ menu remain. Per-source-group Run-now on `/hosts/{id}/sources` is the only path now. No e2e change needed — `smoke.spec.ts` does not assert on host_chrome's button row.
|
||||||
|
- [x] **NS-07** Relative timestamps go stale on long-open tabs. ✅ Landed: `formatRelTime` now wraps its label in `<time data-rel-ts=…>` and both layouts (`base.html`, `chromeless.html`) carry a small ticker that re-renders every 30s, so a page rendered an hour ago no longer keeps showing "2h ago" when the wall-clock truth is "3h ago". Covered by `funcs_test.go`. The bug: every relative label was computed once at server render and never updated client-side, so a job-detail page left open drifted further from reality the longer it sat.
|
||||||
|
- [x] **NS-08** Always-On vs intermittent host mode. ✅ Landed: a host can now be marked not-always-on (laptop/workstation) so it stops generating offline-alert noise when it legitimately sleeps. Migration 0024 adds `hosts.always_on` (default 1 = today's 24×7 behaviour; intermittent is strictly opt-in). The alert engine suppresses `agent_offline` for intermittent hosts and instead wires up the previously-dead `stale_schedule` alert for them — raised at a 7-day global threshold when the host has an enabled schedule and a stale last backup, resolved on the next successful backup. A new server-side catch-up scheduler (`internal/server/http/catchup.go`) arms on agent hello and fires from the existing 30s pending-drain tick: ~60s after an intermittent host reconnects it dispatches a backup for any enabled schedule whose window elapsed while asleep (overdue = `cron.Next(lastBackup) <= now`, reusing the shared `cronParser`), guarded against firing when the host bounced offline, flipped to always-on, or already has a job running. Overdue is measured against the per-host `LastBackupAt` (exact for the common single-schedule laptop; a known coarseness for multi-cadence hosts, documented in code). Operator toggle via `POST /hosts/{id}/mode` (audited `host.mode_updated`), which also clears open offline/staleness alerts so the next sweep re-settles. UI: intermittent offline hosts render a calm grey `asleep · <relTime> · will catch up on return` state (new `.dot-asleep`) instead of red "offline"; a `24×7` chip shows only for always-on hosts; a "presence" inline toggle on the host header. Design + plan in `docs/specs/2026-06-15-always-on-host-mode-design.md` and `docs/plans/2026-06-15-always-on-host-mode.md`. Spec §2 (online/offline mechanics) deliberately left untouched. Out of scope for v1: per-host staleness thresholds, continuous (non-reconnect) overdue evaluation, per-schedule last-success tracking.
|
||||||
- [x] **NS-04** Dashboard parity with the alerts screen: live refresh, column sorting, filters. ✅ Landed: `/` now parses `q`/`status`/`repo_status`/`tag`/`sort`/`dir` query params (round-trip durable for bookmarks); table is wrapped in an `id="hosts-table"` htmx live-poll matching the alerts cadence (5s, gated on `document.visibilityState` and `localStorage.rm-dashboard-live`); filter row above the table with hostname free-text + status + repo_status selects + tag chips + clear; column headers (Host / OS · arch / Last backup / Repo size / Snapshots) are clickable links that toggle direction on the active column; pure-Go sort+filter pipeline covered by `dashboard_filter_test.go`. Original scope below: live refresh, column sorting, filters. The host list is currently a static render — operators have to reload to see new heartbeats / job state changes. Mirror the alerts pattern (`web/templates/pages/alerts.html` uses `hx-trigger="every 5s [document.visibilityState==='visible' && localStorage.getItem('rm-alerts-live')!=='off']"` plus a Live/Off toggle so background tabs and explicit-off don't burn server cycles). Add: server-side sort on every meaningful column (name, OS, last-backup time, last-backup status, agent online/offline, restic version, tags), and a small filter row above the table — at minimum free-text on hostname, status (online/offline/never-seen), and tag chips. Columns + filter state should round-trip through query string so a bookmarked / shared URL is durable. Re-use the `host_row` partial that already exists so the live-refresh swap is a clean OOB swap, not a full table re-render.
|
- [x] **NS-04** Dashboard parity with the alerts screen: live refresh, column sorting, filters. ✅ Landed: `/` now parses `q`/`status`/`repo_status`/`tag`/`sort`/`dir` query params (round-trip durable for bookmarks); table is wrapped in an `id="hosts-table"` htmx live-poll matching the alerts cadence (5s, gated on `document.visibilityState` and `localStorage.rm-dashboard-live`); filter row above the table with hostname free-text + status + repo_status selects + tag chips + clear; column headers (Host / OS · arch / Last backup / Repo size / Snapshots) are clickable links that toggle direction on the active column; pure-Go sort+filter pipeline covered by `dashboard_filter_test.go`. Original scope below: live refresh, column sorting, filters. The host list is currently a static render — operators have to reload to see new heartbeats / job state changes. Mirror the alerts pattern (`web/templates/pages/alerts.html` uses `hx-trigger="every 5s [document.visibilityState==='visible' && localStorage.getItem('rm-alerts-live')!=='off']"` plus a Live/Off toggle so background tabs and explicit-off don't burn server cycles). Add: server-side sort on every meaningful column (name, OS, last-backup time, last-backup status, agent online/offline, restic version, tags), and a small filter row above the table — at minimum free-text on hostname, status (online/offline/never-seen), and tag chips. Columns + filter state should round-trip through query string so a bookmarked / shared URL is durable. Re-use the `host_row` partial that already exists so the live-refresh swap is a clean OOB swap, not a full table re-render.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -70,6 +70,7 @@
|
|||||||
.dot-online { background: var(--ok); box-shadow: 0 0 0 3px color-mix(in oklch, var(--ok), transparent 80%); }
|
.dot-online { background: var(--ok); box-shadow: 0 0 0 3px color-mix(in oklch, var(--ok), transparent 80%); }
|
||||||
.dot-degraded { background: var(--warn); box-shadow: 0 0 0 3px color-mix(in oklch, var(--warn), transparent 80%); }
|
.dot-degraded { background: var(--warn); box-shadow: 0 0 0 3px color-mix(in oklch, var(--warn), transparent 80%); }
|
||||||
.dot-offline { background: var(--off); }
|
.dot-offline { background: var(--off); }
|
||||||
|
.dot-asleep { background: var(--ink-fade); opacity: 0.6; }
|
||||||
.dot-failed { background: var(--bad); box-shadow: 0 0 0 3px color-mix(in oklch, var(--bad), transparent 80%); }
|
.dot-failed { background: var(--bad); box-shadow: 0 0 0 3px color-mix(in oklch, var(--bad), transparent 80%); }
|
||||||
.pulse { animation: rm-pulse 2.4s ease-in-out infinite; }
|
.pulse { animation: rm-pulse 2.4s ease-in-out infinite; }
|
||||||
@keyframes rm-pulse {
|
@keyframes rm-pulse {
|
||||||
@@ -195,6 +196,17 @@
|
|||||||
}
|
}
|
||||||
.tag-removable .x { color: var(--ink-fade); cursor: pointer; padding-left: 2px; }
|
.tag-removable .x { color: var(--ink-fade); cursor: pointer; padding-left: 2px; }
|
||||||
|
|
||||||
|
/* ---------- header meta groups (boxed tags / presence pills) ---------- */
|
||||||
|
.meta-group {
|
||||||
|
display: inline-flex; align-items: center; gap: 6px;
|
||||||
|
font-size: 11px; line-height: 1; padding: 3px 9px;
|
||||||
|
border: 1px solid var(--line); border-radius: 5px;
|
||||||
|
background: color-mix(in oklch, var(--ink), transparent 95%);
|
||||||
|
}
|
||||||
|
.meta-group .meta-label { color: var(--ink-mute); }
|
||||||
|
.meta-group .meta-val { color: var(--ink-mid); text-decoration: none; }
|
||||||
|
.meta-group a.meta-val:hover { color: var(--ink); text-decoration: underline; }
|
||||||
|
|
||||||
/* ---------- form fields ---------- */
|
/* ---------- form fields ---------- */
|
||||||
.field-label { font-size: 12px; color: var(--ink-mid); margin-bottom: 6px; display: block; }
|
.field-label { font-size: 12px; color: var(--ink-mid); margin-bottom: 6px; display: block; }
|
||||||
.field-help { font-size: 12px; color: var(--ink-mute); margin-top: 6px; line-height: 1.55; }
|
.field-help { font-size: 12px; color: var(--ink-mute); margin-top: 6px; line-height: 1.55; }
|
||||||
|
|||||||
@@ -20,6 +20,37 @@
|
|||||||
|
|
||||||
{{template "toast" .}}
|
{{template "toast" .}}
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Tick <time data-rel-ts> labels so long-open tabs don't freeze
|
||||||
|
// (e.g. a job page rendered an hour ago kept showing "2h ago" when
|
||||||
|
// the truth was "3h ago"). Buckets must match relTimeLabel in
|
||||||
|
// internal/server/ui/funcs.go.
|
||||||
|
(function () {
  // Successive bucket widths: seconds -> minutes -> hours -> days. A value
  // that overflows the last bucket is reported in weeks. These buckets
  // must stay in step with relTimeLabel in internal/server/ui/funcs.go.
  var BUCKETS = [[60, 's'], [60, 'm'], [24, 'h'], [7, 'd']];

  // Turn a signed millisecond delta (now - timestamp) into a short label
  // such as "5m ago". A negative delta (timestamp in the future) is shown
  // with the "from now" suffix instead.
  function label(ms) {
    var suffix = ms < 0 ? 'from now' : 'ago';
    var value = Math.floor(Math.abs(ms) / 1000);
    for (var i = 0; i < BUCKETS.length; i++) {
      var width = BUCKETS[i][0];
      if (value < width) return value + BUCKETS[i][1] + ' ' + suffix;
      value = Math.floor(value / width);
    }
    return value + 'w ' + suffix;
  }

  // Re-render every <time data-rel-ts> element against the current clock.
  // Elements whose attribute does not parse as a date are left untouched.
  function tick() {
    var now = Date.now();
    var nodes = document.querySelectorAll('time[data-rel-ts]');
    for (var i = 0; i < nodes.length; i++) {
      var stamp = Date.parse(nodes[i].getAttribute('data-rel-ts'));
      if (isNaN(stamp)) continue;
      nodes[i].textContent = label(now - stamp);
    }
  }

  // Refresh once immediately, then every 30 seconds.
  tick();
  setInterval(tick, 30000);
})();
|
||||||
|
</script>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|||||||
@@ -11,6 +11,34 @@
|
|||||||
</head>
|
</head>
|
||||||
<body class="min-h-screen flex flex-col">
|
<body class="min-h-screen flex flex-col">
|
||||||
{{block "content" .}}{{end}}
|
{{block "content" .}}{{end}}
|
||||||
|
<script>
|
||||||
|
// See base.html for rationale; chromeless pages (e.g. pending host)
|
||||||
|
// also use the relTime helper, so they need the same ticker.
|
||||||
|
(function () {
  // Successive bucket widths: seconds -> minutes -> hours -> days. A value
  // that overflows the last bucket is reported in weeks. Keep this table
  // identical to the ticker in base.html.
  var BUCKETS = [[60, 's'], [60, 'm'], [24, 'h'], [7, 'd']];

  // Turn a signed millisecond delta (now - timestamp) into a short label
  // such as "5m ago". A negative delta (timestamp in the future) is shown
  // with the "from now" suffix instead.
  function label(ms) {
    var suffix = ms < 0 ? 'from now' : 'ago';
    var value = Math.floor(Math.abs(ms) / 1000);
    for (var i = 0; i < BUCKETS.length; i++) {
      var width = BUCKETS[i][0];
      if (value < width) return value + BUCKETS[i][1] + ' ' + suffix;
      value = Math.floor(value / width);
    }
    return value + 'w ' + suffix;
  }

  // Re-render every <time data-rel-ts> element against the current clock.
  // Elements whose attribute does not parse as a date are left untouched.
  function tick() {
    var now = Date.now();
    var nodes = document.querySelectorAll('time[data-rel-ts]');
    for (var i = 0; i < nodes.length; i++) {
      var stamp = Date.parse(nodes[i].getAttribute('data-rel-ts'));
      if (isNaN(stamp)) continue;
      nodes[i].textContent = label(now - stamp);
    }
  }

  // Refresh once immediately, then every 30 seconds.
  tick();
  setInterval(tick, 30000);
})();
|
||||||
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|||||||
@@ -34,17 +34,32 @@
|
|||||||
{{else if eq $host.Status "degraded"}}
|
{{else if eq $host.Status "degraded"}}
|
||||||
<span class="dot dot-degraded"></span>
|
<span class="dot dot-degraded"></span>
|
||||||
{{else if eq $host.Status "offline"}}
|
{{else if eq $host.Status "offline"}}
|
||||||
<span class="dot dot-offline"></span>
|
{{if $host.AlwaysOn}}
|
||||||
|
<span class="dot dot-offline"></span>
|
||||||
|
{{else}}
|
||||||
|
<span class="dot dot-asleep"></span>
|
||||||
|
{{end}}
|
||||||
{{else}}
|
{{else}}
|
||||||
<span class="dot dot-failed"></span>
|
<span class="dot dot-failed"></span>
|
||||||
{{end}}
|
{{end}}
|
||||||
<h1 class="mono text-[26px] font-medium tracking-[0.005em] text-ink">{{$host.Name}}</h1>
|
<h1 class="mono text-[26px] font-medium tracking-[0.005em] text-ink">{{$host.Name}}</h1>
|
||||||
<div class="flex gap-1.5 items-center">
|
<div class="flex items-center gap-2.5">
|
||||||
{{range $host.Tags}}<a href="/?tag={{.}}" class="tag" title="filter dashboard by this tag">{{.}}</a>{{end}}
|
{{/* tags group pill — click the "tags" label to edit; the tag
|
||||||
<button type="button" class="text-ink-fade text-[11px] hover:text-ink-mid whitespace-nowrap"
|
values still filter the dashboard by that tag. */}}
|
||||||
style="padding: 2px 8px; border: 1px dashed var(--line); border-radius: 3px; cursor: pointer;"
|
<span class="meta-group">
|
||||||
|
<span class="meta-label cursor-pointer hover:text-ink"
|
||||||
onclick="document.getElementById('tags-edit-{{$host.ID}}').classList.toggle('hidden')"
|
onclick="document.getElementById('tags-edit-{{$host.ID}}').classList.toggle('hidden')"
|
||||||
title="Edit tags">{{if $host.Tags}}edit tags{{else}}add tags{{end}}</button>
|
title="Edit tags">tags</span>
|
||||||
|
{{range $host.Tags}}<a href="/?tag={{.}}" class="meta-val" title="filter dashboard by this tag">{{.}}</a>{{end}}
|
||||||
|
{{if not $host.Tags}}<span class="meta-val">—</span>{{end}}
|
||||||
|
</span>
|
||||||
|
{{/* presence group pill — click anywhere to edit. */}}
|
||||||
|
<span class="meta-group cursor-pointer"
|
||||||
|
onclick="document.getElementById('mode-edit-{{$host.ID}}').classList.toggle('hidden')"
|
||||||
|
title="Change presence mode">
|
||||||
|
<span class="meta-label">presence</span>
|
||||||
|
<span class="meta-val">{{if $host.AlwaysOn}}24x7{{else}}Free{{end}}</span>
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
{{if gt $page.ScheduleVersion 0}}
|
{{if gt $page.ScheduleVersion 0}}
|
||||||
<span class="mono text-[11px] text-ink-mute ml-2">
|
<span class="mono text-[11px] text-ink-mute ml-2">
|
||||||
@@ -80,6 +95,24 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="field-help">Comma-separated. Lowercased automatically.</div>
|
<div class="field-help">Comma-separated. Lowercased automatically.</div>
|
||||||
</form>
|
</form>
|
||||||
|
{{/* Presence-mode editor — hidden by default; toggled by the
|
||||||
|
"presence" button. Checkbox present => always-on (24×7);
|
||||||
|
unchecked => intermittent (laptop): no offline alerts, shows
|
||||||
|
"asleep", auto-catches-up a missed backup on reconnect. */}}
|
||||||
|
<form id="mode-edit-{{$host.ID}}" method="post"
|
||||||
|
action="/hosts/{{$host.ID}}/mode"
|
||||||
|
class="hidden mt-3" style="max-width: 640px;">
|
||||||
|
<label class="flex items-center gap-2 text-[12px] text-ink-mid">
|
||||||
|
<input type="checkbox" name="always_on" value="on" {{if $host.AlwaysOn}}checked{{end}} />
|
||||||
|
Always On — expected online 24×7
|
||||||
|
</label>
|
||||||
|
<div class="field-help">
|
||||||
|
Uncheck for an intermittent host (laptop/workstation): it won't
|
||||||
|
raise offline alerts when asleep, shows an "asleep" state, and
|
||||||
|
catches up a missed backup ~1 minute after it reconnects.
|
||||||
|
</div>
|
||||||
|
<button type="submit" class="btn btn-primary mt-2 whitespace-nowrap">Save presence</button>
|
||||||
|
</form>
|
||||||
<div class="flex items-center gap-3 mt-3 text-[13px] text-ink-mute">
|
<div class="flex items-center gap-3 mt-3 text-[13px] text-ink-mute">
|
||||||
<span class="mono text-ink-mid">{{$host.OS}}/{{$host.Arch}}</span>
|
<span class="mono text-ink-mid">{{$host.OS}}/{{$host.Arch}}</span>
|
||||||
<span class="text-ink-fade">·</span>
|
<span class="text-ink-fade">·</span>
|
||||||
@@ -88,7 +121,11 @@
|
|||||||
<span>restic <span class="mono text-ink-mid">{{if $host.ResticVersion}}{{$host.ResticVersion}}{{else}}—{{end}}</span></span>
|
<span>restic <span class="mono text-ink-mid">{{if $host.ResticVersion}}{{$host.ResticVersion}}{{else}}—{{end}}</span></span>
|
||||||
<span class="text-ink-fade">·</span>
|
<span class="text-ink-fade">·</span>
|
||||||
{{if eq $host.Status "offline"}}
|
{{if eq $host.Status "offline"}}
|
||||||
<span>last seen <span class="mono text-ink-mid">{{relTime $host.LastSeenAt}}</span></span>
|
{{if $host.AlwaysOn}}
|
||||||
|
<span>last seen <span class="mono text-ink-mid">{{relTime $host.LastSeenAt}}</span></span>
|
||||||
|
{{else}}
|
||||||
|
<span>asleep · last seen <span class="mono text-ink-mid">{{relTime $host.LastSeenAt}}</span> · will catch up on return</span>
|
||||||
|
{{end}}
|
||||||
{{else}}
|
{{else}}
|
||||||
<span>online · last heartbeat <span class="mono text-ink-mid">{{relTime $host.LastSeenAt}}</span></span>
|
<span>online · last heartbeat <span class="mono text-ink-mid">{{relTime $host.LastSeenAt}}</span></span>
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|||||||
@@ -8,7 +8,11 @@
|
|||||||
{{- else if eq $h.Status "degraded" -}}
|
{{- else if eq $h.Status "degraded" -}}
|
||||||
<span class="dot dot-degraded"></span>
|
<span class="dot dot-degraded"></span>
|
||||||
{{- else if eq $h.Status "offline" -}}
|
{{- else if eq $h.Status "offline" -}}
|
||||||
<span class="dot dot-offline"></span>
|
{{- if $h.AlwaysOn -}}
|
||||||
|
<span class="dot dot-offline"></span>
|
||||||
|
{{- else -}}
|
||||||
|
<span class="dot dot-asleep"></span>
|
||||||
|
{{- end -}}
|
||||||
{{- else -}}
|
{{- else -}}
|
||||||
<span class="dot dot-failed"></span>
|
<span class="dot dot-failed"></span>
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
@@ -26,7 +30,11 @@
|
|||||||
{{- else if eq (deref $h.LastBackupStatus) "cancelled" -}}
|
{{- else if eq (deref $h.LastBackupStatus) "cancelled" -}}
|
||||||
<span class="text-warn">cancelled</span> · <span class="mono">{{relTime $h.LastBackupAt}}</span>
|
<span class="text-warn">cancelled</span> · <span class="mono">{{relTime $h.LastBackupAt}}</span>
|
||||||
{{- else if eq $h.Status "offline" -}}
|
{{- else if eq $h.Status "offline" -}}
|
||||||
<span class="text-ink-mute">last seen <span class="mono">{{relTime $h.LastSeenAt}}</span></span>
|
{{- if $h.AlwaysOn -}}
|
||||||
|
<span class="text-ink-mute">last seen <span class="mono">{{relTime $h.LastSeenAt}}</span></span>
|
||||||
|
{{- else -}}
|
||||||
|
<span class="text-ink-mute">asleep · <span class="mono">{{relTime $h.LastSeenAt}}</span> · will catch up on return</span>
|
||||||
|
{{- end -}}
|
||||||
{{- else -}}
|
{{- else -}}
|
||||||
<span class="text-ink-fade italic">never run</span>
|
<span class="text-ink-fade italic">never run</span>
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
@@ -53,7 +61,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="text-right row-action">
|
<div class="text-right row-action">
|
||||||
{{- if eq $h.Status "offline" -}}
|
{{- if eq $h.Status "offline" -}}
|
||||||
<span class="mono text-xs text-ink-fade">offline</span>
|
<span class="mono text-xs text-ink-fade">{{if $h.AlwaysOn}}offline{{else}}asleep{{end}}</span>
|
||||||
{{- else if $h.CurrentJobID -}}
|
{{- else if $h.CurrentJobID -}}
|
||||||
<a href="/jobs/{{deref $h.CurrentJobID}}" class="btn btn-ghost">View job →</a>
|
<a href="/jobs/{{deref $h.CurrentJobID}}" class="btn btn-ghost">View job →</a>
|
||||||
{{- else if .RunAllScheduleID -}}
|
{{- else if .RunAllScheduleID -}}
|
||||||
|
|||||||
@@ -7,5 +7,5 @@
|
|||||||
Hidden entirely when UpdateAvailable is false.
|
Hidden entirely when UpdateAvailable is false.
|
||||||
*/}}
|
*/}}
|
||||||
{{define "host_update_chip"}}
|
{{define "host_update_chip"}}
|
||||||
{{if .UpdateAvailable}}<span class="update-chip" title="Agent at {{.Host.AgentVersion}}; server at {{.TargetVersion}}">out of date · {{.Host.AgentVersion}} → {{.TargetVersion}}</span>{{end}}
|
{{if .UpdateAvailable}}<span class="update-chip" title="Agent at {{.Host.AgentVersion}}; server at {{.TargetVersion}}">out of date</span>{{end}}
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|||||||
Reference in New Issue
Block a user