Merge pull request 'P2 completion (P2R-09/10/11/12/13/14, P2-16/17/18)' (#5) from p2-completion into main

This commit was merged in pull request #5.
This commit is contained in:
2026-05-04 13:19:05 +00:00
54 changed files with 3934 additions and 88 deletions
+6
View File
@@ -2,6 +2,10 @@
Project-specific rules for Claude when working in this repo.
## Repo
The repo lives inside a Gitea instance; `tea` CLI is available for use by agents
## Run `go vet` before every commit
CI runs `go vet ./...` and will fail the build on any vet error.
@@ -43,6 +47,8 @@ cp bin/restic-manager-agent \
/tmp/rm-smoke/data/agent-binaries/restic-manager-agent-linux-amd64
cp deploy/install/install.sh \
/tmp/rm-smoke/data/install/install.sh
cp deploy/install/install.ps1 \
/tmp/rm-smoke/data/install/install.ps1
cp deploy/install/restic-manager-agent.service \
/tmp/rm-smoke/data/install/restic-manager-agent.service
+262
View File
@@ -0,0 +1,262 @@
// announce.go — agent-side announce-and-approve enrolment (P2-18c).
//
// Run path: when the agent is not yet enrolled (no AgentToken) and
// -enroll-server is supplied without -enroll-token, run() switches
// into announce mode:
// 1. Load (or mint+persist) an Ed25519 keypair in agent.yaml.
// 2. POST {hostname, os, arch, agent_version, restic_version,
// public_key} to /api/agents/announce.
// 3. Print the fingerprint to stderr in a copy-friendly banner so
// the operator can compare it against the dashboard.
// 4. Open /ws/agent/pending?pending_id=…, sign the nonce with our
// private key, wait for an `enrolled` message.
// 5. On enrolled: persist the bearer + host_id (repo creds arrive
// later via config.update), return; the caller then drops into the
// normal WS run loop with the new bearer.
// 6. On reject: server closes the socket with code 4001; we exit
// with a clear message.
package main
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
stdhttp "net/http"
"os"
"strings"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// announceRequest mirrors the server's announceRequest. Duplicated
// here so cmd/agent stays decoupled from the http package.
//
// It is the JSON body doAnnounce POSTs to /api/agents/announce.
type announceRequest struct {
Hostname string `json:"hostname"`
OS string `json:"os"`
Arch string `json:"arch"`
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
// PublicKey is the Ed25519 public key, standard-base64 encoded.
PublicKey string `json:"public_key"`
}
// announceResponse is the JSON body doAnnounce decodes from a 200
// reply to the announce POST.
type announceResponse struct {
// PendingID is appended as the pending_id query parameter when
// dialling /ws/agent/pending.
PendingID string `json:"pending_id"`
// Fingerprint is decoded but not read by doAnnounce, which prints
// the fingerprint it computed locally from the public key instead.
Fingerprint string `json:"fingerprint"`
// HostnameCollision makes doAnnounce print an extra warning in the
// banner.
HostnameCollision bool `json:"hostname_collision"`
}
// pendingNonceMessage is the first message doAnnounce reads from the
// pending WebSocket.
type pendingNonceMessage struct {
Type string `json:"type"`
// Nonce is standard-base64; the decoded bytes are what gets signed.
Nonce string `json:"nonce"`
}
// pendingSignedMessage is the agent's reply to the nonce; doAnnounce
// sends it with Type "signed_nonce".
type pendingSignedMessage struct {
Type string `json:"type"`
// Signature is the Ed25519 signature over the decoded nonce,
// standard-base64 encoded.
Signature string `json:"signature"`
}
// pendingEnrolledMessage is the message doAnnounce waits for after
// sending the signed nonce. It is accepted only when Type is
// "enrolled" and Bearer is non-empty.
type pendingEnrolledMessage struct {
Type string `json:"type"`
// HostID is stored into cfg.HostID.
HostID string `json:"host_id"`
// Bearer is stored into cfg.AgentToken.
Bearer string `json:"bearer"`
}
// doAnnounce runs the full announce → wait-for-accept flow against
// serverURL.
//
// Steps: load (or mint) the announce keypair, POST the host details to
// /api/agents/announce, print the fingerprint banner to stderr, open
// /ws/agent/pending, sign the server's nonce, then block (up to one
// hour) for the `enrolled` message.
//
// On success the bearer and host_id are written into cfg and saved,
// and the secrets store is opened once so it exists on disk. Repo
// credentials are NOT delivered here; the store starts empty.
//
// Returns a non-nil error on any transport/parse failure, on a
// non-200 announce reply, on a malformed enrolled payload, or when
// the server closes the socket with code 4001 (admin rejected).
func doAnnounce(serverURL string, cfg *config.Config, agentVersion string) error {
	// Hard ceiling for the whole flow; individual steps use tighter
	// deadlines derived from this context.
	ctx, cancel := context.WithTimeout(context.Background(), 24*time.Hour)
	defer cancel()

	// Ensure we have a keypair.
	priv, pub, err := loadOrMintAnnounceKey(cfg)
	if err != nil {
		return fmt.Errorf("announce: keypair: %w", err)
	}
	fingerprint := store.FingerprintForKey(pub)

	snap, err := sysinfo.Collect(ctx, cfg.ResticPath)
	if err != nil {
		return fmt.Errorf("announce: sysinfo: %w", err)
	}

	// Normalise once so the POST and the WS dial agree: a trailing
	// slash on serverURL must not produce "//ws/agent/pending".
	baseURL := strings.TrimRight(serverURL, "/")

	// POST /api/agents/announce.
	body, err := json.Marshal(announceRequest{
		Hostname: snap.Hostname, OS: string(snap.OS), Arch: string(snap.Arch),
		AgentVersion: agentVersion, ResticVersion: snap.ResticVersion,
		PublicKey: base64.StdEncoding.EncodeToString(pub),
	})
	if err != nil {
		return fmt.Errorf("announce: encode request: %w", err)
	}
	// DefaultClient has no timeout of its own; bound the POST so an
	// unresponsive server cannot stall us for the full 24h ceiling.
	postCtx, postCancel := context.WithTimeout(ctx, 30*time.Second)
	defer postCancel()
	req, err := stdhttp.NewRequestWithContext(postCtx, stdhttp.MethodPost,
		baseURL+"/api/agents/announce",
		strings.NewReader(string(body)))
	if err != nil {
		// A malformed server URL lands here; without this check req
		// would be nil and the Header.Set below would panic.
		return fmt.Errorf("announce: build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("announce: POST: %w", err)
	}
	rawBody := readAllShort(res)
	_ = res.Body.Close() // best-effort; the body has already been consumed
	if res.StatusCode != stdhttp.StatusOK {
		return fmt.Errorf("announce: server returned %d: %s", res.StatusCode, rawBody)
	}
	var ar announceResponse
	if err := json.Unmarshal(rawBody, &ar); err != nil {
		return fmt.Errorf("announce: parse response: %w", err)
	}
	if ar.PendingID == "" {
		// Without an id there is nothing to attach the pending socket to.
		return fmt.Errorf("announce: server response missing pending_id: %s", rawBody)
	}

	// Print the fingerprint banner.
	fmt.Fprintln(os.Stderr, strings.Repeat("=", 64))
	fmt.Fprintln(os.Stderr, " Restic-manager: announce-and-approve enrolment")
	fmt.Fprintln(os.Stderr, "")
	fmt.Fprintln(os.Stderr, " Hostname : "+snap.Hostname)
	fmt.Fprintln(os.Stderr, " Server : "+serverURL)
	fmt.Fprintln(os.Stderr, " Pending ID : "+ar.PendingID)
	fmt.Fprintln(os.Stderr, " Fingerprint : "+fingerprint)
	if ar.HostnameCollision {
		fmt.Fprintln(os.Stderr, "")
		fmt.Fprintln(os.Stderr, " WARNING: another pending host already uses this hostname.")
		fmt.Fprintln(os.Stderr, " Confirm the fingerprint above matches what you see in the UI.")
	}
	fmt.Fprintln(os.Stderr, "")
	fmt.Fprintln(os.Stderr, " Compare the fingerprint with the one in the UI before accepting.")
	fmt.Fprintln(os.Stderr, " Waiting for an admin to accept (1 hour timeout)…")
	fmt.Fprintln(os.Stderr, strings.Repeat("=", 64))

	// Open /ws/agent/pending and run the nonce-sign handshake.
	wsURL := wsURLFromHTTP(baseURL) + "/ws/agent/pending?pending_id=" + ar.PendingID
	dialCtx, dialCancel := context.WithTimeout(ctx, 30*time.Second)
	c, dialRes, err := websocket.Dial(dialCtx, wsURL, nil)
	dialCancel()
	if err != nil {
		return fmt.Errorf("announce: dial pending ws: %w", err)
	}
	if dialRes != nil && dialRes.Body != nil {
		_ = dialRes.Body.Close()
	}
	defer func() { _ = c.CloseNow() }()

	// Read nonce.
	rctx, rcancel := context.WithTimeout(ctx, 30*time.Second)
	_, raw, err := c.Read(rctx)
	rcancel()
	if err != nil {
		return fmt.Errorf("announce: read nonce: %w", err)
	}
	var nm pendingNonceMessage
	if err := json.Unmarshal(raw, &nm); err != nil {
		return fmt.Errorf("announce: parse nonce: %w", err)
	}
	nonce, err := base64.StdEncoding.DecodeString(nm.Nonce)
	if err != nil {
		return fmt.Errorf("announce: decode nonce: %w", err)
	}

	// Prove possession of the private key by signing the raw nonce.
	sig := ed25519.Sign(priv, nonce)
	reply, err := json.Marshal(pendingSignedMessage{
		Type: "signed_nonce", Signature: base64.StdEncoding.EncodeToString(sig),
	})
	if err != nil {
		return fmt.Errorf("announce: encode signed nonce: %w", err)
	}
	wctx, wcancel := context.WithTimeout(ctx, 10*time.Second)
	err = c.Write(wctx, websocket.MessageText, reply)
	wcancel()
	if err != nil {
		return fmt.Errorf("announce: write signed nonce: %w", err)
	}

	// Block until enrolled (or reject / disconnect).
	rctx2, rcancel2 := context.WithTimeout(ctx, 1*time.Hour)
	defer rcancel2()
	_, raw2, err := c.Read(rctx2)
	if err != nil {
		// CloseError with our reject code 4001 = admin rejected.
		var ce websocket.CloseError
		if errors.As(err, &ce) && ce.Code == 4001 {
			return errors.New("announce: rejected by admin")
		}
		return fmt.Errorf("announce: wait for enrolled: %w", err)
	}
	var em pendingEnrolledMessage
	if err := json.Unmarshal(raw2, &em); err != nil {
		return fmt.Errorf("announce: parse enrolled: %w", err)
	}
	if em.Type != "enrolled" || em.Bearer == "" {
		return fmt.Errorf("announce: bad enrolled payload: %s", raw2)
	}

	// Persist the bearer + host_id.
	cfg.ServerURL = serverURL
	cfg.HostID = em.HostID
	cfg.AgentToken = em.Bearer
	if err := cfg.EnsureSecretsKey(); err != nil {
		return fmt.Errorf("announce: mint secrets key: %w", err)
	}
	// Note: repo creds aren't pushed in the enrolled message — the
	// server pushes them via `config.update` on first WS hello. The
	// secrets store will start empty and fill in then.
	if err := cfg.Save(); err != nil {
		return fmt.Errorf("announce: save config: %w", err)
	}

	// Touch the secrets store so it exists with the right perms.
	// The second result is deliberately dropped: EnsureSecretsKey
	// succeeded above, so the key is expected to be present, and
	// secrets.New reports any problem with the bytes it is given.
	// NOTE(review): confirm SecretsKeyBytes cannot fail at this point.
	keyBytes, _ := cfg.SecretsKeyBytes()
	if _, err := secrets.New(cfg.ResolvedSecretsPath(), keyBytes); err != nil {
		return fmt.Errorf("announce: open secrets store: %w", err)
	}
	fmt.Fprintln(os.Stderr, "Accepted. Bearer persisted; reconnecting via the standard WS.")
	return nil
}
// loadOrMintAnnounceKey yields the agent's Ed25519 announce keypair as
// (private, public, error).
//
// When cfg.AnnounceKey is empty a fresh pair is generated, the private
// key is stored base64-encoded in cfg.AnnounceKey, and cfg is saved.
// Otherwise the stored value is decoded and must be exactly
// ed25519.PrivateKeySize bytes; the public half is derived from it
// (an ed25519 private key carries its public key in the trailing
// 32 bytes).
func loadOrMintAnnounceKey(cfg *config.Config) (ed25519.PrivateKey, ed25519.PublicKey, error) {
	// First run: nothing persisted yet, so mint and save a new pair.
	if cfg.AnnounceKey == "" {
		pubKey, privKey, genErr := ed25519.GenerateKey(rand.Reader)
		if genErr != nil {
			return nil, nil, fmt.Errorf("generate keypair: %w", genErr)
		}
		cfg.AnnounceKey = base64.StdEncoding.EncodeToString(privKey)
		if saveErr := cfg.Save(); saveErr != nil {
			return nil, nil, fmt.Errorf("persist AnnounceKey: %w", saveErr)
		}
		return privKey, pubKey, nil
	}

	// Subsequent runs: reuse the persisted private key.
	decoded, decErr := base64.StdEncoding.DecodeString(cfg.AnnounceKey)
	if decErr != nil {
		return nil, nil, fmt.Errorf("decode AnnounceKey: %w", decErr)
	}
	if got := len(decoded); got != ed25519.PrivateKeySize {
		return nil, nil, fmt.Errorf("AnnounceKey must be %d bytes, got %d",
			ed25519.PrivateKeySize, got)
	}
	privKey := ed25519.PrivateKey(decoded)
	return privKey, privKey.Public().(ed25519.PublicKey), nil
}
// wsURLFromHTTP rewrites an http:// or https:// URL to its ws:// or
// wss:// counterpart. The match is on the exact lowercase scheme
// prefix; any other input is returned untouched.
func wsURLFromHTTP(httpURL string) string {
	if rest, secure := strings.CutPrefix(httpURL, "https://"); secure {
		return "wss://" + rest
	}
	if rest, plain := strings.CutPrefix(httpURL, "http://"); plain {
		return "ws://" + rest
	}
	return httpURL
}
// readAllShort reads up to 64KB of the response body and returns what
// it got. The announce response is small; the cap guards against
// pathological server replies.
//
// A single Read is allowed to return fewer bytes than are available
// (chunked or slow replies routinely do), so this loops until the
// body reports EOF/any error or the cap is reached. Read errors are
// not surfaced: the caller gets the bytes read so far and detects
// truncation when it parses them.
func readAllShort(res *stdhttp.Response) []byte {
	buf := make([]byte, 64*1024)
	n := 0
	for n < len(buf) {
		m, err := res.Body.Read(buf[n:])
		n += m
		if err != nil {
			// io.EOF or a transport error: either way, stop reading.
			break
		}
	}
	return buf[:n]
}
+85 -10
View File
@@ -9,6 +9,7 @@ import (
"os"
"os/signal"
"strconv"
"sync"
"syscall"
"time"
@@ -16,6 +17,7 @@ import (
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/runner"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/scheduler"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/service"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
@@ -32,6 +34,27 @@ func main() {
}
func run() error {
// Optional first positional verb for SCM control on Windows.
// `restic-manager-agent install|uninstall|start|stop` route into
// the service package; everything else falls through to the
// flag-driven default (which is what systemd / interactive runs
// hit). On non-Windows builds these verbs return a clear error.
if len(os.Args) > 1 {
switch os.Args[1] {
case "install":
return service.Install()
case "uninstall":
return service.Uninstall()
case "start":
return service.Start()
case "stop":
return service.Stop()
case "run":
// Strip the verb so flag.Parse sees the rest unchanged.
os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
}
}
configPath := flag.String("config", config.DefaultPath(), "path to agent.yaml")
enrollServer := flag.String("enroll-server", "", "server URL (used with -enroll-token to perform first-run enrollment)")
enrollToken := flag.String("enroll-token", "", "one-time enrollment token (operator copies this from the UI)")
@@ -58,8 +81,17 @@ func run() error {
return doEnroll(*enrollServer, *enrollToken, cfg, version)
}
// Announce-and-approve: -enroll-server set, no token, agent not
// yet enrolled. Run the announce flow inline; on success the cfg
// has the bearer + host_id and we drop into the normal run loop.
if !cfg.Enrolled() && *enrollServer != "" {
if err := doAnnounce(*enrollServer, cfg, version); err != nil {
return fmt.Errorf("announce: %w", err)
}
}
if !cfg.Enrolled() {
return fmt.Errorf("agent is not enrolled; run with -enroll-server and -enroll-token first (config %q)", *configPath)
return fmt.Errorf("agent is not enrolled; run with -enroll-server (and either -enroll-token or wait for admin to accept the announce) first (config %q)", *configPath)
}
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
@@ -170,6 +202,14 @@ type dispatcher struct {
resticBin string
secrets *secrets.Store
scheduler *scheduler.Scheduler
// Bandwidth caps in KB/s pushed via config.update. Mutated under
// bwMu by the config.update handler; read by runJob when building
// the runner. <=0 means "no cap" (do not pass --limit-* to restic).
// Per-job overrides on CommandRunPayload take precedence.
bwMu sync.Mutex
bwUpKBps int
bwDownKBps int
}
func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.Sender) error {
@@ -263,6 +303,24 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
slog.Warn("ws agent: unknown config.update slot, ignoring", "slot", p.Slot)
}
// Bandwidth caps ride independently of the slot — they're host-
// wide and apply to every restic invocation regardless of which
// credentials slot the job uses. nil pointer = no change in this
// push; non-nil = set to that value (≤0 clears the cap).
if p.BandwidthUpKBps != nil || p.BandwidthDownKBps != nil {
d.bwMu.Lock()
if p.BandwidthUpKBps != nil {
d.bwUpKBps = *p.BandwidthUpKBps
}
if p.BandwidthDownKBps != nil {
d.bwDownKBps = *p.BandwidthDownKBps
}
up, down := d.bwUpKBps, d.bwDownKBps
d.bwMu.Unlock()
slog.Info("ws agent: bandwidth caps updated",
"up_kbps", up, "down_kbps", down)
}
case api.MsgAgentUpdateAvail:
var p api.AgentUpdateAvailablePayload
_ = env.UnmarshalPayload(&p)
@@ -295,11 +353,25 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
// not on r). If you find yourself adding a new JobKind that
// needs delete authority, mirror the JobPrune pattern below
// — don't try to overload r.
// Resolve bandwidth caps: per-job override (if set) wins over the
// host-wide caps last pushed via config.update. <=0 means no cap.
d.bwMu.Lock()
upKBps, downKBps := d.bwUpKBps, d.bwDownKBps
d.bwMu.Unlock()
if p.BandwidthUpKBps != nil {
upKBps = *p.BandwidthUpKBps
}
if p.BandwidthDownKBps != nil {
downKBps = *p.BandwidthDownKBps
}
r := runner.New(runner.Config{
ResticBin: d.resticBin,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
ResticBin: d.resticBin,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
}, tx, time.Second)
switch p.Kind {
@@ -318,8 +390,9 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
}
slog.Info("agent: accepting backup job",
"job_id", p.JobID, "paths", paths, "excludes", p.Excludes, "tag", p.Tag)
hooks := runner.BackupHooks{Pre: p.PreHook, Post: p.PostHook}
go func() {
if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags); err != nil {
if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags, hooks); err != nil {
slog.Warn("agent: backup job failed", "job_id", p.JobID, "err", err)
return
}
@@ -381,10 +454,12 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
runCreds = ac
}
prr := runner.New(runner.Config{
ResticBin: d.resticBin,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
ResticBin: d.resticBin,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
}, tx, time.Second)
slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
go func() {
+8
View File
@@ -156,6 +156,10 @@ func run() error {
// shouldn't, but the queue exists either way).
pendingDrainTick := time.NewTicker(30 * time.Second)
defer pendingDrainTick.Stop()
// Pending-hosts expiry sweeper: drops announce rows past their 1h
// ceiling so the dashboard panel doesn't accumulate stale entries.
pendingExpiryTick := time.NewTicker(60 * time.Second)
defer pendingExpiryTick.Stop()
mt := maintenance.New(st)
go func() {
for {
@@ -176,6 +180,10 @@ func run() error {
}
case <-pendingDrainTick.C:
srv.DrainAllDue(ctx)
case <-pendingExpiryTick.C:
if n, err := st.DeleteExpiredPendingHosts(ctx, time.Now().UTC()); err == nil && n > 0 {
slog.Info("expired pending hosts swept", "n", n)
}
case <-maintenanceTick.C:
decisions, err := mt.Decide(ctx, time.Now().UTC())
if err != nil {
+133
View File
@@ -0,0 +1,133 @@
# install.ps1 — Windows installer for the restic-manager agent (P2-17).
#
# Usage (Run as administrator):
# $env:RM_SERVER = "https://restic.lab.example"
# $env:RM_TOKEN = "<one-time-token>" # omit for announce-and-approve
# iwr "$env:RM_SERVER/install/install.ps1" -UseBasicParsing | iex
#
# What it does:
# 1. checks for admin elevation
# 2. downloads the matching agent binary from the server
# 3. lays down C:\Program Files\restic-manager\ and
# C:\ProgramData\restic-manager\ (config + state)
# 4. enrolls (token flow if RM_TOKEN set, otherwise announce flow)
#    by running the agent with the right CLI flags and waiting
#    for it to exit successfully
# 5. registers the agent as a Windows service via the agent's own
#    `install` subcommand (which uses the SCM API) and starts it
# 6. surfaces (but does NOT disable) any existing scheduled tasks
# whose name contains "restic" so the operator can decide
#
# Idempotent — safe to re-run.
[CmdletBinding()]
param(
[string]$Server = $env:RM_SERVER,
[string]$Token = $env:RM_TOKEN,
[string]$InstallDir = 'C:\Program Files\restic-manager',
[string]$DataDir = 'C:\ProgramData\restic-manager'
)
$ErrorActionPreference = 'Stop'
# Test-Admin returns $true when the current Windows identity is in the
# built-in Administrators role, $false otherwise.
function Test-Admin {
    $adminRole = [System.Security.Principal.WindowsBuiltInRole]::Administrator
    $current   = [System.Security.Principal.WindowsIdentity]::GetCurrent()
    $principal = New-Object -TypeName System.Security.Principal.WindowsPrincipal -ArgumentList $current
    return $principal.IsInRole($adminRole)
}
# Detect-Arch maps the machine's processor architecture to the arch
# name used in the agent download URL ('amd64' or 'arm64'). Throws on
# anything else.
#
# A 32-bit PowerShell host on 64-bit Windows runs under WOW64, where
# PROCESSOR_ARCHITECTURE reports 'x86' and the real architecture is in
# PROCESSOR_ARCHITEW6432. Prefer the latter when it is set so the
# installer does not reject a supported 64-bit machine.
function Detect-Arch {
    $cpu = $env:PROCESSOR_ARCHITEW6432
    if (-not $cpu) {
        $cpu = $env:PROCESSOR_ARCHITECTURE
    }
    switch ($cpu) {
        'AMD64' { return 'amd64' }
        'ARM64' { return 'arm64' }
        default { throw "unsupported PROCESSOR_ARCHITECTURE: $cpu" }
    }
}
# Detect-ResticTasks prints every scheduled task whose name or path
# matches 'restic', with a ready-made Disable-ScheduledTask command
# for each. It only reports; it never changes a task. Any failure while
# listing tasks is caught and replaced by a hint to check the Task
# Scheduler UI.
function Detect-ResticTasks {
Write-Host ''
Write-Host '— Existing restic-named scheduled tasks (review manually) —'
try {
# -match is a regex match, so 'restic' matches anywhere in the
# task name or path.
$tasks = Get-ScheduledTask -ErrorAction SilentlyContinue |
Where-Object { $_.TaskName -match 'restic' -or $_.TaskPath -match 'restic' }
if ($tasks) {
foreach ($t in $tasks) {
Write-Host " * $($t.TaskPath)$($t.TaskName) state=$($t.State)"
Write-Host " Disable with: Disable-ScheduledTask -TaskName '$($t.TaskName)' -TaskPath '$($t.TaskPath)'"
}
} else {
Write-Host ' (none found)'
}
} catch {
Write-Host ' (Get-ScheduledTask failed; review the Task Scheduler UI manually)'
}
Write-Host ''
}
# --- preflight -------------------------------------------------------
# Refuse to continue without elevation or a server URL; everything
# below writes under Program Files / ProgramData and talks to $Server.
if (-not (Test-Admin)) {
    throw 'install.ps1: must be run from an elevated PowerShell (Run as administrator).'
}
if (-not $Server) {
    throw 'install.ps1: -Server (or $env:RM_SERVER) is required, e.g. https://restic.lab.example'
}
# Drop trailing slashes so URLs built as "$Server/..." never contain a
# doubled "//" when the operator supplies "https://host/".
$Server = $Server.TrimEnd('/')
$arch = Detect-Arch
Write-Host "install.ps1: server=$Server arch=$arch"

# --- directories -----------------------------------------------------
# -Force makes both calls succeed when the directory already exists.
New-Item -ItemType Directory -Force -Path $InstallDir | Out-Null
New-Item -ItemType Directory -Force -Path $DataDir | Out-Null

# --- download agent --------------------------------------------------
# Download next to the final path first so a failed transfer never
# clobbers an existing binary.
$agentExe = Join-Path $InstallDir 'restic-manager-agent.exe'
$tmpExe = "$agentExe.tmp"
$dlURL = "$Server/agent/binary?os=windows&arch=$arch"
Write-Host "install.ps1: downloading $dlURL"
Invoke-WebRequest -UseBasicParsing -Uri $dlURL -OutFile $tmpExe
# Atomic-ish replace: stop service if running so the .exe isn't busy.
# Best-effort: on a first install the service does not exist yet.
try { Stop-Service -Name 'restic-manager-agent' -ErrorAction SilentlyContinue } catch {}
Move-Item -Force -Path $tmpExe -Destination $agentExe
# --- enroll / announce -----------------------------------------------
# Run the agent in the foreground to enrol. With a token it performs
# the one-time token enrolment; without one it runs announce-and-approve.
# NOTE(review): in announce mode the agent's run() appears to continue
# into its normal run loop after acceptance instead of exiting, which
# would leave this call blocking — confirm against cmd/agent.
$cfgPath = Join-Path $DataDir 'agent.yaml'
# Named $agentArgs rather than $args: $args is a PowerShell automatic
# variable and should not be assigned to.
$agentArgs = @('-config', $cfgPath, '-enroll-server', $Server)
if ($Token) {
    $agentArgs += @('-enroll-token', $Token)
    Write-Host 'install.ps1: enrolling with one-time token'
} else {
    Write-Host 'install.ps1: no RM_TOKEN — running announce-and-approve flow.'
    Write-Host ' The fingerprint will print below. Compare it with the dashboard before clicking Accept.'
}
& $agentExe @agentArgs
if ($LASTEXITCODE -ne 0) {
    throw "install.ps1: agent enrolment failed (exit $LASTEXITCODE)"
}

# --- install + start service ----------------------------------------
# The 'install' subcommand registers the service via the SCM. A
# failure here is treated as "probably already registered" so that
# re-running the installer stays idempotent. A native command signals
# failure through its exit code, not a PowerShell exception, so the
# exit code is checked explicitly; the try/catch only covers failure
# to launch the executable at all.
try {
    & $agentExe install
    if ($LASTEXITCODE -ne 0) {
        Write-Host "install.ps1: service may already be registered (exit $LASTEXITCODE); continuing."
    }
} catch {
    Write-Host "install.ps1: service may already be registered ($_); continuing."
}
try {
    Start-Service -Name 'restic-manager-agent'
} catch {
    Write-Host "install.ps1: Start-Service failed ($_); check Event Viewer."
}

# Report (never modify) pre-existing restic scheduled tasks.
Detect-ResticTasks
Write-Host ''
Write-Host 'install.ps1: done.'
Write-Host " config : $cfgPath"
Write-Host " binary : $agentExe"
Write-Host " service: restic-manager-agent (Get-Service to inspect)"
@@ -0,0 +1,259 @@
# P2 Completion Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Close every remaining P2 task in `tasks.md`: P2R-09 (auto-init UX), P2R-10/11/12 (hooks), P2R-13 (bandwidth wiring + per-job override), P2R-14 (schedule next/last run), P2-16 (Windows svc), P2-17 (`install.ps1`), P2-18 (announce-and-approve).
**Architecture:** Server stays HTTP+WS; agent stays a single binary that auto-restages via `make build`. Hooks live on `source_groups` (and host-level defaults). Announce-and-approve adds a separate WS path (`/ws/agent/pending`) and a Pending hosts panel; token-flow stays default. Windows service support uses `golang.org/x/sys/windows/svc` behind a `//go:build windows` tag — Linux builds untouched. **Operator is away — make best guesses on small UX choices, but commit each item separately so the choices are reviewable.**
**Tech Stack:** Go 1.23+, chi router, modernc/sqlite, `coder/websocket`, `robfig/cron/v3`, HTMX + Tailwind, `golang.org/x/sys/windows/svc`, Ed25519 (stdlib).
---
## Pre-flight
- [ ] **Run baseline:** `go vet ./... && go build ./... && go test ./...` — must be green before starting. Restage agent + restart server (per CLAUDE.md restage block) so smoke env is warm.
## Order of execution
Smallest blast-radius first. UI polish → bandwidth → next/last → hooks → announce → Windows. Commit and restage at each task boundary. Run `go vet ./... && go test ./...` before every commit.
---
## Task 1 — P2R-13a: Wire bandwidth caps into restic invocations
**Files:**
- Modify: `internal/restic/runner.go` (add `LimitUploadKBps`, `LimitDownloadKBps` to `Env` or to a per-call options struct already present; emit `--limit-upload N`/`--limit-download N` on `restic backup|forget|prune|check|restore`)
- Modify: `internal/agent/runner/*.go` — pass host-wide caps into the runner. Caps come from `agent.config.Config` or are pushed via `config.update`. Decision: ship caps in the existing `config.update` envelope as new fields `bandwidth_up_kbps`, `bandwidth_down_kbps`. Server pushes on hello + on `PUT /api/hosts/{id}/bandwidth`.
- Modify: `internal/api/messages.go` — extend `ConfigUpdatePayload` with the two int pointers.
- Modify: `internal/server/ws/handler.go` (or wherever hello/config push lives) — include caps in the pushed config.
- Modify: `internal/server/http/host_bandwidth.go` — after `SetHostBandwidth`, fan out a `config.update` to the connected agent (mirror the credentials-edit path).
- Test: `internal/restic/runner_test.go` — assert flag injection.
- Test: `internal/server/ws/*_test.go` — assert config.update carries caps on hello and on edit.
- [ ] **Step 1.1** Add `LimitUploadKBps *int`, `LimitDownloadKBps *int` to whatever per-host config the runner already consults. Existing pattern is `restic.Env{}`; extend it.
- [ ] **Step 1.2** Failing test in `internal/restic/runner_test.go`: build a backup command with `LimitUploadKBps=1024`, assert the resulting argv contains `--limit-upload 1024`.
- [ ] **Step 1.3** Implement: prepend the flags in argv builders for `backup`, `forget`, `prune`, `check`, `restore`. Skip when nil/<=0.
- [ ] **Step 1.4** Wire `config.update` payload — server reads `Host.BandwidthUpKBps`/`DownKBps`, includes them in the existing `ConfigUpdatePayload` push on hello and on bandwidth edit (mirror cred-edit fan-out in `internal/server/http/host_credentials.go`).
- [ ] **Step 1.5** Agent applies caps: store in the in-memory dispatcher state on `config.update`, attach to every restic call.
- [ ] **Step 1.6** `go vet ./... && go test ./... && make build && <restage block>`. Commit:
```
agent+server: apply host bandwidth caps to restic invocations
```
## Task 2 — P2R-13b: Per-job override on Run-now confirm dialog
**Decision:** A small numeric input on the per-source-group Run-now button (and dashboard Run-all). Operator is away — keep it minimal: two optional inputs (up/down KB/s) on the dispatch endpoint; UI shows a `<details>` "Limit bandwidth for this run" disclosure with two number inputs.
**Files:**
- Modify: `internal/server/http/sources.go` (or wherever the per-group Run-now POST lives) — accept optional `bandwidth_up_kbps`/`bandwidth_down_kbps` form fields, pass through.
- Modify: dispatch path (`internal/server/dispatch_*.go` or `ws/handler.go` job-dispatch core) — accept overrides, include in the `command.run` payload.
- Modify: `internal/api/messages.go` — `CommandRunPayload` gains optional caps that take precedence over host-wide caps when present.
- Modify: agent dispatcher — use the payload override if present; otherwise fall back to the config caps.
- Modify: `web/templates/pages/host_sources.html` (and the schedules Run-now form) — `<details>` block.
- Test: HTTP test for the new form fields; agent runner test for override precedence.
- [ ] **Step 2.1** Failing test: POST to per-group Run-now with `bandwidth_up_kbps=512` → assert dispatched payload carries 512.
- [ ] **Step 2.2** Implement endpoint changes + payload extension.
- [ ] **Step 2.3** Agent override precedence test (payload wins over config).
- [ ] **Step 2.4** UI `<details>` blocks (one per Run-now form).
- [ ] **Step 2.5** Playwright spot-check via `:8080` smoke env: open Sources tab, expand the Run-now disclosure, fire with limit=128, then open the live job log and confirm the agent's restic argv (read `/tmp/rm-smoke/server.log` for the dispatched command — it logs argv) shows `--limit-upload 128`.
- [ ] **Step 2.6** Commit.
## Task 3 — P2R-14: Schedule "next run" / "last run"
**Files:**
- Modify: `internal/store/schedules.go` — add `NextRunAt(time.Time)` derivation helper and `LatestScheduledJobAt(host_id, schedule_id) (time.Time, error)` (or a single batched fetch for all schedules of a host).
- Modify: dashboard host row (`web/templates/partials/host_row.html`) — show "Next: …" and "Last: …" when there's a single covering schedule (already detected in slice 5).
- Modify: `web/templates/pages/host_schedules.html` — add Next/Last columns to the schedules table.
- Modify: relevant page handlers (`internal/server/http/ui_schedules.go`, dashboard handler) — populate the data.
- Test: `schedules_test.go` for next-run derivation (parse cron, compute next from a fixed `now`).
- [ ] **Step 3.1** Add `NextRun(cronExpr string, from time.Time) (time.Time, error)` helper using `robfig/cron/v3`'s `Parse(...).Next(from)`. Test with three crons.
- [ ] **Step 3.2** Add `LatestJobByActorKindForSchedule(host_id, schedule_id) (time.Time, status, error)` query against `jobs` (filter `actor_kind='schedule'` AND `schedule_id=?`, ORDER BY `started_at` DESC LIMIT 1).
- [ ] **Step 3.3** Wire schedules-page handler to populate Next/Last per row; render relative time + ISO tooltip (mirror existing `formatRelTime` template helper if it exists; otherwise use a simple "5m ago" helper).
- [ ] **Step 3.4** Wire dashboard row: when single covering schedule, surface "Next: 03:00" / "Last: 8h ago — succeeded".
- [ ] **Step 3.5** Playwright spot-check: a host with a schedule shows Next/Last; pause it → Next becomes "—" / "(paused)".
- [ ] **Step 3.6** Commit.
## Task 4 — P2R-09: Auto-init UX polish
**Files:**
- Modify: `web/templates/pages/host_repo.html` — danger-zone re-init button + two-step confirm (type the host name).
- Modify: `internal/server/http/ui_repo.go` (or new `repo_reinit.go`) — `POST /hosts/{id}/repo/reinit` admin-only, audit-logged. Server runs `restic init --force` (or wipes-then-inits — pick the safer of the two; restic doesn't truly wipe a repo, the operator must clear the bucket. **Best guess:** dispatch a normal `init` job with a flag that re-runs even if the repo claims to exist; if restic refuses, surface "the repo on the remote already has data — clear it manually before re-init" via the job log).
- Modify: host detail page header / vitals strip — surface init result line. Use the existing latest-`init`-job query to render "repo ready · initialised <relative time> ago" or "init failed · job N · retry".
- Test: HTTP test for re-init endpoint (auth, audit, host-name confirm); template test that the result line renders for both states.
- [ ] **Step 4.1** Add helper: `LatestJobByKind(host_id, "init")` — already exists from P2R-06 (`store.LatestJobByKind`). Reuse.
- [ ] **Step 4.2** Render init line into vitals strip; show "init failed" amber when latest init failed.
- [ ] **Step 4.3** Implement `POST /hosts/{id}/repo/reinit` handler — admin role check, requires a `confirm_hostname` form field that must equal `host.Name`, returns 400 otherwise. Dispatches a fresh `init` job.
- [ ] **Step 4.4** Add danger-zone re-init form to `host_repo.html` (currently disabled per slice 4). Two-step confirm with the typed hostname.
- [ ] **Step 4.5** Playwright: visit `/hosts/{id}/repo`, click re-init, type wrong hostname → blocked; type right hostname → dispatches init job → returns to live log.
- [ ] **Step 4.6** Commit.
## Task 5 — P2R-10: Hook schema (migration 0010)
**Files:**
- Create: `internal/store/migrations/0010_hooks.sql`
- `ALTER TABLE source_groups ADD COLUMN pre_hook BLOB;` (AEAD ciphertext, NULLable)
- `ALTER TABLE source_groups ADD COLUMN post_hook BLOB;`
- `ALTER TABLE hosts ADD COLUMN pre_hook_default BLOB;`
- `ALTER TABLE hosts ADD COLUMN post_hook_default BLOB;`
- All four are AEAD ciphertext (existing `crypto.AEAD`); BLOB column type.
- Modify: `internal/store/types.go` — add `PreHook *string` (decrypted), `PostHook *string` to `SourceGroup`; same to `Host`.
- Modify: `internal/store/sources.go` + `internal/store/hosts.go` — getters/setters encrypt on write, decrypt on read. Pass `crypto.AEAD` through (pattern mirrors `host_credentials.go`).
- Test: encrypt/decrypt round-trip; setting `nil` clears the column.
- [ ] **Step 5.1** Write migration SQL. Column-level ALTERs only (per CLAUDE.md).
- [ ] **Step 5.2** Update store types + getters/setters with AEAD encrypt/decrypt. Mirror `internal/store/host_credentials.go` patterns exactly.
- [ ] **Step 5.3** Round-trip test: set hook on a source group; reload; assert plaintext returned. Set nil; assert nil after reload.
- [ ] **Step 5.4** `go vet && go test`. Commit.
## Task 6 — P2R-11: Agent execution of hooks
**Files:**
- Modify: `internal/api/messages.go` — `ConfigUpdatePayload` (or the per-source-group bundle inside `ScheduleSetPayload`) carries `PreHook`, `PostHook` plaintext (server has decrypted by then; wire is authenticated WS, same trust boundary as repo creds).
- Modify: agent dispatcher — for `kind=backup` only:
- Run `pre_hook` (if present) via `os/exec` with the host shell (`/bin/sh -c` on Linux, `cmd.exe /C` on Windows). Capture stdout+stderr → JobLog with `hook:` prefix. Non-zero exit aborts the backup, marks the job failed with `pre_hook` error.
- Run `post_hook` (if present) **always** after the backup, with `RM_JOB_STATUS=succeeded|failed` env var. Capture into JobLog, prefix `hook:`. Non-zero exit on post_hook does NOT change job status (warning logged).
- Skip both for `kind` ∈ {forget, prune, check, unlock, init} per spec.md §14.3.
- Test: dispatcher test with a `pre_hook` that exits 1 → backup not started; `post_hook` always runs and sees `RM_JOB_STATUS`.
- [ ] **Step 6.1** Plumb hooks through `ScheduleSetPayload` source-group bundle + per-group Run-now `command.run` payload (override host-default with group hook if both present). Server-side resolution: host default if group hook is empty.
- [ ] **Step 6.2** Agent dispatcher: factor hook execution into `internal/agent/runner/hooks.go`. Use `exec.CommandContext`, set env, plumb output to existing JobLog stream with `Source: "hook"` (or prefix the log lines `hook: …`).
- [ ] **Step 6.3** Failing test in `internal/agent/runner/runner_test.go` (create file if absent): `pre_hook=/bin/false` → job fails with `pre_hook failed (exit 1)` and the actual restic backup never runs (assert via mock-restic shim).
- [ ] **Step 6.4** Test: `post_hook` runs even when backup fails; receives `RM_JOB_STATUS=failed`.
- [ ] **Step 6.5** Test: hooks skipped on `forget`/`prune`/`check`/`unlock` jobs.
- [ ] **Step 6.6** `go vet && go test && make build && <restage block>`. Commit.
## Task 7 — P2R-12: Hook editor UI
**Files:**
- Modify: `web/templates/pages/source_group_edit.html` (new or extend existing source-group form) — `<textarea>` for pre_hook, `<textarea>` for post_hook, with the warning banner: "this hook runs as the agent service user (root on Linux; LocalSystem on Windows)".
- Modify: source-group HTTP handler (`internal/server/http/sources.go`) — accept hook fields on POST/PUT, encrypt-and-persist via store.
- Host-default hooks: the "Settings" tab on host detail is still inert (per P1-25), so instead of creating a new section there, either add a new sub-tab or extend the Repo page. **Decision:** add `pre_hook_default` / `post_hook_default` to the Repo page under a new "Hooks" section since Settings is still inert.
- Modify: source-group form admin-only check; post-only edit allowed by operators? **Decision:** admin-only edit per spec; render but disable for operators.
- Modify: audit-log writer — emit `source_group.hook_updated` and `host.default_hook_updated` events (without the hook body).
- Test: HTTP test for create + update; admin-only enforcement; audit row written without secret.
- [ ] **Step 7.1** Source-group form extension + handler wiring.
- [ ] **Step 7.2** Repo page Hooks section (host defaults).
- [ ] **Step 7.3** Audit entries.
- [ ] **Step 7.4** Playwright: as admin, set a `pre_hook` of `echo hello`, fire Run-now, open live log, confirm `hook: hello` line appears.
- [ ] **Step 7.5** Commit.
## Task 8 — P2-18a: Announce schema + endpoint
**Files:**
- Create: `internal/store/migrations/0011_pending_hosts.sql`
```sql
CREATE TABLE pending_hosts (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
os TEXT NOT NULL,
arch TEXT NOT NULL,
agent_version TEXT NOT NULL,
restic_version TEXT NOT NULL,
public_key BLOB NOT NULL, -- 32-byte Ed25519
fingerprint TEXT NOT NULL, -- "SHA256:hex"
announced_from_ip TEXT NOT NULL,
first_seen_at TEXT NOT NULL,
last_seen_at TEXT NOT NULL,
expires_at TEXT NOT NULL
);
CREATE INDEX pending_hosts_expires ON pending_hosts(expires_at);
CREATE INDEX pending_hosts_fingerprint ON pending_hosts(fingerprint);
```
- Create: `internal/store/pending_hosts.go` — `CreatePendingHost`, `GetPendingHostByFingerprint`, `ListPendingHosts`, `DeletePendingHost`, `TouchPendingHost`, `DeleteExpiredPendingHosts`.
- Create: `internal/server/http/announce.go` — `POST /api/agents/announce` accepts `{hostname, os, arch, agent_version, restic_version, public_key (base64)}`. Validates protocol_version implicitly via `agent_version` check. Token-bucket rate limit per source IP (10/min). Global cap 100 pending rows. Returns `{fingerprint, pending_id, hostname_collision: bool}`.
- Test: `announce_test.go` — happy path; rate limit; cap; collision flag.
- [ ] **Step 8.1** Migration + store layer + tests.
- [ ] **Step 8.2** Endpoint + tests (use a fake clock + in-process token bucket).
- [ ] **Step 8.3** Commit.
## Task 9 — P2-18b: Pending WS + accept/reject
**Files:**
- Create: `internal/server/ws/pending.go` — `GET /ws/agent/pending` upgrade. Server issues a 32-byte nonce; agent signs it with its Ed25519 private key; server verifies against the `public_key` stored on the pending row keyed by the supplied `pending_id`. If valid, hold the connection open; on accept, push a single `enrolled` message containing `{bearer_token, repo_credentials_aead_blob}` and close cleanly. On reject, close with code 4001 + reason "rejected".
- Create: `internal/server/http/pending.go` — admin-only `POST /api/pending-hosts/{id}/accept` (atomically: mint bearer, decrypt admin-supplied repo creds (passed in form), promote pending row → real `hosts` row, push `enrolled` to the open WS, audit-log) and `POST /api/pending-hosts/{id}/reject` (delete row + close socket).
- Modify: server `main.go` route registration.
- Test: integration test — fake agent opens pending WS, admin POST /accept, agent receives bearer.
- [ ] **Step 9.1** Pending WS handler with nonce-sign verify.
- [ ] **Step 9.2** Accept/reject endpoints. Accept reuses the existing token-consume path internally (mints persistent bearer from `crypto.RandomToken`-style helper, inserts host row + `host_credentials`).
- [ ] **Step 9.3** Tests.
- [ ] **Step 9.4** Commit.
## Task 10 — P2-18c: Agent announce path
**Files:**
- Modify: `cmd/agent/main.go` — when `RM_TOKEN` is unset, switch to announce mode instead of erroring out. `RM_SERVER` still required.
- Create: `internal/agent/announce/announce.go` — generate-or-load Ed25519 keypair (persisted as a file alongside `secrets.enc`, mode 0600). POST `/api/agents/announce`. Open `/ws/agent/pending`. Wait. On `enrolled` message, persist bearer to `agent.yaml`, persist repo creds via existing secrets store, exit announce mode and reconnect via the normal WS path.
- Modify: `deploy/install/install.sh` — when `RM_TOKEN` is missing, run agent in announce mode and `journalctl --follow` until the agent prints the fingerprint, print it to the operator's terminal in big copy-friendly format, then keep following until enrolled.
- Test: end-to-end test in `internal/server/...` using a fake agent.
- [ ] **Step 10.1** Keypair generation + persistence.
- [ ] **Step 10.2** Announce client + pending WS client; print `SHA256:…` fingerprint to stdout in a banner.
- [ ] **Step 10.3** Install script branch.
- [ ] **Step 10.4** Playwright: register a host via announce mode (run agent locally with no RM_TOKEN), log into UI, see Pending hosts panel with the fingerprint, click Accept, confirm host appears.
- [ ] **Step 10.5** Commit.
## Task 11 — P2-18d: Pending hosts UI panel
**Files:**
- Modify: `web/templates/pages/dashboard.html` — add Pending hosts panel above the host list when any pending rows exist.
- Modify: dashboard handler — `Store.ListPendingHosts(now)` (auto-skips expired).
- Add buttons → POST `/api/pending-hosts/{id}/accept` and `/reject` via HTMX.
- Background sweeper for `DeleteExpiredPendingHosts` every 60s (mirror the existing offline-sweeper goroutine pattern).
- [ ] **Step 11.1** Sweeper goroutine.
- [ ] **Step 11.2** Dashboard handler + template.
- [ ] **Step 11.3** Accept form must include the same repo URL/user/pw fields as the token-mint form (admin still supplies repo creds at accept time).
- [ ] **Step 11.4** Playwright sweep.
- [ ] **Step 11.5** Commit.
## Task 12 — P2-16: Windows service integration
**Decision:** Cannot test on Windows from WSL. Goal is a clean compile under `GOOS=windows GOARCH=amd64` and code that follows the canonical `golang.org/x/sys/windows/svc/example` pattern. Untestable beyond compile + manual review; mark in commit message.
**Files:**
- Create: `internal/agent/service/service_windows.go` (build tag `//go:build windows`) — implements `svc.Handler`. `Execute` starts the agent's main loop in a goroutine, listens for `svc.Stop`/`svc.Shutdown`, cancels ctx, waits.
- Create: `internal/agent/service/service_other.go` (build tag `//go:build !windows`) — stub `RunService` that just runs the agent loop in the foreground.
- Create: `internal/agent/service/install_windows.go` — `Install`, `Uninstall`, `Start`, `Stop` thin wrappers around `mgr` package.
- Modify: `cmd/agent/main.go` — sub-commands: `install`, `uninstall`, `start`, `stop`, `run` (default). `run` delegates to `service.Run()` which on Windows checks `svc.IsWindowsService()` and dispatches accordingly.
- Test: `internal/agent/service/service_windows_test.go` (build-tagged) for argv parsing only — actual SCM interaction can't be tested in CI.
- [ ] **Step 12.1** Implement the svc.Handler shell.
- [ ] **Step 12.2** Install/uninstall wrappers (use `mgr.ConnectLocal()`, `m.CreateService(name, exepath, mgr.Config{...}, "run")`).
- [ ] **Step 12.3** Cross-compile check: `GOOS=windows GOARCH=amd64 go build ./cmd/agent` must succeed.
- [ ] **Step 12.4** Commit with note "untested on Windows; compile-verified only".
## Task 13 — P2-17: install.ps1
**Files:**
- Create: `deploy/install/install.ps1` — PowerShell 5.1+ compatible. Checks admin elevation. Downloads agent binary from `$RM_SERVER/agent/binary?os=windows&arch=amd64`. Drops it at `C:\Program Files\restic-manager\restic-manager-agent.exe`. Runs `restic-manager-agent.exe install` (registers service). Starts it. Detects existing tasks named `*restic*` via `Get-ScheduledTask` and prints them — does not auto-disable. Writes `C:\ProgramData\restic-manager\agent.yaml` with `RM_SERVER` + `RM_TOKEN` (or no token if announce-mode).
- Modify: `internal/server/http/install.go` (or wherever install scripts are served) to also serve `/install/install.ps1`.
- Modify: CLAUDE.md restage block to also stage `install.ps1`.
- [ ] **Step 13.1** Write the script.
- [ ] **Step 13.2** Wire serving + restage.
- [ ] **Step 13.3** Smoke parse: `pwsh -NoProfile -Command "Get-Command -Syntax (Get-ChildItem deploy/install/install.ps1)"` if pwsh is on PATH, else `Set-StrictMode` parse via `pwsh -c "$null = [scriptblock]::Create((Get-Content deploy/install/install.ps1 -Raw))"`. Skip if no pwsh available — note in commit.
- [ ] **Step 13.4** Commit.
## Task 14 — Final integration sweep
- [ ] **Step 14.1** `go vet ./... && go test ./... -race`. Full build. Restage. Restart server.
- [ ] **Step 14.2** Playwright walkthrough on `:8080`: login → dashboard shows pending-hosts empty state → create source group → set a `pre_hook` → Run-now with bandwidth override → confirm hook fires + bandwidth applied → schedules tab shows next/last → repo page shows init-OK line → re-init flow gated by typed hostname.
- [ ] **Step 14.3** Update `tasks.md`: tick P2R-09, P2R-10, P2R-11, P2R-12, P2R-13, P2R-14, P2-16, P2-17, P2-18 done. Update Phase 2 acceptance line items as satisfied.
- [ ] **Step 14.4** Open PR `p2-completion → main` with a summary of every item closed.
---
## Decisions made on the operator's behalf (away)
1. **Bandwidth UI for per-job override:** small `<details>` disclosure under each Run-now button. Simpler than a modal; matches the rest of the app's progressive-disclosure style.
2. **Re-init UX:** server dispatches a fresh `init` job; if restic refuses because the repo already exists, surfaces the error in the job log and instructs the operator to clear the remote bucket. We don't try to forcibly wipe — too dangerous, and the agent doesn't have credentials to wipe S3/B2/etc generically.
3. **Hooks editor lives on the Repo page (host defaults) + on the source-group edit form (per-group override).** Skips inventing a new "Settings" tab since that surface is still inert.
4. **Announce flow:** admin still supplies repo creds at accept time (same form as the token-mint flow). The pending row only carries identity-of-the-endpoint material, never repo creds.
5. **Windows service:** compile-verified only; untested. Commit message will say so.
+7
View File
@@ -62,6 +62,13 @@ type Config struct {
LegacyRepoURL string `yaml:"repo_url,omitempty"`
LegacyRepoPassword string `yaml:"repo_password,omitempty"`
// AnnounceKey is the base64-encoded Ed25519 private key used by
// announce-and-approve enrolment (P2-18). Generated on first
// announce, persisted so the agent can re-attach to the same
// pending row across restarts. 64 bytes when decoded.
// Empty for token-flow enrolments.
AnnounceKey string `yaml:"announce_key,omitempty"`
// path is the file we loaded from. Used by Save.
path string `yaml:"-"`
}
+106
View File
@@ -0,0 +1,106 @@
// hooks.go — pre/post backup hooks for the agent runner (P2R-11).
//
// Hooks fire only for backup jobs (the runner's other kinds —
// init/forget/prune/check/unlock — call shell scripts that touch
// repo internals; running operator hooks for those would be
// surprising). Hook bodies arrive plaintext on the wire (server
// decrypted before the WS push). The agent never persists them
// to disk; they live in memory for the lifetime of one job.
//
// Failure semantics:
// - pre_hook non-zero exit aborts the backup: the runner returns
// the error, the job is recorded as failed, and the actual
// restic invocation never runs.
// - post_hook non-zero exit is logged with a warning prefix in
// the job log but does NOT change the job status — the operator
// wants the backup result preserved even if the cleanup step
// misbehaved.
//
// Streaming: each line of the hook's stdout/stderr is shipped as a
// log.stream envelope with payload prefixed `hook(<phase>): ` (for
// example `hook(pre): `) so the live log viewer can visually separate
// it from restic's own output.
package runner
import (
"bufio"
"context"
"fmt"
"io"
"os/exec"
"runtime"
"sync/atomic"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// runHook executes script via the host shell (see defaultShell) and
// streams its stdout/stderr into the job log via pumpHookLines.
//
// Parameters:
//   - ctx: cancellation kills the subprocess (exec.CommandContext).
//   - jobID, phase: exported to the hook as RM_JOB_ID and
//     RM_HOOK_PHASE; phase also labels log lines and error messages.
//   - script: the hook body. An empty script is a no-op returning nil.
//   - status: exported as RM_JOB_STATUS when non-empty (empty for
//     pre-hooks; the final job status — "succeeded" or "failed" — for
//     post-hooks).
//   - seq: the job's shared log-line sequence counter.
//
// Returns a non-nil error when the output pipes cannot be created,
// the shell cannot be started, or Wait reports a failure (non-zero
// exit, or the process being killed because ctx was cancelled).
func (r *Runner) runHook(ctx context.Context, jobID, phase, script, status string, seq *atomic.Int64) error {
	if script == "" {
		return nil
	}
	shell, flag := defaultShell()
	cmd := exec.CommandContext(ctx, shell, flag, script)
	if runtime.GOOS == "windows" {
		// A Unix-style PATH is meaningless to cmd.exe and would leave
		// the hook unable to locate any executable. cmd.Env is still
		// nil here, so Environ() yields a copy of the agent's own
		// environment (PATH, SystemRoot, ComSpec, ...).
		cmd.Env = cmd.Environ()
	} else {
		// Minimal, predictable environment on Unix.
		cmd.Env = []string{
			"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
		}
	}
	if status != "" {
		cmd.Env = append(cmd.Env, "RM_JOB_STATUS="+status)
	}
	cmd.Env = append(cmd.Env, "RM_JOB_ID="+jobID, "RM_HOOK_PHASE="+phase)

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("hook %s: stdout pipe: %w", phase, err)
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return fmt.Errorf("hook %s: stderr pipe: %w", phase, err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("hook %s: start: %w", phase, err)
	}

	// Both pumps must finish reading before Wait is called: Wait closes
	// the pipes, and calling it while reads are outstanding is incorrect
	// per the os/exec documentation.
	done := make(chan struct{}, 2)
	go func() { r.pumpHookLines(stdout, "stdout", phase, jobID, seq); done <- struct{}{} }()
	go func() { r.pumpHookLines(stderr, "stderr", phase, jobID, seq); done <- struct{}{} }()
	<-done
	<-done

	if werr := cmd.Wait(); werr != nil {
		return fmt.Errorf("hook %s exited non-zero: %w", phase, werr)
	}
	return nil
}
// pumpHookLines reads rd line by line and ships each line as a
// log.stream envelope whose payload is prefixed with "hook(<phase>): "
// so the live log can visually separate hook output from restic's.
//
// stream names the originating pipe ("stdout" or "stderr") and is
// recorded on every envelope; seq is the job's shared line counter.
//
// A single line may be at most 256 KiB. If the scanner stops early
// (over-long line or read error) a notice line is emitted and the
// rest of rd is drained, so the hook process can never block on a
// full pipe while runHook is waiting for it to exit.
func (r *Runner) pumpHookLines(rd io.Reader, stream, phase, jobID string, seq *atomic.Int64) {
	prefix := "hook(" + phase + "): "
	emit := func(text string) {
		env, err := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
			JobID:   jobID,
			Seq:     seq.Add(1),
			TS:      time.Now().UTC(),
			Stream:  api.LogStream(stream),
			Payload: prefix + text,
		})
		if err != nil {
			// Nothing sensible to send; drop this line rather than
			// forwarding a half-built envelope.
			return
		}
		// Log streaming is best-effort: a send failure must not abort
		// the hook itself.
		_ = r.tx.Send(env)
	}

	scanner := bufio.NewScanner(rd)
	scanner.Buffer(make([]byte, 0, 64*1024), 256*1024)
	for scanner.Scan() {
		emit(scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		emit("[output truncated: " + err.Error() + "]")
		// Keep consuming so the writer side never stalls.
		_, _ = io.Copy(io.Discard, rd)
	}
}
// defaultShell picks the bootstrap interpreter for a hook and the flag
// that makes it run a single script string: `<shell> <flag> "<script>"`.
// Windows gets cmd.exe /C; every other OS gets /bin/sh -c. Hook authors
// who want another interpreter (PowerShell, bash, ...) invoke it from
// inside the script body.
func defaultShell() (string, string) {
	switch runtime.GOOS {
	case "windows":
		return "cmd.exe", "/C"
	default:
		return "/bin/sh", "-c"
	}
}
+90
View File
@@ -0,0 +1,90 @@
// hooks_test.go — pre/post backup hook semantics (P2R-11).
package runner
import (
"context"
"strings"
"testing"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestPreHookFailureAbortsBackup: pre_hook exits 1 → restic never
// runs, job is recorded failed with the hook's error.
//
// Payload decode errors are treated as test failures: silently
// skipping an undecodable log line could hide the very
// "restic-was-here" marker this test is looking for.
func TestPreHookFailureAbortsBackup(t *testing.T) {
	t.Parallel()
	// Restic script that records every invocation. If restic was
	// called we'll see "restic-was-here" in the captured log.
	bin := setupScript(t, `echo "restic-was-here"`)
	tx := &fakeSender{}
	r := New(Config{ResticBin: bin}, tx, 0)

	err := r.RunBackup(context.Background(), "job-pre",
		[]string{"/etc"}, nil, []string{"tag"},
		BackupHooks{Pre: "exit 1"})
	if err == nil {
		t.Fatal("expected RunBackup to return an error from failed pre_hook")
	}
	if !strings.Contains(err.Error(), "pre_hook failed") {
		t.Fatalf("error message: %q (want 'pre_hook failed')", err)
	}

	// job.finished arrived with status=failed.
	finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
	var fin api.JobFinishedPayload
	if uerr := finEnv.UnmarshalPayload(&fin); uerr != nil {
		t.Fatalf("decode job.finished payload: %v", uerr)
	}
	if fin.Status != api.JobFailed {
		t.Fatalf("status: %q, want failed", fin.Status)
	}

	// restic must NOT have run.
	for _, env := range tx.envs {
		if env.Type != api.MsgLogStream {
			continue
		}
		var l api.LogStreamLine
		if uerr := env.UnmarshalPayload(&l); uerr != nil {
			t.Fatalf("decode log.stream payload: %v", uerr)
		}
		if strings.Contains(l.Payload, "restic-was-here") {
			t.Fatal("restic was invoked despite pre_hook failure")
		}
	}
}
// TestPostHookRunsAfterBackup checks the happy path for post hooks:
// after a backup that succeeds, the post_hook runs and its environment
// carries RM_JOB_STATUS=succeeded and RM_HOOK_PHASE=post.
func TestPostHookRunsAfterBackup(t *testing.T) {
	t.Parallel()

	// Stub restic that answers the subcommands RunBackup issues.
	resticStub := setupScript(t, `
case "$1" in
backup) echo '{"message_type":"summary","snapshot_id":"abc"}' ;;
snapshots) echo '[]' ;;
stats) echo '{"total_size":0,"total_uncompressed_size":0,"snapshots_count":0,"total_file_count":0,"total_blob_count":0}' ;;
*) exit 0 ;;
esac
`)
	sender := &fakeSender{}
	run := New(Config{ResticBin: resticStub}, sender, 0)

	hooks := BackupHooks{Post: `echo "post-status=$RM_JOB_STATUS phase=$RM_HOOK_PHASE"`}
	err := run.RunBackup(context.Background(), "job-post",
		[]string{"/etc"}, nil, nil, hooks)
	if err != nil {
		t.Fatalf("RunBackup: %v", err)
	}

	// The hook echoes its env; look for that echo among the streamed
	// log lines.
	sawPostLine := func() bool {
		for _, env := range sender.envs {
			if env.Type == api.MsgLogStream {
				var line api.LogStreamLine
				_ = env.UnmarshalPayload(&line)
				if strings.Contains(line.Payload, "post-status=succeeded") &&
					strings.Contains(line.Payload, "phase=post") {
					return true
				}
			}
		}
		return false
	}
	if !sawPostLine() {
		t.Fatal("post_hook output not found in log.stream envelopes")
	}
}
+48 -7
View File
@@ -30,6 +30,12 @@ type Config struct {
RepoURL string
RepoUsername string
RepoPassword string
// Bandwidth caps in KB/s applied to every restic invocation.
// <=0 means "no cap". Per-job override: callers that build a
// runner per-dispatch can pass the override value here directly.
LimitUploadKBps int
LimitDownloadKBps int
}
// Runner owns the restic invocations.
@@ -54,10 +60,12 @@ func New(cfg Config, tx Sender, progressMinPeriod time.Duration) *Runner {
// resticEnv builds the shared restic.Env from r.cfg.
func (r *Runner) resticEnv() restic.Env {
return restic.Env{
Bin: r.cfg.ResticBin,
RepoURL: r.cfg.RepoURL,
RepoUsername: r.cfg.RepoUsername,
RepoPassword: r.cfg.RepoPassword,
Bin: r.cfg.ResticBin,
RepoURL: r.cfg.RepoURL,
RepoUsername: r.cfg.RepoUsername,
RepoPassword: r.cfg.RepoPassword,
LimitUploadKBps: r.cfg.LimitUploadKBps,
LimitDownloadKBps: r.cfg.LimitDownloadKBps,
}
}
@@ -108,15 +116,34 @@ func (r *Runner) sendFinished(jobID string, finishedAt time.Time, err error, sta
_ = r.tx.Send(finEnv)
}
// BackupHooks bundles the optional pre/post shell snippets that fire
// around a backup. Empty fields skip that phase. Resolved server-side
// (group → host default) before dispatch; the agent just executes
// whatever arrives in the payload.
type BackupHooks struct {
// Pre runs before restic is invoked. If it fails, RunBackup reports
// the job as finished with that error and returns without running
// restic.
Pre string
// Post runs after the backup attempt, whether it succeeded or not,
// with RM_JOB_STATUS set to "succeeded" or "failed". A failure here
// is only logged as a warning; it does not alter the job's outcome.
Post string
}
// RunBackup executes a backup job and reports back via the sender.
// Returns nil on a clean (or "incomplete-but-snapshot-created") finish.
func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, tags []string) error {
func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, tags []string, hooks BackupHooks) error {
startedAt := time.Now().UTC()
r.sendStarted(jobID, api.JobBackup, startedAt)
env := r.resticEnv()
var seq atomic.Int64
// pre_hook: non-zero exit aborts the backup. The job is recorded
// as failed with the hook's error and restic never runs.
if hooks.Pre != "" {
if err := r.runHook(ctx, jobID, "pre", hooks.Pre, "", &seq); err != nil {
finishedAt := time.Now().UTC()
r.sendFinished(jobID, finishedAt, err, nil)
return fmt.Errorf("pre_hook failed: %w", err)
}
}
env := r.resticEnv()
lastProgress := time.Now()
handle := func(stream string, line string, ev any) {
@@ -165,6 +192,20 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
if summary != nil {
statsBlob, _ = json.Marshal(summary)
}
// post_hook: always runs regardless of backup outcome. Receives
// RM_JOB_STATUS=succeeded|failed in env. Non-zero exit is logged
// but does not change the recorded job status.
if hooks.Post != "" {
status := "succeeded"
if err != nil {
status = "failed"
}
if perr := r.runHook(ctx, jobID, "post", hooks.Post, status, &seq); perr != nil {
slog.Warn("runner: post_hook exited non-zero", "job_id", jobID, "err", perr)
}
}
r.sendFinished(jobID, finishedAt, err, statsBlob)
// On a successful backup, refresh the server's snapshot projection.
+103
View File
@@ -0,0 +1,103 @@
//go:build windows
// install_windows.go — thin wrappers around the Service Control
// Manager via golang.org/x/sys/windows/svc/mgr. Used by the agent's
// `install` / `uninstall` / `start` / `stop` subcommands.
//
// UNTESTED in CI. Mirrors the canonical example shape; if you need
// to extend this, prefer copying from x/sys/windows/svc/example
// over inventing new patterns.
package service
import (
"fmt"
"os"
"path/filepath"
"golang.org/x/sys/windows/svc/mgr"
)
// Install creates the agent's entry in the Service Control Manager.
// The service image is the binary that is executing right now
// (resolved to an absolute path), it is launched with the single
// argument "run", and it is configured for automatic start. No
// account is set in the config, so the SCM default (LocalSystem)
// applies. Install refuses to proceed when a service named
// ServiceName already exists.
func Install() error {
	binPath, err := os.Executable()
	if err != nil {
		return fmt.Errorf("install: locate executable: %w", err)
	}
	if binPath, err = filepath.Abs(binPath); err != nil {
		return fmt.Errorf("install: absolutise path: %w", err)
	}

	scm, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("install: connect SCM: %w", err)
	}
	defer scm.Disconnect()

	// A successful open means the name is already taken; bail out
	// rather than touch the existing registration.
	prior, openErr := scm.OpenService(ServiceName)
	if openErr == nil {
		_ = prior.Close()
		return fmt.Errorf("service %q already installed; uninstall first", ServiceName)
	}

	cfg := mgr.Config{
		StartType:   mgr.StartAutomatic,
		DisplayName: "Restic-manager agent",
		Description: "Backs up this host on the schedule the central restic-manager dictates.",
	}
	created, err := scm.CreateService(ServiceName, binPath, cfg, "run")
	if err != nil {
		return fmt.Errorf("install: create service: %w", err)
	}
	defer created.Close()

	return nil
}
// Uninstall deletes the agent's entry from the Service Control
// Manager. It does not stop the service itself — the caller is
// expected to have done that — and whatever error the SCM reports for
// the delete is returned wrapped.
func Uninstall() error {
	scm, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("uninstall: connect SCM: %w", err)
	}
	defer scm.Disconnect()

	entry, err := scm.OpenService(ServiceName)
	if err != nil {
		return fmt.Errorf("uninstall: open service: %w", err)
	}
	defer entry.Close()

	delErr := entry.Delete()
	if delErr == nil {
		return nil
	}
	return fmt.Errorf("uninstall: delete service: %w", delErr)
}
// Start asks the SCM to start the installed service.
//
// This is not a no-op when the service is already running: the SCM
// reports an error in that case and Start returns it. Every failure
// is wrapped with the step that produced it, matching Install and
// Uninstall, so the CLI output says which SCM call went wrong.
func Start() error {
	m, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("start: connect SCM: %w", err)
	}
	defer m.Disconnect()

	s, err := m.OpenService(ServiceName)
	if err != nil {
		return fmt.Errorf("start: open service: %w", err)
	}
	defer s.Close()

	if err := s.Start(); err != nil {
		return fmt.Errorf("start: start service: %w", err)
	}
	return nil
}
// Stop sends a stop control to the installed service and returns
// once the SCM has accepted (or rejected) the request; it does not
// wait for the service to reach the stopped state. Failures are
// wrapped with the step that produced them, matching Install and
// Uninstall.
func Stop() error {
	// SERVICE_CONTROL_STOP from the Win32 service API. Declared as an
	// untyped constant so it converts to the control-code type that
	// Control expects without pulling another import into this file.
	const serviceControlStop = 0x00000001

	m, err := mgr.Connect()
	if err != nil {
		return fmt.Errorf("stop: connect SCM: %w", err)
	}
	defer m.Disconnect()

	s, err := m.OpenService(ServiceName)
	if err != nil {
		return fmt.Errorf("stop: open service: %w", err)
	}
	defer s.Close()

	if _, err := s.Control(serviceControlStop); err != nil {
		return fmt.Errorf("stop: send stop control: %w", err)
	}
	return nil
}
+44
View File
@@ -0,0 +1,44 @@
//go:build !windows
// service_other.go — non-Windows fallback for the service package.
// Linux uses systemd to wrap the agent; the binary itself just runs
// in the foreground. Run() therefore just executes the agent loop
// and returns. install/uninstall sub-commands return a clear error
// directing the operator at the install.sh + systemd unit shipped
// in deploy/install/.
package service
import (
"context"
"errors"
)
// AgentRun is the signature of the agent's main loop as handed over
// by main. The Windows build of this package declares the identical
// type, which keeps the call site free of build tags.
type AgentRun func(ctx context.Context) error

// Run invokes agentRun in the foreground with a fresh cancellable
// context and hands back whatever it returns. Nothing in here cancels
// the context before agentRun returns; on Unix the process lifecycle
// belongs to systemd (or whichever supervisor launched the binary).
func Run(agentRun AgentRun) error {
	loopCtx, release := context.WithCancel(context.Background())
	defer release()

	result := agentRun(loopCtx)
	return result
}
// Install is the non-Windows stand-in for service registration. It
// always fails, pointing the operator at the systemd unit instead.
func Install() error {
	return errUnsupported("install")
}

// Uninstall is the non-Windows stand-in for service removal; it
// always fails.
func Uninstall() error {
	return errUnsupported("uninstall")
}

// Start is the non-Windows stand-in for starting the installed
// service; it always fails.
func Start() error {
	return errUnsupported("start")
}

// Stop is the non-Windows stand-in for stopping the installed
// service; it always fails.
func Stop() error {
	return errUnsupported("stop")
}

// errUnsupported builds the error shared by the four stubs above;
// verb names the sub-command the operator tried to run.
func errUnsupported(verb string) error {
	msg := "service " + verb + " is Windows-only; use the systemd unit on Linux"
	return errors.New(msg)
}
+93
View File
@@ -0,0 +1,93 @@
//go:build windows
// service_windows.go — Service Control Manager integration for the
// agent on Windows (P2-16). Implements the svc.Handler interface so
// `restic-manager-agent run` works under both interactive and SCM
// contexts. install/uninstall live in install_windows.go.
//
// UNTESTED on Windows in this repo's CI (the runners are Linux).
// The shape mirrors the canonical example in
// golang.org/x/sys/windows/svc/example. Treat any deviation from
// that example as suspicious.
package service
import (
"context"
"errors"
"log/slog"
"golang.org/x/sys/windows/svc"
)
// ServiceName is the SCM identifier for the agent service.
const ServiceName = "restic-manager-agent"
// AgentRun is the function the service handler calls to start the
// agent's main loop. Pass cmd/agent's run-loop entry point at the
// call site so this package stays free of cross-cmd imports.
type AgentRun func(ctx context.Context) error
// Run is the agent's entry point on Windows. When the process was
// launched by the Service Control Manager it hands control to the SCM
// dispatcher with a handler wrapping agentRun; otherwise (e.g. started
// from a console while debugging) it calls agentRun directly in the
// foreground with a cancellable background context. A failure to
// determine which of the two applies is returned as-is.
func Run(agentRun AgentRun) error {
	underSCM, err := svc.IsWindowsService()
	switch {
	case err != nil:
		return err
	case underSCM:
		return svc.Run(ServiceName, &handler{run: agentRun})
	}

	loopCtx, release := context.WithCancel(context.Background())
	defer release()
	return agentRun(loopCtx)
}
// handler implements svc.Handler. Execute is called once when the
// service is started. We spawn the agent loop in a goroutine and
// listen for SCM Stop / Shutdown notifications, cancelling the
// context to wind down cleanly.
type handler struct {
// run is the agent main loop supplied by the caller of Run.
run AgentRun
}
// Execute drives the service for its whole lifetime. It reports
// StartPending, launches the agent loop, reports Running, and then
// waits for either an SCM request or the loop ending by itself.
//
// The first return value is always false. The second is 0 when the
// loop returned nil or context.Canceled, and 1 for any other error.
// Requests other than Interrogate, Stop and Shutdown are ignored (the
// switch has no default branch).
func (h *handler) Execute(_ []string, req <-chan svc.ChangeRequest, status chan<- svc.Status) (bool, uint32) {
// Only stop and shutdown are advertised to the SCM as accepted controls.
const accepted = svc.AcceptStop | svc.AcceptShutdown
status <- svc.Status{State: svc.StartPending}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Buffered with capacity 1 so the goroutine's single send never
// blocks, even if Execute returns without receiving from it.
doneCh := make(chan error, 1)
go func() {
doneCh <- h.run(ctx)
}()
status <- svc.Status{State: svc.Running, Accepts: accepted}
for {
select {
case c := <-req:
switch c.Cmd {
case svc.Interrogate:
// Echo back the status the SCM already holds for us.
status <- c.CurrentStatus
case svc.Stop, svc.Shutdown:
slog.Info("svc: stop requested")
// Cancel first, then report StopPending, then block until the
// agent loop has actually returned.
cancel()
status <- svc.Status{State: svc.StopPending}
if err := <-doneCh; err != nil && !errors.Is(err, context.Canceled) {
slog.Warn("svc: agent loop exited with error", "err", err)
return false, 1
}
return false, 0
}
case err := <-doneCh:
// Agent loop exited on its own — uncommon (only via signal
// or fatal error). Surface as an SCM stop.
if err != nil && !errors.Is(err, context.Canceled) {
slog.Warn("svc: agent loop exited unexpectedly", "err", err)
return false, 1
}
return false, 0
}
}
}
+21
View File
@@ -130,6 +130,19 @@ type CommandRunPayload struct {
Tag string `json:"tag,omitempty"`
ForgetGroups []ForgetGroup `json:"forget_groups,omitempty"`
RequiresAdminCreds bool `json:"requires_admin_creds,omitempty"`
// Per-job bandwidth caps in KB/s. When nil, the agent uses the
// host-wide caps it received via config.update. When non-nil,
// the override wins for this job only — even a non-nil zero
// pointer means "no cap for this job" (caller's explicit choice).
BandwidthUpKBps *int `json:"bandwidth_up_kbps,omitempty"`
BandwidthDownKBps *int `json:"bandwidth_down_kbps,omitempty"`
// Hooks run only for kind=backup. Server resolves source-group
// hook → host default → empty before dispatching, so the agent
// just executes whatever is here.
PreHook string `json:"pre_hook,omitempty"`
PostHook string `json:"post_hook,omitempty"`
}
// CommandCancelPayload is the server → agent cancel signal.
@@ -306,6 +319,14 @@ type ConfigUpdatePayload struct {
RepoCredential string `json:"repo_credential,omitempty"` // sensitive (for rest server basic auth)
HookShell string `json:"hook_shell,omitempty"`
Slot string `json:"slot,omitempty"`
// Bandwidth caps in KB/s. Pointer semantics so the server can
// disambiguate "no change in this push" (nil → omitted on the
// wire) from "explicitly clear the cap" (zero or negative value).
// Applied to every restic invocation as --limit-upload /
// --limit-download. Per-job overrides ride on CommandRunPayload.
BandwidthUpKBps *int `json:"bandwidth_up_kbps,omitempty"`
BandwidthDownKBps *int `json:"bandwidth_down_kbps,omitempty"`
}
// AgentUpdateAvailablePayload — informational only; the agent does
+38 -23
View File
@@ -47,6 +47,37 @@ type Env struct {
RepoPassword string // doubles as RESTIC_PASSWORD and (for rest:) HTTP basic-auth password
ExtraEnv map[string]string // any other RESTIC_* / passthrough
WorkDir string // CWD; default = current
// Bandwidth caps in KB/s. <=0 means "no cap" (omit the flag).
// Emitted as restic global flags --limit-upload / --limit-download
// before the subcommand on every invocation.
LimitUploadKBps int
LimitDownloadKBps int
}
// globalArgs assembles the global flags that must precede the restic
// subcommand. At present these are only the bandwidth caps: a
// positive LimitUploadKBps / LimitDownloadKBps yields the matching
// --limit-upload / --limit-download flag followed by its decimal
// value, upload first. Zero or negative caps contribute nothing, and
// the result is nil when neither cap is set.
func (e Env) globalArgs() []string {
	limits := [...]struct {
		flag string
		kbps int
	}{
		{"--limit-upload", e.LimitUploadKBps},
		{"--limit-download", e.LimitDownloadKBps},
	}

	var flags []string
	for _, lim := range limits {
		if lim.kbps <= 0 {
			continue
		}
		flags = append(flags, lim.flag, fmt.Sprintf("%d", lim.kbps))
	}
	return flags
}
// resticCmd is the single place where restic invocations are
// constructed. It puts the global flags from globalArgs in front of
// the caller's subcommand arguments, binds the command to ctx, and
// fills in the environment (envSlice) and working directory (WorkDir)
// from e. Routing every subcommand through here means none of them
// can forget the bandwidth caps.
func (e Env) resticCmd(ctx context.Context, sub ...string) *exec.Cmd {
	argv := e.globalArgs()
	argv = append(argv, sub...)

	c := exec.CommandContext(ctx, e.Bin, argv...)
	c.Env, c.Dir = e.envSlice(), e.WorkDir
	return c
}
// EventKind enumerates what we care about in restic's --json output
@@ -110,9 +141,7 @@ func (e Env) RunBackup(ctx context.Context, paths, excludes, tags []string, hand
}
args = append(args, paths...)
cmd := exec.CommandContext(ctx, e.Bin, args...)
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
cmd := e.resticCmd(ctx, args...)
stdout, err := cmd.StdoutPipe()
if err != nil {
@@ -215,9 +244,7 @@ func (e Env) RunForget(ctx context.Context, groups []ForgetGroup, handle LineHan
}
args := []string{"forget", "--json", "--tag", g.Tag}
args = append(args, g.Policy.args()...)
cmd := exec.CommandContext(ctx, e.Bin, args...)
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
cmd := e.resticCmd(ctx, args...)
if err := runWithPump(cmd, handle); err != nil {
return err
}
@@ -232,9 +259,7 @@ func (e Env) RunForget(ctx context.Context, groups []ForgetGroup, handle LineHan
// <id> at <url>" on success, "config file already exists" on a
// re-init attempt, etc.).
func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
cmd := exec.CommandContext(ctx, e.Bin, "init")
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
cmd := e.resticCmd(ctx, "init")
// Sniff for "config file already exists" on stderr; if we see it
// we'll treat the non-zero exit as a soft success — running init
@@ -272,10 +297,7 @@ func (e Env) RunInit(ctx context.Context, handle LineHandler) error {
// support that's useful for our purposes). We tee everything to the
// handler so the live log is the operator's progress bar.
func (e Env) RunPrune(ctx context.Context, handle LineHandler) error {
cmd := exec.CommandContext(ctx, e.Bin, "prune")
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
return runWithPump(cmd, handle)
return runWithPump(e.resticCmd(ctx, "prune"), handle)
}
// runWithPump starts the configured cmd, fans stdout+stderr into
@@ -313,10 +335,7 @@ func runWithPump(cmd *exec.Cmd, handle LineHandler) error {
// RunUnlock executes `restic unlock`. Returns nil on a clean exit.
func (e Env) RunUnlock(ctx context.Context, handle LineHandler) error {
cmd := exec.CommandContext(ctx, e.Bin, "unlock")
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
return runWithPump(cmd, handle)
return runWithPump(e.resticCmd(ctx, "unlock"), handle)
}
// RepoStats mirrors `restic stats --json --mode raw-data` output.
@@ -333,9 +352,7 @@ type RepoStats struct {
// caller can still log it. Returns an error if no JSON-shaped line
// arrived on stdout.
func (e Env) RunStats(ctx context.Context, handle LineHandler) (*RepoStats, error) {
cmd := exec.CommandContext(ctx, e.Bin, "stats", "--json", "--mode", "raw-data")
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
cmd := e.resticCmd(ctx, "stats", "--json", "--mode", "raw-data")
var out *RepoStats
capture := func(stream, line string, ev any) {
if stream == "stdout" && strings.HasPrefix(line, "{") {
@@ -378,9 +395,7 @@ func (e Env) RunCheck(ctx context.Context, subsetPct int, handle LineHandler) (C
if subsetPct > 0 {
args = append(args, "--read-data-subset", fmt.Sprintf("%d%%", subsetPct))
}
cmd := exec.CommandContext(ctx, e.Bin, args...)
cmd.Env = e.envSlice()
cmd.Dir = e.WorkDir
cmd := e.resticCmd(ctx, args...)
var res CheckResult
sniff := func(stream, line string, ev any) {
+37
View File
@@ -174,6 +174,43 @@ func TestRunStatsErrorsWithoutJSON(t *testing.T) {
}
}
// TestBandwidthLimitFlagsInjected runs RunUnlock against a fake restic
// binary that prints its own arguments, then checks the captured
// output for the --limit-* flags each Env configuration should (or
// should not) produce.
func TestBandwidthLimitFlagsInjected(t *testing.T) {
	type scenario struct {
		label  string
		env    Env
		expect []string
	}
	scenarios := []scenario{
		{"both caps", Env{LimitUploadKBps: 1024, LimitDownloadKBps: 512}, []string{"--limit-upload 1024", "--limit-download 512"}},
		{"only upload", Env{LimitUploadKBps: 256}, []string{"--limit-upload 256"}},
		{"zero means omit", Env{LimitUploadKBps: 0, LimitDownloadKBps: 0}, nil},
		{"negative means omit", Env{LimitUploadKBps: -1}, nil},
	}

	for _, sc := range scenarios {
		t.Run(sc.label, func(t *testing.T) {
			e := sc.env
			e.Bin = setupScriptBin(t, `echo "$@"`)

			captured, sink := captureLines()
			if err := e.RunUnlock(context.Background(), sink); err != nil {
				t.Fatalf("RunUnlock: %v", err)
			}
			argv := strings.Join(*captured, "\n")

			// No expectations means no limit flag may appear at all.
			if len(sc.expect) == 0 {
				for _, flag := range []string{"--limit-upload", "--limit-download"} {
					if strings.Contains(argv, flag) {
						t.Fatalf("expected no limit flags; got: %s", argv)
					}
				}
				return
			}
			for _, fragment := range sc.expect {
				if !strings.Contains(argv, fragment) {
					t.Fatalf("want %q in argv; got: %s", fragment, argv)
				}
			}
		})
	}
}
func TestRunStatsZeroSnapshots(t *testing.T) {
// Confirms RunStats succeeds and returns a valid *RepoStats when the
// repo has no snapshots (snapshots_count=0). A regression that
+211
View File
@@ -0,0 +1,211 @@
// announce.go — POST /api/agents/announce: agent without a token
// announces itself with a freshly-minted Ed25519 public key, server
// stashes a pending_hosts row, admin compares fingerprints in the
// UI before accepting (P2-18a).
//
// Guards (per spec):
// - Per-source-IP token-bucket rate limit (10/min).
// - Global cap of 100 in-flight pending rows; further announces
// get 503 with a hint.
// - Public key must be exactly 32 bytes (Ed25519). Anything else
// 400-rejected.
//
// Hostname collisions are NOT rejected — multiple announces with
// the same hostname can be legitimate (re-running install on the
// same box). The UI flags collisions for the admin to disambiguate.
package http
import (
"crypto/ed25519"
"encoding/base64"
"encoding/json"
stdhttp "net/http"
"strings"
"sync"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// Tunables — exposed as vars so tests can lower them. Defaults mirror
// the spec's recommendations.
var (
announceMaxPerMin = 10
announceGlobalCap = 100
)
// announceRequest is the wire shape POST /api/agents/announce takes.
// PublicKey is standard-alphabet base64, with or without trailing '='
// padding. The stdlib decoder itself is strict about padding; the
// handler copes by decoding with base64.StdEncoding first and falling
// back to base64.RawStdEncoding. Hostname, OS, Arch and PublicKey are
// mandatory; the two version fields may be empty.
type announceRequest struct {
Hostname string `json:"hostname"`
OS string `json:"os"`
Arch string `json:"arch"`
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
PublicKey string `json:"public_key"` // base64
}
// announceResponse is what the agent gets back. Fingerprint is the
// canonical "SHA256:hex" the operator compares against the UI.
// HostnameCollision warns the install script that another pending
// row already uses the same hostname.
type announceResponse struct {
PendingID string `json:"pending_id"`
Fingerprint string `json:"fingerprint"`
HostnameCollision bool `json:"hostname_collision"`
}
// rateBucket is a tiny per-IP token bucket. last is the timestamp of
// the most recent refill; tokens is the current bucket level. Refill
// rate is announceMaxPerMin tokens/minute, burst = announceMaxPerMin.
type rateBucket struct {
	tokens float64
	last   time.Time
}

// announceBucketIdleTTL is how long a bucket may go untouched before
// it is reaped. A bucket refills from empty to full in exactly one
// minute, so after a minute of idleness it is indistinguishable from
// a freshly created one and dropping it cannot change any decision.
const announceBucketIdleTTL = time.Minute

// announceLimiter holds one bucket per source IP. The endpoint it
// guards is unauthenticated, so the set of keys is attacker
// influenced; idle buckets are therefore reaped (at most once per
// announceBucketIdleTTL, piggy-backed on allow) to keep the map from
// growing without bound.
type announceLimiter struct {
	mu        sync.Mutex
	buckets   map[string]*rateBucket
	lastSweep time.Time // last time reapIdle actually scanned the map
}

// newAnnounceLimiter returns an empty limiter ready for use.
func newAnnounceLimiter() *announceLimiter {
	return &announceLimiter{buckets: map[string]*rateBucket{}}
}

// reapIdle deletes buckets that have not been touched for at least
// announceBucketIdleTTL. It scans at most once per TTL so the cost is
// amortised across calls. The caller must hold l.mu.
func (l *announceLimiter) reapIdle(now time.Time) {
	if now.Sub(l.lastSweep) < announceBucketIdleTTL {
		return
	}
	l.lastSweep = now
	for ip, b := range l.buckets {
		if now.Sub(b.last) >= announceBucketIdleTTL {
			delete(l.buckets, ip)
		}
	}
}

// allow returns true and consumes a token if the IP's bucket has at
// least one token, else returns false. Capacity = announceMaxPerMin.
// now is injected so tests can drive the clock.
func (l *announceLimiter) allow(ip string, now time.Time) bool {
	l.mu.Lock()
	defer l.mu.Unlock()

	l.reapIdle(now)

	// Named capacity rather than cap to avoid shadowing the builtin.
	capacity := float64(announceMaxPerMin)
	b, ok := l.buckets[ip]
	if !ok {
		b = &rateBucket{tokens: capacity, last: now}
		l.buckets[ip] = b
	}

	// Refill at capacity tokens per minute. A non-positive elapsed
	// time (same instant, or a clock that stepped backwards) refills
	// nothing and leaves last untouched.
	if elapsed := now.Sub(b.last).Seconds(); elapsed > 0 {
		b.tokens += (elapsed / 60.0) * capacity
		if b.tokens > capacity {
			b.tokens = capacity
		}
		b.last = now
	}

	if b.tokens < 1.0 {
		return false
	}
	b.tokens--
	return true
}
// handleAnnounce serves POST /api/agents/announce. The endpoint is
// public (no auth), so every input is treated as hostile:
//
//   - requests are rate limited per source IP (429 + Retry-After);
//   - the body is capped before JSON decoding so a client cannot make
//     the server buffer an arbitrarily large document;
//   - hostname, os, arch and public_key are mandatory (400);
//   - public_key must be base64 (standard or URL-safe alphabet, padded
//     or not) and decode to exactly ed25519.PublicKeySize bytes (400);
//   - once announceGlobalCap unexpired pending rows exist, further
//     announces get 503.
//
// On success a pending_hosts row valid for one hour is created and the
// response carries its ID, the key fingerprint, and whether another
// pending row already uses the same hostname (informational only).
func (s *Server) handleAnnounce(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	// A legitimate announce is a few hundred bytes; 64 KiB is generous.
	const maxAnnounceBody = 64 << 10

	now := time.Now().UTC()

	// Rate limit by source IP. Strip port — the limit is per host,
	// not per outbound source port.
	ip := remoteIP(r)
	if !s.announceRL.allow(ip, now) {
		w.Header().Set("Retry-After", "60")
		writeJSONError(w, stdhttp.StatusTooManyRequests, "rate_limited",
			"too many announces from this source; retry in a minute")
		return
	}

	r.Body = stdhttp.MaxBytesReader(w, r.Body, maxAnnounceBody)
	var req announceRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}
	if req.Hostname == "" || req.OS == "" || req.Arch == "" || req.PublicKey == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"hostname, os, arch, public_key are required")
		return
	}

	// Accept every common base64 flavour. The stdlib decoders are each
	// strict about alphabet and padding, so try them in turn.
	var (
		keyBytes []byte
		decoded  bool
	)
	for _, enc := range []*base64.Encoding{
		base64.StdEncoding,
		base64.RawStdEncoding,
		base64.URLEncoding,
		base64.RawURLEncoding,
	} {
		if k, decErr := enc.DecodeString(req.PublicKey); decErr == nil {
			keyBytes, decoded = k, true
			break
		}
	}
	if !decoded {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_public_key",
			"public_key must be base64")
		return
	}
	if len(keyBytes) != ed25519.PublicKeySize {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_public_key",
			"public_key must be 32 bytes (Ed25519)")
		return
	}

	// Global cap (cheap query — index on expires_at).
	count, err := s.deps.Store.CountPendingHosts(r.Context(), now)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}
	if count >= announceGlobalCap {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "pending_cap_reached",
			"too many in-flight pending hosts; ask an admin to clear the queue")
		return
	}

	// Hostname collision flag (informational). Counted before the
	// insert so the new row does not collide with itself.
	colls, err := s.deps.Store.CountPendingHostsByHostname(r.Context(), req.Hostname, now)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	ph := &store.PendingHost{
		ID:              ulid.Make().String(),
		Hostname:        req.Hostname,
		OS:              req.OS,
		Arch:            req.Arch,
		AgentVersion:    req.AgentVersion,
		ResticVersion:   req.ResticVersion,
		PublicKey:       keyBytes,
		Fingerprint:     store.FingerprintForKey(keyBytes),
		AnnouncedFromIP: ip,
		FirstSeenAt:     now,
		LastSeenAt:      now,
		ExpiresAt:       now.Add(time.Hour),
	}
	if err := s.deps.Store.CreatePendingHost(r.Context(), ph); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	writeJSON(w, stdhttp.StatusOK, announceResponse{
		PendingID:         ph.ID,
		Fingerprint:       ph.Fingerprint,
		HostnameCollision: colls > 0,
	})
}
// remoteIP derives the client address used for rate limiting and for
// the announced_from_ip column.
//
// If the request carries an X-Forwarded-For header, the first entry
// in the chain wins; an empty first entry is ignored. Otherwise the
// result is r.RemoteAddr without its :port suffix, and without the
// square brackets Go puts around IPv6 literals ("[::1]:5000" → "::1").
//
// NOTE(review): the header is honoured unconditionally — nothing here
// checks that the request came through a trusted proxy. When the
// server is reachable directly, a client can set X-Forwarded-For to
// any value and get a fresh rate-limit bucket per request. Gate this
// on the deployment's trusted-proxy setting.
func remoteIP(r *stdhttp.Request) string {
	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
		first, _, _ := strings.Cut(xff, ",")
		if ip := strings.TrimSpace(first); ip != "" {
			return ip
		}
		// Malformed header (e.g. ", 10.0.0.1"): fall back to the
		// socket address rather than keying everything on "".
	}

	addr := r.RemoteAddr
	// Bracketed IPv6 literal: the host is whatever sits between the
	// brackets, regardless of what follows.
	if strings.HasPrefix(addr, "[") {
		if end := strings.Index(addr, "]"); end > 0 {
			return addr[1:end]
		}
	}
	if i := strings.LastIndex(addr, ":"); i >= 0 {
		return addr[:i]
	}
	return addr
}
+164
View File
@@ -0,0 +1,164 @@
// announce_test.go — covers POST /api/agents/announce: happy path,
// invalid public key, hostname collision flag, rate limit, global
// cap (P2-18a).
package http
import (
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/json"
stdhttp "net/http"
"strings"
"testing"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// newKeypair generates a throwaway Ed25519 key pair and returns only
// the public half; the private key is discarded. Key-generation
// failure aborts the calling test.
func newKeypair(t *testing.T) ed25519.PublicKey {
	t.Helper()

	publicKey, _, genErr := ed25519.GenerateKey(rand.Reader)
	if genErr == nil {
		return publicKey
	}
	t.Fatalf("ed25519: %v", genErr)
	return nil // unreachable: Fatalf stops the test
}
// postAnnounce POSTs req as JSON to url+"/api/agents/announce" and
// returns the response status, headers and complete body. Any failure
// to build, send or read the exchange aborts the calling test.
//
// The body is drained to EOF. A single Read call is allowed to return
// fewer bytes than are available, so reading once into a fixed buffer
// could hand callers a truncated JSON document.
func postAnnounce(t *testing.T, url string, req announceRequest) (status int, header stdhttp.Header, body []byte) {
	t.Helper()

	payload, err := json.Marshal(req)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	r, err := stdhttp.NewRequest("POST", url+"/api/agents/announce", bytes.NewReader(payload))
	if err != nil {
		t.Fatalf("new request: %v", err)
	}
	r.Header.Set("Content-Type", "application/json")

	res, err := stdhttp.DefaultClient.Do(r)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()

	var buf bytes.Buffer
	if _, err := buf.ReadFrom(res.Body); err != nil {
		t.Fatalf("read body: %v", err)
	}
	return res.StatusCode, res.Header, buf.Bytes()
}
// TestAnnounceHappyPath sends one well-formed announce and checks the
// 200 response shape (pending ID, SHA256-prefixed fingerprint, no
// collision flag) and that the pending row can be read back from the
// store.
func TestAnnounceHappyPath(t *testing.T) {
	t.Parallel()
	_, baseURL, db := newTestServerWithHub(t)

	req := announceRequest{
		Hostname: "alice", OS: "linux", Arch: "amd64",
		AgentVersion: "1.0", ResticVersion: "0.17",
		PublicKey: base64.StdEncoding.EncodeToString(newKeypair(t)),
	}
	code, _, raw := postAnnounce(t, baseURL, req)
	if code != stdhttp.StatusOK {
		t.Fatalf("status: %d body=%s", code, raw)
	}

	var resp announceResponse
	if err := json.Unmarshal(raw, &resp); err != nil {
		t.Fatalf("unmarshal: %v body=%s", err, raw)
	}
	switch {
	case resp.PendingID == "":
		t.Fatal("missing pending_id")
	case !strings.HasPrefix(resp.Fingerprint, "SHA256:"):
		t.Fatalf("fingerprint shape: %q", resp.Fingerprint)
	case resp.HostnameCollision:
		t.Fatal("first announce shouldn't be a collision")
	}

	// The row must be retrievable under the ID we were handed.
	if _, err := db.GetPendingHost(context.Background(), resp.PendingID); err != nil {
		t.Fatalf("pending row missing: %v", err)
	}
}
// TestAnnounceRejectsBadKey announces with a public key that is valid
// base64 but not 32 bytes long and expects a 400.
func TestAnnounceRejectsBadKey(t *testing.T) {
	t.Parallel()
	_, baseURL, _ := newTestServerWithHub(t)

	shortKey := base64.StdEncoding.EncodeToString([]byte("too-short"))
	code, _, _ := postAnnounce(t, baseURL, announceRequest{
		Hostname:  "x",
		OS:        "linux",
		Arch:      "amd64",
		PublicKey: shortKey,
	})
	if code == stdhttp.StatusBadRequest {
		return
	}
	t.Fatalf("status: got %d, want 400", code)
}
// TestAnnounceHostnameCollisionFlag announces the same hostname twice
// with different keys and expects the second response to carry
// hostname_collision=true.
//
// Both responses are checked. If the first announce failed, no row
// would exist and the collision assertion would fail for the wrong
// reason; likewise an undecodable second body would read as "flag not
// set".
func TestAnnounceHostnameCollisionFlag(t *testing.T) {
	t.Parallel()
	_, url, _ := newTestServerWithHub(t)
	pub1 := newKeypair(t)
	pub2 := newKeypair(t)

	status, _, body := postAnnounce(t, url, announceRequest{
		Hostname: "dup-host", OS: "linux", Arch: "amd64",
		PublicKey: base64.StdEncoding.EncodeToString(pub1),
	})
	if status != stdhttp.StatusOK {
		t.Fatalf("first announce: status %d body=%s", status, body)
	}

	status, _, body = postAnnounce(t, url, announceRequest{
		Hostname: "dup-host", OS: "linux", Arch: "amd64",
		PublicKey: base64.StdEncoding.EncodeToString(pub2),
	})
	if status != stdhttp.StatusOK {
		t.Fatalf("status: %d", status)
	}

	var ar announceResponse
	if err := json.Unmarshal(body, &ar); err != nil {
		t.Fatalf("unmarshal: %v body=%s", err, body)
	}
	if !ar.HostnameCollision {
		t.Fatal("expected hostname_collision=true on second announce")
	}
}
// TestAnnounceRateLimit lowers the per-IP limit to two and checks that
// two announces succeed and a third is answered with 429.
//
// Deliberately not parallel: it rewrites the package-level
// announceMaxPerMin, which the other announce tests read.
func TestAnnounceRateLimit(t *testing.T) {
	_, baseURL, _ := newTestServerWithHub(t)

	saved := announceMaxPerMin
	announceMaxPerMin = 2
	t.Cleanup(func() { announceMaxPerMin = saved })

	req := announceRequest{
		Hostname: "rl-host", OS: "linux", Arch: "amd64",
		PublicKey: base64.StdEncoding.EncodeToString(newKeypair(t)),
	}

	for attempt := 0; attempt < 3; attempt++ {
		code, _, _ := postAnnounce(t, baseURL, req)
		if attempt < 2 {
			if code != stdhttp.StatusOK {
				t.Fatalf("call %d: status %d", attempt, code)
			}
			continue
		}
		if code != stdhttp.StatusTooManyRequests {
			t.Fatalf("3rd call: want 429, got %d", code)
		}
	}
}
// TestAnnounceGlobalCap sets the global pending cap to one, seeds a
// single unexpired pending row straight into the store, and expects
// the next announce to be refused with 503.
//
// Deliberately not parallel: it rewrites the package-level
// announceGlobalCap.
func TestAnnounceGlobalCap(t *testing.T) {
	_, baseURL, db := newTestServerWithHub(t)

	saved := announceGlobalCap
	announceGlobalCap = 1
	t.Cleanup(func() { announceGlobalCap = saved })

	// Fill the queue without going through the handler.
	seedKey := newKeypair(t)
	seed := &store.PendingHost{
		ID:              ulid.Make().String(),
		Hostname:        "x",
		OS:              "linux",
		Arch:            "amd64",
		PublicKey:       seedKey,
		Fingerprint:     store.FingerprintForKey(seedKey),
		AnnouncedFromIP: "127.0.0.1",
		FirstSeenAt:     time.Now().UTC(),
		LastSeenAt:      time.Now().UTC(),
		ExpiresAt:       time.Now().UTC().Add(time.Hour),
	}
	if err := db.CreatePendingHost(context.Background(), seed); err != nil {
		t.Fatalf("seed: %v", err)
	}

	code, _, _ := postAnnounce(t, baseURL, announceRequest{
		Hostname: "next", OS: "linux", Arch: "amd64",
		PublicKey: base64.StdEncoding.EncodeToString(newKeypair(t)),
	})
	if code == stdhttp.StatusServiceUnavailable {
		return
	}
	t.Fatalf("want 503 over cap, got %d", code)
}
+75
View File
@@ -0,0 +1,75 @@
// hooks_resolve.go — server-side resolution of pre/post hooks for a
// backup dispatch (P2R-11). The agent receives plaintext hook bodies
// in CommandRunPayload; this file is where the AEAD blob on the
// source group (or the host's default) gets decrypted into the
// strings the wire payload carries.
//
// Resolution order:
// 1. source_group.<phase>_hook (per-group override)
// 2. host.<phase>_hook_default (host-wide default)
// 3. "" (no hook → agent skips that phase)
//
// Decrypt errors are logged and treated as "no hook configured" so
// a malformed blob can't poison every backup. The audit trail
// captures the underlying state regardless.
package http
import (
"log/slog"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// resolveBackupHooks produces the plaintext pre and post hook bodies
// to ship with a backup dispatch for host/g. Each phase is resolved
// independently by decryptHookOrFallback. When the server has no AEAD
// configured nothing can be decrypted, so both results are empty.
func (s *Server) resolveBackupHooks(host *store.Host, g *store.SourceGroup) (pre, post string) {
	if s.deps.AEAD != nil {
		pre = s.decryptHookOrFallback(g.PreHook, host.PreHookDefault, host.ID, "pre")
		post = s.decryptHookOrFallback(g.PostHook, host.PostHookDefault, host.ID, "post")
	}
	return pre, post
}
// decryptHookOrFallback picks the hook body for one phase. It tries
// the source group's blob first and the host default second, and
// returns the first one that is non-empty and decrypts cleanly. A
// blob that fails to decrypt is logged and skipped, so a broken group
// blob falls through to the host default. The result is "" when
// neither candidate yields a plaintext.
//
// The AEAD associated data binds each blob to its host, its slot
// ("group" or "host") and its phase.
func (s *Server) decryptHookOrFallback(group, hostDefault, hostID, phase string) string {
	candidates := [...]struct{ blob, slot string }{
		{group, "group"},
		{hostDefault, "host"},
	}
	for _, c := range candidates {
		if c.blob == "" {
			continue
		}
		aad := []byte("hook:" + hostID + ":" + c.slot + ":" + phase)
		plain, err := s.deps.AEAD.Decrypt(c.blob, aad)
		if err != nil {
			slog.Error("decrypt hook", "host_id", hostID, "phase", phase, "slot", c.slot, "err", err)
			continue
		}
		return string(plain)
	}
	return ""
}
// EncryptHookForGroup seals a plaintext hook body for storage on a
// source group, binding it to hostID, the "group" slot and phase via
// the AEAD associated data. An empty body is returned as "" without
// touching the AEAD, which callers use to clear the stored hook.
func (s *Server) EncryptHookForGroup(hostID, phase, body string) (string, error) {
	if len(body) == 0 {
		return "", nil
	}
	aad := []byte("hook:" + hostID + ":group:" + phase)
	return s.deps.AEAD.Encrypt([]byte(body), aad)
}
// EncryptHookForHost seals a plaintext hook body for storage as a
// host-wide default, binding it to hostID, the "host" slot and phase
// via the AEAD associated data. An empty body is returned as ""
// without touching the AEAD.
func (s *Server) EncryptHookForHost(hostID, phase, body string) (string, error) {
	if len(body) == 0 {
		return "", nil
	}
	aad := []byte("hook:" + hostID + ":host:" + phase)
	return s.deps.AEAD.Encrypt([]byte(body), aad)
}
+5
View File
@@ -58,5 +58,10 @@ func (s *Server) handleUpdateHostBandwidth(w stdhttp.ResponseWriter, r *stdhttp.
writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
return
}
// Fan out to the agent if connected. Errors are non-fatal — the
// next reconnect's onAgentHello will resync.
if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
_ = s.pushBandwidthToAgent(r.Context(), hostID, req.BandwidthUpKBps, req.BandwidthDownKBps)
}
writeJSON(w, stdhttp.StatusOK, hostBandwidthView(req))
}
@@ -0,0 +1,78 @@
// host_bandwidth_push.go — server → agent fan-out of host-wide
// bandwidth caps via config.update.
//
// Two entry points: pushBandwidthOnHello (called from onAgentHello,
// always pushes the current state so the agent picks up edits made
// while it was offline) and pushBandwidthToAgent (called after the
// PUT bandwidth handler succeeds, so an online agent re-arms within
// seconds).
//
// We always send pointer fields (zero-valued when uncapped) so the
// agent can distinguish "no change" (nil → field absent on the wire)
// from "explicitly cleared" (non-nil zero pointer). See
// api.ConfigUpdatePayload doc for the wire semantics.
package http
import (
"context"
"log/slog"
"time"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
)
// pushBandwidthOnHello sends the host's stored bandwidth caps to a
// freshly connected agent as a config.update on conn. It is strictly
// best-effort: a failed host lookup, marshal or send is logged and
// otherwise ignored. The send is bounded by a five second timeout
// derived from ctx.
func (s *Server) pushBandwidthOnHello(ctx context.Context, hostID string, conn *ws.Conn) {
	h, err := s.deps.Store.GetHost(ctx, hostID)
	if err != nil {
		slog.Warn("on-hello: load host for bandwidth", "host_id", hostID, "err", err)
		return
	}

	msg, err := api.Marshal(api.MsgConfigUpdate, "", bandwidthPayload(h.BandwidthUpKBps, h.BandwidthDownKBps))
	if err != nil {
		slog.Error("on-hello: marshal bandwidth config.update", "host_id", hostID, "err", err)
		return
	}

	deadlineCtx, release := context.WithTimeout(ctx, 5*time.Second)
	defer release()

	sendErr := conn.Send(deadlineCtx, msg)
	if sendErr == nil {
		return
	}
	slog.Warn("on-hello: send bandwidth config.update", "host_id", hostID, "err", sendErr)
}
// pushBandwidthToAgent delivers the given caps to hostID's agent as a
// config.update routed through the hub, waiting at most five seconds
// for the send. It does not check connectivity itself; callers that
// care should consult Hub.Connected beforehand. Marshal and send
// errors are returned to the caller.
func (s *Server) pushBandwidthToAgent(ctx context.Context, hostID string, up, down *int) error {
	msg, marshalErr := api.Marshal(api.MsgConfigUpdate, "", bandwidthPayload(up, down))
	if marshalErr != nil {
		return marshalErr
	}

	deadlineCtx, release := context.WithTimeout(ctx, 5*time.Second)
	defer release()

	return s.deps.Hub.Send(deadlineCtx, hostID, msg)
}
// bandwidthPayload builds a ConfigUpdatePayload with only the two
// bandwidth fields populated.
//
// Non-nil inputs are passed through verbatim. A nil input is replaced
// by a pointer to zero, so both fields are always present on the wire
// and the agent's stored caps end up matching the server's record
// instead of keeping whatever it last received. The consequence is
// that this helper can never express "no change" for a field; a nil
// argument means "clear the cap".
//
// Each substituted zero is a separate allocation. Pointing both
// fields at one shared variable would let a later write through one
// field silently change the other.
func bandwidthPayload(up, down *int) api.ConfigUpdatePayload {
	orZero := func(p *int) *int {
		if p != nil {
			return p
		}
		return new(int)
	}
	return api.ConfigUpdatePayload{
		BandwidthUpKBps:   orZero(up),
		BandwidthDownKBps: orZero(down),
	}
}
+4
View File
@@ -399,6 +399,10 @@ func (s *Server) pushAdminCredsToAgent(ctx context.Context, hostID string) error
// don't race a brand-new register against an old still-closing conn.
func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn) {
s.pushRepoCredsOnHello(ctx, hostID, conn)
// Bandwidth caps are sent unconditionally so an agent that
// reconnects after a cap edit picks up the new state without
// waiting for the next bandwidth PUT.
s.pushBandwidthOnHello(ctx, hostID, conn)
// Push the current schedule set in the same on-hello window so
// the agent's local cron is in sync before any command.run lands.
// An empty schedule list is a valid push: it tells the agent to
+349
View File
@@ -0,0 +1,349 @@
// pending_ws.go — /ws/agent/pending and the admin accept/reject
// endpoints for the announce-and-approve enrolment flow (P2-18b).
//
// Flow:
// 1. Agent has previously called POST /api/agents/announce, which
// returned its pending_id + fingerprint. Agent persists the
// keypair locally.
// 2. Agent connects to /ws/agent/pending?pending_id=… (no auth).
// Server reads the row, generates a 32-byte nonce, sends it.
// 3. Agent signs the nonce with its Ed25519 private key, sends the
// signature back. Server verifies; close on bad sig.
// 4. The connection sits open; the agent reads but doesn't write.
// 5. Admin clicks Accept: POST /api/pending-hosts/{id}/accept with
// the same repo-creds form the token-mint flow uses. Server
// mints a Host row + bearer + encrypted creds, pushes one
// `enrolled` message down the open socket, closes cleanly.
// 6. Admin clicks Reject: socket closes with code 4001.
//
// Hub: a process-local in-memory map of pending_id → live conn so
// the accept/reject handlers can find the right socket. Sole
// instance lives on Server.pendingHub.
package http
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
"log/slog"
stdhttp "net/http"
"sync"
"time"
"github.com/coder/websocket"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// pendingConn is a single live /ws/agent/pending session that has
// passed the nonce-signature handshake. The accept handler writes the
// enrolment message directly on conn and then closes the socket; the
// handler that owns the session blocks on closed until it ends.
type pendingConn struct {
// conn is the upgraded WebSocket for this session.
conn *websocket.Conn
// pendingID is the pending-host row ID; it is also the hub map key.
pendingID string
// closed is closed to release the handler blocked in handlePendingWS.
closed chan struct{}
}
// pendingHub is the in-memory map of pending_id → live socket.
// register, unregister and get all take mu before touching conns.
type pendingHub struct {
// mu guards conns.
mu sync.Mutex
// conns holds at most one live session per pending_id.
conns map[string]*pendingConn
}
// newPendingHub returns an empty hub whose map is initialised and ready
// for register calls.
func newPendingHub() *pendingHub {
	hub := new(pendingHub)
	hub.conns = make(map[string]*pendingConn)
	return hub
}
// register makes pc the live session for pc.pendingID. If another
// session was already registered under that ID (the agent reconnected),
// its socket is closed with "superseded".
//
// The superseded session's closed channel is deliberately NOT closed
// here. Its reader goroutine in handlePendingWS closes that channel as
// soon as the socket close makes its Read fail; closing it here as well
// would panic with "close of closed channel".
//
// The socket close performs a close handshake that can block, so it is
// done after the mutex is released to keep get/unregister responsive.
func (h *pendingHub) register(pc *pendingConn) {
	h.mu.Lock()
	old := h.conns[pc.pendingID]
	h.conns[pc.pendingID] = pc
	h.mu.Unlock()

	if old != nil && old != pc {
		_ = old.conn.Close(websocket.StatusNormalClosure, "superseded")
	}
}
// unregister removes the hub entry for pendingID, but only when that
// entry is still pc. A session that has been superseded therefore
// cannot evict its replacement when its handler unwinds.
func (h *pendingHub) unregister(pendingID string, pc *pendingConn) {
	h.mu.Lock()
	defer h.mu.Unlock()

	current, present := h.conns[pendingID]
	if !present || current != pc {
		return
	}
	delete(h.conns, pendingID)
}
// get returns the live session registered for pendingID, or nil when
// there is none.
func (h *pendingHub) get(pendingID string) *pendingConn {
	h.mu.Lock()
	session := h.conns[pendingID]
	h.mu.Unlock()
	return session
}
// nonceMessage is the first frame the server sends on
// /ws/agent/pending: a random challenge the agent must sign.
type nonceMessage struct {
Type string `json:"type"` // "nonce"
Nonce string `json:"nonce"` // base64
}
// signedNonceMessage is the agent's reply to nonceMessage: its Ed25519
// signature over the raw (decoded) nonce bytes.
type signedNonceMessage struct {
Type string `json:"type"` // "signed_nonce"
Signature string `json:"signature"` // base64
}
// enrolledMessage is what the server sends on accept. The agent
// persists the bearer to agent.yaml and exits announce mode.
// NOTE(review): the accept handler in this file fills in Type, HostID
// and Bearer only; ServerID is left empty and so is omitted on the wire.
type enrolledMessage struct {
Type string `json:"type"` // "enrolled"
HostID string `json:"host_id"`
Bearer string `json:"bearer"`
ServerID string `json:"server_id,omitempty"`
}
// handlePendingWS serves /ws/agent/pending?pending_id=… for an agent
// that has announced itself and is waiting for an admin decision.
//
// Before upgrading, the request is refused with 400 (no pending_id),
// 404 (row lookup failed), 410 (row expired) or 500 (the stored public
// key is not a valid Ed25519 key size). After the upgrade the server
// sends a fresh 32-byte nonce, waits up to 30s for a "signed_nonce"
// reply and verifies the signature against the row's public key; any
// failure closes the socket with StatusPolicyViolation.
//
// On success the session is registered in the pending hub and the
// handler blocks until the session's closed channel is closed, which
// happens once the socket stops being readable (accept, reject,
// supersede, or the agent going away).
func (s *Server) handlePendingWS(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	pendingID := r.URL.Query().Get("pending_id")
	if pendingID == "" {
		stdhttp.Error(w, "missing pending_id", stdhttp.StatusBadRequest)
		return
	}
	row, err := s.deps.Store.GetPendingHost(r.Context(), pendingID)
	if err != nil {
		stdhttp.Error(w, "pending host not found", stdhttp.StatusNotFound)
		return
	}
	if time.Now().UTC().After(row.ExpiresAt) {
		stdhttp.Error(w, "pending host expired", stdhttp.StatusGone)
		return
	}
	// ed25519.Verify panics when the key is not exactly PublicKeySize
	// bytes. This endpoint is unauthenticated, so refuse a malformed
	// stored key up front rather than rely on every writer of the row
	// having validated it.
	if len(row.PublicKey) != ed25519.PublicKeySize {
		slog.Error("pending ws: stored public key has wrong size",
			"pending_id", pendingID, "len", len(row.PublicKey))
		stdhttp.Error(w, "pending host key invalid", stdhttp.StatusInternalServerError)
		return
	}

	conn, err := websocket.Accept(w, r, &websocket.AcceptOptions{
		// InsecureSkipVerify turns OFF the library's Origin check.
		// That is acceptable here because the client is the agent,
		// not a browser, and the signed nonce below is what
		// authenticates the peer.
		InsecureSkipVerify: true,
	})
	if err != nil {
		slog.Warn("pending ws: accept", "pending_id", pendingID, "err", err)
		return
	}

	// Generate + send nonce.
	nonce := make([]byte, 32)
	if _, err := rand.Read(nonce); err != nil {
		_ = conn.Close(websocket.StatusInternalError, "nonce gen")
		return
	}
	nm := nonceMessage{Type: "nonce", Nonce: base64.StdEncoding.EncodeToString(nonce)}
	// Marshal of a struct of two strings cannot fail.
	raw, _ := json.Marshal(nm)
	wctx, wcancel := context.WithTimeout(r.Context(), 5*time.Second)
	if err := conn.Write(wctx, websocket.MessageText, raw); err != nil {
		wcancel()
		_ = conn.Close(websocket.StatusInternalError, "send nonce")
		return
	}
	wcancel()

	// Read signed nonce back.
	rctx, rcancel := context.WithTimeout(r.Context(), 30*time.Second)
	mt, body, err := conn.Read(rctx)
	rcancel()
	if err != nil || mt != websocket.MessageText {
		_ = conn.Close(websocket.StatusPolicyViolation, "no signed nonce")
		return
	}
	var sig signedNonceMessage
	if err := json.Unmarshal(body, &sig); err != nil || sig.Type != "signed_nonce" {
		_ = conn.Close(websocket.StatusPolicyViolation, "bad signed nonce shape")
		return
	}
	sigBytes, err := base64.StdEncoding.DecodeString(sig.Signature)
	if err != nil {
		_ = conn.Close(websocket.StatusPolicyViolation, "bad signature b64")
		return
	}
	if !ed25519.Verify(row.PublicKey, nonce, sigBytes) {
		_ = conn.Close(websocket.StatusPolicyViolation, "signature does not verify")
		return
	}

	// Touch the row so the dashboard knows the agent is live. Failure
	// only affects the "last seen" display, so it is ignored.
	_ = s.deps.Store.TouchPendingHost(context.Background(), pendingID, time.Now().UTC())

	// Register and block until close.
	pc := &pendingConn{conn: conn, pendingID: pendingID, closed: make(chan struct{})}
	s.pendingHub.register(pc)
	defer s.pendingHub.unregister(pendingID, pc)

	// Read loop: we don't expect any further frames from the agent.
	// Any read error (peer closed, socket closed by accept/reject, or
	// the per-read deadline) ends the session.
	// NOTE(review): each Read is given a 90s deadline; confirm whether
	// an idle-but-healthy agent is meant to be dropped after 90s and
	// reconnect, or whether the agent sends periodic frames.
	go func() {
		for {
			ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
			_, _, err := conn.Read(ctx)
			cancel()
			if err != nil {
				close(pc.closed)
				return
			}
		}
	}()
	<-pc.closed
}
// acceptForm is the admin form for POST /api/pending-hosts/{id}/accept.
// It carries the repository credentials to store for the new host.
// handleAcceptPendingHost rejects the request unless repo_url and
// repo_password are non-empty; repo_username is not checked and may be
// left empty.
type acceptForm struct {
RepoURL string `json:"repo_url"`
RepoUsername string `json:"repo_username"`
RepoPassword string `json:"repo_password"`
}
// handleAcceptPendingHost turns a pending (announced) agent into a real
// host: it mints a Host row and bearer token, stores the encrypted repo
// credentials, deletes the pending row, pushes an `enrolled` message
// down the agent's open pending WebSocket and closes that socket.
//
// Responses: 401 without a logged-in user, 404 when the pending row
// cannot be loaded, 409 when the agent has no live pending socket, 400
// for an undecodable body or missing repo_url/repo_password, 500 on
// token/encryption/store failure, otherwise 200 with host_id and
// fingerprint.
//
// NOTE(review): only requireUser is checked here; confirm that it
// implies admin rights, as the route comment states.
func (s *Server) handleAcceptPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
		return
	}
	pendingID := chi.URLParam(r, "id")
	row, err := s.deps.Store.GetPendingHost(r.Context(), pendingID)
	if err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "pending_not_found", "")
		return
	}
	pc := s.pendingHub.get(pendingID)
	if pc == nil {
		writeJSONError(w, stdhttp.StatusConflict, "agent_not_connected",
			"the pending agent is not currently connected; ask it to retry")
		return
	}

	var form acceptForm
	// Accept either JSON or form-urlencoded so HTMX-style POST works.
	// Only the media type is compared, so a header carrying parameters
	// (for example "application/json; charset=utf-8") is still JSON
	// instead of falling through to the form parser and failing with
	// missing_field.
	const jsonType = "application/json"
	ct := r.Header.Get("Content-Type")
	isJSON := len(ct) >= len(jsonType) && ct[:len(jsonType)] == jsonType &&
		(len(ct) == len(jsonType) || ct[len(jsonType)] == ';' || ct[len(jsonType)] == ' ')
	if isJSON {
		if err := json.NewDecoder(r.Body).Decode(&form); err != nil {
			writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
			return
		}
	} else {
		if err := r.ParseForm(); err != nil {
			writeJSONError(w, stdhttp.StatusBadRequest, "bad_form", err.Error())
			return
		}
		form.RepoURL = r.PostForm.Get("repo_url")
		form.RepoUsername = r.PostForm.Get("repo_username")
		form.RepoPassword = r.PostForm.Get("repo_password")
	}
	if form.RepoURL == "" || form.RepoPassword == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"repo_url and repo_password are required")
		return
	}

	// Mint the host ID and persistent bearer.
	hostID := ulid.Make().String()
	token, err := auth.NewToken()
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	// Encrypt the repo creds BEFORE writing anything: the associated
	// data only needs hostID, and failing here must not leave a Host
	// row behind with no credentials.
	enc, err := s.encryptRepoCreds(repoCredsBlob(form), []byte("host:"+hostID))
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	host := store.Host{
		ID: hostID, Name: row.Hostname, OS: row.OS, Arch: row.Arch,
		AgentVersion: row.AgentVersion, ResticVersion: row.ResticVersion,
		EnrolledAt: time.Now().UTC(),
	}
	if err := s.deps.Store.CreateHost(r.Context(), host, auth.HashToken(token), ""); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}
	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindRepo, enc); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", err.Error())
		return
	}

	// Drop the pending row. The host already exists, so a failure here
	// is logged rather than failing the accept.
	if err := s.deps.Store.DeletePendingHost(r.Context(), pendingID); err != nil {
		slog.Warn("accept pending: delete row", "pending_id", pendingID, "err", err)
	}

	// Push enrolled message + close the pending WS.
	enrolled := enrolledMessage{Type: "enrolled", HostID: hostID, Bearer: token}
	// Marshal of a struct of plain strings cannot fail.
	raw, _ := json.Marshal(enrolled)
	wctx, wcancel := context.WithTimeout(r.Context(), 5*time.Second)
	if err := pc.conn.Write(wctx, websocket.MessageText, raw); err != nil {
		slog.Warn("accept pending: write enrolled", "pending_id", pendingID, "err", err)
	}
	wcancel()
	_ = pc.conn.Close(websocket.StatusNormalClosure, "accepted")

	// Audit. Best-effort: the enrolment has already happened.
	uid := user.ID
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &uid,
		Actor:      "user",
		Action:     "host.accept_pending",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         time.Now().UTC(),
	})

	writeJSON(w, stdhttp.StatusOK, map[string]any{
		"host_id":     hostID,
		"fingerprint": row.Fingerprint,
	})
}
// handleRejectPendingHost refuses a pending (announced) agent. Any live
// pending socket for the ID is closed with code 4001 "rejected", the
// pending row is deleted, and an audit entry is appended.
//
// Responses: 401 without a logged-in user; 204 when the row is already
// gone (so a repeated reject is harmless) and 204 on success; 500 when
// the row lookup or delete fails for any other reason.
func (s *Server) handleRejectPendingHost(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, authed := s.requireUser(r)
	if !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
		return
	}

	ctx := r.Context()
	pendingID := chi.URLParam(r, "id")

	row, lookupErr := s.deps.Store.GetPendingHost(ctx, pendingID)
	switch {
	case lookupErr == nil:
		// Row exists; carry on with the rejection.
	case errors.Is(lookupErr, store.ErrNotFound):
		w.WriteHeader(stdhttp.StatusNoContent)
		return
	default:
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", lookupErr.Error())
		return
	}

	// 4001 is the close code the agent is told to treat as a reject.
	live := s.pendingHub.get(pendingID)
	if live != nil {
		_ = live.conn.Close(4001, "rejected")
	}

	delErr := s.deps.Store.DeletePendingHost(ctx, pendingID)
	if delErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", delErr.Error())
		return
	}

	// Audit is best-effort; the rejection itself has already happened.
	actorID := user.ID
	entry := store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &actorID,
		Actor:      "user",
		Action:     "host.reject_pending",
		TargetKind: ptr("pending_host"),
		TargetID:   &row.ID,
		TS:         time.Now().UTC(),
	}
	_ = s.deps.Store.AppendAudit(ctx, entry)

	w.WriteHeader(stdhttp.StatusNoContent)
}
+203
View File
@@ -0,0 +1,203 @@
// pending_ws_test.go — end-to-end test of the announce → pending WS
// → admin accept → bearer push round trip (P2-18b/c).
package http
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/json"
stdhttp "net/http"
"net/url"
"strings"
"testing"
"time"
"github.com/coder/websocket"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// TestPendingWSNonceSignAcceptFlow: simulate an agent. Announce →
// open pending WS → sign nonce → admin accept (with repo creds) →
// expect 'enrolled' message with bearer.
//
// The steps below must stay in this order: the accept request only
// succeeds once the server has verified the signature and registered
// the socket in the pending hub.
func TestPendingWSNonceSignAcceptFlow(t *testing.T) {
t.Parallel()
srv, ts, st := rawTestServer(t)
pub, priv, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("ed25519: %v", err)
}
// Pre-seed pending row directly (bypass the announce HTTP path
// since announce coverage lives in announce_test.go).
pendingID := ulid.Make().String()
if err := st.CreatePendingHost(context.Background(), &store.PendingHost{
ID: pendingID, Hostname: "ann-host", OS: "linux", Arch: "amd64",
AgentVersion: "1.0", ResticVersion: "0.17",
PublicKey: pub, Fingerprint: store.FingerprintForKey(pub),
AnnouncedFromIP: "127.0.0.1",
FirstSeenAt: time.Now().UTC(),
LastSeenAt: time.Now().UTC(),
ExpiresAt: time.Now().UTC().Add(time.Hour),
}); err != nil {
t.Fatalf("seed: %v", err)
}
// Open the pending WS.
// ts.URL starts with "http", so swapping that prefix for "ws"
// yields the matching ws:// (or wss://) URL.
wsURL := "ws" + strings.TrimPrefix(ts.URL, "http") + "/ws/agent/pending?pending_id=" + pendingID
dialCtx, dialCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer dialCancel()
c, res, err := websocket.Dial(dialCtx, wsURL, nil)
if err != nil {
t.Fatalf("dial pending ws: %v", err)
}
if res != nil && res.Body != nil {
_ = res.Body.Close()
}
t.Cleanup(func() { _ = c.CloseNow() })
// Read nonce.
rctx, rcancel := context.WithTimeout(context.Background(), 3*time.Second)
_, raw, err := c.Read(rctx)
rcancel()
if err != nil {
t.Fatalf("read nonce: %v", err)
}
var nm nonceMessage
if err := json.Unmarshal(raw, &nm); err != nil {
t.Fatalf("unmarshal nonce: %v", err)
}
// A decode error is ignored here: a bad nonce makes the signature
// fail server-side, which surfaces later as a failed accept.
nonce, _ := base64.StdEncoding.DecodeString(nm.Nonce)
// Sign + reply.
sig := ed25519.Sign(priv, nonce)
reply, _ := json.Marshal(signedNonceMessage{
Type: "signed_nonce", Signature: base64.StdEncoding.EncodeToString(sig),
})
wctx, wcancel := context.WithTimeout(context.Background(), 3*time.Second)
if err := c.Write(wctx, websocket.MessageText, reply); err != nil {
wcancel()
t.Fatalf("write signed nonce: %v", err)
}
wcancel()
// Wait briefly so the server's hub.register completes before we
// fire accept.
// If the deadline passes without registration the loop just ends;
// the accept request below then fails its status check.
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
if srv.pendingHub.get(pendingID) != nil {
break
}
time.Sleep(20 * time.Millisecond)
}
// Admin POST accept (form-encoded, with cookie).
cookie := loginAsAdmin(t, st)
form := url.Values{
"repo_url": {"rest:http://r/x"},
"repo_username": {"u"},
"repo_password": {"p"},
}
req, _ := stdhttp.NewRequest("POST",
ts.URL+"/api/pending-hosts/"+pendingID+"/accept",
strings.NewReader(form.Encode()))
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req.AddCookie(cookie)
resAccept, err := stdhttp.DefaultClient.Do(req)
if err != nil {
t.Fatalf("accept: %v", err)
}
defer resAccept.Body.Close()
if resAccept.StatusCode != stdhttp.StatusOK {
t.Fatalf("accept status: %d", resAccept.StatusCode)
}
// Expect 'enrolled' message + close.
// Only the message is asserted; the close that follows is not
// checked by this test.
rctx2, rcancel2 := context.WithTimeout(context.Background(), 3*time.Second)
_, raw2, err := c.Read(rctx2)
rcancel2()
if err != nil {
t.Fatalf("read enrolled: %v", err)
}
var em enrolledMessage
if err := json.Unmarshal(raw2, &em); err != nil {
t.Fatalf("unmarshal enrolled: %v", err)
}
if em.Type != "enrolled" || em.Bearer == "" || em.HostID == "" {
t.Fatalf("enrolled payload bad: %+v", em)
}
// Pending row should be gone.
if _, err := st.GetPendingHost(context.Background(), pendingID); err == nil {
t.Error("pending row should have been deleted on accept")
}
// Real host row should exist.
if _, err := st.GetHost(context.Background(), em.HostID); err != nil {
t.Errorf("host row not created: %v", err)
}
}
// TestPendingWSBadSignatureClosed: server closes the WS when the
// signature does not verify against the row's public key.
//
// Every setup step is checked so the test cannot pass for the wrong
// reason, and the final read must fail with a policy-violation close
// sent by the server, not merely with any error such as the client's
// own read timeout.
func TestPendingWSBadSignatureClosed(t *testing.T) {
	t.Parallel()
	_, ts, st := rawTestServer(t)

	// Two distinct keypairs — agent signs with the wrong one.
	pubReal, _, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("ed25519 (real): %v", err)
	}
	_, privAttacker, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		t.Fatalf("ed25519 (attacker): %v", err)
	}

	pendingID := ulid.Make().String()
	if err := st.CreatePendingHost(context.Background(), &store.PendingHost{
		ID: pendingID, Hostname: "bad-host", OS: "linux", Arch: "amd64",
		PublicKey: pubReal, Fingerprint: store.FingerprintForKey(pubReal),
		AnnouncedFromIP: "127.0.0.1",
		FirstSeenAt:     time.Now().UTC(),
		LastSeenAt:      time.Now().UTC(),
		ExpiresAt:       time.Now().UTC().Add(time.Hour),
	}); err != nil {
		t.Fatalf("seed: %v", err)
	}

	wsURL := "ws" + strings.TrimPrefix(ts.URL, "http") + "/ws/agent/pending?pending_id=" + pendingID
	dialCtx, dialCancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer dialCancel()
	c, res, err := websocket.Dial(dialCtx, wsURL, nil)
	if err != nil {
		t.Fatalf("dial: %v", err)
	}
	if res != nil && res.Body != nil {
		_ = res.Body.Close()
	}
	defer func() { _ = c.CloseNow() }()

	// Read nonce.
	rctx, rcancel := context.WithTimeout(context.Background(), 3*time.Second)
	_, raw, err := c.Read(rctx)
	rcancel()
	if err != nil {
		t.Fatalf("read nonce: %v", err)
	}
	var nm nonceMessage
	if err := json.Unmarshal(raw, &nm); err != nil {
		t.Fatalf("unmarshal nonce: %v", err)
	}
	nonce, err := base64.StdEncoding.DecodeString(nm.Nonce)
	if err != nil {
		t.Fatalf("decode nonce: %v", err)
	}

	// Sign with the wrong key.
	sig := ed25519.Sign(privAttacker, nonce)
	reply, err := json.Marshal(signedNonceMessage{
		Type: "signed_nonce", Signature: base64.StdEncoding.EncodeToString(sig),
	})
	if err != nil {
		t.Fatalf("marshal reply: %v", err)
	}
	wctx, wcancel := context.WithTimeout(context.Background(), 3*time.Second)
	err = c.Write(wctx, websocket.MessageText, reply)
	wcancel()
	if err != nil {
		t.Fatalf("write signed nonce: %v", err)
	}

	// Server should close with a policy violation. A bare "err != nil"
	// would also be satisfied by our own read deadline expiring, so
	// assert the close status the server sent.
	rctx2, rcancel2 := context.WithTimeout(context.Background(), 3*time.Second)
	_, _, err = c.Read(rctx2)
	rcancel2()
	if err == nil {
		t.Fatal("expected ws to close on bad signature")
	}
	if got := websocket.CloseStatus(err); got != websocket.StatusPolicyViolation {
		t.Fatalf("close status: got %v (err: %v), want %v",
			got, err, websocket.StatusPolicyViolation)
	}
}
+53 -3
View File
@@ -9,6 +9,7 @@ package http
import (
"errors"
stdhttp "net/http"
"strconv"
"github.com/go-chi/chi/v5"
@@ -16,6 +17,34 @@ import (
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// parseBandwidthOverride reads the optional per-run caps
// bandwidth_up_kbps and bandwidth_down_kbps from the request, using
// FormValue so both body form fields and query parameters are seen.
//
// A field that is absent or empty yields a nil pointer. An explicit
// "0" yields a non-nil pointer to 0 — i.e. "no cap for this run, even
// if the host has one set". A value that is not an integer, or is
// negative, is reported as an error naming the offending field. When
// the up field is invalid all results are nil; when only the down
// field is invalid the already-parsed up value is still returned
// alongside the error.
func parseBandwidthOverride(r *stdhttp.Request) (up *int, down *int, err error) {
	readCap := func(field string) (*int, error) {
		raw := r.FormValue(field)
		if raw == "" {
			return nil, nil
		}
		val, convErr := strconv.Atoi(raw)
		switch {
		case convErr != nil:
			return nil, errors.New(field + " must be an integer")
		case val < 0:
			return nil, errors.New(field + " must be >= 0")
		}
		return &val, nil
	}

	if up, err = readCap("bandwidth_up_kbps"); err != nil {
		return nil, nil, err
	}
	down, err = readCap("bandwidth_down_kbps")
	return up, down, err
}
func (s *Server) handleRunSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
user, ok := s.requireUser(r)
if !ok {
@@ -40,13 +69,34 @@ func (s *Server) handleRunSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Reque
return
}
// Optional per-run bandwidth override. Disclosed in the UI under a
// <details> "Limit bandwidth for this run" affordance; absent on
// the wire (and from JSON callers that don't supply it) means
// "fall back to the host's standing caps."
upOverride, downOverride, perr := parseBandwidthOverride(r)
if perr != nil {
s.runGroupError(w, r, stdhttp.StatusBadRequest, "invalid_value", perr.Error())
return
}
// Resolve hooks (group → host default → empty). Best-effort host
// lookup; failure proceeds with no hook rather than block the run.
var preHook, postHook string
if host, herr := s.deps.Store.GetHost(r.Context(), hostID); herr == nil {
preHook, postHook = s.resolveBackupHooks(host, g)
}
// Backup invocations don't consume RetentionPolicy — that lives on
// forget. Sending the resolved set here would just be dead weight.
res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobBackup,
api.CommandRunPayload{
Includes: g.Includes,
Excludes: g.Excludes,
Tag: g.Name,
Includes: g.Includes,
Excludes: g.Excludes,
Tag: g.Name,
BandwidthUpKBps: upOverride,
BandwidthDownKBps: downOverride,
PreHook: preHook,
PostHook: postHook,
})
if code != "" {
s.runGroupError(w, r, status, code, msg)
@@ -0,0 +1,133 @@
// run_group_bandwidth_test.go — covers the per-job bandwidth override
// that operators can set via the Run-now form's "Limit bandwidth for
// this run" disclosure (P2R-13b).
package http
import (
"context"
"encoding/json"
stdhttp "net/http"
"net/url"
"strings"
"testing"
"time"
"github.com/coder/websocket"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// TestRunSourceGroupBandwidthOverride: connect a fake agent, POST the
// per-group Run-now endpoint with bandwidth_up_kbps=512, assert the
// dispatched command.run carries it.
//
// The sequence is order-sensitive: the on-hello burst is drained
// before the run is requested so the frames read afterwards come from
// that run.
func TestRunSourceGroupBandwidthOverride(t *testing.T) {
t.Parallel()
srv, ts, st := rawTestServer(t)
hostID, token := enrolHostForWS(t, srv, st, "bw-host")
// Pre-seed an init job so auto-init doesn't fire on hello and
// pollute our envelope sequence.
if err := st.CreateJob(context.Background(), store.Job{
ID: ulid.Make().String(), HostID: hostID, Kind: "init",
ActorKind: "system", CreatedAt: time.Now().UTC(),
}); err != nil {
t.Fatalf("seed init: %v", err)
}
gid := ulid.Make().String()
if err := st.CreateSourceGroup(context.Background(), &store.SourceGroup{
ID: gid, HostID: hostID, Name: "etc", Includes: []string{"/etc"},
}); err != nil {
t.Fatalf("group: %v", err)
}
c := agentDial(t, srv, ts, hostID, token)
sendHello(t, c, "bw-host")
// Drain on-hello burst before issuing the run-now.
_ = drainUntil(t, c, api.MsgScheduleSet)
cookie := loginAsAdmin(t, st)
form := url.Values{
"bandwidth_up_kbps": {"512"},
"bandwidth_down_kbps": {"256"},
}
req, _ := stdhttp.NewRequest("POST",
ts.URL+"/hosts/"+hostID+"/source-groups/"+gid+"/run",
strings.NewReader(form.Encode()))
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req.Header.Set("Accept", "application/json")
req.AddCookie(cookie)
res, err := stdhttp.DefaultClient.Do(req)
if err != nil {
t.Fatalf("do: %v", err)
}
res.Body.Close()
if res.StatusCode != stdhttp.StatusAccepted {
t.Fatalf("status: got %d, want 202", res.StatusCode)
}
// Read the dispatched command.run; assert overrides are present.
// Frames that are not a backup command.run are skipped; a read error
// or the 3s deadline falls through to the timeout failure below.
deadline := time.Now().Add(3 * time.Second)
for time.Now().Before(deadline) {
ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
mt, raw, rerr := c.Read(ctx)
cancel()
if rerr != nil {
break
}
if mt != websocket.MessageText {
continue
}
var env api.Envelope
// An undecodable frame leaves env.Type empty and is skipped by the
// type check that follows.
_ = json.Unmarshal(raw, &env)
if env.Type != api.MsgCommandRun {
continue
}
var p api.CommandRunPayload
if err := env.UnmarshalPayload(&p); err != nil {
t.Fatalf("unmarshal: %v", err)
}
if p.Kind != api.JobBackup {
continue
}
if p.BandwidthUpKBps == nil || *p.BandwidthUpKBps != 512 {
t.Fatalf("BandwidthUpKBps: got %v, want 512", p.BandwidthUpKBps)
}
if p.BandwidthDownKBps == nil || *p.BandwidthDownKBps != 256 {
t.Fatalf("BandwidthDownKBps: got %v, want 256", p.BandwidthDownKBps)
}
return
}
t.Fatal("timed out waiting for command.run with bandwidth override")
}
// TestRunSourceGroupBandwidthRejectsNegative posts a Run-now request
// whose bandwidth_up_kbps is -1 and expects the server to answer 400.
func TestRunSourceGroupBandwidthRejectsNegative(t *testing.T) {
	t.Parallel()

	_, baseURL, st := newTestServerWithHub(t)
	adminCookie := loginAsAdmin(t, st)
	hostID := makeHost(t, st, "bw-rej-host")

	groupID := ulid.Make().String()
	group := &store.SourceGroup{
		ID: groupID, HostID: hostID, Name: "etc", Includes: []string{"/etc"},
	}
	if err := st.CreateSourceGroup(context.Background(), group); err != nil {
		t.Fatalf("group: %v", err)
	}

	payload := url.Values{}
	payload.Set("bandwidth_up_kbps", "-1")
	endpoint := baseURL + "/hosts/" + hostID + "/source-groups/" + groupID + "/run"

	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(payload.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Accept", "application/json")
	req.AddCookie(adminCookie)

	resp, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer resp.Body.Close()

	if got := resp.StatusCode; got != stdhttp.StatusBadRequest {
		t.Fatalf("status: got %d, want 400", got)
	}
}
@@ -0,0 +1,48 @@
// schedule_nextrun_test.go — pin the cron parser → next-run shape we
// rely on for the dashboard host row + schedules tab (P2R-14).
package http
import (
"testing"
"time"
)
// TestCronParserNext pins the next-fire time the package's cronParser
// produces for a few representative five-field expressions, all
// evaluated in UTC on 4 May 2026.
func TestCronParserNext(t *testing.T) {
	// may builds a UTC instant in May 2026 at the given day and time.
	may := func(day, hour, minute int) time.Time {
		return time.Date(2026, 5, day, hour, minute, 0, 0, time.UTC)
	}

	table := []struct {
		name string
		expr string
		from time.Time
		want time.Time
	}{
		{name: "daily at 03:00", expr: "0 3 * * *", from: may(4, 1, 0), want: may(4, 3, 0)},
		{name: "daily at 03:00 (after time of day → next day)", expr: "0 3 * * *", from: may(4, 5, 0), want: may(5, 3, 0)},
		{name: "every 15 minutes", expr: "*/15 * * * *", from: may(4, 1, 7), want: may(4, 1, 15)},
	}

	for _, tc := range table {
		t.Run(tc.name, func(t *testing.T) {
			sched, err := cronParser.Parse(tc.expr)
			if err != nil {
				t.Fatalf("parse %q: %v", tc.expr, err)
			}
			if got := sched.Next(tc.from); !got.Equal(tc.want) {
				t.Fatalf("Next(%v) = %v, want %v", tc.from, got, tc.want)
			}
		})
	}
}
+14
View File
@@ -192,6 +192,18 @@ func (s *Server) dispatchBackupForGroupCore(ctx context.Context, conn *ws.Conn,
"schedule_id", scheduleID, "group", g.Name, "err", err)
return "", err
}
// Resolve pre/post hooks (group → host default → empty) so they
// ride on the backup payload as plaintext. The host lookup is
// cheap; failure here is non-fatal (we proceed without hooks
// rather than block the backup).
var preHook, postHook string
if host, herr := s.deps.Store.GetHost(ctx, hostID); herr == nil {
preHook, postHook = s.resolveBackupHooks(host, g)
} else {
slog.Warn("schedule.fire: load host for hook resolve",
"host_id", hostID, "err", herr)
}
// Backup ignores RetentionPolicy — the forget cadence lives on
// host_repo_maintenance and is driven by the server-side ticker
// (P2R-06). Don't ship the field on backup dispatches.
@@ -201,6 +213,8 @@ func (s *Server) dispatchBackupForGroupCore(ctx context.Context, conn *ws.Conn,
Includes: g.Includes,
Excludes: g.Excludes,
Tag: g.Name,
PreHook: preHook,
PostHook: postHook,
})
if err != nil {
slog.Warn("schedule.fire: marshal command.run",
+30 -1
View File
@@ -49,6 +49,15 @@ type Server struct {
// sync.Mutex; checked-and-locked atomically via drainLocksMu.
drainLocksMu sync.Mutex
drainLocks map[string]*sync.Mutex
// announceRL is the per-source-IP token-bucket guarding
// POST /api/agents/announce (P2-18). One process-local map.
announceRL *announceLimiter
// pendingHub holds live /ws/agent/pending sockets keyed by
// pending_id so the accept/reject handlers can push the bearer
// or close cleanly (P2-18b).
pendingHub *pendingHub
}
// New builds a configured but not-yet-started server.
@@ -67,7 +76,12 @@ func New(deps Deps) *Server {
w.WriteHeader(stdhttp.StatusNoContent)
})
s := &Server{deps: deps, drainLocks: make(map[string]*sync.Mutex)}
s := &Server{
deps: deps,
drainLocks: make(map[string]*sync.Mutex),
announceRL: newAnnounceLimiter(),
pendingHub: newPendingHub(),
}
s.routes(r)
s.srv = &stdhttp.Server{
@@ -92,6 +106,15 @@ func (s *Server) routes(r chi.Router) {
// Agent enrollment (open endpoint — token is the credential).
r.Post("/agents/enroll", s.handleAgentEnroll)
// Announce-and-approve enrolment (open endpoint — fingerprint
// comparison in the UI is the gate). Per-IP rate-limited and
// globally capped (P2-18).
r.Post("/agents/announce", s.handleAnnounce)
// Pending host management — admin-only (gated inside the handler).
r.Post("/pending-hosts/{id}/accept", s.handleAcceptPendingHost)
r.Post("/pending-hosts/{id}/reject", s.handleRejectPendingHost)
// Operator → server (authenticated). Spec.md §6.1's
// /hosts/{id}/enrollment-token (regenerate) lands when the
// host page can call it; for now just the create endpoint.
@@ -172,6 +195,10 @@ func (s *Server) routes(r chi.Router) {
r.Post("/hosts/{id}/run-backup", s.handleUIRunBackupGone)
r.Post("/hosts/{id}/init-repo", s.handleUIInitRepoGone)
// Pending-host WebSocket (announce-and-approve, P2-18b). Mounted
// before /ws/agent so the more-specific route matches first.
r.Get("/ws/agent/pending", s.handlePendingWS)
// Agent ↔ server WebSocket. Bearer-authenticated inside the handler.
if s.deps.Hub != nil {
r.Mount("/ws/agent", ws.AgentHandler(ws.HandlerDeps{
@@ -228,6 +255,8 @@ func (s *Server) routes(r chi.Router) {
r.Post("/hosts/{id}/repo/credentials", s.handleUIRepoCredentialsSave)
r.Post("/hosts/{id}/repo/bandwidth", s.handleUIRepoBandwidthSave)
r.Post("/hosts/{id}/repo/maintenance", s.handleUIRepoMaintenanceSave)
r.Post("/hosts/{id}/repo/reinit", s.handleUIRepoReinit)
r.Post("/hosts/{id}/repo/hooks", s.handleUIRepoHooksSave)
// Admin credentials form (separate slot for prune-capable user).
r.Post("/hosts/{id}/admin-credentials", s.handleUIAdminCredentialsSave)
r.Post("/hosts/{id}/admin-credentials/delete", s.handleUIAdminCredentialsDelete)
+42 -6
View File
@@ -109,9 +109,10 @@ func (s *Server) version() string {
// dashboardPage is the data the dashboard template renders against.
type dashboardPage struct {
Hosts []dashboardHostRow
HostCount int
Summary store.FleetSummary
Hosts []dashboardHostRow
HostCount int
Summary store.FleetSummary
PendingHosts []store.PendingHost // announce-and-approve queue (P2-18d)
}
// dashboardHostRow carries a host plus the per-row Run-now decision
@@ -124,6 +125,9 @@ type dashboardHostRow struct {
// match — in that case the row shows "Open →" instead of a Run-now
// button (the operator picks per-group from the host detail).
RunAllScheduleID string
// NextRun is the next-fire time of RunAllScheduleID (when set),
// computed server-side from its cron. nil otherwise.
NextRun *time.Time
}
// pickRunAllSchedule returns the ID of the single schedule whose
@@ -203,15 +207,32 @@ func (s *Server) handleUIDashboard(w stdhttp.ResponseWriter, r *stdhttp.Request)
slog.Warn("ui dashboard: list schedules", "host_id", h.ID, "err", serr)
}
row.RunAllScheduleID = pickRunAllSchedule(scheds, groups)
if row.RunAllScheduleID != "" {
for _, sc := range scheds {
if sc.ID == row.RunAllScheduleID {
if parsed, perr := cronParser.Parse(sc.CronExpr); perr == nil {
n := parsed.Next(time.Now().UTC()).UTC()
row.NextRun = &n
}
break
}
}
}
rows = append(rows, row)
}
pending, perr := s.deps.Store.ListPendingHosts(r.Context(), time.Now().UTC())
if perr != nil {
slog.Warn("ui dashboard: list pending hosts", "err", perr)
}
view := s.baseView(u)
view.OpenAlerts = summary.OpenAlerts
view.Page = dashboardPage{
Hosts: rows,
HostCount: len(hosts),
Summary: summary,
Hosts: rows,
HostCount: len(hosts),
Summary: summary,
PendingHosts: pending,
}
if err := s.deps.UI.Render(w, "dashboard", view); err != nil {
slog.Error("ui: render dashboard", "err", err)
@@ -485,6 +506,12 @@ type hostChromeData struct {
SourceGroupCount int
ScheduleCount int
ScheduleVersion int64 // host_schedule_version (latest desired)
// Auto-init status surfaced from the latest 'init' job.
// InitStatus is "succeeded" | "failed" | "running" | "queued" | "" (never run).
InitStatus string
InitAt *time.Time // started_at if non-nil else created_at
InitJobID string
}
// loadHostChrome fetches the per-tab counts that every host-detail tab
@@ -506,6 +533,15 @@ func (s *Server) loadHostChrome(r *stdhttp.Request, host store.Host, subtab, cru
if v, err := s.deps.Store.GetHostScheduleVersion(r.Context(), host.ID); err == nil {
d.ScheduleVersion = v
}
if j, err := s.deps.Store.LatestJobByKind(r.Context(), host.ID, "init"); err == nil && j != nil {
d.InitStatus = j.Status
d.InitJobID = j.ID
t := j.CreatedAt
if j.StartedAt != nil {
t = *j.StartedAt
}
d.InitAt = &t
}
return d
}
+9
View File
@@ -79,11 +79,16 @@ type hostRepoPage struct {
UntaggedSnapshots int
GroupNames []string // ordered, for stable rendering
// Host-default hooks (decrypted plaintext for round-trip in form).
HostPreHook string
HostPostHook string
// Inline form-error banners. Empty when no error for that section.
CredentialsError string
AdminCredsError string
BandwidthError string
MaintenanceError string
HooksError string
// Highlight which form was just submitted, for the success-state
// border (subtle UX nicety; empty = no recent save).
@@ -179,6 +184,10 @@ func (s *Server) loadHostRepoPage(r *stdhttp.Request, host store.Host) (*hostRep
p.BandwidthDown = strconv.Itoa(*host.BandwidthDownKBps)
}
// Host-default hooks (decrypt for round-trip in the form).
p.HostPreHook = s.decryptHookOrFallback("", host.PreHookDefault, host.ID, "pre")
p.HostPostHook = s.decryptHookOrFallback("", host.PostHookDefault, host.ID, "post")
// Maintenance — auto-seed defaults if missing.
m, err := s.deps.Store.GetRepoMaintenance(r.Context(), host.ID)
if err != nil && errors.Is(err, store.ErrNotFound) {
+50
View File
@@ -0,0 +1,50 @@
// ui_repo_hooks.go — host-default pre/post hook editor on the Repo
// page (P2R-12). Per-source-group hooks live on the source group
// edit form; this surface lets the operator set defaults that apply
// to every group that doesn't override them.
//
// POST /hosts/{id}/repo/hooks takes pre_hook + post_hook form
// fields; encrypts each with the AEAD key (per-host AD bytes); and
// persists the (possibly empty) blobs via store.SetHostHooks.
package http
import (
"log/slog"
stdhttp "net/http"
)
// handleUIRepoHooksSave stores the host-default pre/post hooks posted
// from the Repo page form (fields pre_hook and post_hook).
//
// Each hook body is passed through EncryptHookForHost before it is
// handed to Store.SetHostHooks. Any failure along the way is logged and
// answered with a generic 500; success redirects back to the Repo page
// with ?saved=hooks.
func (s *Server) handleUIRepoHooksSave(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if s.requireUIUser(w, r) == nil {
		return
	}
	host, found := s.loadHostForUI(w, r)
	if !found {
		return
	}
	if parseErr := r.ParseForm(); parseErr != nil {
		stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
		return
	}

	// fail logs the cause server-side and sends the opaque 500 body.
	fail := func(msg string, cause error) {
		slog.Error(msg, "err", cause)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
	}

	preBlob, err := s.EncryptHookForHost(host.ID, "pre", r.PostForm.Get("pre_hook"))
	if err != nil {
		fail("ui repo hooks: encrypt pre", err)
		return
	}
	postBlob, err := s.EncryptHookForHost(host.ID, "post", r.PostForm.Get("post_hook"))
	if err != nil {
		fail("ui repo hooks: encrypt post", err)
		return
	}
	if err = s.deps.Store.SetHostHooks(r.Context(), host.ID, preBlob, postBlob); err != nil {
		fail("ui repo hooks: persist", err)
		return
	}

	target := "/hosts/" + host.ID + "/repo?saved=hooks"
	stdhttp.Redirect(w, r, target, stdhttp.StatusSeeOther)
}
+120
View File
@@ -0,0 +1,120 @@
// ui_repo_reinit.go — danger-zone re-init handler. Dispatches a fresh
// `restic init` job after the operator types the host name to confirm.
// Restic itself refuses to overwrite an existing repo (its init is
// effectively idempotent — see the runner's "config file already
// exists" sniff in restic.RunInit), so this is *not* a destructive
// data wipe; it's a "try again from scratch" affordance for the
// operator. If the rest-server bucket needs clearing the operator
// has to do that out-of-band; the job log will say so.
//
// Audit-logged with action='host.repo_reinit' so the trail records
// who triggered the re-init attempt and when.
package http
import (
"context"
"errors"
"log/slog"
stdhttp "net/http"
"strings"
"time"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// handleUIRepoReinit handles the danger-zone re-init form post
// (POST /hosts/{id}/repo/reinit).
//
// Guards, in order: the typed confirm_hostname must equal host.Name,
// the host must be connected to the hub, and repo credentials must be
// bound. Guard failures re-render the repo page with a banner. When
// all guards pass it persists a user-actor init job, sends a
// command.run envelope to the agent, appends an audit entry, and
// redirects the caller to the job page.
func (s *Server) handleUIRepoReinit(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	u := s.requireUIUser(w, r)
	if u == nil {
		return
	}
	host, ok := s.loadHostForUI(w, r)
	if !ok {
		return
	}
	if err := r.ParseForm(); err != nil {
		stdhttp.Error(w, "bad request", stdhttp.StatusBadRequest)
		return
	}

	confirm := strings.TrimSpace(r.PostForm.Get("confirm_hostname"))
	if confirm != host.Name {
		// We don't have a dedicated re-init banner field; surface via
		// the existing CredentialsError slot — it sits adjacent to the
		// danger zone visually so the operator's eye lands on it.
		s.renderRepoPage(w, r, u, host,
			"Re-init aborted — typed hostname did not match.", "", "", "")
		return
	}

	if !s.deps.Hub.Connected(host.ID) {
		s.renderRepoPage(w, r, u, host,
			"Host is offline — bring the agent back up before re-initializing.",
			"", "", "")
		return
	}

	// Ensure the host has creds bound; otherwise restic init can't
	// connect to the repo.
	if _, err := s.deps.Store.GetHostCredentials(r.Context(), host.ID, store.CredKindRepo); err != nil {
		if errors.Is(err, store.ErrNotFound) {
			s.renderRepoPage(w, r, u, host,
				"Bind repo credentials before re-initializing.",
				"", "", "")
			return
		}
		// Anything other than "not bound" is a server-side fault; log
		// it so the opaque 500 can be diagnosed.
		slog.Error("repo reinit: load repo credentials", "host_id", host.ID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	jobID := ulid.Make().String()
	now := time.Now().UTC()
	if err := s.deps.Store.CreateJob(r.Context(), store.Job{
		ID:        jobID,
		HostID:    host.ID,
		Kind:      string(api.JobInit),
		ActorKind: "user",
		ActorID:   &u.ID,
		CreatedAt: now,
	}); err != nil {
		slog.Error("repo reinit: persist job", "host_id", host.ID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
		JobID: jobID,
		Kind:  api.JobInit,
	})
	if err != nil {
		slog.Error("repo reinit: marshal command", "host_id", host.ID, "job_id", jobID, "err", err)
		stdhttp.Error(w, "internal", stdhttp.StatusInternalServerError)
		return
	}

	// Bound the delivery attempt so a stalled socket can't hang the
	// request.
	sendCtx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
	defer cancel()
	if err := s.deps.Hub.Send(sendCtx, host.ID, env); err != nil {
		// NOTE(review): the job row created above is not updated on
		// this path, so it stays in its initial state — confirm that
		// something else reaps undelivered jobs.
		slog.Warn("repo reinit: ws send failed", "host_id", host.ID, "err", err)
		s.renderRepoPage(w, r, u, host,
			"Failed to deliver the init job to the agent — try again.",
			"", "", "")
		return
	}

	// The audit write is best-effort: the job is already dispatched,
	// so a failure here is logged rather than failing the request.
	uid := u.ID
	if err := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &uid,
		Actor:      "user",
		Action:     "host.repo_reinit",
		TargetKind: ptr("host"),
		TargetID:   &host.ID,
		TS:         now,
	}); err != nil {
		slog.Warn("repo reinit: append audit", "host_id", host.ID, "job_id", jobID, "err", err)
	}

	// When wantsHTML reports true, answer 204 with an HX-Redirect
	// header pointing at the live job log; every other caller gets a
	// plain 303 to the same page.
	if wantsHTML(r) {
		w.Header().Set("HX-Redirect", "/jobs/"+jobID)
		w.WriteHeader(stdhttp.StatusNoContent)
		return
	}
	stdhttp.Redirect(w, r, "/jobs/"+jobID, stdhttp.StatusSeeOther)
}
+212
View File
@@ -0,0 +1,212 @@
// ui_repo_reinit_test.go — covers the danger-zone re-init handler:
// the hostname-confirm gate and the successful dispatch path (job row,
// audit row, command.run delivered to the agent).
package http
import (
"context"
stdhttp "net/http"
"net/http/httptest"
"net/url"
"path/filepath"
"strings"
"testing"
"time"
"github.com/coder/websocket"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ui"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// rawTestServerWithUI is the rawTestServer twin that also wires the
// UI renderer in, returning the raw httptest server so callers can
// dial /ws/agent. The UI is needed for the repo-reinit handler's
// error re-render path.
//
// Every setup step fails the test immediately on error, so a broken
// key file or AEAD shows up at its source rather than as a nil
// dereference inside a handler. The store and the HTTP server are
// closed via t.Cleanup.
func rawTestServerWithUI(t *testing.T) (*Server, *httptest.Server, *store.Store) {
	t.Helper()
	dir := t.TempDir()

	st, err := store.Open(context.Background(), filepath.Join(dir, "rm.db"))
	if err != nil {
		t.Fatalf("store: %v", err)
	}
	t.Cleanup(func() { _ = st.Close() })

	keyPath := filepath.Join(dir, "secret.key")
	if err := crypto.GenerateKeyFile(keyPath); err != nil {
		t.Fatalf("generate key file: %v", err)
	}
	key, err := crypto.LoadKeyFromFile(keyPath)
	if err != nil {
		t.Fatalf("load key file: %v", err)
	}
	aead, err := crypto.NewAEAD(key)
	if err != nil {
		t.Fatalf("new aead: %v", err)
	}

	renderer, err := ui.New()
	if err != nil {
		t.Fatalf("ui.New: %v", err)
	}

	deps := Deps{
		Cfg:   config.Config{Listen: ":0", DataDir: dir, SecretKeyFile: keyPath},
		Store: st,
		AEAD:  aead,
		Hub:   ws.NewHub(),
		UI:    renderer,
	}
	srv := New(deps)
	ts := httptest.NewServer(srv.srv.Handler)
	t.Cleanup(ts.Close)
	return srv, ts, st
}
// enrolHostForUI is the enrolHostForWS twin for tests that use the
// UI-enabled rawTestServerWithUI.
//
// It creates a linux/amd64 host row named name and returns the new
// host ID together with the plaintext agent token; only the token's
// hash is written to the store. The *Server parameter is unused and
// kept for signature parity with the twin.
func enrolHostForUI(t *testing.T, _ *Server, st *store.Store, name string) (hostID, token string) {
	t.Helper()
	hostID = ulid.Make().String()

	var err error
	token, err = auth.NewToken()
	if err != nil {
		t.Fatalf("new token: %v", err)
	}

	if err := st.CreateHost(context.Background(), store.Host{
		ID: hostID, Name: name, OS: "linux", Arch: "amd64",
		EnrolledAt: time.Now().UTC(),
	}, auth.HashToken(token), ""); err != nil {
		t.Fatalf("create host: %v", err)
	}
	return hostID, token
}
// TestRepoReinitWrongHostnameRejected posts a confirm_hostname that
// does not match the host's name and expects the handler to answer 422
// (the re-rendered repo page) without creating any user-actor init job.
func TestRepoReinitWrongHostnameRejected(t *testing.T) {
	t.Parallel()
	srv, ts, st := rawTestServerWithUI(t)
	hostID, token := enrolHostForUI(t, srv, st, "reinit-host")

	// Bring the agent online first so the request reaches the
	// hostname gate with a connected host.
	conn := agentDial(t, srv, ts, hostID, token)
	sendHello(t, conn, "reinit-host")
	_ = drainUntil(t, conn, api.MsgScheduleSet)

	cookie := loginAsAdmin(t, st)

	body := url.Values{}
	body.Set("confirm_hostname", "WRONG-NAME")
	endpoint := ts.URL + "/hosts/" + hostID + "/repo/reinit"
	req, _ := stdhttp.NewRequest("POST", endpoint, strings.NewReader(body.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.AddCookie(cookie)

	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		t.Fatalf("do: %v", err)
	}
	defer res.Body.Close()
	if got := res.StatusCode; got != stdhttp.StatusUnprocessableEntity {
		t.Fatalf("status: got %d, want 422 (re-rendered page with banner)", got)
	}

	// Auto-init on hello may have queued its own init job, so only
	// rows created by a user actor count as a bypassed gate.
	var userInits int
	row := st.DB().QueryRow(
		`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'init' AND actor_kind = 'user'`,
		hostID)
	if err := row.Scan(&userInits); err != nil {
		t.Fatalf("count: %v", err)
	}
	if userInits != 0 {
		t.Fatalf("user-actor init jobs: got %d, want 0 (gate was bypassed)", userInits)
	}
}
// TestRepoReinitDispatchesOnMatch: typing the right hostname dispatches
// a new init job + audit row.
//
// Flow: bind repo credentials, pre-seed a finished init job, connect
// the agent, POST the matching hostname with an HX-Request header, then
// read from the agent socket until a command.run frame arrives and
// check the jobs and audit_log tables.
func TestRepoReinitDispatchesOnMatch(t *testing.T) {
t.Parallel()
srv, ts, st := rawTestServerWithUI(t)
hostID, token := enrolHostForUI(t, srv, st, "reinit-ok-host")
// Bind repo creds — re-init guard requires them.
enc, err := srv.encryptRepoCreds(repoCredsBlob{
RepoURL: "rest:http://r/x", RepoUsername: "u", RepoPassword: "p",
}, []byte("host:"+hostID))
if err != nil {
t.Fatalf("encrypt: %v", err)
}
if err := st.SetHostCredentials(context.Background(), hostID, store.CredKindRepo, enc); err != nil {
t.Fatalf("set creds: %v", err)
}
// Pre-seed a successful init so auto-init doesn't fire on hello.
// The seed uses actor_kind "system", so it is not counted by the
// user-actor query further down.
preID := ulid.Make().String()
if err := st.CreateJob(context.Background(), store.Job{
ID: preID, HostID: hostID, Kind: "init",
ActorKind: "system", CreatedAt: time.Now().UTC(),
}); err != nil {
t.Fatalf("seed init: %v", err)
}
if err := st.MarkJobFinished(context.Background(), preID, "succeeded", 0, nil, "", time.Now().UTC()); err != nil {
t.Fatalf("mark seed init: %v", err)
}
// Connect the agent after seeding so the hello handshake sees the
// finished init job.
c := agentDial(t, srv, ts, hostID, token)
sendHello(t, c, "reinit-ok-host")
_ = drainUntil(t, c, api.MsgScheduleSet)
cookie := loginAsAdmin(t, st)
form := url.Values{"confirm_hostname": {"reinit-ok-host"}}
req, _ := stdhttp.NewRequest("POST",
ts.URL+"/hosts/"+hostID+"/repo/reinit",
strings.NewReader(form.Encode()))
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req.Header.Set("HX-Request", "true") // get HX-Redirect path
req.AddCookie(cookie)
res, err := stdhttp.DefaultClient.Do(req)
if err != nil {
t.Fatalf("do: %v", err)
}
defer res.Body.Close()
if res.StatusCode != stdhttp.StatusNoContent {
t.Fatalf("status: got %d, want 204", res.StatusCode)
}
if res.Header.Get("HX-Redirect") == "" {
t.Fatal("expected HX-Redirect header")
}
// Read the dispatched command.run; assert it's an init job.
// The loop is bounded by a 2s overall deadline with a 500ms timeout
// per read.
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
mt, raw, rerr := c.Read(ctx)
cancel()
// Any read error ends the loop and falls through to the timeout
// failure at the bottom of the test.
if rerr != nil {
break
}
if mt != websocket.MessageText {
continue
}
// Quick parse — we only care about the type. Avoid full
// envelope unmarshal here because the surrounding loop is just
// looking for the command.run we triggered.
if !strings.Contains(string(raw), `"command.run"`) {
continue
}
// Verify a user-actor init job row was created.
var n int
if err := st.DB().QueryRow(
`SELECT COUNT(*) FROM jobs WHERE host_id = ? AND kind = 'init' AND actor_kind = 'user'`,
hostID).Scan(&n); err != nil {
t.Fatalf("count: %v", err)
}
if n != 1 {
t.Fatalf("user-actor init jobs: got %d, want 1", n)
}
// Audit row.
var na int
if err := st.DB().QueryRow(
`SELECT COUNT(*) FROM audit_log WHERE action = 'host.repo_reinit' AND target_id = ?`,
hostID).Scan(&na); err != nil {
t.Fatalf("audit count: %v", err)
}
if na != 1 {
t.Fatalf("audit rows: got %d, want 1", na)
}
return
}
t.Fatal("timed out waiting for command.run after re-init dispatch")
}
+36 -2
View File
@@ -25,10 +25,22 @@ import (
// the template doesn't need to do per-row store lookups.
type hostSchedulesPage struct {
hostChromeData
Schedules []store.Schedule
Schedules []scheduleRow
GroupNames map[string]string
}
// scheduleRow bundles a schedule with its derived "next run" + "last
// run" data. The Schedule is embedded so existing template field
// references (`$sc.ID`, `$sc.CronExpr`, etc) keep working when we
// switch the iterating slice from []store.Schedule to []scheduleRow.
// The derived fields are filled in by handleUISchedulesList.
type scheduleRow struct {
store.Schedule
NextRun *time.Time // next cron fire time (UTC); nil when the schedule is disabled or its cron expression fails to parse
LastRun *time.Time // StartedAt of the latest job for this schedule, falling back to CreatedAt; nil when no job was found
LastJobID string // ID of that latest job; empty when no job was found
LastStatus string // succeeded|failed|running|queued — empty when never fired
}
// scheduleFormData mirrors the form's wire shape — strings + bool for
// round-trip on validation re-render.
type scheduleFormData struct {
@@ -74,6 +86,28 @@ func (s *Server) handleUISchedulesList(w stdhttp.ResponseWriter, r *stdhttp.Requ
names[g.ID] = g.Name
}
now := time.Now().UTC()
rows := make([]scheduleRow, 0, len(scheds))
for _, sc := range scheds {
row := scheduleRow{Schedule: sc}
if sc.Enabled {
if sched, err := cronParser.Parse(sc.CronExpr); err == nil {
next := sched.Next(now).UTC()
row.NextRun = &next
}
}
if j, jerr := s.deps.Store.LatestJobBySchedule(r.Context(), host.ID, sc.ID); jerr == nil && j != nil {
t := j.CreatedAt
if j.StartedAt != nil {
t = *j.StartedAt
}
row.LastRun = &t
row.LastJobID = j.ID
row.LastStatus = j.Status
}
rows = append(rows, row)
}
chrome := s.loadHostChrome(r, *host, "schedules", "schedules")
chrome.ScheduleCount = len(scheds)
chrome.SourceGroupCount = len(groups)
@@ -82,7 +116,7 @@ func (s *Server) handleUISchedulesList(w stdhttp.ResponseWriter, r *stdhttp.Requ
view.Title = host.Name + " schedules · restic-manager"
view.Page = hostSchedulesPage{
hostChromeData: chrome,
Schedules: scheds,
Schedules: rows,
GroupNames: names,
}
if err := s.deps.UI.Render(w, "host_schedules", view); err != nil {
+26 -1
View File
@@ -56,6 +56,8 @@ type sourceFormData struct {
RetryMax int
RetryBackoffSeconds int
ConflictDimension string
PreHook string // plaintext; encrypted on save
PostHook string
}
// sourceGroupEditPage backs both the new and edit form views.
@@ -173,11 +175,14 @@ func (s *Server) handleUISourceGroupEditGet(w stdhttp.ResponseWriter, r *stdhttp
}
view := s.baseView(u)
view.Title = g.Name + " · " + host.Name + " · restic-manager"
form := formFromGroup(*g)
form.PreHook = s.decryptHookOrFallback(g.PreHook, "", host.ID, "pre")
form.PostHook = s.decryptHookOrFallback(g.PostHook, "", host.ID, "post")
view.Page = sourceGroupEditPage{
hostChromeData: s.loadHostChrome(r, *host, "sources", g.Name),
IsNew: false,
GroupID: gid,
Form: formFromGroup(*g),
Form: form,
SaveAction: "/hosts/" + host.ID + "/sources/" + gid + "/edit",
}
if err := s.deps.UI.Render(w, "source_group_edit", view); err != nil {
@@ -253,6 +258,20 @@ func (s *Server) handleUISourceGroupSave(w stdhttp.ResponseWriter, r *stdhttp.Re
return
}
// Encrypt hook bodies (empty → empty stored, clearing the column).
preEnc, err := s.EncryptHookForGroup(host.ID, "pre", form.PreHook)
if err != nil {
slog.Error("ui sources: encrypt pre_hook", "err", err)
s.renderSourceFormError(w, r, u, host, gid, isNew, form, "Couldn't encrypt pre-hook — see the server log.")
return
}
postEnc, err := s.EncryptHookForGroup(host.ID, "post", form.PostHook)
if err != nil {
slog.Error("ui sources: encrypt post_hook", "err", err)
s.renderSourceFormError(w, r, u, host, gid, isNew, form, "Couldn't encrypt post-hook — see the server log.")
return
}
g := store.SourceGroup{
ID: gid,
HostID: host.ID,
@@ -265,6 +284,8 @@ func (s *Server) handleUISourceGroupSave(w stdhttp.ResponseWriter, r *stdhttp.Re
},
RetryMax: form.RetryMax,
RetryBackoffSeconds: form.RetryBackoffSeconds,
PreHook: preEnc,
PostHook: postEnc,
}
if isNew {
@@ -381,6 +402,8 @@ func parseSourceForm(v map[string][]string) sourceFormData {
KeepYearly: get("keep_yearly"),
RetryMax: rmax,
RetryBackoffSeconds: rback,
PreHook: firstVal(v, "pre_hook"),
PostHook: firstVal(v, "post_hook"),
}
}
@@ -435,5 +458,7 @@ func formFromGroup(g store.SourceGroup) sourceFormData {
RetryMax: g.RetryMax,
RetryBackoffSeconds: g.RetryBackoffSeconds,
ConflictDimension: g.ConflictDimension,
// PreHook/PostHook are decrypted on render (handler-side, not
// here) since formFromGroup has no AEAD reference.
}
}
+106
View File
@@ -0,0 +1,106 @@
// hooks_test.go — covers the pre/post hook columns added in
// migration 0010 (P2R-10): set + reload roundtrip on both
// source_groups and hosts; an empty string clears the column.
package store
import (
"context"
"path/filepath"
"testing"
"time"
"github.com/oklog/ulid/v2"
)
// newTestStore opens a fresh store backed by rm.db inside a per-test
// temp directory and registers a cleanup that closes it.
func newTestStore(t *testing.T) *Store {
	t.Helper()
	dbPath := filepath.Join(t.TempDir(), "rm.db")
	opened, err := Open(context.Background(), dbPath)
	if err != nil {
		t.Fatalf("open store: %v", err)
	}
	t.Cleanup(func() {
		_ = opened.Close()
	})
	return opened
}
// makeHostInStore inserts a linux/amd64 host called name and returns
// its freshly minted ID. The token hash is a placeholder derived from
// that ID.
func makeHostInStore(t *testing.T, st *Store, name string) string {
	t.Helper()
	hostID := ulid.Make().String()
	host := Host{
		ID: hostID, Name: name, OS: "linux", Arch: "amd64",
		EnrolledAt: time.Now().UTC(),
	}
	err := st.CreateHost(context.Background(), host, "tokenhash-"+hostID, "")
	if err != nil {
		t.Fatalf("create host: %v", err)
	}
	return hostID
}
// TestSourceGroupHooksRoundTrip creates a source group with both hooks
// set, reloads it, then clears PreHook and changes PostHook through
// UpdateSourceGroup and reloads again.
func TestSourceGroupHooksRoundTrip(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	st := newTestStore(t)
	hostID := makeHostInStore(t, st, "hooks-host")

	group := &SourceGroup{
		ID: ulid.Make().String(), HostID: hostID, Name: "etc",
		PreHook:  "ENC-PRE",
		PostHook: "ENC-POST",
	}
	if err := st.CreateSourceGroup(ctx, group); err != nil {
		t.Fatalf("create: %v", err)
	}

	loaded, err := st.GetSourceGroup(ctx, hostID, group.ID)
	if err != nil {
		t.Fatalf("get: %v", err)
	}
	if loaded.PreHook != "ENC-PRE" {
		t.Fatalf("PreHook: got %q, want ENC-PRE", loaded.PreHook)
	}
	if loaded.PostHook != "ENC-POST" {
		t.Fatalf("PostHook: got %q, want ENC-POST", loaded.PostHook)
	}

	// Update: clear PreHook, change PostHook.
	loaded.PreHook, loaded.PostHook = "", "ENC-POST-2"
	if err := st.UpdateSourceGroup(ctx, loaded); err != nil {
		t.Fatalf("update: %v", err)
	}

	loaded, err = st.GetSourceGroup(ctx, hostID, group.ID)
	if err != nil {
		t.Fatalf("get: %v", err)
	}
	if loaded.PreHook != "" {
		t.Fatalf("PreHook: want empty after clear, got %q", loaded.PreHook)
	}
	if loaded.PostHook != "ENC-POST-2" {
		t.Fatalf("PostHook: got %q, want ENC-POST-2", loaded.PostHook)
	}
}
// TestHostHookDefaultsRoundTrip sets both host-default hooks, reads
// them back through GetHost, then clears them with empty strings and
// reads again.
func TestHostHookDefaultsRoundTrip(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	st := newTestStore(t)
	hostID := makeHostInStore(t, st, "host-hooks-host")

	if err := st.SetHostHooks(ctx, hostID, "PRE", "POST"); err != nil {
		t.Fatalf("set: %v", err)
	}
	host, err := st.GetHost(ctx, hostID)
	if err != nil {
		t.Fatalf("get: %v", err)
	}
	if !(host.PreHookDefault == "PRE" && host.PostHookDefault == "POST") {
		t.Fatalf("after set: pre=%q post=%q", host.PreHookDefault, host.PostHookDefault)
	}

	// Clear by passing empty strings.
	if err := st.SetHostHooks(ctx, hostID, "", ""); err != nil {
		t.Fatalf("clear: %v", err)
	}
	host, err = st.GetHost(ctx, hostID)
	if err != nil {
		t.Fatalf("get: %v", err)
	}
	if !(host.PreHookDefault == "" && host.PostHookDefault == "") {
		t.Fatalf("after clear: pre=%q post=%q (want empty)", host.PreHookDefault, host.PostHookDefault)
	}
}
+28 -4
View File
@@ -42,7 +42,8 @@ func (s *Store) LookupHostByAgentToken(ctx context.Context, tokenHash string) (*
enrolled_at, last_seen_at, status, repo_id, tags,
current_job_id, last_backup_at, last_backup_status,
repo_size_bytes, snapshot_count, open_alert_count,
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
pre_hook_default, post_hook_default
FROM hosts WHERE agent_token_hash = ?`,
tokenHash)
return scanHost(row)
@@ -55,7 +56,8 @@ func (s *Store) GetHost(ctx context.Context, id string) (*Host, error) {
enrolled_at, last_seen_at, status, repo_id, tags,
current_job_id, last_backup_at, last_backup_status,
repo_size_bytes, snapshot_count, open_alert_count,
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
pre_hook_default, post_hook_default
FROM hosts WHERE id = ?`, id)
return scanHost(row)
}
@@ -116,7 +118,8 @@ func (s *Store) ListHosts(ctx context.Context) ([]Host, error) {
enrolled_at, last_seen_at, status, repo_id, tags,
current_job_id, last_backup_at, last_backup_status,
repo_size_bytes, snapshot_count, open_alert_count,
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
pre_hook_default, post_hook_default
FROM hosts ORDER BY name`)
if err != nil {
return nil, fmt.Errorf("store: list hosts: %w", err)
@@ -155,13 +158,15 @@ func scanHostRow(s hostScanner) (*Host, error) {
enrolled string
tags string
bwUp, bwDown sql.NullInt64
preHook, postHook sql.NullString
)
err := s.Scan(&h.ID, &h.Name, &h.OS, &h.Arch,
&h.AgentVersion, &h.ResticVersion, &h.ProtocolVersion,
&enrolled, &lastSeen, &h.Status, &repoID, &tags,
&currentJob, &lastBackupAt, &lastBkSt,
&h.RepoSizeBytes, &h.SnapshotCount, &h.OpenAlertCount,
&h.AppliedScheduleVersion, &bwUp, &bwDown)
&h.AppliedScheduleVersion, &bwUp, &bwDown,
&preHook, &postHook)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, ErrNotFound
@@ -210,9 +215,28 @@ func scanHostRow(s hostScanner) (*Host, error) {
v := int(bwDown.Int64)
h.BandwidthDownKBps = &v
}
if preHook.Valid {
h.PreHookDefault = preHook.String
}
if postHook.Valid {
h.PostHookDefault = postHook.String
}
return &h, nil
}
// SetHostHooks overwrites both host-wide hook defaults in one UPDATE.
// An empty string clears the corresponding hook. Values are written
// as given; encryption is the caller's responsibility.
//
// The affected-row count is not inspected, so an unknown hostID is
// not reported as an error.
func (s *Store) SetHostHooks(ctx context.Context, hostID string, pre, post string) error {
	const stmt = `UPDATE hosts SET pre_hook_default = ?, post_hook_default = ? WHERE id = ?`
	if _, err := s.db.ExecContext(ctx, stmt, nullableString(pre), nullableString(post), hostID); err != nil {
		return fmt.Errorf("store: set host hooks: %w", err)
	}
	return nil
}
// SetHostBandwidth replaces the host's upload/download caps. Pass nil
// to clear a cap. Caller decides validation; non-positive caps are
// treated as "no cap" by the agent regardless.
+25
View File
@@ -0,0 +1,25 @@
-- 0010_hooks.sql
--
-- P2R-10: pre/post hooks on source groups + host-wide defaults.
--
-- Hook bodies are stored as AEAD ciphertext (existing crypto.AEAD)
-- because operators do put credentials in shell snippets — even
-- though we tell them not to. NULL means "no hook configured".
--
-- Hooks fire only for kind=backup jobs. forget/prune/check/unlock
-- skip them per spec.md §14.3 (P2R-11 enforces this in the agent
-- dispatcher).
--
-- Resolution order at dispatch time (<phase> is "pre" or "post"):
-- 1. source_groups.<phase>_hook (per-group override, AEAD blob)
-- 2. hosts.<phase>_hook_default (host default, AEAD blob)
-- 3. none → no hook runs
--
-- All four columns are added in-place via ALTER TABLE ADD COLUMN.
-- Per CLAUDE.md the table-rebuild pattern is unsafe with FK cascades.
-- Per-group overrides.
ALTER TABLE source_groups ADD COLUMN pre_hook BLOB;
ALTER TABLE source_groups ADD COLUMN post_hook BLOB;
-- Host-wide defaults.
ALTER TABLE hosts ADD COLUMN pre_hook_default BLOB;
ALTER TABLE hosts ADD COLUMN post_hook_default BLOB;
@@ -0,0 +1,39 @@
-- 0011_pending_hosts.sql
--
-- P2-18: announce-and-approve enrolment.
--
-- Agents that don't have an enrolment token announce themselves
-- with `POST /api/agents/announce`, persisting one row here. The
-- admin sees them in the dashboard's Pending hosts panel and can
-- accept (mints a real Host row + bearer) or reject (deletes the
-- row + closes the agent's pending WS).
--
-- public_key is the agent's Ed25519 public key (32 raw bytes).
-- fingerprint = "SHA256:" + hex(sha256(public_key)) — printed by
-- the install script on the endpoint terminal so the operator can
-- compare the two before clicking accept. This comparison is the
-- load-bearing security gate for this flow.
--
-- expires_at is set to first_seen_at + 1h on insert; a sweeper
-- goroutine (P2-18b) deletes rows past their expiry. Hostname
-- collisions with existing or other pending rows are *not*
-- prevented at the DB level — multiple announces with the same
-- hostname are flagged in the UI so admin can pick the right one.
--
-- The three *_at columns hold timestamps as text; the store layer
-- writes them in RFC 3339 format and compares them as strings.
CREATE TABLE pending_hosts (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
os TEXT NOT NULL,
arch TEXT NOT NULL,
agent_version TEXT NOT NULL,
restic_version TEXT NOT NULL,
public_key BLOB NOT NULL, -- 32-byte Ed25519
fingerprint TEXT NOT NULL, -- "SHA256:hex(...)"
announced_from_ip TEXT NOT NULL,
first_seen_at TEXT NOT NULL,
last_seen_at TEXT NOT NULL,
expires_at TEXT NOT NULL
);
-- Supports the expiry filter used by the list/count queries and the sweeper's delete.
CREATE INDEX pending_hosts_expires ON pending_hosts(expires_at);
-- Lookup by key fingerprint. Deliberately not UNIQUE: the same key may announce more than once.
CREATE INDEX pending_hosts_fingerprint ON pending_hosts(fingerprint);
-- Supports the per-hostname collision count.
CREATE INDEX pending_hosts_hostname ON pending_hosts(hostname);
+225
View File
@@ -0,0 +1,225 @@
// pending_hosts.go — store layer for the announce-and-approve
// enrolment queue (P2-18a). Rows live for at most 1h; a sweeper
// deletes anything past expires_at.
package store
import (
"context"
"crypto/sha256"
"database/sql"
"encoding/hex"
"errors"
"fmt"
"time"
)
// PendingHost mirrors one pending_hosts table row, field for column.
// It carries no derived fields: hostname collisions are counted
// separately via CountPendingHostsByHostname.
type PendingHost struct {
ID string
Hostname string
OS string
Arch string
AgentVersion string
ResticVersion string
PublicKey []byte // 32-byte Ed25519
Fingerprint string // "SHA256:hex"
AnnouncedFromIP string
FirstSeenAt time.Time // defaults to insert time in CreatePendingHost when zero
LastSeenAt time.Time // set on insert, bumped by TouchPendingHost
ExpiresAt time.Time // defaults to insert time + 1h in CreatePendingHost when zero
}
// FingerprintForKey renders a public key as its canonical fingerprint:
// the literal prefix "SHA256:" followed by the lowercase hex SHA-256
// digest of the raw key bytes. This is the string the operator sees in
// the UI and on the endpoint terminal.
func FingerprintForKey(pubKey []byte) string {
	hasher := sha256.New()
	hasher.Write(pubKey) // hash.Hash.Write never returns an error
	digest := hasher.Sum(nil)
	return "SHA256:" + hex.EncodeToString(digest)
}
// CreatePendingHost inserts a new row. Caller has already validated
// the public key length and rate limits.
//
// Defaults applied to ph before the insert (and left visible to the
// caller afterwards): Fingerprint is derived from PublicKey when empty,
// FirstSeenAt defaults to now, LastSeenAt is always set to now, and
// ExpiresAt defaults to now + 1h.
//
// All three timestamps are written in UTC. The list, count and sweep
// queries compare expires_at as text against a UTC-formatted cutoff, so
// a caller-supplied time in another zone would otherwise sort wrongly.
//
// Returns an error when ph is nil, when ID or PublicKey is missing, or
// when the INSERT fails.
func (s *Store) CreatePendingHost(ctx context.Context, ph *PendingHost) error {
	if ph == nil {
		return errors.New("store: pending host is nil")
	}
	if ph.ID == "" || len(ph.PublicKey) == 0 {
		return errors.New("store: pending host id + public_key required")
	}
	if ph.Fingerprint == "" {
		ph.Fingerprint = FingerprintForKey(ph.PublicKey)
	}
	now := time.Now().UTC()
	if ph.FirstSeenAt.IsZero() {
		ph.FirstSeenAt = now
	}
	ph.LastSeenAt = now
	if ph.ExpiresAt.IsZero() {
		ph.ExpiresAt = now.Add(time.Hour)
	}
	_, err := s.db.ExecContext(ctx,
		`INSERT INTO pending_hosts (
id, hostname, os, arch, agent_version, restic_version,
public_key, fingerprint, announced_from_ip,
first_seen_at, last_seen_at, expires_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		ph.ID, ph.Hostname, ph.OS, ph.Arch, ph.AgentVersion, ph.ResticVersion,
		ph.PublicKey, ph.Fingerprint, ph.AnnouncedFromIP,
		ph.FirstSeenAt.UTC().Format(time.RFC3339Nano),
		ph.LastSeenAt.UTC().Format(time.RFC3339Nano),
		ph.ExpiresAt.UTC().Format(time.RFC3339Nano),
	)
	if err != nil {
		return fmt.Errorf("store: create pending host: %w", err)
	}
	return nil
}
// TouchPendingHost bumps last_seen_at on the named pending row,
// extending its visibility in the dashboard while the agent's
// pending WS stays open. Does NOT extend expires_at — the 1h cap
// is firm.
//
// when is converted to UTC before it is stored. A driver failure is
// wrapped with the same "store: …" prefix the other pending-host
// methods use. The affected-row count is not inspected, so touching an
// unknown id is not an error.
func (s *Store) TouchPendingHost(ctx context.Context, id string, when time.Time) error {
	_, err := s.db.ExecContext(ctx,
		`UPDATE pending_hosts SET last_seen_at = ? WHERE id = ?`,
		when.UTC().Format(time.RFC3339Nano), id)
	if err != nil {
		return fmt.Errorf("store: touch pending host: %w", err)
	}
	return nil
}
// GetPendingHost looks up a single pending row by primary key and
// yields ErrNotFound when no row has that ID.
func (s *Store) GetPendingHost(ctx context.Context, id string) (*PendingHost, error) {
	const query = `SELECT id, hostname, os, arch, agent_version, restic_version,
public_key, fingerprint, announced_from_ip,
first_seen_at, last_seen_at, expires_at
FROM pending_hosts WHERE id = ?`
	return scanPendingHost(s.db.QueryRowContext(ctx, query, id))
}
// GetPendingHostByFingerprint resolves a row by its public key
// fingerprint (used by the WS pending handler to look up which row
// an incoming connection corresponds to).
//
// The fingerprint index is not unique, so several rows can share one
// fingerprint. The newest announce (latest first_seen_at) is returned
// so the result is deterministic. ErrNotFound on miss.
func (s *Store) GetPendingHostByFingerprint(ctx context.Context, fp string) (*PendingHost, error) {
	row := s.db.QueryRowContext(ctx,
		`SELECT id, hostname, os, arch, agent_version, restic_version,
public_key, fingerprint, announced_from_ip,
first_seen_at, last_seen_at, expires_at
FROM pending_hosts WHERE fingerprint = ?
ORDER BY first_seen_at DESC
LIMIT 1`, fp)
	return scanPendingHost(row)
}
// ListPendingHosts fetches all rows whose expires_at is still after
// now, ordered newest first by first_seen_at. Taking now as a
// parameter lets tests fast-forward the clock. The result is an empty,
// non-nil slice when nothing matches.
func (s *Store) ListPendingHosts(ctx context.Context, now time.Time) ([]PendingHost, error) {
	cutoff := now.UTC().Format(time.RFC3339Nano)
	const query = `SELECT id, hostname, os, arch, agent_version, restic_version,
public_key, fingerprint, announced_from_ip,
first_seen_at, last_seen_at, expires_at
FROM pending_hosts WHERE expires_at > ?
ORDER BY first_seen_at DESC`

	rows, err := s.db.QueryContext(ctx, query, cutoff)
	if err != nil {
		return nil, fmt.Errorf("store: list pending hosts: %w", err)
	}
	defer func() { _ = rows.Close() }()

	pending := make([]PendingHost, 0)
	for rows.Next() {
		item, scanErr := scanPendingHostRow(rows)
		if scanErr != nil {
			return nil, scanErr
		}
		pending = append(pending, *item)
	}
	return pending, rows.Err()
}
// CountPendingHosts reports how many rows have an expires_at after
// now. Used for the global cap (P2-18: refuse new announces past 100
// in flight).
func (s *Store) CountPendingHosts(ctx context.Context, now time.Time) (int, error) {
	cutoff := now.UTC().Format(time.RFC3339Nano)
	row := s.db.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM pending_hosts WHERE expires_at > ?`, cutoff)

	var count int
	if err := row.Scan(&count); err != nil {
		return 0, fmt.Errorf("store: count pending hosts: %w", err)
	}
	return count, nil
}
// CountPendingHostsByHostname reports how many rows with an expires_at
// after now carry the given hostname. The announce endpoint uses the
// count to set the hostname_collision flag in its response.
func (s *Store) CountPendingHostsByHostname(ctx context.Context, hostname string, now time.Time) (int, error) {
	cutoff := now.UTC().Format(time.RFC3339Nano)
	row := s.db.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM pending_hosts WHERE hostname = ? AND expires_at > ?`,
		hostname, cutoff)

	var count int
	if err := row.Scan(&count); err != nil {
		return 0, fmt.Errorf("store: count pending hosts by hostname: %w", err)
	}
	return count, nil
}
// DeletePendingHost deletes the row with the given ID and returns
// ErrNotFound when the statement affected no rows.
func (s *Store) DeletePendingHost(ctx context.Context, id string) error {
	result, err := s.db.ExecContext(ctx, `DELETE FROM pending_hosts WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("store: delete pending host: %w", err)
	}
	// A RowsAffected error leaves the count at zero and is therefore
	// reported as ErrNotFound as well.
	if affected, _ := result.RowsAffected(); affected == 0 {
		return ErrNotFound
	}
	return nil
}
// DeleteExpiredPendingHosts purges every row whose expires_at is at or
// before now and returns how many rows went away, so the sweeper can
// log non-zero sweeps.
func (s *Store) DeleteExpiredPendingHosts(ctx context.Context, now time.Time) (int64, error) {
	cutoff := now.UTC().Format(time.RFC3339Nano)
	result, err := s.db.ExecContext(ctx,
		`DELETE FROM pending_hosts WHERE expires_at <= ?`, cutoff)
	if err != nil {
		return 0, fmt.Errorf("store: delete expired pending hosts: %w", err)
	}
	// The RowsAffected error is discarded; the count is returned as-is.
	removed, _ := result.RowsAffected()
	return removed, nil
}
// ----- scan helpers --------------------------------------------------
// pendingHostScanner is the single-method Scan surface that
// scanPendingHostRow needs. Accepting this interface lets the same
// decoder be fed both the single-row result passed by scanPendingHost
// and the row cursor used by the list query's loop.
type pendingHostScanner interface {
	// Scan copies the current row's columns into dest, in order.
	Scan(dest ...any) error
}
// scanPendingHost decodes a single-row query result into a
// PendingHost, translating sql.ErrNoRows into the store's ErrNotFound.
// Any other scan error is passed through unchanged.
func scanPendingHost(row *sql.Row) (*PendingHost, error) {
	host, err := scanPendingHostRow(row)
	switch {
	case errors.Is(err, sql.ErrNoRows):
		return nil, ErrNotFound
	default:
		return host, err
	}
}
// scanPendingHostRow reads one pending_hosts row from s, in the column
// order id, hostname, os, arch, agent_version, restic_version,
// public_key, fingerprint, announced_from_ip, first_seen_at,
// last_seen_at, expires_at. Scan errors are returned as-is (including
// sql.ErrNoRows, which scanPendingHost maps to ErrNotFound).
//
// The three timestamps are read as text and parsed as RFC3339Nano; a
// value that fails to parse leaves the corresponding field at its
// zero time rather than failing the whole row.
func scanPendingHostRow(s pendingHostScanner) (*PendingHost, error) {
	var host PendingHost
	var rawFirstSeen, rawLastSeen, rawExpires string

	err := s.Scan(
		&host.ID, &host.Hostname, &host.OS, &host.Arch,
		&host.AgentVersion, &host.ResticVersion,
		&host.PublicKey, &host.Fingerprint, &host.AnnouncedFromIP,
		&rawFirstSeen, &rawLastSeen, &rawExpires,
	)
	if err != nil {
		return nil, err
	}

	// parseStamp reports whether raw is a valid RFC3339Nano timestamp
	// and, if so, its parsed value.
	parseStamp := func(raw string) (time.Time, bool) {
		parsed, perr := time.Parse(time.RFC3339Nano, raw)
		return parsed, perr == nil
	}

	if parsed, ok := parseStamp(rawFirstSeen); ok {
		host.FirstSeenAt = parsed
	}
	if parsed, ok := parseStamp(rawLastSeen); ok {
		host.LastSeenAt = parsed
	}
	if parsed, ok := parseStamp(rawExpires); ok {
		host.ExpiresAt = parsed
	}
	return &host, nil
}
+88
View File
@@ -0,0 +1,88 @@
// schedule_runs.go — derived "next run" / "last run" helpers for the
// dashboard host row + schedules tab (P2R-14).
//
// Both are derived data: NextRun is computed from the cron expression
// at request time; LatestJobBySchedule reads the most recent job that
// fired against this schedule. Neither is persisted — the cost of the
// query is small relative to a page render.
package store
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
)
// LatestJobBySchedule looks up the newest job (by created_at) on the
// given host that was fired by the given schedule, i.e. rows with
// host_id=hostID, scheduled_id=schedID and actor_kind='schedule'.
// The query does not filter on status, so queued/running rows are
// included — the operator wants to see "running now" too. When the
// schedule has never fired the result is (nil, ErrNotFound).
func (s *Store) LatestJobBySchedule(ctx context.Context, hostID, schedID string) (*Job, error) {
	const latestForSchedule = `SELECT id, host_id, kind, status, scheduled_id, actor_kind, actor_id,
		started_at, finished_at, exit_code, stats, error, created_at
		FROM jobs
		WHERE host_id = ? AND scheduled_id = ? AND actor_kind = 'schedule'
		ORDER BY created_at DESC
		LIMIT 1`

	result := s.db.QueryRowContext(ctx, latestForSchedule, hostID, schedID)
	return scanJobRow(result)
}
// scanJobRow is the shared scan used by LatestJobBySchedule. Mirrors
// the columns LatestJobByKind reads. Kept in this file (vs jobs.go)
// to avoid disturbing the stable API surface exported there.
//
// Column order: id, host_id, kind, status, scheduled_id, actor_kind,
// actor_id, started_at, finished_at, exit_code, stats, error,
// created_at. sql.ErrNoRows is translated to ErrNotFound; any other
// scan failure is wrapped with a "store: scan job" prefix.
//
// Nullable columns map to nil pointers (or an empty Stats slice) when
// NULL. Timestamps are parsed as RFC3339Nano; an unparseable value is
// treated like an absent one — the optional StartedAt/FinishedAt stay
// nil and CreatedAt stays at its zero value — so callers never see a
// non-nil pointer to the zero time.
func scanJobRow(row *sql.Row) (*Job, error) {
	var (
		j          Job
		schedID    sql.NullString
		actorID    sql.NullString
		startedAt  sql.NullString
		finishedAt sql.NullString
		exitCode   sql.NullInt64
		stats      sql.NullString
		errMsg     sql.NullString
		createdAt  string
	)
	if err := row.Scan(&j.ID, &j.HostID, &j.Kind, &j.Status, &schedID,
		&j.ActorKind, &actorID, &startedAt, &finishedAt,
		&exitCode, &stats, &errMsg, &createdAt); err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return nil, ErrNotFound
		}
		return nil, fmt.Errorf("store: scan job: %w", err)
	}

	// optionalTime converts a nullable timestamp column into a pointer,
	// returning nil both for NULL and for text that is not valid
	// RFC3339Nano (matching how created_at tolerates bad input below).
	optionalTime := func(ns sql.NullString) *time.Time {
		if !ns.Valid {
			return nil
		}
		t, err := time.Parse(time.RFC3339Nano, ns.String)
		if err != nil {
			return nil
		}
		return &t
	}

	if schedID.Valid {
		v := schedID.String
		j.ScheduledID = &v
	}
	if actorID.Valid {
		v := actorID.String
		j.ActorID = &v
	}
	j.StartedAt = optionalTime(startedAt)
	j.FinishedAt = optionalTime(finishedAt)
	if exitCode.Valid {
		i := int(exitCode.Int64)
		j.ExitCode = &i
	}
	// An empty stats string is treated the same as NULL.
	if stats.Valid && stats.String != "" {
		j.Stats = []byte(stats.String)
	}
	if errMsg.Valid {
		v := errMsg.String
		j.Error = &v
	}
	if t, err := time.Parse(time.RFC3339Nano, createdAt); err == nil {
		j.CreatedAt = t
	}
	return &j, nil
}
+16 -7
View File
@@ -45,13 +45,14 @@ func (st *Store) CreateSourceGroup(ctx context.Context, g *SourceGroup) error {
`INSERT INTO source_groups (
id, host_id, name, includes, excludes, retention_policy,
retry_max, retry_backoff_seconds, conflict_dimension,
created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
created_at, updated_at, pre_hook, post_hook
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
g.ID, g.HostID, g.Name,
string(includesJSON), string(excludesJSON), string(retentionJSON),
g.RetryMax, g.RetryBackoffSeconds,
nullableString(g.ConflictDimension),
now.Format(time.RFC3339Nano), now.Format(time.RFC3339Nano),
nullableString(g.PreHook), nullableString(g.PostHook),
); err != nil {
return fmt.Errorf("store: create source group: %w", err)
}
@@ -88,13 +89,14 @@ func (st *Store) UpdateSourceGroup(ctx context.Context, g *SourceGroup) error {
`UPDATE source_groups SET
name = ?, includes = ?, excludes = ?, retention_policy = ?,
retry_max = ?, retry_backoff_seconds = ?, conflict_dimension = ?,
updated_at = ?
updated_at = ?, pre_hook = ?, post_hook = ?
WHERE id = ? AND host_id = ?`,
g.Name,
string(includesJSON), string(excludesJSON), string(retentionJSON),
g.RetryMax, g.RetryBackoffSeconds,
nullableString(g.ConflictDimension),
now.Format(time.RFC3339Nano),
nullableString(g.PreHook), nullableString(g.PostHook),
g.ID, g.HostID,
)
if err != nil {
@@ -143,7 +145,7 @@ func (st *Store) GetSourceGroup(ctx context.Context, hostID, groupID string) (*S
row := st.db.QueryRowContext(ctx,
`SELECT id, host_id, name, includes, excludes, retention_policy,
retry_max, retry_backoff_seconds, conflict_dimension,
created_at, updated_at
created_at, updated_at, pre_hook, post_hook
FROM source_groups WHERE id = ? AND host_id = ?`,
groupID, hostID)
g, err := scanSourceGroup(row)
@@ -159,7 +161,7 @@ func (st *Store) GetSourceGroupByName(ctx context.Context, hostID, name string)
row := st.db.QueryRowContext(ctx,
`SELECT id, host_id, name, includes, excludes, retention_policy,
retry_max, retry_backoff_seconds, conflict_dimension,
created_at, updated_at
created_at, updated_at, pre_hook, post_hook
FROM source_groups WHERE host_id = ? AND name = ?`,
hostID, name)
g, err := scanSourceGroup(row)
@@ -177,7 +179,7 @@ func (st *Store) ListSourceGroupsByHost(ctx context.Context, hostID string) ([]S
rows, err := st.db.QueryContext(ctx,
`SELECT id, host_id, name, includes, excludes, retention_policy,
retry_max, retry_backoff_seconds, conflict_dimension,
created_at, updated_at
created_at, updated_at, pre_hook, post_hook
FROM source_groups WHERE host_id = ? ORDER BY name`,
hostID)
if err != nil {
@@ -224,14 +226,21 @@ func scanSourceGroupRow(s sourceGroupScanner) (*SourceGroup, error) {
includes, excludes, retention string
conflict sql.NullString
createdAt, updatedAt string
preHook, postHook sql.NullString
)
err := s.Scan(&out.ID, &out.HostID, &out.Name,
&includes, &excludes, &retention,
&out.RetryMax, &out.RetryBackoffSeconds, &conflict,
&createdAt, &updatedAt)
&createdAt, &updatedAt, &preHook, &postHook)
if err != nil {
return nil, err
}
if preHook.Valid {
out.PreHook = preHook.String
}
if postHook.Valid {
out.PostHook = postHook.String
}
if includes != "" {
_ = json.Unmarshal([]byte(includes), &out.Includes)
}
+14
View File
@@ -66,6 +66,13 @@ type Host struct {
// (backup, restore, prune). nil = no cap.
BandwidthUpKBps *int
BandwidthDownKBps *int
// PreHookDefault / PostHookDefault are AEAD ciphertext (string
// blob produced by crypto.AEAD.Encrypt). Per source group hooks
// (SourceGroup.PreHook / PostHook) override these when set.
// Empty = no default configured.
PreHookDefault string
PostHookDefault string
}
// Schedule is now intentionally slim: cron + which groups + enabled.
@@ -106,6 +113,13 @@ type SourceGroup struct {
ConflictDimension string
CreatedAt time.Time
UpdatedAt time.Time
// PreHook / PostHook are AEAD ciphertext (string blob produced by
// crypto.AEAD.Encrypt). Empty means "no hook configured."
// Encryption/decryption happens at the HTTP layer (where AEAD
// lives); the store layer just persists the bytes verbatim.
PreHook string
PostHook string
}
// RetentionPolicy is the typed view of `restic forget --keep-*`.
+28 -14
View File
@@ -178,26 +178,26 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
- [x] **P2R-07** (S) Repo stats panel on the Repo page: total size, raw size, last-check timestamp + status (color-coded), last-prune timestamp, stale-lock banner. Backed by `restic stats --json --mode raw-data` that the agent ships in a `repo.stats` envelope after every backup / check / prune / unlock; persisted via `Store.UpsertHostRepoStats` into a new `host_repo_stats` projection table.
- [x] **P2R-08** (M) Pending-runs queue worker. Scheduled backup fires that race an agent disconnect queue to `pending_runs`. Drained on a 30s server-side tick **and** on agent reconnect (via `onAgentHello`); per-host TryLock mutex prevents the two paths double-dispatching the same row. Exponential backoff capped at 30 minutes; abandons rows that exceed the source-group's `retry_max` (audit-logged) or whose schedule/group has genuinely been deleted.
### P2 redesign — Phase 6 (auto-init follow-up) — TODO
### P2 redesign — Phase 6 (auto-init follow-up)
- [ ] **P2R-09** (S) Auto-init UX polish. Surface init result on host detail (small "repo ready · initialised by you on …" line; or "init failed — see job N · retry" if init failed). Re-init button on Repo page danger zone wipes then re-runs init (admin only, audit-logged, two-step confirm with the host name typed in).
- [x] **P2R-09** (S) Auto-init UX polish. Latest `init` job status surfaced under the host-detail vitals strip (succeeded/failed/running/queued, with link to the live job log on non-success). Danger-zone `POST /hosts/{id}/repo/reinit` dispatches a fresh init job after the operator types the host name to confirm; audit row records `host.repo_reinit`.
### Pre/post hooks (rehomed onto source groups) — TODO
### Pre/post hooks (rehomed onto source groups)
- [ ] **P2R-10** (M) Hook schema: `source_group.pre_hook`, `source_group.post_hook`, `host.pre_hook_default`, `host.post_hook_default`. Encrypted at rest (existing `crypto.AEAD`). Admin-only edit. Audit-logged.
- [ ] **P2R-11** (M) Agent execution of hooks: configurable shell per host. `pre_hook` failure aborts the backup. `post_hook` always runs with `RM_JOB_STATUS` env var. Stdout/stderr captured into `JobLog` with a `hook:` prefix. Hooks only run for `kind=backup` jobs (forget/prune/check/unlock skip them, per spec.md §14.3).
- [ ] **P2R-12** (S) Hook editor UI on source-group edit page (per-group override) and host Settings tab (host-wide default). Validation rejects non-backup contexts. Warning banner: "this hook runs as the agent service user (root on Linux; LocalSystem on Windows)".
- [x] **P2R-10** (M) Hook schema: migration 0010 adds `pre_hook`/`post_hook` BLOB columns to `source_groups` and `pre_hook_default`/`post_hook_default` to `hosts`. Bytes stored verbatim — AEAD encrypt/decrypt at the HTTP layer (per-slot AD bytes). Round-trip tests cover set/clear semantics on both tables.
- [x] **P2R-11** (M) Agent execution of hooks: `runner.BackupHooks` + `runHook` helper invoked via `/bin/sh -c` (`cmd.exe /C` on Windows). pre_hook non-zero exit aborts the backup; post_hook always runs with `RM_JOB_STATUS=succeeded|failed` in env. Output streamed as `hook(<phase>): …` log.stream lines. Hooks only run for `kind=backup`. Server side resolves group → host default → empty and ships plaintext on the WS payload (decrypt at HTTP layer).
- [x] **P2R-12** (S) Hook editor UI: source-group edit form gains pre/post hook textareas with the service-user warning banner; bodies AEAD-encrypted on save (per-group AD). Repo page adds a host-default Hooks panel with the same shape; saved via `POST /hosts/{id}/repo/hooks`.
### Bandwidth + niceties (rehomed onto host + source groups) — TODO
### Bandwidth + niceties (rehomed onto host + source groups)
- [ ] **P2R-13** (S) Bandwidth limit fields. Host-wide caps (`Host.BandwidthUpKBps`, `BandwidthDownKBps` — schema is in 0008 already, just needs UI on the Repo page) applied to every restic invocation. Per-job override on Run-now (override field on the Run-now confirm dialog). Maps to `restic --limit-upload` / `--limit-download`.
- [ ] **P2R-14** (S) Schedule "next run" / "last run" surfaced on host card (dashboard row) + on the Schedules tab. "Next run" computed server-side from cron + now; "last run" from the most recent job with `actor_kind=schedule` for any schedule that uses any of the host's source groups.
- [x] **P2R-13** (S) Bandwidth limit fields. `restic.Env` gains `LimitUploadKBps`/`LimitDownloadKBps`, emitted as `--limit-upload`/`--limit-download` global flags before the subcommand on every invocation. Agent dispatcher tracks host-wide caps received via `config.update`; server pushes them on hello and after `PUT /api/hosts/{id}/bandwidth`. Per-job override on the per-source-group Run-now form (collapsed `<details>` "Limit bandwidth for this run" with two KB/s inputs); override wins over host caps.
- [x] **P2R-14** (S) Schedule "next run" / "last run". New `store.LatestJobBySchedule` query. Schedules tab grows two columns (Next derived from cron via `robfig/cron/v3.Parse(...).Next`, Last from latest `actor_kind=schedule` job). Dashboard host row appends a `next <relative time>` line below the last-run text when a single covering schedule is the run-now candidate.
### Cross-platform + alt-enrolment (unchanged by redesign) — TODO
### Cross-platform + alt-enrolment
- [ ] **P2-16** (M) Windows service integration: agent runs under the Service Control Manager via `golang.org/x/sys/windows/svc`; install/uninstall/start/stop wired up.
- [ ] **P2-17** (M) `install.ps1` (Windows): downloads agent, installs as service, enrolls; detects existing scheduled tasks named `*restic*` and prints them for manual review.
- [ ] **P2-18** (L) Announce-and-approve enrollment (second enrollment mode, alongside the token flow that ships in Phase 1):
- [x] **P2-16** (M) Windows service integration: `internal/agent/service` (build-tagged) implements `svc.Handler`; new `restic-manager-agent install|uninstall|start|stop|run` subcommands wrap the SCM via `golang.org/x/sys/windows/svc/mgr`. Cross-compile verified (`GOOS=windows GOARCH=amd64 go build ./cmd/agent`); **untested on Windows itself** — Linux CI can't exercise the SCM round-trip.
- [x] **P2-17** (M) `install.ps1` (Windows): pwsh installer that detects arch, downloads `$Server/agent/binary?os=windows&arch=amd64`, runs the agent in `-enroll-server` (+ optional `-enroll-token`) mode (token flow OR announce-and-approve), then registers the service via `restic-manager-agent install`. Surfaces existing scheduled tasks named `*restic*` without disabling. Served by the existing `GET /install/*` handler; restage block in CLAUDE.md updated.
- [x] **P2-18** (L) Announce-and-approve enrolment (second enrolment mode):
- Agent run with no `RM_TOKEN` generates a local Ed25519 keypair (persisted alongside the encrypted secrets blob), then `POST /api/agents/announce` with `{hostname, os, arch, agent_version, restic_version, public_key}`. Server stores a `pending_hosts` row (`public_key`, `fingerprint = sha256(public_key)`, `announced_from_ip`, `first_seen_at`, `last_seen_at`, `expires_at = now+1h`). Hostname collisions with existing or other pending rows are flagged in the response so the install script can warn loudly on the endpoint terminal.
- Agent then opens a long-poll/WS to `/ws/agent/pending` authenticated by signing a server-issued nonce with its private key — proves possession of the key tied to the pending row. Connection stays open; agent waits.
- Install script prints the fingerprint on the endpoint's terminal in a copy-friendly form (e.g. `SHA256:ab12…cd34`) and tells the operator to compare it to the one shown in the UI before clicking accept.
@@ -205,6 +205,20 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
- Server-side guards: per-source-IP rate limit on `/api/agents/announce` (token-bucket, e.g. 10/min); global cap on pending rows (e.g. 100); pending rows auto-expire after 1h; duplicate-hostname pending rows allowed but visually flagged in UI; accepting one does **not** auto-reject the others (admin sees them all and decides — defends against the "attacker announces first, real host second" race).
- Token-based enrollment (Phase 1) remains the default and is unchanged; announce-and-approve is opt-in for interactive installs. Docs explicitly call out that the fingerprint comparison step is what makes this flow safe — without it, this is no better than trusting `hostname` over the wire.
> **As shipped:** migration 0011 + `store/pending_hosts.go` cover the table.
> `POST /api/agents/announce` (rate-limited 10/min/IP, global cap 100 in-flight rows)
> returns `{pending_id, fingerprint, hostname_collision}`. `GET /ws/agent/pending`
> runs the Ed25519 nonce-sign handshake. Admin POSTs to
> `/api/pending-hosts/{id}/accept|reject` (audit-logged as
> `host.accept_pending`/`host.reject_pending`). Dashboard panel renders the queue
> with a copyable fingerprint + inline accept form (URL/user/password). 60s
> server ticker sweeps expired rows. Agent: `cmd/agent/announce.go` mints +
> persists an Ed25519 keypair into `agent.yaml`'s `announce_key` field; runs
> automatically when `-enroll-server` is supplied without `-enroll-token`. The
> install scripts have not been updated to surface the fingerprint themselves;
> the only place it appears is the agent's own banner, which the operator reads
> from the terminal running the install script.
### Phase 2 acceptance
- A host can be onboarded end-to-end with no manual REST: enrol → auto-init runs → operator opens host → creates source group(s) → attaches them to one or more schedules → schedule fires on time → backup runs against the right paths with the right retention → snapshots tagged by group name appear in UI.
@@ -212,7 +226,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days.
- Server-side maintenance ticker drives forget/prune/check at the configured cadences, independent of agent cron. Offline hosts queue to `pending_runs` and drain on reconnect.
- Pre/post hooks fire correctly per source group, fail loudly on `pre_hook` errors, run `post_hook` with `RM_JOB_STATUS`. Rejected on non-backup kinds.
- Bandwidth limits honoured (host-wide default + per-run override).
- A Windows host can enrol, appear in the dashboard, and run a backup with live log streaming.
- A Windows host can enrol, appear in the dashboard, and run a backup with live log streaming. **Not validated in CI:** Linux runners cannot exercise the SCM round-trip; the `service_windows.go`/`install.ps1` pieces compile cleanly under `GOOS=windows GOARCH=amd64` but the first real Windows install will be the first end-to-end test.
- A Linux host can enrol via announce-and-approve, with fingerprint-comparison gate enforced. Rate-limit + pending-cap guards verified.
---
File diff suppressed because one or more lines are too long
+2 -2
View File
@@ -209,8 +209,8 @@
/* ---------- schedule rows (Schedules tab) ---------- */
.schd-row {
display: grid; align-items: center;
grid-template-columns: 90px 1fr 2fr auto;
column-gap: 18px;
grid-template-columns: 78px 1fr 1.6fr 100px 110px auto;
column-gap: 14px;
padding: 12px 18px; font-size: 13px;
}
.schd-row.head {
+54
View File
@@ -65,6 +65,60 @@
</div>
</div>
{{/* ---------- Pending hosts (announce-and-approve queue) ---------- */}}
{{if gt (len $page.PendingHosts) 0}}
<div class="pt-6">
<div class="flex items-center justify-between mb-3">
<div class="flex items-center gap-3">
<h2 class="text-[13px] font-semibold tracking-[0.01em] text-warn">Pending hosts</h2>
<div class="text-xs text-ink-fade">{{len $page.PendingHosts}} waiting for approval</div>
</div>
</div>
<div class="panel rounded-[7px] overflow-hidden"
style="border-color: color-mix(in oklch, var(--warn), transparent 70%);">
{{range $i, $ph := $page.PendingHosts}}
<div class="p-4 {{if not (eq $i 0)}}hairline{{end}}">
<div class="flex items-start justify-between gap-4">
<div class="flex-1 min-w-0">
<div class="flex items-center gap-2">
<span class="mono text-ink font-medium">{{$ph.Hostname}}</span>
<span class="mono text-[11px] text-ink-fade">{{$ph.OS}}/{{$ph.Arch}}</span>
<span class="mono text-[11px] text-ink-fade">agent {{$ph.AgentVersion}}</span>
<span class="mono text-[11px] text-ink-fade">restic {{$ph.ResticVersion}}</span>
</div>
<div class="mt-2 mono text-[12px] text-ink-mid select-all break-all"
style="font-family: var(--font-mono); padding: 6px 8px; background: var(--panel-hi); border-radius: 4px;">
{{$ph.Fingerprint}}
</div>
<div class="text-[11px] text-ink-fade mt-2">
from {{$ph.AnnouncedFromIP}} · {{relTime $ph.FirstSeenAt}}
· expires {{relTime $ph.ExpiresAt}}
</div>
</div>
<form method="post" action="/api/pending-hosts/{{$ph.ID}}/accept"
class="flex flex-col gap-2 flex-none" style="width: 320px;"
onsubmit="return confirm('Accept host &quot;{{$ph.Hostname}}&quot; (fingerprint {{$ph.Fingerprint}})? Make sure this matches what the install script printed.');">
<input type="text" name="repo_url" required placeholder="rest:http://…"
class="input mono" style="height: 28px; padding: 0 8px; font-size: 12px;">
<input type="text" name="repo_username" placeholder="repo username (optional)"
class="input mono" style="height: 28px; padding: 0 8px; font-size: 12px;">
<input type="password" name="repo_password" required placeholder="repo password"
class="input mono" style="height: 28px; padding: 0 8px; font-size: 12px;">
<div class="flex gap-2">
<button type="submit" class="btn btn-primary flex-1">Accept</button>
<button type="button" class="btn btn-danger flex-1"
hx-post="/api/pending-hosts/{{$ph.ID}}/reject"
hx-confirm="Reject pending host '{{$ph.Hostname}}'?"
hx-on::after-request="window.location.reload()">Reject</button>
</div>
</form>
</div>
</div>
{{end}}
</div>
</div>
{{end}}
{{/* ---------- hosts table ---------- */}}
<div class="pt-6 pb-4">
<div class="flex items-center justify-between mb-3">
+36 -2
View File
@@ -220,6 +220,32 @@
</div>
</div>
{{/* ---------- Host-default hooks ---------- */}}
<h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mt-9 mb-3.5">Host-default hooks</h2>
<form method="post" action="/hosts/{{$host.ID}}/repo/hooks" class="panel rounded-[7px] p-5">
<p class="text-[12px] text-ink-mute leading-[1.55] mb-3">
Defaults applied to every backup that doesn't set its own. Per-source-group hooks (on the
<a href="/hosts/{{$host.ID}}/sources" class="text-accent">Sources</a> tab) override these.
</p>
<div class="text-[12px] text-warn leading-[1.55] mb-3"
style="background: color-mix(in oklch, var(--warn), transparent 92%); border: 1px solid color-mix(in oklch, var(--warn), transparent 75%); padding: 8px 10px; border-radius: 5px;">
Hooks run as the agent service user — root on Linux, LocalSystem on Windows.
</div>
<div class="mb-3">
<label class="field-label" for="host_pre_hook">Pre-backup hook (default)</label>
<textarea id="host_pre_hook" name="pre_hook" class="field mono" rows="3" style="resize: vertical;"
placeholder="# default; per-group overrides win">{{$page.HostPreHook}}</textarea>
</div>
<div class="mb-3">
<label class="field-label" for="host_post_hook">Post-backup hook (default)</label>
<textarea id="host_post_hook" name="post_hook" class="field mono" rows="3" style="resize: vertical;"
placeholder="# RM_JOB_STATUS in env">{{$page.HostPostHook}}</textarea>
</div>
<div class="mt-3">
<button type="submit" class="btn btn-primary">Save host-default hooks</button>
</div>
</form>
{{/* ---------- Danger zone ---------- */}}
<h2 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-bad mt-9 mb-3.5">Danger zone</h2>
<div class="panel rounded-[7px] p-5"
@@ -238,8 +264,16 @@
<span class="mono text-ink-mid">secrets.enc</span> is reused.
</p>
</div>
<button class="btn btn-danger btn-lg flex-none" disabled
title="re-init flow lands in P2R-09">Re-init repo…</button>
<form method="post" action="/hosts/{{$host.ID}}/repo/reinit"
class="flex-none flex flex-col items-end" style="gap: 8px;"
onsubmit="return confirm('Re-initialise the repo on host &quot;{{$host.Name}}&quot;? Existing snapshots are lost if the rest-server allows the wipe; restic refuses if it sees a config file already there.');">
<input type="text" name="confirm_hostname" required autocomplete="off"
placeholder="type hostname to confirm"
class="input mono"
style="width: 240px; height: 30px; padding: 0 8px; font-size: 12px;">
<button type="submit" class="btn btn-danger btn-lg whitespace-nowrap"
{{if eq $host.Status "online"}}{{else}}disabled title="host is offline"{{end}}>Re-init repo…</button>
</form>
</div>
</div>
</div>
+10
View File
@@ -33,6 +33,8 @@
<div>Status</div>
<div>Cron</div>
<div>Sources</div>
<div>Next</div>
<div>Last</div>
<div></div>
</div>
{{range $i, $sc := $page.Schedules}}
@@ -52,6 +54,14 @@
<span class="tag" style="border-color: color-mix(in oklch, var(--accent), transparent 60%); color: var(--accent); {{if not $sc.Enabled}}opacity: 0.6;{{end}}">{{if $name}}{{$name}}{{else}}<span class="text-ink-fade">unknown</span>{{end}}</span>
{{end}}
</div>
<div class="mono text-[11.5px] {{if $sc.NextRun}}text-ink-mid{{else}}text-ink-fade{{end}}"
{{if $sc.NextRun}}title="{{$sc.NextRun.Format "2006-01-02 15:04:05 MST"}}"{{end}}>
{{if $sc.NextRun}}{{relTime $sc.NextRun}}{{else if not $sc.Enabled}}(paused){{else}}—{{end}}
</div>
<div class="mono text-[11.5px] {{if $sc.LastRun}}{{if eq $sc.LastStatus "failed"}}text-warn{{else}}text-ink-mid{{end}}{{else}}text-ink-fade{{end}}"
{{if $sc.LastRun}}title="{{$sc.LastRun.Format "2006-01-02 15:04:05 MST"}} · {{$sc.LastStatus}}"{{end}}>
{{if $sc.LastRun}}{{relTime $sc.LastRun}}{{else}}—{{end}}
</div>
<div class="flex gap-1.5 justify-end row-action">
{{if eq $host.Status "online"}}
{{if $sc.Enabled}}
+20 -5
View File
@@ -53,12 +53,27 @@
{{if gt $row.SnapshotCount 0}} · <span class="mono">{{$row.SnapshotCount}}</span> snapshot{{if ne $row.SnapshotCount 1}}s{{end}}{{end}}
</div>
</div>
<div class="flex justify-end row-action" style="gap: 6px;">
<div class="flex flex-col items-end row-action" style="gap: 6px;">
{{if and (gt (len $g.Includes) 0) (eq $host.Status "online")}}
<button class="btn btn-primary"
hx-post="/hosts/{{$host.ID}}/source-groups/{{$g.ID}}/run"
hx-swap="none"
hx-disabled-elt="this">Run now</button>
<form id="run-{{$g.ID}}" class="flex flex-col items-end" style="gap: 4px;">
<button class="btn btn-primary"
hx-post="/hosts/{{$host.ID}}/source-groups/{{$g.ID}}/run"
hx-include="#run-{{$g.ID}}"
hx-swap="none"
hx-disabled-elt="this">Run now</button>
<details class="text-[11px] text-ink-fade" style="text-align: right;">
<summary class="cursor-pointer hover:text-ink-mid select-none">Limit bandwidth for this run</summary>
<div class="flex items-center mt-2" style="gap: 6px; font-family: var(--font-mono);">
<label class="text-[10.5px] text-ink-mute"></label>
<input type="number" min="0" name="bandwidth_up_kbps" placeholder="—" class="input mono"
style="width: 70px; height: 22px; padding: 0 6px; font-size: 11px;">
<label class="text-[10.5px] text-ink-mute"></label>
<input type="number" min="0" name="bandwidth_down_kbps" placeholder="—" class="input mono"
style="width: 70px; height: 22px; padding: 0 6px; font-size: 11px;">
<span class="text-[10.5px] text-ink-fade">KB/s</span>
</div>
</details>
</form>
{{else}}
<button class="btn" disabled
title="{{if eq (len $g.Includes) 0}}add at least one include path before running{{else}}host is offline{{end}}">Run now</button>
@@ -95,6 +95,27 @@
Each retry doubles the wait. <strong>Manual run-now ignores this</strong> — it just fails immediately if the agent is offline.
</div>
<h3 class="text-[11.5px] font-semibold uppercase tracking-[0.08em] text-ink-mute mb-3.5 mt-7 pt-4 border-t border-line-soft">
Hooks
<span class="text-ink-fade font-medium normal-case tracking-[0.01em] ml-2">backup jobs only</span>
</h3>
<div class="text-[12px] text-warn leading-[1.55] mb-3"
style="background: color-mix(in oklch, var(--warn), transparent 92%); border: 1px solid color-mix(in oklch, var(--warn), transparent 75%); padding: 8px 10px; border-radius: 5px;">
Hooks run as the agent service user — root on Linux, LocalSystem on Windows. Treat them like any other root cron entry.
</div>
<div class="mb-3">
<label class="field-label" for="pre_hook">Pre-backup hook</label>
<textarea id="pre_hook" name="pre_hook" class="field mono" rows="3" style="resize: vertical;"
placeholder="# e.g. systemctl stop myapp">{{$f.PreHook}}</textarea>
<div class="field-help mt-1">Non-zero exit aborts the backup. Stored AEAD-encrypted.</div>
</div>
<div class="mb-3">
<label class="field-label" for="post_hook">Post-backup hook</label>
<textarea id="post_hook" name="post_hook" class="field mono" rows="3" style="resize: vertical;"
placeholder="# RM_JOB_STATUS={succeeded|failed} is in env">{{$f.PostHook}}</textarea>
<div class="field-help mt-1">Always runs. <span class="mono">RM_JOB_STATUS</span> is set to the backup's outcome. Stored AEAD-encrypted.</div>
</div>
<div class="mt-8 pt-4 border-t border-line-soft flex gap-2">
<button type="submit" class="btn btn-primary btn-lg">{{if $page.IsNew}}Create group{{else}}Save changes{{end}}</button>
<a href="/hosts/{{$host.ID}}/sources" class="btn btn-lg">Cancel</a>
+16
View File
@@ -105,6 +105,22 @@
</div>
</div>
{{/* ---------- repo init line (P2R-09) ---------- */}}
{{if $page.InitStatus}}
<div class="text-[11.5px] text-ink-mute mt-2.5 leading-[1.5]">
{{if eq $page.InitStatus "succeeded"}}
repo ready · initialised <span class="mono text-ink-mid" {{if $page.InitAt}}title="{{$page.InitAt.Format "2006-01-02 15:04:05 MST"}}"{{end}}>{{relTime $page.InitAt}}</span>
{{else if eq $page.InitStatus "failed"}}
<span class="text-bad font-medium">init failed</span> ·
<a href="/jobs/{{$page.InitJobID}}" class="link mono">job {{$page.InitJobID}}</a> · retry from the Repo tab's danger zone
{{else if eq $page.InitStatus "running"}}
<span class="text-accent">init running…</span> · <a href="/jobs/{{$page.InitJobID}}" class="link mono">live log →</a>
{{else if eq $page.InitStatus "queued"}}
<span class="text-ink-fade">init queued</span> · <a href="/jobs/{{$page.InitJobID}}" class="link mono">job {{$page.InitJobID}}</a>
{{end}}
</div>
{{end}}
{{/* ---------- secondary tabs ---------- */}}
<div class="flex items-end mt-1.5">
<a class="sub-tab {{if eq $page.SubTab "snapshots"}}active{{end}}" href="/hosts/{{$host.ID}}">Snapshots <span class="mono text-ink-fade text-[11px] ml-1">{{comma $host.SnapshotCount}}</span></a>
+3
View File
@@ -30,6 +30,9 @@
{{- else -}}
<span class="text-ink-fade italic">never run</span>
{{- end -}}
{{- if .NextRun -}}
<br><span class="mono text-[10.5px] text-ink-fade" title="{{.NextRun.Format "2006-01-02 15:04:05 MST"}}">next {{relTime .NextRun}}</span>
{{- end -}}
</div>
<div class="text-right mono {{if eq $h.Status "offline"}}text-ink-mid{{else}}text-ink{{end}}">{{bytes $h.RepoSizeBytes}}</div>
<div class="text-right mono {{if eq $h.Status "offline"}}text-ink-mute{{else}}text-ink-mid{{end}}">