agent+server: apply host bandwidth caps to restic invocations

P2R-13a. restic.Env gains LimitUploadKBps/LimitDownloadKBps, which are
emitted as global --limit-upload/--limit-download flags before the
subcommand on every invocation. The agent dispatcher tracks host-wide
caps received via config.update; the server pushes them on hello and
after PUT /api/hosts/{id}/bandwidth.

Also extends api.CommandRunPayload with optional per-job overrides
(BandwidthUpKBps/BandwidthDownKBps + PreHook/PostHook); the override
consumers land in T2/T6.
This commit is contained in:
2026-05-04 10:38:34 +01:00
parent 95ab3f4d16
commit cdf88c6dc3
8 changed files with 246 additions and 35 deletions
+51 -8
View File
@@ -9,6 +9,7 @@ import (
"os"
"os/signal"
"strconv"
"sync"
"syscall"
"time"
@@ -170,6 +171,14 @@ type dispatcher struct {
resticBin string
secrets *secrets.Store
scheduler *scheduler.Scheduler
// Bandwidth caps in KB/s pushed via config.update. Mutated under
// bwMu by the config.update handler; read by runJob when building
// the runner. <=0 means "no cap" (do not pass --limit-* to restic).
// Per-job overrides on CommandRunPayload take precedence.
bwMu sync.Mutex
bwUpKBps int
bwDownKBps int
}
func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.Sender) error {
@@ -263,6 +272,24 @@ func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.S
slog.Warn("ws agent: unknown config.update slot, ignoring", "slot", p.Slot)
}
// Bandwidth caps ride independently of the slot — they're host-
// wide and apply to every restic invocation regardless of which
// credentials slot the job uses. nil pointer = no change in this
// push; non-nil = set to that value (≤0 clears the cap).
if p.BandwidthUpKBps != nil || p.BandwidthDownKBps != nil {
d.bwMu.Lock()
if p.BandwidthUpKBps != nil {
d.bwUpKBps = *p.BandwidthUpKBps
}
if p.BandwidthDownKBps != nil {
d.bwDownKBps = *p.BandwidthDownKBps
}
up, down := d.bwUpKBps, d.bwDownKBps
d.bwMu.Unlock()
slog.Info("ws agent: bandwidth caps updated",
"up_kbps", up, "down_kbps", down)
}
case api.MsgAgentUpdateAvail:
var p api.AgentUpdateAvailablePayload
_ = env.UnmarshalPayload(&p)
@@ -295,11 +322,25 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
// not on r). If you find yourself adding a new JobKind that
// needs delete authority, mirror the JobPrune pattern below
// — don't try to overload r.
// Resolve bandwidth caps: per-job override (if set) wins over the
// host-wide caps last pushed via config.update. <=0 means no cap.
d.bwMu.Lock()
upKBps, downKBps := d.bwUpKBps, d.bwDownKBps
d.bwMu.Unlock()
if p.BandwidthUpKBps != nil {
upKBps = *p.BandwidthUpKBps
}
if p.BandwidthDownKBps != nil {
downKBps = *p.BandwidthDownKBps
}
r := runner.New(runner.Config{
ResticBin: d.resticBin,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
ResticBin: d.resticBin,
RepoURL: creds.URL,
RepoUsername: creds.Username,
RepoPassword: creds.Password,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
}, tx, time.Second)
switch p.Kind {
@@ -381,10 +422,12 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc
runCreds = ac
}
prr := runner.New(runner.Config{
ResticBin: d.resticBin,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
ResticBin: d.resticBin,
RepoURL: runCreds.URL,
RepoUsername: runCreds.Username,
RepoPassword: runCreds.Password,
LimitUploadKBps: upKBps,
LimitDownloadKBps: downKBps,
}, tx, time.Second)
slog.Info("agent: accepting prune job", "job_id", p.JobID, "admin_creds", p.RequiresAdminCreds)
go func() {