From 7b1990cf111a83ffe45d2eb7430390e0e9ff1312 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Mon, 4 May 2026 10:57:28 +0100 Subject: [PATCH] agent+server: P2R-11 pre/post hook execution for backup jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent: new runner.BackupHooks struct + runHook helper invoked via /bin/sh -c (cmd.exe /C on Windows). pre_hook non-zero exit aborts the backup; post_hook always runs with RM_JOB_STATUS=succeeded|failed in env. Output streamed as 'hook(): …' log.stream lines. Hooks only run for kind=backup (other kinds skip both phases). Server: resolveBackupHooks resolves group → host default → empty, decrypts via crypto.AEAD with per-slot ad bytes, plumbs plaintext into CommandRunPayload for both schedule.fire and per-group Run-now dispatch sites. Decrypt failures degrade silently to no hook so a malformed blob can't poison every backup. --- cmd/agent/main.go | 3 +- internal/agent/runner/hooks.go | 106 ++++++++++++++++++++++++++ internal/agent/runner/hooks_test.go | 90 ++++++++++++++++++++++ internal/agent/runner/runner.go | 39 +++++++++- internal/server/http/hooks_resolve.go | 75 ++++++++++++++++++ internal/server/http/run_group.go | 9 +++ internal/server/http/schedule_push.go | 14 ++++ internal/store/hooks_test.go | 30 ++++---- internal/store/hosts.go | 18 +++-- internal/store/sources.go | 24 +++--- internal/store/types.go | 23 +++--- 11 files changed, 379 insertions(+), 52 deletions(-) create mode 100644 internal/agent/runner/hooks.go create mode 100644 internal/agent/runner/hooks_test.go create mode 100644 internal/server/http/hooks_resolve.go diff --git a/cmd/agent/main.go b/cmd/agent/main.go index d6c3d8b..0c2b691 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -359,8 +359,9 @@ func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsc } slog.Info("agent: accepting backup job", "job_id", p.JobID, "paths", paths, "excludes", p.Excludes, "tag", p.Tag) + hooks := runner.BackupHooks{Pre: p.PreHook, Post: p.PostHook} go func() { - if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags); err != nil { + if err := r.RunBackup(ctx, p.JobID, paths, p.Excludes, tags, hooks); err != nil { slog.Warn("agent: backup job failed", "job_id", p.JobID, "err", err) return } diff --git a/internal/agent/runner/hooks.go b/internal/agent/runner/hooks.go new file mode 100644 index 0000000..904b100 --- /dev/null +++ b/internal/agent/runner/hooks.go @@ -0,0 +1,106 @@ +// hooks.go — pre/post backup hooks for the agent runner (P2R-11). +// +// Hooks fire only for backup jobs (the runner's other kinds — +// init/forget/prune/check/unlock — call shell scripts that touch +// repo internals; running operator hooks for those would be +// surprising). Hook bodies arrive plaintext on the wire (server +// decrypted before the WS push). The agent never persists them +// to disk; they live in memory for the lifetime of one job. +// +// Failure semantics: +// - pre_hook non-zero exit aborts the backup: the runner returns +// the error, the job is recorded as failed, and the actual +// restic invocation never runs. +// - post_hook non-zero exit is logged with a warning prefix in +// the job log but does NOT change the job status — the operator +// wants the backup result preserved even if the cleanup step +// misbehaved. +// +// Streaming: each line of the hook's stdout/stderr is shipped as a +// log.stream envelope with payload prefixed `hook: ` so the live +// log viewer can visually separate it from restic's own output. +package runner + +import ( + "bufio" + "context" + "fmt" + "io" + "os/exec" + "runtime" + "sync/atomic" + "time" + + "gitea.dcglab.co.uk/steve/restic-manager/internal/api" +) + +// runHook executes script via the host shell. status is the value +// passed as RM_JOB_STATUS in the env (empty for pre-hooks; the +// final job status — "succeeded" or "failed" — for post-hooks). +// Returns an error iff the hook exited non-zero. ctx cancellation +// kills the subprocess. +func (r *Runner) runHook(ctx context.Context, jobID, phase, script, status string, seq *atomic.Int64) error { + if script == "" { + return nil + } + shell, flag := defaultShell() + cmd := exec.CommandContext(ctx, shell, flag, script) + cmd.Env = []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + } + if status != "" { + cmd.Env = append(cmd.Env, "RM_JOB_STATUS="+status) + } + cmd.Env = append(cmd.Env, "RM_JOB_ID="+jobID, "RM_HOOK_PHASE="+phase) + + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("hook %s: stdout pipe: %w", phase, err) + } + stderr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("hook %s: stderr pipe: %w", phase, err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("hook %s: start: %w", phase, err) + } + done := make(chan struct{}, 2) + go func() { r.pumpHookLines(stdout, "stdout", phase, jobID, seq); done <- struct{}{} }() + go func() { r.pumpHookLines(stderr, "stderr", phase, jobID, seq); done <- struct{}{} }() + <-done + <-done + if werr := cmd.Wait(); werr != nil { + return fmt.Errorf("hook %s exited non-zero: %w", phase, werr) + } + return nil +} + +// pumpHookLines streams lines as log.stream envelopes prefixed with +// "hook(): " so the live log can visually separate them. +func (r *Runner) pumpHookLines(rd io.Reader, stream, phase, jobID string, seq *atomic.Int64) { + scanner := bufio.NewScanner(rd) + scanner.Buffer(make([]byte, 0, 64*1024), 256*1024) + for scanner.Scan() { + line := "hook(" + phase + "): " + scanner.Text() + env, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{ + JobID: jobID, + Seq: seq.Add(1), + TS: time.Now().UTC(), + Stream: api.LogStream(stream), + Payload: line, + }) + _ = r.tx.Send(env) + } +} + +// defaultShell returns the (binary, single-arg-flag) pair to use for +// ` "