agent+server: P2R-11 pre/post hook execution for backup jobs
Agent: new runner.BackupHooks struct + runHook helper invoked via /bin/sh -c (cmd.exe /C on Windows). pre_hook non-zero exit aborts the backup; post_hook always runs with RM_JOB_STATUS=succeeded|failed in env. Output streamed as 'hook(<phase>): …' log.stream lines. Hooks only run for kind=backup (other kinds skip both phases). Server: resolveBackupHooks resolves group → host default → empty, decrypts via crypto.AEAD with per-slot ad bytes, plumbs plaintext into CommandRunPayload for both schedule.fire and per-group Run-now dispatch sites. Decrypt failures degrade silently to no hook so a malformed blob can't poison every backup.
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
// hooks.go — pre/post backup hooks for the agent runner (P2R-11).
|
||||
//
|
||||
// Hooks fire only for backup jobs (the runner's other kinds —
|
||||
// init/forget/prune/check/unlock — call shell scripts that touch
|
||||
// repo internals; running operator hooks for those would be
|
||||
// surprising). Hook bodies arrive plaintext on the wire (server
|
||||
// decrypted before the WS push). The agent never persists them
|
||||
// to disk; they live in memory for the lifetime of one job.
|
||||
//
|
||||
// Failure semantics:
|
||||
// - pre_hook non-zero exit aborts the backup: the runner returns
|
||||
// the error, the job is recorded as failed, and the actual
|
||||
// restic invocation never runs.
|
||||
// - post_hook non-zero exit is logged with a warning prefix in
|
||||
// the job log but does NOT change the job status — the operator
|
||||
// wants the backup result preserved even if the cleanup step
|
||||
// misbehaved.
|
||||
//
|
||||
// Streaming: each line of the hook's stdout/stderr is shipped as a
|
||||
// log.stream envelope with payload prefixed `hook: ` so the live
|
||||
// log viewer can visually separate it from restic's own output.
|
||||
package runner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
)
|
||||
|
||||
// runHook executes script via the host shell. status is the value
|
||||
// passed as RM_JOB_STATUS in the env (empty for pre-hooks; the
|
||||
// final job status — "succeeded" or "failed" — for post-hooks).
|
||||
// Returns an error iff the hook exited non-zero. ctx cancellation
|
||||
// kills the subprocess.
|
||||
func (r *Runner) runHook(ctx context.Context, jobID, phase, script, status string, seq *atomic.Int64) error {
|
||||
if script == "" {
|
||||
return nil
|
||||
}
|
||||
shell, flag := defaultShell()
|
||||
cmd := exec.CommandContext(ctx, shell, flag, script)
|
||||
cmd.Env = []string{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
}
|
||||
if status != "" {
|
||||
cmd.Env = append(cmd.Env, "RM_JOB_STATUS="+status)
|
||||
}
|
||||
cmd.Env = append(cmd.Env, "RM_JOB_ID="+jobID, "RM_HOOK_PHASE="+phase)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("hook %s: stdout pipe: %w", phase, err)
|
||||
}
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("hook %s: stderr pipe: %w", phase, err)
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("hook %s: start: %w", phase, err)
|
||||
}
|
||||
done := make(chan struct{}, 2)
|
||||
go func() { r.pumpHookLines(stdout, "stdout", phase, jobID, seq); done <- struct{}{} }()
|
||||
go func() { r.pumpHookLines(stderr, "stderr", phase, jobID, seq); done <- struct{}{} }()
|
||||
<-done
|
||||
<-done
|
||||
if werr := cmd.Wait(); werr != nil {
|
||||
return fmt.Errorf("hook %s exited non-zero: %w", phase, werr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pumpHookLines streams lines as log.stream envelopes prefixed with
|
||||
// "hook(<phase>): " so the live log can visually separate them.
|
||||
func (r *Runner) pumpHookLines(rd io.Reader, stream, phase, jobID string, seq *atomic.Int64) {
|
||||
scanner := bufio.NewScanner(rd)
|
||||
scanner.Buffer(make([]byte, 0, 64*1024), 256*1024)
|
||||
for scanner.Scan() {
|
||||
line := "hook(" + phase + "): " + scanner.Text()
|
||||
env, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
|
||||
JobID: jobID,
|
||||
Seq: seq.Add(1),
|
||||
TS: time.Now().UTC(),
|
||||
Stream: api.LogStream(stream),
|
||||
Payload: line,
|
||||
})
|
||||
_ = r.tx.Send(env)
|
||||
}
|
||||
}
|
||||
|
||||
// defaultShell returns the (binary, single-arg-flag) pair to use for
|
||||
// `<shell> <flag> "<script>"`. /bin/sh -c on Unix; cmd.exe /C on
|
||||
// Windows. The hook author writes whichever shell they prefer
|
||||
// inside the script body itself (PowerShell, bash, etc) — this is
|
||||
// just the bootstrap interpreter.
|
||||
func defaultShell() (string, string) {
|
||||
if runtime.GOOS == "windows" {
|
||||
return "cmd.exe", "/C"
|
||||
}
|
||||
return "/bin/sh", "-c"
|
||||
}
|
||||
Reference in New Issue
Block a user