P3-03: restic restore + diff execution path

Wires JobRestore and JobDiff end-to-end at the agent layer (the wizard
backend that drives this lands in the next slice).

- internal/api: JobRestore + JobDiff JobKind constants. CommandRunPayload
  grows nullable Restore + Diff sub-payloads. RestorePayload carries
  snapshot_id, paths, in_place, target_dir; DiffPayload carries
  snapshot_a + snapshot_b.
- internal/restic.RunRestore wraps 'restic restore <sid> --target ...
  [--no-ownership] [--include p]...' with --json. New pumpRestoreStdout
  parses the per-line status / summary objects (drops raw status from
  log.stream — the throttled job.progress envelope covers it). New
  RestoreStatus + RestoreSummary types mirror restic's wire shape.
- internal/restic.RunDiff wraps 'restic diff --json <a> <b>'.
- internal/agent/runner: RunRestore translates RestoreStatus into
  job.progress (mapping FilesRestored → FilesDone etc) with a small
  estimateETA helper since restic doesn't provide ETA for restore.
  RunDiff is a thin streamHandler wrapper.
- cmd/agent dispatcher gains JobRestore + JobDiff cases. Both reuse
  the spawn() helper from P3-X1 so cancel just works.
- Drive-by fix: lastProgress was initialised to time.Now(), so the
  very first status event was suppressed by the 1s throttle whenever
  restic emitted it within a second of the job starting. Initialise to
  time.Time{} (zero) so the first event always emits. Affects backup
  + restore.

Tests:
- restore_test covers restore happy path (started → progress →
  finished, kind=restore on the started envelope), in-place argv
  asserts no --no-ownership, new-dir argv asserts --no-ownership +
  --target + --include, diff produces the expected log.stream lines.

Restage block (CLAUDE.md) is deferred to the end of the restore
sub-phase so we restage once with all changes.
This commit is contained in:
2026-05-04 15:24:14 +01:00
parent 13f58bd052
commit f5e3bca6a2
6 changed files with 611 additions and 9 deletions
+233
View File
@@ -0,0 +1,233 @@
package runner
import (
"context"
"strings"
"testing"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
)
// TestRunRestoreShipsExpectedEnvelopes drives RunRestore against a fake
// restic that prints two restore status lines followed by a summary.
// It verifies that job.started, job.progress and job.finished all show
// up and first appear in that order, that the started envelope is
// tagged as a restore, that the job finishes as succeeded, and that the
// first progress envelope carries non-zero counters.
func TestRunRestoreShipsExpectedEnvelopes(t *testing.T) {
	t.Parallel()

	const script = `
case "$1" in
restore)
echo '{"message_type":"status","seconds_elapsed":1,"percent_done":0.5,"total_files":10,"files_restored":5,"total_bytes":1000,"bytes_restored":500}'
echo '{"message_type":"status","seconds_elapsed":2,"percent_done":1.0,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}'
echo '{"message_type":"summary","seconds_elapsed":2,"total_files":10,"files_restored":10,"total_bytes":1000,"bytes_restored":1000}'
;;
*)
echo "unknown: $*" ;;
esac
`
	bin := setupScript(t, script)
	tx := &fakeSender{}
	r := New(Config{ResticBin: bin}, tx, 0)

	restorePaths := []string{"/etc/nginx/sites-available/alfa.conf"}
	err := r.RunRestore(context.Background(), "job-r1", "f3a7b2c1", restorePaths, false, "/tmp/restore-out")
	if err != nil {
		t.Fatalf("RunRestore: %v", err)
	}

	// Remember the index at which each message type first appears.
	order := envelopeOrder(tx.envs)
	firstSeen := map[api.MessageType]int{}
	for idx, kind := range order {
		if _, dup := firstSeen[kind]; dup {
			continue
		}
		firstSeen[kind] = idx
	}

	// Each landmark must exist and must first appear before its successor.
	landmarks := []api.MessageType{api.MsgJobStarted, api.MsgJobProgress, api.MsgJobFinished}
	for i := 1; i < len(landmarks); i++ {
		prev, next := landmarks[i-1], landmarks[i]
		prevPos, havePrev := firstSeen[prev]
		nextPos, haveNext := firstSeen[next]
		switch {
		case !havePrev:
			t.Fatalf("envelope %q not found in %v", prev, order)
		case !haveNext:
			t.Fatalf("envelope %q not found in %v", next, order)
		case prevPos >= nextPos:
			t.Fatalf("expected %q before %q (positions %d, %d)", prev, next, prevPos, nextPos)
		}
	}

	// The started envelope must announce a restore job.
	var started api.JobStartedPayload
	startedEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted)
	if err := startedEnv.UnmarshalPayload(&started); err != nil {
		t.Fatalf("unmarshal started: %v", err)
	}
	if started.Kind != api.JobRestore {
		t.Fatalf("kind: got %q want %q", started.Kind, api.JobRestore)
	}

	// The finished envelope must report success.
	var finished api.JobFinishedPayload
	finishedEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
	if err := finishedEnv.UnmarshalPayload(&finished); err != nil {
		t.Fatalf("unmarshal finished: %v", err)
	}
	if finished.Status != api.JobSucceeded {
		t.Fatalf("status: got %q want %q", finished.Status, api.JobSucceeded)
	}

	// Only the FIRST progress envelope is inspected. It is expected to
	// come from the first status line (50%), because the first status is
	// sent immediately, so we only assert the counters are populated
	// rather than pinning exact values.
	var progress api.JobProgressPayload
	progressEnv := firstEnvOfType(t, tx.envs, api.MsgJobProgress)
	if err := progressEnv.UnmarshalPayload(&progress); err != nil {
		t.Fatalf("unmarshal progress: %v", err)
	}
	if progress.PercentDone <= 0 {
		t.Fatalf("expected non-zero progress, got %v", progress.PercentDone)
	}
	if progress.FilesDone <= 0 || progress.TotalFiles <= 0 {
		t.Fatalf("expected file counters set, got %+v", progress)
	}
}
// TestRunRestoreInPlaceArgvHasNoNoOwnership indirectly verifies the argv
// used for an in-place restore: --no-ownership must be absent and the
// target must be exactly "/". The real argv is not observable from here,
// so the fake restic echoes its arguments on stderr and the test reads
// them back from the captured log.stream envelopes.
func TestRunRestoreInPlaceArgvHasNoNoOwnership(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
restore)
# Print all args on stderr so they're forwarded as log.stream.
echo "argv: $*" 1>&2
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
;;
esac
`)
	tx := &fakeSender{}
	r := New(Config{ResticBin: bin}, tx, 0)
	if err := r.RunRestore(context.Background(), "job-r2", "abc",
		nil, true, ""); err != nil {
		t.Fatalf("RunRestore: %v", err)
	}

	// Reconstruct the argv from the captured stderr log line.
	var argv string
	for _, e := range tx.envs {
		if e.Type != api.MsgLogStream {
			continue
		}
		var p api.LogStreamLine
		// A log.stream envelope that cannot be decoded is a failure in
		// its own right; don't let it masquerade as "argv not found".
		if err := e.UnmarshalPayload(&p); err != nil {
			t.Fatalf("unmarshal log.stream: %v", err)
		}
		if p.Stream == api.LogStderr && strings.HasPrefix(p.Payload, "argv:") {
			argv = p.Payload
			break
		}
	}
	if argv == "" {
		t.Fatal("never captured argv echo from fake restic")
	}

	// Negative check stays a substring match so any spelling of the flag
	// (e.g. with an "=value" suffix) is still caught.
	if strings.Contains(argv, "--no-ownership") {
		t.Errorf("in-place restore should NOT pass --no-ownership; got argv=%q", argv)
	}

	// Positive check compares whole tokens: a substring match on
	// "--target /" would also accept "--target /tmp/anything".
	args := strings.Fields(strings.TrimPrefix(argv, "argv:"))
	targetIsRoot := false
	for i := 0; i+1 < len(args); i++ {
		if args[i] == "--target" && args[i+1] == "/" {
			targetIsRoot = true
			break
		}
	}
	if !targetIsRoot {
		t.Errorf("in-place restore should pass --target /; got argv=%q", argv)
	}
}
// TestRunRestoreNewDirArgvHasNoOwnership is the complement of the
// in-place argv test: a restore into a separate directory must pass
// --no-ownership, the requested --target and an --include for each
// selected path. The fake restic echoes its arguments on stderr and the
// test reads them back from the captured log.stream envelopes.
func TestRunRestoreNewDirArgvHasNoOwnership(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
restore)
echo "argv: $*" 1>&2
echo '{"message_type":"summary","seconds_elapsed":0,"total_files":0,"files_restored":0,"total_bytes":0,"bytes_restored":0}'
;;
esac
`)
	tx := &fakeSender{}
	r := New(Config{ResticBin: bin}, tx, 0)
	if err := r.RunRestore(context.Background(), "job-r3", "abc",
		[]string{"/etc/foo"}, false, "/tmp/restore-out"); err != nil {
		t.Fatalf("RunRestore: %v", err)
	}

	var argv string
	for _, e := range tx.envs {
		if e.Type != api.MsgLogStream {
			continue
		}
		var p api.LogStreamLine
		// A log.stream envelope that cannot be decoded is a failure in
		// its own right; don't let it masquerade as "no argv echo".
		if err := e.UnmarshalPayload(&p); err != nil {
			t.Fatalf("unmarshal log.stream: %v", err)
		}
		if p.Stream == api.LogStderr && strings.HasPrefix(p.Payload, "argv:") {
			argv = p.Payload
			break
		}
	}
	if argv == "" {
		t.Fatal("no argv echo")
	}

	// Compare whole tokens so that, for example, "--include /etc/foobar"
	// or "--target /tmp/restore-out2" cannot satisfy the assertions.
	args := strings.Fields(strings.TrimPrefix(argv, "argv:"))
	hasFlagValue := func(flag, value string) bool {
		for i := 0; i+1 < len(args); i++ {
			if args[i] == flag && args[i+1] == value {
				return true
			}
		}
		return false
	}

	if !strings.Contains(argv, "--no-ownership") {
		t.Errorf("new-dir restore should pass --no-ownership; got argv=%q", argv)
	}
	if !hasFlagValue("--target", "/tmp/restore-out") {
		t.Errorf("expected --target /tmp/restore-out; got argv=%q", argv)
	}
	if !hasFlagValue("--include", "/etc/foo") {
		t.Errorf("expected --include /etc/foo; got argv=%q", argv)
	}
}
// TestRunDiffShipsLogLines runs RunDiff against a fake restic that
// prints one "change" line and one "statistics" line. It checks that
// the started envelope is tagged as a diff job, that the job finishes
// as succeeded, and that the change line is forwarded as log.stream.
func TestRunDiffShipsLogLines(t *testing.T) {
	t.Parallel()
	bin := setupScript(t, `
case "$1" in
diff)
echo '{"message_type":"change","path":"/etc/nginx/nginx.conf","modifier":"M"}'
echo '{"message_type":"statistics","added":{"files":0,"dirs":0}}'
;;
esac
`)
	tx := &fakeSender{}
	r := New(Config{ResticBin: bin}, tx, 0)
	if err := r.RunDiff(context.Background(), "job-d1", "snap-a", "snap-b"); err != nil {
		t.Fatalf("RunDiff: %v", err)
	}

	// Decode failures are reported explicitly so they are not mistaken
	// for a wrong kind/status on a zero-valued payload.
	startEnv := firstEnvOfType(t, tx.envs, api.MsgJobStarted)
	var startP api.JobStartedPayload
	if err := startEnv.UnmarshalPayload(&startP); err != nil {
		t.Fatalf("unmarshal started: %v", err)
	}
	if startP.Kind != api.JobDiff {
		t.Fatalf("kind: got %q want %q", startP.Kind, api.JobDiff)
	}

	finEnv := firstEnvOfType(t, tx.envs, api.MsgJobFinished)
	var finP api.JobFinishedPayload
	if err := finEnv.UnmarshalPayload(&finP); err != nil {
		t.Fatalf("unmarshal finished: %v", err)
	}
	if finP.Status != api.JobSucceeded {
		t.Fatalf("status: %q", finP.Status)
	}

	// At least one log line should carry the change payload.
	var sawChange bool
	for _, e := range tx.envs {
		if e.Type != api.MsgLogStream {
			continue
		}
		var p api.LogStreamLine
		if err := e.UnmarshalPayload(&p); err != nil {
			t.Fatalf("unmarshal log.stream: %v", err)
		}
		if strings.Contains(p.Payload, `"message_type":"change"`) {
			sawChange = true
			break
		}
	}
	if !sawChange {
		t.Fatal("never saw a change log line in diff output")
	}
}
+97 -1
View File
@@ -156,7 +156,7 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t
}
env := r.resticEnv()
lastProgress := time.Now()
lastProgress := time.Time{} // zero time → first status event always emits
handle := func(stream string, line string, ev any) {
// Throttled progress events come from restic's `status` JSON.
@@ -359,6 +359,102 @@ func (r *Runner) RunCheck(ctx context.Context, jobID string, subsetPct int) erro
return nil
}
// RunRestore runs a restic restore of snapshotID and reports the job's
// lifecycle through the sender: a started envelope up front, log.stream
// and job.progress envelopes while restic runs, and a finished envelope
// at the end carrying the JSON-encoded summary when one was returned.
//
// paths is the operator-selected file/dir list to restore. With
// inPlace=true the restore preserves uid/gid/mode and writes at "/";
// with inPlace=false it writes at targetDir with --no-ownership.
//
// restic's status events are never forwarded as log.stream lines (they
// would drown the log on a fast restore); they are instead throttled
// into job.progress envelopes, at most one per r.progressMinPeriod.
// Every other event is forwarded verbatim as a log.stream line.
//
// The returned error is the restic error wrapped with "runner restore",
// or nil on success.
func (r *Runner) RunRestore(ctx context.Context, jobID, snapshotID string, paths []string, inPlace bool, targetDir string) error {
	r.sendStarted(jobID, api.JobRestore, time.Now().UTC())

	env := r.resticEnv()

	var (
		seq          atomic.Int64
		lastProgress time.Time // zero value → first status event always emits
	)

	onEvent := func(stream string, line string, ev any) {
		st, ok := ev.(restic.RestoreStatus)
		if !ok {
			// Anything that is not a status object goes out as a log line.
			ts := time.Now().UTC()
			logEnv, _ := api.Marshal(api.MsgLogStream, "", api.LogStreamLine{
				JobID:   jobID,
				Seq:     seq.Add(1),
				TS:      ts,
				Stream:  api.LogStream(stream),
				Payload: line,
			})
			_ = r.tx.Send(logEnv)
			return
		}

		// Status object: emit progress unless we are inside the throttle window.
		if time.Since(lastProgress) < r.progressMinPeriod {
			return
		}
		lastProgress = time.Now()

		progress := api.JobProgressPayload{
			JobID:       jobID,
			PercentDone: st.PercentDone,
			FilesDone:   st.FilesRestored,
			TotalFiles:  st.TotalFiles,
			BytesDone:   st.BytesRestored,
			TotalBytes:  st.TotalBytes,
			// Computed locally from bytes done and elapsed seconds.
			ETASeconds:    estimateETA(st.BytesRestored, st.TotalBytes, st.SecondsElapsed),
			ThroughputBps: throughput(st.BytesRestored, st.SecondsElapsed),
		}
		progEnv, _ := api.Marshal(api.MsgJobProgress, jobID, progress)
		_ = r.tx.Send(progEnv)
	}

	summary, runErr := env.RunRestore(ctx, snapshotID, paths, inPlace, targetDir, onEvent)
	finishedAt := time.Now().UTC()

	// Attach the summary as the stats blob when restic produced one.
	var stats json.RawMessage
	if summary != nil {
		stats, _ = json.Marshal(summary)
	}
	r.sendFinished(ctx, jobID, finishedAt, runErr, stats)

	if runErr == nil {
		return nil
	}
	return fmt.Errorf("runner restore: %w", runErr)
}
// estimateETA returns the estimated number of whole seconds remaining,
// linearly extrapolated from the average rate so far (bytesDone per
// secondsElapsed). Restic restore's --json doesn't emit an ETA field of
// its own (unlike backup), so the runner approximates one.
//
// It returns 0 when there is not enough data to extrapolate (any input
// is zero or negative) or when nothing is left to transfer
// (bytesDone >= totalBytes). The result is truncated toward zero.
func estimateETA(bytesDone, totalBytes, secondsElapsed int64) int64 {
	if bytesDone <= 0 || totalBytes <= 0 || secondsElapsed <= 0 || bytesDone >= totalBytes {
		return 0
	}
	// Both operands are strictly positive past the guard above, so the
	// rate is strictly positive as well and the division below is safe.
	rate := float64(bytesDone) / float64(secondsElapsed)
	return int64(float64(totalBytes-bytesDone) / rate)
}
// RunDiff runs `restic diff --json <a> <b>` for the two given snapshots
// and forwards restic's output through the runner's stream handler.
// The job is bracketed by a started envelope (kind JobDiff) and a
// finished envelope with no stats blob. No snapshot-list refresh and no
// stats update happen here: diff is purely informational.
//
// The returned error is the restic error wrapped with "runner diff",
// or nil on success.
func (r *Runner) RunDiff(ctx context.Context, jobID, snapshotA, snapshotB string) error {
	r.sendStarted(jobID, api.JobDiff, time.Now().UTC())

	var seq atomic.Int64
	env := r.resticEnv()
	runErr := env.RunDiff(ctx, snapshotA, snapshotB, r.streamHandler(jobID, &seq))

	// The finished envelope is sent whether or not restic succeeded.
	r.sendFinished(ctx, jobID, time.Now().UTC(), runErr, nil)

	if runErr == nil {
		return nil
	}
	return fmt.Errorf("runner diff: %w", runErr)
}
// RunUnlock executes a `restic unlock` job. On success it ships a
// repo.stats envelope with LockPresent=false so the UI banner clears.
func (r *Runner) RunUnlock(ctx context.Context, jobID string) error {