Files
restic-manager/internal/api/messages.go
T
steve 265b4b6c5d P3-03: restic restore + diff execution path
Wires JobRestore and JobDiff end-to-end at the agent layer (the wizard
backend that drives this lands in the next slice).

- internal/api: JobRestore + JobDiff JobKind constants. CommandRunPayload
  grows nullable Restore + Diff sub-payloads. RestorePayload carries
  snapshot_id, paths, in_place, target_dir; DiffPayload carries
  snapshot_a + snapshot_b.
- internal/restic.RunRestore wraps 'restic restore <sid> --target ...
  [--no-ownership] [--include p]...' with --json. New pumpRestoreStdout
  parses the per-line status / summary objects (drops raw status from
  log.stream — the throttled job.progress envelope covers it). New
  RestoreStatus + RestoreSummary types mirror restic's wire shape.
- internal/restic.RunDiff wraps 'restic diff --json <a> <b>'.
- internal/agent/runner: RunRestore translates RestoreStatus into
  job.progress (mapping FilesRestored → FilesDone etc) with a small
  estimateETA helper since restic doesn't provide ETA for restore.
  RunDiff is a thin streamHandler wrapper.
- cmd/agent dispatcher gains JobRestore + JobDiff cases. Both reuse
  the spawn() helper from P3-X1 so cancel just works.
- Drive-by fix: lastProgress was initialised to time.Now() so the
  very first status event was suppressed by the 1s throttle if the
  agent reported quickly. Initialise to time.Time{} (zero) so the
  first event always emits. Affects backup + restore.

Tests:
- restore_test covers restore happy path (started → progress →
  finished, kind=restore on the started envelope), in-place argv
  asserts no --no-ownership, new-dir argv asserts --no-ownership +
  --target + --include, diff produces the expected log.stream lines.

Restage block (CLAUDE.md) is deferred to the end of the restore
sub-phase so we restage once with all changes.
2026-05-04 15:24:14 +01:00

406 lines
17 KiB
Go

package api
import (
"encoding/json"
"time"
)
// HostOS names the operating system an agent runs on. Like HostArch
// it is a constrained string type: the store persists the value raw,
// so agent metadata collection should assign one of the constants
// below rather than an ad-hoc string — otherwise rows could end up
// holding both "linux" and "Linux".
type HostOS string

// Permitted HostOS values. They are lowercase on the wire so the
// server can enforce them with a single CHECK constraint.
const (
	OSLinux   HostOS = "linux"
	OSWindows HostOS = "windows"
)
// HostArch names the CPU architecture of the machine an agent runs
// on. It follows the same lowercase-on-the-wire rule as HostOS.
type HostArch string

// Permitted HostArch values.
const (
	ArchAmd64 HostArch = "amd64"
	ArchArm64 HostArch = "arm64"
)
// HelloPayload is the first message an agent sends once WS auth has
// succeeded. On receipt the server upserts a Host row, marks the host
// online and — provided protocol_version is acceptable — answers with
// a config.update + schedule.set burst.
type HelloPayload struct {
	ProtocolVersion int      `json:"protocol_version"`
	AgentVersion    string   `json:"agent_version"`
	ResticVersion   string   `json:"restic_version"`
	Hostname        string   `json:"hostname"`
	OS              HostOS   `json:"os"`
	Arch            HostArch `json:"arch"`

	// BootTime is optional. encoding/json's omitempty never treats a
	// struct value such as time.Time as empty, so the previous
	// omitempty tag still emitted "0001-01-01T00:00:00Z" for an unset
	// boot time. omitzero (Go 1.24+) drops the zero time from the
	// wire; older toolchains ignore the unknown option and keep the
	// earlier behaviour. Decoders see the zero time either way.
	BootTime time.Time `json:"boot_time,omitzero"`
}
// HeartbeatPayload is the agent's keep-alive, sent every 30s. Its
// arrival is the signal — the body carries no operational data today.
// Future fields (load, free disk) can be added here without bumping
// protocol_version.
type HeartbeatPayload struct {
	// SentAt is stamped by the sender; presumably the agent's
	// wall-clock send time — confirm against the agent code.
	SentAt time.Time `json:"sent_at"`
}
// JobKind identifies the operation an agent is asked to run, or has
// just finished running.
type JobKind string

// Permitted JobKind values:
//   - backup is driven by an operator or by cron;
//   - init runs once per host, on first connect;
//   - forget, prune and check fire from the server-side maintenance
//     ticker;
//   - unlock and restore are operator-only;
//   - diff is operator-only and read-only.
const (
	JobBackup  JobKind = "backup"
	JobInit    JobKind = "init"
	JobForget  JobKind = "forget"
	JobPrune   JobKind = "prune"
	JobCheck   JobKind = "check"
	JobUnlock  JobKind = "unlock"
	JobRestore JobKind = "restore"
	JobDiff    JobKind = "diff"
)
// JobStatus describes where a job is in its lifecycle.
type JobStatus string

// Permitted JobStatus values. A job moves queued → running and then
// ends in exactly one terminal state: succeeded, failed or
// JobCancelled. The wire/DB literal behind the JobCancelled value
// deliberately uses the UK spelling — do not "correct" it, or existing
// job rows and agent payloads will stop matching. //nolint:misspell
const (
	JobQueued    JobStatus = "queued"
	JobRunning   JobStatus = "running"
	JobSucceeded JobStatus = "succeeded"
	JobFailed    JobStatus = "failed"
	JobCancelled JobStatus = "cancelled" //nolint:misspell // wire format
)
// ForgetPolicyJSON is the wire form of one source group's retention
// policy, as shipped inside a forget command.run. Its JSON tags mirror
// store.RetentionPolicy exactly, so a future caller could round-trip
// between the two through JSON without reshaping.
//
// Every field is a nullable pointer and is left off the wire when
// nil. A policy with no field set is rejected by the agent, because
// restic refuses to forget without at least one --keep-* flag.
type ForgetPolicyJSON struct {
	KeepLast    *int `json:"keep_last,omitempty"`
	KeepHourly  *int `json:"keep_hourly,omitempty"`
	KeepDaily   *int `json:"keep_daily,omitempty"`
	KeepWeekly  *int `json:"keep_weekly,omitempty"`
	KeepMonthly *int `json:"keep_monthly,omitempty"`
	KeepYearly  *int `json:"keep_yearly,omitempty"`
}
// ForgetGroup pairs a snapshot tag with the retention policy to apply
// to it; a forget command.run carries a list of these. The agent
// invokes `restic forget --tag <Tag> --keep-* …` once per group, using
// that group's own policy.
type ForgetGroup struct {
	// Tag is the source-group name, which is also the tag snapshots
	// were given at backup time.
	Tag string `json:"tag"`

	// Policy is the retention policy for snapshots carrying Tag.
	Policy ForgetPolicyJSON `json:"policy"`
}
// CommandRunPayload is the server → agent dispatch for a run-now job.
// Which optional fields are populated depends on Kind:
//
//   - backup: Includes, Excludes and Tag come from the source group the
//     operator (or schedule) targeted. The agent runs one restic backup
//     invocation per command.run and tags the snapshot with Tag (the
//     source group's name), so retention can later target it via
//     `restic forget --tag`.
//   - forget: ForgetGroups holds one entry per source group on the host
//     that has a non-empty retention policy. The agent walks the list
//     and runs `restic forget --tag <Tag> --keep-* …` for each group.
//
// Args remains a generic free-form slice for kinds that do not fit
// the structured fields: unlock and init take none, and check carries
// the subset% as Args[0].
//
// RequiresAdminCreds instructs the agent to load the admin slot of its
// secrets store instead of the everyday repo slot. The server sets it
// only for prune — today the only kind that needs delete authority on
// a rest-server repo.
type CommandRunPayload struct {
	JobID              string        `json:"job_id"`
	Kind               JobKind       `json:"kind"`
	Args               []string      `json:"args,omitempty"`
	Includes           []string      `json:"includes,omitempty"`
	Excludes           []string      `json:"excludes,omitempty"`
	Tag                string        `json:"tag,omitempty"`
	ForgetGroups       []ForgetGroup `json:"forget_groups,omitempty"`
	RequiresAdminCreds bool          `json:"requires_admin_creds,omitempty"`

	// BandwidthUpKBps / BandwidthDownKBps are per-job bandwidth caps
	// in KB/s. A nil pointer means "use the host-wide caps received
	// via config.update". A non-nil pointer overrides them for this
	// job only — and a pointer to zero is an explicit "no cap for
	// this job", not "unset".
	BandwidthUpKBps   *int `json:"bandwidth_up_kbps,omitempty"`
	BandwidthDownKBps *int `json:"bandwidth_down_kbps,omitempty"`

	// PreHook / PostHook run only for kind=backup. The server has
	// already resolved source-group hook → host default → empty
	// before dispatching, so the agent executes whatever it finds
	// here without further lookup.
	PreHook  string `json:"pre_hook,omitempty"`
	PostHook string `json:"post_hook,omitempty"`

	// Restore is set only for kind=restore and nil for every other
	// kind; RestorePayload describes its shape.
	Restore *RestorePayload `json:"restore,omitempty"`

	// Diff is set only for kind=diff and nil for every other kind;
	// DiffPayload describes its shape.
	Diff *DiffPayload `json:"diff,omitempty"`
}
// RestorePayload holds the restore-specific arguments of a JobRestore
// command.run.
type RestorePayload struct {
	SnapshotID string `json:"snapshot_id"`

	// Paths are absolute paths inside the snapshot, in the same shape
	// restic accepts as positional args.
	Paths []string `json:"paths"`

	// InPlace selects the restore mode. When true the agent restores
	// at root (`--target /`) and preserves uid/gid/mode; when false it
	// restores into TargetDir with --no-ownership so the operator can
	// inspect the files as the agent user.
	InPlace bool `json:"in_place"`

	// TargetDir is the destination of a non-in-place restore; it is
	// ignored when in_place=true.
	TargetDir string `json:"target_dir,omitempty"`
}
// DiffPayload holds the two snapshots to compare for a JobDiff
// command.run. Either field may be a short or a long snapshot ID —
// restic accepts both forms.
type DiffPayload struct {
	SnapshotA string `json:"snapshot_a"`
	SnapshotB string `json:"snapshot_b"`
}
// CommandCancelPayload is the server → agent cancel signal; JobID
// names the job the signal applies to.
type CommandCancelPayload struct {
	JobID string `json:"job_id"`
}
// CommandResultPayload acknowledges a command.run dispatch: the agent
// has accepted the job and persisted it locally. It does *not* mean
// the job has completed — job.finished is the completion signal.
type CommandResultPayload struct {
	JobID    string `json:"job_id"`
	Accepted bool   `json:"accepted"`
	// Error is left off the wire when empty.
	Error string `json:"error,omitempty"`
}
// JobStartedPayload is sent by the agent when it has begun executing
// a job.
type JobStartedPayload struct {
	JobID     string    `json:"job_id"`
	Kind      JobKind   `json:"kind"`
	StartedAt time.Time `json:"started_at"`
}
// JobProgressPayload is the agent's periodic status report while a
// job is running. The field set was chosen to match what restic
// --json emits for `backup`; other job kinds fill in whichever subset
// makes sense for them. All counters are always present on the wire
// (no omitempty), so an unpopulated one is sent as 0.
type JobProgressPayload struct {
	JobID string `json:"job_id"`

	PercentDone float64 `json:"percent_done"`

	FilesDone  int64 `json:"files_done"`
	TotalFiles int64 `json:"total_files"`

	BytesDone  int64 `json:"bytes_done"`
	TotalBytes int64 `json:"total_bytes"`

	ETASeconds    int64 `json:"eta_seconds"`
	ThroughputBps int64 `json:"throughput_bps"`
}
// JobFinishedPayload is the agent's report that a job has reached a
// terminal state.
type JobFinishedPayload struct {
	JobID      string    `json:"job_id"`
	Status     JobStatus `json:"status"`
	ExitCode   int       `json:"exit_code"`
	FinishedAt time.Time `json:"finished_at"`

	// Stats is the restic summary blob, passed through undecoded and
	// left off the wire when empty.
	Stats json.RawMessage `json:"stats,omitempty"`

	// Error is left off the wire when empty.
	Error string `json:"error,omitempty"`
}
// LogStreamLine is a single entry in a job's live log.
type LogStreamLine struct {
	JobID string `json:"job_id"`
	// Seq is presumably a per-job ordering counter — confirm against
	// the producer.
	Seq int64     `json:"seq"`
	TS  time.Time `json:"ts"`
	// Stream says which channel the line came from (see LogStream).
	Stream  LogStream `json:"stream"`
	Payload string    `json:"payload"`
}
// LogStream names the channel a log line originated from.
type LogStream string

// Permitted LogStream values. stdout and stderr lines are passed
// through verbatim; event marks a parsed restic --json envelope
// (summary, error, etc).
const (
	LogStdout LogStream = "stdout"
	LogStderr LogStream = "stderr"
	LogEvent  LogStream = "event" // parsed restic --json event
)
// SnapshotsReportPayload carries the agent's complete snapshot list.
// The agent sends it after every successful backup so the server can
// refresh its projection.
type SnapshotsReportPayload struct {
	Snapshots []Snapshot `json:"snapshots"`
}
// Snapshot is the server-side projection of one entry from
// `restic snapshots --json`.
type Snapshot struct {
	// ID is the long restic snapshot ID.
	ID string `json:"id"`
	// ShortID is the 8-hex-char form of the ID.
	ShortID string `json:"short_id"`

	Time     time.Time `json:"time"`
	Hostname string    `json:"hostname"`
	Paths    []string  `json:"paths"`
	Tags     []string  `json:"tags,omitempty"`

	// SizeBytes and FileCount come from the embedded summary block
	// that restic 0.16+ provides. Older clients leave them at zero,
	// which the UI degrades around gracefully.
	SizeBytes int64 `json:"size_bytes,omitempty"`
	FileCount int64 `json:"file_count,omitempty"`
}
// RepoStatsPayload is a partial update of repo health facts. The
// agent ships it after prune/check/unlock, or on a periodic stats
// refresh.
//
// Merge rules: a nil pointer means "no update for this field" and is
// left off the wire; the server folds only the non-nil fields into
// its host_repo_stats row, matching UpsertHostRepoStats' partial-
// update semantics. The one non-pointer field, LastCheckStatus, uses
// the empty string as its "no update" sentinel instead.
type RepoStatsPayload struct {
	TotalSizeBytes *int64 `json:"total_size_bytes,omitempty"`
	RawSizeBytes   *int64 `json:"raw_size_bytes,omitempty"`
	UniqueFiles    *int64 `json:"unique_files,omitempty"`
	SnapshotCount  *int64 `json:"snapshot_count,omitempty"`

	LastCheckAt     *time.Time `json:"last_check_at,omitempty"`
	LastCheckStatus string     `json:"last_check_status,omitempty"`

	LockPresent *bool `json:"lock_present,omitempty"`

	LastPruneAt         *time.Time `json:"last_prune_at,omitempty"`
	LastPruneFreedBytes *int64     `json:"last_prune_freed_bytes,omitempty"`
}
// Schedule is what the agent sees of a slim Schedule row, together
// with that schedule's resolved bundle of source groups.
//
// The agent's cron needs only CronExpr + Enabled (when to fire) and
// ID (which schedule fired). SourceGroups ride along for forensic
// logs, and so that a future agent electing to dispatch jobs locally
// has the data; the server-side dispatch path uses the schedule's
// group list directly. Manual schedules no longer exist — Run-now
// targets a source group, not a schedule.
type Schedule struct {
	ID           string                `json:"id"`
	CronExpr     string                `json:"cron_expr"`
	Enabled      bool                  `json:"enabled"`
	SourceGroups []ScheduleSourceGroup `json:"source_groups,omitempty"`
}
// ScheduleSourceGroup is a source group attached to a schedule, as
// resolved at push time. No source_group_id is carried: Name doubles
// as the snapshot tag and is unique per host, which is all the agent
// needs.
type ScheduleSourceGroup struct {
	Name     string   `json:"name"`
	Includes []string `json:"includes,omitempty"`
	Excludes []string `json:"excludes,omitempty"`

	// RetentionPolicy is carried as raw JSON, undecoded.
	RetentionPolicy json.RawMessage `json:"retention_policy,omitempty"`

	RetryMax            int `json:"retry_max,omitempty"`
	RetryBackoffSeconds int `json:"retry_backoff_seconds,omitempty"`
}
// ScheduleSetPayload is the server's push of the full canonical
// schedule list for a host. The agent reconciles its local cron
// against it and replies with a ScheduleAckPayload carrying the same
// Version. An empty Schedules list is a valid push: it disables every
// cron entry.
type ScheduleSetPayload struct {
	Version   int64      `json:"version"`
	Schedules []Schedule `json:"schedules"`
}
// ScheduleAckPayload is the agent's confirmation that it has applied
// the schedule set with the given Version.
type ScheduleAckPayload struct {
	Version   int64     `json:"version"`
	AppliedAt time.Time `json:"applied_at"`
}
// ScheduleFirePayload is the agent's report that one of its local
// cron entries has just fired. The server is expected to look the
// schedule up, build a CommandRun payload from it, persist a job row
// and send MsgCommandRun back on the same connection.
type ScheduleFirePayload struct {
	ScheduleID string `json:"schedule_id"`

	// ScheduledAt is the wall-clock time at which the agent's cron
	// fired. It has audit / forensic value when network jitter pushes
	// the actual command.run dispatch later.
	ScheduledAt time.Time `json:"scheduled_at"`
}
// ConfigUpdatePayload is the server's push of per-host configuration
// (currently just the repo connection details). An empty field means
// "leave the existing value alone"; the stated way to clear something
// is to send an explicit zero value.
//
// NOTE(review): every string field below is tagged omitempty, so an
// empty string is dropped by encoding/json and cannot be told apart
// from "not sent". Only the pointer-typed bandwidth fields can carry
// an explicit zero on the wire — confirm how string values are meant
// to be cleared.
//
// Slot selects the secrets-store slot the agent writes the creds to.
// Empty or "repo" is the everyday repo creds slot (the default);
// "admin" is the prune-capable admin user, kept in a separate slot
// that is not loaded for backups. This is forwards-compatible: an
// agent that ignores Slot simply writes to the repo slot, and admin
// pushes become no-ops.
type ConfigUpdatePayload struct {
	RepoURL string `json:"repo_url,omitempty"`
	// RepoPassword is sensitive.
	RepoPassword string `json:"repo_password,omitempty"`
	RepoUsername string `json:"repo_username,omitempty"`
	// RepoCredential is sensitive (used for rest server basic auth).
	RepoCredential string `json:"repo_credential,omitempty"`
	HookShell      string `json:"hook_shell,omitempty"`
	Slot           string `json:"slot,omitempty"`

	// BandwidthUpKBps / BandwidthDownKBps are bandwidth caps in KB/s,
	// applied to every restic invocation as --limit-upload /
	// --limit-download. They are pointers so the server can tell "no
	// change in this push" (nil, omitted on the wire) apart from
	// "explicitly clear the cap" (zero or negative value). Per-job
	// overrides ride on CommandRunPayload.
	BandwidthUpKBps   *int `json:"bandwidth_up_kbps,omitempty"`
	BandwidthDownKBps *int `json:"bandwidth_down_kbps,omitempty"`
}
// AgentUpdateAvailablePayload announces that a newer agent version
// exists. It is informational only: the agent does NOT self-update.
// See spec.md §4.2 for the package-manager-based update model.
type AgentUpdateAvailablePayload struct {
	LatestVersion string `json:"latest_version"`
	// PackageURL points at the apt repo / choco source.
	PackageURL string `json:"package_url"`
	Changelog  string `json:"changelog,omitempty"`
}
// TreeListRequestPayload is the request body of a tree.list RPC, which
// the restore wizard uses to lazy-load directory contents from a
// snapshot.
//
// The exchange is synchronous:
//  1. the server marshals MsgTreeList with a fresh Envelope.ID, sends
//     it to the agent and blocks on a channel keyed by that ID;
//  2. the agent runs `restic ls --json <SnapshotID> <Path>`, emits the
//     direct children and replies with MsgTreeListResult carrying the
//     same ID;
//  3. the server-side handler matches on the ID and forwards the reply
//     to the waiting channel.
//
// See internal/server/ws/rpc.go for the helper.
type TreeListRequestPayload struct {
	SnapshotID string `json:"snapshot_id"`
	// Path is an absolute path inside the snapshot; "/" means root.
	Path string `json:"path"`
}
// TreeListEntry describes one direct child returned by a tree.list
// call.
type TreeListEntry struct {
	Name string `json:"name"`
	// Type is one of "dir", "file" or "symlink".
	Type string `json:"type"`
	// Size is best-effort: it is zero on directories and symlinks,
	// and a zero size is left off the wire.
	Size int64 `json:"size,omitempty"`
}
// TreeListResultPayload is the reply to a tree.list request.
//
// Error is set when the agent could not fulfill the request (missing
// snapshot, path doesn't exist, restic invocation failed), and Entries
// is empty in that case. A successful listing of an empty directory
// has Error="" together with nil Entries.
type TreeListResultPayload struct {
	SnapshotID string          `json:"snapshot_id"`
	Path       string          `json:"path"`
	Entries    []TreeListEntry `json:"entries,omitempty"`
	Error      string          `json:"error,omitempty"`
}