Files
steve 02e4ef7544 testing: bootstrap UI, agent reliability, NS-01..04 + alert username
Smooths the rough edges that came up while exercising a live deployment.

First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.

Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Server unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.

Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).

NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.

NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.

NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.

NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.

Alerts page: ack'd-by line resolves user_id ULID to username.

Compose.yaml ignored — host-specific.
2026-05-05 22:03:15 +01:00

272 lines
8.8 KiB
Go

package restic
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
)
// RestoreStatus is the decoded form of one `status` progress line that
// `restic restore --json` prints while a restore is running. The JSON
// keys follow restic's wire names. Only a subset of restic's fields is
// declared here; the remainder are cosmetic and are not captured.
type RestoreStatus struct {
	// MessageType is the discriminator of the JSON line.
	MessageType string `json:"message_type"`

	// Overall progress as reported by restic.
	SecondsElapsed int64   `json:"seconds_elapsed"`
	PercentDone    float64 `json:"percent_done"`

	// File counters.
	TotalFiles    int64 `json:"total_files"`
	FilesRestored int64 `json:"files_restored"`
	FilesSkipped  int64 `json:"files_skipped"`

	// Byte counters.
	TotalBytes    int64 `json:"total_bytes"`
	BytesRestored int64 `json:"bytes_restored"`
	BytesSkipped  int64 `json:"bytes_skipped"`
}
// RestoreSummary is the decoded form of the closing `summary` line that
// restic prints once a restore has succeeded. Only newer restic
// versions emit it. With older clients no summary is available, so the
// agent omits the stats and the live UI simply sees the percentage
// reach 100%.
type RestoreSummary struct {
	// MessageType is the discriminator of the JSON line.
	MessageType string `json:"message_type"`

	// SecondsElapsed is restic's reported duration for the restore.
	SecondsElapsed int64 `json:"seconds_elapsed"`

	// File counters.
	TotalFiles    int64 `json:"total_files"`
	FilesRestored int64 `json:"files_restored"`
	FilesSkipped  int64 `json:"files_skipped"`

	// Byte counters.
	TotalBytes    int64 `json:"total_bytes"`
	BytesRestored int64 `json:"bytes_restored"`
	BytesSkipped  int64 `json:"bytes_skipped"`
}
// RunRestore executes `restic restore --json <snapshotID> --target <dir>
// [--no-ownership] [--include <p>...]` and pumps the child's output
// into handle.
//
// Arguments:
//   - snapshotID is handed to restic as a positional argument. It must
//     be non-empty and must not begin with "-", because restic would
//     parse such a value as an option instead of a snapshot.
//   - paths is the operator-selected list; each entry becomes an
//     `--include` flag. An empty list adds no filter.
//   - inPlace toggles target semantics: when true the target is "/" and
//     --no-ownership is never passed; when false the target is
//     targetDir and --no-ownership is passed if
//     e.SupportsRestoreNoOwnership is set.
//   - targetDir is required when inPlace is false. It is passed through
//     expandHome and created here (including parents, mode 0700) before
//     restic starts. It is ignored when inPlace is true.
//   - handle receives the pumped lines; it may be nil, in which case
//     pump errors are not reported anywhere.
//
// The returned summary is whatever the stdout pump captured (nil when
// no summary line was seen). It is returned even when restic exits
// non-zero, alongside the error describing the failure.
func (e Env) RunRestore(ctx context.Context, snapshotID string, paths []string, inPlace bool, targetDir string, handle LineHandler) (*RestoreSummary, error) {
	if snapshotID == "" {
		return nil, fmt.Errorf("restic restore: snapshot id required")
	}
	// The snapshot id is positional; refuse anything restic would treat
	// as a flag (e.g. "--password-command=...") instead of a snapshot.
	if strings.HasPrefix(snapshotID, "-") {
		return nil, fmt.Errorf("restic restore: invalid snapshot id %q", snapshotID)
	}
	if !inPlace && targetDir == "" {
		return nil, fmt.Errorf("restic restore: target dir required for non-in-place restore")
	}

	target := "/"
	if !inPlace {
		// Expand $HOME / ${HOME} / leading ~/ in the operator-supplied
		// path, using the agent's own HOME (typically /root for the
		// User=root unit). The expansion runs agent-side so the
		// operator can specify a portable default like
		// $HOME/rm-restore/<job-id>/ in the wizard without the server
		// needing to know which user the agent runs as.
		target = expandHome(targetDir)

		// Ensure the target directory exists. Restic itself creates
		// missing leaves but won't traverse multiple missing levels
		// (and we don't want the operator to have to pre-create the
		// per-job subdir). 0700 keeps the data root-only — the agent
		// runs as root, and operators who want a different mode can
		// chmod after the fact. If MkdirAll fails (operator typed a
		// path inside a read-only sandbox mount, ENOSPC, etc.) we
		// surface a clean error rather than letting restic fail with
		// something cryptic.
		if err := os.MkdirAll(target, 0o700); err != nil {
			return nil, fmt.Errorf("restic restore: prepare target %q: %w", target, err)
		}
	}

	args := []string{"restore", "--json", snapshotID, "--target", target}

	// --no-ownership is nominally a restic 0.17+ flag, but at least
	// one downstream 0.18.1 build still rejects it. We rely on a
	// runtime probe captured at agent startup (see
	// SupportsRestoreNoOwnership) rather than version sniffing.
	// In-place restores always preserve ownership — that's the whole
	// point of in-place — so we only add the flag for new-dir mode.
	if !inPlace && e.SupportsRestoreNoOwnership {
		args = append(args, "--no-ownership")
	}
	for _, p := range paths {
		args = append(args, "--include", p)
	}

	cmd := e.resticCmd(ctx, args...)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("restic restore: stdout pipe: %w", err)
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("restic restore: stderr pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("restic restore: start: %w", err)
	}

	// Both pipes must be fully read before Wait is called, so collect
	// the two pump results first. The channel is buffered so neither
	// goroutine can block on its send.
	var summary *RestoreSummary
	done := make(chan error, 2)
	go func() { done <- pumpRestoreStdout(stdout, handle, &summary) }()
	go func() { done <- pumpStderr(stderr, handle) }()
	for i := 0; i < 2; i++ {
		if err := <-done; err != nil && handle != nil {
			handle("event", fmt.Sprintf("pump error: %v", err), nil)
		}
	}

	if werr := cmd.Wait(); werr != nil {
		var ee *exec.ExitError
		if errors.As(werr, &ee) {
			return summary, fmt.Errorf("restic restore: exit %d", ee.ExitCode())
		}
		return summary, fmt.Errorf("restic restore: %w", werr)
	}
	return summary, nil
}
// pumpRestoreStdout is the restore variant of pumpStdout. It reads r
// line by line and routes each line to handle:
//
//   - lines that do not start with "{" or fail to decode as JSON are
//     forwarded as "stdout";
//   - `status` lines are emitted as "event" with a RestoreStatus
//     payload and are deliberately NOT forwarded as stdout — mirroring
//     backup, they would drown the log on a fast restore and the
//     progress event already covers them;
//   - `summary` lines are emitted as "event" with a RestoreSummary
//     payload and, when summary is non-nil, a copy is stored in
//     *summary;
//   - `verbose_status` lines are emitted as "event" with a nil payload;
//   - any other JSON line is forwarded as "stdout".
//
// When handle is nil the input is still consumed but nothing is parsed,
// so no summary is captured.
//
// The return value is the scanner's error, if any. On such an error
// (for example a single line longer than the 4 MiB cap) the rest of r
// is read and discarded before returning: if r is a child process's
// stdout pipe, leaving it unread would let the child block on a full
// pipe and never exit.
func pumpRestoreStdout(r io.Reader, handle LineHandler, summary **RestoreSummary) error {
	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), 4*1024*1024)
	for scanner.Scan() {
		if handle == nil {
			// No consumer: keep reading so the writer never stalls.
			continue
		}
		line := scanner.Text()
		if !strings.HasPrefix(line, "{") {
			handle("stdout", line, nil)
			continue
		}
		// First pass: only look at the discriminator.
		var probe struct {
			MessageType string `json:"message_type"`
		}
		if err := json.Unmarshal([]byte(line), &probe); err != nil {
			handle("stdout", line, nil)
			continue
		}
		switch probe.MessageType {
		case "status":
			var ev RestoreStatus
			if json.Unmarshal([]byte(line), &ev) == nil {
				// Don't tee status lines to the stdout stream — too chatty.
				handle("event", line, ev)
				continue
			}
		case "summary":
			var ev RestoreSummary
			if json.Unmarshal([]byte(line), &ev) == nil {
				if summary != nil {
					s := ev
					*summary = &s
				}
				handle("event", line, ev)
				continue
			}
		case "verbose_status":
			handle("event", line, nil)
			continue
		}
		// Unknown message type, or a typed decode failed: pass it through.
		handle("stdout", line, nil)
	}
	err := scanner.Err()
	if err != nil {
		// Best-effort drain; the scanner error is what gets reported.
		_, _ = io.Copy(io.Discard, r)
	}
	return err
}
// expandHome substitutes the agent process's home directory for a
// leading home marker in p. Three markers are recognised: "$HOME",
// "${HOME}" and "~". Each matches only when it is the whole of p or is
// immediately followed by "/"; the remainder is joined onto the home
// directory with filepath.Join.
//
// No other environment reference is expanded, on purpose: an
// operator-supplied path must not be able to pull in arbitrary agent
// environment values such as $PATH or $RESTIC_PASSWORD.
//
// p is returned unchanged when it is empty, when it carries no leading
// marker, or when the home directory cannot be resolved.
func expandHome(p string) string {
	if p == "" {
		return p
	}
	homeDir, err := os.UserHomeDir()
	if err != nil || homeDir == "" {
		return p
	}
	for _, marker := range [...]string{"$HOME", "${HOME}", "~"} {
		if p == marker {
			return homeDir
		}
		if strings.HasPrefix(p, marker+"/") {
			return filepath.Join(homeDir, p[len(marker)+1:])
		}
	}
	return p
}
// RunDiff executes `restic diff --json <a> <b>` and forwards every
// stdout line to handle as "stdout". Restic emits per-line "change"
// objects plus a final "statistics" object; they are not parsed here —
// the operator reads the raw output on the live job log page.
//
// snapshotA and snapshotB are passed positionally to restic. Both must
// be non-empty and must not begin with "-", because restic would parse
// such a value as an option instead of a snapshot.
//
// handle may be nil, in which case the output is consumed and dropped
// and pump errors are not reported anywhere.
//
// A non-zero restic exit is returned as "restic diff: exit <code>";
// any other failure to run the command is wrapped and returned.
func (e Env) RunDiff(ctx context.Context, snapshotA, snapshotB string, handle LineHandler) error {
	if snapshotA == "" || snapshotB == "" {
		return fmt.Errorf("restic diff: two snapshot ids required")
	}
	// Snapshot ids are positional; refuse anything restic would treat
	// as a flag (e.g. "--password-command=...") instead of a snapshot.
	for _, id := range []string{snapshotA, snapshotB} {
		if strings.HasPrefix(id, "-") {
			return fmt.Errorf("restic diff: invalid snapshot id %q", id)
		}
	}

	cmd := e.resticCmd(ctx, "diff", "--json", snapshotA, snapshotB)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("restic diff: stdout pipe: %w", err)
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return fmt.Errorf("restic diff: stderr pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("restic diff: start: %w", err)
	}

	// Both pipes must be fully read before Wait is called, so collect
	// the two pump results first. The channel is buffered so neither
	// goroutine can block on its send.
	done := make(chan error, 2)
	// diff output isn't huge; pumpStderr-ish line-by-line forwarding
	// is fine.
	go func() {
		s := bufio.NewScanner(stdout)
		s.Buffer(make([]byte, 0, 64*1024), 1024*1024)
		for s.Scan() {
			if handle != nil {
				handle("stdout", s.Text(), nil)
			}
		}
		scanErr := s.Err()
		if scanErr != nil {
			// The scanner gave up (e.g. a line over the 1 MiB cap).
			// Keep draining so restic cannot block on a full stdout
			// pipe and stall the wait loop below; the scanner error is
			// what gets reported.
			_, _ = io.Copy(io.Discard, stdout)
		}
		done <- scanErr
	}()
	go func() { done <- pumpStderr(stderr, handle) }()
	for i := 0; i < 2; i++ {
		if err := <-done; err != nil && handle != nil {
			handle("event", fmt.Sprintf("pump error: %v", err), nil)
		}
	}

	if werr := cmd.Wait(); werr != nil {
		var ee *exec.ExitError
		if errors.As(werr, &ee) {
			return fmt.Errorf("restic diff: exit %d", ee.ExitCode())
		}
		return fmt.Errorf("restic diff: %w", werr)
	}
	return nil
}