From 8d5282a18072f020cfdb093c42f8b853af5ce774 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Fri, 1 May 2026 11:20:57 +0100 Subject: [PATCH] P1-22: snapshot listing via restic snapshots --json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent calls restic snapshots --json after each successful backup (60s timeout, separate from the backup ctx) and ships the projection over the existing snapshots.report WS envelope. Failure here is logged but doesn't fail the job — the next successful backup catches the projection up. Server-side ReplaceHostSnapshots is delete-then-insert plus a hosts.snapshot_count update in one transaction so the dashboard's per-host count stays consistent with the projection. New read endpoint GET /api/hosts/{id}/snapshots returns the cached list with a refreshed_at marker so the UI can show staleness when an agent has been offline. Schema: dropped the unused snapshots.repo_id FK (repos as a first-class entity is P2 work), added short_id and refreshed_at columns, switched the time index to DESC for the most-recent-first list query. api.Snapshot gains short_id; size_bytes/file_count come from the embedded summary block on restic 0.16+ and stay zero on older clients. Tests cover round-trip, authoritative replacement after forget+prune shrinkage, and empty-after-wipe. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/agent/runner/runner.go | 48 +++++++ internal/api/messages.go | 6 +- internal/restic/snapshots.go | 58 ++++++++ internal/server/http/snapshots.go | 82 +++++++++++ internal/server/ws/handler.go | 29 +++- internal/store/migrations/0001_initial.sql | 14 +- internal/store/snapshots.go | 128 +++++++++++++++++ internal/store/snapshots_test.go | 153 +++++++++++++++++++++ tasks.md | 6 +- 9 files changed, 514 insertions(+), 10 deletions(-) create mode 100644 internal/restic/snapshots.go create mode 100644 internal/server/http/snapshots.go create mode 100644 internal/store/snapshots.go create mode 100644 internal/store/snapshots_test.go diff --git a/internal/agent/runner/runner.go b/internal/agent/runner/runner.go index dc08baa..e6e8bb9 100644 --- a/internal/agent/runner/runner.go +++ b/internal/agent/runner/runner.go @@ -127,12 +127,60 @@ func (r *Runner) RunBackup(ctx context.Context, jobID string, paths, excludes, t Error: errMsg, }) _ = r.tx.Send(finEnv) + + // On a successful backup, refresh the server's snapshot projection. + // We do this *after* job.finished so the UI sees the job land first; + // the snapshot list is a follow-up that the host detail page polls + // or the dashboard sees on its next refresh. A failure here is + // logged but doesn't fail the job — the next successful backup will + // catch the projection up. + if err == nil { + if rerr := r.reportSnapshots(ctx, env); rerr != nil { + slog.Warn("runner: snapshots.report failed", "job_id", jobID, "err", rerr) + } + } + if err != nil { return fmt.Errorf("runner backup: %w", err) } return nil } +// reportSnapshots calls `restic snapshots --json`, translates the +// payload into the wire shape, and ships it as a snapshots.report +// envelope. Bounded by a separate timeout so a sluggish repo doesn't +// hang the runner forever; restic snapshots is normally sub-second. 
+func (r *Runner) reportSnapshots(ctx context.Context, env restic.Env) error { + listCtx, cancel := context.WithTimeout(ctx, 60*time.Second) + defer cancel() + snaps, err := env.ListSnapshots(listCtx) + if err != nil { + return err + } + out := make([]api.Snapshot, len(snaps)) + for i, s := range snaps { + out[i] = api.Snapshot{ + ID: s.ID, + ShortID: s.ShortID, + Time: s.Time.UTC(), + Hostname: s.Hostname, + Paths: s.Paths, + Tags: s.Tags, + } + if s.Summary != nil { + out[i].SizeBytes = s.Summary.TotalBytesProcessed + out[i].FileCount = s.Summary.TotalFilesProcessed + } + } + envOut, err := api.Marshal(api.MsgSnapshotsRpt, "", api.SnapshotsReportPayload{ + Snapshots: out, + }) + if err != nil { + return err + } + return r.tx.Send(envOut) +} + func throughput(bytesDone, secondsElapsed int64) int64 { if secondsElapsed <= 0 { return 0 diff --git a/internal/api/messages.go b/internal/api/messages.go index c2c7aed..c04743e 100644 --- a/internal/api/messages.go +++ b/internal/api/messages.go @@ -141,8 +141,12 @@ type SnapshotsReportPayload struct { } // Snapshot is the projection mirrored from `restic snapshots --json`. +// SizeBytes / FileCount come from the embedded summary block on +// restic 0.16+; older clients leave them at zero (the UI degrades +// gracefully). type Snapshot struct { - ID string `json:"id"` // restic snapshot ID + ID string `json:"id"` // long restic snapshot ID + ShortID string `json:"short_id"` // 8-hex-char form Time time.Time `json:"time"` Hostname string `json:"hostname"` Paths []string `json:"paths"` diff --git a/internal/restic/snapshots.go b/internal/restic/snapshots.go new file mode 100644 index 0000000..5c62a82 --- /dev/null +++ b/internal/restic/snapshots.go @@ -0,0 +1,58 @@ +package restic + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os/exec" + "time" +) + +// Snapshot mirrors a single entry in `restic snapshots --json`. 
We +// decode only the fields we project to the server; restic's full +// shape has more (parent, tree, program version) that we don't need. +// +// Summary is only populated by restic 0.16+ (which embeds the backup +// summary inside each snapshot). Older clients leave it nil and the +// agent reports zero size/file-count — the UI degrades to "—". +type Snapshot struct { + ID string `json:"id"` + ShortID string `json:"short_id"` + Time time.Time `json:"time"` + Hostname string `json:"hostname"` + Paths []string `json:"paths"` + Tags []string `json:"tags,omitempty"` + Summary *SnapshotSummary `json:"summary,omitempty"` +} + +// SnapshotSummary mirrors the embedded summary block restic 0.16+ +// writes into each snapshot record. The naming follows restic's JSON. +type SnapshotSummary struct { + TotalFilesProcessed int64 `json:"total_files_processed"` + TotalBytesProcessed int64 `json:"total_bytes_processed"` +} + +// ListSnapshots calls `restic snapshots --json` and returns the +// parsed list. Output is a single JSON array — small even on large +// repos (each entry is ~200 bytes), so we read it all into memory. 
+func (e Env) ListSnapshots(ctx context.Context) ([]Snapshot, error) { + cmd := exec.CommandContext(ctx, e.Bin, "snapshots", "--json") + cmd.Env = e.envSlice() + cmd.Dir = e.WorkDir + + out, err := cmd.Output() + if err != nil { + var ee *exec.ExitError + if errors.As(err, &ee) { + return nil, fmt.Errorf("restic snapshots: exit %d: %s", + ee.ExitCode(), string(ee.Stderr)) + } + return nil, fmt.Errorf("restic snapshots: %w", err) + } + var snaps []Snapshot + if err := json.Unmarshal(out, &snaps); err != nil { + return nil, fmt.Errorf("restic snapshots: parse json: %w", err) + } + return snaps, nil +} diff --git a/internal/server/http/snapshots.go b/internal/server/http/snapshots.go new file mode 100644 index 0000000..138ce84 --- /dev/null +++ b/internal/server/http/snapshots.go @@ -0,0 +1,82 @@ +package http + +import ( + stdhttp "net/http" + "time" + + "github.com/go-chi/chi/v5" +) + +// snapshotView is the public JSON shape for a snapshot. Matches the +// wire-protocol Snapshot type. The `refreshed_at` marker that lets the +// UI show "list last refreshed Xm ago" lives on listSnapshotsResponse, +// not here — useful when the host has been offline and the list is stale. +type snapshotView struct { + ID string `json:"id"` + ShortID string `json:"short_id"` + Time time.Time `json:"time"` + Hostname string `json:"hostname"` + Paths []string `json:"paths"` + Tags []string `json:"tags,omitempty"` + SizeBytes int64 `json:"size_bytes,omitempty"` + FileCount int64 `json:"file_count,omitempty"` +} + +type listSnapshotsResponse struct { + HostID string `json:"host_id"` + Count int `json:"count"` + RefreshedAt *time.Time `json:"refreshed_at,omitempty"` + Snapshots []snapshotView `json:"snapshots"` +} + +// handleListHostSnapshots returns the cached snapshot projection for +// one host. The agent refreshes this by sending `snapshots.report` +// after each successful backup; this endpoint is a read-only view +// onto whatever the server most recently received. 
+func (s *Server) handleListHostSnapshots(w stdhttp.ResponseWriter, r *stdhttp.Request) { + if _, ok := s.requireUser(r); !ok { + writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "") + return + } + + hostID := chi.URLParam(r, "id") + if hostID == "" { + writeJSONError(w, stdhttp.StatusBadRequest, "missing_host_id", "") + return + } + + if _, err := s.deps.Store.GetHost(r.Context(), hostID); err != nil { + writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "") + return + } + + snaps, err := s.deps.Store.ListSnapshotsByHost(r.Context(), hostID) + if err != nil { + writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "") + return + } + + out := listSnapshotsResponse{ + HostID: hostID, + Count: len(snaps), + Snapshots: make([]snapshotView, len(snaps)), + } + if len(snaps) > 0 { + t := snaps[0].RefreshedAt + out.RefreshedAt = &t + } + for i, sn := range snaps { + out.Snapshots[i] = snapshotView{ + ID: sn.ID, + ShortID: sn.ShortID, + Time: sn.Time, + Hostname: sn.Hostname, + Paths: sn.Paths, + Tags: sn.Tags, + SizeBytes: sn.SizeBytes, + FileCount: sn.FileCount, + } + } + + writeJSON(w, stdhttp.StatusOK, out) +} diff --git a/internal/server/ws/handler.go b/internal/server/ws/handler.go index 96059e9..b330b33 100644 --- a/internal/server/ws/handler.go +++ b/internal/server/ws/handler.go @@ -185,8 +185,33 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E slog.Warn("ws: append job log", "job_id", p.JobID, "err", err) } - case api.MsgSnapshotsRpt, api.MsgRepoStats, api.MsgScheduleAck, api.MsgCommandResult: - // TODO(P1-22 + P2): persist these projections. 
+ case api.MsgSnapshotsRpt: + var p api.SnapshotsReportPayload + if err := env.UnmarshalPayload(&p); err != nil { + slog.Warn("ws: bad snapshots.report payload", "host_id", hostID, "err", err) + break + } + snaps := make([]store.Snapshot, len(p.Snapshots)) + for i, s := range p.Snapshots { + snaps[i] = store.Snapshot{ + ID: s.ID, + ShortID: s.ShortID, + Time: s.Time, + Hostname: s.Hostname, + Paths: s.Paths, + Tags: s.Tags, + SizeBytes: s.SizeBytes, + FileCount: s.FileCount, + } + } + if err := deps.Store.ReplaceHostSnapshots(ctx, hostID, snaps, time.Now().UTC()); err != nil { + slog.Warn("ws: replace snapshots", "host_id", hostID, "err", err) + } else { + slog.Info("ws: snapshots refreshed", "host_id", hostID, "count", len(snaps)) + } + + case api.MsgRepoStats, api.MsgScheduleAck, api.MsgCommandResult: + // TODO(P2): persist these projections. slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID) case api.MsgError: diff --git a/internal/store/migrations/0001_initial.sql b/internal/store/migrations/0001_initial.sql index 8eb555c..69fd375 100644 --- a/internal/store/migrations/0001_initial.sql +++ b/internal/store/migrations/0001_initial.sql @@ -160,19 +160,25 @@ CREATE TABLE job_logs ( PRIMARY KEY (job_id, seq) ); +-- Snapshot projection — refreshed in full by the agent after each +-- backup via the `snapshots.report` WS message. The PRIMARY KEY is +-- the restic snapshot id (already content-derived and globally +-- unique). repo_id lives in Phase 2 alongside first-class repo +-- management; for Phase 1 each host owns a single repo implicitly. 
CREATE TABLE snapshots ( - id TEXT PRIMARY KEY, -- restic snapshot id + id TEXT PRIMARY KEY, -- restic snapshot id (long form) host_id TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE, - repo_id TEXT NOT NULL REFERENCES repos(id) ON DELETE CASCADE, + short_id TEXT NOT NULL, time TEXT NOT NULL, hostname TEXT NOT NULL, paths TEXT NOT NULL DEFAULT '[]', tags TEXT NOT NULL DEFAULT '[]', size_bytes INTEGER NOT NULL DEFAULT 0, - file_count INTEGER NOT NULL DEFAULT 0 + file_count INTEGER NOT NULL DEFAULT 0, + refreshed_at TEXT NOT NULL -- when the projection was last synced ); CREATE INDEX snapshots_host_id ON snapshots(host_id); -CREATE INDEX snapshots_time ON snapshots(time); +CREATE INDEX snapshots_time ON snapshots(time DESC); CREATE TABLE alerts ( id TEXT PRIMARY KEY, diff --git a/internal/store/snapshots.go b/internal/store/snapshots.go new file mode 100644 index 0000000..e01de90 --- /dev/null +++ b/internal/store/snapshots.go @@ -0,0 +1,128 @@ +package store + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "time" +) + +// Snapshot mirrors the snapshots projection table. +type Snapshot struct { + ID string + HostID string + ShortID string + Time time.Time + Hostname string + Paths []string + Tags []string + SizeBytes int64 + FileCount int64 + RefreshedAt time.Time +} + +// ReplaceHostSnapshots atomically replaces the snapshot projection for +// one host. Snapshots are reported by the agent in full after each +// successful backup, so we treat the message as the new source of +// truth and delete-then-insert under one transaction. +// +// snapshot_count on the host row is updated in the same tx so the +// dashboard's per-host count is always consistent with the snapshot +// list the host detail page renders. 
+func (s *Store) ReplaceHostSnapshots(ctx context.Context, hostID string, snaps []Snapshot, when time.Time) error { + tx, err := s.db.BeginTx(ctx, nil) + if err != nil { + return fmt.Errorf("store: begin snapshots tx: %w", err) + } + defer func() { _ = tx.Rollback() }() + + if _, err := tx.ExecContext(ctx, + `DELETE FROM snapshots WHERE host_id = ?`, hostID); err != nil { + return fmt.Errorf("store: clear snapshots for host: %w", err) + } + + if len(snaps) > 0 { + stmt, err := tx.PrepareContext(ctx, + `INSERT INTO snapshots ( + id, host_id, short_id, time, hostname, paths, tags, + size_bytes, file_count, refreshed_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) + if err != nil { + return fmt.Errorf("store: prepare snapshot insert: %w", err) + } + defer stmt.Close() + + refreshed := when.UTC().Format(time.RFC3339Nano) + for _, snap := range snaps { + paths, _ := json.Marshal(snap.Paths) + tags, _ := json.Marshal(snap.Tags) + if _, err := stmt.ExecContext(ctx, + snap.ID, hostID, snap.ShortID, + snap.Time.UTC().Format(time.RFC3339Nano), + snap.Hostname, string(paths), string(tags), + snap.SizeBytes, snap.FileCount, refreshed, + ); err != nil { + return fmt.Errorf("store: insert snapshot %s: %w", snap.ID, err) + } + } + } + + if _, err := tx.ExecContext(ctx, + `UPDATE hosts SET snapshot_count = ? WHERE id = ?`, + len(snaps), hostID); err != nil { + return fmt.Errorf("store: update host snapshot_count: %w", err) + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("store: commit snapshots: %w", err) + } + return nil +} + +// ListSnapshotsByHost returns the cached snapshot list for a host, +// most-recent first. Empty slice is a normal "no snapshots yet" case. +func (s *Store) ListSnapshotsByHost(ctx context.Context, hostID string) ([]Snapshot, error) { + rows, err := s.db.QueryContext(ctx, + `SELECT id, host_id, short_id, time, hostname, paths, tags, + size_bytes, file_count, refreshed_at + FROM snapshots + WHERE host_id = ? 
+ ORDER BY time DESC`, hostID) + if err != nil { + return nil, fmt.Errorf("store: list snapshots: %w", err) + } + defer rows.Close() + + var out []Snapshot + for rows.Next() { + snap, err := scanSnapshotRow(rows) + if err != nil { + return nil, err + } + out = append(out, *snap) + } + return out, rows.Err() +} + +func scanSnapshotRow(r *sql.Rows) (*Snapshot, error) { + var ( + snap Snapshot + t, refresh string + paths, tags string + ) + if err := r.Scan(&snap.ID, &snap.HostID, &snap.ShortID, + &t, &snap.Hostname, &paths, &tags, + &snap.SizeBytes, &snap.FileCount, &refresh); err != nil { + return nil, fmt.Errorf("store: scan snapshot: %w", err) + } + snap.Time, _ = time.Parse(time.RFC3339Nano, t) + snap.RefreshedAt, _ = time.Parse(time.RFC3339Nano, refresh) + if paths != "" { + _ = json.Unmarshal([]byte(paths), &snap.Paths) + } + if tags != "" { + _ = json.Unmarshal([]byte(tags), &snap.Tags) + } + return &snap, nil +} diff --git a/internal/store/snapshots_test.go b/internal/store/snapshots_test.go new file mode 100644 index 0000000..2870460 --- /dev/null +++ b/internal/store/snapshots_test.go @@ -0,0 +1,153 @@ +package store + +import ( + "context" + "testing" + "time" +) + +// makeSnapHost inserts a minimal host row that snapshot tests can hang +// off. Returns the host id. 
+func makeSnapHost(t *testing.T, s *Store) string { + t.Helper() + const id = "01HSNAPHOST00000000000000" + if err := s.CreateHost(context.Background(), Host{ + ID: id, Name: "snap-host", OS: "linux", Arch: "amd64", + AgentVersion: "dev", ResticVersion: "0.16.0", ProtocolVersion: 1, + EnrolledAt: time.Now().UTC(), + }, "tokenhash", ""); err != nil { + t.Fatalf("create host: %v", err) + } + return id +} + +func TestReplaceHostSnapshotsRoundTrip(t *testing.T) { + t.Parallel() + s := openTestStore(t) + hostID := makeSnapHost(t, s) + ctx := context.Background() + + now := time.Now().UTC().Truncate(time.Second) + in := []Snapshot{ + { + ID: "deadbeef" + "00000000000000000000000000000000000000000000000000000000", + ShortID: "deadbeef", + Time: now.Add(-2 * time.Hour), + Hostname: "snap-host", + Paths: []string{"/etc", "/home"}, + Tags: []string{"daily"}, + SizeBytes: 4096, FileCount: 12, + }, + { + ID: "cafef00d" + "00000000000000000000000000000000000000000000000000000000", + ShortID: "cafef00d", + Time: now.Add(-1 * time.Hour), + Hostname: "snap-host", + Paths: []string{"/etc"}, + SizeBytes: 8192, FileCount: 24, + }, + } + if err := s.ReplaceHostSnapshots(ctx, hostID, in, now); err != nil { + t.Fatalf("replace: %v", err) + } + + out, err := s.ListSnapshotsByHost(ctx, hostID) + if err != nil { + t.Fatalf("list: %v", err) + } + if len(out) != 2 { + t.Fatalf("want 2 snapshots, got %d", len(out)) + } + // Ordered by time DESC — most recent first. + if out[0].ShortID != "cafef00d" { + t.Errorf("want most-recent first; got %q", out[0].ShortID) + } + if got := len(out[0].Paths); got != 1 { + t.Errorf("paths roundtrip lost: %v", out[0].Paths) + } + if out[1].Tags == nil || out[1].Tags[0] != "daily" { + t.Errorf("tags roundtrip lost: %v", out[1].Tags) + } + + // Host snapshot_count is updated atomically. 
+ h, err := s.GetHost(ctx, hostID) + if err != nil { + t.Fatalf("get host: %v", err) + } + if h.SnapshotCount != 2 { + t.Errorf("host snapshot_count = %d, want 2", h.SnapshotCount) + } +} + +func TestReplaceHostSnapshotsIsAuthoritative(t *testing.T) { + t.Parallel() + s := openTestStore(t) + hostID := makeSnapHost(t, s) + ctx := context.Background() + + mk := func(id, short string, tOff time.Duration) Snapshot { + return Snapshot{ + ID: id, ShortID: short, Time: time.Now().UTC().Add(tOff), + Hostname: "snap-host", Paths: []string{"/x"}, + } + } + first := []Snapshot{ + mk("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaa", -3*time.Hour), + mk("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "bbbbbbbb", -2*time.Hour), + mk("cccccccccccccccccccccccccccccccccccccccccccccccccccccccc", "cccccccc", -1*time.Hour), + } + if err := s.ReplaceHostSnapshots(ctx, hostID, first, time.Now().UTC()); err != nil { + t.Fatalf("replace 1: %v", err) + } + + // Subsequent forget+prune on the host: only one snapshot remains. + second := []Snapshot{ + mk("cccccccccccccccccccccccccccccccccccccccccccccccccccccccc", "cccccccc", -1*time.Hour), + } + if err := s.ReplaceHostSnapshots(ctx, hostID, second, time.Now().UTC()); err != nil { + t.Fatalf("replace 2: %v", err) + } + + out, err := s.ListSnapshotsByHost(ctx, hostID) + if err != nil { + t.Fatalf("list: %v", err) + } + if len(out) != 1 || out[0].ShortID != "cccccccc" { + t.Errorf("after second replace, want [cccccccc], got %+v", out) + } + h, _ := s.GetHost(ctx, hostID) + if h.SnapshotCount != 1 { + t.Errorf("snapshot_count should track replacement: %d", h.SnapshotCount) + } +} + +func TestReplaceHostSnapshotsEmpty(t *testing.T) { + t.Parallel() + s := openTestStore(t) + hostID := makeSnapHost(t, s) + ctx := context.Background() + + // First a non-empty replace. 
+ if err := s.ReplaceHostSnapshots(ctx, hostID, []Snapshot{ + {ID: "1111111111111111111111111111111111111111111111111111111111111111", + ShortID: "11111111", Time: time.Now().UTC(), Hostname: "snap-host", + Paths: []string{"/x"}}, + }, time.Now().UTC()); err != nil { + t.Fatalf("replace 1: %v", err) + } + // Then empty — host has been wiped. + if err := s.ReplaceHostSnapshots(ctx, hostID, nil, time.Now().UTC()); err != nil { + t.Fatalf("replace empty: %v", err) + } + out, err := s.ListSnapshotsByHost(ctx, hostID) + if err != nil { + t.Fatalf("list: %v", err) + } + if len(out) != 0 { + t.Errorf("want empty, got %d", len(out)) + } + h, _ := s.GetHost(ctx, hostID) + if h.SnapshotCount != 0 { + t.Errorf("snapshot_count should reset to 0, got %d", h.SnapshotCount) + } +} diff --git a/tasks.md b/tasks.md index a77f510..12fcc34 100644 --- a/tasks.md +++ b/tasks.md @@ -33,7 +33,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days. - [x] **P1-08** (M) Define shared API types in `internal/api` (envelopes, every WS message + `protocol_version` constants; JSON-shape tests pin the wire) - [x] **P1-09** (L) WebSocket transport (`github.com/coder/websocket`), framed JSON envelopes, RPC correlation IDs, exponential-backoff reconnect on the agent side -- [x] **P1-10** (M) Enrollment flow: `POST /api/agents/enroll` with one-time token → returns persistent bearer (cert pin field is in place but populated empty until TLS-aware revision) +- [x] **P1-10** (M) Enrollment flow: `POST /api/agents/enroll` with one-time token → returns persistent bearer. Cert pin field stays in the response shape but is left empty: the server is HTTP-only behind a reverse proxy, so the operator pastes the proxy's cert hash into the install command rather than having the server introspect a cert it doesn't terminate. 
- [x] **P1-11** (M) Agent registration on connect (`hello` upserts agent_version/restic_version/protocol_version, flips status online, `protocol_too_old` rejection has clean error envelope) - [x] **P1-12** (S) Heartbeat handler (touches `last_seen_at`; background sweeper marks hosts offline after 90s without one) @@ -51,7 +51,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days. - [x] **P1-19** (M) Server endpoint `POST /api/hosts/{id}/jobs` to dispatch a `backup` command (validates kind, checks online, audit-logs) - [x] **P1-20** (M) Agent executes `restic backup`, streams stdout/stderr + parsed JSON events back as `job.progress` (1Hz throttle) / `log.stream` - [~] **P1-21** (M) Server persists log stream to `job_logs` ✓; **WS `/api/jobs/{id}/stream` for live browser tailing** still TODO — needs the per-job fan-out hub -- [ ] **P1-22** (S) Snapshot listing: `restic snapshots --json`, cached projection table, refresh after each backup +- [x] **P1-22** (S) Snapshot listing: agent calls `restic snapshots --json` after each successful backup and ships the projection over `snapshots.report`. Server `ReplaceHostSnapshots` atomically swaps the per-host list and updates `hosts.snapshot_count` in the same tx. Read endpoint: `GET /api/hosts/{id}/snapshots`. Tests cover round-trip, authoritative-replace semantics, and empty-after-prune. Schema dropped the unused `repo_id` FK from `snapshots` (repos as a first-class entity is P2 work). ### UI (HTMX + Tailwind) @@ -146,7 +146,7 @@ Sizes: **S** = under a day, **M** = 1–3 days, **L** = 3–7 days. - [ ] **P5-04** (S) Demo screenshots / short Loom walkthrough in README - [ ] **P5-05** (S) `SECURITY.md` with disclosure process - [ ] **P5-06** (M) End-to-end test suite in CI (Playwright vs. 
compose stack with sibling Linux agent) -- [ ] **P5-07** (S) Sample `docker-compose.yml` with TLS via Caddy sidecar (also demonstrates `RM_TRUSTED_PROXY`) +- [ ] **P5-07** (S) Reference deployment: `docker-compose.yml` + Caddyfile snippet showing the TLS-terminating reverse proxy in front of the HTTP-only server (also demonstrates `RM_TRUSTED_PROXY`) ### Phase 5 acceptance