phase 1: run-now backup — restic wrapper, job lifecycle, end-to-end

Lands the operator → server → agent → restic → server roundtrip for
on-demand backups. The flow:

  POST /api/hosts/{id}/jobs {kind:"backup",args:["/path"]}
    → server creates a queued Job row
    → server emits command.run over WS to the host's agent
    → agent dispatcher spawns runner.RunBackup in a goroutine
    → runner spawns `restic backup --json`, parses each line
    → forwards: job.started, log.stream (every line), job.progress
      (throttled to 1/sec), job.finished (with summary stats blob)
    → server WS handler persists those into jobs / job_logs

P1-16 internal/restic: thin Locate + Env wrapper that runs `restic
  backup --json`, scans stdout/stderr, parses BackupStatus +
  BackupSummary, calls back into a LineHandler so the agent can fan
  out to log.stream + job.progress. Treats exit code 3 as
  "succeeded with issues" (matches restic's contract).

P1-18 store: jobs accessors (CreateJob, MarkJobStarted,
  MarkJobFinished, AppendJobLog, GetJob).

P1-19 server: POST /api/hosts/{id}/jobs creates the Job row,
  validates kind, dispatches via Hub.Send, audit-logs the action.

P1-20 agent runner: wraps restic.RunBackup with throttled progress
  emission. Sender abstraction was added to wsclient.Handler so
  background goroutines can keep replying after dispatch returns.

P1-21 server WS: dispatchAgentMessage now persists job.started,
  job.finished, log.stream into the database. Browser fan-out for
  live tailing lands with the UI work.

Agent gets repo_url + repo_password from agent.yaml in plaintext
for now (mode 0600, owned by service user); they move to the keyring
storage described in spec.md §7.3 in P2. config.update over WS
overrides the in-memory copy (does not persist).

Build clean; all tests pass. End-to-end with a real restic still
needs a host that has restic installed — wire shape verified by
the existing hello/heartbeat round-trip test.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 00:45:04 +01:00
parent 24ab071702
commit a7c6a6e09c
10 changed files with 811 additions and 29 deletions
+134
View File
@@ -0,0 +1,134 @@
package http
import (
	"encoding/json"
	"log/slog"
	stdhttp "net/http"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/oklog/ulid/v2"

	"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// runNowRequest is the JSON body of POST /api/hosts/:id/jobs.
// handleRunNow decodes it, rejects any Kind that validJobKind does not
// accept, and forwards Kind and Args unchanged in the command payload
// it sends for the target host.
type runNowRequest struct {
// Kind selects which job to run; serialized as "kind".
Kind api.JobKind `json:"kind"`
// Args is serialized as "args" and omitted from the JSON when empty.
Args []string `json:"args,omitempty"` // restic CLI args (paths for backup, etc.)
}
// runNowResponse is the JSON body handleRunNow writes with
// 202 Accepted after the job row has been created and the command has
// been handed to the hub.
type runNowResponse struct {
// JobID is the identifier generated for the new job; serialized as "job_id".
JobID string `json:"job_id"`
// Status is serialized as "status"; handleRunNow always sets it to "queued".
Status string `json:"status"` // "queued"
}
// handleRunNow dispatches a job to the host named in the URL.
//
// Flow: resolve the session cookie to a user, decode and validate the
// request body, look up the host and confirm its agent is connected,
// insert the Job row, send an api.MsgCommandRun envelope through the
// Hub, append an audit entry, and reply 202 Accepted with the job ID.
//
// Error responses:
//   - 401 unauthorized: no valid session.
//   - 400 missing_host_id / invalid_json / invalid_kind: bad request.
//   - 404 host_not_found: the host lookup returned an error.
//   - 503 host_offline: the agent isn't connected or the send failed
//     (caller should retry once the agent comes back).
//   - 500 internal: the job row or the envelope could not be created.
func (s *Server) handleRunNow(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	// Upper bound on the request body. The payload is a kind plus a
	// handful of CLI args, so 1 MiB is generous while still stopping a
	// client from streaming an unbounded body into the decoder.
	const maxBodyBytes = 1 << 20

	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
		return
	}

	hostID := chi.URLParam(r, "id")
	if hostID == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_host_id", "")
		return
	}

	r.Body = stdhttp.MaxBytesReader(w, r.Body, maxBodyBytes)
	var req runNowRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}
	if !validJobKind(req.Kind) {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_kind",
			"kind must be one of backup|forget|prune|check|unlock")
		return
	}

	// Any lookup error is reported as "not found".
	host, err := s.deps.Store.GetHost(r.Context(), hostID)
	if err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
		return
	}
	// Refuse early rather than queue a job nobody can pick up.
	if !s.deps.Hub.Connected(host.ID) {
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline",
			"agent is not currently connected; try again when it reconnects")
		return
	}

	jobID := ulid.Make().String()
	now := time.Now().UTC()
	if err := s.deps.Store.CreateJob(r.Context(), store.Job{
		ID:        jobID,
		HostID:    host.ID,
		Kind:      string(req.Kind),
		ActorKind: "user",
		ActorID:   &user.ID,
		CreatedAt: now,
	}); err != nil {
		// The client only sees "internal"; keep the cause in the log.
		slog.Error("run-now: create job", "job_id", jobID, "host_id", host.ID, "err", err)
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// The job ID doubles as the envelope's second argument so replies
	// can be matched back to this job.
	env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
		JobID: jobID,
		Kind:  req.Kind,
		Args:  req.Args,
	})
	if err != nil {
		slog.Error("run-now: marshal command", "job_id", jobID, "host_id", host.ID, "err", err)
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}
	if err := s.deps.Hub.Send(r.Context(), host.ID, env); err != nil {
		// NOTE(review): the Job row created above stays in its initial
		// state when the send fails; consider marking it failed here
		// so it doesn't linger as queued.
		slog.Warn("run-now: send command", "job_id", jobID, "host_id", host.ID, "err", err)
		writeJSONError(w, stdhttp.StatusServiceUnavailable, "host_offline", err.Error())
		return
	}

	// Auditing is best-effort: the job is already dispatched, so a
	// failed audit write is logged rather than failing the request.
	if err := s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &user.ID,
		Actor:      "user",
		Action:     "job.run_now",
		TargetKind: ptr("job"),
		TargetID:   &jobID,
		TS:         now,
	}); err != nil {
		slog.Warn("run-now: append audit", "job_id", jobID, "err", err)
	}

	writeJSON(w, stdhttp.StatusAccepted, runNowResponse{
		JobID:  jobID,
		Status: "queued",
	})
}
// requireUser maps the request's session cookie to the user it belongs
// to. It returns (nil, false) when the cookie is absent, when the
// hashed cookie value does not resolve to a session, or when the
// session's user cannot be loaded. Stub of the session-auth middleware
// that lands in P1-04's full pass.
func (s *Server) requireUser(r *stdhttp.Request) (*store.User, bool) {
	cookie, cookieErr := r.Cookie(sessionCookieName)
	if cookieErr != nil {
		return nil, false
	}

	ctx := r.Context()

	// Sessions are looked up by the hash of the cookie value.
	tokenHash := auth.HashToken(cookie.Value)
	session, sessErr := s.deps.Store.LookupSession(ctx, tokenHash)
	if sessErr != nil {
		return nil, false
	}

	account, userErr := s.deps.Store.GetUserByID(ctx, session.UserID)
	if userErr != nil {
		return nil, false
	}
	return account, true
}
// validJobKind reports whether k is one of the job kinds the run-now
// endpoint accepts: api.JobBackup, api.JobForget, api.JobPrune,
// api.JobCheck or api.JobUnlock.
func validJobKind(k api.JobKind) bool {
	accepted := [...]api.JobKind{
		api.JobBackup,
		api.JobForget,
		api.JobPrune,
		api.JobCheck,
		api.JobUnlock,
	}
	for _, candidate := range accepted {
		if k == candidate {
			return true
		}
	}
	return false
}
+3
View File
@@ -83,6 +83,9 @@ func (s *Server) routes(r chi.Router) {
// /hosts/{id}/enrollment-token (regenerate) lands when the
// host page can call it; for now just the create endpoint.
r.Post("/enrollment-tokens", s.handleCreateEnrollmentToken)
// Run-now: dispatch a job to a host's agent.
r.Post("/hosts/{id}/jobs", s.handleRunNow)
})
// Agent ↔ server WebSocket. Bearer-authenticated inside the handler.
+33 -7
View File
@@ -149,18 +149,44 @@ func runAgentLoop(ctx context.Context, c *Conn, hostID string, deps HandlerDeps)
}
}
// dispatchAgentMessage routes a single envelope to its handler. Only
// hello + heartbeat are wired up in Phase 1's first slice; the rest
// land with P1-18+ (jobs) and P2 (schedules).
// dispatchAgentMessage routes a single envelope to its handler.
func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.Envelope, deps HandlerDeps) {
switch env.Type {
case api.MsgHeartbeat:
_ = deps.Store.TouchHost(ctx, hostID, time.Now().UTC())
case api.MsgJobStarted, api.MsgJobProgress, api.MsgJobFinished,
api.MsgLogStream, api.MsgSnapshotsRpt, api.MsgRepoStats,
api.MsgScheduleAck, api.MsgCommandResult:
// TODO(P1-18+): persist + fan out to subscribed browsers.
case api.MsgJobStarted:
var p api.JobStartedPayload
_ = env.UnmarshalPayload(&p)
if err := deps.Store.MarkJobStarted(ctx, p.JobID, p.StartedAt); err != nil {
slog.Warn("ws: mark job started", "job_id", p.JobID, "err", err)
}
case api.MsgJobProgress:
// We don't persist every progress tick; the live UI subscribes
// to a fan-out channel that lands with P1-21 / the UI work.
// TODO: implement the ws fan-out hub for browsers.
_ = env
case api.MsgJobFinished:
var p api.JobFinishedPayload
_ = env.UnmarshalPayload(&p)
errMsg := p.Error
if err := deps.Store.MarkJobFinished(ctx, p.JobID,
string(p.Status), p.ExitCode, p.Stats, errMsg, p.FinishedAt); err != nil {
slog.Warn("ws: mark job finished", "job_id", p.JobID, "err", err)
}
case api.MsgLogStream:
var p api.LogStreamLine
_ = env.UnmarshalPayload(&p)
if err := deps.Store.AppendJobLog(ctx, p.JobID, p.Seq, p.TS,
string(p.Stream), p.Payload); err != nil {
slog.Warn("ws: append job log", "job_id", p.JobID, "err", err)
}
case api.MsgSnapshotsRpt, api.MsgRepoStats, api.MsgScheduleAck, api.MsgCommandResult:
// TODO(P1-22 + P2): persist these projections.
slog.Debug("ws msg not yet handled", "type", env.Type, "host_id", hostID)
case api.MsgError: