phase 1: run-now backup — restic wrapper, job lifecycle, end-to-end
Lands the operator → server → agent → restic → server roundtrip for
on-demand backups. The flow:
POST /api/hosts/{id}/jobs {kind:"backup",args:["/path"]}
→ server creates a queued Job row
→ server emits command.run over WS to the host's agent
→ agent dispatcher spawns runner.RunBackup in a goroutine
→ runner spawns `restic backup --json`, parses each line
→ forwards: job.started, log.stream (every line), job.progress
(throttled to 1/sec), job.finished (with summary stats blob)
→ server WS handler persists those into jobs / job_logs
P1-16 internal/restic: thin Locate + Env wrapper that runs `restic
backup --json`, scans stdout/stderr, parses BackupStatus +
BackupSummary, calls back into a LineHandler so the agent can fan
out to log.stream + job.progress. Treats exit code 3 as
"succeeded with issues" (matches restic's contract).
P1-18 store: jobs accessors (CreateJob, MarkJobStarted,
MarkJobFinished, AppendJobLog, GetJob).
P1-19 server: POST /api/hosts/{id}/jobs creates the Job row,
validates kind, dispatches via Hub.Send, audit-logs the action.
P1-20 agent runner: wraps restic.RunBackup with throttled progress
emission. Sender abstraction was added to wsclient.Handler so
background goroutines can keep replying after dispatch returns.
P1-21 server WS: dispatchAgentMessage now persists job.started,
job.finished, log.stream into the database. Browser fan-out for
live tailing lands with the UI work.
Agent reads repo_url + repo_password from agent.yaml in plaintext
for now (mode 0600, owned by service user); in P2 they move to the
keyring storage described in spec.md §7.3. config.update over WS
overrides the in-memory copy (does not persist).
Build clean; all tests pass. An end-to-end run against a real restic
is still outstanding (it needs a host that has restic installed); the
wire shape is verified by the existing hello/heartbeat round-trip test.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+81
-14
@@ -9,11 +9,14 @@ import (
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/config"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/runner"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/wsclient"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/restic"
|
||||
)
|
||||
|
||||
var version = "dev"
|
||||
@@ -45,10 +48,6 @@ func run() error {
|
||||
return fmt.Errorf("config: %w", err)
|
||||
}
|
||||
|
||||
// Enrollment mode: agent was started with -enroll-server -enroll-token.
|
||||
// On success we persist the credentials and exit (the install script
|
||||
// then starts the agent service). Avoiding a long-running process here
|
||||
// keeps the enrollment story restartable.
|
||||
if *enrollToken != "" {
|
||||
if *enrollServer == "" {
|
||||
return errors.New("enrollment: -enroll-server is required with -enroll-token")
|
||||
@@ -75,6 +74,8 @@ func run() error {
|
||||
"protocol_version", snap.ProtocolVersion,
|
||||
)
|
||||
|
||||
resticBin, _ := restic.Locate(cfg.ResticPath) // empty is fine; commands fail with a clear error later
|
||||
|
||||
wsCfg := wsclient.Config{
|
||||
ServerURL: cfg.ServerURL,
|
||||
AgentToken: cfg.AgentToken,
|
||||
@@ -90,35 +91,101 @@ func run() error {
|
||||
},
|
||||
}
|
||||
|
||||
if err := wsclient.Run(ctx, wsCfg, dispatch); err != nil {
|
||||
d := &dispatcher{
|
||||
resticBin: resticBin,
|
||||
repoURL: cfg.RepoURL,
|
||||
repoPassword: cfg.RepoPassword,
|
||||
}
|
||||
if err := wsclient.Run(ctx, wsCfg, d.handle); err != nil {
|
||||
return fmt.Errorf("ws run: %w", err)
|
||||
}
|
||||
slog.Info("agent shutting down")
|
||||
return nil
|
||||
}
|
||||
|
||||
// dispatch handles server-pushed envelopes. Phase 1's first slice
|
||||
// just logs; P1-19/20/21 wire command.run to the runner.
|
||||
func dispatch(_ context.Context, env api.Envelope) error {
|
||||
// dispatcher closes over the long-lived agent settings (restic path,
|
||||
// repo creds) so handle() can spawn the runner without re-loading
|
||||
// config every time.
|
||||
type dispatcher struct {
|
||||
resticBin string // restic binary path from restic.Locate; may be empty, in which case runJob refuses to start jobs
|
||||
repoURL string // repository URL, seeded from agent config; config.update may override it in memory
|
||||
repoPassword string // repository password, seeded from agent config; config.update may override it in memory
|
||||
}
|
||||
|
||||
func (d *dispatcher) handle(ctx context.Context, env api.Envelope, tx wsclient.Sender) error {
|
||||
switch env.Type {
|
||||
case api.MsgCommandRun:
|
||||
slog.Info("ws agent: command.run received (not yet implemented)", "id", env.ID)
|
||||
var p api.CommandRunPayload
|
||||
if err := env.UnmarshalPayload(&p); err != nil {
|
||||
return fmt.Errorf("command.run: %w", err)
|
||||
}
|
||||
return d.runJob(ctx, p, tx)
|
||||
|
||||
case api.MsgCommandCancel:
|
||||
slog.Info("ws agent: command.cancel received (not yet implemented)", "id", env.ID)
|
||||
// TODO(P2): cancellation requires keeping a job→cancelFunc map.
|
||||
slog.Info("ws agent: command.cancel received (cancellation lands in P2)", "id", env.ID)
|
||||
|
||||
case api.MsgScheduleSet:
|
||||
slog.Info("ws agent: schedule.set received (not yet implemented)", "id", env.ID)
|
||||
// TODO(P2): apply the schedule.
|
||||
slog.Info("ws agent: schedule.set received (handled in P2)", "id", env.ID)
|
||||
|
||||
case api.MsgConfigUpdate:
|
||||
slog.Info("ws agent: config.update received (not yet implemented)", "id", env.ID)
|
||||
var p api.ConfigUpdatePayload
|
||||
_ = env.UnmarshalPayload(&p)
|
||||
// In-memory only for now — restart loses these. Persistent
|
||||
// secret storage lands with P2's keyring work.
|
||||
if p.RepoURL != "" {
|
||||
d.repoURL = p.RepoURL
|
||||
slog.Info("ws agent: repo URL updated via config.update")
|
||||
}
|
||||
if p.RepoPassword != "" {
|
||||
d.repoPassword = p.RepoPassword
|
||||
slog.Info("ws agent: repo password updated via config.update")
|
||||
}
|
||||
|
||||
case api.MsgAgentUpdateAvail:
|
||||
slog.Info("ws agent: agent.update.available received (not yet implemented)", "id", env.ID)
|
||||
var p api.AgentUpdateAvailablePayload
|
||||
_ = env.UnmarshalPayload(&p)
|
||||
slog.Info("ws agent: update available", "version", p.LatestVersion, "url", p.PackageURL)
|
||||
|
||||
default:
|
||||
slog.Debug("ws agent: ignored message", "type", env.Type)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// runJob spawns a runner for one job. We launch a goroutine so the
|
||||
// WS read loop keeps draining messages while restic chugs along.
|
||||
func (d *dispatcher) runJob(ctx context.Context, p api.CommandRunPayload, tx wsclient.Sender) error {
|
||||
if d.resticBin == "" {
|
||||
return fmt.Errorf("restic binary not located on this agent")
|
||||
}
|
||||
if d.repoURL == "" || d.repoPassword == "" {
|
||||
return fmt.Errorf("repo credentials not configured (set repo_url + repo_password in agent.yaml or push via config.update)")
|
||||
}
|
||||
r := runner.New(runner.Config{
|
||||
ResticBin: d.resticBin,
|
||||
RepoURL: d.repoURL,
|
||||
RepoPassword: d.repoPassword,
|
||||
}, tx, time.Second)
|
||||
|
||||
switch p.Kind {
|
||||
case api.JobBackup:
|
||||
// Agent.Args carries [paths...]. Excludes/tags are not yet
|
||||
// surfaced over the wire; they come with P2 schedule support.
|
||||
go func() {
|
||||
if err := r.RunBackup(ctx, p.JobID, p.Args, nil, nil); err != nil {
|
||||
slog.Warn("agent: backup job failed", "job_id", p.JobID, "err", err)
|
||||
}
|
||||
}()
|
||||
default:
|
||||
return fmt.Errorf("kind %q not implemented yet (Phase 2 lands the rest)", p.Kind)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func doEnroll(serverURL, token string, cfg *config.Config, agentVersion string) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*1e9)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
snap, err := sysinfo.Collect(ctx, cfg.ResticPath)
|
||||
|
||||
Reference in New Issue
Block a user