Files
restic-manager/internal/agent/config/config.go
T
steve 9cc0caff1e phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled
through the operator UI, store credentials, dial back to the server
over WS, complete the protocol_version handshake, and stay
connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction,
  json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage
  (10s deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth)
  issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks
  hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field;
  the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional
  TLS cert pinning (sha-256 of leaf), exponential backoff with
  jitter (1s → 60s cap), heartbeat goroutine, fatal handling for
  ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic detected by `restic version` parse; absent
  restic doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to
  systemd to run the persistent service).

End-to-end smoke verified: bootstrap → login → issue token →
enroll → run agent → server logs `ws agent connected` with the
right host_id and protocol_version 1.

All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 00:39:00 +01:00

111 lines
3.2 KiB
Go

// Package config loads the agent's persistent configuration. After
// enrollment, the file holds the bearer token + server URL; it is
// only ever written via Save (which replaces atomically).
package config
import (
"fmt"
"os"
"path/filepath"
"gopkg.in/yaml.v3"
)
// Config is the on-disk shape of the agent's config file. Exported
// fields are serialized under the YAML keys named in their struct
// tags; the unexported path field is bookkeeping and is never written
// to the file.
type Config struct {
// ServerURL is the base URL of the control plane, e.g.
// https://restic.lab.example. The agent appends /ws/agent and
// /api/agents/enroll.
ServerURL string `yaml:"server_url"`
// AgentToken is the bearer credential issued at enrollment.
// Empty means "not yet enrolled." Save writes it to the config
// file as plain text.
AgentToken string `yaml:"agent_token"`
// HostID is the identifier the server uses for this host
// (presumably assigned at enrollment; confirm against the enroll
// client). Enrolled treats an empty HostID as "not yet enrolled."
HostID string `yaml:"host_id"`
// CertPinSHA256 (optional) is the SHA-256 of the server's TLS
// cert. When set, the agent refuses to connect to a server
// whose cert hash doesn't match. Omitted from the file when empty.
CertPinSHA256 string `yaml:"cert_pin_sha256,omitempty"`
// ResticPath overrides the auto-detected restic binary path.
// Omitted from the file when empty.
ResticPath string `yaml:"restic_path,omitempty"`
// path is the file we loaded from. Set by Load and used by Save
// as the destination; a Config with an empty path cannot be saved.
path string `yaml:"-"`
}
// DefaultPath reports where the agent config file lives by default.
// Only the Linux location is implemented (Phase 1); the Windows
// location is described in the spec and scheduled for P2.
func DefaultPath() string {
	const linuxConfigPath = "/etc/restic-manager/agent.yaml"
	return linuxConfigPath
}
// Load reads the YAML config stored at path and decodes it into a
// Config that remembers path for a later Save.
//
// A file that does not exist is not an error: Load then returns an
// empty Config bound to path, which is the state of a first-run agent
// that has not enrolled yet. Any other read failure, or a file that
// does not parse as YAML, yields a nil Config and a wrapped error.
func Load(path string) (*Config, error) {
	raw, readErr := os.ReadFile(path)
	switch {
	case readErr == nil:
		// File is present; fall through to parsing.
	case os.IsNotExist(readErr):
		return &Config{path: path}, nil
	default:
		return nil, fmt.Errorf("agent config: read %q: %w", path, readErr)
	}

	cfg := &Config{path: path}
	if parseErr := yaml.Unmarshal(raw, cfg); parseErr != nil {
		return nil, fmt.Errorf("agent config: parse %q: %w", path, parseErr)
	}
	// Re-assert the source path after decoding so Save always targets
	// the file we were asked to load.
	cfg.path = path
	return cfg, nil
}
// Save writes the config back atomically: marshal to YAML, write it to
// a freshly created temporary file next to the target, fsync, then
// rename over the target. A crash mid-write leaves either the old file
// or the new one, never a half-written one.
//
// The temporary file is created with os.CreateTemp, so it is always a
// brand-new file with mode 0600. A fixed "<path>.tmp" name opened with
// O_CREATE|O_TRUNC would keep the permissions of any stale file already
// sitting at that name (the mode argument only applies on creation),
// and the rename would then publish the agent token with those
// permissions. A unique name also keeps concurrent Save calls from
// writing into each other's temporary file.
//
// Save returns an error if the Config has no path (i.e. it did not come
// from Load), or if the mkdir, marshal, create, write, fsync, close or
// rename step fails. On any failure after the temporary file has been
// created, the temporary file is removed and the target is untouched.
func (c *Config) Save() error {
	if c.path == "" {
		return fmt.Errorf("agent config: no path set")
	}
	dir := filepath.Dir(c.path)
	if err := os.MkdirAll(dir, 0o700); err != nil {
		return fmt.Errorf("agent config: mkdir %q: %w", dir, err)
	}
	body, err := yaml.Marshal(c)
	if err != nil {
		return fmt.Errorf("agent config: marshal: %w", err)
	}

	// Same directory as the target so the rename stays on one
	// filesystem and is therefore atomic.
	f, err := os.CreateTemp(dir, filepath.Base(c.path)+".tmp-*")
	if err != nil {
		return fmt.Errorf("agent config: create tmp: %w", err)
	}
	tmp := f.Name()

	if _, err := f.Write(body); err != nil {
		_ = f.Close()
		_ = os.Remove(tmp)
		return fmt.Errorf("agent config: write tmp: %w", err)
	}
	// Flush file contents to stable storage before the rename makes
	// them visible under the real name.
	if err := f.Sync(); err != nil {
		_ = f.Close()
		_ = os.Remove(tmp)
		return fmt.Errorf("agent config: fsync tmp: %w", err)
	}
	if err := f.Close(); err != nil {
		_ = os.Remove(tmp)
		return fmt.Errorf("agent config: close tmp: %w", err)
	}
	if err := os.Rename(tmp, c.path); err != nil {
		_ = os.Remove(tmp)
		return fmt.Errorf("agent config: rename: %w", err)
	}

	// Best effort: fsync the directory so the rename itself survives a
	// crash. Errors are deliberately ignored: the new config is already
	// in place, and some platforms do not support syncing a directory.
	if d, err := os.Open(dir); err == nil {
		_ = d.Sync()
		_ = d.Close()
	}
	return nil
}
// Enrolled reports whether enrollment has completed, i.e. the config
// carries an agent token, a host ID and a server URL. If any one of
// the three is empty the agent is considered not enrolled.
func (c *Config) Enrolled() bool {
	if c.AgentToken == "" || c.HostID == "" || c.ServerURL == "" {
		return false
	}
	return true
}