From a3a53e3b8714805276aac7ea3bae692fea69af21 Mon Sep 17 00:00:00 2001
From: Steve Cliff
Date: Mon, 4 May 2026 11:09:47 +0100
Subject: [PATCH] agent: P2-18c announce-and-approve enrolment path

When -enroll-server is supplied without -enroll-token, the agent mints
(and persists) an Ed25519 keypair, POSTs /api/agents/announce, prints
the SHA256 fingerprint in a copy-friendly banner, opens
/ws/agent/pending, signs the server's nonce, and blocks until the admin
clicks Accept (1h ceiling). On accept, persists the bearer + host_id
from the 'enrolled' message; on reject (close code 4001) exits with a
clear error. Repo creds are pushed via config.update on the first
standard WS hello (P1-32 path), not in the enrolled message itself.
---
 cmd/agent/announce.go           | 262 ++++++++++++++++++++++++++++++++
 cmd/agent/main.go               |  11 +-
 internal/agent/config/config.go |   7 +
 3 files changed, 279 insertions(+), 1 deletion(-)
 create mode 100644 cmd/agent/announce.go

diff --git a/cmd/agent/announce.go b/cmd/agent/announce.go
new file mode 100644
index 0000000..536baba
--- /dev/null
+++ b/cmd/agent/announce.go
@@ -0,0 +1,262 @@
+// announce.go — agent-side announce-and-approve enrolment (P2-18c).
+//
+// Run path: when the agent has no AgentToken set but RM_SERVER is
+// configured (and no -enroll-token was supplied), main() switches
+// into announce mode:
+//  1. Load (or mint+persist) an Ed25519 keypair in agent.yaml.
+//  2. POST {hostname, os, arch, agent_version, restic_version,
+//     public_key} to /api/agents/announce.
+//  3. Print the fingerprint to stderr in a copy-friendly banner so
+//     the operator can compare it against the dashboard.
+//  4. Open /ws/agent/pending?pending_id=…, sign the nonce with our
+//     private key, wait for an `enrolled` message.
+//  5. On enrolled: persist the bearer + host_id, return; main()
+//     then drops into the normal WS run loop with the new bearer.
+//  6. On reject: server closes the socket with code 4001; we exit
+//     with a clear message.
+package main
+
+import (
+	"bytes"
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"encoding/base64"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	stdhttp "net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/coder/websocket"
+
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/config"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo"
+	"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
+)
+
+// Per-step deadlines for the announce flow, plus the WebSocket close
+// code the server uses to signal that the admin rejected the host.
+const (
+	announcePostTimeout  = 30 * time.Second
+	pendingDialTimeout   = 30 * time.Second
+	pendingNonceTimeout  = 30 * time.Second
+	pendingWriteTimeout  = 10 * time.Second
+	pendingAcceptTimeout = 1 * time.Hour
+
+	pendingRejectCode = 4001
+)
+
+// announceRequest mirrors the server's announceRequest. Duplicated
+// here so cmd/agent stays decoupled from the http package.
+type announceRequest struct {
+	Hostname      string `json:"hostname"`
+	OS            string `json:"os"`
+	Arch          string `json:"arch"`
+	AgentVersion  string `json:"agent_version"`
+	ResticVersion string `json:"restic_version"`
+	PublicKey     string `json:"public_key"` // base64 (std encoding) Ed25519 public key
+}
+
+// announceResponse is the JSON body decoded from a 200 reply to
+// POST /api/agents/announce. Fingerprint is decoded but not used by
+// the agent: the banner prints the locally computed fingerprint.
+type announceResponse struct {
+	PendingID         string `json:"pending_id"`
+	Fingerprint       string `json:"fingerprint"`
+	HostnameCollision bool   `json:"hostname_collision"`
+}
+
+// pendingNonceMessage is the first message read from the pending
+// WebSocket. Nonce is base64 (std encoding).
+type pendingNonceMessage struct {
+	Type  string `json:"type"`
+	Nonce string `json:"nonce"`
+}
+
+// pendingSignedMessage is the agent's reply: the Ed25519 signature
+// over the decoded nonce, base64 (std encoding).
+type pendingSignedMessage struct {
+	Type      string `json:"type"`
+	Signature string `json:"signature"`
+}
+
+// pendingEnrolledMessage is the message the agent blocks on after
+// signing the nonce. It is accepted only when Type is "enrolled" and
+// Bearer is non-empty.
+type pendingEnrolledMessage struct {
+	Type   string `json:"type"`
+	HostID string `json:"host_id"`
+	Bearer string `json:"bearer"`
+}
+
+// doAnnounce runs the full announce → wait-for-accept flow. On
+// success it stores serverURL, the host_id and the bearer in cfg,
+// ensures a secrets key exists, saves cfg, and opens the secrets
+// store once. Repo creds are NOT part of the enrolled message, so
+// nothing is written into the secrets store here. Returns only after
+// the bearer has landed (or on hard error / reject).
+func doAnnounce(serverURL string, cfg *config.Config, agentVersion string) error {
+	// Outer ceiling for the whole flow. Every network step below
+	// applies its own, much shorter, deadline on top of this.
+	ctx, cancel := context.WithTimeout(context.Background(), 24*time.Hour)
+	defer cancel()
+
+	priv, pub, err := loadOrMintAnnounceKey(cfg)
+	if err != nil {
+		return fmt.Errorf("announce: keypair: %w", err)
+	}
+	fingerprint := store.FingerprintForKey(pub)
+
+	snap, err := sysinfo.Collect(ctx, cfg.ResticPath)
+	if err != nil {
+		return fmt.Errorf("announce: sysinfo: %w", err)
+	}
+
+	// Trim trailing slashes once so the POST URL and the WebSocket
+	// URL are built from the same base; previously only the POST was
+	// trimmed, so "https://host/" dialled "wss://host//ws/...".
+	baseURL := strings.TrimRight(serverURL, "/")
+
+	ar, err := postAnnounce(ctx, baseURL, announceRequest{
+		Hostname:      snap.Hostname,
+		OS:            string(snap.OS),
+		Arch:          string(snap.Arch),
+		AgentVersion:  agentVersion,
+		ResticVersion: snap.ResticVersion,
+		PublicKey:     base64.StdEncoding.EncodeToString(pub),
+	})
+	if err != nil {
+		return err
+	}
+
+	printAnnounceBanner(snap.Hostname, serverURL, fingerprint, ar)
+
+	// The pending ID comes from the server; escape it rather than
+	// trusting it to be query-safe.
+	wsURL := wsURLFromHTTP(baseURL) + "/ws/agent/pending?pending_id=" + url.QueryEscape(ar.PendingID)
+	em, err := awaitEnrolment(ctx, wsURL, priv)
+	if err != nil {
+		return err
+	}
+
+	// Persist the bearer + host_id.
+	cfg.ServerURL = serverURL
+	cfg.HostID = em.HostID
+	cfg.AgentToken = em.Bearer
+	if err := cfg.EnsureSecretsKey(); err != nil {
+		return fmt.Errorf("announce: mint secrets key: %w", err)
+	}
+	// Note: repo creds aren't pushed in the enrolled message — the
+	// server pushes them via `config.update` on first WS hello. The
+	// secrets store will start empty and fill in then.
+	if err := cfg.Save(); err != nil {
+		return fmt.Errorf("announce: save config: %w", err)
+	}
+	// Touch the secrets store so it exists with the right perms.
+	// The second result is deliberately dropped: EnsureSecretsKey
+	// succeeded just above, and secrets.New reports its own failure.
+	// NOTE(review): presumably that covers a bad key too — confirm.
+	keyBytes, _ := cfg.SecretsKeyBytes()
+	if _, err := secrets.New(cfg.ResolvedSecretsPath(), keyBytes); err != nil {
+		return fmt.Errorf("announce: open secrets store: %w", err)
+	}
+	fmt.Fprintln(os.Stderr, "Accepted. Bearer persisted; reconnecting via the standard WS.")
+	return nil
+}
+
+// postAnnounce POSTs payload as JSON to baseURL+"/api/agents/announce"
+// and decodes the reply. baseURL must not end in a slash. Any status
+// other than 200 is an error that includes (up to 64KB of) the body.
+func postAnnounce(ctx context.Context, baseURL string, payload announceRequest) (announceResponse, error) {
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return announceResponse{}, fmt.Errorf("announce: encode request: %w", err)
+	}
+
+	// DefaultClient has no timeout of its own, so bound the request
+	// here instead of inheriting the caller's long-lived context.
+	ctx, cancel := context.WithTimeout(ctx, announcePostTimeout)
+	defer cancel()
+
+	// A malformed server URL surfaces here; ignoring this error left
+	// req nil and panicked on the Header.Set below.
+	req, err := stdhttp.NewRequestWithContext(ctx, stdhttp.MethodPost,
+		baseURL+"/api/agents/announce", bytes.NewReader(body))
+	if err != nil {
+		return announceResponse{}, fmt.Errorf("announce: build request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	res, err := stdhttp.DefaultClient.Do(req)
+	if err != nil {
+		return announceResponse{}, fmt.Errorf("announce: POST: %w", err)
+	}
+	defer func() { _ = res.Body.Close() }()
+
+	rawBody := readAllShort(res)
+	if res.StatusCode != stdhttp.StatusOK {
+		return announceResponse{}, fmt.Errorf("announce: server returned %d: %s", res.StatusCode, rawBody)
+	}
+	var ar announceResponse
+	if err := json.Unmarshal(rawBody, &ar); err != nil {
+		return announceResponse{}, fmt.Errorf("announce: parse response: %w", err)
+	}
+	if ar.PendingID == "" {
+		return announceResponse{}, errors.New("announce: server response has no pending_id")
+	}
+	return ar, nil
+}
+
+// printAnnounceBanner writes the copy-friendly enrolment banner to
+// stderr. fingerprint is the locally computed one, so the operator
+// compares what this agent actually holds against the UI.
+func printAnnounceBanner(hostname, serverURL, fingerprint string, ar announceResponse) {
+	fmt.Fprintln(os.Stderr, strings.Repeat("=", 64))
+	fmt.Fprintln(os.Stderr, " Restic-manager: announce-and-approve enrolment")
+	fmt.Fprintln(os.Stderr, "")
+	fmt.Fprintln(os.Stderr, " Hostname : "+hostname)
+	fmt.Fprintln(os.Stderr, " Server : "+serverURL)
+	fmt.Fprintln(os.Stderr, " Pending ID : "+ar.PendingID)
+	fmt.Fprintln(os.Stderr, " Fingerprint : "+fingerprint)
+	if ar.HostnameCollision {
+		fmt.Fprintln(os.Stderr, "")
+		fmt.Fprintln(os.Stderr, " WARNING: another pending host already uses this hostname.")
+		fmt.Fprintln(os.Stderr, " Confirm the fingerprint above matches what you see in the UI.")
+	}
+	fmt.Fprintln(os.Stderr, "")
+	fmt.Fprintln(os.Stderr, " Compare the fingerprint with the one in the UI before accepting.")
+	fmt.Fprintln(os.Stderr, " Waiting for an admin to accept (1 hour timeout)…")
+	fmt.Fprintln(os.Stderr, strings.Repeat("=", 64))
+}
+
+// awaitEnrolment dials wsURL, signs the server's nonce with priv and
+// blocks (up to pendingAcceptTimeout) for the enrolled message. A
+// close with pendingRejectCode is reported as an admin reject.
+// priv must be a full-size Ed25519 private key (ed25519.Sign panics
+// otherwise); loadOrMintAnnounceKey guarantees that.
+func awaitEnrolment(ctx context.Context, wsURL string, priv ed25519.PrivateKey) (pendingEnrolledMessage, error) {
+	var none pendingEnrolledMessage
+
+	dialCtx, dialCancel := context.WithTimeout(ctx, pendingDialTimeout)
+	c, dialRes, err := websocket.Dial(dialCtx, wsURL, nil)
+	dialCancel()
+	if err != nil {
+		return none, fmt.Errorf("announce: dial pending ws: %w", err)
+	}
+	if dialRes != nil && dialRes.Body != nil {
+		_ = dialRes.Body.Close()
+	}
+	defer func() { _ = c.CloseNow() }()
+
+	// Read nonce.
+	nonceCtx, nonceCancel := context.WithTimeout(ctx, pendingNonceTimeout)
+	_, raw, err := c.Read(nonceCtx)
+	nonceCancel()
+	if err != nil {
+		return none, fmt.Errorf("announce: read nonce: %w", err)
+	}
+	var nm pendingNonceMessage
+	if err := json.Unmarshal(raw, &nm); err != nil {
+		return none, fmt.Errorf("announce: parse nonce: %w", err)
+	}
+	nonce, err := base64.StdEncoding.DecodeString(nm.Nonce)
+	if err != nil {
+		return none, fmt.Errorf("announce: decode nonce: %w", err)
+	}
+
+	// Sign it and send the signature back.
+	reply, err := json.Marshal(pendingSignedMessage{
+		Type:      "signed_nonce",
+		Signature: base64.StdEncoding.EncodeToString(ed25519.Sign(priv, nonce)),
+	})
+	if err != nil {
+		return none, fmt.Errorf("announce: encode signed nonce: %w", err)
+	}
+	writeCtx, writeCancel := context.WithTimeout(ctx, pendingWriteTimeout)
+	err = c.Write(writeCtx, websocket.MessageText, reply)
+	writeCancel()
+	if err != nil {
+		return none, fmt.Errorf("announce: write signed nonce: %w", err)
+	}
+
+	// Block until enrolled (or reject / disconnect).
+	waitCtx, waitCancel := context.WithTimeout(ctx, pendingAcceptTimeout)
+	defer waitCancel()
+	_, raw, err = c.Read(waitCtx)
+	if err != nil {
+		// CloseError with our reject code 4001 = admin rejected.
+		var ce websocket.CloseError
+		if errors.As(err, &ce) && ce.Code == pendingRejectCode {
+			return none, errors.New("announce: rejected by admin")
+		}
+		return none, fmt.Errorf("announce: wait for enrolled: %w", err)
+	}
+	var em pendingEnrolledMessage
+	if err := json.Unmarshal(raw, &em); err != nil {
+		return none, fmt.Errorf("announce: parse enrolled: %w", err)
+	}
+	if em.Type != "enrolled" || em.Bearer == "" {
+		// Report the type only: the raw payload may carry a bearer
+		// and this error ends up on stderr / in logs.
+		return none, fmt.Errorf("announce: bad enrolled payload (type %q)", em.Type)
+	}
+	return em, nil
+}
+
+// loadOrMintAnnounceKey returns the (priv, pub) keypair. When
+// cfg.AnnounceKey is set it is base64-decoded and must be exactly
+// ed25519.PrivateKeySize bytes; otherwise a fresh keypair is
+// generated, stored in cfg.AnnounceKey and written out with
+// cfg.Save. The private key holds the public half in its tail 32
+// bytes per ed25519 convention.
+func loadOrMintAnnounceKey(cfg *config.Config) (ed25519.PrivateKey, ed25519.PublicKey, error) {
+	if cfg.AnnounceKey != "" {
+		raw, err := base64.StdEncoding.DecodeString(cfg.AnnounceKey)
+		if err != nil {
+			return nil, nil, fmt.Errorf("decode AnnounceKey: %w", err)
+		}
+		if len(raw) != ed25519.PrivateKeySize {
+			return nil, nil, fmt.Errorf("AnnounceKey must be %d bytes, got %d",
+				ed25519.PrivateKeySize, len(raw))
+		}
+		priv := ed25519.PrivateKey(raw)
+		pub := priv.Public().(ed25519.PublicKey)
+		return priv, pub, nil
+	}
+	pub, priv, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		return nil, nil, fmt.Errorf("generate keypair: %w", err)
+	}
+	cfg.AnnounceKey = base64.StdEncoding.EncodeToString(priv)
+	if err := cfg.Save(); err != nil {
+		return nil, nil, fmt.Errorf("persist AnnounceKey: %w", err)
+	}
+	return priv, pub, nil
+}
+
+// wsURLFromHTTP swaps the http(s) scheme for ws(s). The match is a
+// case-sensitive prefix test; any other input is returned unchanged.
+func wsURLFromHTTP(httpURL string) string {
+	switch {
+	case strings.HasPrefix(httpURL, "https://"):
+		return "wss://" + strings.TrimPrefix(httpURL, "https://")
+	case strings.HasPrefix(httpURL, "http://"):
+		return "ws://" + strings.TrimPrefix(httpURL, "http://")
+	default:
+		return httpURL
+	}
+}
+
+// readAllShort reads up to 64KB of the response body. The announce
+// response is small; we cap to avoid pathological server replies.
+// It does not close the body; the caller does.
+func readAllShort(res *stdhttp.Response) []byte {
+	// A single Body.Read may legally return fewer bytes than are
+	// available, which truncated valid replies. Drain through a
+	// LimitReader instead. The read error is dropped on purpose:
+	// whatever arrived is returned and the caller's status check /
+	// JSON parse reports a short or broken body.
+	data, _ := io.ReadAll(io.LimitReader(res.Body, 64*1024))
+	return data
+}
diff --git a/cmd/agent/main.go b/cmd/agent/main.go
index 0c2b691..1bf1954 100644
--- a/cmd/agent/main.go
+++ b/cmd/agent/main.go
@@ -59,8 +59,17 @@ func run() error {
 		return doEnroll(*enrollServer, *enrollToken, cfg, version)
 	}
 
+	// Announce-and-approve: -enroll-server set, no token, agent not
+	// yet enrolled. Run the announce flow inline; on success the cfg
+	// has the bearer + host_id and we drop into the normal run loop.
+	if !cfg.Enrolled() && *enrollServer != "" {
+		if err := doAnnounce(*enrollServer, cfg, version); err != nil {
+			return fmt.Errorf("announce: %w", err)
+		}
+	}
+
 	if !cfg.Enrolled() {
-		return fmt.Errorf("agent is not enrolled; run with -enroll-server and -enroll-token first (config %q)", *configPath)
+		return fmt.Errorf("agent is not enrolled; run with -enroll-server (and either -enroll-token or wait for admin to accept the announce) first (config %q)", *configPath)
 	}
 
 	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
diff --git a/internal/agent/config/config.go b/internal/agent/config/config.go
index c10e20c..1e0cdd1 100644
--- a/internal/agent/config/config.go
+++ b/internal/agent/config/config.go
@@ -62,6 +62,13 @@ type Config struct {
 	LegacyRepoURL      string `yaml:"repo_url,omitempty"`
 	LegacyRepoPassword string `yaml:"repo_password,omitempty"`
 
+	// AnnounceKey is the base64-encoded Ed25519 private key used by
+	// announce-and-approve enrolment (P2-18). Generated on first
+	// announce, persisted so the agent can re-attach to the same
+	// pending row across restarts. 64 bytes when decoded.
+	// Empty for token-flow enrolments.
+	AnnounceKey string `yaml:"announce_key,omitempty"`
+
 	// path is the file we loaded from. Used by Save.
 	path string `yaml:"-"`
 }