agent: P2-18c announce-and-approve enrolment path

When -enroll-server is supplied without -enroll-token, the agent
mints (and persists) an Ed25519 keypair, POSTs /api/agents/announce,
prints the SHA256 fingerprint in a copy-friendly banner, opens
/ws/agent/pending, signs the server's nonce, and blocks until the
admin clicks Accept (1h ceiling). On accept, persists the bearer +
host_id from the 'enrolled' message; on reject (close code 4001)
exits with a clear error.

Repo creds are pushed via config.update on the first standard WS
hello (P1-32 path), not in the enrolled message itself.
This commit is contained in:
2026-05-04 11:09:47 +01:00
parent fd87218b3f
commit a46d906d27
3 changed files with 279 additions and 1 deletions
+262
View File
@@ -0,0 +1,262 @@
// announce.go — agent-side announce-and-approve enrolment (P2-18c).
//
// Run path: when the agent has no AgentToken set but RM_SERVER is
// configured (and no -enroll-token was supplied), main() switches
// into announce mode:
// 1. Load (or mint+persist) an Ed25519 keypair in agent.yaml.
// 2. POST {hostname, os, arch, agent_version, restic_version,
// public_key} to /api/agents/announce.
// 3. Print the fingerprint to stderr in a copy-friendly banner so
// the operator can compare it against the dashboard.
// 4. Open /ws/agent/pending?pending_id=…, sign the nonce with our
// private key, wait for an `enrolled` message.
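// 5. On enrolled: persist the bearer + host_id, return; main()
// then drops into the normal WS run loop with the new bearer.
// Repo creds are not part of the enrolled message; the server
// pushes them via config.update on the first standard WS hello.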
// 6. On reject: server closes the socket with code 4001; we exit
// with a clear message.
package main
import (
"context"
"crypto/ed25519"
"crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
stdhttp "net/http"
"os"
"strings"
"time"
"github.com/coder/websocket"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/secrets"
"gitea.dcglab.co.uk/steve/restic-manager/internal/agent/sysinfo"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// announceRequest is the JSON body doAnnounce POSTs to
// /api/agents/announce. It mirrors the server's announceRequest and is
// duplicated here so cmd/agent stays decoupled from the http package.
type announceRequest struct {
// Hostname, OS, Arch and ResticVersion are copied from the sysinfo
// snapshot collected at the start of doAnnounce.
Hostname string `json:"hostname"`
OS string `json:"os"`
Arch string `json:"arch"`
// AgentVersion is the agentVersion argument passed to doAnnounce.
AgentVersion string `json:"agent_version"`
ResticVersion string `json:"restic_version"`
// PublicKey is the agent's Ed25519 public key, standard-base64 encoded.
PublicKey string `json:"public_key"`
}
// announceResponse is the JSON body doAnnounce decodes from a 200 reply
// to POST /api/agents/announce.
type announceResponse struct {
// PendingID is sent back as the pending_id query parameter when
// dialling /ws/agent/pending.
PendingID string `json:"pending_id"`
// Fingerprint is decoded but not read by doAnnounce, which prints the
// fingerprint it derives locally from its own public key instead.
Fingerprint string `json:"fingerprint"`
// HostnameCollision, when true, makes doAnnounce add a warning to the
// banner that another pending host already uses this hostname.
HostnameCollision bool `json:"hostname_collision"`
}
// pendingNonceMessage is the first message doAnnounce reads from the
// /ws/agent/pending socket: the challenge the agent has to sign.
type pendingNonceMessage struct {
// Type is decoded but not checked by doAnnounce.
Type string `json:"type"`
// Nonce is standard-base64; doAnnounce decodes it and signs the raw bytes.
Nonce string `json:"nonce"`
}
// pendingSignedMessage is the agent's reply to pendingNonceMessage.
type pendingSignedMessage struct {
// Type is set to "signed_nonce" by doAnnounce.
Type string `json:"type"`
// Signature is the standard-base64 Ed25519 signature over the decoded
// nonce bytes.
Signature string `json:"signature"`
}
// pendingEnrolledMessage is the message doAnnounce blocks on after
// sending the signed nonce. doAnnounce rejects the payload unless Type
// is "enrolled" and Bearer is non-empty.
type pendingEnrolledMessage struct {
Type string `json:"type"`
// HostID is persisted as cfg.HostID.
HostID string `json:"host_id"`
// Bearer is persisted as cfg.AgentToken.
Bearer string `json:"bearer"`
}
// doAnnounce runs the full announce → wait-for-accept flow against
// serverURL:
//
//  1. Load (or mint and persist) the Ed25519 announce keypair.
//  2. POST the sysinfo snapshot and public key to /api/agents/announce.
//  3. Print the locally computed fingerprint in a banner on stderr so
//     the operator can compare it with the UI.
//  4. Dial /ws/agent/pending, sign the server's nonce, then block for
//     up to one hour waiting for the admin's decision.
//  5. On accept, store ServerURL, HostID and AgentToken in cfg, make
//     sure a secrets key exists, save cfg and open the secrets store.
//
// Repo creds are not part of the enrolled message; the server pushes
// them via config.update on the first standard WS hello, so the
// secrets store starts out empty.
//
// It returns nil only once the bearer has been saved. It returns an
// error on any hard failure, when the socket is closed with code 4001
// (admin rejected the host), or when the one-hour wait expires.
func doAnnounce(serverURL string, cfg *config.Config, agentVersion string) error {
	// Outer safety bound for the whole flow; every network step below
	// applies its own, much shorter, deadline.
	ctx, cancel := context.WithTimeout(context.Background(), 24*time.Hour)
	defer cancel()

	// Ensure we have a keypair.
	priv, pub, err := loadOrMintAnnounceKey(cfg)
	if err != nil {
		return fmt.Errorf("announce: keypair: %w", err)
	}
	fingerprint := store.FingerprintForKey(pub)

	snap, err := sysinfo.Collect(ctx, cfg.ResticPath)
	if err != nil {
		return fmt.Errorf("announce: sysinfo: %w", err)
	}

	// Trim once so the HTTP and WebSocket URLs are built from the same
	// base; otherwise "https://host/" would dial "wss://host//ws/...".
	base := strings.TrimRight(serverURL, "/")

	// POST /api/agents/announce.
	body, err := json.Marshal(announceRequest{
		Hostname: snap.Hostname, OS: string(snap.OS), Arch: string(snap.Arch),
		AgentVersion: agentVersion, ResticVersion: snap.ResticVersion,
		PublicKey: base64.StdEncoding.EncodeToString(pub),
	})
	if err != nil {
		return fmt.Errorf("announce: encode request: %w", err)
	}
	// Bound the POST like the WS dial below; without this a hung server
	// would stall the agent until the 24h outer deadline.
	postCtx, postCancel := context.WithTimeout(ctx, 30*time.Second)
	req, err := stdhttp.NewRequestWithContext(postCtx, stdhttp.MethodPost,
		base+"/api/agents/announce",
		strings.NewReader(string(body)))
	if err != nil {
		// A malformed serverURL lands here; req is nil in that case.
		postCancel()
		return fmt.Errorf("announce: build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	res, err := stdhttp.DefaultClient.Do(req)
	if err != nil {
		postCancel()
		return fmt.Errorf("announce: POST: %w", err)
	}
	rawBody := readAllShort(res)
	_ = res.Body.Close() // best effort; the body has already been consumed
	postCancel()
	if res.StatusCode != stdhttp.StatusOK {
		return fmt.Errorf("announce: server returned %d: %s", res.StatusCode, rawBody)
	}
	var ar announceResponse
	if err := json.Unmarshal(rawBody, &ar); err != nil {
		return fmt.Errorf("announce: parse response: %w", err)
	}
	if ar.PendingID == "" {
		// Without a pending ID there is nothing to wait on.
		return errors.New("announce: server response is missing pending_id")
	}

	// Print the fingerprint banner.
	fmt.Fprintln(os.Stderr, strings.Repeat("=", 64))
	fmt.Fprintln(os.Stderr, " Restic-manager: announce-and-approve enrolment")
	fmt.Fprintln(os.Stderr, "")
	fmt.Fprintln(os.Stderr, " Hostname : "+snap.Hostname)
	fmt.Fprintln(os.Stderr, " Server : "+serverURL)
	fmt.Fprintln(os.Stderr, " Pending ID : "+ar.PendingID)
	fmt.Fprintln(os.Stderr, " Fingerprint : "+fingerprint)
	if ar.HostnameCollision {
		fmt.Fprintln(os.Stderr, "")
		fmt.Fprintln(os.Stderr, " WARNING: another pending host already uses this hostname.")
		fmt.Fprintln(os.Stderr, " Confirm the fingerprint above matches what you see in the UI.")
	}
	fmt.Fprintln(os.Stderr, "")
	fmt.Fprintln(os.Stderr, " Compare the fingerprint with the one in the UI before accepting.")
	fmt.Fprintln(os.Stderr, " Waiting for an admin to accept (1 hour timeout)…")
	fmt.Fprintln(os.Stderr, strings.Repeat("=", 64))

	// Open /ws/agent/pending and run the nonce-sign handshake.
	wsURL := wsURLFromHTTP(base) + "/ws/agent/pending?pending_id=" + ar.PendingID
	dialCtx, dialCancel := context.WithTimeout(ctx, 30*time.Second)
	c, dialRes, err := websocket.Dial(dialCtx, wsURL, nil)
	dialCancel()
	if err != nil {
		return fmt.Errorf("announce: dial pending ws: %w", err)
	}
	if dialRes != nil && dialRes.Body != nil {
		_ = dialRes.Body.Close()
	}
	defer func() { _ = c.CloseNow() }()

	// Read nonce.
	rctx, rcancel := context.WithTimeout(ctx, 30*time.Second)
	_, raw, err := c.Read(rctx)
	rcancel()
	if err != nil {
		return fmt.Errorf("announce: read nonce: %w", err)
	}
	var nm pendingNonceMessage
	if err := json.Unmarshal(raw, &nm); err != nil {
		return fmt.Errorf("announce: parse nonce: %w", err)
	}
	nonce, err := base64.StdEncoding.DecodeString(nm.Nonce)
	if err != nil {
		return fmt.Errorf("announce: decode nonce: %w", err)
	}

	// Prove possession of the private key by signing the raw nonce.
	sig := ed25519.Sign(priv, nonce)
	reply, err := json.Marshal(pendingSignedMessage{
		Type: "signed_nonce", Signature: base64.StdEncoding.EncodeToString(sig),
	})
	if err != nil {
		return fmt.Errorf("announce: encode signed nonce: %w", err)
	}
	wctx, wcancel := context.WithTimeout(ctx, 10*time.Second)
	err = c.Write(wctx, websocket.MessageText, reply)
	wcancel()
	if err != nil {
		return fmt.Errorf("announce: write signed nonce: %w", err)
	}

	// Block until enrolled (or reject / disconnect / timeout).
	waitCtx, waitCancel := context.WithTimeout(ctx, 1*time.Hour)
	defer waitCancel()
	_, raw2, err := c.Read(waitCtx)
	if err != nil {
		// CloseError with our reject code 4001 = admin rejected.
		var ce websocket.CloseError
		if errors.As(err, &ce) && ce.Code == 4001 {
			return errors.New("announce: rejected by admin")
		}
		// Check the wait context itself rather than the shape of err, so
		// the timeout is reported however the library surfaces it.
		if errors.Is(waitCtx.Err(), context.DeadlineExceeded) {
			return fmt.Errorf("announce: timed out waiting for admin approval: %w", err)
		}
		return fmt.Errorf("announce: wait for enrolled: %w", err)
	}
	var em pendingEnrolledMessage
	if err := json.Unmarshal(raw2, &em); err != nil {
		return fmt.Errorf("announce: parse enrolled: %w", err)
	}
	if em.Type != "enrolled" || em.Bearer == "" {
		return fmt.Errorf("announce: bad enrolled payload: %s", raw2)
	}

	// Persist the bearer + host_id.
	cfg.ServerURL = serverURL
	cfg.HostID = em.HostID
	cfg.AgentToken = em.Bearer
	if err := cfg.EnsureSecretsKey(); err != nil {
		return fmt.Errorf("announce: mint secrets key: %w", err)
	}
	// Note: repo creds aren't pushed in the enrolled message — the
	// server pushes them via `config.update` on first WS hello. The
	// secrets store will start empty and fill in then.
	if err := cfg.Save(); err != nil {
		return fmt.Errorf("announce: save config: %w", err)
	}

	// Touch the secrets store so it exists with the right perms.
	keyBytes, err := cfg.SecretsKeyBytes()
	if err != nil {
		return fmt.Errorf("announce: load secrets key: %w", err)
	}
	if _, err := secrets.New(cfg.ResolvedSecretsPath(), keyBytes); err != nil {
		return fmt.Errorf("announce: open secrets store: %w", err)
	}

	fmt.Fprintln(os.Stderr, "Accepted. Bearer persisted; reconnecting via the standard WS.")
	return nil
}
// loadOrMintAnnounceKey yields the agent's Ed25519 (private, public)
// announce keypair.
//
// If cfg.AnnounceKey is empty, a fresh keypair is generated, the
// private key is stored standard-base64 encoded in cfg.AnnounceKey, and
// cfg is saved before returning. Otherwise the stored value is decoded,
// checked to be exactly ed25519.PrivateKeySize bytes, and the public
// half is derived from it (an ed25519 private key carries its public
// key in the trailing 32 bytes).
func loadOrMintAnnounceKey(cfg *config.Config) (ed25519.PrivateKey, ed25519.PublicKey, error) {
	if cfg.AnnounceKey == "" {
		// Nothing stored yet: mint a keypair and persist it right away.
		freshPub, freshPriv, genErr := ed25519.GenerateKey(rand.Reader)
		if genErr != nil {
			return nil, nil, fmt.Errorf("generate keypair: %w", genErr)
		}
		cfg.AnnounceKey = base64.StdEncoding.EncodeToString(freshPriv)
		if saveErr := cfg.Save(); saveErr != nil {
			return nil, nil, fmt.Errorf("persist AnnounceKey: %w", saveErr)
		}
		return freshPriv, freshPub, nil
	}

	// A key is already on file: decode and validate it.
	decoded, decErr := base64.StdEncoding.DecodeString(cfg.AnnounceKey)
	if decErr != nil {
		return nil, nil, fmt.Errorf("decode AnnounceKey: %w", decErr)
	}
	if got := len(decoded); got != ed25519.PrivateKeySize {
		return nil, nil, fmt.Errorf("AnnounceKey must be %d bytes, got %d",
			ed25519.PrivateKeySize, got)
	}
	stored := ed25519.PrivateKey(decoded)
	return stored, stored.Public().(ed25519.PublicKey), nil
}
// wsURLFromHTTP rewrites an http(s) URL to its WebSocket equivalent:
// "https://" becomes "wss://" and "http://" becomes "ws://". The scheme
// match is case-sensitive, and any input without one of those two
// prefixes is returned unchanged.
func wsURLFromHTTP(httpURL string) string {
	const (
		secureScheme = "https://"
		plainScheme  = "http://"
	)
	if strings.HasPrefix(httpURL, secureScheme) {
		return "wss://" + httpURL[len(secureScheme):]
	}
	if strings.HasPrefix(httpURL, plainScheme) {
		return "ws://" + httpURL[len(plainScheme):]
	}
	return httpURL
}
// readAllShort reads the response body until EOF, a read error, or a
// 64KB cap, whichever comes first, and returns the bytes read so far.
// The announce response is small; the cap guards against pathological
// server replies.
//
// A single Read call is allowed to return fewer bytes than the body
// holds (for example when the body arrives in several network reads),
// so the body is drained in a loop. The loop is written by hand rather
// than with package io because this file does not import io.
//
// Read errors are deliberately not reported: the caller treats the
// result as best-effort and validates it by parsing.
func readAllShort(res *stdhttp.Response) []byte {
	const limit = 64 * 1024
	buf := make([]byte, limit)
	filled := 0
	for filled < limit {
		n, err := res.Body.Read(buf[filled:])
		filled += n
		if err != nil {
			// io.EOF or a real failure: either way, stop and keep
			// whatever was read.
			break
		}
	}
	return buf[:filled]
}