phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled through the operator UI, store credentials, dial back to the server over WS, complete the protocol_version handshake, and stay connected with periodic heartbeats. Server side: - P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction, json envelope writer with a write mutex, reader, error envelopes. - P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage (10s deadline, protocol_version checked against api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on reject), main read loop, defer hub register/unregister. - P1-10 POST /api/agents/enroll consumes a one-time token, mints a persistent agent bearer (sha-256 stored), creates a host row. - P1-10 POST /api/enrollment-tokens (operator, session-auth) issues a 1h one-time token. - P1-11 hello upserts agent_version + restic_version + protocol_version on the host row, flips status to online. - P1-12 heartbeat touches last_seen_at; background sweeper marks hosts offline after 90s without one. - store: hosts table accessors, host_schedule_version, enrollment_tokens FK on consumed_host dropped (audit-only field; the token gets burned before the host row exists). Agent side: - P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml, atomic Save (tmp+fsync+rename), Enrolled() helper. - P1-15 internal/agent/wsclient: dial with bearer + optional TLS cert pinning (sha-256 of leaf), exponential backoff with jitter (1s → 60s cap), heartbeat goroutine, fatal handling for ErrProtocolTooOld. - P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo. - P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version collection. restic detected by `restic version` parse; absent restic doesn't block startup. - cmd/agent: -enroll-server / -enroll-token flags drive first-run enrollment then exit (so the install script can hand off to systemd to run the persistent service). End-to-end smoke verified: bootstrap → login → issue token → enroll → run agent → server logs `ws agent connected` with the right host_id and protocol_version 1. All tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+19
-5
@@ -14,8 +14,9 @@ import (
|
||||
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
|
||||
rmhttp "gitea.dcglab.co.uk/steve/restic-manager/internal/server/http"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
|
||||
rmhttp "gitea.dcglab.co.uk/steve/restic-manager/internal/server/http"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
|
||||
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
||||
)
|
||||
|
||||
@@ -76,10 +77,13 @@ func run() error {
|
||||
}
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
hub := ws.NewHub()
|
||||
|
||||
deps := rmhttp.Deps{
|
||||
Cfg: cfg,
|
||||
Store: st,
|
||||
AEAD: aead,
|
||||
Hub: hub,
|
||||
}
|
||||
|
||||
// First-run bootstrap: if the users table is empty, mint a one-time
|
||||
@@ -117,21 +121,31 @@ func run() error {
|
||||
errCh <- srv.Start()
|
||||
}()
|
||||
|
||||
// Background sweeper for expired sessions and enrollment tokens.
|
||||
tick := time.NewTicker(15 * time.Minute)
|
||||
defer tick.Stop()
|
||||
// Background sweepers:
|
||||
// - sessions/tokens purge: 15 min
|
||||
// - host offline-after-90s mark: every 30s (matches heartbeat
|
||||
// cadence — agent sends every 30s, P1-12)
|
||||
purgeTick := time.NewTicker(15 * time.Minute)
|
||||
defer purgeTick.Stop()
|
||||
offlineTick := time.NewTicker(30 * time.Second)
|
||||
defer offlineTick.Stop()
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-tick.C:
|
||||
case <-purgeTick.C:
|
||||
if n, err := st.PurgeExpiredSessions(ctx); err == nil && n > 0 {
|
||||
slog.Info("purged expired sessions", "n", n)
|
||||
}
|
||||
if n, err := st.PurgeExpiredEnrollmentTokens(ctx); err == nil && n > 0 {
|
||||
slog.Info("purged expired enrollment tokens", "n", n)
|
||||
}
|
||||
case <-offlineTick.C:
|
||||
cutoff := time.Now().Add(-90 * time.Second)
|
||||
if n, err := st.MarkHostsOfflineStale(ctx, cutoff); err == nil && n > 0 {
|
||||
slog.Info("marked hosts offline (stale heartbeat)", "n", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
Reference in New Issue
Block a user