Files
restic-manager/internal/server/http/server.go
T
steve f34773b505 phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled
through the operator UI, store credentials, dial back to the server
over WS, complete the protocol_version handshake, and stay
connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction,
  json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage
  (10s deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth)
  issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks
  hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field;
  the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional
  TLS cert pinning (sha-256 of leaf), exponential backoff with
  jitter (1s → 60s cap), heartbeat goroutine, fatal handling for
  ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic is detected by parsing `restic version` output;
  a missing restic binary doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to
  systemd to run the persistent service).

End-to-end smoke verified: bootstrap → login → issue token →
enroll → run agent → server logs `ws agent connected` with the
right host_id and protocol_version 1.

All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 00:39:00 +01:00

129 lines
3.8 KiB
Go

// Package http hosts the chi-based REST handlers for the control
// plane. The Server type owns the router, the handlers, and the
// graceful-shutdown lifecycle.
package http
import (
"context"
"errors"
"fmt"
stdhttp "net/http"
"time"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"gitea.dcglab.co.uk/steve/restic-manager/internal/crypto"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/config"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// Deps bundles every collaborator the HTTP server depends on. Wired up
// in cmd/server; tests pass a pared-down Deps with fakes.
type Deps struct {
// Cfg is the server configuration. This file reads Cfg.Listen for the
// bind address and Cfg.TLSEnabled/TLSCert/TLSKey to choose between
// plain HTTP and TLS when starting.
Cfg config.Config
// Store is handed to the agent WebSocket handler when it is mounted.
// NOTE(review): the REST handlers presumably use it too; they are
// defined outside this file.
Store *store.Store
// AEAD is not referenced anywhere in this file.
// NOTE(review): presumably consumed by handlers defined elsewhere in
// the package — confirm.
AEAD *crypto.AEAD
// Hub is passed to the agent WebSocket handler. When it is nil the
// /ws/agent route is not mounted at all.
Hub *ws.Hub
// BootstrapToken (optional, populated only on first run) is the raw
// admin-bootstrap token printed in the server logs. While set, the
// /bootstrap endpoint accepts it to create the first admin user.
BootstrapToken string
}
// Server wraps a net/http server together with the dependencies its
// handlers use. New builds it; Start runs it; Shutdown stops it.
type Server struct {
// srv is the underlying net/http server. New populates it, and
// Start, Shutdown and Addr all delegate to it.
srv *stdhttp.Server
// deps is the collaborator bundle passed to New. It is read when
// wiring routes and when Start decides between HTTP and TLS.
deps Deps
}
// New assembles a Server from deps: it creates the chi router, installs
// the global middleware, registers the health check and the rest of the
// route tree, and prepares the underlying net/http server. No listener
// is opened here; call Start for that.
func New(deps Deps) *Server {
	router := chi.NewRouter()

	// Global middleware, in execution order: tag each request with an ID
	// for log correlation, recover from handler panics so one bad request
	// cannot take the process down, then log the request.
	router.Use(
		middleware.RequestID,
		middleware.Recoverer,
		requestLogger,
	)

	// Health check: unauthenticated, unaudited, and deliberately cheap —
	// it only ever answers 204 No Content.
	healthz := func(w stdhttp.ResponseWriter, _ *stdhttp.Request) {
		w.WriteHeader(stdhttp.StatusNoContent)
	}
	router.Get("/healthz", healthz)

	server := &Server{
		deps: deps,
		srv: &stdhttp.Server{
			Addr:    deps.Cfg.Listen,
			Handler: router,
			// Bound the time a client may take to send its headers.
			ReadHeaderTimeout: 10 * time.Second,
			IdleTimeout:       60 * time.Second,
			// Zero disables the write timeout entirely. WS upgrades and
			// live log streams are long-lived and must not be cut off.
			WriteTimeout: 0,
		},
	}
	server.routes(router)
	return server
}
// routes registers the whole HTTP surface on r: the REST endpoints under
// /api, the agent WebSocket endpoint (only when a Hub is configured), and
// a placeholder page at /. Everything is wired in this one function so a
// reader can see the full surface in one place.
func (s *Server) routes(r chi.Router) {
	apiRoutes := func(api chi.Router) {
		api.Post("/auth/login", s.handleLogin)
		api.Post("/auth/logout", s.handleLogout)
		api.Post("/bootstrap", s.handleBootstrap)

		// Agent enrollment is an open endpoint: the one-time token in the
		// request is the credential.
		api.Post("/agents/enroll", s.handleAgentEnroll)

		// Operator → server (authenticated). Spec.md §6.1's
		// /hosts/{id}/enrollment-token (regenerate) arrives once the host
		// page can call it; until then only token creation exists.
		api.Post("/enrollment-tokens", s.handleCreateEnrollmentToken)
	}
	r.Route("/api", apiRoutes)

	// Agent ↔ server WebSocket. Bearer authentication happens inside the
	// handler. Without a Hub the endpoint is simply not mounted.
	if hub := s.deps.Hub; hub != nil {
		agentDeps := ws.HandlerDeps{
			Hub:   hub,
			Store: s.deps.Store,
		}
		r.Mount("/ws/agent", ws.AgentHandler(agentDeps))
	}

	// Placeholder until the UI handlers are added under /.
	placeholder := func(w stdhttp.ResponseWriter, _ *stdhttp.Request) {
		// Best-effort write: nothing useful can be done if the client
		// has already gone away.
		_, _ = fmt.Fprint(w, "restic-manager — UI not yet implemented")
	}
	r.Get("/", placeholder)
}
// Start listens and serves, using TLS when the configuration enables it
// and plain HTTP otherwise. It blocks until the underlying server stops.
// The stop caused by Shutdown (http.ErrServerClosed) is reported as nil;
// any other error is returned unchanged. The signature fits
// errgroup.Group.Go.
func (s *Server) Start() error {
	var serveErr error
	if cfg := s.deps.Cfg; cfg.TLSEnabled() {
		serveErr = s.srv.ListenAndServeTLS(cfg.TLSCert, cfg.TLSKey)
	} else {
		serveErr = s.srv.ListenAndServe()
	}

	// A closed server is the expected result of a graceful shutdown, not
	// a failure.
	if errors.Is(serveErr, stdhttp.ErrServerClosed) {
		return nil
	}
	return serveErr
}
// Shutdown gracefully stops the server by delegating to the underlying
// net/http server's Shutdown: no new connections are accepted, and
// in-flight handlers get until ctx is done to finish. The delegate's
// error is returned as-is.
func (s *Server) Shutdown(ctx context.Context) error {
	err := s.srv.Shutdown(ctx)
	return err
}
// Addr returns the listen address the server was configured with, i.e.
// the Addr field of the underlying net/http server.
//
// NOTE(review): this is the configured string, not the bound socket
// address. net/http does not rewrite Server.Addr after listening, so a
// ":0" configuration presumably still reads ":0" here rather than the
// port the OS picked — confirm before relying on it in tests.
func (s *Server) Addr() string {
	return s.srv.Addr
}