p6-01/02: agent self-update + fleet update server cluster
- alert: update_failed (per-host, dedup=hostID) + fleet_update_halted
(system-scoped, host_id NULL via new RaiseOrTouchSystem helper).
- ws: UpdateWatcher tracks in-flight command.update dispatches and
reconciles them against incoming hello envelopes — success path
marks the job succeeded and auto-resolves the alert; 90s timeout
marks the job failed and raises update_failed.
- http: POST /api/hosts/{id}/update (admin-only JSON) + the HTMX
/hosts/{id}/update form variant. Pre-checks: host exists, online,
agent_version != current, no running update job. Refactored core
into Server.dispatchHostUpdate so the fleet worker can share it
without going through HTTP.
- fleetupdate: rolling worker iterating through host slots, halting
on first failure and raising fleet_update_halted. Polling-based
version-match (re-read hosts.agent_version every 1s up to 95s) —
no extra plumbing into the WS hello path. At-most-one-running is
enforced at the store layer (ErrFleetUpdateRunning).
- cmd/server: wire UpdateWatcher and FleetWorker into the main
goroutine; the worker uses a small serverDispatcher adapter that
delegates back into Server.DispatchHostUpdate.
Tests: watcher (success/timeout/mismatch/late-hello), HTTP endpoint
(happy + four pre-check branches + RBAC), worker (two-host happy,
timeout-halt, host-offline-halt, already-at-target skip, cancel
mid-run, double-Start guard).
This commit is contained in:
@@ -39,6 +39,13 @@ type Deps struct {
|
||||
// NotificationHub (optional, wired in G1) is used by the test-fire
|
||||
// endpoint to dispatch a single synthetic payload through a channel.
|
||||
NotificationHub *notification.Hub
|
||||
// UpdateWatcher tracks in-flight agent self-update dispatches and
|
||||
// reconciles them against incoming hello envelopes. Optional;
|
||||
// nil = no-op (handlers degrade by skipping the Track call).
|
||||
UpdateWatcher UpdateWatcher
|
||||
// FleetWorker drives the rolling fleet-update worker. Optional;
|
||||
// nil = fleet update endpoints (P6-15) report unavailable.
|
||||
FleetWorker FleetWorker
|
||||
// Version is the binary's build version, surfaced in the chrome.
|
||||
// Empty falls back to "dev".
|
||||
Version string
|
||||
@@ -125,7 +132,7 @@ func (s *Server) routes(r chi.Router) {
|
||||
r.Get("/install/*", s.handleInstallAsset)
|
||||
r.Get("/api/version", s.handleVersion)
|
||||
if s.deps.Hub != nil {
|
||||
r.Mount("/ws/agent", ws.AgentHandler(ws.HandlerDeps{
|
||||
hd := ws.HandlerDeps{
|
||||
Hub: s.deps.Hub,
|
||||
Store: s.deps.Store,
|
||||
JobHub: s.deps.JobHub,
|
||||
@@ -133,7 +140,11 @@ func (s *Server) routes(r chi.Router) {
|
||||
OnHello: s.onAgentHello,
|
||||
OnScheduleAck: s.applyScheduleAck,
|
||||
OnScheduleFire: s.dispatchScheduledJob,
|
||||
}))
|
||||
}
|
||||
if w, ok := s.deps.UpdateWatcher.(*ws.UpdateWatcher); ok && w != nil {
|
||||
hd.UpdateWatcher = w
|
||||
}
|
||||
r.Mount("/ws/agent", ws.AgentHandler(hd))
|
||||
}
|
||||
r.Get("/ws/agent/pending", s.handlePendingWS)
|
||||
r.Mount("/static/", staticHandler())
|
||||
@@ -271,6 +282,9 @@ func (s *Server) routes(r chi.Router) {
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(s.requireRole(store.RoleAdmin))
|
||||
|
||||
r.Post("/api/hosts/{id}/update", s.handleHostUpdate)
|
||||
r.Post("/hosts/{id}/update", s.handleHostUpdateForm)
|
||||
|
||||
r.Get("/api/users", s.handleAPIUsersList)
|
||||
r.Post("/api/users", s.handleAPIUserCreate)
|
||||
r.Get("/api/users/{id}", s.handleAPIUserGet)
|
||||
@@ -322,6 +336,27 @@ func (s *Server) Shutdown(ctx context.Context) error {
|
||||
return s.srv.Shutdown(ctx)
|
||||
}
|
||||
|
||||
// SetFleetWorker installs the fleet-update worker post-construction.
|
||||
// Used to break the wiring loop in cmd/server (the worker depends on a
|
||||
// dispatcher that delegates back into the server's host-update path).
|
||||
func (s *Server) SetFleetWorker(fw FleetWorker) { s.deps.FleetWorker = fw }
|
||||
|
||||
// DispatchHostUpdate is the public entry point for callers (the fleet
|
||||
// worker) that need to drive the same dispatch path the HTTP handler
|
||||
// uses, without going through HTTP. Returns the structured result so
|
||||
// the caller can map error codes to its own status enum.
|
||||
func (s *Server) DispatchHostUpdate(ctx context.Context, hostID, actorUserID string) (jobID string, code string, err error) {
|
||||
var actorID *string
|
||||
if actorUserID != "" {
|
||||
actorID = &actorUserID
|
||||
}
|
||||
res := s.dispatchHostUpdate(ctx, hostID, "user", actorID)
|
||||
if res.Code != "" {
|
||||
return res.JobID, res.Code, nil
|
||||
}
|
||||
return res.JobID, "", nil
|
||||
}
|
||||
|
||||
// Addr returns the configured listen address. Useful in tests when
|
||||
// the caller passes :0 to get a random port.
|
||||
func (s *Server) Addr() string { return s.srv.Addr }
|
||||
|
||||
Reference in New Issue
Block a user