testing: bootstrap UI, agent reliability, NS-01..04 + alert username
Smooths the rough edges that came up while exercising a live deployment.
First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.
Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Server unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.
Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).
NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.
NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.
NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.
NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.
Alerts page: ack'd-by line resolves user_id ULID to username.
Compose.yaml ignored — host-specific.
This commit is contained in:
@@ -146,6 +146,15 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
|
||||
return
|
||||
}
|
||||
|
||||
// NS-03: clear the host's last probe outcome — the new creds may
|
||||
// reach a different repo (or fix an auth typo), so any prior
|
||||
// "init_failed" / "ready" tag is stale. The next init dispatch
|
||||
// (below, when the agent is online) will set it to a fresh value
|
||||
// on completion.
|
||||
if err := s.deps.Store.SetHostRepoStatus(r.Context(), hostID, "unknown", ""); err != nil {
|
||||
slog.Warn("repo creds set: reset repo_status", "host_id", hostID, "err", err)
|
||||
}
|
||||
|
||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
UserID: &user.ID,
|
||||
@@ -160,11 +169,65 @@ func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.R
|
||||
// the next reconnect will pick the row up via the hello handler.
|
||||
if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
|
||||
_ = s.pushRepoCredsToAgent(r.Context(), hostID, existing)
|
||||
// Force a fresh probe so a typo / wrong URL surfaces now
|
||||
// rather than at the next scheduled job. No-op if offline —
|
||||
// the operator already saw "host offline" elsewhere.
|
||||
if err := s.dispatchInitJob(r.Context(), hostID, "user", &user.ID); err != nil {
|
||||
slog.Warn("repo creds set: dispatch init", "host_id", hostID, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
w.WriteHeader(stdhttp.StatusNoContent)
|
||||
}
|
||||
|
||||
// dispatchInitJob creates an init job row, marshals the command.run,
|
||||
// ships it down the agent's WS connection (when connected), and
|
||||
// audits. NS-03 path: callers use this to force a fresh probe after
|
||||
// credentials change without waiting for the next hello — and without
|
||||
// the maybeAutoInit "first time only" guard. actorKind should be
|
||||
// "user" for operator-driven dispatches and "system" for the
|
||||
// auto-init-on-hello case so audit reflects intent.
|
||||
func (s *Server) dispatchInitJob(ctx context.Context, hostID, actorKind string, actorID *string) error {
|
||||
jobID := ulid.Make().String()
|
||||
now := time.Now().UTC()
|
||||
if err := s.deps.Store.CreateJob(ctx, store.Job{
|
||||
ID: jobID,
|
||||
HostID: hostID,
|
||||
Kind: string(api.JobInit),
|
||||
ActorKind: actorKind,
|
||||
ActorID: actorID,
|
||||
CreatedAt: now,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("dispatch init: persist job: %w", err)
|
||||
}
|
||||
env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
|
||||
JobID: jobID,
|
||||
Kind: api.JobInit,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("dispatch init: marshal: %w", err)
|
||||
}
|
||||
if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
|
||||
sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
if err := s.deps.Hub.Send(sendCtx, hostID, env); err != nil {
|
||||
// Job row stays — the host's pending-runs drain or the next
|
||||
// hello picks it up. We leave the slate clean for the caller.
|
||||
return fmt.Errorf("dispatch init: ws send: %w", err)
|
||||
}
|
||||
}
|
||||
_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
|
||||
ID: ulid.Make().String(),
|
||||
UserID: actorID,
|
||||
Actor: actorKind,
|
||||
Action: "host.repo_init_dispatched",
|
||||
TargetKind: ptr("host"),
|
||||
TargetID: &hostID,
|
||||
TS: now,
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
// pushRepoCredsToAgent serialises blob into a config.update envelope
|
||||
// and ships it down the agent's WS. Returns an error from the hub
|
||||
// (no-op if not connected — caller is expected to check first when it
|
||||
|
||||
Reference in New Issue
Block a user