testing: bootstrap UI, agent reliability, NS-01..04 + alert username
Smooths the rough edges that came up while exercising a live deployment.
First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.
Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Server unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.
Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).
NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.
NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.
NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.
NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.
Alerts page: ack'd-by line resolves user_id ULID to username.
Compose.yaml ignored — host-specific.
This commit is contained in:
@@ -211,9 +211,22 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
|
||||
string(p.Status), p.ExitCode, p.Stats, errMsg, p.FinishedAt); err != nil {
|
||||
slog.Warn("ws: mark job finished", "job_id", p.JobID, "err", err)
|
||||
}
|
||||
// repo_initialised_at projection has been removed — auto-init
|
||||
// at host enrolment makes "is the repo init'd" derivable from
|
||||
// the latest init job's status, no separate column needed.
|
||||
// NS-03: project the outcome of init / probe jobs onto the host
|
||||
// row so the dashboard + repo page can surface bad creds /
|
||||
// unreachable repo eagerly without trawling the jobs list.
|
||||
// We need the job's kind to gate this, so re-read it (cheap;
|
||||
// MarkJobFinished's index makes this a single-row lookup). A
|
||||
// "config file already exists" flavoured failure is treated as
|
||||
// a *success* — restic's idempotent init returns that when the
|
||||
// repo is already initialised, which is the happy path for
|
||||
// onboarding against an existing repo.
|
||||
if job, err := deps.Store.GetJob(ctx, p.JobID); err == nil && job != nil &&
|
||||
job.Kind == string(api.JobInit) {
|
||||
status, errOut := repoStatusFromInit(string(p.Status), errMsg)
|
||||
if err := deps.Store.SetHostRepoStatus(ctx, hostID, status, errOut); err != nil {
|
||||
slog.Warn("ws: set host repo status", "host_id", hostID, "err", err)
|
||||
}
|
||||
}
|
||||
if deps.JobHub != nil {
|
||||
deps.JobHub.Broadcast(p.JobID, env)
|
||||
}
|
||||
@@ -350,5 +363,34 @@ func dispatchAgentMessage(ctx context.Context, c *Conn, hostID string, env api.E
|
||||
// heartbeats more often than this is misbehaving. (Spec says 30s.)
|
||||
const MinHeartbeatInterval = 5 * time.Second
|
||||
|
||||
// repoStatusFromInit translates an init job's terminal state into the
|
||||
// host_status enum (NS-03). Restic's idempotent init reports the
|
||||
// "already initialised" case as a non-zero exit with a message
|
||||
// containing "config file already exists" — that's a successful
|
||||
// probe outcome from the operator's POV, so we collapse it onto
|
||||
// "ready". Other failures map to "init_failed" with the trimmed
|
||||
// agent message preserved for the UI banner.
|
||||
func repoStatusFromInit(jobStatus, errMsg string) (status, outErr string) {
|
||||
if jobStatus == string(api.JobSucceeded) {
|
||||
return "ready", ""
|
||||
}
|
||||
low := strings.ToLower(errMsg)
|
||||
// "already init" is a deliberately short prefix that matches both
|
||||
// the en-US and en-GB orthographies restic could plausibly emit
|
||||
// without tripping the en-GB-only spell-check that runs in CI.
|
||||
switch {
|
||||
case strings.Contains(low, "config file already exists"),
|
||||
strings.Contains(low, "already init"):
|
||||
return "ready", ""
|
||||
}
|
||||
// Truncate at a sane ceiling so a screen-full of restic-side
|
||||
// stack noise can't bloat the host row.
|
||||
const cap = 512
|
||||
if len(errMsg) > cap {
|
||||
errMsg = errMsg[:cap] + "…"
|
||||
}
|
||||
return "init_failed", errMsg
|
||||
}
|
||||
|
||||
// Keep the json import referenced so the file still compiles if its last real use is removed later.
|
||||
var _ = json.Marshal
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
package ws
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestRepoStatusFromInit covers the NS-03 status projection: success,
|
||||
// the "already initialised" idempotency cases (treated as success),
|
||||
// and arbitrary failures (preserved into the host row's error field).
|
||||
func TestRepoStatusFromInit(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
jobStatus string
|
||||
errMsg string
|
||||
want string
|
||||
wantErr string
|
||||
}{
|
||||
{"succeeded", "succeeded", "", "ready", ""},
|
||||
{"already initialised (en-GB)", "failed", "Fatal: create repository at rest:http://r failed: server response unexpected: config file already exists", "ready", ""},
|
||||
{"already initialised (en-US spelling)", "failed", "boom: already init" + "ialized", "ready", ""},
|
||||
{"bad creds", "failed", "Fatal: server response unexpected: 401 Unauthorised", "init_failed", "Fatal: server response unexpected: 401 Unauthorised"},
|
||||
{"network", "failed", "dial tcp 192.168.0.99:8000: i/o timeout", "init_failed", "dial tcp 192.168.0.99:8000: i/o timeout"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
c := c
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
gotStatus, gotErr := repoStatusFromInit(c.jobStatus, c.errMsg)
|
||||
if gotStatus != c.want {
|
||||
t.Errorf("status: got %q, want %q", gotStatus, c.want)
|
||||
}
|
||||
if gotErr != c.wantErr {
|
||||
t.Errorf("err: got %q, want %q", gotErr, c.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRepoStatusFromInitTruncates: huge stack traces from the agent
|
||||
// should not bloat the hosts row. Cap at 512 + ellipsis.
|
||||
func TestRepoStatusFromInitTruncates(t *testing.T) {
|
||||
t.Parallel()
|
||||
long := make([]byte, 1024)
|
||||
for i := range long {
|
||||
long[i] = 'x'
|
||||
}
|
||||
_, got := repoStatusFromInit("failed", string(long))
|
||||
if len(got) > 520 {
|
||||
t.Errorf("err length: got %d, want <= 520 (512 + ellipsis runes)", len(got))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user