testing: bootstrap UI, agent reliability, NS-01..04 + alert username
Smooths the rough edges that came up exercising a live deployment.
First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.
Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Server unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.
Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).
NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.
NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.
NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.
NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.
Alerts page: ack'd-by line resolves user_id ULID to username.
Compose.yaml ignored — host-specific.
This commit is contained in:
+51
-4
@@ -43,7 +43,8 @@ func (s *Store) LookupHostByAgentToken(ctx context.Context, tokenHash string) (*
|
||||
current_job_id, last_backup_at, last_backup_status,
|
||||
repo_size_bytes, snapshot_count, open_alert_count,
|
||||
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
||||
pre_hook_default, post_hook_default
|
||||
pre_hook_default, post_hook_default,
|
||||
repo_status, repo_status_error
|
||||
FROM hosts WHERE agent_token_hash = ?`,
|
||||
tokenHash)
|
||||
return scanHost(row)
|
||||
@@ -57,11 +58,55 @@ func (s *Store) GetHost(ctx context.Context, id string) (*Host, error) {
|
||||
current_job_id, last_backup_at, last_backup_status,
|
||||
repo_size_bytes, snapshot_count, open_alert_count,
|
||||
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
||||
pre_hook_default, post_hook_default
|
||||
pre_hook_default, post_hook_default,
|
||||
repo_status, repo_status_error
|
||||
FROM hosts WHERE id = ?`, id)
|
||||
return scanHost(row)
|
||||
}
|
||||
|
||||
// SetHostRepoStatus persists the outcome of the latest init / probe
|
||||
// attempt against this host's repo. Called by the WS handler on every
|
||||
// job.finished of kind=init, and reset to ("unknown", "") by
|
||||
// repo-credentials saves so the next probe reflects the new creds.
|
||||
//
|
||||
// errMsg is stored verbatim (truncate at the call site if you care
|
||||
// about row size). Empty for "ready".
|
||||
func (s *Store) SetHostRepoStatus(ctx context.Context, hostID, status, errMsg string) error {
|
||||
_, err := s.db.ExecContext(ctx,
|
||||
`UPDATE hosts SET repo_status = ?, repo_status_error = ? WHERE id = ?`,
|
||||
status, errMsg, hostID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: set host repo status: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteHost removes a host row by id. Returns ErrNotFound if no row
|
||||
// matched. Foreign-key cascades (declared on every dependent table —
|
||||
// schedules, jobs, snapshots, source_groups, host_credentials, etc.)
|
||||
// remove the rest. The connection DSN already pins
|
||||
// PRAGMA foreign_keys=ON, so the cascade is honoured here without an
|
||||
// explicit pragma roundtrip.
|
||||
//
|
||||
// The host's agent bearer is stored in agent_token_hash on this row,
|
||||
// so deleting the row also revokes the agent — a re-installed
|
||||
// instance must come back through the normal pending-host accept
|
||||
// flow.
|
||||
func (s *Store) DeleteHost(ctx context.Context, id string) error {
|
||||
res, err := s.db.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: delete host: %w", err)
|
||||
}
|
||||
n, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: delete host rows: %w", err)
|
||||
}
|
||||
if n == 0 {
|
||||
return ErrNotFound
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarkHostHello updates the host row with metadata received in the
|
||||
// agent's hello message and flips status to 'online'.
|
||||
func (s *Store) MarkHostHello(ctx context.Context, id string, agentVersion, resticVersion string, protoVersion int, when time.Time) error {
|
||||
@@ -168,7 +213,8 @@ func (s *Store) ListHosts(ctx context.Context) ([]Host, error) {
|
||||
current_job_id, last_backup_at, last_backup_status,
|
||||
repo_size_bytes, snapshot_count, open_alert_count,
|
||||
applied_schedule_version, bandwidth_up_kbps, bandwidth_down_kbps,
|
||||
pre_hook_default, post_hook_default
|
||||
pre_hook_default, post_hook_default,
|
||||
repo_status, repo_status_error
|
||||
FROM hosts ORDER BY name`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("store: list hosts: %w", err)
|
||||
@@ -215,7 +261,8 @@ func scanHostRow(s hostScanner) (*Host, error) {
|
||||
&currentJob, &lastBackupAt, &lastBkSt,
|
||||
&h.RepoSizeBytes, &h.SnapshotCount, &h.OpenAlertCount,
|
||||
&h.AppliedScheduleVersion, &bwUp, &bwDown,
|
||||
&preHook, &postHook)
|
||||
&preHook, &postHook,
|
||||
&h.RepoStatus, &h.RepoStatusError)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, ErrNotFound
|
||||
|
||||
Reference in New Issue
Block a user