Files
restic-manager/internal/server/http/host_credentials.go
T
steve 3800b34a2b
CI / Test (rest) (pull_request) Successful in 29s
CI / Lint (pull_request) Successful in 32s
CI / Build (windows/amd64) (pull_request) Successful in 22s
CI / Test (store) (pull_request) Successful in 1m22s
CI / Test (server-http) (pull_request) Successful in 1m30s
CI / Build (linux/amd64) (pull_request) Successful in 22s
CI / Build (linux/arm64) (pull_request) Successful in 41s
testing: bootstrap UI, agent reliability, NS-01..04 + alert username
Smooths the rough edges that came up while exercising a live deployment.

First-run bootstrap UI: /bootstrap renders a username + password form
that uses the in-memory token directly (operator no longer copies it
out of the log); /login redirects there while bootstrap is available.

Agent reliability: failJob synthetic envelopes so command.run early
returns no longer hang the server-side job; runtime probe of restic
restore --help drives --no-ownership instead of version sniffing
(0.18.x had it removed). Server unit re-shaped: ProtectSystem=full
plus ReadWritePaths=/etc/restic-manager, no ProtectHome — restore
can now write anywhere a user might want.

Restore wizard: default target is /root/rm-restore/<job-id>/ with
clearer help text. Re-init confirm input uses .field (was .input,
which doesn't exist — text was invisible).

NS-01 host delete: store DeleteHost, admin-band /hosts/{id}/delete
with hostname-confirm danger zone, audit, FK cascade, live WS close.

NS-02 enrollment-token recovery: outstanding-tokens panel on
/hosts/new, regenerate (preserves attachments) and revoke handlers
+ audit, store-level ListOutstandingEnrollmentTokens and
DeleteEnrollmentToken.

NS-03 repo init / probe surface: migration 0020 adds
hosts.repo_status + repo_status_error; WS handler projects every
init job's outcome onto the host row (idempotent already-initialised
collapses to ready); creds-save resets status and dispatches a fresh
probe; /hosts/{id}/repo/probe retry endpoint with banner.

NS-04 dashboard live + sort + filter: query-string filter
(q/status/repo_status/tag/sort/dir), 5s htmx live poll mirroring the
alerts pattern with a localStorage live toggle, sortable column
headers, filter row + clear.

Alerts page: ack'd-by line resolves user_id ULID to username.

Compose.yaml ignored — host-specific.
2026-05-05 22:03:15 +01:00

579 lines
20 KiB
Go

package http
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
stdhttp "net/http"
"time"
"github.com/go-chi/chi/v5"
"github.com/oklog/ulid/v2"
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
"gitea.dcglab.co.uk/steve/restic-manager/internal/server/ws"
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
)
// nowUTC reports the current wall-clock time expressed in UTC.
func nowUTC() time.Time {
	current := time.Now()
	return current.UTC()
}
// hostRepoCredsView is the body of GET /api/hosts/{id}/repo-credentials.
// Password is always redacted; the UI uses this to pre-fill an edit
// form with the URL/username already populated.
//
// handleGetAdminCredentials responds with the same shape for the
// admin credential slot.
type hostRepoCredsView struct {
// RepoURL is the stored repository URL, returned as-is.
RepoURL string `json:"repo_url"`
// RepoUsername is the stored username; dropped from the JSON when empty.
RepoUsername string `json:"repo_username,omitempty"`
// HasPassword is true when the stored password is non-empty. The
// password itself is never placed in this view.
HasPassword bool `json:"has_password"`
}
// handleGetHostCredentials answers with a redacted hostRepoCredsView of
// the host's repo-slot credentials so the UI can display them.
//
// Responses:
//   - 401 "unauthorised" when the request carries no authenticated user.
//   - 400 "missing_id" when the {id} route parameter is empty.
//   - 404 "not_set" when the store has no repo credential row.
//   - 500 "internal" / "decrypt_failed" when loading, decrypting, or
//     parsing the stored blob fails.
//   - 200 with the URL, the username, and a has_password flag otherwise.
func (s *Server) handleGetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if authed := s.authedUser(r); !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	hostID := chi.URLParam(r, "id")
	if len(hostID) == 0 {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
		return
	}

	sealed, loadErr := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindRepo)
	switch {
	case errors.Is(loadErr, store.ErrNotFound):
		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
		return
	case loadErr != nil:
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// The repo slot is decrypted with "host:<id>" as the additional data.
	aad := []byte("host:" + hostID)
	opened, openErr := s.deps.AEAD.Decrypt(sealed, aad)
	if openErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
		return
	}

	var creds repoCredsBlob
	if parseErr := json.Unmarshal(opened, &creds); parseErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// Only the presence of a password is reported, never its value.
	view := hostRepoCredsView{
		RepoURL:      creds.RepoURL,
		RepoUsername: creds.RepoUsername,
		HasPassword:  len(creds.RepoPassword) > 0,
	}
	writeJSON(w, stdhttp.StatusOK, view)
}
// hostRepoCredsRequest is the body of PUT /api/hosts/{id}/repo-credentials.
// Operator can edit any subset; missing fields preserve the existing
// value (so changing only the password doesn't require resending the URL).
// handleSetAdminCredentials decodes the same shape for the admin slot.
//
// We model this as plaintext on the wire because the wire is HTTPS to
// the proxy. The values are AEAD-encrypted before they touch SQLite,
// and only ever leave the server again inside the authenticated WS
// `config.update` push.
type hostRepoCredsRequest struct {
// RepoURL replaces the stored URL when non-nil; nil keeps the stored value.
RepoURL *string `json:"repo_url,omitempty"`
// RepoUsername replaces the stored username when non-nil; a pointer
// to "" clears it.
RepoUsername *string `json:"repo_username,omitempty"`
// RepoPassword replaces the stored password when non-nil; nil keeps
// the stored value.
RepoPassword *string `json:"repo_password,omitempty"`
}
// handleSetHostCredentials lets an operator/admin update a host's
// repo creds. Any fields the operator sends overwrite the
// corresponding fields in the existing blob; the others are
// preserved. Re-encrypts under host_id and pushes a config.update
// over the WS if the agent is connected.
//
// Responses:
//   - 401 "unauthorised" when no user is attached to the request.
//   - 400 "missing_id" / "invalid_json" / "missing_field" for an empty
//     {id}, an undecodable body, or a merged blob whose URL or password
//     is empty.
//   - 404 "host_not_found" when the host lookup fails.
//   - 500 "decrypt_failed" / "internal" when the stored blob cannot be
//     decrypted or parsed, or when encrypting or persisting fails.
//   - 204 on success.
//
// A stored blob that decrypts but does not parse is reported as a 500
// rather than being treated as empty: merging onto a zero value would
// either produce a misleading "missing_field" or silently discard the
// stored fields the operator did not resend.
func (s *Server) handleSetHostCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}
	hostID := chi.URLParam(r, "id")
	if hostID == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
		return
	}
	if _, err := s.deps.Store.GetHost(r.Context(), hostID); err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
		return
	}
	var req hostRepoCredsRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}

	// The repo slot is sealed with "host:<id>" as the additional data.
	aad := []byte("host:" + hostID)

	// Merge with the existing row, if any.
	existing := repoCredsBlob{}
	cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindRepo)
	switch {
	case err == nil:
		plain, err := s.deps.AEAD.Decrypt(cur, aad)
		if err != nil {
			writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
			return
		}
		if err := json.Unmarshal(plain, &existing); err != nil {
			slog.Error("repo creds set: parse stored creds", "host_id", hostID, "err", err)
			writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
			return
		}
	case !errors.Is(err, store.ErrNotFound):
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// A nil pointer means "field not sent": keep the stored value.
	if req.RepoURL != nil {
		existing.RepoURL = *req.RepoURL
	}
	if req.RepoUsername != nil {
		existing.RepoUsername = *req.RepoUsername
	}
	if req.RepoPassword != nil {
		existing.RepoPassword = *req.RepoPassword
	}
	if existing.RepoURL == "" || existing.RepoPassword == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"repo_url and repo_password must end up non-empty")
		return
	}

	enc, err := s.encryptRepoCreds(existing, aad)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}
	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindRepo, enc); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// NS-03: clear the host's last probe outcome — the new creds may
	// reach a different repo (or fix an auth typo), so any prior
	// "init_failed" / "ready" tag is stale. The next init dispatch
	// (below, when the agent is online) will set it to a fresh value
	// on completion.
	if err := s.deps.Store.SetHostRepoStatus(r.Context(), hostID, "unknown", ""); err != nil {
		slog.Warn("repo creds set: reset repo_status", "host_id", hostID, "err", err)
	}

	// Audit is best-effort: the credentials are already persisted, so
	// the append error is deliberately discarded.
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &user.ID,
		Actor:      "user",
		Action:     "host.repo_credentials_set",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         nowUTC(),
	})

	// Push to the agent if it's connected. Errors here are non-fatal:
	// the next reconnect will pick the row up via the hello handler.
	if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
		// pushRepoCredsToAgent logs its own send failure.
		_ = s.pushRepoCredsToAgent(r.Context(), hostID, existing)
		// Force a fresh probe so a typo / wrong URL surfaces now
		// rather than at the next scheduled job. No-op if offline —
		// the operator already saw "host offline" elsewhere.
		if err := s.dispatchInitJob(r.Context(), hostID, "user", &user.ID); err != nil {
			slog.Warn("repo creds set: dispatch init", "host_id", hostID, "err", err)
		}
	}
	w.WriteHeader(stdhttp.StatusNoContent)
}
// dispatchInitJob records a new init job for hostID, sends the matching
// command.run envelope over the hub when the host is connected, and
// then appends a "host.repo_init_dispatched" audit entry.
//
// It is the NS-03 path for forcing a fresh probe after a credentials
// change: unlike maybeAutoInit it does not look at the host's job
// history first. actorKind and actorID are stored on both the job row
// and the audit entry; pass "user" for operator-driven dispatches and
// "system" for automatic ones so the audit reflects intent.
//
// Returns a wrapped error when persisting the job, marshalling the
// envelope, or the hub send fails. On a send failure the job row that
// was already created is left in place and no audit entry is written.
// When the hub is nil or the host is not connected, nothing is sent but
// the job row and the audit entry are still written.
func (s *Server) dispatchInitJob(ctx context.Context, hostID, actorKind string, actorID *string) error {
	createdAt := time.Now().UTC()
	id := ulid.Make().String()

	job := store.Job{
		ID:        id,
		HostID:    hostID,
		Kind:      string(api.JobInit),
		ActorKind: actorKind,
		ActorID:   actorID,
		CreatedAt: createdAt,
	}
	if err := s.deps.Store.CreateJob(ctx, job); err != nil {
		return fmt.Errorf("dispatch init: persist job: %w", err)
	}

	payload := api.CommandRunPayload{
		JobID: id,
		Kind:  api.JobInit,
	}
	envelope, err := api.Marshal(api.MsgCommandRun, id, payload)
	if err != nil {
		return fmt.Errorf("dispatch init: marshal: %w", err)
	}

	online := s.deps.Hub != nil && s.deps.Hub.Connected(hostID)
	if online {
		// Bound the send so a stalled connection cannot hold the caller.
		sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
		defer cancel()
		if sendErr := s.deps.Hub.Send(sendCtx, hostID, envelope); sendErr != nil {
			// The job row is intentionally not removed here.
			return fmt.Errorf("dispatch init: ws send: %w", sendErr)
		}
	}

	// Audit is best-effort; its error is discarded.
	entry := store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     actorID,
		Actor:      actorKind,
		Action:     "host.repo_init_dispatched",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         createdAt,
	}
	_ = s.deps.Store.AppendAudit(ctx, entry)
	return nil
}
// pushRepoCredsToAgent wraps blob's URL, username, and password in a
// config.update envelope and sends it to hostID through the hub with a
// five-second timeout.
//
// Returns the marshal error or the hub's send error unchanged; a send
// failure is also logged at Warn. The hub is dereferenced without a nil
// check, so callers are expected to confirm the host is connected first.
func (s *Server) pushRepoCredsToAgent(ctx context.Context, hostID string, blob repoCredsBlob) error {
	payload := api.ConfigUpdatePayload{
		RepoURL:      blob.RepoURL,
		RepoUsername: blob.RepoUsername,
		RepoPassword: blob.RepoPassword,
	}
	envelope, err := api.Marshal(api.MsgConfigUpdate, "", payload)
	if err != nil {
		return err
	}

	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	sendErr := s.deps.Hub.Send(sendCtx, hostID, envelope)
	if sendErr == nil {
		return nil
	}
	slog.Warn("push repo creds: hub send failed", "host_id", hostID, "err", sendErr)
	return sendErr
}
// handleGetAdminCredentials answers with a redacted hostRepoCredsView of
// the host's admin-slot credentials, which the operator UI uses to
// pre-fill its edit form.
//
// Responses:
//   - 401 "unauthorised" when the request carries no authenticated user.
//   - 400 "missing_id" when the {id} route parameter is empty.
//   - 404 "not_set" when no admin credential row exists.
//   - 500 "internal" / "decrypt_failed" when loading, decrypting, or
//     parsing the stored blob fails.
//   - 200 with the URL, the username, and a has_password flag otherwise.
func (s *Server) handleGetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	if authed := s.authedUser(r); !authed {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	hostID := chi.URLParam(r, "id")
	if len(hostID) == 0 {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
		return
	}

	sealed, loadErr := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindAdmin)
	switch {
	case errors.Is(loadErr, store.ErrNotFound):
		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
		return
	case loadErr != nil:
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// The admin slot uses its own additional data, distinct from the
	// repo slot's "host:<id>".
	aad := []byte("host:" + hostID + ":admin")
	opened, openErr := s.deps.AEAD.Decrypt(sealed, aad)
	if openErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
		return
	}

	var creds repoCredsBlob
	if parseErr := json.Unmarshal(opened, &creds); parseErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// Only the presence of a password is reported, never its value.
	view := hostRepoCredsView{
		RepoURL:      creds.RepoURL,
		RepoUsername: creds.RepoUsername,
		HasPassword:  len(creds.RepoPassword) > 0,
	}
	writeJSON(w, stdhttp.StatusOK, view)
}
// handleSetAdminCredentials lets an operator/admin update a host's admin
// creds (the prune-capable slot). Same merge-then-validate semantics as
// handleSetHostCredentials but operates on store.CredKindAdmin. After
// persisting, pushes a config.update with Slot:"admin" over the WS if
// the agent is connected.
//
// Responses:
//   - 401 "unauthorised" when no user is attached to the request.
//   - 400 "missing_id" / "invalid_json" / "missing_field" for an empty
//     {id}, an undecodable body, or a merged blob whose URL or password
//     is empty.
//   - 404 "host_not_found" when the host lookup fails.
//   - 500 "decrypt_failed" / "internal" when the stored blob cannot be
//     decrypted or parsed, or when encrypting or persisting fails.
//   - 204 on success.
//
// A stored blob that decrypts but does not parse is reported as a 500
// rather than being treated as empty: merging onto a zero value would
// either produce a misleading "missing_field" or silently discard the
// stored fields the operator did not resend.
func (s *Server) handleSetAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}
	hostID := chi.URLParam(r, "id")
	if hostID == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
		return
	}
	if _, err := s.deps.Store.GetHost(r.Context(), hostID); err != nil {
		writeJSONError(w, stdhttp.StatusNotFound, "host_not_found", "")
		return
	}
	var req hostRepoCredsRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
		return
	}

	// Merge with the existing admin row, if any.
	existing := repoCredsBlob{}
	aad := []byte("host:" + hostID + ":admin")
	cur, err := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindAdmin)
	switch {
	case err == nil:
		plain, err := s.deps.AEAD.Decrypt(cur, aad)
		if err != nil {
			writeJSONError(w, stdhttp.StatusInternalServerError, "decrypt_failed", "")
			return
		}
		if err := json.Unmarshal(plain, &existing); err != nil {
			slog.Error("admin creds set: parse stored creds", "host_id", hostID, "err", err)
			writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
			return
		}
	case !errors.Is(err, store.ErrNotFound):
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// A nil pointer means "field not sent": keep the stored value.
	if req.RepoURL != nil {
		existing.RepoURL = *req.RepoURL
	}
	if req.RepoUsername != nil {
		existing.RepoUsername = *req.RepoUsername
	}
	if req.RepoPassword != nil {
		existing.RepoPassword = *req.RepoPassword
	}
	if existing.RepoURL == "" || existing.RepoPassword == "" {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_field",
			"repo_url and repo_password must end up non-empty")
		return
	}

	enc, err := s.encryptRepoCreds(existing, aad)
	if err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}
	if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, store.CredKindAdmin, enc); err != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// Audit is best-effort: the credentials are already persisted, so
	// the append error is deliberately discarded.
	_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &user.ID,
		Actor:      "user",
		Action:     "host.admin_credentials_set",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         nowUTC(),
	})

	// Push to the agent if it's connected. Non-fatal: the next
	// handleRunRepoPrune call will push on-demand. The failure is still
	// logged so a lost push is visible, matching the repo-slot path.
	if s.deps.Hub != nil && s.deps.Hub.Connected(hostID) {
		if err := s.pushAdminCredsToAgent(r.Context(), hostID); err != nil {
			slog.Warn("admin creds set: push to agent", "host_id", hostID, "err", err)
		}
	}
	w.WriteHeader(stdhttp.StatusNoContent)
}
// handleDeleteAdminCredentials deletes the host's admin-slot credential
// row and records a "host.admin_credentials_deleted" audit entry.
//
// Responses:
//   - 401 "unauthorised" when no user is attached to the request.
//   - 400 "missing_id" when the {id} route parameter is empty.
//   - 404 "not_set" when there is no admin row to delete.
//   - 500 "internal" when the lookup or the delete fails.
//   - 204 on success.
//
// Nothing is sent to the agent from here; per the original note its
// local admin slot stays as-is until the next deployment/reinstall.
func (s *Server) handleDeleteAdminCredentials(w stdhttp.ResponseWriter, r *stdhttp.Request) {
	user, ok := s.requireUser(r)
	if !ok {
		writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
		return
	}

	hostID := chi.URLParam(r, "id")
	if len(hostID) == 0 {
		writeJSONError(w, stdhttp.StatusBadRequest, "missing_id", "")
		return
	}

	// Look the row up before deleting so a missing row maps to a 404.
	_, lookupErr := s.deps.Store.GetHostCredentials(r.Context(), hostID, store.CredKindAdmin)
	switch {
	case errors.Is(lookupErr, store.ErrNotFound):
		writeJSONError(w, stdhttp.StatusNotFound, "not_set", "")
		return
	case lookupErr != nil:
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	deleteErr := s.deps.Store.DeleteHostCredentials(r.Context(), hostID, store.CredKindAdmin)
	if deleteErr != nil {
		writeJSONError(w, stdhttp.StatusInternalServerError, "internal", "")
		return
	}

	// Audit is best-effort; its error is discarded.
	entry := store.AuditEntry{
		ID:         ulid.Make().String(),
		UserID:     &user.ID,
		Actor:      "user",
		Action:     "host.admin_credentials_deleted",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         nowUTC(),
	}
	_ = s.deps.Store.AppendAudit(r.Context(), entry)

	w.WriteHeader(stdhttp.StatusNoContent)
}
// pushAdminCredsToAgent loads the host's admin-slot credentials,
// decrypts them, and sends them to the agent as a config.update with
// Slot "admin", using a five-second send timeout. Used by:
//   - handleSetAdminCredentials (immediate push when operator saves).
//   - handleRunRepoPrune (on-demand push right before a prune dispatch).
//
// The store error is returned unwrapped, so store.ErrNotFound reaches
// the caller when no admin row exists (the prune endpoint uses this to
// refuse with a clear message). Decrypt and parse failures are wrapped
// with a "push admin creds:" prefix; marshal and hub send errors are
// returned as-is. The hub is dereferenced without a nil check.
func (s *Server) pushAdminCredsToAgent(ctx context.Context, hostID string) error {
	sealed, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindAdmin)
	if err != nil {
		// Returned unwrapped so callers can match store.ErrNotFound.
		return err
	}

	aad := []byte("host:" + hostID + ":admin")
	opened, err := s.deps.AEAD.Decrypt(sealed, aad)
	if err != nil {
		return fmt.Errorf("push admin creds: decrypt: %w", err)
	}

	var creds repoCredsBlob
	if err = json.Unmarshal(opened, &creds); err != nil {
		return fmt.Errorf("push admin creds: parse: %w", err)
	}

	payload := api.ConfigUpdatePayload{
		Slot:         "admin",
		RepoURL:      creds.RepoURL,
		RepoUsername: creds.RepoUsername,
		RepoPassword: creds.RepoPassword,
	}
	envelope, err := api.Marshal(api.MsgConfigUpdate, "", payload)
	if err != nil {
		return err
	}

	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	return s.deps.Hub.Send(sendCtx, hostID, envelope)
}
// onAgentHello runs synchronously inside the WS handler immediately
// after a successful hello. It pushes, in order, the host's repo
// credentials, its bandwidth caps, and its schedule set down conn,
// then gives maybeAutoInit a chance to dispatch an init job, and
// finally starts a background drain of pending runs.
//
// The conn argument is used directly (rather than via the hub) so we
// don't race a brand-new register against an old still-closing conn.
//
// None of the steps report an error back to this function; each helper
// is called for its side effects only.
func (s *Server) onAgentHello(ctx context.Context, hostID string, conn *ws.Conn) {
// Repo credentials go first so the agent has them before any
// command.run lands. No-op when the host has no creds on file.
s.pushRepoCredsOnHello(ctx, hostID, conn)
// Bandwidth caps are sent unconditionally so an agent that
// reconnects after a cap edit picks up the new state without
// waiting for the next bandwidth PUT.
s.pushBandwidthOnHello(ctx, hostID, conn)
// Push the current schedule set in the same on-hello window so
// the agent's local cron is in sync before any command.run lands.
// An empty schedule list is a valid push: it tells the agent to
// drop any cron entries left over from a previous deployment.
// Always runs, even when the host has no repo credentials yet.
s.pushScheduleSetOnConn(ctx, hostID, conn)
// Auto-init the repo if no init job is on record for this host
// (see maybeAutoInit for the exact check). Restic treats "config
// file already exists" as a soft success, so re-enrolment against
// a populated repo just no-ops. Skipped silently when the host has
// no creds yet — the next hello after the operator binds creds
// will dispatch.
s.maybeAutoInit(ctx, hostID, conn)
// Drain any pending runs that accumulated while this host was
// offline. Use a fresh context — the hello-bound ctx is short-lived,
// and the drain may take seconds across many rows. A non-blocking
// goroutine keeps the hello path snappy.
// NOTE(review): the goroutine runs on context.Background() and is
// neither cancelled nor awaited from here — confirm DrainPending
// bounds its own lifetime.
go s.DrainPending(context.Background(), hostID)
}
// maybeAutoInit dispatches a `restic init` job over conn when the host
// has repo creds bound (without them the runner can't talk to the repo)
// and HasJobOfKind reports no init job on record for it. We rely on
// Restic's idempotent init for re-runs.
// NOTE(review): whether HasJobOfKind counts only successful init jobs
// or any init job is not visible from here — confirm against the store.
//
// Every failure is logged and swallowed; nothing is returned to the
// hello path. A missing credential row (store.ErrNotFound) is the
// expected "operator hasn't supplied creds yet" case and stays silent,
// while any other store error is logged so a real failure is not
// mistaken for "no creds".
//
// The job row is created before the send; if marshalling or the send
// fails the row is left in place and no audit entry is written.
func (s *Server) maybeAutoInit(ctx context.Context, hostID string, conn *ws.Conn) {
	if _, err := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindRepo); err != nil {
		// No creds bound yet — operator hasn't supplied them. The next
		// hello after creds land will pick this up. Anything other
		// than ErrNotFound is a store problem worth surfacing.
		if !errors.Is(err, store.ErrNotFound) {
			slog.Warn("auto-init: load host creds", "host_id", hostID, "err", err)
		}
		return
	}
	already, err := s.deps.Store.HasJobOfKind(ctx, hostID, string(api.JobInit))
	if err != nil {
		slog.Warn("auto-init: check job history", "host_id", hostID, "err", err)
		return
	}
	if already {
		return
	}

	jobID := ulid.Make().String()
	now := time.Now().UTC()
	if err := s.deps.Store.CreateJob(ctx, store.Job{
		ID:        jobID,
		HostID:    hostID,
		Kind:      string(api.JobInit),
		ActorKind: "system",
		CreatedAt: now,
	}); err != nil {
		slog.Warn("auto-init: persist job", "host_id", hostID, "err", err)
		return
	}
	env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
		JobID: jobID,
		Kind:  api.JobInit,
	})
	if err != nil {
		slog.Warn("auto-init: marshal command.run", "host_id", hostID, "err", err)
		return
	}

	// Send on the hello's own conn, bounded to five seconds.
	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	if err := conn.Send(sendCtx, env); err != nil {
		slog.Warn("auto-init: send command.run", "host_id", hostID, "err", err)
		return
	}

	// Audit is best-effort; its error is discarded.
	_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
		ID:         ulid.Make().String(),
		Actor:      "system",
		Action:     "host.auto_init",
		TargetKind: ptr("host"),
		TargetID:   &hostID,
		TS:         now,
	})
	slog.Info("auto-init: dispatched", "host_id", hostID, "job_id", jobID)
}
// pushRepoCredsOnHello fetches the host's repo-slot credentials,
// decrypts and parses them, and sends them over conn as a config.update
// with a five-second timeout.
//
// Nothing is returned: a missing row (store.ErrNotFound) is a silent
// no-op because the operator hasn't bound creds yet; load and send
// failures are logged at Warn; decrypt, parse, and marshal failures are
// logged at Error.
func (s *Server) pushRepoCredsOnHello(ctx context.Context, hostID string, conn *ws.Conn) {
	sealed, loadErr := s.deps.Store.GetHostCredentials(ctx, hostID, store.CredKindRepo)
	switch {
	case loadErr == nil:
		// Row present; carry on below.
	case errors.Is(loadErr, store.ErrNotFound):
		return
	default:
		slog.Warn("on-hello: load host creds", "host_id", hostID, "err", loadErr)
		return
	}

	aad := []byte("host:" + hostID)
	opened, openErr := s.deps.AEAD.Decrypt(sealed, aad)
	if openErr != nil {
		slog.Error("on-hello: decrypt host creds", "host_id", hostID, "err", openErr)
		return
	}

	var creds repoCredsBlob
	if parseErr := json.Unmarshal(opened, &creds); parseErr != nil {
		slog.Error("on-hello: parse host creds", "host_id", hostID, "err", parseErr)
		return
	}

	payload := api.ConfigUpdatePayload{
		RepoURL:      creds.RepoURL,
		RepoUsername: creds.RepoUsername,
		RepoPassword: creds.RepoPassword,
	}
	envelope, marshalErr := api.Marshal(api.MsgConfigUpdate, "", payload)
	if marshalErr != nil {
		slog.Error("on-hello: marshal config.update", "host_id", hostID, "err", marshalErr)
		return
	}

	sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	sendErr := conn.Send(sendCtx, envelope)
	if sendErr != nil {
		slog.Warn("on-hello: send config.update", "host_id", hostID, "err", sendErr)
	}
}