c8ead66f08
Cohesive batch from a smoke-test session against a real rest-server.
Themed bullets:
* Agent runs as root, sandboxed via systemd. CapabilityBoundingSet
drops to CAP_DAC_READ_SEARCH + restore caps; ProtectSystem=strict
with ReadWritePaths confined to /etc + /var/lib/restic-manager;
NoNewPrivileges blocks escalation. Install script no longer
creates a service user. spec.md §4.2 / §14.1 / §14.3 explain the
rationale (matches UrBackup / Veeam / Bareos defaults; trying to
back up "everything" as an unprivileged user creates silent skips
on /home, /root, /var/lib/* with no upside vs the threat model
the agent already implies).
* Init-repo end-to-end. New JobKind="init" wired through agent
runner, restic.Env.RunInit, server dispatcher, and a UI button
(red "Initialise repo" in the run-now panel). hosts.repo_initialised_at
flips on init success, on backup success, or on a non-empty
snapshots.report. The "Run now" / "Init" / "Retry" branching now
drives both the dashboard host row and the host-detail panel.
Migrations 0004 (column), 0005 (jobs.kind CHECK widened — using
the safe create-new-then-rename pattern; first version corrupted
job_logs.job_id FK), 0006 (cleans up job_logs FK on already-
affected DBs).
* rest-server creds embedded at exec time only. restic.Env gains
RepoUsername; mergeRestCreds() builds the user:pass@-prefixed URL
inside envSlice() and never assigns it back to the struct, so
nothing slog-able ever sees the cleartext form. RedactURL helper
for any future surface that needs to log a URL safely. Both
helpers tested.
* Add-host UX. Repo password is now optional — server mints a
24-byte URL-safe random one and surfaces it once, alongside an
htpasswd snippet ("echo PASS | htpasswd -B -i ... USERNAME") so
the operator pastes one command on the rest-server host and one
on the endpoint. Result page also links the install snippet at
/install/install.sh (was /install.sh — 404'd before) and pipes
to bash (not sh — script uses set -o pipefail and other
bashisms; on Debian/Ubuntu sh is dash).
* Late-subscriber race in JobHub. A fast-failing job could finish
(DB write + Broadcast) before the browser's HX-Redirect → page
load → WS-connect path completed, so the JS sat forever waiting
on a job.finished that already passed. JobHub split into
Register + Send + Run; handleJobStream now subscribes first,
re-fetches the job, and sends a synthetic job.finished if the
state is already terminal.
* HTMX error visibility. New toast partial listens to
htmx:responseError and surfaces the response body as a
bottom-right toast — every server-side validation error now
becomes visible without per-handler JS wiring. Also handles
custom rm:toast events for future server-pushed notifications
via the HX-Trigger header. Themed via existing CSS vars.
* Dashboard rows are now whole-row clickable to host detail
(CSS card-link pattern: absolute-positioned anchor + .row-action
z-index restoration so the action button stays clickable).
"View →" on a running job links to /jobs/<id> rather than
/hosts/<id> since the row click already covers the host page.
* "Run first" / "Run first backup" → "Run now" everywhere for
consistency.
* runbook (docs/e2e-smoke.md) updated — live-log streaming step
now reflects P1-26; mentions the browser-driven Run-now flow.
* _diag/dump-creds — moved out of cmd/ so go build doesn't pick
it up; .gitignore now excludes /_diag/ entirely.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
143 lines
4.2 KiB
Go
143 lines
4.2 KiB
Go
package http
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
stdhttp "net/http"
|
|
"time"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
"github.com/oklog/ulid/v2"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/auth"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// runNowRequest is the body of POST /api/hosts/:id/jobs.
|
|
type runNowRequest struct {
|
|
Kind api.JobKind `json:"kind"`
|
|
Args []string `json:"args,omitempty"` // restic CLI args (paths for backup, etc.)
|
|
}
|
|
|
|
// runNowResponse is the JSON body returned once a job has been
// accepted for dispatch.
type runNowResponse struct {
	// JobID identifies the newly created job.
	JobID string `json:"job_id"`

	// Status is the job state at response time: "queued".
	Status string `json:"status"`
}
|
|
|
|
// handleRunNow dispatches a job to the named host. Authenticated;
|
|
// rejects if the host isn't connected (caller should retry once
|
|
// the agent comes back).
|
|
func (s *Server) handleRunNow(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
user, ok := s.requireUser(r)
|
|
if !ok {
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorized", "")
|
|
return
|
|
}
|
|
hostID := chi.URLParam(r, "id")
|
|
if hostID == "" {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "missing_host_id", "")
|
|
return
|
|
}
|
|
var req runNowRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeJSONError(w, stdhttp.StatusBadRequest, "invalid_json", err.Error())
|
|
return
|
|
}
|
|
|
|
res, status, code, msg := s.dispatchJob(r.Context(), user, hostID, req.Kind, req.Args)
|
|
if code != "" {
|
|
writeJSONError(w, status, code, msg)
|
|
return
|
|
}
|
|
writeJSON(w, stdhttp.StatusAccepted, res)
|
|
}
|
|
|
|
// dispatchJob is the common path for HTTP-driven job dispatch. It
|
|
// validates the kind, checks the host is online, persists the job
|
|
// row, and ships command.run over the WS. Returns:
|
|
// - res: the queued-job response (job_id + status)
|
|
// - status: HTTP status to return on failure (or 0 on success)
|
|
// - code, msg: error code/message for the wire (empty on success)
|
|
//
|
|
// JSON callers wrap with writeJSONError; HTML callers translate to
|
|
// flash banner + redirect.
|
|
func (s *Server) dispatchJob(ctx context.Context, user *store.User,
|
|
hostID string, kind api.JobKind, args []string,
|
|
) (res runNowResponse, status int, code, msg string) {
|
|
if !validJobKind(kind) {
|
|
return res, stdhttp.StatusBadRequest, "invalid_kind",
|
|
"kind must be one of backup|forget|prune|check|unlock"
|
|
}
|
|
host, err := s.deps.Store.GetHost(ctx, hostID)
|
|
if err != nil {
|
|
return res, stdhttp.StatusNotFound, "host_not_found", ""
|
|
}
|
|
if !s.deps.Hub.Connected(host.ID) {
|
|
return res, stdhttp.StatusServiceUnavailable, "host_offline",
|
|
"agent is not currently connected; try again when it reconnects"
|
|
}
|
|
|
|
jobID := ulid.Make().String()
|
|
now := time.Now().UTC()
|
|
if err := s.deps.Store.CreateJob(ctx, store.Job{
|
|
ID: jobID,
|
|
HostID: host.ID,
|
|
Kind: string(kind),
|
|
ActorKind: "user",
|
|
ActorID: &user.ID,
|
|
CreatedAt: now,
|
|
}); err != nil {
|
|
return res, stdhttp.StatusInternalServerError, "internal", ""
|
|
}
|
|
|
|
env, err := api.Marshal(api.MsgCommandRun, jobID, api.CommandRunPayload{
|
|
JobID: jobID,
|
|
Kind: kind,
|
|
Args: args,
|
|
})
|
|
if err != nil {
|
|
return res, stdhttp.StatusInternalServerError, "internal", ""
|
|
}
|
|
if err := s.deps.Hub.Send(ctx, host.ID, env); err != nil {
|
|
return res, stdhttp.StatusServiceUnavailable, "host_offline", err.Error()
|
|
}
|
|
|
|
_ = s.deps.Store.AppendAudit(ctx, store.AuditEntry{
|
|
ID: ulid.Make().String(),
|
|
UserID: &user.ID,
|
|
Actor: "user",
|
|
Action: "job.run_now",
|
|
TargetKind: ptr("job"),
|
|
TargetID: &jobID,
|
|
TS: now,
|
|
})
|
|
return runNowResponse{JobID: jobID, Status: "queued"}, 0, "", ""
|
|
}
|
|
|
|
// requireUser resolves the session cookie to a user row. Stub of the
|
|
// session-auth middleware that lands in P1-04's full pass.
|
|
func (s *Server) requireUser(r *stdhttp.Request) (*store.User, bool) {
|
|
c, err := r.Cookie(sessionCookieName)
|
|
if err != nil {
|
|
return nil, false
|
|
}
|
|
sess, err := s.deps.Store.LookupSession(r.Context(), auth.HashToken(c.Value))
|
|
if err != nil {
|
|
return nil, false
|
|
}
|
|
u, err := s.deps.Store.GetUserByID(r.Context(), sess.UserID)
|
|
if err != nil {
|
|
return nil, false
|
|
}
|
|
return u, true
|
|
}
|
|
|
|
func validJobKind(k api.JobKind) bool {
|
|
switch k {
|
|
case api.JobBackup, api.JobInit, api.JobForget, api.JobPrune, api.JobCheck, api.JobUnlock:
|
|
return true
|
|
}
|
|
return false
|
|
}
|