a45c801884
Until now the open-alert key was (host_id, kind, resolved_at IS NULL). A host with two source groups both failing collapsed onto one backup_failed row — second failure bumped last_seen_at and overwrote the message but never re-fan-out. Operators saw one alert that appeared to flap, not two distinct broken things. Schema changes (column-level ALTER, no rebuild): - 0015 jobs.source_group_id (FK → source_groups, ON DELETE SET NULL, index). Populated for backup jobs in CreateJob. - 0016 alerts.dedup_key (NOT NULL DEFAULT ''). The old alerts_open partial index gets dropped and replaced with a UNIQUE partial index on (host_id, kind, dedup_key) WHERE resolved_at IS NULL — the index is now the actual dedup primitive. Plumbing: - RaiseOrTouch / AutoResolve / Alert struct gain dedup_key. - engine.JobFinishedEvent gains SourceGroupID; handleJobFinished passes it through for backup_failed only (forget/prune/check stay repo-scoped with key=''). - ws.handler reads SourceGroupID off the freshly-loaded job row. - dispatchJobWithPayload gains a *string sourceGroupID arg; the per-group Run-now path and schedule.fire path pass &g.ID. Test coverage: TestRaiseOrTouchDedupsPerSourceGroup proves two distinct groups produce two distinct open alerts and that resolving one does not auto-resolve the other. Dev tool: cmd/_fake_alert gains -dedup-key flag.
133 lines
4.3 KiB
Go
133 lines
4.3 KiB
Go
// run_group.go — per-source-group Run-now endpoint.
|
|
//
|
|
// POST /hosts/{id}/source-groups/{gid}/run dispatches a backup job
|
|
// against the resolved includes/excludes/retention/tag of the named
|
|
// group. Replaces the old per-host /hosts/{id}/run-backup route (now
|
|
// 410 Gone).
|
|
package http
|
|
|
|
import (
|
|
"errors"
|
|
stdhttp "net/http"
|
|
"strconv"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/api"
|
|
"gitea.dcglab.co.uk/steve/restic-manager/internal/store"
|
|
)
|
|
|
|
// parseBandwidthOverride pulls the optional bandwidth_up_kbps /
// bandwidth_down_kbps values from the request using r.FormValue, so
// either a form body or the query string can carry them.
//
// A field that is absent or empty yields a nil pointer. An explicit
// "0" yields a non-nil pointer to 0 — i.e., "no cap for this run, even
// if the host has one set." Anything strconv.Atoi rejects, and any
// negative number, is reported as an error naming the offending field.
//
// On error both returned pointers are nil regardless of which field
// failed, so a caller never receives a half-populated result next to
// a non-nil err.
func parseBandwidthOverride(r *stdhttp.Request) (up *int, down *int, err error) {
	// parse reads one named field; (nil, nil) means "not supplied".
	parse := func(name string) (*int, error) {
		v := r.FormValue(name)
		if v == "" {
			return nil, nil
		}
		n, perr := strconv.Atoi(v)
		if perr != nil {
			return nil, errors.New(name + " must be an integer")
		}
		if n < 0 {
			return nil, errors.New(name + " must be >= 0")
		}
		return &n, nil
	}

	if up, err = parse("bandwidth_up_kbps"); err != nil {
		return nil, nil, err
	}
	if down, err = parse("bandwidth_down_kbps"); err != nil {
		// Discard the already-parsed up value: the request as a whole
		// is invalid, matching the shape of the first error path.
		return nil, nil, err
	}
	return up, down, nil
}
|
|
|
|
func (s *Server) handleRunSourceGroup(w stdhttp.ResponseWriter, r *stdhttp.Request) {
|
|
user, ok := s.requireUser(r)
|
|
if !ok {
|
|
// HTML callers redirect to login; for JSON return 401.
|
|
if wantsHTML(r) {
|
|
stdhttp.Redirect(w, r, "/login", stdhttp.StatusSeeOther)
|
|
return
|
|
}
|
|
writeJSONError(w, stdhttp.StatusUnauthorized, "unauthorised", "")
|
|
return
|
|
}
|
|
hostID := chi.URLParam(r, "id")
|
|
groupID := chi.URLParam(r, "gid")
|
|
g, err := s.deps.Store.GetSourceGroup(r.Context(), hostID, groupID)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
s.runGroupError(w, r, stdhttp.StatusNotFound, "group_not_found",
|
|
"source group not found on this host")
|
|
return
|
|
}
|
|
s.runGroupError(w, r, stdhttp.StatusInternalServerError, "internal", "")
|
|
return
|
|
}
|
|
|
|
// Optional per-run bandwidth override. Disclosed in the UI under a
|
|
// <details> "Limit bandwidth for this run" affordance; absent on
|
|
// the wire (and from JSON callers that don't supply it) means
|
|
// "fall back to the host's standing caps."
|
|
upOverride, downOverride, perr := parseBandwidthOverride(r)
|
|
if perr != nil {
|
|
s.runGroupError(w, r, stdhttp.StatusBadRequest, "invalid_value", perr.Error())
|
|
return
|
|
}
|
|
|
|
// Resolve hooks (group → host default → empty). Best-effort host
|
|
// lookup; failure proceeds with no hook rather than block the run.
|
|
var preHook, postHook string
|
|
if host, herr := s.deps.Store.GetHost(r.Context(), hostID); herr == nil {
|
|
preHook, postHook = s.resolveBackupHooks(host, g)
|
|
}
|
|
|
|
// Backup invocations don't consume RetentionPolicy — that lives on
|
|
// forget. Sending the resolved set here would just be dead weight.
|
|
res, status, code, msg := s.dispatchJobWithPayload(r.Context(), user, hostID, api.JobBackup, &g.ID,
|
|
api.CommandRunPayload{
|
|
Includes: g.Includes,
|
|
Excludes: g.Excludes,
|
|
Tag: g.Name,
|
|
BandwidthUpKBps: upOverride,
|
|
BandwidthDownKBps: downOverride,
|
|
PreHook: preHook,
|
|
PostHook: postHook,
|
|
})
|
|
if code != "" {
|
|
s.runGroupError(w, r, status, code, msg)
|
|
return
|
|
}
|
|
if wantsHTML(r) {
|
|
// HTMX action: redirect to the live job log so the operator
|
|
// sees streaming output immediately.
|
|
w.Header().Set("HX-Redirect", "/jobs/"+res.JobID)
|
|
w.WriteHeader(stdhttp.StatusNoContent)
|
|
return
|
|
}
|
|
writeJSON(w, stdhttp.StatusAccepted, res)
|
|
}
|
|
|
|
// runGroupError dispatches an error to JSON callers as the standard
|
|
// envelope; HTMX callers get a 4xx with a plain text body so the
|
|
// browser surfaces it via the existing toast handler.
|
|
func (s *Server) runGroupError(w stdhttp.ResponseWriter, r *stdhttp.Request, status int, code, msg string) {
|
|
if wantsHTML(r) {
|
|
stdhttp.Error(w, msg, status)
|
|
return
|
|
}
|
|
writeJSONError(w, status, code, msg)
|
|
}
|
|
|
|
// wantsHTML reports whether the caller should get the HTML/HTMX
// flavour of a response. The only signal consulted is the HX-Request
// header, which must be exactly "true". The Accept header is never
// looked at, so a browser sending a default Accept (or curl's `*/*`)
// gets the JSON shape — the safer default for non-htmx clients. HTMX
// always sets HX-Request=true on its action POSTs, so the form path is
// unambiguous.
func wantsHTML(r *stdhttp.Request) bool {
	const htmxMarker = "true"
	hx := r.Header.Get("HX-Request")
	return hx == htmxMarker
}
|