73e733be61
CI / Test (rest) (pull_request) Successful in 41s
CI / Test (store) (pull_request) Successful in 43s
CI / Lint (pull_request) Successful in 29s
CI / Build (windows/amd64) (pull_request) Successful in 44s
CI / Test (server-http) (pull_request) Successful in 1m47s
CI / Build (linux/arm64) (pull_request) Successful in 43s
CI / Build (linux/amd64) (pull_request) Successful in 2m1s
New internal/server/metrics package emits the legacy text/plain exposition format directly, so we don't pull in prometheus/client_golang. The endpoint is opt-in via RM_METRICS_TOKEN and/or RM_METRICS_TRUSTED_CIDR; the route is not mounted at all if neither gate is set, and when both are configured a request must pass both. Metrics exposed: per-host gauges (online, last_backup_*, repo_size_bytes, snapshot_count, open_alerts, repo_status), server gauges (hosts_total/online, active_alerts by severity, build_info), and an in-memory job-duration histogram observed from the existing MsgJobFinished branch in the WS handler. Docs are in docs/prometheus.md (enable + scrape config + metric reference + dashboard import). A sample dashboard is at deploy/grafana/restic-manager-dashboard.json: six panels, Grafana schema 39, single Prometheus datasource variable. Tests: golden render, concurrent observe, and bucket boundaries in the metrics package; auth matrix (no auth -> 404, token gate, CIDR gate, both required) in the HTTP layer.
326 lines
9.2 KiB
JSON
326 lines
9.2 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "restic-manager fleet overview. Imports against any Prometheus data source.",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": null,
|
|
"links": [],
|
|
"liveNow": false,
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"title": "Fleet status",
|
|
"type": "stat",
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"gridPos": { "h": 6, "w": 6, "x": 0, "y": 0 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "red", "value": null },
|
|
{ "color": "green", "value": 1 }
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"textMode": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_hosts_online",
|
|
"legendFormat": "online",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_hosts_total",
|
|
"legendFormat": "total",
|
|
"refId": "B"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 2,
|
|
"title": "Open alerts",
|
|
"type": "stat",
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"gridPos": { "h": 6, "w": 6, "x": 6, "y": 0 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 1 },
|
|
{ "color": "red", "value": 5 }
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"textMode": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "sum by (severity) (rm_active_alerts)",
|
|
"legendFormat": "{{severity}}",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 3,
|
|
"title": "Backups failing (last reported run)",
|
|
"type": "stat",
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"gridPos": { "h": 6, "w": 6, "x": 12, "y": 0 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "thresholds" },
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "red", "value": 1 }
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
"textMode": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "count(rm_host_last_backup_success == 0)",
|
|
"legendFormat": "failing",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 4,
|
|
"title": "Hosts",
|
|
"type": "table",
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 6 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"custom": { "align": "auto", "displayMode": "auto" }
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": { "id": "byName", "options": "Value #B" },
|
|
"properties": [
|
|
{ "id": "displayName", "value": "Time since last backup" },
|
|
{ "id": "unit", "value": "s" }
|
|
]
|
|
},
|
|
{
|
|
"matcher": { "id": "byName", "options": "Value #C" },
|
|
"properties": [
|
|
{ "id": "displayName", "value": "Repo size" },
|
|
{ "id": "unit", "value": "bytes" }
|
|
]
|
|
},
|
|
{
|
|
"matcher": { "id": "byName", "options": "Value #D" },
|
|
"properties": [
|
|
{ "id": "displayName", "value": "Snapshots" }
|
|
]
|
|
},
|
|
{
|
|
"matcher": { "id": "byName", "options": "Value #A" },
|
|
"properties": [
|
|
{ "id": "displayName", "value": "Online" }
|
|
]
|
|
},
|
|
{
|
|
"matcher": { "id": "byName", "options": "Value #E" },
|
|
"properties": [
|
|
{ "id": "displayName", "value": "Open alerts" }
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"options": { "showHeader": true },
|
|
"transformations": [
|
|
{
|
|
"id": "merge",
|
|
"options": {}
|
|
}
|
|
],
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_host_agent_online",
|
|
"format": "table",
|
|
"instant": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "time() - rm_host_last_backup_timestamp_seconds",
|
|
"format": "table",
|
|
"instant": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_host_repo_size_bytes",
|
|
"format": "table",
|
|
"instant": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_host_snapshot_count",
|
|
"format": "table",
|
|
"instant": true,
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_host_open_alerts",
|
|
"format": "table",
|
|
"instant": true,
|
|
"refId": "E"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 5,
|
|
"title": "Repo size over time",
|
|
"type": "timeseries",
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "palette-classic" },
|
|
"custom": {
|
|
"axisLabel": "",
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"showPoints": "never"
|
|
},
|
|
"unit": "bytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": { "calcs": ["last"], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "rm_host_repo_size_bytes",
|
|
"legendFormat": "{{host}}",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 6,
|
|
"title": "Job duration p95 (last 1h, by kind)",
|
|
"type": "timeseries",
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "palette-classic" },
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"fillOpacity": 5,
|
|
"lineWidth": 1,
|
|
"pointSize": 4,
|
|
"showPoints": "never"
|
|
},
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": { "calcs": ["last"], "displayMode": "list", "placement": "bottom", "showLegend": true },
|
|
"tooltip": { "mode": "multi", "sort": "desc" }
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
"expr": "histogram_quantile(0.95, sum by (kind, le) (rate(rm_job_duration_seconds_bucket[1h])))",
|
|
"legendFormat": "{{kind}}",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"refresh": "30s",
|
|
"schemaVersion": 39,
|
|
"style": "dark",
|
|
"tags": ["restic-manager", "backups"],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {},
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"label": "Prometheus",
|
|
"multi": false,
|
|
"name": "DS_PROMETHEUS",
|
|
"options": [],
|
|
"query": "prometheus",
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"type": "datasource"
|
|
}
|
|
]
|
|
},
|
|
"time": { "from": "now-6h", "to": "now" },
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "restic-manager — fleet",
|
|
"uid": "rm-fleet-overview",
|
|
"version": 1,
|
|
"weekStart": ""
|
|
}
|