Files
restic-manager/internal/store/migrations/0001_initial.sql
T
steve f34773b505 phase 1: WS transport, enrollment, agent that hellos and heartbeats
Lands the protocol layer end-to-end: an agent can be enrolled
through the operator UI, after which it stores its credentials, dials
back to the server over WS, completes the protocol_version handshake,
and stays connected with periodic heartbeats.

Server side:
- P1-09 ws.Hub: one Conn per host_id, last-write-wins eviction,
  json envelope writer with a write mutex, reader, error envelopes.
- P1-09 ws.AgentHandler: bearer-auth, accept upgrade, hello-stage
  (10s deadline, protocol_version checked against
  api.MinAgentProtocolVersion → ErrProtocolTooOld with help URL on
  reject), main read loop, defer hub register/unregister.
- P1-10 POST /api/agents/enroll consumes a one-time token, mints a
  persistent agent bearer (sha-256 stored), creates a host row.
- P1-10 POST /api/enrollment-tokens (operator, session-auth)
  issues a 1h one-time token.
- P1-11 hello upserts agent_version + restic_version +
  protocol_version on the host row, flips status to online.
- P1-12 heartbeat touches last_seen_at; background sweeper marks
  hosts offline after 90s without one.
- store: hosts table accessors, host_schedule_version,
  enrollment_tokens FK on consumed_host dropped (audit-only field;
  the token gets burned before the host row exists).

Agent side:
- P1-13 internal/agent/config: yaml at /etc/restic-manager/agent.yaml,
  atomic Save (tmp+fsync+rename), Enrolled() helper.
- P1-15 internal/agent/wsclient: dial with bearer + optional
  TLS cert pinning (sha-256 of leaf), exponential backoff with
  jitter (1s → 60s cap), heartbeat goroutine, fatal handling for
  ErrProtocolTooOld.
- P1-15 wsclient.Enroll: HTTP POST /api/agents/enroll with sysinfo.
- P1-17 internal/agent/sysinfo: hostname/OS/arch/restic-version
  collection. restic detected by `restic version` parse; absent
  restic doesn't block startup.
- cmd/agent: -enroll-server / -enroll-token flags drive first-run
  enrollment then exit (so the install script can hand off to
  systemd to run the persistent service).

End-to-end smoke verified: bootstrap → login → issue token →
enroll → run agent → server logs `ws agent connected` with the
right host_id and protocol_version 1.

All tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 00:39:00 +01:00

203 lines
8.5 KiB
SQL

-- 0001_initial.sql
--
-- Initial schema for restic-manager. Mirrors the domain model in
-- spec.md §5. We use TEXT primary keys (ULIDs) throughout: sortable,
-- URL-safe, no autoincrement contention. JSON blobs are stored as
-- TEXT; SQLite's json1 extension is available but we read/write
-- raw and parse in Go for portability.
--
-- All timestamps are stored as RFC 3339 TEXT (UTC). SQLite's INTEGER
-- (unix epoch) would be cheaper but text is human-readable in dumps
-- and the storage cost is negligible at this scale.
-- Login accounts. Usernames are unique, and `role` must be one of the
-- three values in the CHECK. `last_login_at` is the only nullable column.
CREATE TABLE users (
    id            TEXT PRIMARY KEY,
    username      TEXT NOT NULL UNIQUE,
    password_hash TEXT NOT NULL,
    role          TEXT NOT NULL
                  CHECK (role IN ('admin','operator','viewer')),
    created_at    TEXT NOT NULL,
    last_login_at TEXT
);
-- Login sessions. The primary key is the session token itself, so it
-- has to be high-entropy. A session is deleted together with its user
-- (ON DELETE CASCADE).
CREATE TABLE sessions (
    id         TEXT PRIMARY KEY,
    user_id    TEXT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    created_at TEXT NOT NULL,
    expires_at TEXT NOT NULL,
    -- NOTE(review): presumably client IP and User-Agent — confirm.
    ip         TEXT,
    ua         TEXT
);

CREATE INDEX sessions_user_id
    ON sessions(user_id);
CREATE INDEX sessions_expires_at
    ON sessions(expires_at);
-- Stored secrets. Only the AEAD ciphertext (nonce || ciphertext, base64)
-- is persisted, in secret_ref; the plaintext never lands on disk.
CREATE TABLE credentials (
    id         TEXT PRIMARY KEY,
    -- 'rest','s3','local'
    -- NOTE(review): unlike repos.kind, this column carries no CHECK —
    -- confirm that is intentional.
    kind       TEXT NOT NULL,
    username   TEXT,
    secret_ref TEXT NOT NULL,
    rotated_at TEXT NOT NULL
);
-- Restic repositories known to the server.
--
-- credential_id and password_secret_id both reference credentials rows
-- and are nullable. ON DELETE RESTRICT means that, with foreign-key
-- enforcement on, a credential cannot be deleted while a repo still
-- points at it.
CREATE TABLE repos (
    id                    TEXT PRIMARY KEY,
    name                  TEXT NOT NULL,
    url                   TEXT NOT NULL,
    kind                  TEXT NOT NULL CHECK (kind IN ('rest','s3','local')),
    credential_id         TEXT REFERENCES credentials(id) ON DELETE RESTRICT,
    password_secret_id    TEXT REFERENCES credentials(id) ON DELETE RESTRICT,
    -- Cached projection from `restic stats` + lock-file inspection.
    size_bytes            INTEGER NOT NULL DEFAULT 0,
    snapshot_count        INTEGER NOT NULL DEFAULT 0,
    dedup_ratio           REAL NOT NULL DEFAULT 0,
    last_check_at         TEXT,
    last_check_status     TEXT,
    lock_state            TEXT NOT NULL DEFAULT 'unlocked'
                          CHECK (lock_state IN ('locked','unlocked')),
    -- Boolean stored as INTEGER. SQLite has no dedicated boolean type,
    -- so the CHECK keeps values other than 0/1 out of the column.
    append_only           INTEGER NOT NULL DEFAULT 1
                          CHECK (append_only IN (0, 1)),
    credential_rotated_at TEXT
);
-- Enrolled machines. Host names are unique. New rows start out 'offline'
-- with empty version strings and zeroed counters until something
-- updates them.
CREATE TABLE hosts (
    id                       TEXT PRIMARY KEY,
    name                     TEXT NOT NULL UNIQUE,
    os                       TEXT NOT NULL,
    arch                     TEXT NOT NULL,
    agent_version            TEXT NOT NULL DEFAULT '',
    restic_version           TEXT NOT NULL DEFAULT '',
    protocol_version         INTEGER NOT NULL DEFAULT 0,
    enrolled_at              TEXT NOT NULL,
    last_seen_at             TEXT,
    status                   TEXT NOT NULL DEFAULT 'offline'
                             CHECK (status IN ('online','offline','degraded')),
    -- Cleared (SET NULL) rather than cascading when the repo is deleted.
    repo_id                  TEXT REFERENCES repos(id) ON DELETE SET NULL,
    -- json array
    tags                     TEXT NOT NULL DEFAULT '[]',
    -- Plain TEXT: no foreign key is declared on this column.
    current_job_id           TEXT,

    -- Denormalised projections (refreshed on job.finished etc).
    last_backup_at           TEXT,
    last_backup_status       TEXT
                             CHECK (last_backup_status IN
                                    ('succeeded','failed','cancelled') OR
                                    last_backup_status IS NULL),
    repo_size_bytes          INTEGER NOT NULL DEFAULT 0,
    snapshot_count           INTEGER NOT NULL DEFAULT 0,
    open_alert_count         INTEGER NOT NULL DEFAULT 0,
    applied_schedule_version INTEGER NOT NULL DEFAULT 0,

    -- Server-issued credentials for the agent ↔ server WS.
    agent_token_hash         TEXT NOT NULL DEFAULT '',
    cert_pin_sha256          TEXT NOT NULL DEFAULT ''
);

CREATE INDEX hosts_status
    ON hosts(status);
CREATE INDEX hosts_last_seen_at
    ON hosts(last_seen_at);
-- One-time enrollment tokens awaiting use (TTL'd, single-use), keyed by
-- the token's hash.
--
-- consumed_host is an audit-only field and deliberately carries no FK:
-- the token is burned before the host row exists, and the trail should
-- survive a later deletion of that host.
CREATE TABLE enrollment_tokens (
    token_hash    TEXT PRIMARY KEY,
    created_at    TEXT NOT NULL,
    expires_at    TEXT NOT NULL,
    consumed_at   TEXT,
    consumed_host TEXT
);

CREATE INDEX enrollment_tokens_expires_at
    ON enrollment_tokens(expires_at);
-- Per-host schedules for backup / forget / prune / check runs. Rows are
-- deleted together with their host (ON DELETE CASCADE).
CREATE TABLE schedules (
    id               TEXT PRIMARY KEY,
    host_id          TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
    kind             TEXT NOT NULL CHECK (kind IN ('backup','forget','prune','check')),
    cron_expr        TEXT NOT NULL,
    paths            TEXT NOT NULL DEFAULT '[]', -- json array
    excludes         TEXT NOT NULL DEFAULT '[]',
    tags             TEXT NOT NULL DEFAULT '[]',
    retention_policy TEXT NOT NULL DEFAULT '{}', -- json object
    options          TEXT NOT NULL DEFAULT '{}', -- json object (bandwidth)
    -- Hooks are encrypted at rest (AEAD ciphertext). Constraint enforced
    -- in application code: hooks must be empty unless kind='backup'.
    pre_hook         TEXT NOT NULL DEFAULT '',
    post_hook        TEXT NOT NULL DEFAULT '',
    -- Boolean stored as INTEGER. SQLite has no dedicated boolean type,
    -- so the CHECK keeps values other than 0/1 out of the column.
    enabled          INTEGER NOT NULL DEFAULT 1
                     CHECK (enabled IN (0, 1)),
    created_at       TEXT NOT NULL,
    updated_at       TEXT NOT NULL
);

CREATE INDEX schedules_host_id ON schedules(host_id);
-- Monotonic schedule version, one row per host. Any INSERT, UPDATE or
-- DELETE on that host's schedules bumps it. The value is pushed to the
-- agent in schedule.set, and the agent echoes the same version back in
-- schedule.ack.
CREATE TABLE host_schedule_version (
    host_id TEXT PRIMARY KEY
            REFERENCES hosts(id) ON DELETE CASCADE,
    version INTEGER NOT NULL DEFAULT 0
);
-- Job history: one row per backup / forget / prune / check / unlock run
-- on a host. Rows are deleted with their host (CASCADE). Deleting the
-- originating schedule only clears scheduled_id (SET NULL).
CREATE TABLE jobs (
    id           TEXT PRIMARY KEY,
    host_id      TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
    kind         TEXT NOT NULL CHECK (kind IN ('backup','forget','prune','check','unlock')),
    status       TEXT NOT NULL CHECK (status IN ('queued','running','succeeded','failed','cancelled')),
    scheduled_id TEXT REFERENCES schedules(id) ON DELETE SET NULL,
    actor_kind   TEXT NOT NULL CHECK (actor_kind IN ('user','schedule','system')),
    actor_id     TEXT, -- user id, schedule id, or null
    started_at   TEXT,
    finished_at  TEXT,
    exit_code    INTEGER,
    stats        TEXT, -- json blob from restic
    error        TEXT,
    created_at   TEXT NOT NULL
);

CREATE INDEX jobs_host_id ON jobs(host_id);
CREATE INDEX jobs_status ON jobs(status);
CREATE INDEX jobs_created_at ON jobs(created_at);
-- Child-key index for the schedules FK. Without it, applying
-- ON DELETE SET NULL when a schedule is deleted has to scan every
-- row in jobs to find the ones that reference it.
CREATE INDEX jobs_scheduled_id ON jobs(scheduled_id);
-- Log lines belonging to a job, identified by (job_id, seq). Each line is
-- tagged with the stream it came from. Lines are deleted together with
-- their job (ON DELETE CASCADE).
CREATE TABLE job_logs (
    job_id  TEXT NOT NULL
            REFERENCES jobs(id) ON DELETE CASCADE,
    seq     INTEGER NOT NULL,
    ts      TEXT NOT NULL,
    stream  TEXT NOT NULL
            CHECK (stream IN ('stdout','stderr','event')),
    payload TEXT NOT NULL,

    PRIMARY KEY (job_id, seq)
);
-- Snapshot records, keyed by the restic snapshot id. A row is removed
-- when either its host or its repo is deleted (both FKs CASCADE).
CREATE TABLE snapshots (
    id         TEXT PRIMARY KEY, -- restic snapshot id
    host_id    TEXT NOT NULL REFERENCES hosts(id) ON DELETE CASCADE,
    repo_id    TEXT NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
    time       TEXT NOT NULL,
    hostname   TEXT NOT NULL,
    paths      TEXT NOT NULL DEFAULT '[]',
    tags       TEXT NOT NULL DEFAULT '[]',
    size_bytes INTEGER NOT NULL DEFAULT 0,
    file_count INTEGER NOT NULL DEFAULT 0
);

CREATE INDEX snapshots_host_id ON snapshots(host_id);
-- Child-key index for the repos FK. Without it, the ON DELETE CASCADE
-- triggered by deleting a repo has to scan every row in snapshots to
-- find the ones that reference it.
CREATE INDEX snapshots_repo_id ON snapshots(repo_id);
CREATE INDEX snapshots_time ON snapshots(time);
-- Alerts. host_id is nullable, so an alert need not belong to a host;
-- when it does, it is deleted with that host (CASCADE). Deleting the
-- acknowledging user only clears acknowledged_by (SET NULL).
CREATE TABLE alerts (
    id              TEXT PRIMARY KEY,
    host_id         TEXT REFERENCES hosts(id) ON DELETE CASCADE,
    kind            TEXT NOT NULL,
    severity        TEXT NOT NULL
                    CHECK (severity IN ('info','warning','critical')),
    message         TEXT NOT NULL,
    created_at      TEXT NOT NULL,
    acknowledged_at TEXT,
    acknowledged_by TEXT REFERENCES users(id) ON DELETE SET NULL,
    resolved_at     TEXT
);

CREATE INDEX alerts_host_id
    ON alerts(host_id);
-- Partial index: covers only alerts that have not been resolved yet.
CREATE INDEX alerts_open
    ON alerts(host_id)
    WHERE resolved_at IS NULL;
-- Audit trail. `actor` must be 'user', 'agent' or 'system'. user_id is
-- nullable and is cleared (SET NULL) when the user is deleted, so the
-- entry itself outlives the account.
CREATE TABLE audit_log (
    id          TEXT PRIMARY KEY,
    user_id     TEXT REFERENCES users(id) ON DELETE SET NULL,
    actor       TEXT NOT NULL
                CHECK (actor IN ('user','agent','system')),
    action      TEXT NOT NULL,
    target_kind TEXT,
    target_id   TEXT,
    ts          TEXT NOT NULL,
    payload     TEXT NOT NULL DEFAULT '{}'
);

CREATE INDEX audit_log_ts
    ON audit_log(ts);
CREATE INDEX audit_log_user
    ON audit_log(user_id);