Files
restic-manager/internal/store/enrollment.go
T
steve 44feb708bc fix: enrollment FK race + log-when-rejected; runbook fixes from dry-run
The smoke runbook caught a real bug: ConsumeEnrollmentToken was
inserting into host_credentials (FK -> hosts) inside the same tx as
the token burn, but the host row didn't exist yet — CreateHost
runs in the *next* statement. The agent saw a generic 401 with no
clue why.

Fix: drop the host_credentials insert from ConsumeEnrollmentToken;
the HTTP handler now does Consume -> CreateHost ->
SetHostCredentials. SetHostCredentials failure is logged loudly
but doesn't fail the enrol — operator recovers via PUT
/api/hosts/{id}/repo-credentials.

Adds slog.Warn lines on both 401 paths in handleAgentEnroll so the
underlying cause is visible in server logs (the wire response stays
generic to avoid leaking which step failed).

Test: TestEnrollmentTransfersRepoCreds rewritten to mirror the new
order (consume -> create host -> SetHostCredentials).

Runbook (docs/e2e-smoke.md): rest-server moved off 8000 (commonly
in use); URLs use trailing slash on the rest path; clarified that
secrets_key is minted on first agent start, not at enrol time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:01:59 +01:00

104 lines
3.5 KiB
Go

package store
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
)
// CreateEnrollmentToken stores a new one-time enrollment token that
// expires ttl after the current UTC time.
//
// tokenHash is the already-hashed token. Only the hash is written to
// enrollment_tokens; the raw token is handed to the operator (printed
// in the install snippet) by the caller and is never persisted.
//
// encRepoCreds is the AEAD-encrypted blob of {repo_url, repo_username,
// repo_password} to stash alongside the token. It is not promoted to a
// host_credentials row by ConsumeEnrollmentToken; the caller does that
// via SetHostCredentials once the host row exists. An empty string is
// stored as SQL NULL and means the operator chose to set creds later
// via PUT /api/hosts/{id}/repo-credentials; the agent will refuse
// backup jobs until that lands.
//
// Any database error is returned wrapped.
func (s *Store) CreateEnrollmentToken(ctx context.Context, tokenHash string, ttl time.Duration, encRepoCreds string) error {
	createdAt := time.Now().UTC()
	expiresAt := createdAt.Add(ttl)

	// A nil interface value is bound as SQL NULL; only a non-empty blob
	// is stored as text.
	var credsArg any
	if encRepoCreds != "" {
		credsArg = encRepoCreds
	}

	if _, err := s.db.ExecContext(ctx,
		`INSERT INTO enrollment_tokens (token_hash, created_at, expires_at, enc_repo_creds)
VALUES (?, ?, ?, ?)`,
		tokenHash,
		createdAt.Format(time.RFC3339Nano),
		expiresAt.Format(time.RFC3339Nano),
		credsArg,
	); err != nil {
		return fmt.Errorf("store: create enrollment token: %w", err)
	}
	return nil
}
// ConsumeEnrollmentToken validates a token (must exist, not be
// consumed, not be expired) and marks it consumed by hostID, all in a
// single UPDATE statement.
//
// It returns ErrNotFound when no row matched, i.e. the token is
// unknown, already consumed, or expired. A database failure, or a
// failure to read the affected-row count, is returned as a wrapped
// error instead, so callers can tell "bad token" apart from "store
// broken".
//
// The associated repo creds (if any) are promoted into
// host_credentials by the caller via SetHostCredentials *after* the
// host row exists — host_credentials has a FK to hosts that would
// otherwise fire here, since the host is created by a separate
// statement immediately after this returns.
//
// NOTE(review): expires_at is written by CreateEnrollmentToken as a
// time.RFC3339Nano string and compared against one here. RFC3339Nano
// trims trailing zeros from the fractional seconds, so string order
// can differ from chronological order within the same second.
// Presumably harmless for token TTLs — confirm.
func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID string) error {
	// The same instant is used both as the consumed_at stamp and as the
	// expiry cutoff.
	now := time.Now().UTC().Format(time.RFC3339Nano)
	res, err := s.db.ExecContext(ctx,
		`UPDATE enrollment_tokens
SET consumed_at = ?, consumed_host = ?
WHERE token_hash = ? AND consumed_at IS NULL AND expires_at > ?`,
		now, hostID, tokenHash, now)
	if err != nil {
		return fmt.Errorf("store: consume enrollment token: %w", err)
	}

	// Do not fold a RowsAffected failure into "zero rows": the UPDATE
	// may well have burned the token, and reporting ErrNotFound would
	// hide that from the caller.
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("store: consume enrollment token: rows affected: %w", err)
	}
	if n == 0 {
		return ErrNotFound
	}
	return nil
}
// GetEnrollmentTokenCreds looks up the encrypted repo-creds blob the
// operator stashed when creating the token.
//
// Results:
//   - (blob, nil) when the token is live and has creds attached;
//   - ("", nil) when the token is live but enc_repo_creds is NULL,
//     i.e. no creds were attached;
//   - ("", ErrNotFound) when no live row matches: the token is gone,
//     already consumed, or expired;
//   - ("", wrapped error) on any other database failure.
//
// The caller is expected to decrypt using token_hash as the AEAD
// additional data, then re-encrypt using host_id as additional data
// before storing the result with SetHostCredentials.
func (s *Store) GetEnrollmentTokenCreds(ctx context.Context, tokenHash string) (string, error) {
	var blob sql.NullString
	asOf := time.Now().UTC().Format(time.RFC3339Nano)

	err := s.db.QueryRowContext(ctx,
		`SELECT enc_repo_creds FROM enrollment_tokens
WHERE token_hash = ? AND consumed_at IS NULL AND expires_at > ?`,
		tokenHash, asOf).Scan(&blob)

	switch {
	case errors.Is(err, sql.ErrNoRows):
		return "", ErrNotFound
	case err != nil:
		return "", fmt.Errorf("store: get enrollment token creds: %w", err)
	case blob.Valid:
		return blob.String, nil
	default:
		// Live token, NULL column: nothing to promote.
		return "", nil
	}
}
// PurgeExpiredEnrollmentTokens deletes long-expired token rows: those
// whose expires_at is at or before 24 hours ago. Tokens are retained
// for ~24h after expiry so audit traces still resolve them.
//
// It returns the number of rows deleted. A failed DELETE is returned
// as a wrapped error with a zero count. If the DELETE succeeds but the
// driver cannot report the affected-row count, that error is returned
// too (also with a zero count) rather than being silently dropped.
func (s *Store) PurgeExpiredEnrollmentTokens(ctx context.Context) (int64, error) {
	cutoff := time.Now().Add(-24 * time.Hour).UTC().Format(time.RFC3339Nano)
	res, err := s.db.ExecContext(ctx,
		`DELETE FROM enrollment_tokens WHERE expires_at <= ?`, cutoff)
	if err != nil {
		return 0, fmt.Errorf("store: purge enrollment tokens: %w", err)
	}

	n, err := res.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf("store: purge enrollment tokens: rows affected: %w", err)
	}
	return n, nil
}