fix: enrollment FK race + log-when-rejected; runbook fixes from dry-run

The smoke runbook caught a real bug: ConsumeEnrollmentToken was
inserting into host_credentials (FK -> hosts) inside the same tx as
the token burn, but the host row didn't exist yet — CreateHost
runs in the *next* statement. The insert tripped the FK, the consume
failed, and the agent saw a generic 401 with no clue why.

Fix: drop the host_credentials insert from ConsumeEnrollmentToken;
the HTTP handler now does Consume -> CreateHost ->
SetHostCredentials. A SetHostCredentials failure is logged loudly
but doesn't fail the enrollment — the operator recovers via PUT
/api/hosts/{id}/repo-credentials.

Adds slog.Warn lines on both 401 paths in handleAgentEnroll so the
underlying cause is visible in server logs (the wire response stays
generic to avoid leaking which step failed).

Test: TestEnrollmentTransfersRepoCreds rewritten to mirror the new
order (consume -> create host -> SetHostCredentials).

Runbook (docs/e2e-smoke.md): rest-server moved off port 8000
(commonly in use); URLs use a trailing slash on the rest path;
clarified that secrets_key is minted on first agent start, not at
enrollment time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 14:01:59 +01:00
parent 6cfbdfc7ab
commit 44feb708bc
5 changed files with 47 additions and 44 deletions
+9 -30
View File
@@ -37,22 +37,17 @@ func (s *Store) CreateEnrollmentToken(ctx context.Context, tokenHash string, ttl
}
// ConsumeEnrollmentToken atomically validates a token (must exist,
// not be consumed, not be expired), marks it consumed by hostID, and
// — if the token carries encrypted repo creds — promotes them to a
// host_credentials row in the same tx. The encrypted blob is
// re-encrypted by the caller with host_id as additional data; we
// don't crack it open here.
//
// not be consumed, not be expired) and marks it consumed by hostID.
// Returns ErrNotFound on any failure.
func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID, encRepoCredsForHost string) error {
//
// The associated repo creds (if any) are promoted into
// host_credentials by the caller via SetHostCredentials *after* the
// host row exists — host_credentials has a FK to hosts that would
// otherwise fire here, since the host is created by a separate
// statement immediately after this returns.
func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID string) error {
now := time.Now().UTC().Format(time.RFC3339Nano)
tx, err := s.db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("store: consume enrollment token: begin: %w", err)
}
defer func() { _ = tx.Rollback() }()
res, err := tx.ExecContext(ctx,
res, err := s.db.ExecContext(ctx,
`UPDATE enrollment_tokens
SET consumed_at = ?, consumed_host = ?
WHERE token_hash = ? AND consumed_at IS NULL AND expires_at > ?`,
@@ -64,22 +59,6 @@ func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID, e
if n == 0 {
return ErrNotFound
}
if encRepoCredsForHost != "" {
if _, err := tx.ExecContext(ctx,
`INSERT INTO host_credentials (host_id, enc_repo_creds, updated_at)
VALUES (?, ?, ?)
ON CONFLICT(host_id) DO UPDATE SET
enc_repo_creds = excluded.enc_repo_creds,
updated_at = excluded.updated_at`,
hostID, encRepoCredsForHost, now); err != nil {
return fmt.Errorf("store: promote host credentials: %w", err)
}
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("store: consume enrollment token: commit: %w", err)
}
return nil
}
+2 -2
View File
@@ -148,11 +148,11 @@ func TestEnrollmentTokenSingleUse(t *testing.T) {
t.Fatalf("insert host: %v", err)
}
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1", ""); err != nil {
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1"); err != nil {
t.Fatalf("consume: %v", err)
}
// Second consume must fail — the whole point of one-time tokens.
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1", ""); !errors.Is(err, ErrNotFound) {
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1"); !errors.Is(err, ErrNotFound) {
t.Errorf("re-consume: want ErrNotFound, got %v", err)
}
}