fix: enrollment FK race + log-when-rejected; runbook fixes from dry-run
The smoke runbook caught a real bug: ConsumeEnrollmentToken was
inserting into host_credentials (FK -> hosts) inside the same tx as
the token burn, but the host row didn't exist yet — CreateHost
runs in the *next* statement. The agent saw a generic 401 with no
clue why.
Fix: drop the host_credentials insert from ConsumeEnrollmentToken;
the HTTP handler now does Consume -> CreateHost ->
SetHostCredentials. SetHostCredentials failure is logged loudly
but doesn't fail the enrol — operator recovers via PUT
/api/hosts/{id}/repo-credentials.
Adds slog.Warn lines on both 401 paths in handleAgentEnroll so the
underlying cause is visible in server logs (the wire response stays
generic to avoid leaking which step failed).
Test: TestEnrollmentTransfersRepoCreds rewritten to mirror the new
order (consume -> create host -> SetHostCredentials).
Runbook (docs/e2e-smoke.md): rest-server moved off 8000 (commonly
in use); URLs use trailing slash on the rest path; clarified that
secrets_key is minted on first agent start, not at enrol time.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+13
-8
@@ -66,7 +66,9 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
- OPTIONS=--no-auth # smoke-test only; real deploys use --append-only + htpasswd
|
- OPTIONS=--no-auth # smoke-test only; real deploys use --append-only + htpasswd
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:8000:8000"
|
# Mapped to 8100 because most dev boxes already have something
|
||||||
|
# on 8000. Use any free port; just keep the URLs below in sync.
|
||||||
|
- "127.0.0.1:8100:8000"
|
||||||
volumes:
|
volumes:
|
||||||
- ./rest:/data
|
- ./rest:/data
|
||||||
|
|
||||||
@@ -118,7 +120,7 @@ ENROLL=$(curl -s -b /tmp/rm-smoke/cookies -X POST http://127.0.0.1:8080/api/enro
|
|||||||
-H 'content-type: application/json' \
|
-H 'content-type: application/json' \
|
||||||
-d '{
|
-d '{
|
||||||
"hostname":"smoke-host",
|
"hostname":"smoke-host",
|
||||||
"repo_url":"rest:http://127.0.0.1:8000/smoke",
|
"repo_url":"rest:http://127.0.0.1:8100/smoke/",
|
||||||
"repo_username":"",
|
"repo_username":"",
|
||||||
"repo_password":"smoke-pw"
|
"repo_password":"smoke-pw"
|
||||||
}')
|
}')
|
||||||
@@ -136,7 +138,7 @@ restic itself wants the repo initialised:
|
|||||||
|
|
||||||
```sh
|
```sh
|
||||||
RESTIC_PASSWORD=smoke-pw \
|
RESTIC_PASSWORD=smoke-pw \
|
||||||
restic -r rest:http://127.0.0.1:8000/smoke init
|
restic -r rest:http://127.0.0.1:8100/smoke/ init
|
||||||
```
|
```
|
||||||
|
|
||||||
## 6. Pretend to be a fresh endpoint
|
## 6. Pretend to be a fresh endpoint
|
||||||
@@ -169,9 +171,11 @@ HOST_ID=$(grep host_id "$CONFIG" | awk '{print $2}' | tr -d '"')
|
|||||||
echo "host id: $HOST_ID"
|
echo "host id: $HOST_ID"
|
||||||
```
|
```
|
||||||
|
|
||||||
After enrolment, `agent.yaml` should contain `secrets_key:` (a
|
After enrolment, `agent.yaml` should contain `host_id:` (a ULID),
|
||||||
base64 32-byte key) and `host_id:` (a ULID). It should **not**
|
`agent_token:`, and `server_url:`. It will **not** contain
|
||||||
contain `repo_url:` or `repo_password:`.
|
`secrets_key:` yet — that's minted on the first non-enroll start
|
||||||
|
of the agent (next step). It should **not** contain `repo_url:`
|
||||||
|
or `repo_password:` (those never appear in plaintext on disk).
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cat "$CONFIG"
|
cat "$CONFIG"
|
||||||
@@ -196,7 +200,8 @@ ws agent: repo credentials updated via config.update
|
|||||||
That last line confirms slice 1 + 2 of P1-32/33: the server pushed
|
That last line confirms slice 1 + 2 of P1-32/33: the server pushed
|
||||||
the encrypted creds, the agent decrypted, persisted to `secrets.enc`,
|
the encrypted creds, the agent decrypted, persisted to `secrets.enc`,
|
||||||
and is now ready to back up. `secrets.enc` should now exist and be
|
and is now ready to back up. `secrets.enc` should now exist and be
|
||||||
0600.
|
0600. `agent.yaml` should now also contain a freshly-minted
|
||||||
|
`secrets_key:` (base64-encoded 32 bytes).
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
ls -l /tmp/rm-smoke/agent/var-lib/secrets.enc
|
ls -l /tmp/rm-smoke/agent/var-lib/secrets.enc
|
||||||
@@ -235,7 +240,7 @@ curl -s -b /tmp/rm-smoke/cookies \
|
|||||||
"http://127.0.0.1:8080/api/hosts/$HOST_ID/repo-credentials" | jq
|
"http://127.0.0.1:8080/api/hosts/$HOST_ID/repo-credentials" | jq
|
||||||
```
|
```
|
||||||
|
|
||||||
Expect `{"repo_url":"rest:http://127.0.0.1:8000/smoke","has_password":true}`.
|
Expect `{"repo_url":"rest:http://127.0.0.1:8100/smoke/","has_password":true}`.
|
||||||
The password is never returned over this endpoint.
|
The password is never returned over this endpoint.
|
||||||
|
|
||||||
## 11. Edit creds + verify push-on-update
|
## 11. Edit creds + verify push-on-update
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
stdhttp "net/http"
|
stdhttp "net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -98,12 +99,14 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
|
|||||||
// the token, which is about to disappear).
|
// the token, which is about to disappear).
|
||||||
encForHost, err := s.rebindTokenCreds(r.Context(), tokHash, hostID)
|
encForHost, err := s.rebindTokenCreds(r.Context(), tokHash, hostID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
slog.Warn("enrollment: rebind token creds failed", "err", err)
|
||||||
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
||||||
"token unknown, expired, or already used")
|
"token unknown, expired, or already used")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID, encForHost); err != nil {
|
if err := s.deps.Store.ConsumeEnrollmentToken(r.Context(), tokHash, hostID); err != nil {
|
||||||
|
slog.Warn("enrollment: consume token failed", "err", err)
|
||||||
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
writeJSONError(w, stdhttp.StatusUnauthorized, "invalid_token",
|
||||||
"token unknown, expired, or already used")
|
"token unknown, expired, or already used")
|
||||||
return
|
return
|
||||||
@@ -131,6 +134,18 @@ func (s *Server) handleAgentEnroll(w stdhttp.ResponseWriter, r *stdhttp.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Promote the encrypted repo creds onto the freshly-created host
|
||||||
|
// row. If this fails for any reason we log loudly but still
|
||||||
|
// return the bearer — the operator recovers via PUT
|
||||||
|
// /api/hosts/{id}/repo-credentials. Failing the whole enrolment
|
||||||
|
// here would leave a half-burned token + an orphan host.
|
||||||
|
if encForHost != "" {
|
||||||
|
if err := s.deps.Store.SetHostCredentials(r.Context(), hostID, encForHost); err != nil {
|
||||||
|
slog.Error("enrollment: set host credentials failed",
|
||||||
|
"host_id", hostID, "err", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
_ = s.deps.Store.AppendAudit(r.Context(), store.AuditEntry{
|
||||||
ID: ulid.Make().String(),
|
ID: ulid.Make().String(),
|
||||||
Actor: "system",
|
Actor: "system",
|
||||||
|
|||||||
@@ -46,14 +46,18 @@ func TestEnrollmentTransfersRepoCreds(t *testing.T) {
|
|||||||
t.Errorf("rebind should change ciphertext (additional-data differs); got identical")
|
t.Errorf("rebind should change ciphertext (additional-data differs); got identical")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Need a host row for the FK.
|
// Burn the token, then create the host row, then promote — same
|
||||||
|
// order the HTTP handler runs.
|
||||||
|
if err := st.ConsumeEnrollmentToken(ctx, tokHash, hostID); err != nil {
|
||||||
|
t.Fatalf("consume: %v", err)
|
||||||
|
}
|
||||||
if _, err := st.DB().Exec(
|
if _, err := st.DB().Exec(
|
||||||
`INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`,
|
`INSERT INTO hosts (id, name, os, arch, enrolled_at) VALUES (?,?,?,?,?)`,
|
||||||
hostID, "host42", "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
|
hostID, "host42", "linux", "amd64", "2026-01-01T00:00:00Z"); err != nil {
|
||||||
t.Fatalf("insert host: %v", err)
|
t.Fatalf("insert host: %v", err)
|
||||||
}
|
}
|
||||||
if err := st.ConsumeEnrollmentToken(ctx, tokHash, hostID, encForHost); err != nil {
|
if err := st.SetHostCredentials(ctx, hostID, encForHost); err != nil {
|
||||||
t.Fatalf("consume: %v", err)
|
t.Fatalf("set host credentials: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// host_credentials row should now hold the host-bound ciphertext.
|
// host_credentials row should now hold the host-bound ciphertext.
|
||||||
|
|||||||
@@ -37,22 +37,17 @@ func (s *Store) CreateEnrollmentToken(ctx context.Context, tokenHash string, ttl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ConsumeEnrollmentToken atomically validates a token (must exist,
|
// ConsumeEnrollmentToken atomically validates a token (must exist,
|
||||||
// not be consumed, not be expired), marks it consumed by hostID, and
|
// not be consumed, not be expired) and marks it consumed by hostID.
|
||||||
// — if the token carries encrypted repo creds — promotes them to a
|
|
||||||
// host_credentials row in the same tx. The encrypted blob is
|
|
||||||
// re-encrypted by the caller with host_id as additional data; we
|
|
||||||
// don't crack it open here.
|
|
||||||
//
|
|
||||||
// Returns ErrNotFound on any failure.
|
// Returns ErrNotFound on any failure.
|
||||||
func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID, encRepoCredsForHost string) error {
|
//
|
||||||
|
// The associated repo creds (if any) are promoted into
|
||||||
|
// host_credentials by the caller via SetHostCredentials *after* the
|
||||||
|
// host row exists — host_credentials has a FK to hosts that would
|
||||||
|
// otherwise fire here, since the host is created by a separate
|
||||||
|
// statement immediately after this returns.
|
||||||
|
func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID string) error {
|
||||||
now := time.Now().UTC().Format(time.RFC3339Nano)
|
now := time.Now().UTC().Format(time.RFC3339Nano)
|
||||||
tx, err := s.db.BeginTx(ctx, nil)
|
res, err := s.db.ExecContext(ctx,
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("store: consume enrollment token: begin: %w", err)
|
|
||||||
}
|
|
||||||
defer func() { _ = tx.Rollback() }()
|
|
||||||
|
|
||||||
res, err := tx.ExecContext(ctx,
|
|
||||||
`UPDATE enrollment_tokens
|
`UPDATE enrollment_tokens
|
||||||
SET consumed_at = ?, consumed_host = ?
|
SET consumed_at = ?, consumed_host = ?
|
||||||
WHERE token_hash = ? AND consumed_at IS NULL AND expires_at > ?`,
|
WHERE token_hash = ? AND consumed_at IS NULL AND expires_at > ?`,
|
||||||
@@ -64,22 +59,6 @@ func (s *Store) ConsumeEnrollmentToken(ctx context.Context, tokenHash, hostID, e
|
|||||||
if n == 0 {
|
if n == 0 {
|
||||||
return ErrNotFound
|
return ErrNotFound
|
||||||
}
|
}
|
||||||
|
|
||||||
if encRepoCredsForHost != "" {
|
|
||||||
if _, err := tx.ExecContext(ctx,
|
|
||||||
`INSERT INTO host_credentials (host_id, enc_repo_creds, updated_at)
|
|
||||||
VALUES (?, ?, ?)
|
|
||||||
ON CONFLICT(host_id) DO UPDATE SET
|
|
||||||
enc_repo_creds = excluded.enc_repo_creds,
|
|
||||||
updated_at = excluded.updated_at`,
|
|
||||||
hostID, encRepoCredsForHost, now); err != nil {
|
|
||||||
return fmt.Errorf("store: promote host credentials: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := tx.Commit(); err != nil {
|
|
||||||
return fmt.Errorf("store: consume enrollment token: commit: %w", err)
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -148,11 +148,11 @@ func TestEnrollmentTokenSingleUse(t *testing.T) {
|
|||||||
t.Fatalf("insert host: %v", err)
|
t.Fatalf("insert host: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1", ""); err != nil {
|
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1"); err != nil {
|
||||||
t.Fatalf("consume: %v", err)
|
t.Fatalf("consume: %v", err)
|
||||||
}
|
}
|
||||||
// Second consume must fail — the whole point of one-time tokens.
|
// Second consume must fail — the whole point of one-time tokens.
|
||||||
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1", ""); !errors.Is(err, ErrNotFound) {
|
if err := s.ConsumeEnrollmentToken(ctx, hash, "h1"); !errors.Is(err, ErrNotFound) {
|
||||||
t.Errorf("re-consume: want ErrNotFound, got %v", err)
|
t.Errorf("re-consume: want ErrNotFound, got %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user