From bc02fcb498a44fdf2dadb438350ee7632186ab83 Mon Sep 17 00:00:00 2001 From: Steve Cliff Date: Mon, 4 May 2026 10:20:54 +0100 Subject: [PATCH] test: poll pending-row count in drain-on-reconnect test (race fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI run #50 failed with: --- FAIL: TestDrainPendingDispatchesOnReconnect (1.03s) pending_drain_test.go:150: pending rows after drain: got 1, want 0 The test waits for a backup command.run envelope on the wire and then checks the pending-row count. But conn.Send (the wire write) returns BEFORE DeletePendingRun runs in the drain goroutine — both fire serially inside drainOne, but the wire-side reader can observe the Send while the delete is still pending. Use the existing waitForPendingCount helper to poll the count with a 2s deadline. Behaviour unchanged when the delete is fast (count hits 0 immediately); only relevant under CI scheduling pressure. -race -count=10 locally now passes consistently. --- internal/server/http/pending_drain_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/server/http/pending_drain_test.go b/internal/server/http/pending_drain_test.go index a216c25..0cec822 100644 --- a/internal/server/http/pending_drain_test.go +++ b/internal/server/http/pending_drain_test.go @@ -145,7 +145,12 @@ func TestDrainPendingDispatchesOnReconnect(t *testing.T) { t.Errorf("backup tag: %q", got.Tag) } - // Pending row should be gone. + // Pending row should be gone. Poll briefly: the drain goroutine + // sends command.run via conn.Send and only then calls + // DeletePendingRun. Reading the envelope off the wire above proves + // the send happened, but the delete runs after that on the drain + // goroutine — small window where the count is still 1. + waitForPendingCount(t, st, hostID, 0, 2*time.Second) if n := countPendingForHost(t, st, hostID); n != 0 { t.Errorf("pending rows after drain: got %d, want 0", n) }