m/n/c/curator: deflake leader election test

The check for proper leadership re-election was vulnerable to a race
where it assumed optimistically that if a leader got re-elected, other
nodes would have already established them as followers. This isn't
always the case, so we re-work the check to instead be a wait-until
construct.

Change-Id: I6326dad9a88f7b1ba61c218e37a73e868d722506
Reviewed-on: https://review.monogon.dev/c/monogon/+/448
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/metropolis/node/core/curator/curator_test.go b/metropolis/node/core/curator/curator_test.go
index af87c9e..5ce0077 100644
--- a/metropolis/node/core/curator/curator_test.go
+++ b/metropolis/node/core/curator/curator_test.go
@@ -296,7 +296,7 @@
 	// Wait until we switch leaders
 	dss, err = duts.wait(ctx, func(dss dutSetStatus) bool {
 		// Ensure we've lost leadership on the initial leader.
-		if i, ok := dss[leaderEndpoint]; ok && i.leader != nil {
+		if i, ok := dss[leaderEndpoint]; !ok || i.leader != nil {
 			return false
 		}
 		return len(dss.leaders()) == 1 && len(dss.followers()) == 1
@@ -332,17 +332,30 @@
 
 	// Ensure the last node of the cluster (not the current leader and not the
 	// previous leader) is now a follower pointing at the new leader.
-	for endpoint, status := range dss {
-		if endpoint == leaderEndpoint || endpoint == newLeaderEndpoint {
-			continue
+	_, err = duts.wait(ctx, func(dss dutSetStatus) bool {
+		for endpoint, status := range dss {
+			switch endpoint {
+			case leaderEndpoint:
+				// Don't care about the old leader, it's still partitioned off.
+				continue
+			case newLeaderEndpoint:
+				// Must be leader.
+				if status.leader == nil {
+					return false
+				}
+			default:
+				// Other nodes must be following new leader.
+				if status.follower == nil {
+					return false
+				}
+				if status.follower.lock.NodeId != newLeaderNodeID {
+					return false
+				}
+			}
 		}
-		follower := status.follower
-		if follower == nil {
-			t.Errorf("instance %q is not a follower", endpoint)
-			continue
-		}
-		if want, got := newLeaderNodeID, follower.lock.NodeId; want != got {
-			t.Errorf("instance %q sees node id %q as follower, wanted %q", endpoint, want, got)
-		}
+		return true
+	})
+	if err != nil {
+		t.Fatalf("waiting for dut set: %v", err)
 	}
 }