m/n/c/cluster: add proper exponential backoff to Join
Change-Id: I929ef0552912d1f765cbea7d2e0fb19561d2198c
Reviewed-on: https://review.monogon.dev/c/monogon/+/790
Tested-by: Jenkins CI
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/metropolis/node/core/cluster/cluster_join.go b/metropolis/node/core/cluster/cluster_join.go
index 3349fb7..d2e5ad5 100644
--- a/metropolis/node/core/cluster/cluster_join.go
+++ b/metropolis/node/core/cluster/cluster_join.go
@@ -6,8 +6,8 @@
"crypto/x509"
"encoding/hex"
"fmt"
- "time"
+ "github.com/cenkalti/backoff/v4"
"google.golang.org/grpc"
ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
@@ -55,14 +55,17 @@
// Join the cluster and use the newly obtained CUK to mount the data
// partition.
var jr *ipb.JoinNodeResponse
- for {
+ bo := backoff.NewExponentialBackOff()
+ bo.MaxElapsedTime = 0
+ backoff.Retry(func() error {
jr, err = cur.JoinNode(ctx, &ipb.JoinNodeRequest{})
- if err == nil {
- break
+ if err != nil {
+ supervisor.Logger(ctx).Warningf("Join failed: %v", err)
+ // Only signals Retry to try again; the error value itself is discarded.
+ return fmt.Errorf("join call failed")
}
- supervisor.Logger(ctx).Warningf("JoinNode call failed, retrying: %v", err)
- time.Sleep(time.Second)
- }
+ return nil
+ }, bo)
if err := m.storageRoot.Data.MountExisting(sc, jr.ClusterUnlockKey); err != nil {
return fmt.Errorf("while mounting Data: %w", err)
}