m/n/core/cluster: rewrite bootstrap to conform to cluster lifecycle DD

This removes the existing cluster/manager code and reimplements it from
scratch, finally bringing cluster bootstrap in line with the cluster
lifecycle design document.

Test Plan:
E2E tests should cover this. Maybe we could unit test the manager? But that
would require a ton of DI work. Not sure if it's worth it.

X-Origin-Diff: phab/D735
GitOrigin-RevId: b00c97b0a102a21605d16086df82a6ece6eb7f4d
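
For context, here is a rough sketch of the consumer-facing surface this rewrite
appears to expose, reconstructed only from the call sites visible in the diff
below. Only Wait(), Node.String(), Node.ConsensusMember() and Node.KV actually
appear there; every other name, field, signature and import path is an
assumption for illustration, not the real manager implementation.

// Sketch of the assumed post-rewrite cluster manager surface. Illustrative
// only; see the note above for which parts are actually grounded in the diff.
package cluster

import (
	// Assumed: node.KV behaves like an etcd clientv3.KV handle. The exact
	// import path differs between etcd releases.
	clientv3 "go.etcd.io/etcd/client/v3"
)

// ConsensusMemberRole is a placeholder for whatever the real code returns to
// describe the consensus-member role.
type ConsensusMemberRole struct{}

// Node describes this node's cluster membership once enrolment has settled.
type Node struct {
	// KV replaces the previous per-caller m.ConsensusKV("cluster", "kpki")
	// accessor with a single etcd view owned by the node.
	KV clientv3.KV
	// Roles, identity, etc. elided.
}

// String returns a human-readable identifier for the node.
func (n *Node) String() string { return "placeholder" }

// ConsensusMember returns a non-nil role if this node runs an etcd consensus
// member.
func (n *Node) ConsensusMember() *ConsensusMemberRole { return nil }

// Manager drives cluster bootstrap/enrolment per the cluster lifecycle DD.
type Manager struct {
	// Dependencies elided.
}

// Wait blocks until the manager settles and returns the local Node, or an
// error describing why enrolment failed. It replaces the previous
// WaitFinished() bool plus separate Node() accessor pair.
func (m *Manager) Wait() (*Node, error) {
	return nil, nil // placeholder body
}
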
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index 7c3f10c..4b19b7f 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -151,21 +151,15 @@
}
// Wait until the cluster manager settles.
- success := m.WaitFinished()
- if !success {
+ node, err := m.Wait()
+ if err != nil {
close(trapdoor)
- return fmt.Errorf("enrolment failed, aborting")
+ return fmt.Errorf("enrolment failed, aborting: %w", err)
}
// We are now in a cluster. We can thus access our 'node' object and start all services that
// we should be running.
- node := m.Node()
- if err := node.ConfigureLocalHostname(&root.Ephemeral); err != nil {
- close(trapdoor)
- return fmt.Errorf("failed to set local hostname: %w", err)
- }
-
logger.Info("Enrolment success, continuing startup.")
logger.Info(fmt.Sprintf("This node (%s) has roles:", node.String()))
if cm := node.ConsensusMember(); cm != nil {
@@ -187,8 +181,7 @@
logger.Info("Starting Kubernetes worker services...")
// Ensure Kubernetes PKI objects exist in etcd.
- kpkiKV := m.ConsensusKV("cluster", "kpki")
- kpki := pki.New(lt.MustLeveledFor("pki.kubernetes"), kpkiKV)
+ kpki := pki.New(lt.MustLeveledFor("pki.kubernetes"), node.KV)
if err := kpki.EnsureAll(ctx); err != nil {
return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err)
}
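
Read together, the two hunks collapse the consumer side of bootstrap into
roughly the following shape. This is a condensed, hypothetical sketch using the
identifiers from the diff; the wrapper function, its parameter types, imports
and the surrounding supervisor/logtree plumbing are assumptions or elisions,
not the actual main.go code.

// Condensed sketch of the post-change bootstrap flow, assuming the
// Manager/Node surface sketched above. Parameter types are assumptions;
// imports and surrounding plumbing are elided.
func startAfterEnrolment(ctx context.Context, m *cluster.Manager, lt *logtree.LogTree, logger logtree.LeveledLogger, trapdoor chan struct{}) error {
	// Wait until the cluster manager settles. Wait now returns the node
	// directly instead of a bare success bool plus a separate Node() call.
	node, err := m.Wait()
	if err != nil {
		close(trapdoor)
		return fmt.Errorf("enrolment failed, aborting: %w", err)
	}
	// The explicit node.ConfigureLocalHostname(&root.Ephemeral) step is gone
	// from this path; presumably the rewritten manager handles hostname
	// setup itself.
	logger.Info("Enrolment success, continuing startup.")

	// Kubernetes PKI now reuses the node's own etcd KV handle instead of a
	// separately scoped m.ConsensusKV("cluster", "kpki") client.
	kpki := pki.New(lt.MustLeveledFor("pki.kubernetes"), node.KV)
	if err := kpki.EnsureAll(ctx); err != nil {
		return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err)
	}
	return nil
}
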