m/n/c/{cluster,roleserve}: implement Join Flow
This implements Join Flow for:
- Registered nodes attempting to re-join the cluster.
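- Nodes bootstrapping the cluster, which now save their credentials, cluster directory and sealed configuration as part of the bootstrap process.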
See the Cluster Lifecycle and Integrity design document for background.
Change-Id: I74ab98fdec650c4f6aa59e34a16c0f95745dc0e9
Reviewed-on: https://review.monogon.dev/c/monogon/+/556
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/roleserve/BUILD.bazel b/metropolis/node/core/roleserve/BUILD.bazel
index 4188202..413d9a0 100644
--- a/metropolis/node/core/roleserve/BUILD.bazel
+++ b/metropolis/node/core/roleserve/BUILD.bazel
@@ -32,6 +32,9 @@
"//metropolis/pkg/pki",
"//metropolis/pkg/supervisor",
"//metropolis/proto/common",
+ "//metropolis/proto/private",
"@org_golang_google_grpc//:go_default_library",
+ "@org_golang_google_protobuf//proto",
+ "@org_golang_x_sys//unix",
],
)
diff --git a/metropolis/node/core/roleserve/roleserve.go b/metropolis/node/core/roleserve/roleserve.go
index 2c2e885..667564d 100644
--- a/metropolis/node/core/roleserve/roleserve.go
+++ b/metropolis/node/core/roleserve/roleserve.go
@@ -118,14 +118,16 @@
return s
}
-func (s *Service) ProvideBootstrapData(privkey ed25519.PrivateKey, iok, cuk []byte) {
+func (s *Service) ProvideBootstrapData(privkey ed25519.PrivateKey, iok, cuk, nuk, jkey []byte) {
s.ClusterMembership.set(&ClusterMembership{
pubkey: privkey.Public().(ed25519.PublicKey),
})
s.bootstrapData.set(&bootstrapData{
- nodePrivateKey: privkey,
- initialOwnerKey: iok,
- clusterUnlockKey: cuk,
+ nodePrivateKey: privkey,
+ initialOwnerKey: iok,
+ clusterUnlockKey: cuk,
+ nodeUnlockKey: nuk,
+ nodePrivateJoinKey: jkey,
})
}
@@ -137,6 +139,14 @@
})
}
+func (s *Service) ProvideJoinData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
+ s.ClusterMembership.set(&ClusterMembership{
+ remoteCurators: directory,
+ credentials: &credentials,
+ pubkey: credentials.PublicKey(),
+ })
+}
+
// Run the Role Server service, which uses intermediary workload launchers to
// start/stop subordinate services as the Node's roles change.
func (s *Service) Run(ctx context.Context) error {
diff --git a/metropolis/node/core/roleserve/value_bootstrapdata.go b/metropolis/node/core/roleserve/value_bootstrapdata.go
index 4ab1250..85618bc 100644
--- a/metropolis/node/core/roleserve/value_bootstrapdata.go
+++ b/metropolis/node/core/roleserve/value_bootstrapdata.go
@@ -13,9 +13,11 @@
// the control plane logic to go into bootstrap mode and bring up a control
// plane from scratch.
type bootstrapData struct {
- nodePrivateKey ed25519.PrivateKey
- clusterUnlockKey []byte
- initialOwnerKey []byte
+ nodePrivateKey ed25519.PrivateKey
+ clusterUnlockKey []byte
+ nodeUnlockKey []byte
+ initialOwnerKey []byte
+ nodePrivateJoinKey ed25519.PrivateKey
}
type bootstrapDataValue struct {
diff --git a/metropolis/node/core/roleserve/worker_controlplane.go b/metropolis/node/core/roleserve/worker_controlplane.go
index aa5b4a3..f1ddadf 100644
--- a/metropolis/node/core/roleserve/worker_controlplane.go
+++ b/metropolis/node/core/roleserve/worker_controlplane.go
@@ -8,6 +8,9 @@
"fmt"
"time"
+ "golang.org/x/sys/unix"
+ "google.golang.org/protobuf/proto"
+
"source.monogon.dev/metropolis/node/core/consensus"
"source.monogon.dev/metropolis/node/core/curator"
"source.monogon.dev/metropolis/node/core/identity"
@@ -16,6 +19,7 @@
"source.monogon.dev/metropolis/pkg/pki"
"source.monogon.dev/metropolis/pkg/supervisor"
cpb "source.monogon.dev/metropolis/proto/common"
+ ppb "source.monogon.dev/metropolis/proto/private"
)
// workerControlPlane is the Control Plane Worker, responsible for maintaining a
@@ -281,6 +285,7 @@
// and a previously used cluster directory to be passed over to the new
// ClusterMembership, if any.
var creds *identity.NodeCredentials
+ var caCert []byte
var directory *cpb.ClusterDirectory
if b := startup.bootstrap; b != nil {
supervisor.Logger(ctx).Infof("Bootstrapping control plane. Waiting for consensus...")
@@ -304,9 +309,13 @@
// curator startup.
//
// TODO(q3k): collapse the curator bootstrap shenanigans into a single function.
- n := curator.NewNodeForBootstrap(b.clusterUnlockKey, b.nodePrivateKey.Public().(ed25519.PublicKey))
+ npub := b.nodePrivateKey.Public().(ed25519.PublicKey)
+ jpub := b.nodePrivateJoinKey.Public().(ed25519.PublicKey)
+ n := curator.NewNodeForBootstrap(b.clusterUnlockKey, npub, jpub)
n.EnableKubernetesWorker()
- caCert, nodeCert, err := curator.BootstrapNodeFinish(ctx, ckv, &n, b.initialOwnerKey)
+
+ var nodeCert []byte
+ caCert, nodeCert, err = curator.BootstrapNodeFinish(ctx, ckv, &n, b.initialOwnerKey)
if err != nil {
return fmt.Errorf("while bootstrapping node: %w", err)
}
@@ -360,6 +369,34 @@
})
}
+ // Save this node's credentials, cluster directory and configuration as
+ // part of the control plane bootstrap process.
+ if b := startup.bootstrap; b != nil && caCert != nil {
+ if err = creds.Save(&s.storageRoot.Data.Node.Credentials); err != nil {
+ return fmt.Errorf("while saving node credentials: %w", err)
+ }
+
+ cdirRaw, err := proto.Marshal(directory)
+ if err != nil {
+ return fmt.Errorf("couldn't marshal ClusterDirectory: %w", err)
+ }
+ if err = s.storageRoot.ESP.Metropolis.ClusterDirectory.Write(cdirRaw, 0644); err != nil {
+ return err
+ }
+
+ sc := ppb.SealedConfiguration{
+ NodeUnlockKey: b.nodeUnlockKey,
+ JoinKey: b.nodePrivateJoinKey,
+ ClusterCa: caCert,
+ }
+ if err = s.storageRoot.ESP.Metropolis.SealedConfiguration.SealSecureBoot(&sc); err != nil {
+ return err
+ }
+
+ supervisor.Logger(ctx).Infof("Saved bootstrapped node's credentials.")
+ unix.Sync()
+ }
+
// Start curator.
cur := curator.New(curator.Config{
NodeCredentials: creds,