m/n/c/{cluster,roleserve}: implement Join Flow
This implements Join Flow for:
- Registered nodes attempting to re-join the cluster.
- Nodes bootstrapping the cluster.
See: Cluster Lifecycle and Integrity design document
Change-Id: I74ab98fdec650c4f6aa59e34a16c0f95745dc0e9
Reviewed-on: https://review.monogon.dev/c/monogon/+/556
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/cluster/BUILD.bazel b/metropolis/node/core/cluster/BUILD.bazel
index 78078bb..e8c7ff2 100644
--- a/metropolis/node/core/cluster/BUILD.bazel
+++ b/metropolis/node/core/cluster/BUILD.bazel
@@ -5,6 +5,7 @@
srcs = [
"cluster.go",
"cluster_bootstrap.go",
+ "cluster_join.go",
"cluster_register.go",
"platform.go",
],
@@ -22,9 +23,11 @@
"//metropolis/pkg/event/memory",
"//metropolis/pkg/supervisor",
"//metropolis/proto/api",
+ "//metropolis/proto/common",
"//metropolis/proto/private",
"@com_github_cenkalti_backoff_v4//:backoff",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_protobuf//proto",
+ "@org_golang_x_sys//unix",
],
)
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index d51d7d9..1de24f3 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -31,20 +31,25 @@
"errors"
"fmt"
"io"
+ "net"
"net/http"
"os"
+ "strings"
"sync"
"github.com/cenkalti/backoff/v4"
"google.golang.org/protobuf/proto"
+ "source.monogon.dev/metropolis/node"
"source.monogon.dev/metropolis/node/core/consensus"
+ "source.monogon.dev/metropolis/node/core/identity"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/network"
"source.monogon.dev/metropolis/node/core/roleserve"
"source.monogon.dev/metropolis/pkg/event/memory"
"source.monogon.dev/metropolis/pkg/supervisor"
apb "source.monogon.dev/metropolis/proto/api"
+ cpb "source.monogon.dev/metropolis/proto/common"
ppb "source.monogon.dev/metropolis/proto/private"
)
@@ -107,7 +112,15 @@
configuration, err := m.storageRoot.ESP.Metropolis.SealedConfiguration.Unseal()
if err == nil {
supervisor.Logger(ctx).Info("Sealed configuration present. attempting to join cluster")
- return m.join(ctx, configuration)
+
+ // Read Cluster Directory and unmarshal it. Since the node is already
+ // registered with the cluster, the directory won't be bootstrapped from
+ // Node Parameters.
+ cd, err := m.storageRoot.ESP.Metropolis.ClusterDirectory.Unmarshal()
+ if err != nil {
+ return fmt.Errorf("while reading cluster directory: %w", err)
+ }
+ return m.join(ctx, configuration, cd)
}
if !errors.Is(err, localstorage.ErrNoSealed) {
@@ -241,6 +254,30 @@
}
}
-func (m *Manager) join(ctx context.Context, cfg *ppb.SealedConfiguration) error {
- return fmt.Errorf("unimplemented")
+// logClusterDirectory writes one log line per node listed in the given Cluster
+// Directory. Each line carries the node's ID, as derived from its public key,
+// and a comma-separated list of the hosts of its addresses.
+func logClusterDirectory(ctx context.Context, cd *cpb.ClusterDirectory) {
+	for i := range cd.Nodes {
+		entry := cd.Nodes[i]
+		nodeID := identity.NodeID(entry.PublicKey)
+
+		// Collect the host part of every address known for this node.
+		var hosts []string
+		for j := range entry.Addresses {
+			hosts = append(hosts, entry.Addresses[j].Host)
+		}
+
+		joined := strings.Join(hosts, ",")
+		supervisor.Logger(ctx).Infof(" Node ID: %s, Addresses: %s", nodeID, joined)
+	}
+}
+
+// curatorRemote returns a host:port pair pointing at one of the cluster's
+// Curator endpoints. The first address of the first node listed in the Cluster
+// Directory is always picked and combined with node.CuratorServicePort.
+//
+// It returns an empty string and an error if the Cluster Directory is nil or
+// contains no nodes, or if its first node has no addresses.
+// TODO(issues/117): use dynamic cluster client instead
+func curatorRemote(cd *cpb.ClusterDirectory) (string, error) {
+	// Generated protobuf getters are nil-safe, so a nil directory is reported
+	// the same way as an empty one instead of causing a nil dereference.
+	nodes := cd.GetNodes()
+	if len(nodes) == 0 {
+		return "", errors.New("the Cluster Directory is empty")
+	}
+	addrs := nodes[0].GetAddresses()
+	if len(addrs) == 0 {
+		return "", errors.New("the first node in the Cluster Directory doesn't have an associated Address")
+	}
+	return net.JoinHostPort(addrs[0].GetHost(), node.CuratorServicePort.PortString()), nil
+}
diff --git a/metropolis/node/core/cluster/cluster_bootstrap.go b/metropolis/node/core/cluster/cluster_bootstrap.go
index d40f179..9de5339 100644
--- a/metropolis/node/core/cluster/cluster_bootstrap.go
+++ b/metropolis/node/core/cluster/cluster_bootstrap.go
@@ -43,6 +43,7 @@
if err != nil {
return fmt.Errorf("could not make and mount data partition: %w", err)
}
+ nuk := state.configuration.NodeUnlockKey
pub, priv, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
@@ -50,7 +51,13 @@
}
supervisor.Logger(ctx).Infof("Bootstrapping: node public key: %s", hex.EncodeToString([]byte(pub)))
- m.roleServer.ProvideBootstrapData(priv, ownerKey, cuk)
+ jpub, jpriv, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ return fmt.Errorf("could not generate join keypair: %w", err)
+ }
+ supervisor.Logger(ctx).Infof("Bootstrapping: node public join key: %s", hex.EncodeToString([]byte(jpub)))
+
+ m.roleServer.ProvideBootstrapData(priv, ownerKey, cuk, nuk, jpriv)
supervisor.Signal(ctx, supervisor.SignalHealthy)
supervisor.Signal(ctx, supervisor.SignalDone)
diff --git a/metropolis/node/core/cluster/cluster_join.go b/metropolis/node/core/cluster/cluster_join.go
new file mode 100644
index 0000000..0cb68bb
--- /dev/null
+++ b/metropolis/node/core/cluster/cluster_join.go
@@ -0,0 +1,75 @@
+package cluster
+
+import (
+ "context"
+ "crypto/ed25519"
+ "crypto/x509"
+ "encoding/hex"
+ "fmt"
+
+ "google.golang.org/grpc"
+
+ ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
+ "source.monogon.dev/metropolis/node/core/identity"
+ "source.monogon.dev/metropolis/node/core/rpc"
+ "source.monogon.dev/metropolis/pkg/supervisor"
+ cpb "source.monogon.dev/metropolis/proto/common"
+ ppb "source.monogon.dev/metropolis/proto/private"
+)
+
+// join implements Join Flow of an already registered node.
+//
+// It connects to a Curator picked from the Cluster Directory cd, using
+// ephemeral credentials built from the Join Key and Cluster CA stored in the
+// Sealed Configuration sc, and calls JoinNode. The Cluster Unlock Key returned
+// by that call is used to mount the existing data partition, from which the
+// node credentials are read and handed, together with cd, to the role server.
+//
+// It returns an error if the Join Key or Cluster CA in sc is malformed, if no
+// Curator endpoint can be picked from cd, or if any subsequent step fails.
+func (m *Manager) join(ctx context.Context, sc *ppb.SealedConfiguration, cd *cpb.ClusterDirectory) error {
+	// The Sealed Configuration carries the complete ED25519 private Join Key.
+	// Validate its size up front: deriving the public key from a missing or
+	// truncated key would otherwise panic or yield a bogus public key.
+	if got, want := len(sc.JoinKey), ed25519.PrivateKeySize; got != want {
+		return fmt.Errorf("join key present in Sealed Configuration is %d bytes long, wanted %d", got, want)
+	}
+	jpriv := ed25519.PrivateKey(sc.JoinKey)
+
+	// Get Cluster CA from Sealed Configuration.
+	ca, err := x509.ParseCertificate(sc.ClusterCa)
+	if err != nil {
+		return fmt.Errorf("Cluster CA certificate present in Sealed Configuration could not be parsed: %w", err)
+	}
+
+	// Tell the user what we're doing.
+	hpkey := hex.EncodeToString(jpriv.Public().(ed25519.PublicKey))
+	supervisor.Logger(ctx).Infof("Joining an existing cluster.")
+	supervisor.Logger(ctx).Infof(" Node Join public key: %s", hpkey)
+	supervisor.Logger(ctx).Infof(" Directory:")
+	logClusterDirectory(ctx, cd)
+
+	// Attempt to connect to the first node in the cluster directory.
+	r, err := curatorRemote(cd)
+	if err != nil {
+		return fmt.Errorf("while picking a Curator endpoint: %w", err)
+	}
+	ephCreds, err := rpc.NewEphemeralCredentials(jpriv, ca)
+	if err != nil {
+		return fmt.Errorf("could not create ephemeral credentials: %w", err)
+	}
+	eph, err := grpc.Dial(r, grpc.WithTransportCredentials(ephCreds))
+	if err != nil {
+		return fmt.Errorf("could not create ephemeral client to %q: %w", r, err)
+	}
+	// The ephemeral connection is only needed for the JoinNode call below, so
+	// release it when join returns instead of leaking it.
+	defer eph.Close()
+	cur := ipb.NewCuratorClient(eph)
+
+	// Join the cluster and use the newly obtained CUK to mount the data
+	// partition.
+	jr, err := cur.JoinNode(ctx, &ipb.JoinNodeRequest{})
+	if err != nil {
+		return fmt.Errorf("join call failed: %w", err)
+	}
+	if err := m.storageRoot.Data.MountExisting(sc, jr.ClusterUnlockKey); err != nil {
+		return fmt.Errorf("while mounting Data: %w", err)
+	}
+
+	// Use the node credentials found in the data partition.
+	var creds identity.NodeCredentials
+	if err := creds.Read(&m.storageRoot.Data.Node.Credentials); err != nil {
+		return fmt.Errorf("while reading node credentials: %w", err)
+	}
+	m.roleServer.ProvideJoinData(creds, cd)
+
+	supervisor.Logger(ctx).Infof("Joined the cluster.")
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+	supervisor.Signal(ctx, supervisor.SignalDone)
+	return nil
+}
diff --git a/metropolis/node/core/cluster/cluster_register.go b/metropolis/node/core/cluster/cluster_register.go
index 3acb7d7..c348c32 100644
--- a/metropolis/node/core/cluster/cluster_register.go
+++ b/metropolis/node/core/cluster/cluster_register.go
@@ -8,13 +8,12 @@
"encoding/hex"
"fmt"
"net"
- "strconv"
- "strings"
"time"
+ "golang.org/x/sys/unix"
"google.golang.org/grpc"
+ "google.golang.org/protobuf/proto"
- "source.monogon.dev/metropolis/node"
ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
"source.monogon.dev/metropolis/node/core/identity"
"source.monogon.dev/metropolis/node/core/rpc"
@@ -71,14 +70,7 @@
supervisor.Logger(ctx).Infof(" Cluster CA public key: %s", hex.EncodeToString(ca.PublicKey.(ed25519.PublicKey)))
supervisor.Logger(ctx).Infof(" Register Ticket: %s", hex.EncodeToString(register.RegisterTicket))
supervisor.Logger(ctx).Infof(" Directory:")
- for _, node := range register.ClusterDirectory.Nodes {
- id := identity.NodeID(node.PublicKey)
- var addresses []string
- for _, add := range node.Addresses {
- addresses = append(addresses, add.Host)
- }
- supervisor.Logger(ctx).Infof(" Node ID: %s, Addresses: %s", id, strings.Join(addresses, ","))
- }
+ logClusterDirectory(ctx, register.ClusterDirectory)
// Mount new storage with generated CUK, MountNew will save NUK into sc, to be
// saved into the ESP after successful registration.
@@ -97,20 +89,29 @@
supervisor.Logger(ctx).Infof("Registering: node public key: %s", hex.EncodeToString([]byte(pub)))
// Attempt to connect to first node in cluster directory and to call Register.
- //
- // MVP: this should be properly client-side loadbalanced.
- remote := register.ClusterDirectory.Nodes[0].Addresses[0].Host
- remote = net.JoinHostPort(remote, strconv.Itoa(int(node.CuratorServicePort)))
+ r, err := curatorRemote(register.ClusterDirectory)
+ if err != nil {
+ return fmt.Errorf("while picking a Curator endpoint: %w", err)
+ }
ephCreds, err := rpc.NewEphemeralCredentials(priv, ca)
if err != nil {
return fmt.Errorf("could not create ephemeral credentials: %w", err)
}
- eph, err := grpc.Dial(remote, grpc.WithTransportCredentials(ephCreds))
+ eph, err := grpc.Dial(r, grpc.WithTransportCredentials(ephCreds))
if err != nil {
- return fmt.Errorf("could not create ephemeral client to %q: %w", remote, err)
+ return fmt.Errorf("could not create ephemeral client to %q: %w", r, err)
}
cur := ipb.NewCuratorClient(eph)
+ // Generate Join Credentials. The private key will be stored in
+ // SealedConfiguration only if RegisterNode succeeds.
+ jpub, jpriv, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ return fmt.Errorf("could not generate join keypair: %w", err)
+ }
+ sc.JoinKey = jpriv
+ supervisor.Logger(ctx).Infof("Registering: join public key: %s", hex.EncodeToString([]byte(jpub)))
+
// Register this node.
//
// MVP: From this point on forward, we have very little resiliency to failure,
@@ -120,6 +121,7 @@
// code should let us do this quite easily.
_, err = cur.RegisterNode(ctx, &ipb.RegisterNodeRequest{
RegisterTicket: register.RegisterTicket,
+ JoinKey: jpub,
})
if err != nil {
return fmt.Errorf("register call failed: %w", err)
@@ -149,14 +151,25 @@
}
m.roleServer.ProvideRegisterData(*creds, register.ClusterDirectory)
- // Save NUK
- if err = m.storageRoot.ESP.Metropolis.SealedConfiguration.SealSecureBoot(&sc); err != nil {
- return fmt.Errorf("failed to seal and write configuration: %w", err)
- }
// Save Node Credentials
- if err = m.storageRoot.Data.Node.Credentials.WriteAll(certBytes, priv, caCertBytes); err != nil {
- return fmt.Errorf("while writing node credentials: %w", err)
+ if err = creds.Save(&m.storageRoot.Data.Node.Credentials); err != nil {
+ return fmt.Errorf("while saving node credentials: %w", err)
}
+ // Save the Cluster Directory into the ESP.
+ cdirRaw, err := proto.Marshal(register.ClusterDirectory)
+ if err != nil {
+ return fmt.Errorf("couldn't marshal ClusterDirectory: %w", err)
+ }
+ if err = m.storageRoot.ESP.Metropolis.ClusterDirectory.Write(cdirRaw, 0644); err != nil {
+ return err
+ }
+ // Include the Cluster CA in Sealed Configuration.
+ sc.ClusterCa = register.CaCertificate
+ // Save Cluster CA, NUK and Join Credentials into Sealed Configuration.
+ if err = m.storageRoot.ESP.Metropolis.SealedConfiguration.SealSecureBoot(&sc); err != nil {
+ return err
+ }
+ unix.Sync()
supervisor.Signal(ctx, supervisor.SignalHealthy)
supervisor.Signal(ctx, supervisor.SignalDone)
diff --git a/metropolis/node/core/curator/bootstrap.go b/metropolis/node/core/curator/bootstrap.go
index 215bfb7..4b0b743 100644
--- a/metropolis/node/core/curator/bootstrap.go
+++ b/metropolis/node/core/curator/bootstrap.go
@@ -105,14 +105,20 @@
if err != nil {
return nil, nil, fmt.Errorf("failed to marshal initial owner: %w", err)
}
+ joinKeyPath, err := node.etcdJoinKeyPath()
+ if err != nil {
+ return nil, nil, fmt.Errorf("failed to get join key: %w", err)
+ }
// We don't care about the result's success - this is idempotent.
_, err = etcd.Txn(ctx).If(
clientv3.Compare(clientv3.CreateRevision(nodePath), "=", 0),
clientv3.Compare(clientv3.CreateRevision(initialOwnerEtcdPath), "=", 0),
+ clientv3.Compare(clientv3.CreateRevision(joinKeyPath), "=", 0),
).Then(
clientv3.OpPut(nodePath, string(nodeRaw)),
clientv3.OpPut(initialOwnerEtcdPath, string(ownerRaw)),
+ clientv3.OpPut(joinKeyPath, node.ID()),
).Commit()
if err != nil {
return nil, nil, fmt.Errorf("failed to store initial cluster state: %w", err)
diff --git a/metropolis/node/core/curator/impl_leader_curator.go b/metropolis/node/core/curator/impl_leader_curator.go
index 1c7221f..6f48d45 100644
--- a/metropolis/node/core/curator/impl_leader_curator.go
+++ b/metropolis/node/core/curator/impl_leader_curator.go
@@ -2,6 +2,7 @@
import (
"context"
+ "crypto/ed25519"
"crypto/subtle"
"fmt"
"net"
@@ -265,8 +266,10 @@
}
pubkey := pi.Unauthenticated.SelfSignedPublicKey
- // TODO(mateusz@monogon.tech): check req.JoinKey length once Join Flow is
- // implemented on the client side.
+ // Check the Join Key size.
+ if want, got := ed25519.PublicKeySize, len(req.JoinKey); want != got {
+ return nil, status.Errorf(codes.InvalidArgument, "join_key must be set and be %d bytes long", want)
+ }
// Verify that call contains a RegisterTicket and that this RegisterTicket is
// valid.
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
index 13564c4..2894b29 100644
--- a/metropolis/node/core/curator/impl_leader_test.go
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -67,12 +67,18 @@
}
lockRev := res.Header.Revision
+ // Generate the node's public join key to be used in the bootstrap process.
+ nodeJoinPub, _, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ t.Fatalf("could not generate node join keypair: %v", err)
+ }
+
// Build cluster PKI with first node, replicating the cluster bootstrap process.
nodePub, nodePriv, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("could not generate node keypair: %v", err)
}
- cNode := NewNodeForBootstrap(nil, nodePub)
+ cNode := NewNodeForBootstrap(nil, nodePub, nodeJoinPub)
caCertBytes, nodeCertBytes, err := BootstrapNodeFinish(ctx, curEtcd, &cNode, nil)
if err != nil {
t.Fatalf("could not finish node bootstrap: %v", err)
diff --git a/metropolis/node/core/curator/state_node.go b/metropolis/node/core/curator/state_node.go
index 82f9859..9989987 100644
--- a/metropolis/node/core/curator/state_node.go
+++ b/metropolis/node/core/curator/state_node.go
@@ -91,10 +91,11 @@
// cluster state.
//
// This can only be used by the cluster bootstrap logic.
-func NewNodeForBootstrap(cuk, pubkey []byte) Node {
+func NewNodeForBootstrap(cuk, pubkey, jpub []byte) Node {
return Node{
clusterUnlockKey: cuk,
pubkey: pubkey,
+ jkey: jpub,
state: cpb.NodeState_NODE_STATE_UP,
}
}
@@ -325,27 +326,22 @@
return status.Errorf(codes.Unavailable, "could not marshal updated node")
}
ons := clientv3.OpPut(nkey, string(nodeBytes))
- ops := []clientv3.Op{ons}
// Build an etcd operation to map the node's Join Key into its ID for use in
- // Join Flow, if jkey is set. Once Join Flow is implemented on the client
- // side, this operation will become mandatory.
- if n.jkey != nil {
- jkey, err := n.etcdJoinKeyPath()
- if err != nil {
- // This should never happen.
- rpc.Trace(ctx).Printf("invalid join key representation: %v", err)
- return status.Errorf(codes.InvalidArgument, "invalid join key representation")
- }
- // TODO(mateusz@monogon.tech): ensure that if the join key index already
- // exists, it points to the node we're saving. Refuse to save/update the
- // node if it doesn't.
- oks := clientv3.OpPut(jkey, id)
- ops = append(ops, oks)
+ // Join Flow.
+ jkey, err := n.etcdJoinKeyPath()
+ if err != nil {
+ // This should never happen.
+ rpc.Trace(ctx).Printf("invalid join key representation: %v", err)
+ return status.Errorf(codes.InvalidArgument, "invalid join key representation")
}
+ // TODO(mateusz@monogon.tech): ensure that if the join key index already
+ // exists, it points to the node we're saving. Refuse to save/update the
+ // node if it doesn't.
+ oks := clientv3.OpPut(jkey, id)
- // Execute one or both operations atomically.
- _, err = l.txnAsLeader(ctx, ops...)
+ // Execute both operations atomically.
+ _, err = l.txnAsLeader(ctx, ons, oks)
if err != nil {
if rpcErr, ok := rpcError(err); ok {
return rpcErr
diff --git a/metropolis/node/core/identity/identity.go b/metropolis/node/core/identity/identity.go
index 862e794..8749b6d 100644
--- a/metropolis/node/core/identity/identity.go
+++ b/metropolis/node/core/identity/identity.go
@@ -124,6 +124,35 @@
return nil
}
+// Read initializes NodeCredentials' contents with the data stored in the
+// PKIDirectory d: the CA certificate, the node certificate and the node private
+// key. It may return an I/O error, or a certificate parsing error. On error, n
+// is left unmodified.
+func (n *NodeCredentials) Read(d *localstorage.PKIDirectory) error {
+	caRaw, err := d.CACertificate.Read()
+	if err != nil {
+		return fmt.Errorf("while reading CA certificate: %w", err)
+	}
+	caCert, err := x509.ParseCertificate(caRaw)
+	if err != nil {
+		return fmt.Errorf("while parsing CA certificate: %w", err)
+	}
+
+	nodeRaw, err := d.Certificate.Read()
+	if err != nil {
+		return fmt.Errorf("while reading node certificate: %w", err)
+	}
+	nodeCert, err := x509.ParseCertificate(nodeRaw)
+	if err != nil {
+		return fmt.Errorf("while parsing node certificate: %w", err)
+	}
+
+	priv, err := d.Key.Read()
+	if err != nil {
+		return fmt.Errorf("while reading node private key: %w", err)
+	}
+
+	// Only commit to the receiver once everything has been read and parsed, so
+	// that a failure never leaves n partially populated.
+	n.ca = caCert
+	n.node = nodeCert
+	n.private = priv
+	return nil
+}
+
// NodeIDBare returns the `{pubkeyHash}` part of the node ID.
func NodeIDBare(pub []byte) string {
return hex.EncodeToString(pub[:16])
diff --git a/metropolis/node/core/roleserve/BUILD.bazel b/metropolis/node/core/roleserve/BUILD.bazel
index 4188202..413d9a0 100644
--- a/metropolis/node/core/roleserve/BUILD.bazel
+++ b/metropolis/node/core/roleserve/BUILD.bazel
@@ -32,6 +32,9 @@
"//metropolis/pkg/pki",
"//metropolis/pkg/supervisor",
"//metropolis/proto/common",
+ "//metropolis/proto/private",
"@org_golang_google_grpc//:go_default_library",
+ "@org_golang_google_protobuf//proto",
+ "@org_golang_x_sys//unix",
],
)
diff --git a/metropolis/node/core/roleserve/roleserve.go b/metropolis/node/core/roleserve/roleserve.go
index 2c2e885..667564d 100644
--- a/metropolis/node/core/roleserve/roleserve.go
+++ b/metropolis/node/core/roleserve/roleserve.go
@@ -118,14 +118,16 @@
return s
}
-func (s *Service) ProvideBootstrapData(privkey ed25519.PrivateKey, iok, cuk []byte) {
+func (s *Service) ProvideBootstrapData(privkey ed25519.PrivateKey, iok, cuk, nuk, jkey []byte) {
s.ClusterMembership.set(&ClusterMembership{
pubkey: privkey.Public().(ed25519.PublicKey),
})
s.bootstrapData.set(&bootstrapData{
- nodePrivateKey: privkey,
- initialOwnerKey: iok,
- clusterUnlockKey: cuk,
+ nodePrivateKey: privkey,
+ initialOwnerKey: iok,
+ clusterUnlockKey: cuk,
+ nodeUnlockKey: nuk,
+ nodePrivateJoinKey: jkey,
})
}
@@ -137,6 +139,14 @@
})
}
+// ProvideJoinData sets the Service's ClusterMembership from the credentials
+// and the Cluster Directory of a node that has joined the cluster. The
+// membership's public key is taken from the given credentials.
+func (s *Service) ProvideJoinData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
+	membership := &ClusterMembership{
+		pubkey:         credentials.PublicKey(),
+		credentials:    &credentials,
+		remoteCurators: directory,
+	}
+	s.ClusterMembership.set(membership)
+}
+
// Run the Role Server service, which uses intermediary workload launchers to
// start/stop subordinate services as the Node's roles change.
func (s *Service) Run(ctx context.Context) error {
diff --git a/metropolis/node/core/roleserve/value_bootstrapdata.go b/metropolis/node/core/roleserve/value_bootstrapdata.go
index 4ab1250..85618bc 100644
--- a/metropolis/node/core/roleserve/value_bootstrapdata.go
+++ b/metropolis/node/core/roleserve/value_bootstrapdata.go
@@ -13,9 +13,11 @@
// the control plane logic to go into bootstrap mode and bring up a control
// plane from scratch.
type bootstrapData struct {
- nodePrivateKey ed25519.PrivateKey
- clusterUnlockKey []byte
- initialOwnerKey []byte
+ nodePrivateKey ed25519.PrivateKey
+ clusterUnlockKey []byte
+ nodeUnlockKey []byte
+ initialOwnerKey []byte
+ nodePrivateJoinKey ed25519.PrivateKey
}
type bootstrapDataValue struct {
diff --git a/metropolis/node/core/roleserve/worker_controlplane.go b/metropolis/node/core/roleserve/worker_controlplane.go
index aa5b4a3..f1ddadf 100644
--- a/metropolis/node/core/roleserve/worker_controlplane.go
+++ b/metropolis/node/core/roleserve/worker_controlplane.go
@@ -8,6 +8,9 @@
"fmt"
"time"
+ "golang.org/x/sys/unix"
+ "google.golang.org/protobuf/proto"
+
"source.monogon.dev/metropolis/node/core/consensus"
"source.monogon.dev/metropolis/node/core/curator"
"source.monogon.dev/metropolis/node/core/identity"
@@ -16,6 +19,7 @@
"source.monogon.dev/metropolis/pkg/pki"
"source.monogon.dev/metropolis/pkg/supervisor"
cpb "source.monogon.dev/metropolis/proto/common"
+ ppb "source.monogon.dev/metropolis/proto/private"
)
// workerControlPlane is the Control Plane Worker, responsible for maintaining a
@@ -281,6 +285,7 @@
// and a previously used cluster directory to be passed over to the new
// ClusterMembership, if any.
var creds *identity.NodeCredentials
+ var caCert []byte
var directory *cpb.ClusterDirectory
if b := startup.bootstrap; b != nil {
supervisor.Logger(ctx).Infof("Bootstrapping control plane. Waiting for consensus...")
@@ -304,9 +309,13 @@
// curator startup.
//
// TODO(q3k): collapse the curator bootstrap shenanigans into a single function.
- n := curator.NewNodeForBootstrap(b.clusterUnlockKey, b.nodePrivateKey.Public().(ed25519.PublicKey))
+ npub := b.nodePrivateKey.Public().(ed25519.PublicKey)
+ jpub := b.nodePrivateJoinKey.Public().(ed25519.PublicKey)
+ n := curator.NewNodeForBootstrap(b.clusterUnlockKey, npub, jpub)
n.EnableKubernetesWorker()
- caCert, nodeCert, err := curator.BootstrapNodeFinish(ctx, ckv, &n, b.initialOwnerKey)
+
+ var nodeCert []byte
+ caCert, nodeCert, err = curator.BootstrapNodeFinish(ctx, ckv, &n, b.initialOwnerKey)
if err != nil {
return fmt.Errorf("while bootstrapping node: %w", err)
}
@@ -360,6 +369,34 @@
})
}
+ // Save this node's credentials, cluster directory and configuration as
+ // part of the control plane bootstrap process.
+ if b := startup.bootstrap; b != nil && caCert != nil {
+ if err = creds.Save(&s.storageRoot.Data.Node.Credentials); err != nil {
+ return fmt.Errorf("while saving node credentials: %w", err)
+ }
+
+ cdirRaw, err := proto.Marshal(directory)
+ if err != nil {
+ return fmt.Errorf("couldn't marshal ClusterDirectory: %w", err)
+ }
+ if err = s.storageRoot.ESP.Metropolis.ClusterDirectory.Write(cdirRaw, 0644); err != nil {
+ return err
+ }
+
+ sc := ppb.SealedConfiguration{
+ NodeUnlockKey: b.nodeUnlockKey,
+ JoinKey: b.nodePrivateJoinKey,
+ ClusterCa: caCert,
+ }
+ if err = s.storageRoot.ESP.Metropolis.SealedConfiguration.SealSecureBoot(&sc); err != nil {
+ return err
+ }
+
+ supervisor.Logger(ctx).Infof("Saved bootstrapped node's credentials.")
+ unix.Sync()
+ }
+
// Start curator.
cur := curator.New(curator.Config{
NodeCredentials: creds,
diff --git a/metropolis/proto/private/private.proto b/metropolis/proto/private/private.proto
index a09b0d3..c682311 100644
--- a/metropolis/proto/private/private.proto
+++ b/metropolis/proto/private/private.proto
@@ -67,4 +67,10 @@
// the data partition. The NUK and CUK are generated on bootstrap and
// registration.
bytes node_unlock_key = 1;
+ // join_key is an ED25519 private key generated during bootstrap or
+ // registration. Its public part is shared with the Curator to authenticate
+ // the join procedure.
+ bytes join_key = 2;
+ // cluster_ca is the X509 CA certificate of the cluster, stored during
+ // bootstrap or registration and used by nodes joining the cluster.
+ bytes cluster_ca = 3;
}