m/n/core/roleserve: rework cluster membership, reuse control plane connections
This reworks roleserver internals to simplify the handling of cluster
membership state. The end goal is to allow reusing control plane gRPC
connections across different components in a node, but the refactor goes
a bit beyond that.
Ever since the introduction of the rpc resolver, the control plane
startup problem has effectively been simplified. This is because the
resolver allows the rest of the system to dynamically switch between
different gRPC endpoints for the control plane.
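As a rough sketch, this is what resolver-based dialing looks like (it
mirrors the newCuratorConnection helper added in this change; creds, ca
and res stand in for a node's TLS credentials, cluster CA and the shared
rpc/resolver.Resolver):

    c := rpc.NewAuthenticatedCredentials(creds, rpc.WantRemoteCluster(ca))
    // MetropolisControlAddress is a stable, virtual address; the resolver
    // swaps the concrete curator endpoints underneath it at runtime.
    conn, err := grpc.Dial(resolver.MetropolisControlAddress,
        grpc.WithTransportCredentials(c), grpc.WithResolvers(res))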
This means that some of the existing complexity in the roleserver (which
predates the resolver) can be thrown away. Notably, we remove the
ClusterMembership structure and replace it with two significantly
simpler structures that represent two separate facts about the local
node:
1. localControlPlane carries information about whether this node has a
locally running control plane. This is only used by the statuspusher
(to report whether the control plane is running) and by the
Kubernetes control plane.
2. curatorConnection carries the node's credentials, the resolver and an
open gRPC connection to the control plane. It is now the single
roleserver EventValue consumed by the vast majority of roleserver
runnables (see the sketch after this list).
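As a rough sketch, a runnable consuming curatorConnection looks
approximately like this (the Watch/Get event API, the CuratorConnection
field name and the ipb.NewCuratorClient call are assumptions drawn from
the surrounding codebase, not verbatim from this diff):

    w := s.CuratorConnection.Watch()
    defer w.Close()
    v, err := w.Get(ctx)
    if err != nil {
        return err
    }
    cc := v.(*curatorConnection)
    // All control plane RPCs now share this single open connection; no
    // per-runnable dialing or credential plumbing is needed.
    cur := ipb.NewCuratorClient(cc.conn)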
The resulting code, especially inside the control plane roleserver
runnable, is now less complex, at the cost of a bit of an ugly refactor.
Change-Id: Idbe1ff2ac3bfb2d570bed040a2f78ccabb66caba
Reviewed-on: https://review.monogon.dev/c/monogon/+/1749
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/roleserve/values.go b/metropolis/node/core/roleserve/values.go
new file mode 100644
index 0000000..3b7ff25
--- /dev/null
+++ b/metropolis/node/core/roleserve/values.go
@@ -0,0 +1,92 @@
+package roleserve
+
+import (
+ "crypto/ed25519"
+
+ "google.golang.org/grpc"
+
+ "source.monogon.dev/metropolis/node/core/consensus"
+ "source.monogon.dev/metropolis/node/core/curator"
+ "source.monogon.dev/metropolis/node/core/identity"
+ "source.monogon.dev/metropolis/node/core/rpc"
+ "source.monogon.dev/metropolis/node/core/rpc/resolver"
+ "source.monogon.dev/metropolis/node/kubernetes"
+
+ cpb "source.monogon.dev/metropolis/proto/common"
+)
+
+// bootstrapData is an internal EventValue structure which is populated by the
+// Cluster Enrolment logic via ProvideBootstrapData. It contains data needed by
+// the control plane logic to go into bootstrap mode and bring up a control
+// plane from scratch.
+type bootstrapData struct {
+ nodePrivateKey ed25519.PrivateKey
+ clusterUnlockKey []byte
+ nodeUnlockKey []byte
+ initialOwnerKey []byte
+ nodePrivateJoinKey ed25519.PrivateKey
+ initialClusterConfiguration *curator.Cluster
+ nodeTPMUsage cpb.NodeTPMUsage
+}
+
+// localControlPlane is an internal EventValue structure which carries
+// information about whether the node has a locally running consensus and curator
+// service. When it does, the structure pointer inside the EventValue will be
+// non-nil and its consensus and curator members will also be non-nil. If it
+// doesn't, either the pointer inside the EventValue will be nil, or it will
+// carry nil pointers. Because of this, it is recommended to use the exists()
+// method to check for consensus/curator presence.
+type localControlPlane struct {
+ consensus *consensus.Service
+ curator *curator.Service
+}
+
+func (l *localControlPlane) exists() bool {
+ if l == nil {
+ return false
+ }
+ if l.consensus == nil || l.curator == nil {
+ return false
+ }
+ return true
+}
+
+// curatorConnection carries information about the node having successfully
+// established connectivity to its cluster's control plane.
+//
+// It carries inside it a single gRPC client connection which is built using the
+// main roleserver resolver. This connection will automatically use any available
+// curator, whether running locally or remotely.
+//
+// This structure should also be used by roleserver runnables that simply wish to
+// access the node's credentials.
+type curatorConnection struct {
+ credentials *identity.NodeCredentials
+ resolver *resolver.Resolver
+ conn *grpc.ClientConn
+}
+
+func newCuratorConnection(creds *identity.NodeCredentials, res *resolver.Resolver) *curatorConnection {
+ c := rpc.NewAuthenticatedCredentials(creds.TLSCredentials(), rpc.WantRemoteCluster(creds.ClusterCA()))
+ conn, err := grpc.Dial(resolver.MetropolisControlAddress, grpc.WithTransportCredentials(c), grpc.WithResolvers(res))
+ if err != nil {
+ // TODO(q3k): triple check that Dial will not fail
+ panic(err)
+ }
+ return &curatorConnection{
+ credentials: creds,
+ resolver: res,
+ conn: conn,
+ }
+}
+
+func (c *curatorConnection) nodeID() string {
+ return identity.NodeID(c.credentials.PublicKey())
+}
+
+// KubernetesStatus is an EventValue structure populated by a running
+// Kubernetes instance. It allows external services to access the Kubernetes
+// Service whenever available (i.e. enabled and started by the roleserver).
+type KubernetesStatus struct {
+ Controller *kubernetes.Controller
+}