metropolis: rename KubernetesWorker to KubernetesController, add no-op KubernetesWorker

This is in preparation for actually splitting the Kubernetes service
into a control plane and data plane / worker. The new Worker role is a
no-op: it can be enabled/disabled, but has no practical effect.

Since we plan on keeping the control plane stuff mostly as is, and
adding the split worker functionality as a new codebase, it makes sense
to rename the existing role to Controller, and create a brand new
Worker one (to make future diffs smaller).
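
With this change, the requirement that a Kubernetes node must also be a
consensus member applies to the Controller role rather than the Worker
role. For illustration, the new roles are assigned the same way as the
old one via metroctl (node ID is an example; the Worker role is
accepted but does nothing yet):

    metroctl node add role KubernetesController metropolis-25fa5f5e9349381d4a5e9e59de0215e3
    metroctl node add role KubernetesWorker metropolis-25fa5f5e9349381d4a5e9e59de0215e3
    metroctl node remove role KubernetesWorker metropolis-25fa5f5e9349381d4a5e9e59de0215e3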

Change-Id: I79de3219f3c190d38469a0a8d7371820471c100d
Reviewed-on: https://review.monogon.dev/c/monogon/+/1325
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/cli/metroctl/format.go b/metropolis/cli/metroctl/format.go
index d2c5ddf..41987ec 100644
--- a/metropolis/cli/metroctl/format.go
+++ b/metropolis/cli/metroctl/format.go
@@ -5,6 +5,8 @@
 	"io"
 	"log"
 	"os"
+	"sort"
+	"strings"
 
 	"source.monogon.dev/metropolis/node/core/identity"
 	apb "source.monogon.dev/metropolis/proto/api"
@@ -42,14 +44,18 @@
 		return err
 	}
 
-	var roles string
-	if n.Roles.KubernetesWorker != nil {
-		roles += "KubernetesWorker"
-	}
+	var roles []string
 	if n.Roles.ConsensusMember != nil {
-		roles += ",ConsensusMember"
+		roles = append(roles, "ConsensusMember")
 	}
-	if _, err := fmt.Fprintf(e.out, "\t%s", roles); err != nil {
+	if n.Roles.KubernetesController != nil {
+		roles = append(roles, "KubernetesController")
+	}
+	if n.Roles.KubernetesWorker != nil {
+		roles = append(roles, "KubernetesWorker")
+	}
+	sort.Strings(roles)
+	if _, err := fmt.Fprintf(e.out, "\t%s", strings.Join(roles, ",")); err != nil {
 		return err
 	}
 
diff --git a/metropolis/cli/metroctl/set.go b/metropolis/cli/metroctl/set.go
index 2d6eaf9..b0bfaf5 100644
--- a/metropolis/cli/metroctl/set.go
+++ b/metropolis/cli/metroctl/set.go
@@ -23,7 +23,7 @@
 
 var addRoleCmd = &cobra.Command{
 	Short:   "Updates node roles.",
-	Use:     "role <KubernetesWorker|ConsensusMember> [NodeID, ...]",
+	Use:     "role <KubernetesController|KubernetesWorker|ConsensusMember> [NodeID, ...]",
 	Example: "metroctl node add role KubernetesWorker metropolis-25fa5f5e9349381d4a5e9e59de0215e3",
 	Args:    cobra.ArbitraryArgs,
 	Run:     doAdd,
@@ -31,7 +31,7 @@
 
 var removeRoleCmd = &cobra.Command{
 	Short:   "Updates node roles.",
-	Use:     "role <KubernetesWorker|ConsensusMember> [NodeID, ...]",
+	Use:     "role <KubernetesController|KubernetesWorker|ConsensusMember> [NodeID, ...]",
 	Example: "metroctl node remove role KubernetesWorker metropolis-25fa5f5e9349381d4a5e9e59de0215e3",
 	Args:    cobra.ArbitraryArgs,
 	Run:     doRemove,
@@ -65,6 +65,8 @@
 			},
 		}
 		switch role {
+		case "kubernetescontroller", "kc":
+			req.KubernetesController = opt(true)
 		case "kubernetesworker", "kw":
 			req.KubernetesWorker = opt(true)
 		case "consensusmember", "cm":
@@ -77,7 +79,7 @@
 		if err != nil {
 			log.Printf("Couldn't update node \"%s\": %v", node, err)
 		}
-		log.Printf("Updated node %s. Must be one of: KubernetesWorker, ConsensusMember.", node)
+		log.Printf("Updated node %s.", node)
 	}
 }
 
@@ -101,6 +103,8 @@
 			},
 		}
 		switch role {
+		case "kubernetescontroller", "kc":
+			req.KubernetesController = opt(false)
 		case "kubernetesworker", "kw":
 			req.KubernetesWorker = opt(false)
 		case "consensusmember", "cm":
diff --git a/metropolis/cli/metroctl/test/test.go b/metropolis/cli/metroctl/test/test.go
index e8310d7..a88a2e0 100644
--- a/metropolis/cli/metroctl/test/test.go
+++ b/metropolis/cli/metroctl/test/test.go
@@ -244,8 +244,8 @@
 			if onstatus != "HEALTHY" {
 				return fmt.Errorf("node status mismatch")
 			}
-			if onroles != "KubernetesWorker,ConsensusMember" {
-				return fmt.Errorf("node role mismatch")
+			if want, got := "ConsensusMember,KubernetesController", onroles; want != got {
+				return fmt.Errorf("node role mismatch: wanted %q, got %q", want, got)
 			}
 			if ontimeout < 0 || ontimeout > 30 {
 				return fmt.Errorf("node timeout mismatch")
@@ -254,44 +254,44 @@
 		})
 	})
 	t.Run("set/unset role", func(t *testing.T) {
-		util.TestEventual(t, "metroctl set/unset role KubernetesWorker", ctx, 10*time.Second, func(ctx context.Context) error {
+		util.TestEventual(t, "metroctl set/unset role KubernetesController", ctx, 10*time.Second, func(ctx context.Context) error {
 			nid := cl.NodeIDs[1]
 			naddr := cl.Nodes[nid].ManagementAddress
 
 			// In this test we'll unset a node role, make sure that it's been in fact
 			// unset, then set it again, and check again. This exercises commands of
-			// the form "metroctl set/unset role KubernetesWorker [NodeID, ...]".
+			// the form "metroctl set/unset role KubernetesController [NodeID, ...]".
 
-			// Check that KubernetesWorker role is set initially.
+			// Check that KubernetesController role is set initially.
 			var describeArgs []string
 			describeArgs = append(describeArgs, commonOpts...)
 			describeArgs = append(describeArgs, endpointOpts...)
 			describeArgs = append(describeArgs, "node", "describe", "--filter", fmt.Sprintf("node.status.external_address==\"%s\"", naddr))
-			if err := mctlFailIfMissing(t, ctx, describeArgs, "KubernetesWorker"); err != nil {
+			if err := mctlFailIfMissing(t, ctx, describeArgs, "KubernetesController"); err != nil {
 				return err
 			}
 			// Remove the role.
 			var unsetArgs []string
 			unsetArgs = append(unsetArgs, commonOpts...)
 			unsetArgs = append(unsetArgs, endpointOpts...)
-			unsetArgs = append(unsetArgs, "node", "remove", "role", "KubernetesWorker", nid)
+			unsetArgs = append(unsetArgs, "node", "remove", "role", "KubernetesController", nid)
 			if err := mctlRun(t, ctx, unsetArgs); err != nil {
 				return err
 			}
 			// Check that the role is unset.
-			if err := mctlFailIfFound(t, ctx, describeArgs, "KubernetesWorker"); err != nil {
+			if err := mctlFailIfFound(t, ctx, describeArgs, "KubernetesController"); err != nil {
 				return err
 			}
 			// Set the role back to the initial value.
 			var setArgs []string
 			setArgs = append(setArgs, commonOpts...)
 			setArgs = append(setArgs, endpointOpts...)
-			setArgs = append(setArgs, "node", "add", "role", "KubernetesWorker", nid)
+			setArgs = append(setArgs, "node", "add", "role", "KubernetesController", nid)
 			if err := mctlRun(t, ctx, setArgs); err != nil {
 				return err
 			}
 			// Check that the role is set.
-			return mctlFailIfMissing(t, ctx, describeArgs, "KubernetesWorker")
+			return mctlFailIfMissing(t, ctx, describeArgs, "KubernetesController")
 		})
 	})
 }
diff --git a/metropolis/node/core/curator/impl_leader_curator.go b/metropolis/node/core/curator/impl_leader_curator.go
index 9978c7e..a06f8cd 100644
--- a/metropolis/node/core/curator/impl_leader_curator.go
+++ b/metropolis/node/core/curator/impl_leader_curator.go
@@ -442,7 +442,7 @@
 	node.state = cpb.NodeState_NODE_STATE_UP
 	node.clusterUnlockKey = req.ClusterUnlockKey
 	node.EnableConsensusMember(join)
-	node.EnableKubernetesWorker()
+	node.EnableKubernetesController()
 	if err := nodeSave(ctx, l.leadership, node); err != nil {
 		return nil, err
 	}
diff --git a/metropolis/node/core/curator/impl_leader_management.go b/metropolis/node/core/curator/impl_leader_management.go
index 819456c..b725759 100644
--- a/metropolis/node/core/curator/impl_leader_management.go
+++ b/metropolis/node/core/curator/impl_leader_management.go
@@ -201,6 +201,9 @@
 
 		// Convert node roles.
 		roles := &cpb.NodeRoles{}
+		if node.kubernetesController != nil {
+			roles.KubernetesController = &cpb.NodeRoles_KubernetesController{}
+		}
 		if node.kubernetesWorker != nil {
 			roles.KubernetesWorker = &cpb.NodeRoles_KubernetesWorker{}
 		}
@@ -335,15 +338,26 @@
 			// Modify the node's state to reflect the change.
 			node.EnableConsensusMember(join)
 		} else {
+			if node.kubernetesController != nil {
+				return nil, status.Errorf(codes.FailedPrecondition, "could not remove consensus member role while node is a kubernetes controller")
+			}
 			node.DisableConsensusMember()
 		}
 	}
 
+	if req.KubernetesController != nil {
+		if *req.KubernetesController {
+			if node.consensusMember == nil {
+				return nil, status.Errorf(codes.FailedPrecondition, "could not set role: Kubernetes controller nodes must also be consensus members")
+			}
+			node.EnableKubernetesController()
+		} else {
+			node.DisableKubernetesController()
+		}
+	}
+
 	if req.KubernetesWorker != nil {
 		if *req.KubernetesWorker {
-			if node.consensusMember == nil {
-				return nil, status.Errorf(codes.InvalidArgument, "could not set role: Kubernetes worker nodes must also be consensus members.")
-			}
 			node.EnableKubernetesWorker()
 		} else {
 			node.DisableKubernetesWorker()
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
index 8cf1c3e..4d26b43 100644
--- a/metropolis/node/core/curator/impl_leader_test.go
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -1123,23 +1123,23 @@
 		}
 	}
 
-	// Try running a request containing a contradictory set of roles. A cluster
-	// node currently can't be a KubernetesWorker if it's not a ConsensusMember
-	// as well.
+	// Try running a request containing a contradictory set of roles. A cluster node
+	// currently can't be a KubernetesController if it's not a ConsensusMember as
+	// well.
 	uf := []*apb.UpdateNodeRolesRequest{
 		&apb.UpdateNodeRolesRequest{
 			Node: &apb.UpdateNodeRolesRequest_Pubkey{
 				Pubkey: tn[0].pubkey,
 			},
-			KubernetesWorker: opt(true),
-			ConsensusMember:  opt(false),
+			KubernetesController: opt(true),
+			ConsensusMember:      opt(false),
 		},
 		&apb.UpdateNodeRolesRequest{
 			Node: &apb.UpdateNodeRolesRequest_Pubkey{
 				Pubkey: tn[0].pubkey,
 			},
-			KubernetesWorker: opt(true),
-			ConsensusMember:  nil,
+			KubernetesController: opt(true),
+			ConsensusMember:      nil,
 		},
 	}
 	for _, e := range uf {
diff --git a/metropolis/node/core/curator/state_node.go b/metropolis/node/core/curator/state_node.go
index f376c28..f2ee5c3 100644
--- a/metropolis/node/core/curator/state_node.go
+++ b/metropolis/node/core/curator/state_node.go
@@ -78,13 +78,15 @@
 	// of NodeRole* structures in this structure, with a nil pointer
 	// representing the lack of a role.
 
-	// kubernetesWorker is set if this node is a Kubernetes worker, ie. running the
-	// Kubernetes control plan and workload elements.
-	// In the future, this will be split into a separate worker and control plane
-	// role.
-	kubernetesWorker *NodeRoleKubernetesWorker
-
 	consensusMember *NodeRoleConsensusMember
+
+	// kubernetesController is set if this node is a Kubernetes controller, ie.
+	// running the Kubernetes control plane.
+	kubernetesController *NodeRoleKubernetesController
+
+	// kubernetesWorker is set if this node is a Kubernetes worker, ie. running the
+	// Kubernetes dataplane which runs user workloads.
+	kubernetesWorker *NodeRoleKubernetesWorker
 }
 
 // NewNodeForBootstrap creates a brand new node without regard for any other
@@ -100,8 +102,13 @@
 	}
 }
 
+// NodeRoleKubernetesController defines that the Node should be running the
+// Kubernetes control plane.
+type NodeRoleKubernetesController struct {
+}
+
 // NodeRoleKubernetesWorker defines that the Node should be running the
-// Kubernetes control and data plane.
+// Kubernetes data plane.
 type NodeRoleKubernetesWorker struct {
 }
 
@@ -163,6 +170,22 @@
 	n.kubernetesWorker = nil
 }
 
+func (n *Node) KubernetesController() *NodeRoleKubernetesController {
+	if n.kubernetesController == nil {
+		return nil
+	}
+	kcp := *n.kubernetesController
+	return &kcp
+}
+
+func (n *Node) EnableKubernetesController() {
+	n.kubernetesController = &NodeRoleKubernetesController{}
+}
+
+func (n *Node) DisableKubernetesController() {
+	n.kubernetesController = nil
+}
+
 func (n *Node) EnableConsensusMember(jc *consensus.JoinCluster) {
 	peers := make([]NodeRoleConsensusMemberPeer, len(jc.ExistingNodes))
 	for i, n := range jc.ExistingNodes {
@@ -214,6 +237,9 @@
 	if n.kubernetesWorker != nil {
 		msg.Roles.KubernetesWorker = &cpb.NodeRoles_KubernetesWorker{}
 	}
+	if n.kubernetesController != nil {
+		msg.Roles.KubernetesController = &cpb.NodeRoles_KubernetesController{}
+	}
 	if n.consensusMember != nil {
 		peers := make([]*cpb.NodeRoles_ConsensusMember_Peer, len(n.consensusMember.Peers))
 		for i, p := range n.consensusMember.Peers {
@@ -247,6 +273,9 @@
 	if msg.Roles.KubernetesWorker != nil {
 		n.kubernetesWorker = &NodeRoleKubernetesWorker{}
 	}
+	if msg.Roles.KubernetesController != nil {
+		n.kubernetesController = &NodeRoleKubernetesController{}
+	}
 	if cm := msg.Roles.ConsensusMember; cm != nil {
 		caCert, err := x509.ParseCertificate(cm.CaCertificate)
 		if err != nil {
diff --git a/metropolis/node/core/roleserve/worker_controlplane.go b/metropolis/node/core/roleserve/worker_controlplane.go
index c581c3d..5c8c4b2 100644
--- a/metropolis/node/core/roleserve/worker_controlplane.go
+++ b/metropolis/node/core/roleserve/worker_controlplane.go
@@ -27,13 +27,13 @@
 // locally running Control Plane (Consensus and Curator service pair) if needed.
 //
 // The Control Plane will run under the following conditions:
-//  - This node has been started in BOOTSTRAP mode and bootstrapData was provided
-//    by the cluster enrolment logic. In this case, the Control Plane Worker will
-//    perform the required bootstrap steps, creating a local node with appropriate
-//    roles, and will start Consensus and the Curator.
-//  - This node has the ConsensusMember Node Role. This will be true for nodes
-//    which are REGISTERing into the cluster, as well as already running nodes that
-//    have been assigned the role.
+//   - This node has been started in BOOTSTRAP mode and bootstrapData was provided
+//     by the cluster enrolment logic. In this case, the Control Plane Worker will
+//     perform the required bootstrap steps, creating a local node with appropriate
+//     roles, and will start Consensus and the Curator.
+//   - This node has the ConsensusMember Node Role. This will be true for nodes
+//     which are REGISTERing into the cluster, as well as already running nodes that
+//     have been assigned the role.
 //
 // In either case, ClusterMembership will be updated to allow connecting to the
 // newly locally running control plane. For nodes that are bootstrapping the
@@ -324,7 +324,7 @@
 				}
 
 				n.EnableConsensusMember(join)
-				n.EnableKubernetesWorker()
+				n.EnableKubernetesController()
 
 				var nodeCert []byte
 				caCert, nodeCert, err = curator.BootstrapNodeFinish(ctx, ckv, &n, b.initialOwnerKey)
diff --git a/metropolis/node/core/roleserve/worker_kubernetes.go b/metropolis/node/core/roleserve/worker_kubernetes.go
index 904e4f5..3be3c41 100644
--- a/metropolis/node/core/roleserve/worker_kubernetes.go
+++ b/metropolis/node/core/roleserve/worker_kubernetes.go
@@ -43,8 +43,8 @@
 // kubernetesStartups differ to the point where a restart of Kubernetes should
 // happen.
 func (k *kubernetesStartup) changed(o *kubernetesStartup) bool {
-	hasKubernetesA := k.roles.KubernetesWorker != nil
-	hasKubernetesB := o.roles.KubernetesWorker != nil
+	hasKubernetesA := k.roles.KubernetesController != nil
+	hasKubernetesB := o.roles.KubernetesController != nil
 	if hasKubernetesA != hasKubernetesB {
 		return true
 	}
@@ -124,7 +124,7 @@
 		supervisor.Logger(ctx).Infof("Waiting for startup data...")
 
 		// Acquire kubernetesStartup, waiting for it to contain local consensus and a
-		// KubernetesWorker local role.
+		// KubernetesController local role.
 		var d *kubernetesStartup
 		for {
 			dV, err := w.Get(ctx)
@@ -133,7 +133,7 @@
 			}
 			d = dV.(*kubernetesStartup)
 			supervisor.Logger(ctx).Infof("Got new startup data.")
-			if d.roles.KubernetesWorker == nil {
+			if d.roles.KubernetesController == nil {
 				supervisor.Logger(ctx).Infof("No Kubernetes role, not starting.")
 				continue
 			}
diff --git a/metropolis/node/core/roleserve/worker_rolefetch.go b/metropolis/node/core/roleserve/worker_rolefetch.go
index 77b3bf2..3856f62 100644
--- a/metropolis/node/core/roleserve/worker_rolefetch.go
+++ b/metropolis/node/core/roleserve/worker_rolefetch.go
@@ -64,6 +64,9 @@
 			if n.Roles.ConsensusMember != nil {
 				supervisor.Logger(ctx).Infof(" - control plane member, existing peers: %+v", n.Roles.ConsensusMember.Peers)
 			}
+			if n.Roles.KubernetesController != nil {
+				supervisor.Logger(ctx).Infof(" - kubernetes controller")
+			}
 			if n.Roles.KubernetesWorker != nil {
 				supervisor.Logger(ctx).Infof(" - kubernetes worker")
 			}
diff --git a/metropolis/proto/api/management.proto b/metropolis/proto/api/management.proto
index 396df83..0df1d69 100644
--- a/metropolis/proto/api/management.proto
+++ b/metropolis/proto/api/management.proto
@@ -168,9 +168,11 @@
     string id = 4;
   }
 
-  // kubernetesWorker adjusts the appropriate role when set. Nodes performing
-  // this role must also be consensus members. 
+  // kubernetesWorker adjusts the appropriate role when set.
   optional bool kubernetesWorker = 2;
+  // kubernetesController adjusts the appropriate role when set. Nodes performing
+  // this role must also be consensus members.
+  optional bool kubernetesController = 5;
   optional bool consensusMember = 3;
 }
 
diff --git a/metropolis/proto/common/common.proto b/metropolis/proto/common/common.proto
index ca4bfea..1c01834 100644
--- a/metropolis/proto/common/common.proto
+++ b/metropolis/proto/common/common.proto
@@ -28,6 +28,8 @@
 // can be used to carry required data to start up services for a given role,
 // this must not be confidential/private data.
 message NodeRoles {
+    message KubernetesController {
+    }
     message KubernetesWorker {
     }
     message ConsensusMember {
@@ -56,6 +58,7 @@
     }
     KubernetesWorker kubernetes_worker = 1;
     ConsensusMember consensus_member = 2;
+    KubernetesController kubernetes_controller = 3;
 }
 
 // NodeState is the state of a Metropolis node from the point of view of the