m/n/core/cluster: migrate to events and etcd namespaced client

This moves the status of the cluster manager to use a local event
variable. Watchers (like the node startup code) can now use this to get
updates on the state of the node and its cluster membership in a way
that's more abstracted from a sequential startup. This will permit us to
move a lot of the startup code into code common across different node
lifecycle paths.

Test Plan: Refactor, exercised by e2e.

X-Origin-Diff: phab/D757
GitOrigin-RevId: 31a3600ad2aab90a1e7f84d741e7ea40a0422724
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index 1277c2a..b194f25 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -22,20 +22,44 @@
 	"source.monogon.dev/metropolis/pkg/pki"
 )
 
+// ClusterState is the state of the cluster from the point of view of the
+// current node. Clients within the node code can watch this state to change
+// their behaviour as needed.
 type ClusterState int
 
 const (
+	// ClusterUnknown means the node has not yet determined the existence
+	// of a cluster it should join or start. This is a transient, initial state
+	// that should only manifest during boot.
 	ClusterUnknown ClusterState = iota
+	// ClusterForeign means the node is attempting to register into an already
+	// existing cluster with which it managed to make preliminary contact, but
+	// which has not yet fully productionized the node (eg. the node is still
+	// being hardware attested, or the operator needs to confirm the
+	// registration of this node).
 	ClusterForeign
+	// ClusterTrusted means the node is attempting to register into an already
+	// existing cluster, and has been trusted by it. The node is now attempting
+	// to finally commit its registration into the cluster.
 	ClusterTrusted
+	// ClusterHome means the node is part of a cluster. This is the state in
+	// which the node will spend the bulk of its time.
 	ClusterHome
+	// ClusterDisowning means the node has been disowned (ie., removed) by the
+	// cluster, and that it will never be part of any cluster again, and
+	// that it will be decommissioned by the operator.
 	ClusterDisowning
+	// ClusterSplit means that the node would usually be Home in a cluster, but
+	// has been split from the consensus of the cluster. This can happen for
+	// nodes running consensus when consensus is lost (eg. when there is no
+	// quorum or this node has been netsplit), and for other nodes if they have
+	// lost network connectivity to the consensus nodes. Clients should make
+	// their own decision what action to perform in this state, depending on
+	// the level of consistency required and whether it makes sense for the
+	// node to fence its services off.
+	ClusterSplit
 )
 
-type Cluster struct {
-	State ClusterState
-}
-
 func (s ClusterState) String() string {
 	switch s {
 	case ClusterForeign:
@@ -46,17 +70,12 @@
 		return "ClusterHome"
 	case ClusterDisowning:
 		return "ClusterDisowning"
+	case ClusterSplit:
+		return "ClusterSplit"
 	}
 	return fmt.Sprintf("Invalid(%d)", s)
 }
 
-var clusterStateTransitions = map[ClusterState][]ClusterState{
-	ClusterUnknown: {ClusterForeign, ClusterHome, ClusterDisowning},
-	ClusterForeign: {ClusterTrusted},
-	ClusterTrusted: {ClusterHome},
-	ClusterHome:    {ClusterHome, ClusterDisowning},
-}
-
 var (
 	PKINamespace = pki.Namespaced("/cluster-pki/")
 	PKICA        = PKINamespace.New(pki.SelfSigned, "cluster-ca", pki.CA("Metropolis Cluster CA"))