m/n/core/cluster: migrate to events and etcd namespaced client
This moves the status of the cluster manager to use a local event
variable. Watchers (like the node startup code) can now use this to get
updates on the state of the node and its cluster membership in a way
that's more abstracted from a sequential startup. This will permit us to
move a lot of the startup code into code common across different node
lifecycle paths.
Test Plan: Refactor, exercised by e2e.
X-Origin-Diff: phab/D757
GitOrigin-RevId: 31a3600ad2aab90a1e7f84d741e7ea40a0422724
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index 1277c2a..b194f25 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -22,20 +22,44 @@
"source.monogon.dev/metropolis/pkg/pki"
)
+// ClusterState is the state of the cluster from the point of view of the
+// current node. Clients within the node code can watch this state to change
+// their behaviour as needed.
type ClusterState int
const (
+ // ClusterUnknown means the node has not yet determined the existence
+ // of a cluster it should join or start. This is a transient, initial state
+ // that should only manifest during boot.
ClusterUnknown ClusterState = iota
+ // ClusterForeign means the node is attempting to register into an already
+ // existing cluster with which it managed to make preliminary contact, but
+ // the cluster has not yet fully productionized the node (eg. the node is
+ // still being hardware attested, or the operator needs to confirm the
+ // registration of this node).
ClusterForeign
+ // ClusterTrusted means the node is attempting to register into an already
+ // existing cluster, and has been trusted by it. The node is now
+ // attempting to finally commit to registering into the cluster.
ClusterTrusted
+ // ClusterHome means the node is part of a cluster. This is the state in
+ // which this node will spend the bulk of its time.
ClusterHome
+ // ClusterDisowning means the node has been disowned (ie., removed) by the
+ // cluster, and that it will never again be part of any cluster, and
+ // that it will be decommissioned by the operator.
ClusterDisowning
+ // ClusterSplit means that the node would usually be Home in a cluster, but
+ // has been split from the consensus of the cluster. This can happen for
+ // nodes running consensus when consensus is lost (eg. when there is no
+ // quorum or this node has been netsplit), and for other nodes if they have
+ // lost network connectivity to the consensus nodes. Clients should make
+ // their own decision on what action to perform in this state, depending on
+ // the level of consistency required and whether it makes sense for the
+ // node to fence its services off.
+ ClusterSplit
)
-type Cluster struct {
- State ClusterState
-}
-
func (s ClusterState) String() string {
switch s {
case ClusterForeign:
@@ -46,17 +70,12 @@
return "ClusterHome"
case ClusterDisowning:
return "ClusterDisowning"
+ case ClusterSplit:
+ return "ClusterSplit"
}
return fmt.Sprintf("Invalid(%d)", s)
}
-var clusterStateTransitions = map[ClusterState][]ClusterState{
- ClusterUnknown: {ClusterForeign, ClusterHome, ClusterDisowning},
- ClusterForeign: {ClusterTrusted},
- ClusterTrusted: {ClusterHome},
- ClusterHome: {ClusterHome, ClusterDisowning},
-}
-
var (
PKINamespace = pki.Namespaced("/cluster-pki/")
PKICA = PKINamespace.New(pki.SelfSigned, "cluster-ca", pki.CA("Metropolis Cluster CA"))