osbase/supervisor: export NodeState

This will be exported as part of an upcoming Metrics interface, where runnable statuses are exposed to the user.

Change-Id: Id47071545c3297b2ea36ba28017d6d4a7fbd9005
Reviewed-on: https://review.monogon.dev/c/monogon/+/3289
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>

commit: eca8ee3a89affc200d6056b04a14c4f20fddc91f [log] [tgz]
author: Serge Bazanski <serge@monogon.tech> Tue Jul 30 14:32:19 2024 +0000
committer: Serge Bazanski <serge@monogon.tech> Tue Aug 06 14:00:50 2024 +0000
tree: 8805a806294112753cb917f65754419bad38f41c
parent: 5cdb51bf4c7d7b2451037ba1dd05ce019a379683 [diff]
diff --git a/osbase/supervisor/supervisor_node.go b/osbase/supervisor/supervisor_node.go
index 76d656c..8b31ad4 100644
--- a/osbase/supervisor/supervisor_node.go
+++ b/osbase/supervisor/supervisor_node.go

@@ -54,7 +54,7 @@
 	groups []map[string]bool
 
 	// The current state of the runnable in this node.
-	state nodeState
+	state NodeState
 
 	// Backoff used to keep runnables from being restarted too fast.
 	bo *backoff.ExponentialBackOff
@@ -64,37 +64,37 @@
 	ctxC context.CancelFunc
 }
 
-// nodeState is the state of a runnable within a node, and in a way the node
+// NodeState is the state of a runnable within a node, and in a way the node
 // itself. This follows the state diagram from go/supervision.
-type nodeState int
+type NodeState int
 
 const (
 	// A node that has just been created, and whose runnable has been started
 	// already but hasn't signaled anything yet.
-	nodeStateNew nodeState = iota
+	NodeStateNew NodeState = iota
 	// A node whose runnable has signaled being healthy - this means it's ready
 	// to serve/act.
-	nodeStateHealthy
+	NodeStateHealthy
 	// A node that has unexpectedly returned or panicked.
-	nodeStateDead
+	NodeStateDead
 	// A node that has declared that its done with its work and should not be
 	// restarted, unless a supervision tree failure requires that.
-	nodeStateDone
+	NodeStateDone
 	// A node that has returned after being requested to cancel.
-	nodeStateCanceled
+	NodeStateCanceled
 )
 
-func (s nodeState) String() string {
+func (s NodeState) String() string {
 	switch s {
-	case nodeStateNew:
+	case NodeStateNew:
 		return "NODE_STATE_NEW"
-	case nodeStateHealthy:
+	case NodeStateHealthy:
 		return "NODE_STATE_HEALTHY"
-	case nodeStateDead:
+	case NodeStateDead:
 		return "NODE_STATE_DEAD"
-	case nodeStateDone:
+	case NodeStateDone:
 		return "NODE_STATE_DONE"
-	case nodeStateCanceled:
+	case NodeStateCanceled:
 		return "NODE_STATE_CANCELED"
 	}
 	return "UNKNOWN"
@@ -201,7 +201,7 @@
 	n.ctxC = ctxC
 
 	// Clear children and state
-	n.state = nodeStateNew
+	n.state = NodeStateNew
 	n.children = make(map[string]*node)
 	n.reserved = make(map[string]bool)
 	n.groups = nil
@@ -237,7 +237,7 @@
 // runGroup schedules a new group of runnables to run on a node.
 func (n *node) runGroup(runnables map[string]Runnable) error {
 	// Check that the parent node is in the right state.
-	if n.state != nodeStateNew {
+	if n.state != NodeStateNew {
 		return fmt.Errorf("cannot run new runnable on non-NEW node")
 	}
 
@@ -288,16 +288,16 @@
 func (n *node) signal(signal SignalType) {
 	switch signal {
 	case SignalHealthy:
-		if n.state != nodeStateNew {
+		if n.state != NodeStateNew {
 			panic(fmt.Errorf("node %s signaled healthy", n))
 		}
-		n.state = nodeStateHealthy
+		n.state = NodeStateHealthy
 		n.bo.Reset()
 	case SignalDone:
-		if n.state != nodeStateHealthy {
+		if n.state != NodeStateHealthy {
 			panic(fmt.Errorf("node %s signaled done", n))
 		}
-		n.state = nodeStateDone
+		n.state = NodeStateDone
 		n.bo.Reset()
 	}
 }

diff --git a/osbase/supervisor/supervisor_processor.go b/osbase/supervisor/supervisor_processor.go
index 46cd1aa..2a01cf7 100644
--- a/osbase/supervisor/supervisor_processor.go
+++ b/osbase/supervisor/supervisor_processor.go

@@ -134,13 +134,13 @@
 			s.ilogger.Infof("liquidator: refusing to schedule %s", r.schedule.dn)
 			s.mu.Lock()
 			n := s.nodeByDN(r.schedule.dn)
-			n.state = nodeStateDead
+			n.state = NodeStateDead
 			s.mu.Unlock()
 		case r.died != nil:
 			s.ilogger.Infof("liquidator: %s exited", r.died.dn)
 			s.mu.Lock()
 			n := s.nodeByDN(r.died.dn)
-			n.state = nodeStateDead
+			n.state = NodeStateDead
 			s.mu.Unlock()
 		}
 		live := s.liveRunnables()
@@ -179,7 +179,7 @@
 		}
 		seen[eldn] = true
 
-		if el.state != nodeStateDead && el.state != nodeStateDone {
+		if el.state != NodeStateDead && el.state != NodeStateDone {
 			live = append(live, eldn)
 		}
 
@@ -267,7 +267,7 @@
 	ctx := n.ctx
 
 	// Simple case: it was marked as Done and quit with no error.
-	if n.state == nodeStateDone && r.err == nil {
+	if n.state == NodeStateDone && r.err == nil {
 		// Do nothing. This was supposed to happen. Keep the process as DONE.
 		return
 	}
@@ -276,7 +276,7 @@
 	// context error.
 	if r.err != nil && ctx.Err() != nil && errors.Is(r.err, ctx.Err()) {
 		// Mark the node as canceled successfully.
-		n.state = nodeStateCanceled
+		n.state = NodeStateCanceled
 		return
 	}
 
@@ -290,7 +290,7 @@
 
 	s.ilogger.Errorf("%s: %v", n.dn(), err)
 	// Mark as dead.
-	n.state = nodeStateDead
+	n.state = NodeStateDead
 
 	// Cancel that node's context, just in case something still depends on it.
 	n.ctxC()
@@ -413,16 +413,16 @@
 		// DONE, DEAD or CANCELED).
 		curReady := false
 		switch cur.state {
-		case nodeStateDone:
+		case NodeStateDone:
 			curReady = true
-		case nodeStateCanceled:
+		case NodeStateCanceled:
 			curReady = true
-		case nodeStateDead:
+		case NodeStateDead:
 			curReady = true
 		default:
 		}
 
-		if cur.state == nodeStateDead && !childrenReady {
+		if cur.state == NodeStateDead && !childrenReady {
 			s.ilogger.Warningf("Not restarting %s: children not ready to be restarted: %v", curDn, childrenNotReady)
 		}
 
@@ -460,7 +460,7 @@
 		queue = queue[1:]
 
 		// If this node is DEAD or CANCELED it should be restarted.
-		if cur.state == nodeStateDead || cur.state == nodeStateCanceled {
+		if cur.state == NodeStateDead || cur.state == NodeStateCanceled {
 			want[cur.dn()] = true
 		}
 
@@ -488,7 +488,7 @@
 		// Only back off when the node unexpectedly died - not when it got
 		// canceled.
 		bo := time.Duration(0)
-		if n.state == nodeStateDead {
+		if n.state == NodeStateDead {
 			bo = n.bo.NextBackOff()
 		}
commit	eca8ee3a89affc200d6056b04a14c4f20fddc91f	[log] [tgz]
author	Serge Bazanski <serge@monogon.tech>	Tue Jul 30 14:32:19 2024 +0000
committer	Serge Bazanski <serge@monogon.tech>	Tue Aug 06 14:00:50 2024 +0000
tree	8805a806294112753cb917f65754419bad38f41c
parent	5cdb51bf4c7d7b2451037ba1dd05ce019a379683 [diff]