osbase/supervisor: export NodeState
NodeState is exported because it will be part of an upcoming Metrics
interface, where runnable statuses are exposed to the user.
Change-Id: Id47071545c3297b2ea36ba28017d6d4a7fbd9005
Reviewed-on: https://review.monogon.dev/c/monogon/+/3289
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/osbase/supervisor/supervisor_node.go b/osbase/supervisor/supervisor_node.go
index 76d656c..8b31ad4 100644
--- a/osbase/supervisor/supervisor_node.go
+++ b/osbase/supervisor/supervisor_node.go
@@ -54,7 +54,7 @@
groups []map[string]bool
// The current state of the runnable in this node.
- state nodeState
+ state NodeState
// Backoff used to keep runnables from being restarted too fast.
bo *backoff.ExponentialBackOff
@@ -64,37 +64,37 @@
ctxC context.CancelFunc
}
-// nodeState is the state of a runnable within a node, and in a way the node
+// NodeState is the state of a runnable within a node, and in a way the node
// itself. This follows the state diagram from go/supervision.
-type nodeState int
+type NodeState int
const (
// A node that has just been created, and whose runnable has been started
// already but hasn't signaled anything yet.
- nodeStateNew nodeState = iota
+ NodeStateNew NodeState = iota
// A node whose runnable has signaled being healthy - this means it's ready
// to serve/act.
- nodeStateHealthy
+ NodeStateHealthy
// A node that has unexpectedly returned or panicked.
- nodeStateDead
+ NodeStateDead
// A node that has declared that its done with its work and should not be
// restarted, unless a supervision tree failure requires that.
- nodeStateDone
+ NodeStateDone
// A node that has returned after being requested to cancel.
- nodeStateCanceled
+ NodeStateCanceled
)
-func (s nodeState) String() string {
+func (s NodeState) String() string {
switch s {
- case nodeStateNew:
+ case NodeStateNew:
return "NODE_STATE_NEW"
- case nodeStateHealthy:
+ case NodeStateHealthy:
return "NODE_STATE_HEALTHY"
- case nodeStateDead:
+ case NodeStateDead:
return "NODE_STATE_DEAD"
- case nodeStateDone:
+ case NodeStateDone:
return "NODE_STATE_DONE"
- case nodeStateCanceled:
+ case NodeStateCanceled:
return "NODE_STATE_CANCELED"
}
return "UNKNOWN"
@@ -201,7 +201,7 @@
n.ctxC = ctxC
// Clear children and state
- n.state = nodeStateNew
+ n.state = NodeStateNew
n.children = make(map[string]*node)
n.reserved = make(map[string]bool)
n.groups = nil
@@ -237,7 +237,7 @@
// runGroup schedules a new group of runnables to run on a node.
func (n *node) runGroup(runnables map[string]Runnable) error {
// Check that the parent node is in the right state.
- if n.state != nodeStateNew {
+ if n.state != NodeStateNew {
return fmt.Errorf("cannot run new runnable on non-NEW node")
}
@@ -288,16 +288,16 @@
func (n *node) signal(signal SignalType) {
switch signal {
case SignalHealthy:
- if n.state != nodeStateNew {
+ if n.state != NodeStateNew {
panic(fmt.Errorf("node %s signaled healthy", n))
}
- n.state = nodeStateHealthy
+ n.state = NodeStateHealthy
n.bo.Reset()
case SignalDone:
- if n.state != nodeStateHealthy {
+ if n.state != NodeStateHealthy {
panic(fmt.Errorf("node %s signaled done", n))
}
- n.state = nodeStateDone
+ n.state = NodeStateDone
n.bo.Reset()
}
}
diff --git a/osbase/supervisor/supervisor_processor.go b/osbase/supervisor/supervisor_processor.go
index 46cd1aa..2a01cf7 100644
--- a/osbase/supervisor/supervisor_processor.go
+++ b/osbase/supervisor/supervisor_processor.go
@@ -134,13 +134,13 @@
s.ilogger.Infof("liquidator: refusing to schedule %s", r.schedule.dn)
s.mu.Lock()
n := s.nodeByDN(r.schedule.dn)
- n.state = nodeStateDead
+ n.state = NodeStateDead
s.mu.Unlock()
case r.died != nil:
s.ilogger.Infof("liquidator: %s exited", r.died.dn)
s.mu.Lock()
n := s.nodeByDN(r.died.dn)
- n.state = nodeStateDead
+ n.state = NodeStateDead
s.mu.Unlock()
}
live := s.liveRunnables()
@@ -179,7 +179,7 @@
}
seen[eldn] = true
- if el.state != nodeStateDead && el.state != nodeStateDone {
+ if el.state != NodeStateDead && el.state != NodeStateDone {
live = append(live, eldn)
}
@@ -267,7 +267,7 @@
ctx := n.ctx
// Simple case: it was marked as Done and quit with no error.
- if n.state == nodeStateDone && r.err == nil {
+ if n.state == NodeStateDone && r.err == nil {
// Do nothing. This was supposed to happen. Keep the process as DONE.
return
}
@@ -276,7 +276,7 @@
// context error.
if r.err != nil && ctx.Err() != nil && errors.Is(r.err, ctx.Err()) {
// Mark the node as canceled successfully.
- n.state = nodeStateCanceled
+ n.state = NodeStateCanceled
return
}
@@ -290,7 +290,7 @@
s.ilogger.Errorf("%s: %v", n.dn(), err)
// Mark as dead.
- n.state = nodeStateDead
+ n.state = NodeStateDead
// Cancel that node's context, just in case something still depends on it.
n.ctxC()
@@ -413,16 +413,16 @@
// DONE, DEAD or CANCELED).
curReady := false
switch cur.state {
- case nodeStateDone:
+ case NodeStateDone:
curReady = true
- case nodeStateCanceled:
+ case NodeStateCanceled:
curReady = true
- case nodeStateDead:
+ case NodeStateDead:
curReady = true
default:
}
- if cur.state == nodeStateDead && !childrenReady {
+ if cur.state == NodeStateDead && !childrenReady {
s.ilogger.Warningf("Not restarting %s: children not ready to be restarted: %v", curDn, childrenNotReady)
}
@@ -460,7 +460,7 @@
queue = queue[1:]
// If this node is DEAD or CANCELED it should be restarted.
- if cur.state == nodeStateDead || cur.state == nodeStateCanceled {
+ if cur.state == NodeStateDead || cur.state == NodeStateCanceled {
want[cur.dn()] = true
}
@@ -488,7 +488,7 @@
// Only back off when the node unexpectedly died - not when it got
// canceled.
bo := time.Duration(0)
- if n.state == nodeStateDead {
+ if n.state == NodeStateDead {
bo = n.bo.NextBackOff()
}