osbase/supervisor: canceled runnables are not live
This fixes a bug that made a test flaky: the test could get stuck with the
TestHarness waiting for runnables in the CANCELED state to die.
Change-Id: I31394e407662fe05918907650931859c6bba35fe
Reviewed-on: https://review.monogon.dev/c/monogon/+/3713
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/osbase/supervisor/supervisor_processor.go b/osbase/supervisor/supervisor_processor.go
index 6304b09..ad39d1d 100644
--- a/osbase/supervisor/supervisor_processor.go
+++ b/osbase/supervisor/supervisor_processor.go
@@ -26,7 +26,7 @@
)
// The processor maintains runnable goroutines - ie., when requested will start
-// one, and then once it exists it will record the result and act accordingly.
+// one, and then once it exits, it will record the result and act accordingly.
// It is also responsible for detecting and acting upon supervision subtrees
// that need to be restarted after death (via a 'GC' process)
@@ -151,14 +151,15 @@
}
}
-// liveRunnables returns a list of runnable DNs that aren't DONE/DEAD. This is
-// used by the liquidator to figure out when its job is done, and by the
+// liveRunnables returns a list of runnable DNs that aren't DONE/DEAD/CANCELED.
+// This is used by the liquidator to figure out when its job is done, and by the
// TestHarness to know when to unblock the test cleanup function.
func (s *supervisor) liveRunnables() []string {
s.mu.RLock()
defer s.mu.RUnlock()
- // DFS through supervision tree, making not of live (non-DONE/DEAD runnables).
+ // DFS through supervision tree, making note of live
+ // (non-DONE/DEAD/CANCELED) runnables.
var live []string
seen := make(map[string]bool)
q := []*node{s.root}
@@ -179,7 +180,7 @@
}
seen[eldn] = true
- if el.state != NodeStateDead && el.state != NodeStateDone {
+ if el.state != NodeStateDead && el.state != NodeStateDone && el.state != NodeStateCanceled {
live = append(live, eldn)
}