//core/internal/supervisor: drop panic propagation flag
This made the race detector unhappy (for a good reason), and was
actually unused. The only place where we do want panic propagation is in
tests, and making it configurable by an option passed to New() is much
more friendly, anyway.
Test Plan: Behaviour unchanged, covered by existing tests.
X-Origin-Diff: phab/D490
GitOrigin-RevId: 465a8244445906bbb12e8fec13ccab0c87ab50f6
diff --git a/core/internal/common/supervisor/supervisor.go b/core/internal/common/supervisor/supervisor.go
index 78fa5d2..e839d0a 100644
--- a/core/internal/common/supervisor/supervisor.go
+++ b/core/internal/common/supervisor/supervisor.go
@@ -93,16 +93,35 @@
// pReq is an interface channel to the lifecycle processor of the supervisor.
pReq chan *processorRequest
+
+ // propagate panics, ie. don't catch them.
+ propagatePanic bool
}
+// SupervisorOpt are runtime configurable options for the supervisor.
+type SupervisorOpt func(s *supervisor)
+
+var (
+ // WithPropagatePanic prevents the Supervisor from catching panics in runnables and treating them as failures.
+ // This is useful to enable for testing and local debugging.
+ WithPropagatePanic = func(s *supervisor) {
+ s.propagatePanic = true
+ }
+)
+
// New creates a new supervisor with its root running the given root runnable.
// The given context can be used to cancel the entire supervision tree.
-func New(ctx context.Context, logger *zap.Logger, rootRunnable Runnable) {
+func New(ctx context.Context, logger *zap.Logger, rootRunnable Runnable, opts ...SupervisorOpt) {
sup := &supervisor{
logger: logger,
ilogger: logger.Named("supervisor"),
pReq: make(chan *processorRequest),
}
+
+ for _, o := range opts {
+ o(sup)
+ }
+
sup.root = newNode("root", rootRunnable, sup, nil)
go sup.processor(ctx)
diff --git a/core/internal/common/supervisor/supervisor_processor.go b/core/internal/common/supervisor/supervisor_processor.go
index b1d92a4..3fcaef7 100644
--- a/core/internal/common/supervisor/supervisor_processor.go
+++ b/core/internal/common/supervisor/supervisor_processor.go
@@ -19,7 +19,6 @@
import (
"context"
"errors"
- "flag"
"fmt"
"runtime/debug"
"time"
@@ -31,16 +30,6 @@
// record the result and act accordingly. It is also responsible for detecting and acting upon supervision subtrees that
// need to be restarted after death (via a 'GC' process)
-// flagCatchPanic is a global flag that configures whether panics from runnables are handled and treated as errors, or
-// cause a panic of the entire supervisor.
-// For production use cases, you likely want to catch panics.
-// For debugging and tests, you likely want panics to bubble up and abort the entire supervisor early.
-var flagCatchPanic = true
-
-func init() {
- flag.BoolVar(&flagCatchPanic, "catch_panic", flagCatchPanic, "Catch service/runnable panics - disable this for development or testing")
-}
-
// processorRequest is a request for the processor. Only one of the fields can be set.
type processorRequest struct {
schedule *processorRequestSchedule
@@ -133,7 +122,7 @@
n := s.nodeByDN(r.dn)
go func() {
- if flagCatchPanic {
+ if !s.propagatePanic {
defer func() {
if rec := recover(); rec != nil {
s.pReq <- &processorRequest{
diff --git a/core/internal/common/supervisor/supervisor_test.go b/core/internal/common/supervisor/supervisor_test.go
index 1b440a5..6a9bf42 100644
--- a/core/internal/common/supervisor/supervisor_test.go
+++ b/core/internal/common/supervisor/supervisor_test.go
@@ -196,7 +196,7 @@
Signal(ctx, SignalHealthy)
Signal(ctx, SignalDone)
return nil
- })
+ }, WithPropagatePanic)
// Expect both to start running.
select {
@@ -230,7 +230,7 @@
Signal(ctx, SignalHealthy)
Signal(ctx, SignalDone)
return nil
- })
+ }, WithPropagatePanic)
two.becomeHealthy()
// Expect one to start running.
@@ -275,7 +275,7 @@
Signal(ctx, SignalHealthy)
Signal(ctx, SignalDone)
return nil
- })
+ }, WithPropagatePanic)
two.becomeHealthy()
// Expect one to start running.
@@ -296,18 +296,12 @@
// And one should start running again.
select {
case <-h1:
- case <-time.After(110 * time.Millisecond):
+ case <-time.After(100 * time.Millisecond):
t.Fatalf("runnable 'one' didn't restart on time")
}
}
func TestPanic(t *testing.T) {
- catchPanicPrev := flagCatchPanic
- flagCatchPanic = true
- defer func() {
- flagCatchPanic = catchPanicPrev
- }()
-
h1 := make(chan struct{})
d1 := make(chan struct{})
two := newRC()
@@ -367,7 +361,7 @@
Signal(ctx, SignalHealthy)
Signal(ctx, SignalDone)
return nil
- })
+ }, WithPropagatePanic)
}
func TestBackoff(t *testing.T) {
@@ -382,7 +376,7 @@
Signal(ctx, SignalHealthy)
Signal(ctx, SignalDone)
return nil
- })
+ }, WithPropagatePanic)
one.becomeHealthy()
// Die a bunch of times in a row, this brings up the next exponential backoff to over a second.