//core/internal/supervisor: drop panic propagation flag This made the race detector unhappy (for a good reason), and was actually unused. The only place where we do want panic propagation is in tests, and making it configurable by an option passed to New() is much more friendly, anyway. Test Plan: Behaviour unchanged, covered by existing tests. X-Origin-Diff: phab/D490 GitOrigin-RevId: 465a8244445906bbb12e8fec13ccab0c87ab50f6

commit: 19bb4125a7eb155a51143046a8501b40702aa650 [log] [tgz]
author: Serge Bazanski <serge@nexantic.com> Mon May 04 17:57:50 2020 +0200
committer: Serge Bazanski <serge@nexantic.com> Mon May 04 17:57:50 2020 +0200
tree: 3b3bbc665edc254ba901baaf883ac96712125c18
parent: f64021170952839c39f25e13e8771d8e377af898 [diff]
diff --git a/core/internal/common/supervisor/supervisor.go b/core/internal/common/supervisor/supervisor.go
index 78fa5d2..e839d0a 100644
--- a/core/internal/common/supervisor/supervisor.go
+++ b/core/internal/common/supervisor/supervisor.go

@@ -93,16 +93,35 @@
 
 	// pReq is an interface channel to the lifecycle processor of the supervisor.
 	pReq chan *processorRequest
+
+	// propagate panics, ie. don't catch them.
+	propagatePanic bool
 }
 
+// SupervisorOpt are runtime configurable options for the supervisor.
+type SupervisorOpt func(s *supervisor)
+
+var (
+	// WithPropagatePanic prevents the Supervisor from catching panics in runnables and treating them as failures.
+	// This is useful to enable for testing and local debugging.
+	WithPropagatePanic = func(s *supervisor) {
+		s.propagatePanic = true
+	}
+)
+
 // New creates a new supervisor with its root running the given root runnable.
 // The given context can be used to cancel the entire supervision tree.
-func New(ctx context.Context, logger *zap.Logger, rootRunnable Runnable) {
+func New(ctx context.Context, logger *zap.Logger, rootRunnable Runnable, opts ...SupervisorOpt) {
 	sup := &supervisor{
 		logger:  logger,
 		ilogger: logger.Named("supervisor"),
 		pReq:    make(chan *processorRequest),
 	}
+
+	for _, o := range opts {
+		o(sup)
+	}
+
 	sup.root = newNode("root", rootRunnable, sup, nil)
 
 	go sup.processor(ctx)

diff --git a/core/internal/common/supervisor/supervisor_processor.go b/core/internal/common/supervisor/supervisor_processor.go
index b1d92a4..3fcaef7 100644
--- a/core/internal/common/supervisor/supervisor_processor.go
+++ b/core/internal/common/supervisor/supervisor_processor.go

@@ -19,7 +19,6 @@
 import (
 	"context"
 	"errors"
-	"flag"
 	"fmt"
 	"runtime/debug"
 	"time"
@@ -31,16 +30,6 @@
 // record the result and act accordingly. It is also responsible for detecting and acting upon supervision subtrees that
 // need to be restarted after death (via a 'GC' process)
 
-// flagCatchPanic is a global flag that configures whether panics from runnables are handled and treated as errors, or
-// cause a panic of the entire supervisor.
-// For production use cases, you likely want to catch panics.
-// For debugging and tests, you likely want panics to bubble up and abort the entire supervisor early.
-var flagCatchPanic = true
-
-func init() {
-	flag.BoolVar(&flagCatchPanic, "catch_panic", flagCatchPanic, "Catch service/runnable panics - disable this for development or testing")
-}
-
 // processorRequest is a request for the processor. Only one of the fields can be set.
 type processorRequest struct {
 	schedule *processorRequestSchedule
@@ -133,7 +122,7 @@
 
 	n := s.nodeByDN(r.dn)
 	go func() {
-		if flagCatchPanic {
+		if !s.propagatePanic {
 			defer func() {
 				if rec := recover(); rec != nil {
 					s.pReq <- &processorRequest{

diff --git a/core/internal/common/supervisor/supervisor_test.go b/core/internal/common/supervisor/supervisor_test.go
index 1b440a5..6a9bf42 100644
--- a/core/internal/common/supervisor/supervisor_test.go
+++ b/core/internal/common/supervisor/supervisor_test.go

@@ -196,7 +196,7 @@
 		Signal(ctx, SignalHealthy)
 		Signal(ctx, SignalDone)
 		return nil
-	})
+	}, WithPropagatePanic)
 
 	// Expect both to start running.
 	select {
@@ -230,7 +230,7 @@
 		Signal(ctx, SignalHealthy)
 		Signal(ctx, SignalDone)
 		return nil
-	})
+	}, WithPropagatePanic)
 
 	two.becomeHealthy()
 	// Expect one to start running.
@@ -275,7 +275,7 @@
 		Signal(ctx, SignalHealthy)
 		Signal(ctx, SignalDone)
 		return nil
-	})
+	}, WithPropagatePanic)
 
 	two.becomeHealthy()
 	// Expect one to start running.
@@ -296,18 +296,12 @@
 	// And one should start running again.
 	select {
 	case <-h1:
-	case <-time.After(110 * time.Millisecond):
+	case <-time.After(100 * time.Millisecond):
 		t.Fatalf("runnable 'one' didn't restart on time")
 	}
 }
 
 func TestPanic(t *testing.T) {
-	catchPanicPrev := flagCatchPanic
-	flagCatchPanic = true
-	defer func() {
-		flagCatchPanic = catchPanicPrev
-	}()
-
 	h1 := make(chan struct{})
 	d1 := make(chan struct{})
 	two := newRC()
@@ -367,7 +361,7 @@
 		Signal(ctx, SignalHealthy)
 		Signal(ctx, SignalDone)
 		return nil
-	})
+	}, WithPropagatePanic)
 }
 
 func TestBackoff(t *testing.T) {
@@ -382,7 +376,7 @@
 		Signal(ctx, SignalHealthy)
 		Signal(ctx, SignalDone)
 		return nil
-	})
+	}, WithPropagatePanic)
 
 	one.becomeHealthy()
 	// Die a bunch of times in a row, this brings up the next exponential backoff to over a second.
commit	19bb4125a7eb155a51143046a8501b40702aa650	[log] [tgz]
author	Serge Bazanski <serge@nexantic.com>	Mon May 04 17:57:50 2020 +0200
committer	Serge Bazanski <serge@nexantic.com>	Mon May 04 17:57:50 2020 +0200
tree	3b3bbc665edc254ba901baaf883ac96712125c18
parent	f64021170952839c39f25e13e8771d8e377af898 [diff]