m/p/supervisor: allow signalling supervised processes Using Command.Process.Signal to send signals to supervised processes creates a data race between the Start function (called by Run) which populates Command.Process and the access to that field. Introduce a new option for RunCommand which takes a signal channel and internally solves this race by first calling Start (at which point Command.Process is populated and will not be mutated) and then spawning a Goroutine which does the actual sending. This is safe at least on Linux since Go 1.7 [1]. [1] https://github.com/golang/go/commit/cea29c4a358004d84d8711a07628c2f856b381e8 Change-Id: Ibb76ef8a0434574c4df9ff85545987026518d8c0 Reviewed-on: https://review.monogon.dev/c/monogon/+/1540 Tested-by: Jenkins CI Reviewed-by: Serge Bazanski <serge@monogon.tech>

commit: e3032bd090603b0344727647028d03273bd7d3f5 [log] [tgz]
author: Lorenz Brun <lorenz@monogon.tech> Thu Apr 13 14:43:41 2023 +0200
committer: Lorenz Brun <lorenz@monogon.tech> Tue Apr 18 11:48:46 2023 +0000
tree: 685a127ebd2955e1d586cc0a1277b8d464788fc8
parent: bdc803b54b55b61bb07cd75ba966320e42a9c3b7 [diff]
diff --git a/metropolis/pkg/supervisor/supervisor_support.go b/metropolis/pkg/supervisor/supervisor_support.go
index 699ea18..e7b5d34 100644
--- a/metropolis/pkg/supervisor/supervisor_support.go
+++ b/metropolis/pkg/supervisor/supervisor_support.go

@@ -21,7 +21,9 @@
 
 import (
 	"context"
+	"errors"
 	"net"
+	"os"
 	"os/exec"
 
 	"google.golang.org/grpc"
@@ -63,10 +65,14 @@
 	Signal(ctx, SignalHealthy)
 
 	var parseKLog bool
+	var signal <-chan os.Signal
 	for _, opt := range opts {
 		if opt.parseKlog {
 			parseKLog = true
 		}
+		if opt.signal != nil {
+			signal = opt.signal
+		}
 	}
 
 	if parseKLog {
@@ -83,13 +89,40 @@
 		cmd.Stdout = RawLogger(ctx)
 		cmd.Stderr = RawLogger(ctx)
 	}
-	err := cmd.Run()
+	err := cmd.Start()
+	if err != nil {
+		return err
+	}
+
+	exited := make(chan struct{})
+	if signal != nil {
+		go func() {
+			for {
+				var err error
+				select {
+				case s := <-signal:
+					err = cmd.Process.Signal(s)
+				case <-exited:
+					return
+				}
+				if err != nil && !errors.Is(err, os.ErrProcessDone) {
+					Logger(ctx).Warningf("Failed sending signal to process: %v", err)
+				}
+			}
+		}()
+	}
+
+	err = cmd.Wait()
+	if signal != nil {
+		exited <- struct{}{}
+	}
 	Logger(ctx).Infof("Command returned: %v", err)
 	return err
 }
 
 type RunCommandOption struct {
 	parseKlog bool
+	signal    <-chan os.Signal
 }
 
 // ParseKLog signals that the command being run will return klog-compatible
@@ -100,3 +133,31 @@
 		parseKlog: true,
 	}
 }
+
+// SignalChan takes a channel which can be used to send signals to the
+// supervised process.
+//
+// The given channel will be read from as long as the underlying process is
+// running. If the process doesn't start successfully the channel will not be
+// read. When the process exits, the channel will stop being read.
+//
+// With the above in mind, and also taking into account the inherent lack of
+// reliability in delivering any process-handled signals in POSIX/Linux, it is
+// recommended to use unbuffered channels, always write to them in a non-blocking
+// fashion (eg. in a select { ... default: } block), and to not rely only on the
+// signal delivery mechanism for the intended behaviour.
+//
+// For example, if the signals are used to trigger some configuration reload,
+// these configuration reloads should either be verified and signal delivery should
+// be retried until confirmed successful, or there should be a backup periodic
+// reload performed by the target process independently of signal-based reload
+// triggers.
+//
+// Another example: if the signal delivered is a SIGTERM used to gracefully
+// terminate some process, it should be attempted to be delivered a number of
+// times before finally SIGKILLing the process.
+func SignalChan(s <-chan os.Signal) RunCommandOption {
+	return RunCommandOption{
+		signal: s,
+	}
+}
commit	e3032bd090603b0344727647028d03273bd7d3f5	[log] [tgz]
author	Lorenz Brun <lorenz@monogon.tech>	Thu Apr 13 14:43:41 2023 +0200
committer	Lorenz Brun <lorenz@monogon.tech>	Tue Apr 18 11:48:46 2023 +0000
tree	685a127ebd2955e1d586cc0a1277b8d464788fc8
parent	bdc803b54b55b61bb07cd75ba966320e42a9c3b7 [diff]