m/n/c/n/dns: fix race condition

This fixes a race condition where updates could be lost while CoreDNS
is in the process of starting.

Change-Id: I87e83e2fd2de1bba456f135697815f0ddeb226d9
Reviewed-on: https://review.monogon.dev/c/monogon/+/1520
Tested-by: Jenkins CI
Reviewed-by: Leopold Schabel <leo@monogon.tech>
diff --git a/metropolis/node/core/network/dns/BUILD.bazel b/metropolis/node/core/network/dns/BUILD.bazel
index 7d5419e..4dccf49 100644
--- a/metropolis/node/core/network/dns/BUILD.bazel
+++ b/metropolis/node/core/network/dns/BUILD.bazel
@@ -11,6 +11,7 @@
     deps = [
         "//metropolis/pkg/fileargs",
         "//metropolis/pkg/supervisor",
+        "@org_golang_x_sys//unix",
     ],
 )
 
diff --git a/metropolis/node/core/network/dns/coredns.go b/metropolis/node/core/network/dns/coredns.go
index b57c080..f865403 100644
--- a/metropolis/node/core/network/dns/coredns.go
+++ b/metropolis/node/core/network/dns/coredns.go
@@ -21,10 +21,12 @@
 	"bytes"
 	"context"
 	"fmt"
+	"os"
 	"os/exec"
 	"strings"
 	"sync"
-	"syscall"
+
+	"golang.org/x/sys/unix"
 
 	"source.monogon.dev/metropolis/pkg/fileargs"
 	"source.monogon.dev/metropolis/pkg/supervisor"
@@ -39,6 +41,7 @@
 	
     cache 30
     loadbalance
+    reload 10s
 `
 
 type Service struct {
@@ -46,6 +49,7 @@
 	directives            map[string]ExtraDirective
 	cmd                   *exec.Cmd
 	args                  *fileargs.FileArgs
+	signalChan            chan os.Signal
 	// stateMu guards access to the directives, cmd and args fields
 	stateMu sync.Mutex
 }
@@ -59,6 +63,7 @@
 	return &Service{
 		directives:            map[string]ExtraDirective{},
 		directiveRegistration: directiveRegistration,
+		signalChan:            make(chan os.Signal),
 	}
 }
 
@@ -115,7 +120,7 @@
 	}
 
 	s.stateMu.Unlock()
-	return supervisor.RunCommand(ctx, s.cmd)
+	return supervisor.RunCommand(ctx, s.cmd, supervisor.SignalChan(s.signalChan))
 }
 
 // runRegistration runs the background registration runnable which has a
@@ -141,12 +146,17 @@
 	} else {
 		s.directives[d.ID] = *d
 	}
-	// If the process is not currenty running we're relying on corefile
-	// regeneration on startup
-	if s.cmd != nil && s.cmd.Process != nil && s.cmd.ProcessState == nil {
-		s.args.ArgPath("Corefile", s.makeCorefile(s.args))
-		if err := s.cmd.Process.Signal(syscall.SIGUSR1); err != nil {
-			supervisor.Logger(ctx).Warningf("Failed to send SIGUSR1 to CoreDNS for reload: %v", err)
-		}
+	s.args.ArgPath("Corefile", s.makeCorefile(s.args))
+	if s.args.Error() != nil {
+		supervisor.Logger(ctx).Errorf("error creating new Corefile: %v", s.args.Error())
+	}
+	// If the signal sending thread is not ready, do nothing. Sending signals is
+	// unreliable anyways as the handler might not be installed yet or another
+	// reload might be in progress. Doing it this way saves a significant amount
+	// of complexity.
+	select {
+	case s.signalChan <- unix.SIGUSR1:
+	default:
+		supervisor.Logger(ctx).Infof("Reload signal could not be sent, relying on restart/reload to pick up changes")
 	}
 }