m/n/core/curator: fix listener stuckness on restarts

This makes both gRPC listener runnables (local and public) manage their
own listening sockets, allowing them to restart independently of
each other, and making sure that any listening sockets are cleaned up.

We also fix the existing curator test (which does not exercise the
listeners, just leadership election) to place the curators and their
local sockets in /tmp instead of the default bazel tempdir (as a path
based on the latter exceeds the maximum domain socket path length). This
makes these tests slightly less noisy (as they kept crashing when they
were unable to listen on the local socket).

This should've been caught by a curator listener test, if we had one
(other than the e2e test). I'm growing keen on spending some time
writing enough of a harness to actually do that. Maybe once we have a
follower implementation ready…

Change-Id: I0267292781b6ee8aff1d0557d420bbaa3c3d79f6
Reviewed-on: https://review.monogon.dev/c/monogon/+/304
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/curator/curator_test.go b/metropolis/node/core/curator/curator_test.go
index 14c705a..c67fc99 100644
--- a/metropolis/node/core/curator/curator_test.go
+++ b/metropolis/node/core/curator/curator_test.go
@@ -77,7 +77,7 @@
 
 	// Create ephemeral directory for curator and place it into /tmp.
 	dir := localstorage.EphemeralCuratorDirectory{}
-	tmp, err := ioutil.TempDir("", "curator-test-*")
+	tmp, err := ioutil.TempDir("/tmp", "curator-test-*")
 	if err != nil {
 		t.Fatalf("TempDir: %v", err)
 	}
diff --git a/metropolis/node/core/curator/listener.go b/metropolis/node/core/curator/listener.go
index e92ca0d..3b98315 100644
--- a/metropolis/node/core/curator/listener.go
+++ b/metropolis/node/core/curator/listener.go
@@ -198,35 +198,46 @@
 
 // run is the listener runnable. It listens on gRPC sockets and serves RPCs.
 func (l *listener) run(ctx context.Context) error {
-	supervisor.Logger(ctx).Info("Listener starting...")
+	supervisor.Logger(ctx).Info("Listeners starting...")
 	if err := supervisor.Run(ctx, "dispatcher", l.dispatcher); err != nil {
 		return fmt.Errorf("when starting dispatcher: %w", err)
 	}
 
-	// TODO(q3k): recreate socket if already exists? Is this needed?
-	lisLocal, err := net.ListenUnix("unix", &net.UnixAddr{Name: l.directory.ClientSocket.FullPath(), Net: "unix"})
-	if err != nil {
-		return fmt.Errorf("failed to listen on local curator socket: %w", err)
-	}
-	lisPublic, err := net.Listen("tcp", fmt.Sprintf(":%d", node.CuratorServicePort))
-	if err != nil {
-		return fmt.Errorf("failed to listen on public curator socket: %w", err)
-	}
-
 	srvLocal := grpc.NewServer()
 	srvPublic := grpc.NewServer(grpc.Creds(l.publicCreds))
 
 	cpb.RegisterCuratorServer(srvLocal, l)
 	apb.RegisterAAAServer(srvPublic, l)
 
-	if err := supervisor.Run(ctx, "local", supervisor.GRPCServer(srvLocal, lisLocal, true)); err != nil {
+	err := supervisor.Run(ctx, "local", func(ctx context.Context) error {
+		lisLocal, err := net.ListenUnix("unix", &net.UnixAddr{Name: l.directory.ClientSocket.FullPath(), Net: "unix"})
+		if err != nil {
+			return fmt.Errorf("failed to listen: %w", err)
+		}
+		defer lisLocal.Close()
+
+		runnable := supervisor.GRPCServer(srvLocal, lisLocal, true)
+		return runnable(ctx)
+	})
+	if err != nil {
 		return fmt.Errorf("while starting local gRPC listener: %w", err)
 	}
-	if err := supervisor.Run(ctx, "public", supervisor.GRPCServer(srvPublic, lisPublic, true)); err != nil {
+
+	err = supervisor.Run(ctx, "public", func(ctx context.Context) error {
+		lisPublic, err := net.Listen("tcp", fmt.Sprintf(":%d", node.CuratorServicePort))
+		if err != nil {
+			return fmt.Errorf("failed to listen on public curator socket: %w", err)
+		}
+		defer lisPublic.Close()
+
+		runnable := supervisor.GRPCServer(srvPublic, lisPublic, true)
+		return runnable(ctx)
+	})
+	if err != nil {
 		return fmt.Errorf("while starting public gRPC listener: %w", err)
 	}
 
-	supervisor.Logger(ctx).Info("Listeners running.")
+	supervisor.Logger(ctx).Info("Listeners started.")
 	supervisor.Signal(ctx, supervisor.SignalHealthy)
 
 	// Keep the listener running, as its a parent to the gRPC listener.