m/n/c/r/resolver: allow disabling curator updater

This allows some resolvers to not attempt to contact the cluster for
curator node updates. We use this in the Join and Register resolvers,
as they don't have permission to access this data anyway.

We also generalize Resolver configuration into proper WithX functional
options, and use the opportunity to move resolver creation in the node
code out of the roleserver, where it should have lived in the first
place.
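
For illustration, this is roughly how the Join and Register paths now
build their short-lived resolver with the new options (ctx is the
supervisor runnable context in the surrounding code):

    rctx, rctxC := context.WithCancel(ctx)
    defer rctxC()
    r := resolver.New(rctx,
        // These resolvers cannot list curators from the cluster, so skip
        // the curator updater and rely on the ahead-of-time directory.
        resolver.WithoutCuratorUpdater(),
        // Route resolver logs through the supervisor logger.
        resolver.WithLogger(func(f string, args ...interface{}) {
            supervisor.Logger(ctx).WithAddedStackDepth(1).Infof(f, args...)
        }),
    )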

Change-Id: I1cc227711d784e07959371873029e09fc8cd1b99
Reviewed-on: https://review.monogon.dev/c/monogon/+/808
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index 26ff5d5..37897e5 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -28,6 +28,7 @@
         "//metropolis/node/core/network",
         "//metropolis/node/core/network/hostsfile",
         "//metropolis/node/core/roleserve",
+        "//metropolis/node/core/rpc/resolver",
         "//metropolis/node/core/time",
         "//metropolis/pkg/logtree",
         "//metropolis/pkg/pstore",
diff --git a/metropolis/node/core/cluster/cluster_join.go b/metropolis/node/core/cluster/cluster_join.go
index 47f7589..2daea52 100644
--- a/metropolis/node/core/cluster/cluster_join.go
+++ b/metropolis/node/core/cluster/cluster_join.go
@@ -41,10 +41,11 @@
 	// Build resolver used by the join process, authenticating with join
 	// credentials. Once the join is complete, the rolesever will start its own
 	// long-term resolver.
-	r := resolver.New(ctx)
-	r.SetLogger(func(f string, args ...interface{}) {
+	rctx, rctxC := context.WithCancel(ctx)
+	defer rctxC()
+	r := resolver.New(rctx, resolver.WithoutCuratorUpdater(), resolver.WithLogger(func(f string, args ...interface{}) {
 		supervisor.Logger(ctx).WithAddedStackDepth(1).Infof(f, args...)
-	})
+	}))
 	addedNodes := 0
 	for _, node := range cd.Nodes {
 		if len(node.Addresses) == 0 {
diff --git a/metropolis/node/core/cluster/cluster_register.go b/metropolis/node/core/cluster/cluster_register.go
index 9e67000..7bd88b6 100644
--- a/metropolis/node/core/cluster/cluster_register.go
+++ b/metropolis/node/core/cluster/cluster_register.go
@@ -100,10 +100,11 @@
 	// Build resolver used by the register process, authenticating with ephemeral
 	// credentials. Once the join is complete, the rolesever will start its own
 	// long-term resolver.
-	r := resolver.New(ctx)
-	r.SetLogger(func(f string, args ...interface{}) {
+	rctx, rctxC := context.WithCancel(ctx)
+	defer rctxC()
+	r := resolver.New(rctx, resolver.WithoutCuratorUpdater(), resolver.WithLogger(func(f string, args ...interface{}) {
 		supervisor.Logger(ctx).WithAddedStackDepth(1).Infof(f, args...)
-	})
+	}))
 	addedNodes := 0
 	for _, node := range register.ClusterDirectory.Nodes {
 		if len(node.Addresses) == 0 {
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index ca71fa7..c6b0bd3 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -30,6 +30,7 @@
 	"source.monogon.dev/metropolis/node/core/network"
 	"source.monogon.dev/metropolis/node/core/network/hostsfile"
 	"source.monogon.dev/metropolis/node/core/roleserve"
+	"source.monogon.dev/metropolis/node/core/rpc/resolver"
 	timesvc "source.monogon.dev/metropolis/node/core/time"
 	"source.monogon.dev/metropolis/pkg/logtree"
 	"source.monogon.dev/metropolis/pkg/supervisor"
@@ -102,6 +103,11 @@
 	// Make context for supervisor. We cancel it when we reach the trapdoor.
 	ctxS, ctxC := context.WithCancel(context.Background())
 
+	// Make node-wide cluster resolver.
+	res := resolver.New(ctxS, resolver.WithLogger(func(f string, args ...interface{}) {
+		lt.MustLeveledFor("resolver").WithAddedStackDepth(1).Infof(f, args...)
+	}))
+
 	// Start root initialization code as a supervisor one-shot runnable. This
 	// means waiting for the network, starting the cluster manager, and then
 	// starting all services related to the node's roles.
@@ -127,6 +133,7 @@
 		rs := roleserve.New(roleserve.Config{
 			StorageRoot: root,
 			Network:     networkSvc,
+			Resolver:    res,
 		})
 		if err := supervisor.Run(ctx, "role", rs.Run); err != nil {
 			close(trapdoor)
diff --git a/metropolis/node/core/roleserve/roleserve.go b/metropolis/node/core/roleserve/roleserve.go
index 23076a4..5a0e2f6 100644
--- a/metropolis/node/core/roleserve/roleserve.go
+++ b/metropolis/node/core/roleserve/roleserve.go
@@ -63,6 +63,12 @@
 
 	// Network is a handle to the network service, used by workloads.
 	Network *network.Service
+
+	// Resolver is the main, long-lived, authenticated cluster resolver used for
+	// all subsequent gRPC calls by the subordinates of the roleserver. It is
+	// created by the node startup code that instantiates the roleserver, and is
+	// seeded with node information as the first subordinate runs DialCurator().
+	Resolver *resolver.Resolver
 }
 
 // Service is the roleserver/“Role Server” service. See the package-level
@@ -80,23 +86,12 @@
 	heartbeat    *workerHeartbeat
 	kubernetes   *workerKubernetes
 	rolefetch    *workerRoleFetch
-
-	// resolver is the main, long-lived, authenticated cluster resolver that is used
-	// for all subsequent gRPC calls by the subordinates of the roleserver. It is
-	// created early in the roleserver lifecycle, and is seeded with node
-	// information as the first subordinate runs DialCurator().
-	resolver *resolver.Resolver
 }
 
 // New creates a Role Server services from a Config.
 func New(c Config) *Service {
-	// Run the resolver forever in the background, making sure to keep it as
-	// long-lived as possible.
-	rctx := context.Background()
-
 	s := &Service{
-		Config:   c,
-		resolver: resolver.New(rctx),
+		Config: c,
 	}
 	s.controlPlane = &workerControlPlane{
 		storageRoot: s.StorageRoot,
@@ -104,7 +99,7 @@
 		bootstrapData:     &s.bootstrapData,
 		clusterMembership: &s.ClusterMembership,
 		localRoles:        &s.localRoles,
-		resolver:          s.resolver,
+		resolver:          s.Resolver,
 	}
 
 	s.statusPush = &workerStatusPush{
@@ -144,11 +139,11 @@
 
 	// This is the first time we have the node ID, tell the resolver that it's
 	// available on the loopback interface.
-	s.resolver.AddOverride(nid, resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
+	s.Resolver.AddOverride(nid, resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
 
 	s.ClusterMembership.set(&ClusterMembership{
 		pubkey:   pubkey,
-		resolver: s.resolver,
+		resolver: s.Resolver,
 	})
 	s.bootstrapData.set(&bootstrapData{
 		nodePrivateKey:     privkey,
@@ -162,36 +157,32 @@
 func (s *Service) ProvideRegisterData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
 	// This is the first time we have the node ID, tell the resolver that it's
 	// available on the loopback interface.
-	s.resolver.AddOverride(credentials.ID(), resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
+	s.Resolver.AddOverride(credentials.ID(), resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
 
 	s.ClusterMembership.set(&ClusterMembership{
 		remoteCurators: directory,
 		credentials:    &credentials,
 		pubkey:         credentials.PublicKey(),
-		resolver:       s.resolver,
+		resolver:       s.Resolver,
 	})
 }
 
 func (s *Service) ProvideJoinData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
 	// This is the first time we have the node ID, tell the resolver that it's
 	// available on the loopback interface.
-	s.resolver.AddOverride(credentials.ID(), resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
+	s.Resolver.AddOverride(credentials.ID(), resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
 
 	s.ClusterMembership.set(&ClusterMembership{
 		remoteCurators: directory,
 		credentials:    &credentials,
 		pubkey:         credentials.PublicKey(),
-		resolver:       s.resolver,
+		resolver:       s.Resolver,
 	})
 }
 
 // Run the Role Server service, which uses intermediary workload launchers to
 // start/stop subordinate services as the Node's roles change.
 func (s *Service) Run(ctx context.Context) error {
-	s.resolver.SetLogger(func(f string, args ...interface{}) {
-		supervisor.Logger(ctx).WithAddedStackDepth(2).Infof(f, args...)
-	})
-
 	supervisor.Run(ctx, "controlplane", s.controlPlane.run)
 	supervisor.Run(ctx, "kubernetes", s.kubernetes.run)
 	supervisor.Run(ctx, "statuspush", s.statusPush.run)
diff --git a/metropolis/node/core/rpc/resolver/processor.go b/metropolis/node/core/rpc/resolver/processor.go
index 08f56b5..cc95896 100644
--- a/metropolis/node/core/rpc/resolver/processor.go
+++ b/metropolis/node/core/rpc/resolver/processor.go
@@ -175,7 +175,9 @@
 		case req.ds != nil:
 			// Dial options Set
 			if !running {
-				go r.runCuratorUpdater(ctx, req.ds.options)
+				if !r.noCuratorUpdater {
+					go r.runCuratorUpdater(ctx, req.ds.options)
+				}
 				go r.runLeaderUpdater(ctx, req.ds.options)
 			}
 			running = true
diff --git a/metropolis/node/core/rpc/resolver/resolver.go b/metropolis/node/core/rpc/resolver/resolver.go
index 41fa8a1..f5c011f 100644
--- a/metropolis/node/core/rpc/resolver/resolver.go
+++ b/metropolis/node/core/rpc/resolver/resolver.go
@@ -81,27 +81,49 @@
 	// debug logs from the running ClusterResolver, subordinate watchers and
 	// updaters.
 	logger func(f string, args ...interface{})
-}
 
-// SetLogger configures a given function as the logger of the resolver. The
-// function should take a printf-style format string and arguments.
-func (r *Resolver) SetLogger(logger func(f string, args ...interface{})) {
-	r.logger = logger
+	// noCuratorUpdater makes the resolver not run a curator updater. This is used
+	// in one-shot resolvers which are given an ahead-of-time list of curators to
+	// attempt to contact, eg. joining and registering nodes.
+	noCuratorUpdater bool
 }
 
 // New starts a new Resolver, ready to be used as a gRPC via WithResolvers.
 // However, it needs to be populated with at least one endpoint first (via
 // AddEndpoint).
-func New(ctx context.Context) *Resolver {
+func New(ctx context.Context, opts ...ResolverOption) *Resolver {
 	r := &Resolver{
 		reqC:   make(chan *request),
 		ctx:    ctx,
 		logger: func(string, ...interface{}) {},
 	}
+	for _, opt := range opts {
+		opt(r)
+	}
 	go r.run(ctx)
 	return r
 }
 
+// ResolverOption is an option passed to a Resolver being created.
+type ResolverOption func(r *Resolver)
+
+// WithLogger configures a given function as the logger of the resolver. The
+// function should take a printf-style format string and arguments.
+func WithLogger(logger func(f string, args ...interface{})) ResolverOption {
+	return func(r *Resolver) {
+		r.logger = logger
+	}
+}
+
+// WithoutCuratorUpdater configures the Resolver to not attempt to update
+// curators from the cluster. This is useful in one-shot resolvers, eg.
+// unauthenticated ones.
+func WithoutCuratorUpdater() ResolverOption {
+	return func(r *Resolver) {
+		r.noCuratorUpdater = true
+	}
+}
+
 // NodeEndpoint is the gRPC endpoint (host+port) of a Metropolis control plane
 // node.
 type NodeEndpoint struct {