m/t/launch: use cluster resolver

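This makes the cluster launch framework use a resolver to connect to
cluster nodes after credential escrow has been performed. The resolver
is seeded with every known node ID instead of dialing only the first
node.

As part of this, curatorClient is exported as CuratorClient, and extra
logging is added around owner certificate retrieval and node status
polling.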

Change-Id: I09b0ec50bdb758e0c91e505a3c51839bb274f959
Reviewed-on: https://review.monogon.dev/c/monogon/+/797
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index 7b5a211..d0325ce 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -30,9 +30,9 @@
 
 	"source.monogon.dev/metropolis/cli/pkg/datafile"
 	"source.monogon.dev/metropolis/node"
-	common "source.monogon.dev/metropolis/node"
 	"source.monogon.dev/metropolis/node/core/identity"
 	"source.monogon.dev/metropolis/node/core/rpc"
+	"source.monogon.dev/metropolis/node/core/rpc/resolver"
 	apb "source.monogon.dev/metropolis/proto/api"
 	cpb "source.monogon.dev/metropolis/proto/common"
 	"source.monogon.dev/metropolis/test/launch"
@@ -168,13 +168,27 @@
 	}, nil
 }
 
-// curatorClient returns an authenticated owner connection to a Curator
+// CuratorClient returns an authenticated owner connection to a Curator
 // instance within Cluster c, or nil together with an error.
-func (c *Cluster) curatorClient() (*grpc.ClientConn, error) {
+func (c *Cluster) CuratorClient() (*grpc.ClientConn, error) {
 	if c.authClient == nil {
 		authCreds := rpc.NewAuthenticatedCredentials(c.Owner, nil)
-		remote := net.JoinHostPort(c.NodeIDs[0], common.CuratorServicePort.PortString())
-		authClient, err := grpc.Dial(remote, grpc.WithTransportCredentials(authCreds), grpc.WithContextDialer(c.DialNode))
+		r := resolver.New(c.ctxT)
+		r.SetLogger(func(f string, args ...interface{}) {
+			log.Printf("Cluster: client resolver: %s", fmt.Sprintf(f, args...))
+		})
+		for _, n := range c.NodeIDs {
+			ep, err := resolver.NodeWithDefaultPort(n)
+			if err != nil {
+				return nil, fmt.Errorf("could not add node %q by DNS: %v", n, err)
+			}
+			r.AddEndpoint(ep)
+		}
+		authClient, err := grpc.Dial(resolver.MetropolisControlAddress,
+			grpc.WithTransportCredentials(authCreds),
+			grpc.WithResolvers(r),
+			grpc.WithContextDialer(c.DialNode),
+		)
 		if err != nil {
 			return nil, fmt.Errorf("dialing with owner credentials failed: %w", err)
 		}
@@ -531,13 +545,14 @@
 		cert, err = rpc.RetrieveOwnerCertificate(ctx, aaa, InsecurePrivateKey)
 		if st, ok := status.FromError(err); ok {
 			if st.Code() == codes.Unavailable {
+				log.Printf("Cluster: cluster UNAVAILABLE: %v", st.Message())
 				return err
 			}
 		}
 		return backoff.Permanent(err)
 	}, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, fmt.Errorf("couldn't retrieve owner certificate: %w", err)
 	}
 	log.Printf("Cluster: retrieved owner certificate.")
 
@@ -707,10 +722,10 @@
 	// Now start the rest of the nodes and register them into the cluster.
 
 	// Get an authenticated owner client within the cluster.
-	curC, err := cluster.curatorClient()
+	curC, err := cluster.CuratorClient()
 	if err != nil {
 		ctxC()
-		return nil, fmt.Errorf("curatorClient: %w", err)
+		return nil, fmt.Errorf("CuratorClient: %w", err)
 	}
 	mgmt := apb.NewManagementClient(curC)
 
@@ -845,7 +860,7 @@
 	id := c.NodeIDs[idx]
 
 	// Get an authenticated owner client within the cluster.
-	curC, err := c.curatorClient()
+	curC, err := c.CuratorClient()
 	if err != nil {
 		return err
 	}
@@ -891,8 +906,10 @@
 	for {
 		cs, err := getNode(ctx, mgmt, id)
 		if err != nil {
+			log.Printf("Cluster: node get error: %v", err)
 			return err
 		}
+		log.Printf("Cluster: node status: %+v", cs)
 		if cs.Status == nil {
 			continue
 		}