m/t/launch: use cluster resolver
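This makes the cluster launch framework use a resolver to connect to
cluster nodes after credential escrow has been performed. The resolver
is seeded with every known node of the cluster instead of dialing only
the first node directly.
While here, export curatorClient as CuratorClient, wrap the owner
certificate retrieval error with context, and add extra logging around
owner certificate retrieval and node status polling.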
Change-Id: I09b0ec50bdb758e0c91e505a3c51839bb274f959
Reviewed-on: https://review.monogon.dev/c/monogon/+/797
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index 7b5a211..d0325ce 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -30,9 +30,9 @@
"source.monogon.dev/metropolis/cli/pkg/datafile"
"source.monogon.dev/metropolis/node"
- common "source.monogon.dev/metropolis/node"
"source.monogon.dev/metropolis/node/core/identity"
"source.monogon.dev/metropolis/node/core/rpc"
+ "source.monogon.dev/metropolis/node/core/rpc/resolver"
apb "source.monogon.dev/metropolis/proto/api"
cpb "source.monogon.dev/metropolis/proto/common"
"source.monogon.dev/metropolis/test/launch"
@@ -168,13 +168,27 @@
}, nil
}
-// curatorClient returns an authenticated owner connection to a Curator
+// CuratorClient returns an authenticated owner connection to a Curator
// instance within Cluster c, or nil together with an error.
-func (c *Cluster) curatorClient() (*grpc.ClientConn, error) {
+func (c *Cluster) CuratorClient() (*grpc.ClientConn, error) {
if c.authClient == nil {
authCreds := rpc.NewAuthenticatedCredentials(c.Owner, nil)
- remote := net.JoinHostPort(c.NodeIDs[0], common.CuratorServicePort.PortString())
- authClient, err := grpc.Dial(remote, grpc.WithTransportCredentials(authCreds), grpc.WithContextDialer(c.DialNode))
+ r := resolver.New(c.ctxT)
+ r.SetLogger(func(f string, args ...interface{}) {
+ log.Printf("Cluster: client resolver: %s", fmt.Sprintf(f, args...))
+ })
+ for _, n := range c.NodeIDs {
+ ep, err := resolver.NodeWithDefaultPort(n)
+ if err != nil {
+ return nil, fmt.Errorf("could not add node %q by DNS: %v", n, err)
+ }
+ r.AddEndpoint(ep)
+ }
+ authClient, err := grpc.Dial(resolver.MetropolisControlAddress,
+ grpc.WithTransportCredentials(authCreds),
+ grpc.WithResolvers(r),
+ grpc.WithContextDialer(c.DialNode),
+ )
if err != nil {
return nil, fmt.Errorf("dialing with owner credentials failed: %w", err)
}
@@ -531,13 +545,14 @@
cert, err = rpc.RetrieveOwnerCertificate(ctx, aaa, InsecurePrivateKey)
if st, ok := status.FromError(err); ok {
if st.Code() == codes.Unavailable {
+ log.Printf("Cluster: cluster UNAVAILABLE: %v", st.Message())
return err
}
}
return backoff.Permanent(err)
}, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
if err != nil {
- return nil, nil, err
+ return nil, nil, fmt.Errorf("couldn't retrieve owner certificate: %w", err)
}
log.Printf("Cluster: retrieved owner certificate.")
@@ -707,10 +722,10 @@
// Now start the rest of the nodes and register them into the cluster.
// Get an authenticated owner client within the cluster.
- curC, err := cluster.curatorClient()
+ curC, err := cluster.CuratorClient()
if err != nil {
ctxC()
- return nil, fmt.Errorf("curatorClient: %w", err)
+ return nil, fmt.Errorf("CuratorClient: %w", err)
}
mgmt := apb.NewManagementClient(curC)
@@ -845,7 +860,7 @@
id := c.NodeIDs[idx]
// Get an authenticated owner client within the cluster.
- curC, err := c.curatorClient()
+ curC, err := c.CuratorClient()
if err != nil {
return err
}
@@ -891,8 +906,10 @@
for {
cs, err := getNode(ctx, mgmt, id)
if err != nil {
+ log.Printf("Cluster: node get error: %v", err)
return err
}
+ log.Printf("Cluster: node status: %+v", cs)
if cs.Status == nil {
continue
}