m/c/metroctl: implement multi-node installation

This enables metroctl to include the ClusterRegister part of
NodeParameters in generated installer images, making it possible for
newly installed nodes to join an existing cluster.

Change-Id: I648207d70a4bec2ed7acf42e02f2b2c93319f559
Reviewed-on: https://review.monogon.dev/c/monogon/+/822
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/cli/metroctl/BUILD.bazel b/metropolis/cli/metroctl/BUILD.bazel
index 827a7ae..e8dc410 100644
--- a/metropolis/cli/metroctl/BUILD.bazel
+++ b/metropolis/cli/metroctl/BUILD.bazel
@@ -7,6 +7,7 @@
         "install.go",
         "k8scredplugin.go",
         "main.go",
+        "rpc.go",
         "takeownership.go",
     ],
     data = [
@@ -23,6 +24,7 @@
         "//metropolis/cli/pkg/datafile",
         "//metropolis/node",
         "//metropolis/node/core/rpc",
+        "//metropolis/node/core/rpc/resolver",
         "//metropolis/proto/api",
         "@com_github_adrg_xdg//:xdg",
         "@com_github_spf13_cobra//:cobra",
@@ -31,6 +33,7 @@
         "@io_k8s_client_go//tools/clientcmd",
         "@io_k8s_client_go//tools/clientcmd/api",
         "@org_golang_google_grpc//:go_default_library",
+        "@org_golang_x_net//proxy",
     ],
 )
 
diff --git a/metropolis/cli/metroctl/install.go b/metropolis/cli/metroctl/install.go
index 7ea36e3..a3657ae 100644
--- a/metropolis/cli/metroctl/install.go
+++ b/metropolis/cli/metroctl/install.go
@@ -2,6 +2,7 @@
 
 import (
 	"bytes"
+	"context"
 	"crypto/ed25519"
 	"crypto/rand"
 	_ "embed"
@@ -15,6 +16,7 @@
 	"github.com/spf13/cobra"
 
 	"source.monogon.dev/metropolis/cli/metroctl/core"
+	clicontext "source.monogon.dev/metropolis/cli/pkg/context"
 	"source.monogon.dev/metropolis/cli/pkg/datafile"
 	"source.monogon.dev/metropolis/proto/api"
 )
@@ -34,6 +36,12 @@
 	Run:     doGenUSB,
 }
 
+// bootstrap is a flag controlling node parameters included in the installer
+// image. If set, the installed node will bootstrap a new cluster. Otherwise,
+// it will try to connect to the cluster whose endpoints were provided with
+// the --endpoints flag.
+var bootstrap bool
+
 // A PEM block type for a Metropolis initial owner private key
 const ownerKeyType = "METROPOLIS INITIAL OWNER PRIVATE KEY"
 
@@ -65,48 +73,85 @@
 		bundleSize = uint64(bundleStat.Size())
 	}
 
+	ctx := clicontext.WithInterrupt(context.Background())
+
 	// TODO(lorenz): Have a key management story for this
 	if err := os.MkdirAll(filepath.Join(xdg.ConfigHome, "metroctl"), 0700); err != nil {
 		log.Fatalf("Failed to create config directory: %v", err)
 	}
-	var ownerPublicKey ed25519.PublicKey
-	ownerPrivateKeyPEM, err := os.ReadFile(filepath.Join(xdg.ConfigHome, "metroctl/owner-key.pem"))
-	if os.IsNotExist(err) {
-		pub, priv, err := ed25519.GenerateKey(rand.Reader)
-		if err != nil {
-			log.Fatalf("Failed to generate owner private key: %v", err)
-		}
-		pemPriv := pem.EncodeToMemory(&pem.Block{Type: ownerKeyType, Bytes: priv})
-		if err := os.WriteFile(filepath.Join(xdg.ConfigHome, "metroctl/owner-key.pem"), pemPriv, 0600); err != nil {
-			log.Fatalf("Failed to store owner private key: %v", err)
-		}
-		ownerPublicKey = pub
-	} else if err != nil {
-		log.Fatalf("Failed to load owner private key: %v", err)
-	} else {
-		block, _ := pem.Decode(ownerPrivateKeyPEM)
-		if block == nil {
-			log.Fatalf("owner-key.pem contains invalid PEM")
-		}
-		if block.Type != ownerKeyType {
-			log.Fatalf("owner-key.pem contains a PEM block that's not a %v", ownerKeyType)
-		}
-		if len(block.Bytes) != ed25519.PrivateKeySize {
-			log.Fatal("owner-key.pem contains non-Ed25519 key")
-		}
-		ownerPrivateKey := ed25519.PrivateKey(block.Bytes)
-		ownerPublicKey = ownerPrivateKey.Public().(ed25519.PublicKey)
-	}
 
-	// TODO(lorenz): This can only bootstrap right now. As soon as @serge's role
-	// management has stabilized we can replace this with a proper
-	// implementation.
-	params := &api.NodeParameters{
-		Cluster: &api.NodeParameters_ClusterBootstrap_{
-			ClusterBootstrap: &api.NodeParameters_ClusterBootstrap{
-				OwnerPublicKey: ownerPublicKey,
+	var params *api.NodeParameters
+	if bootstrap {
+		var ownerPublicKey ed25519.PublicKey
+		ownerPrivateKeyPEM, err := os.ReadFile(filepath.Join(xdg.ConfigHome, "metroctl/owner-key.pem"))
+		if os.IsNotExist(err) {
+			pub, priv, err := ed25519.GenerateKey(rand.Reader)
+			if err != nil {
+				log.Fatalf("Failed to generate owner private key: %v", err)
+			}
+			pemPriv := pem.EncodeToMemory(&pem.Block{Type: ownerKeyType, Bytes: priv})
+			if err := os.WriteFile(filepath.Join(xdg.ConfigHome, "metroctl/owner-key.pem"), pemPriv, 0600); err != nil {
+				log.Fatalf("Failed to store owner private key: %v", err)
+			}
+			ownerPublicKey = pub
+		} else if err != nil {
+			log.Fatalf("Failed to load owner private key: %v", err)
+		} else {
+			block, _ := pem.Decode(ownerPrivateKeyPEM)
+			if block == nil {
+				log.Fatalf("owner-key.pem contains invalid PEM")
+			}
+			if block.Type != ownerKeyType {
+				log.Fatalf("owner-key.pem contains a PEM block that's not a %v", ownerKeyType)
+			}
+			if len(block.Bytes) != ed25519.PrivateKeySize {
+				log.Fatal("owner-key.pem contains non-Ed25519 key")
+			}
+			ownerPrivateKey := ed25519.PrivateKey(block.Bytes)
+			ownerPublicKey = ownerPrivateKey.Public().(ed25519.PublicKey)
+		}
+
+		params = &api.NodeParameters{
+			Cluster: &api.NodeParameters_ClusterBootstrap_{
+				ClusterBootstrap: &api.NodeParameters_ClusterBootstrap{
+					OwnerPublicKey: ownerPublicKey,
+				},
 			},
-		},
+		}
+	} else {
+		ocert, opkey, err := getCredentials()
+		if err == noCredentialsError {
+			log.Fatalf("In order to create a non-bootstrap node installer, you have to take ownership of the cluster first: %v", err)
+		}
+		if err != nil {
+			log.Fatalf("While retrieving owner credentials: %v", err)
+		}
+		if len(flags.clusterEndpoints) == 0 {
+			log.Fatal("At least one cluster endpoint is required while generating non-bootstrap installer images.")
+		}
+		cc, err := dialCluster(ctx, opkey, ocert, "", flags.clusterEndpoints)
+		if err != nil {
+			log.Fatalf("While dialing the cluster: %v", err)
+		}
+		mgmt := api.NewManagementClient(cc)
+		resT, err := mgmt.GetRegisterTicket(ctx, &api.GetRegisterTicketRequest{})
+		if err != nil {
+			log.Fatalf("While receiving register ticket: %v", err)
+		}
+		resI, err := mgmt.GetClusterInfo(ctx, &api.GetClusterInfoRequest{})
+		if err != nil {
+			log.Fatalf("While receiving cluster directory: %v", err)
+		}
+
+		params = &api.NodeParameters{
+			Cluster: &api.NodeParameters_ClusterRegister_{
+				ClusterRegister: &api.NodeParameters_ClusterRegister{
+					RegisterTicket:   resT.Ticket,
+					ClusterDirectory: resI.ClusterDirectory,
+					CaCertificate:    resI.CaCertificate,
+				},
+			},
+		}
 	}
 
 	installerImageArgs := core.MakeInstallerImageArgs{
@@ -126,5 +171,7 @@
 
 func init() {
 	rootCmd.AddCommand(installCmd)
+
+	genusbCmd.Flags().BoolVar(&bootstrap, "bootstrap", false, "Create a bootstrap installer image.")
 	installCmd.AddCommand(genusbCmd)
 }
diff --git a/metropolis/cli/metroctl/main.go b/metropolis/cli/metroctl/main.go
index 29d44e8..d575c5b 100644
--- a/metropolis/cli/metroctl/main.go
+++ b/metropolis/cli/metroctl/main.go
@@ -10,6 +10,18 @@
 	Short: "metroctl controls Metropolis nodes and clusters.",
 }
 
+type metroctlFlags struct {
+	// clusterEndpoints is a list of the targeted cluster's endpoints, used by
+	// commands that perform RPC on it.
+	clusterEndpoints []string
+}
+
+var flags metroctlFlags
+
+func init() {
+	rootCmd.PersistentFlags().StringArrayVar(&flags.clusterEndpoints, "endpoints", nil, "A list of the target cluster's endpoints.")
+}
+
 func main() {
 	cobra.CheckErr(rootCmd.Execute())
 }
diff --git a/metropolis/cli/metroctl/rpc.go b/metropolis/cli/metroctl/rpc.go
new file mode 100644
index 0000000..b307d06
--- /dev/null
+++ b/metropolis/cli/metroctl/rpc.go
@@ -0,0 +1,72 @@
+package main
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/tls"
+	"crypto/x509"
+	"fmt"
+	"net"
+
+	"golang.org/x/net/proxy"
+	"google.golang.org/grpc"
+
+	"source.monogon.dev/metropolis/node"
+	"source.monogon.dev/metropolis/node/core/rpc"
+	"source.monogon.dev/metropolis/node/core/rpc/resolver"
+)
+
+// dialCluster dials the cluster control address. opkey, the owner's private
+// key, and at least one cluster endpoint are required; an error is returned
+// if either is missing. ocert and proxyAddr are optional and can be left nil
+// and empty, respectively. With a nil ocert the connection is authenticated
+// with ephemeral credentials, restricting the available API surface. If set,
+// proxyAddr must point at a SOCKS5 endpoint.
+func dialCluster(ctx context.Context, opkey ed25519.PrivateKey, ocert *x509.Certificate, proxyAddr string, clusterEndpoints []string) (*grpc.ClientConn, error) {
+	var dialOpts []grpc.DialOption
+
+	if opkey == nil {
+		return nil, fmt.Errorf("an owner's private key must be provided")
+	}
+	if len(clusterEndpoints) == 0 {
+		return nil, fmt.Errorf("at least one cluster endpoint must be provided")
+	}
+
+	if proxyAddr != "" {
+		socksDialer, err := proxy.SOCKS5("tcp", proxyAddr, nil, proxy.Direct)
+		if err != nil {
+			return nil, fmt.Errorf("failed to build a SOCKS dialer: %v", err)
+		}
+		grpcd := func(_ context.Context, addr string) (net.Conn, error) {
+			return socksDialer.Dial("tcp", addr)
+		}
+		dialOpts = append(dialOpts, grpc.WithContextDialer(grpcd))
+	}
+
+	if ocert == nil {
+		creds, err := rpc.NewEphemeralCredentials(opkey, nil)
+		if err != nil {
+			return nil, fmt.Errorf("while building ephemeral credentials: %v", err)
+		}
+		dialOpts = append(dialOpts, grpc.WithTransportCredentials(creds))
+	} else {
+		tlsc := tls.Certificate{
+			Certificate: [][]byte{ocert.Raw},
+			PrivateKey:  opkey,
+		}
+		creds := rpc.NewAuthenticatedCredentials(tlsc, nil)
+		dialOpts = append(dialOpts, grpc.WithTransportCredentials(creds))
+	}
+
+	r := resolver.New(ctx)
+	for _, ep := range clusterEndpoints {
+		r.AddEndpoint(resolver.NodeByHostPort(ep, uint16(node.CuratorServicePort)))
+	}
+	dialOpts = append(dialOpts, grpc.WithResolvers(r))
+
+	c, err := grpc.Dial(resolver.MetropolisControlAddress, dialOpts...)
+	if err != nil {
+		return nil, fmt.Errorf("could not dial: %v", err)
+	}
+	return c, nil
+}