m/test/e2e: retrieve owner credentials in e2e test

This exercises AAA.Escrow for the initial cluster owner within our large
e2e test suite. The certificate retrieved this way is not yet used, but
is verified to be emitted for the correct public key.

Change-Id: Id33178cd223e3180d6f834c6fac94d6d657d5349
Reviewed-on: https://review.monogon.dev/c/monogon/+/290
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/test/e2e/main_test.go b/metropolis/test/e2e/main_test.go
index 6d9b1db..0fe1378 100644
--- a/metropolis/test/e2e/main_test.go
+++ b/metropolis/test/e2e/main_test.go
@@ -17,7 +17,9 @@
 package e2e
 
 import (
+	"bytes"
 	"context"
+	"crypto/ed25519"
 	"errors"
 	"fmt"
 	"log"
@@ -77,6 +79,7 @@
 
 	// Set a global timeout to make sure this terminates
 	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
+	defer cancel()
 	portMap, err := launch.ConflictFreePortMap(launch.NodePorts)
 	if err != nil {
 		t.Fatalf("Failed to acquire ports for e2e test: %v", err)
@@ -90,7 +93,7 @@
 			SerialPort: os.Stdout,
 			NodeParameters: &apb.NodeParameters{
 				Cluster: &apb.NodeParameters_ClusterBootstrap_{
-					ClusterBootstrap: &apb.NodeParameters_ClusterBootstrap{},
+					ClusterBootstrap: launch.InsecureClusterBootstrap,
 				},
 			},
 		}); err != nil {
@@ -98,21 +101,45 @@
 		}
 		close(procExit)
 	}()
-	grpcClient, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure())
+
+	grpcDebug, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure())
 	if err != nil {
-		fmt.Printf("Failed to dial debug service (is it running): %v\n", err)
+		log.Printf("Failed to dial debug service (is it running?): %v", err)
+		return
 	}
-	debugClient := apb.NewNodeDebugServiceClient(grpcClient)
+	debug := apb.NewNodeDebugServiceClient(grpcDebug)
 
 	// This exists to keep the parent around while all the children race.
 	// It currently tests both a set of OS-level conditions and Kubernetes
 	// Deployments and StatefulSets
 	t.Run("RunGroup", func(t *testing.T) {
+		t.Run("Connect to Curator", func(t *testing.T) {
+			testEventual(t, "Retrieving owner credentials succesful", ctx, 60*time.Second, func(ctx context.Context) error {
+				initClient, err := launch.NewInitialClient(&launch.InitialClientOptions{
+					Remote:  fmt.Sprintf("localhost:%v", portMap[common.CuratorServicePort]),
+					Private: launch.InsecurePrivateKey,
+				})
+				if err != nil {
+					return fmt.Errorf("NewInitialClient: %w", err)
+				}
+
+				cert, err := initClient.RetrieveOwnerCertificate(ctx)
+				if err != nil {
+					return fmt.Errorf("RetrieveOwnerCertificate: %w", err)
+				}
+
+				if !bytes.Equal(cert.PrivateKey.(ed25519.PrivateKey), launch.InsecurePrivateKey) {
+					t.Fatalf("Received certificate for wrong private key")
+				}
+
+				return nil
+			})
+		})
 		t.Run("Get Kubernetes Debug Kubeconfig", func(t *testing.T) {
 			t.Parallel()
 			selfCtx, cancel := context.WithTimeout(ctx, largeTestTimeout)
 			defer cancel()
-			clientSet, err := GetKubeClientSet(selfCtx, debugClient, portMap[common.KubernetesAPIPort])
+			clientSet, err := GetKubeClientSet(selfCtx, debug, portMap[common.KubernetesAPIPort])
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/metropolis/test/launch/BUILD.bazel b/metropolis/test/launch/BUILD.bazel
index 3f9d8fd..eeb78b5 100644
--- a/metropolis/test/launch/BUILD.bazel
+++ b/metropolis/test/launch/BUILD.bazel
@@ -3,6 +3,7 @@
 go_library(
     name = "go_default_library",
     srcs = [
+        "client.go",
         "insecure_key.go",
         "launch.go",
     ],
@@ -15,6 +16,7 @@
         "@com_github_golang_protobuf//proto:go_default_library",
         "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
         "@org_golang_google_grpc//:go_default_library",
+        "@org_golang_google_grpc//credentials:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/metropolis/test/launch/client.go b/metropolis/test/launch/client.go
new file mode 100644
index 0000000..5913f14
--- /dev/null
+++ b/metropolis/test/launch/client.go
@@ -0,0 +1,149 @@
+package launch
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/tls"
+	"crypto/x509"
+	"encoding/pem"
+	"fmt"
+	"math/big"
+	"time"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+
+	apb "source.monogon.dev/metropolis/proto/api"
+)
+
+// InitialClient implements a gRPC wrapper for dialing a Metropolis cluster
+// while not (yet) authenticated, i.e. using only a self-signed public
+// certificate to prove ownership of an ed25519 public key.
+//
+// This is used to dial a cluster's AAA.Escrow service after cluster bootstrap
+// using the owner key configured in NodeParameters.
+type InitialClient struct {
+	// conn is the underlying dialed gRPC connection to the cluster.
+	conn *grpc.ClientConn
+	// aaa is a stub to the AAA service running on conn.
+	aaa apb.AAAClient
+	// options are the options this client has been opened with.
+	options *InitialClientOptions
+}
+
+type InitialClientOptions struct {
+	// Remote is an address:port to connect to. This should be a cluster node's
+	// curator port.
+	Remote string
+	// Private is the cluster owner private key, which should correspond to the
+	// owner public key defined in NodeParameters.ClusterBootstrap when a cluster
+	// is bootstrapped.
+	Private ed25519.PrivateKey
+}
+
+// NewInitialClient dials a cluster's curator service using just a self-signed
+// certificate and can be used to then escrow real cluster credentials for the
+// owner.
+//
+// MVP SECURITY: this does not verify the identity of the cluster/node. However,
+// because an intercepting party cannot forward the presented client
+// certificate to a real cluster, it cannot gain administrative access to the
+// expected cluster. The most an interceptor can do is pretend to be the
+// cluster which was expected.
+func NewInitialClient(o *InitialClientOptions) (*InitialClient, error) {
+	template := x509.Certificate{
+		SerialNumber: big.NewInt(1),
+		NotBefore:    time.Now(),
+		NotAfter:     time.Now().Add(time.Hour),
+
+		KeyUsage:              x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
+		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
+		BasicConstraintsValid: true,
+	}
+	certificateBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, o.Private.Public(), o.Private)
+	if err != nil {
+		return nil, fmt.Errorf("when generating self-signed certificate: %w", err)
+	}
+	keyBytes, err := x509.MarshalPKCS8PrivateKey(o.Private)
+	if err != nil {
+		return nil, fmt.Errorf("when marshaling private key: %w", err)
+	}
+	key := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: keyBytes})
+	certificate := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certificateBytes})
+	clientCert, err := tls.X509KeyPair(certificate, key)
+	if err != nil {
+		return nil, fmt.Errorf("when building self-signed TLS client certificate: %w", err)
+	}
+
+	creds := credentials.NewTLS(&tls.Config{
+		Certificates: []tls.Certificate{
+			clientCert,
+		},
+		InsecureSkipVerify:    true,
+		VerifyPeerCertificate: o.verify,
+	})
+
+	conn, err := grpc.Dial(o.Remote, grpc.WithTransportCredentials(creds))
+	if err != nil {
+		return nil, fmt.Errorf("when dialing: %w", err)
+	}
+
+	return &InitialClient{
+		conn:    conn,
+		aaa:     apb.NewAAAClient(conn),
+		options: o,
+	}, nil
+}
+
+// Close must be called when the InitialClient is not used anymore. This closes
+// the underlying gRPC connection(s).
+func (i *InitialClient) Close() error {
+	return i.conn.Close()
+}
+
+func (o *InitialClientOptions) verify(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
+	// SECURITY: Always permit all server certificates. See NewInitialClient godoc
+	// for more information.
+	return nil
+}
+
+// RetrieveOwnerCertificate uses AAA.Escrow to retrieve a cluster manager
+// certificate for the initial owner of the cluster, authenticated by the
+// public/private key set in the cluster's NodeParameters.ClusterBootstrap.
+//
+// The retrieved certificate can be used to dial further cluster RPCs.
+func (i *InitialClient) RetrieveOwnerCertificate(ctx context.Context) (*tls.Certificate, error) {
+	srv, err := i.aaa.Escrow(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("when opening Escrow RPC: %w", err)
+	}
+	if err := srv.Send(&apb.EscrowFromClient{
+		Parameters: &apb.EscrowFromClient_Parameters{
+			RequestedIdentityName: "owner",
+			PublicKey:             i.options.Private.Public().(ed25519.PublicKey),
+		},
+	}); err != nil {
+		return nil, fmt.Errorf("when sending client parameters: %w", err)
+	}
+	resp, err := srv.Recv()
+	if err != nil {
+		return nil, fmt.Errorf("when receiving server message: %w", err)
+	}
+	if len(resp.EmittedCertificate) == 0 {
+		return nil, fmt.Errorf("expected certificate, instead got needed proofs: %+v", resp.Needed)
+	}
+
+	certificateBytes := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: resp.EmittedCertificate})
+	key, err := x509.MarshalPKCS8PrivateKey(i.options.Private)
+	if err != nil {
+		return nil, fmt.Errorf("while marshalling private key: %w", err)
+	}
+	keyBytes := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: key})
+	ownerCert, err := tls.X509KeyPair(certificateBytes, keyBytes)
+	if err != nil {
+		return nil, fmt.Errorf("could not build certificate from data received from cluster: %w", err)
+	}
+
+	return &ownerCert, nil
+}