m/n/c/curator: implement IssueCertificate for Kubernetes Workers

This is not yet used in this change, but will be very soon.

Change-Id: I0283941f15211515537d2b23e0c8cd72dc2d77c5
Reviewed-on: https://review.monogon.dev/c/monogon/+/1378
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/curator/BUILD.bazel b/metropolis/node/core/curator/BUILD.bazel
index 2283b34..bb30bbe 100644
--- a/metropolis/node/core/curator/BUILD.bazel
+++ b/metropolis/node/core/curator/BUILD.bazel
@@ -9,6 +9,7 @@
         "impl_follower.go",
         "impl_leader.go",
         "impl_leader_aaa.go",
+        "impl_leader_certificates.go",
         "impl_leader_curator.go",
         "impl_leader_management.go",
         "listener.go",
@@ -27,6 +28,7 @@
         "//metropolis/node/core/curator/proto/private",
         "//metropolis/node/core/identity",
         "//metropolis/node/core/rpc",
+        "//metropolis/node/kubernetes/pki",
         "//metropolis/pkg/event",
         "//metropolis/pkg/event/etcd",
         "//metropolis/pkg/event/memory",
diff --git a/metropolis/node/core/curator/impl_leader_certificates.go b/metropolis/node/core/curator/impl_leader_certificates.go
new file mode 100644
index 0000000..f42b4a4
--- /dev/null
+++ b/metropolis/node/core/curator/impl_leader_certificates.go
@@ -0,0 +1,102 @@
+package curator
+
+import (
+	"context"
+	"crypto/ed25519"
+
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+
+	ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
+	"source.monogon.dev/metropolis/node/core/identity"
+	"source.monogon.dev/metropolis/node/core/rpc"
+	kpki "source.monogon.dev/metropolis/node/kubernetes/pki"
+)
+
+// issueKubernetesWorkerCertificates issues the set of certificates needed by a
+// Kubernetes worker node: the kubelet server and client certificates (both for
+// req.KubeletPubkey) and the CSI provisioner client certificate (for
+// req.CsiProvisionerPubkey). The identity CA certificate is returned alongside
+// them.
+//
+// Malformed public keys result in an InvalidArgument status; any failure to
+// build or ensure a certificate results in an Unavailable status.
+func issueKubernetesWorkerCertificates(ctx context.Context, kp *kpki.PKI, nodeID string, req *ipb.IssueCertificateRequest_KubernetesWorker) (*ipb.IssueCertificateResponse, error) {
+	idca, err := kp.Certificates[kpki.IdCA].Ensure(ctx, kp.KV)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not ensure CA certificate: %v", err)
+	}
+
+	if len(req.KubeletPubkey) != ed25519.PublicKeySize {
+		return nil, status.Error(codes.InvalidArgument, "kubelet pubkey must be set and valid")
+	}
+	if len(req.CsiProvisionerPubkey) != ed25519.PublicKeySize {
+		return nil, status.Error(codes.InvalidArgument, "worker services pubkey must be set and valid")
+	}
+
+	// Kubelet and CSIProvisioner can fail (eg. when the base PKI cannot be
+	// ensured), in which case the returned certificates are nil and must not be
+	// dereferenced.
+	kubeletServer, kubeletClient, err := kp.Kubelet(ctx, nodeID, req.KubeletPubkey)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not build kubelet certificates: %v", err)
+	}
+	kubeletServerCert, err := kubeletServer.Ensure(ctx, kp.KV)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not ensure kubelet server certificate: %v", err)
+	}
+	kubeletClientCert, err := kubeletClient.Ensure(ctx, kp.KV)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not ensure kubelet client certificate: %v", err)
+	}
+
+	csiClient, err := kp.CSIProvisioner(ctx, nodeID, req.CsiProvisionerPubkey)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not build CSI provisioner certificate: %v", err)
+	}
+	csiClientCert, err := csiClient.Ensure(ctx, kp.KV)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not ensure CSI provisioner client certificate: %v", err)
+	}
+
+	return &ipb.IssueCertificateResponse{
+		Kind: &ipb.IssueCertificateResponse_KubernetesWorker_{
+			KubernetesWorker: &ipb.IssueCertificateResponse_KubernetesWorker{
+				IdentityCaCertificate:     idca,
+				KubeletServerCertificate:  kubeletServerCert,
+				KubeletClientCertificate:  kubeletClientCert,
+				CsiProvisionerCertificate: csiClientCert,
+			},
+		},
+	}, nil
+}
+
+// IssueCertificate implements the Curator IssueCertificate RPC on the leader.
+// The caller must be a node; Kubernetes worker certificates are only issued to
+// nodes that have the Kubernetes worker role.
+func (l *leaderCurator) IssueCertificate(ctx context.Context, req *ipb.IssueCertificateRequest) (*ipb.IssueCertificateResponse, error) {
+	peer := rpc.GetPeerInfo(ctx)
+	if peer == nil || peer.Node == nil {
+		return nil, status.Error(codes.PermissionDenied, "only nodes can request certificates")
+	}
+	nodeID := identity.NodeID(peer.Node.PublicKey)
+	node, err := nodeLoad(ctx, l.leadership, nodeID)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not load node info: %v", err)
+	}
+	kubePKI, err := kpki.FromLocalConsensus(ctx, l.consensus)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not get kube PKI: %v", err)
+	}
+
+	switch kind := req.Kind.(type) {
+	case *ipb.IssueCertificateRequest_KubernetesWorker_:
+		if node.kubernetesWorker == nil {
+			rpc.Trace(ctx).Printf("refusing to issue kube worker certificates for node %s", nodeID)
+			return nil, status.Errorf(codes.PermissionDenied, "node %s cannot request a kubelet certificate", nodeID)
+		}
+		return issueKubernetesWorkerCertificates(ctx, kubePKI, node.ID(), kind.KubernetesWorker)
+	default:
+		return nil, status.Error(codes.InvalidArgument, "certificate kind must be set")
+	}
+}
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
index 4d26b43..cb87024 100644
--- a/metropolis/node/core/curator/impl_leader_test.go
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -1176,3 +1176,114 @@
 		t.Errorf("Wanted leader port %d, got %d", want, got)
 	}
 }
+
+// TestIssueKubernetesWorkerCertificate exercises whether we can retrieve
+// Kubernetes Worker certificates from the curator, and that certificates can
+// only be re-issued for the public keys they were first issued for.
+func TestIssueKubernetesWorkerCertificate(t *testing.T) {
+	cl := fakeLeader(t)
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+
+	mgmt := apb.NewManagementClient(cl.mgmtConn)
+	true_ := true
+	_, err := mgmt.UpdateNodeRoles(ctx, &apb.UpdateNodeRolesRequest{
+		Node: &apb.UpdateNodeRolesRequest_Id{
+			Id: cl.localNodeID,
+		},
+		KubernetesWorker: &true_,
+	})
+	if err != nil {
+		t.Fatalf("Could not make node into Kubernetes worker: %v", err)
+	}
+
+	// genPubkey returns a freshly generated ED25519 public key.
+	genPubkey := func() ed25519.PublicKey {
+		t.Helper()
+		pub, _, err := ed25519.GenerateKey(rand.Reader)
+		if err != nil {
+			t.Fatalf("Could not generate key: %v", err)
+		}
+		return pub
+	}
+
+	curator := ipb.NewCuratorClient(cl.localNodeConn)
+	// issue requests Kubernetes worker certificates for the given pubkeys.
+	issue := func(kubelet, csi ed25519.PublicKey) (*ipb.IssueCertificateResponse, error) {
+		return curator.IssueCertificate(ctx, &ipb.IssueCertificateRequest{
+			Kind: &ipb.IssueCertificateRequest_KubernetesWorker_{
+				KubernetesWorker: &ipb.IssueCertificateRequest_KubernetesWorker{
+					KubeletPubkey:        kubelet,
+					CsiProvisionerPubkey: csi,
+				},
+			},
+		})
+	}
+
+	// parse decodes a DER certificate, failing the test if it is malformed.
+	parse := func(what string, der []byte) *x509.Certificate {
+		t.Helper()
+		cert, err := x509.ParseCertificate(der)
+		if err != nil {
+			t.Fatalf("Could not parse %s: %v", what, err)
+		}
+		return cert
+	}
+
+	// Issue certificates for some random pubkeys.
+	kpub := genPubkey()
+	cpub := genPubkey()
+	res, err := issue(kpub, cpub)
+	if err != nil {
+		t.Fatalf("IssueCertificate: %v", err)
+	}
+
+	// Use checked assertions so that an unexpected type fails the test instead of panicking.
+	kind, ok := res.Kind.(*ipb.IssueCertificateResponse_KubernetesWorker_)
+	if !ok {
+		t.Fatalf("IssueCertificate returned unexpected kind %T", res.Kind)
+	}
+	kw := kind.KubernetesWorker
+	idca := parse("IdCA cert", kw.IdentityCaCertificate)
+	scert := parse("server certificate", kw.KubeletServerCertificate)
+	ccert := parse("client certificate", kw.KubeletClientCertificate)
+	pcert := parse("CSI provisioner certificate", kw.CsiProvisionerCertificate)
+
+	// Every issued certificate must be signed by IdCA and carry the requested key.
+	for _, tc := range []struct {
+		name string
+		cert *x509.Certificate
+		key  ed25519.PublicKey
+	}{
+		{"Server", scert, kpub},
+		{"Client", ccert, kpub},
+		{"CSI provisioner", pcert, cpub},
+	} {
+		if err := tc.cert.CheckSignatureFrom(idca); err != nil {
+			t.Errorf("%s certificate not signed by IdCA: %v", tc.name, err)
+		}
+		got, ok := tc.cert.PublicKey.(ed25519.PublicKey)
+		if !ok {
+			t.Errorf("%s certificate has unexpected public key type %T", tc.name, tc.cert.PublicKey)
+			continue
+		}
+		if !bytes.Equal(got, tc.key) {
+			t.Errorf("%s certificate not emitted for requested key", tc.name)
+		}
+	}
+
+	// Try issuing again for the same pubkeys. This should work.
+	if _, err := issue(kpub, cpub); err != nil {
+		t.Errorf("Certificate should have been re-issued: %v", err)
+	}
+
+	// Try issuing again for other pubkeys. These should be rejected.
+	kpub2 := genPubkey()
+	cpub2 := genPubkey()
+	if _, err := issue(kpub2, cpub); err == nil {
+		t.Errorf("Certificate has been issued again for a different pubkey")
+	}
+	if _, err := issue(kpub, cpub2); err == nil {
+		t.Errorf("Certificate has been issued again for a different pubkey")
+	}
+}
diff --git a/metropolis/node/core/curator/proto/api/api.proto b/metropolis/node/core/curator/proto/api/api.proto
index 8791ad8..32fb2d1 100644
--- a/metropolis/node/core/curator/proto/api/api.proto
+++ b/metropolis/node/core/curator/proto/api/api.proto
@@ -119,6 +119,17 @@
             allow_unauthenticated: true
         };
     }
+
+    // IssueCertificate issues some TLS certificate (currently only for nodes),
+    // effectively performing credential escrow.
+    //
+    // This is currently used to issue Kubernetes component certificates for
+    // nodes (as Kubernetes doesn't understand Metropolis certificates, and we
+    // don't want to be running components with node private keys anyway).
+    rpc IssueCertificate(IssueCertificateRequest) returns (IssueCertificateResponse) {
+        option (metropolis.proto.ext.authorization) = {
+        };
+    }
 }
 
 // Node is the state and configuration of a node in the cluster.
@@ -296,3 +307,42 @@
     // this_node_id is the Node ID of the node which sent this response.
     string this_node_id = 4;
 }
+
+message IssueCertificateRequest {
+    // Issue a set of TLS certificates for a Kubernetes worker node.
+    message KubernetesWorker {
+        // The ED25519 public key of the keypair that will run the kubelet and
+        // cluster networking.
+        //
+        // Kubernetes worker certificates can only be issued for one pubkey for
+        // a given node. Attempting to retrieve certificates for a different
+        // pubkey will fail.
+        bytes kubelet_pubkey = 1;
+        // The ED25519 public key of the keypair that will run the CSI provisioner.
+        bytes csi_provisioner_pubkey = 2;
+    }
+    oneof kind {
+        KubernetesWorker kubernetes_worker = 1;
+    };
+}
+
+message IssueCertificateResponse {
+    message KubernetesWorker {
+        // DER-encoded (but not PEM armored) certificate of the Kubernetes
+        // 'identity' CA, which is used to authenticate services users. The
+        // certificates issued below are signed by this CA.
+        bytes identity_ca_certificate = 1;
+        // DER-encoded (but not PEM armored) certificate to be used by kubelet
+        // when authenticating incoming connections.
+        bytes kubelet_server_certificate = 2;
+        // DER-encoded (but not PEM armored) certificate to be used by kubelet
+        // and cluster networking when connecting to the api server.
+        bytes kubelet_client_certificate = 3;
+        // DER-encoded (but not PEM armored) certificate to be used by the CSI
+        // provisioner when connecting to the api server.
+        bytes csi_provisioner_certificate = 4;
+    }
+    oneof kind {
+        KubernetesWorker kubernetes_worker = 1;
+    };
+}
\ No newline at end of file
diff --git a/metropolis/node/kubernetes/pki/kubernetes.go b/metropolis/node/kubernetes/pki/kubernetes.go
index dce019b..dbebf73 100644
--- a/metropolis/node/kubernetes/pki/kubernetes.go
+++ b/metropolis/node/kubernetes/pki/kubernetes.go
@@ -342,6 +342,56 @@
 	return key, nil
 }
 
+// Kubelet builds the server and client certificates for the Kubelet running on
+// the node called name. Both certificates are issued by the identity CA for the
+// given pubkey. They are only constructed here, not ensured; callers still
+// need to call Ensure on them.
+func (k *PKI) Kubelet(ctx context.Context, name string, pubkey ed25519.PublicKey) (server *opki.Certificate, client *opki.Certificate, err error) {
+	if err = k.EnsureAll(ctx); err != nil {
+		return nil, nil, fmt.Errorf("could not ensure certificates exist: %w", err)
+	}
+	fullName := fmt.Sprintf("system:node:%s", name)
+	issuer := k.Certificates[IdCA]
+	server = &opki.Certificate{
+		Name:      fmt.Sprintf("kubelet-%s-server", fullName),
+		Namespace: &k.namespace,
+		Issuer:    issuer,
+		Template:  opki.Server([]string{fullName}, nil),
+		Mode:      opki.CertificateExternal,
+		PublicKey: pubkey,
+	}
+	client = &opki.Certificate{
+		Name:      fmt.Sprintf("kubelet-%s-client", fullName),
+		Namespace: &k.namespace,
+		Issuer:    issuer,
+		Template:  opki.Client(fullName, []string{"system:nodes"}),
+		Mode:      opki.CertificateExternal,
+		PublicKey: pubkey,
+	}
+	return server, client, nil
+}
+
+// CSIProvisioner builds the client certificate for the CSI provisioner running
+// on the worker node called name. The certificate is issued by the identity CA
+// for the given pubkey. It is only constructed here, not ensured; callers
+// still need to call Ensure on it.
+func (k *PKI) CSIProvisioner(ctx context.Context, name string, pubkey ed25519.PublicKey) (client *opki.Certificate, err error) {
+	if err = k.EnsureAll(ctx); err != nil {
+		return nil, fmt.Errorf("could not ensure certificates exist: %w", err)
+	}
+	fullName := fmt.Sprintf("metropolis:csi-provisioner:%s", name)
+	issuer := k.Certificates[IdCA]
+	client = &opki.Certificate{
+		Name:      fmt.Sprintf("csi-provisioner-%s", fullName),
+		Namespace: &k.namespace,
+		Issuer:    issuer,
+		Template:  opki.Client(fullName, []string{"metropolis:csi-provisioner"}),
+		Mode:      opki.CertificateExternal,
+		PublicKey: pubkey,
+	}
+	return client, nil
+}
+
 // VolatileKubelet returns a pair of server/client ceritficates for the Kubelet
 // to use. The certificates are ephemeral, meaning they are not stored in etcd,
 // and instead are regenerated any time this function is called.