m/n/c/{cluster,curator}: move NodeCredentials to cluster

This keeps the NodeCredentials/NodeCertificate logic types near their
consumer, the cluster bootstrap code. It also rewrites these structures
to be centered around the x509 data itself.

This is a followup to https://review.monogon.dev/c/monogon/+/186 .
Attempting to introduce it into that change was too complex due to the
surrounding cluster/curator refactoring.

We also take this opportunity to write some simple tests for the
credential validation logic.

Change-Id: Iead3cfdd5778274508d79799f4750f5fdf9385bc
Reviewed-on: https://review.monogon.dev/c/monogon/+/197
Reviewed-by: Lorenz Brun <lorenz@nexantic.com>
diff --git a/metropolis/node/core/cluster/BUILD.bazel b/metropolis/node/core/cluster/BUILD.bazel
index fcf8aae..322a337 100644
--- a/metropolis/node/core/cluster/BUILD.bazel
+++ b/metropolis/node/core/cluster/BUILD.bazel
@@ -1,10 +1,11 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 
 go_library(
     name = "go_default_library",
     srcs = [
         "cluster.go",
         "cluster_bootstrap.go",
+        "node.go",
         "status.go",
         "watcher.go",
     ],
@@ -25,3 +26,10 @@
         "@org_golang_google_protobuf//proto:go_default_library",
     ],
 )
+
+go_test(
+    name = "go_default_test",
+    srcs = ["node_test.go"],
+    embed = [":go_default_library"],
+    deps = ["//metropolis/node/core/curator:go_default_library"],
+)
diff --git a/metropolis/node/core/cluster/cluster_bootstrap.go b/metropolis/node/core/cluster/cluster_bootstrap.go
index 7eb8058..30de8b3 100644
--- a/metropolis/node/core/cluster/cluster_bootstrap.go
+++ b/metropolis/node/core/cluster/cluster_bootstrap.go
@@ -20,6 +20,7 @@
 	"context"
 	"crypto/ed25519"
 	"crypto/rand"
+	"crypto/subtle"
 	"encoding/hex"
 	"fmt"
 
@@ -116,11 +117,25 @@
 	}
 
 	// And short-circuit creating the curator CA and node certificate.
-	creds, err := curator.BootstrapNodeCredentials(ctx, ckv, priv, pub)
+	caCert, nodeCert, err := curator.BootstrapNodeCredentials(ctx, ckv, priv, pub)
 	if err != nil {
 		return fmt.Errorf("failed to bootstrap node credentials: %w", err)
 	}
 
+	// Using the short-circuited credentials from the curator, build our
+	// NodeCredentials. That, and the public part of the credentials
+	// (NodeCertificate) are the primary output of the cluster manager.
+	creds, err := NewNodeCredentials(priv, nodeCert, caCert)
+	if err != nil {
+		return fmt.Errorf("failed to use newly bootstrapped node credentials: %w", err)
+	}
+
+	// Overly cautious check: ensure that the credentials are for the public key
+	// we've generated.
+	if want, got := pub, []byte(creds.PublicKey()); subtle.ConstantTimeCompare(want, got) != 1 {
+		return fmt.Errorf("newly bootstrapped node credentials emitted for wrong public key")
+	}
+
 	if err := creds.Save(&m.storageRoot.Data.Node.Credentials); err != nil {
 		return fmt.Errorf("failed to write node credentials: %w", err)
 	}
diff --git a/metropolis/node/core/cluster/node.go b/metropolis/node/core/cluster/node.go
new file mode 100644
index 0000000..0d4daac
--- /dev/null
+++ b/metropolis/node/core/cluster/node.go
@@ -0,0 +1,140 @@
+package cluster
+
+import (
+	"crypto/ed25519"
+	"crypto/subtle"
+	"crypto/x509"
+	"fmt"
+
+	"source.monogon.dev/metropolis/node/core/curator"
+	"source.monogon.dev/metropolis/node/core/localstorage"
+)
+
+// NodeCertificate is the public part of the credentials of a node. They are
+// emitted for a node by the cluster CA contained within the curator.
+type NodeCertificate struct {
+	node *x509.Certificate
+	ca   *x509.Certificate
+}
+
+// NodeCredentials are the public and private part of the credentials of a node.
+//
+// It represents all the data necessary for a node to authenticate over mTLS to
+// other nodes and the rest of the cluster.
+//
+// It must never be made available to any node other than the node it has been
+// emitted for.
+type NodeCredentials struct {
+	NodeCertificate
+	private ed25519.PrivateKey
+}
+
+// NewNodeCertificate wraps a pair CA and node DER-encoded certificates into
+// NodeCertificate, ensuring the given certificate data is valid and compatible
+// Metropolis assumptions.
+//
+// It does _not_ verify that the given CA is a known/trusted Metropolis CA for a
+// running cluster.
+func NewNodeCertificate(cert, ca []byte) (*NodeCertificate, error) {
+	certParsed, err := x509.ParseCertificate(cert)
+	if err != nil {
+		return nil, fmt.Errorf("could not parse node certificate: %w", err)
+	}
+	caCertParsed, err := x509.ParseCertificate(ca)
+	if err != nil {
+		return nil, fmt.Errorf("could not parse ca certificate: %w", err)
+	}
+
+	// Ensure both CA and node certs use ED25519.
+	if certParsed.PublicKeyAlgorithm != x509.Ed25519 {
+		return nil, fmt.Errorf("node certificate must use ED25519, is %s", certParsed.PublicKeyAlgorithm.String())
+	}
+	if pub, ok := certParsed.PublicKey.(ed25519.PublicKey); !ok || len(pub) != ed25519.PublicKeySize {
+		return nil, fmt.Errorf("node certificate ED25519 key invalid")
+	}
+	if caCertParsed.PublicKeyAlgorithm != x509.Ed25519 {
+		return nil, fmt.Errorf("CA certificate must use ED25519, is %s", caCertParsed.PublicKeyAlgorithm.String())
+	}
+	if pub, ok := caCertParsed.PublicKey.(ed25519.PublicKey); !ok || len(pub) != ed25519.PublicKeySize {
+		return nil, fmt.Errorf("CA certificate ED25519 key invalid")
+	}
+
+	// Ensure that the certificate is signed by the CA certificate.
+	if err := certParsed.CheckSignatureFrom(caCertParsed); err != nil {
+		return nil, fmt.Errorf("certificate not signed by given CA: %w", err)
+	}
+
+	// Ensure that the certificate has the node's calculated ID in its DNS names.
+	found := false
+	nid := curator.NodeID(certParsed.PublicKey.(ed25519.PublicKey))
+	for _, n := range certParsed.DNSNames {
+		if n == nid {
+			found = true
+			break
+		}
+	}
+	if !found {
+		return nil, fmt.Errorf("calculated node ID %q not found in node certificate's DNS names (%v)", nid, certParsed.DNSNames)
+	}
+
+	return &NodeCertificate{
+		node: certParsed,
+		ca:   caCertParsed,
+	}, nil
+}
+
+// NewNodeCredentials wraps a pair of CA and node DER-encoded certificates plus
+// a private key into NodeCredentials, ensuring that the given data is valid and
+// compatible with Metropolis assumptions.
+//
+// It does _not_ verify that the given CA is a known/trusted Metropolis CA for a
+// running cluster.
+func NewNodeCredentials(priv, cert, ca []byte) (*NodeCredentials, error) {
+	nc, err := NewNodeCertificate(cert, ca)
+	if err != nil {
+		return nil, err
+	}
+
+	// Ensure that the private key is a valid length.
+	if want, got := ed25519.PrivateKeySize, len(priv); want != got {
+		return nil, fmt.Errorf("private key is not the correct length, wanted %d, got %d", want, got)
+	}
+
+	// Ensure that the given private key matches the given public key.
+	if want, got := ed25519.PrivateKey(priv).Public().(ed25519.PublicKey), nc.PublicKey(); subtle.ConstantTimeCompare(want, got) != 1 {
+		return nil, fmt.Errorf("public key does not match private key")
+	}
+
+	return &NodeCredentials{
+		NodeCertificate: *nc,
+		private:         ed25519.PrivateKey(priv),
+	}, nil
+}
+
+// Save stores the given node credentials in local storage.
+func (c *NodeCredentials) Save(d *localstorage.PKIDirectory) error {
+	if err := d.CACertificate.Write(c.ca.Raw, 0400); err != nil {
+		return fmt.Errorf("when writing CA certificate: %w", err)
+	}
+	if err := d.Certificate.Write(c.node.Raw, 0400); err != nil {
+		return fmt.Errorf("when writing node certificate: %w", err)
+	}
+	if err := d.Key.Write(c.private, 0400); err != nil {
+		return fmt.Errorf("when writing node private key: %w", err)
+	}
+	return nil
+}
+
+// PublicKey returns the ED25519 public key corresponding to this node's
+// certificate/credentials.
+func (nc *NodeCertificate) PublicKey() ed25519.PublicKey {
+	// Safe: we have ensured that the given certificate has an ed25519 public key on
+	// NewNodeCertificate.
+	return nc.node.PublicKey.(ed25519.PublicKey)
+}
+
+// ID returns the canonical ID/name of the node for which this
+// certificate/credentials were emitted.
+func (nc *NodeCertificate) ID() string {
+	return curator.NodeID(nc.PublicKey())
+}
diff --git a/metropolis/node/core/cluster/node_test.go b/metropolis/node/core/cluster/node_test.go
new file mode 100644
index 0000000..079d4dc
--- /dev/null
+++ b/metropolis/node/core/cluster/node_test.go
@@ -0,0 +1,105 @@
+package cluster
+
+import (
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"math/big"
+	"testing"
+	"time"
+
+	"source.monogon.dev/metropolis/node/core/curator"
+)
+
+type alterCert func(t *x509.Certificate)
+
+func createPKI(t *testing.T, fca, fnode alterCert) (caCertBytes, nodeCertBytes, nodePriv []byte) {
+	t.Helper()
+
+	caPub, caPriv, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatalf("GenerateKey: %v", err)
+	}
+	var nodePub ed25519.PublicKey
+	nodePub, nodePriv, err = ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatalf("GenerateKey: %v", err)
+	}
+
+	caTemplate := &x509.Certificate{
+		SerialNumber:          big.NewInt(1),
+		Subject:               pkix.Name{CommonName: "CA"},
+		IsCA:                  true,
+		KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature,
+		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageOCSPSigning},
+		NotBefore:             time.Now(),
+		NotAfter:              time.Unix(253402300799, 0),
+		BasicConstraintsValid: true,
+	}
+	fca(caTemplate)
+
+	caCertBytes, err = x509.CreateCertificate(rand.Reader, caTemplate, caTemplate, caPub, caPriv)
+	if err != nil {
+		t.Fatalf("CreateCertificate (CA): %v", err)
+	}
+	caCert, err := x509.ParseCertificate(caCertBytes)
+	if err != nil {
+		t.Fatalf("ParseCertificate (CA): %v", err)
+	}
+
+	nodeTemplate := &x509.Certificate{
+		SerialNumber: big.NewInt(2),
+		Subject:      pkix.Name{},
+		KeyUsage:     x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+		NotBefore:    time.Now(),
+		NotAfter:     time.Unix(253402300799, 0),
+		DNSNames:     []string{curator.NodeID(nodePub)},
+	}
+	fnode(nodeTemplate)
+
+	nodeCertBytes, err = x509.CreateCertificate(rand.Reader, nodeTemplate, caCert, nodePub, caPriv)
+	if err != nil {
+		t.Fatalf("CreateCertificate (node): %v", err)
+	}
+
+	return
+}
+
+// TestNodeCertificateX509 exercises X509 validity checks performed by
+// NewNodeCertificate.
+func TestNodeCertificateX509(t *testing.T) {
+	for i, te := range []struct {
+		fca     alterCert
+		fnode   alterCert
+		success bool
+	}{
+		// Case 0: everything should work.
+		{
+			func(ca *x509.Certificate) {},
+			func(n *x509.Certificate) {},
+			true,
+		},
+		// Case 1: CA must be IsCA
+		{
+			func(ca *x509.Certificate) { ca.IsCA = false },
+			func(n *x509.Certificate) {},
+			false,
+		},
+		// Case 2: node must have its ID as a DNS name.
+		{
+			func(ca *x509.Certificate) {},
+			func(n *x509.Certificate) { n.DNSNames = []string{"node"} },
+			false,
+		},
+	} {
+		caCert, nodeCert, nodePriv := createPKI(t, te.fca, te.fnode)
+		_, err := NewNodeCredentials(nodePriv, nodeCert, caCert)
+		if te.success && err != nil {
+			t.Fatalf("Case %d: NewNodeCredentials failed: %v", i, err)
+		}
+		if !te.success && err == nil {
+			t.Fatalf("Case %d: NewNodeCredentials succeeded, wanted failure", i)
+		}
+	}
+}
diff --git a/metropolis/node/core/cluster/status.go b/metropolis/node/core/cluster/status.go
index d82130b..3f99567 100644
--- a/metropolis/node/core/cluster/status.go
+++ b/metropolis/node/core/cluster/status.go
@@ -5,7 +5,6 @@
 	"fmt"
 
 	"source.monogon.dev/metropolis/node/core/consensus/client"
-	"source.monogon.dev/metropolis/node/core/curator"
 	cpb "source.monogon.dev/metropolis/proto/common"
 )
 
@@ -27,7 +26,7 @@
 
 	// Credentials used for the node to authenticate to the Curator and other
 	// cluster services.
-	Credentials *curator.NodeCredentials
+	Credentials *NodeCredentials
 }
 
 // ConsensusUser is the to-level user of an etcd client in Metropolis node
diff --git a/metropolis/node/core/curator/bootstrap.go b/metropolis/node/core/curator/bootstrap.go
index 253ba72..4e809dd 100644
--- a/metropolis/node/core/curator/bootstrap.go
+++ b/metropolis/node/core/curator/bootstrap.go
@@ -22,31 +22,27 @@
 // consumer like the bootstrap code.
 
 // BootstrapNodeCredentials creates node credentials for the first node in a
-// cluster. It can only be called by cluster bootstrap code.
+// cluster. It can only be called by cluster bootstrap code. It returns the
+// generated x509 CA and node certificates.
 //
 // TODO(q3k): don't require privkey, but that needs some //metropolis/pkg/pki changes first.
-func BootstrapNodeCredentials(ctx context.Context, etcd client.Namespaced, priv, pub []byte) (*NodeCredentials, error) {
+func BootstrapNodeCredentials(ctx context.Context, etcd client.Namespaced, priv, pub []byte) (ca, node []byte, err error) {
 	id := NodeID(pub)
 
-	caCertBytes, _, err := pkiCA.Ensure(ctx, etcd)
+	ca, _, err = pkiCA.Ensure(ctx, etcd)
 	if err != nil {
-		return nil, fmt.Errorf("when ensuring CA: %w", err)
+		err = fmt.Errorf("when ensuring CA: %w", err)
+		return
 	}
 	nodeCert := pkiNamespace.New(pkiCA, "", pki.Server([]string{id}, nil))
 	nodeCert.UseExistingKey(priv)
-	nodeCertBytes, _, err := nodeCert.Ensure(ctx, etcd)
+	node, _, err = nodeCert.Ensure(ctx, etcd)
 	if err != nil {
-		return nil, fmt.Errorf("when ensuring node cert: %w", err)
+		err = fmt.Errorf("when ensuring node cert: %w", err)
+		return
 	}
 
-	return &NodeCredentials{
-		NodeCertificate: NodeCertificate{
-			PublicKey:     pub,
-			Certificate:   nodeCertBytes,
-			CACertificate: caCertBytes,
-		},
-		PrivateKey: priv,
-	}, nil
+	return
 }
 
 // BootstrapStore saves the Node into etcd, without regard for any other cluster
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index 885dee0..d90727b 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -60,9 +60,6 @@
 	if errors.Is(err, lostLeadership) {
 		return status.Error(codes.Unavailable, "lost leadership"), true
 	}
-	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
-		return status.Errorf(codes.Unavailable, "%v", err), true
-	}
 	return err, false
 }
 
diff --git a/metropolis/node/core/curator/state_pki.go b/metropolis/node/core/curator/state_pki.go
index ce28aac..70423ed 100644
--- a/metropolis/node/core/curator/state_pki.go
+++ b/metropolis/node/core/curator/state_pki.go
@@ -1,9 +1,6 @@
 package curator
 
 import (
-	"fmt"
-
-	"source.monogon.dev/metropolis/node/core/localstorage"
 	"source.monogon.dev/metropolis/pkg/pki"
 )
 
@@ -15,49 +12,3 @@
 	// node and user certificates.
 	pkiCA = pkiNamespace.New(pki.SelfSigned, "cluster-ca", pki.CA("Metropolis Cluster CA"))
 )
-
-// NodeCredentials are the public and private part of the credentials of a node.
-//
-// It represents all the data necessary for a node to authenticate over mTLS to
-// other nodes and the rest of the cluster.
-//
-// It must never be made available to any node other than the node it has been
-// emitted for.
-type NodeCredentials struct {
-	NodeCertificate
-	// PrivateKey is the ED25519 private key of the node, corresponding to
-	// NodeCertificate.PublicKey.
-	PrivateKey []byte
-}
-
-// NodeCertificate is the public part of the credential of a node.
-type NodeCertificate struct {
-	// PublicKey is the ED25519 public key of the node.
-	PublicKey []byte
-	// Certificate is the DER-encoded TLS certificate emitted for the node (ie.
-	// PublicKey) by the cluster CA.
-	Certificate []byte
-	// CACertificate is the DER-encoded TLS certificate of the cluster CA at time of
-	// emitting the Certificate.
-	CACertificate []byte
-}
-
-// ID returns the Node ID of the node for which this NodeCertificate was
-// emitted.
-func (c *NodeCertificate) ID() string {
-	return NodeID(c.PublicKey)
-}
-
-// Save stores the given node credentials in local storage.
-func (c *NodeCredentials) Save(d *localstorage.PKIDirectory) error {
-	if err := d.CACertificate.Write(c.CACertificate, 0400); err != nil {
-		return fmt.Errorf("when writing CA certificate: %w", err)
-	}
-	if err := d.Certificate.Write(c.Certificate, 0400); err != nil {
-		return fmt.Errorf("when writing node certificate: %w", err)
-	}
-	if err := d.Key.Write(c.PrivateKey, 0400); err != nil {
-		return fmt.Errorf("when writing node private key: %w", err)
-	}
-	return nil
-}