m/n/core: factor out gRPC/TLS into rpc and identity libraries
This is an annoyingly large change, which started its life as me pulling
the 'let's add tests for authentication' thread, and ended up in
unifying a whole bunch of dispersed logic under two new libraries.
Notable changes:
- m/n/core/identity now contains the NodeCertificate (now called Node)
and NodeCredentials types. These used to exist in the cluster code,
but were factored out to prevent loops between the curator, the
cluster enrolment logic, and other code. They can now be shared by
nearly all of the node code, removing the need for some conversions
between subsystems/packages.
- Alongside Node{,Credentials} types, the identity package contains
code that creates x509 certificate templates and verifies x509
certificates, and has functions specific to nodes and users - not
clients and servers. This allows moving most of the rest of
certificate checking code into a single set of functions, and allows
us to test this logic thoroughly.
- pki.{Client,Server,CA} are not used by the node core code anymore,
and can now be moved to kubernetes-specific code (as that was their
original purpose and that's their only current use).
- m/n/core/rpc has been refactored to deduplicate code between the
local/external gRPC servers and unary/stream interceptors for these
servers, also allowing for more thorough testing and unified
behaviour between all.
- A PeerInfo structure is now injected into all gRPC handlers, and is
unified to contain information about nodes, users, and possibly
unauthenticated callers.
- The AAA.Escrow implementation now makes use of PeerInfo in order to
retrieve the client's certificate, instead of rolling its own logic.
- The EphemeralClusterCredentials test helper has been moved to the rpc
library, and now returns identity objects, allowing for simplified
test code (less juggling of bare public keys and
{x509,tls}.Certificate objects).
Change-Id: I9284966b4f18c0d7628167ca3168b4b4037808c1
Reviewed-on: https://review.monogon.dev/c/monogon/+/325
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/identity/BUILD.bazel b/metropolis/node/core/identity/BUILD.bazel
new file mode 100644
index 0000000..c5b481a
--- /dev/null
+++ b/metropolis/node/core/identity/BUILD.bazel
@@ -0,0 +1,18 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "go_default_library",
+ srcs = [
+ "certificates.go",
+ "identity.go",
+ ],
+ importpath = "source.monogon.dev/metropolis/node/core/identity",
+ visibility = ["//visibility:public"],
+ deps = ["//metropolis/node/core/localstorage:go_default_library"],
+)
+
+go_test(
+ name = "go_default_test",
+ srcs = ["certificates_test.go"],
+ embed = [":go_default_library"],
+)
diff --git a/metropolis/node/core/identity/certificates.go b/metropolis/node/core/identity/certificates.go
new file mode 100644
index 0000000..95b7e0d
--- /dev/null
+++ b/metropolis/node/core/identity/certificates.go
@@ -0,0 +1,169 @@
+package identity
+
+import (
+ "crypto/ed25519"
+ "crypto/x509"
+ "crypto/x509/pkix"
+ "fmt"
+ "math/big"
+)
+
+// UserCertificate makes a Metropolis user certificate template for identity.
+func UserCertificate(identity string) x509.Certificate {
+ return x509.Certificate{
+ Subject: pkix.Name{
+ CommonName: identity, // read back as the user identity by VerifyUserInCluster
+ },
+ KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+ ExtKeyUsage: []x509.ExtKeyUsage{
+ x509.ExtKeyUsageClientAuth, // ClientAuth marks the certificate as a user certificate
+ },
+ }
+}
+
+// NodeCertificate makes a Metropolis node certificate template for pubkey.
+func NodeCertificate(pubkey ed25519.PublicKey) x509.Certificate {
+ return x509.Certificate{
+ Subject: pkix.Name{
+ CommonName: NodeID(pubkey), // compared against the key's ID by VerifyNodeInCluster
+ },
+ KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+ ExtKeyUsage: []x509.ExtKeyUsage{
+ // Note: node certificates are also effectively being used to perform client
+ // authentication to other nodes, but they don't have the ClientAuth bit
+ // set. Instead, Metropolis uses the ClientAuth and ServerAuth bits
+ // exclusively to distinguish Metropolis nodes from Metropolis users.
+ x509.ExtKeyUsageServerAuth,
+ },
+ // We populate the Node's ID (metropolis-xxxx) as the DNS name for this
+ // certificate for ease of use within Metropolis, where the local DNS setup
+ // allows each node's IP address to be resolvable through the Node's ID.
+ DNSNames: []string{
+ NodeID(pubkey),
+ },
+ }
+}
+
+// CACertificate makes a Metropolis-compatible CA certificate template.
+//
+// cn is a human-readable string that can be used to distinguish Metropolis
+// clusters, if needed. It is not machine-parsed; instead, only signature
+// verification and CA pinning are performed.
+func CACertificate(cn string) x509.Certificate {
+ return x509.Certificate{
+ SerialNumber: big.NewInt(1), // fixed serial; the user/node templates leave theirs unset
+ Subject: pkix.Name{
+ CommonName: cn,
+ },
+ IsCA: true,
+ KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign | x509.KeyUsageDigitalSignature,
+ ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageOCSPSigning},
+ }
+}
+
+// VerifyInCluster ensures that the given certificate has been signed by the
+// given CA certificate and that both were emitted for ed25519 keypairs.
+//
+// The subject certificate's public key is returned if verification is
+// successful, and an error is returned otherwise.
+func VerifyInCluster(cert, ca *x509.Certificate) (ed25519.PublicKey, error) {
+ // Ensure ca certificate uses ED25519 keypair.
+ if _, ok := ca.PublicKey.(ed25519.PublicKey); !ok {
+ return nil, fmt.Errorf("ca certificate not issued for ed25519 keypair")
+ }
+
+ // Ensure subject cert is signed by ca.
+ if err := cert.CheckSignatureFrom(ca); err != nil {
+ return nil, fmt.Errorf("signature verification failed: %w", err)
+ }
+
+ // Ensure subject certificate is _not_ CA. CAs (cluster or possibly
+ // intermediaries) are not supposed to either directly serve traffic or perform
+ // client actions on the cluster.
+ if cert.IsCA {
+ return nil, fmt.Errorf("subject certificate is a CA")
+ }
+
+ // Extract subject ED25519 public key.
+ pubkey, ok := cert.PublicKey.(ed25519.PublicKey)
+ if !ok {
+ return nil, fmt.Errorf("certificate not issued for ed25519 keypair")
+ }
+
+ return pubkey, nil
+}
+
+// VerifyNodeInCluster ensures that a given certificate is a Metropolis node
+// certificate emitted by a given Metropolis CA.
+//
+// The node's public key is returned if verification is successful, and an error
+// is returned otherwise.
+func VerifyNodeInCluster(node, ca *x509.Certificate) (ed25519.PublicKey, error) {
+ pk, err := VerifyInCluster(node, ca)
+ if err != nil {
+ return nil, err
+ }
+
+ // Ensure certificate has ServerAuth bit, thereby marking it as a node certificate.
+ found := false
+ for _, ku := range node.ExtKeyUsage {
+ if ku == x509.ExtKeyUsageServerAuth {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return nil, fmt.Errorf("not a node certificate (missing ServerAuth key usage)")
+ }
+
+ id := NodeID(pk) // expected ID, derived from the certificate's own public key
+
+ // Ensure node ID is present in Subject.CommonName and at least one DNS name.
+ if node.Subject.CommonName != id {
+ return nil, fmt.Errorf("node ID not found in CommonName")
+ }
+
+ found = false // reuse the flag for the DNS name search
+ for _, n := range node.DNSNames {
+ if n == id {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return nil, fmt.Errorf("node ID not found in DNSNames")
+ }
+
+ return pk, nil
+}
+
+// VerifyUserInCluster ensures that a given certificate is a Metropolis user
+// certificate emitted by a given Metropolis CA.
+//
+// The user certificate's identity is returned if verification is successful,
+// and an error is returned otherwise.
+func VerifyUserInCluster(user, ca *x509.Certificate) (string, error) {
+ _, err := VerifyInCluster(user, ca) // the public key is not needed, only the checks
+ if err != nil {
+ return "", err
+ }
+
+ // Ensure certificate has ClientAuth bit, thereby marking it as a user certificate.
+ found := false
+ for _, ku := range user.ExtKeyUsage {
+ if ku == x509.ExtKeyUsageClientAuth {
+ found = true
+ break
+ }
+ }
+ if !found {
+ return "", fmt.Errorf("not a user certificate (missing ClientAuth key usage)")
+ }
+
+ // Extract identity from CommonName, ensure it is set.
+ identity := user.Subject.CommonName
+ if identity == "" {
+ return "", fmt.Errorf("CommonName not set")
+ }
+ return identity, nil
+}
diff --git a/metropolis/node/core/identity/certificates_test.go b/metropolis/node/core/identity/certificates_test.go
new file mode 100644
index 0000000..f96f517
--- /dev/null
+++ b/metropolis/node/core/identity/certificates_test.go
@@ -0,0 +1,180 @@
+package identity
+
+import (
+ "crypto/ed25519"
+ "crypto/rand"
+ "crypto/x509"
+ "math/big"
+ "testing"
+ "time"
+)
+
+// alterCert is used by test code to slightly alter certificate templates before
+// they get signed.
+type alterCert func(t *x509.Certificate)
+
+// basic is the bare minimum for certificates to be properly issued over what
+// {CA,User,Node}Certificate return. The equivalent logic is present in the pki
+// codebase, we replicate it here because we don't use pki.
+func basic(t *x509.Certificate) {
+ t.SerialNumber = big.NewInt(1)
+ t.NotBefore = time.Now()
+ t.NotAfter = time.Unix(253402300799, 0) // 9999-12-31T23:59:59Z
+ t.BasicConstraintsValid = true
+}
+
+func noop(_ *x509.Certificate) {} // noop is an alterCert that leaves the template unchanged.
+
+// createPKI builds a minimum viable cluster PKI. We do not use
+// EphemeralClusterCredentials because we want to test the behaviour of the
+// certificate verification code when the certificate templates are slightly
+// altered, including in ways that the pki code would normally prevent us
+// from doing.
+func createPKI(t *testing.T, fca, fnode, fuser alterCert) (caCertBytes, nodeCertBytes, userCertBytes []byte) {
+ t.Helper()
+
+ caPub, caPriv, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ t.Fatalf("GenerateKey: %v", err)
+ }
+ nodePub, _, err := ed25519.GenerateKey(rand.Reader) // private half is not needed by the tests
+ if err != nil {
+ t.Fatalf("GenerateKey: %v", err)
+ }
+ userPub, _, err := ed25519.GenerateKey(rand.Reader) // private half is not needed by the tests
+ if err != nil {
+ t.Fatalf("GenerateKey: %v", err)
+ }
+
+ caTemplate := CACertificate("test metropolis CA")
+ basic(&caTemplate)
+ fca(&caTemplate)
+
+ // Self-signed: the CA template is also used as the parent.
+ caCertBytes, err = x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, caPub, caPriv)
+ if err != nil {
+ t.Fatalf("CreateCertificate (CA): %v", err)
+ }
+ caCert, err := x509.ParseCertificate(caCertBytes)
+ if err != nil {
+ t.Fatalf("ParseCertificate (CA): %v", err)
+ }
+ nodeTemplate := NodeCertificate(nodePub)
+ basic(&nodeTemplate)
+ fnode(&nodeTemplate)
+ nodeCertBytes, err = x509.CreateCertificate(rand.Reader, &nodeTemplate, caCert, nodePub, caPriv)
+ if err != nil {
+ t.Fatalf("CreateCertificate (node): %v", err)
+ }
+
+ userTemplate := UserCertificate("test")
+ basic(&userTemplate)
+ fuser(&userTemplate)
+ userCertBytes, err = x509.CreateCertificate(rand.Reader, &userTemplate, caCert, userPub, caPriv)
+ if err != nil {
+ t.Fatalf("CreateCertificate (user): %v", err)
+ }
+
+ return
+}
+
+func TestCertificates(t *testing.T) {
+ for i, te := range []struct {
+ fca alterCert
+ fnode alterCert
+ fuser alterCert
+ successNode bool
+ successUser bool
+ }{
+ // Case 0: everything should work.
+ {
+ noop,
+ noop,
+ noop,
+ true, true,
+ },
+ // Case 1: CA must be IsCA
+ {
+ func(ca *x509.Certificate) { ca.IsCA = false },
+ noop,
+ noop,
+ false, false,
+ },
+ // Case 2: node must not have IsCA set
+ {
+ noop,
+ func(n *x509.Certificate) { n.IsCA = true },
+ noop,
+ false, true,
+ },
+ // Case 3: user must not have IsCA set
+ {
+ noop,
+ noop,
+ func(u *x509.Certificate) { u.IsCA = true },
+ true, false,
+ },
+ // Case 4: node must have its ID as a DNS name.
+ {
+ noop,
+ func(n *x509.Certificate) { n.DNSNames = []string{"node"} },
+ noop,
+ false, true,
+ },
+ // Case 5: node must have its ID as CommonName.
+ {
+ noop,
+ func(n *x509.Certificate) { n.Subject.CommonName = "node" },
+ noop,
+ false, true,
+ },
+ // Case 6: user must have CommonName set.
+ {
+ noop,
+ noop,
+ func(u *x509.Certificate) { u.Subject.CommonName = "" },
+ true, false,
+ },
+ } {
+ caCert, nodeCert, userCert := createPKI(t, te.fca, te.fnode, te.fuser)
+ caCertParsed, err := x509.ParseCertificate(caCert)
+ if err != nil {
+ t.Fatalf("Case %d: ParseCertificate(ca): %v", i, err)
+ }
+ nodeCertParsed, err := x509.ParseCertificate(nodeCert)
+ if err != nil {
+ t.Fatalf("Case %d: ParseCertificate(node): %v", i, err)
+ }
+ userCertParsed, err := x509.ParseCertificate(userCert)
+ if err != nil {
+ t.Fatalf("Case %d: ParseCertificate(user): %v", i, err)
+ }
+
+ // Check node certificate as node certificate. Should succeed iff successNode.
+ _, err = VerifyNodeInCluster(nodeCertParsed, caCertParsed)
+ if te.successNode && err != nil {
+ t.Errorf("Case %d: VerifyNodeInCluster failed: %v", i, err)
+ }
+ if !te.successNode && err == nil {
+ t.Errorf("Case %d: VerifyNodeInCluster succeeded, wanted failure", i)
+ }
+
+ // Check user certificate as user certificate. Should succeed iff successUser.
+ _, err = VerifyUserInCluster(userCertParsed, caCertParsed)
+ if te.successUser && err != nil {
+ t.Errorf("Case %d: VerifyUserInCluster failed: %v", i, err)
+ }
+ if !te.successUser && err == nil {
+ t.Errorf("Case %d: VerifyUserInCluster succeeded, wanted failure", i)
+ }
+
+ // Check user certificate as node certificate. Should always fail.
+ if _, err := VerifyNodeInCluster(userCertParsed, caCertParsed); err == nil {
+ t.Errorf("Case %d: User certificate erroneously verified as node certificate", i)
+ }
+ // Check node certificate as user certificate. Should always fail.
+ if _, err := VerifyUserInCluster(nodeCertParsed, caCertParsed); err == nil {
+ t.Errorf("Case %d: Node certificate erroneously verified as user certificate", i)
+ }
+ }
+}
diff --git a/metropolis/node/core/identity/identity.go b/metropolis/node/core/identity/identity.go
new file mode 100644
index 0000000..862e794
--- /dev/null
+++ b/metropolis/node/core/identity/identity.go
@@ -0,0 +1,138 @@
+package identity
+
+import (
+ "crypto/ed25519"
+ "crypto/subtle"
+ "crypto/tls"
+ "crypto/x509"
+ "encoding/hex"
+ "fmt"
+
+ "source.monogon.dev/metropolis/node/core/localstorage"
+)
+
+// Node is the public part of the credentials of a node. They are
+// emitted for a node by the cluster CA contained within the curator.
+type Node struct {
+ node *x509.Certificate // node certificate, as parsed and verified by NewNode
+ ca *x509.Certificate // CA certificate the node certificate was verified against
+}
+
+// NewNode wraps a pair of CA and node DER-encoded certificates into
+// Node, ensuring the given certificate data is valid and compatible
+// with Metropolis assumptions.
+func NewNode(cert, ca []byte) (*Node, error) {
+ certParsed, err := x509.ParseCertificate(cert)
+ if err != nil {
+ return nil, fmt.Errorf("could not parse node certificate: %w", err)
+ }
+ caCertParsed, err := x509.ParseCertificate(ca)
+ if err != nil {
+ return nil, fmt.Errorf("could not parse ca certificate: %w", err)
+ }
+
+ if _, err := VerifyNodeInCluster(certParsed, caCertParsed); err != nil {
+ return nil, fmt.Errorf("could not verify node certificate within cluster CA: %w", err)
+ }
+
+ return &Node{
+ node: certParsed,
+ ca: caCertParsed,
+ }, nil
+}
+
+// PublicKey returns the Ed25519 public key corresponding to this node's
+// certificate/credentials.
+func (n *Node) PublicKey() ed25519.PublicKey {
+ // Safe: NewNode has ensured (via VerifyNodeInCluster) that the certificate has
+ // an Ed25519 public key; the assertion would panic on a Node built otherwise.
+ return n.node.PublicKey.(ed25519.PublicKey)
+}
+
+// ClusterCA returns the CA certificate of the cluster for which this
+// Node is emitted.
+func (n *Node) ClusterCA() *x509.Certificate {
+ return n.ca // the stored pointer itself, not a copy
+}
+
+// ID returns the canonical ID/name of the node for which this
+// certificate/credentials were emitted.
+func (n *Node) ID() string {
+ return NodeID(n.PublicKey()) // `metropolis-{pubkeyHash}`, see NodeID
+}
+
+func (n *Node) Certificate() *x509.Certificate { // Certificate returns the node's certificate.
+ return n.node // the stored pointer itself, not a copy
+}
+
+// NodeCredentials are the public and private part of the credentials of a node.
+//
+// It represents all the data necessary for a node to authenticate over mTLS to
+// other nodes and the rest of the cluster.
+//
+// It must never be made available to any node other than the node it has been
+// emitted for.
+type NodeCredentials struct {
+ Node // public part: node and CA certificates
+ private ed25519.PrivateKey // checked against Node's public key by NewNodeCredentials
+}
+
+// NewNodeCredentials wraps a pair of CA and node DER-encoded certificates plus
+// a raw Ed25519 private key into NodeCredentials, ensuring that the given data
+// is valid and compatible with Metropolis assumptions.
+func NewNodeCredentials(priv, cert, ca []byte) (*NodeCredentials, error) {
+ nc, err := NewNode(cert, ca)
+ if err != nil {
+ return nil, err
+ }
+
+ // Ensure that the private key is a valid length.
+ if want, got := ed25519.PrivateKeySize, len(priv); want != got {
+ return nil, fmt.Errorf("private key is not the correct length, wanted %d, got %d", want, got)
+ }
+
+ // Ensure that the given private key matches the certificate's public key.
+ if want, got := ed25519.PrivateKey(priv).Public().(ed25519.PublicKey), nc.PublicKey(); subtle.ConstantTimeCompare(want, got) != 1 {
+ return nil, fmt.Errorf("public key does not match private key")
+ }
+
+ return &NodeCredentials{
+ Node: *nc,
+ private: priv, // stored as given, not copied
+ }, nil
+}
+
+func (n *NodeCredentials) TLSCredentials() tls.Certificate { // node certificate and key as a tls.Certificate
+ return tls.Certificate{
+ Leaf: n.node,
+ Certificate: [][]byte{n.node.Raw}, // chain holds only the node certificate, not the CA
+ PrivateKey: n.private,
+ }
+}
+
+// Save writes the CA certificate, node certificate and private key to d, in order.
+func (n *NodeCredentials) Save(d *localstorage.PKIDirectory) error {
+ if err := d.CACertificate.Write(n.ca.Raw, 0400); err != nil {
+ return fmt.Errorf("when writing CA certificate: %w", err)
+ }
+ if err := d.Certificate.Write(n.node.Raw, 0400); err != nil {
+ return fmt.Errorf("when writing node certificate: %w", err)
+ }
+ if err := d.Key.Write(n.private, 0400); err != nil {
+ return fmt.Errorf("when writing node private key: %w", err)
+ }
+ return nil // earlier writes are not rolled back if a later one fails
+}
+
+// NodeIDBare returns the `{pubkeyHash}` part of the node ID: hex of pub[:16].
+func NodeIDBare(pub []byte) string {
+ return hex.EncodeToString(pub[:16]) // panics if pub is shorter than 16 bytes
+}
+
+// NodeID returns the name of this node, which is `metropolis-{pubkeyHash}`.
+// This name should be the primary way to refer to Metropolis nodes within a
+// cluster, and is guaranteed to be unique by relying on cryptographic
+// randomness.
+func NodeID(pub []byte) string {
+ return fmt.Sprintf("metropolis-%s", NodeIDBare(pub))
+}