m/node/kubernetes/pki: refactor out CA functionality

This factors out all non-k8s-specific CA functionality from
metropolis/node/kubernetes/pki into metropolis/pkg/pki.

This will allow us to re-use the same PKI-in-CA system to issue
certificates for the Metropolis cluster and nodes.

We also drive-by change some Kubernetes/PKI interactions to make things
cleaner. Notably, this implements Certificate.Mount to return a
fileargs.FileArgs containing all the files needed to use this
Certificate.

Test Plan: covered by current e2e tests. An etcd harness to test this independently would be nice, though.

X-Origin-Diff: phab/D709
GitOrigin-RevId: bdc9ff215b94c9192f65c6da8935fe2818fd14ad
diff --git a/metropolis/pkg/pki/BUILD.bazel b/metropolis/pkg/pki/BUILD.bazel
new file mode 100644
index 0000000..243abf9
--- /dev/null
+++ b/metropolis/pkg/pki/BUILD.bazel
@@ -0,0 +1,17 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "ca.go",
+        "certificate.go",
+        "doc.go",
+        "x509.go",
+    ],
+    importpath = "source.monogon.dev/metropolis/pkg/pki",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//metropolis/pkg/fileargs:go_default_library",
+        "@io_etcd_go_etcd//clientv3:go_default_library",
+    ],
+)
diff --git a/metropolis/pkg/pki/ca.go b/metropolis/pkg/pki/ca.go
new file mode 100644
index 0000000..bbed085
--- /dev/null
+++ b/metropolis/pkg/pki/ca.go
@@ -0,0 +1,133 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pki
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/x509"
+	"fmt"
+	"math/big"
+	"time"
+
+	"go.etcd.io/etcd/clientv3"
+)
+
+// Issuer is an entity that can issue certificates. This interface is
+// implemented by SelfSigned, which is an issuer that emits self-signed
+// certificates, and any other Certificate that has been created with CA(),
+// which makes this Certificate act as a CA and issue (sign) certificates.
+type Issuer interface {
+	// CACertificate returns the DER-encoded x509 certificate of the CA that will sign certificates when Issue is
+	// called, or nil if this is a self-signing issuer.
+	CACertificate(ctx context.Context, kv clientv3.KV) ([]byte, error)
+	// Issue will generate a key and certificate signed by the Issuer. The returned certificate is x509 DER-encoded,
+	// while the key is a bare ed25519 key.
+	Issue(ctx context.Context, req *Certificate, kv clientv3.KV) (cert, key []byte, err error)
+}
+
+// issueCertificate is a generic low level certificate-and-key issuance function. If ca or caKey is nil, the
+// certificate will be self-signed. The key set on req is used if present, otherwise a new one is generated. The
+// returned certificate is DER-encoded, while the returned key is internal; both are nil on error. req.template is mutated.
+func issueCertificate(req *Certificate, ca *x509.Certificate, caKey interface{}) (cert, key []byte, err error) {
+	var privKey ed25519.PrivateKey
+	var pubKey ed25519.PublicKey
+	if req.key != nil {
+		privKey = req.key
+		pubKey = privKey.Public().(ed25519.PublicKey)
+	} else {
+		// Report key generation failures to the caller instead of panicking.
+		pubKey, privKey, err = ed25519.GenerateKey(rand.Reader)
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to generate key: %w", err)
+		}
+	}
+
+	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
+	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to generate serial number: %w", err)
+	}
+
+	skid, err := calculateSKID(pubKey)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to calculate SKID: %w", err)
+	}
+
+	req.template.SerialNumber = serialNumber
+	req.template.NotBefore = time.Now()
+	req.template.NotAfter = unknownNotAfter
+	req.template.BasicConstraintsValid = true
+	req.template.SubjectKeyId = skid
+
+	// Set the AuthorityKeyID to the SKID of the signing certificate (or self, if self-signing).
+	if ca != nil && caKey != nil {
+		req.template.AuthorityKeyId = ca.SubjectKeyId
+	} else {
+		req.template.AuthorityKeyId = req.template.SubjectKeyId
+		ca = &req.template
+		caKey = privKey
+	}
+
+	certRaw, err := x509.CreateCertificate(rand.Reader, &req.template, ca, pubKey, caKey)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to create certificate: %w", err)
+	}
+	return certRaw, privKey, nil
+}
+
+// selfSigned is the stateless type behind the SelfSigned issuer.
+type selfSigned struct{}
+
+// SelfSigned is an Issuer that generates self-signed certificates.
+var SelfSigned = &selfSigned{}
+
+// Issue issues a self-signed certificate (and its key) for req; ctx and kv are not used.
+func (s *selfSigned) Issue(ctx context.Context, req *Certificate, kv clientv3.KV) (cert, key []byte, err error) {
+	cert, key, err = issueCertificate(req, nil, nil)
+	return cert, key, err
+}
+
+// CACertificate returns nil for self-signed issuers, as they have no separate CA certificate.
+func (s *selfSigned) CACertificate(ctx context.Context, kv clientv3.KV) ([]byte, error) {
+	return nil, nil
+}
+
+// Issue will generate a key and certificate for req that is signed by this
+// Certificate, which is expected to be a CA (this is not verified here).
+func (c *Certificate) Issue(ctx context.Context, req *Certificate, kv clientv3.KV) (cert, key []byte, err error) {
+	// Load (or create) our own certificate and key, which act as the CA.
+	rawCA, rawCAKey, err := c.ensure(ctx, kv)
+	if err != nil {
+		return nil, nil, fmt.Errorf("could not ensure CA certificate %q exists: %w", c.name, err)
+	}
+	parsedCA, err := x509.ParseCertificate(rawCA)
+	if err != nil {
+		return nil, nil, fmt.Errorf("could not parse CA certificate: %w", err)
+	}
+	// Ensure only one level of CAs exist, and that they are created explicitly.
+	req.template.IsCA = false
+	return issueCertificate(req, parsedCA, ed25519.PrivateKey(rawCAKey))
+}
+
+// CACertificate returns the DER encoded x509 form of this Certificate that
+// will be used to issue child certificates.
+func (c *Certificate) CACertificate(ctx context.Context, kv clientv3.KV) ([]byte, error) {
+	der, _, err := c.ensure(ctx, kv)
+	return der, err
+}
diff --git a/metropolis/pkg/pki/certificate.go b/metropolis/pkg/pki/certificate.go
new file mode 100644
index 0000000..ff60f73
--- /dev/null
+++ b/metropolis/pkg/pki/certificate.go
@@ -0,0 +1,283 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pki
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"encoding/pem"
+	"fmt"
+	"net"
+
+	"go.etcd.io/etcd/clientv3"
+
+	"source.monogon.dev/metropolis/pkg/fileargs"
+)
+
+// Namespace represents some path in etcd where certificate/CA data will be
+// stored. Create one via Namespaced, then create certificates in it via New.
+type Namespace struct {
+	// prefix is prepended verbatim to every etcd key built by etcdPath.
+	prefix string
+}
+
+// Namespaced creates a namespace for storing certificate data in etcd at a
+// given 'path' prefix.
+func Namespaced(prefix string) Namespace {
+	ns := Namespace{prefix: prefix}
+	return ns
+}
+
+// Certificate is the promise of a Certificate being available to the caller.
+// In this case, Certificate refers to a pair of x509 certificate and
+// corresponding private key.  Certificates can be stored in etcd, and their
+// issuers might also be stored on etcd. As such, this type's methods contain
+// references to an etcd KV client.  This Certificate type is agnostic to
+// usage, but mostly geared towards Kubernetes certificates.
+type Certificate struct {
+	// namespace provides the etcd key prefix under which a named certificate is stored.
+	namespace *Namespace
+	// Issuer is the Issuer that will generate this certificate if one doesn't
+	// yet exist in etcd, or the requested certificate is volatile (not to be
+	// stored on etcd).
+	Issuer Issuer
+	// name is a unique key for storing the certificate in etcd. If empty,
+	// certificate is 'volatile', will not be stored on etcd, and every
+	// .Ensure() call will generate a new pair.
+	name string
+	// template is an x509 certificate definition that will be used to generate
+	// the certificate when issuing it.
+	template x509.Certificate
+	// key is the private key for which the certificate should be emitted, or nil
+	// if the key should be generated. The private key is required (vs. the
+	// public one) because the Certificate might be attempted to be issued via
+	// self-signing.
+	key ed25519.PrivateKey
+}
+
+// etcdPath formats f with args and prepends the namespace prefix to the result.
+func (n *Namespace) etcdPath(f string, args ...interface{}) string {
+	suffix := fmt.Sprintf(f, args...)
+	return n.prefix + suffix
+}
+
+// New creates a new Certificate in this namespace, or to be more precise, a
+// promise that a certificate will exist once Ensure is called. The issuer
+// must be a valid certificate issuer (SelfSigned or another Certificate). The
+// name must be unique among all certificates, or empty (which will cause the
+// certificate to be volatile, ie. not stored in etcd).
+func (n *Namespace) New(issuer Issuer, name string, template x509.Certificate) *Certificate {
+	c := new(Certificate)
+	c.namespace, c.Issuer = n, issuer
+	c.name, c.template = name, template
+	return c
+}
+
+// Client makes a Kubernetes PKI-compatible client certificate template, with
+// identity as the Subject CommonName and groups as the Subject Organization.
+// Directly derived from Kubernetes PKI requirements documented at
+// https://kubernetes.io/docs/setup/best-practices/certificates/#configure-certificates-manually
+func Client(identity string, groups []string) x509.Certificate {
+	subject := pkix.Name{CommonName: identity, Organization: groups}
+
+	tmpl := x509.Certificate{Subject: subject}
+	tmpl.KeyUsage = x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment
+	// Restrict the extended key usage to client authentication only.
+	tmpl.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}
+	return tmpl
+}
+
+// Server makes a Kubernetes PKI-compatible server certificate template with an
+// empty Subject, carrying the given DNS names and IP addresses.
+func Server(dnsNames []string, ips []net.IP) x509.Certificate {
+	tmpl := x509.Certificate{Subject: pkix.Name{}}
+	tmpl.KeyUsage = x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment
+	tmpl.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}
+	tmpl.DNSNames = dnsNames
+	tmpl.IPAddresses = ips
+	return tmpl
+}
+
+// CA makes a template, with cn as the Subject CommonName, for a Certificate that can sign other certificates.
+func CA(cn string) x509.Certificate {
+	tmpl := x509.Certificate{Subject: pkix.Name{CommonName: cn}, IsCA: true}
+	tmpl.KeyUsage = x509.KeyUsageCertSign | x509.KeyUsageCRLSign | x509.KeyUsageDigitalSignature
+	tmpl.ExtKeyUsage = []x509.ExtKeyUsage{
+		x509.ExtKeyUsageClientAuth,
+		x509.ExtKeyUsageServerAuth,
+		x509.ExtKeyUsageOCSPSigning,
+	}
+	return tmpl
+}
+
+// etcdPaths returns the etcd keys under which this Certificate's certificate and key are stored.
+func (c *Certificate) etcdPaths() (cert, key string) {
+	return c.namespace.etcdPath("%s-cert.der", c.name), c.namespace.etcdPath("%s-key.der", c.name)
+}
+
+// UseExistingKey sets the private key to be used when issuing, instead of generating a new one.
+func (c *Certificate) UseExistingKey(key ed25519.PrivateKey) {
+	c.key = key
+}
+
+// ensure returns a DER-encoded x509 certificate and internally encoded bare
+// ed25519 key for a given Certificate: issued in memory (if volatile), loaded
+// from etcd, or created and saved on etcd if needed.
+// This function is safe to call in parallel from multiple etcd clients
+// (including across machines), but it will error in case a concurrent
+// certificate generation happens. These errors are, however, safe to retry -
+// as long as all the certificate creators (ie., Metropolis nodes) run the same
+// version of this code.
+//
+// TODO(q3k): handle this better, especially once new certificates are
+// introduced or the issuance chain changes: handle concurrency errors in a
+// smarter way, or (as a stopgap) add a per-certificate or global lock.
+func (c *Certificate) ensure(ctx context.Context, kv clientv3.KV) (cert, key []byte, err error) {
+	if c.name == "" {
+		// Volatile certificate - generate a new pair on every call.
+		// TODO(q3k): cache internally?
+		cert, key, err = c.Issuer.Issue(ctx, c, kv)
+		if err != nil {
+			err = fmt.Errorf("failed to issue: %w", err)
+			return
+		}
+		return
+	}
+
+	certPath, keyPath := c.etcdPaths()
+
+	// Try loading certificate and key from etcd.
+	certRes, err := kv.Get(ctx, certPath)
+	if err != nil {
+		err = fmt.Errorf("failed to get certificate from etcd: %w", err)
+		return
+	}
+	keyRes, err := kv.Get(ctx, keyPath)
+	if err != nil {
+		err = fmt.Errorf("failed to get key from etcd: %w", err)
+		return
+	}
+
+	if len(certRes.Kvs) == 1 && len(keyRes.Kvs) == 1 {
+		// Certificate and key both exist in etcd, return them.
+		cert = certRes.Kvs[0].Value
+		key = keyRes.Kvs[0].Value
+
+		err = nil
+		// TODO(q3k): check for expiration
+		return
+	}
+
+	// Certificate and/or key not found - issue a new pair.
+	cert, key, err = c.Issuer.Issue(ctx, c, kv)
+	if err != nil {
+		err = fmt.Errorf("failed to issue: %w", err)
+		return
+	}
+
+	// Save to etcd in transaction. This ensures that no partial writes happen,
+	// and that we haven't been raced to the save.
+	res, err := kv.Txn(ctx).
+		If(
+			clientv3.Compare(clientv3.CreateRevision(certPath), "=", 0),
+			clientv3.Compare(clientv3.CreateRevision(keyPath), "=", 0),
+		).
+		Then(
+			clientv3.OpPut(certPath, string(cert)),
+			clientv3.OpPut(keyPath, string(key)),
+		).Commit()
+	if err != nil {
+		err = fmt.Errorf("failed to write newly issued certificate: %w", err)
+	} else if !res.Succeeded {
+		err = fmt.Errorf("certificate issuance transaction failed: concurrent write")
+	}
+
+	return
+}
+
+// Ensure returns a DER-encoded x509 certificate and a PKCS#8 DER-encoded private key (neither is PEM-encoded) for
+// a given Certificate. Volatile certificates are newly generated on each call; others are retrieved from etcd, or
+// generated and stored there if needed. A key of unexpected size is reported as an error, and nothing is returned.
+func (c *Certificate) Ensure(ctx context.Context, kv clientv3.KV) (cert, key []byte, err error) {
+	cert, rawKey, err := c.ensure(ctx, kv)
+	if err != nil {
+		return nil, nil, err
+	}
+	if len(rawKey) != ed25519.PrivateKeySize {
+		return nil, nil, fmt.Errorf("could not marshal private key (data corruption?): invalid key length %d", len(rawKey))
+	}
+	if key, err = x509.MarshalPKCS8PrivateKey(ed25519.PrivateKey(rawKey)); err != nil {
+		return nil, nil, fmt.Errorf("could not marshal private key (data corruption?): %w", err)
+	}
+	return cert, key, nil
+}
+
+// FilesystemCertificate is a fileargs.FileArgs wrapper which contains PEM
+// encoded certificate material once returned by Certificate.Mount. It is
+// useful for services that want to access etcd-backed certificates as local
+// files. Paths to the available files are considered opaque and should not be
+// leaked outside of the struct. Further restrictions on access to these files
+// might be imposed in the future.
+type FilesystemCertificate struct {
+	// FileArgs is the fileargs instance that backs the files referenced below.
+	*fileargs.FileArgs
+	// CACertPath is the full path at which the CA certificate is available.
+	// Read only.
+	CACertPath string
+	// CertPath is the full path at which the certificate is available. Read
+	// only.
+	CertPath string
+	// KeyPath is the full path at which the key is available. Read only.
+	KeyPath string
+}
+
+// Mount returns a locally mounted FilesystemCertificate for this Certificate,
+// which allows services to access this Certificate via local filesystem
+// access.
+// The embedded fileargs.FileArgs can also be used to add additional file-backed
+// data under the same mount by calling ArgPath.
+// The returned FilesystemCertificate must be Closed in order to prevent a
+// system mount leak.
+func (c *Certificate) Mount(ctx context.Context, kv clientv3.KV) (*FilesystemCertificate, error) {
+	// Gather all certificate material before creating the fileargs mount, so
+	// that a failure here cannot leak a mount that the caller never gets to Close.
+	cert, key, err := c.Ensure(ctx, kv)
+	if err != nil {
+		return nil, fmt.Errorf("when issuing certificate: %w", err)
+	}
+
+	cacert, err := c.Issuer.CACertificate(ctx, kv)
+	if err != nil {
+		return nil, fmt.Errorf("when getting issuer CA: %w", err)
+	}
+	// cacert will be nil if this is a self-signed certificate.
+	if cacert == nil {
+		cacert = cert
+	}
+
+	fa, err := fileargs.New()
+	if err != nil {
+		return nil, fmt.Errorf("when creating fileargs mount: %w", err)
+	}
+	fs := &FilesystemCertificate{FileArgs: fa}
+	fs.CACertPath = fs.ArgPath("ca.crt", pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cacert}))
+	fs.CertPath = fs.ArgPath("tls.crt", pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cert}))
+	fs.KeyPath = fs.ArgPath("tls.key", pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: key}))
+	return fs, nil
+}
diff --git a/metropolis/pkg/pki/doc.go b/metropolis/pkg/pki/doc.go
new file mode 100644
index 0000000..9174b0f
--- /dev/null
+++ b/metropolis/pkg/pki/doc.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pki implements an x509 PKI (Public Key Infrastructure) system backed
+// by etcd.
+//
+// The PKI is made of Certificates, all constrained within a Namespace. The
+// Namespace allows for multiple users of this library to co-exist on a single
+// etcd server.
+//
+// Any time a Certificate object is created, it describes the promise (or
+// intent) of an x509 certificate to exist. For every created Certificate an
+// Issuer must be specified - either another Certificate (which will act as a
+// CA and sign that Certificate), or SelfSigned (which will cause the
+// Certificate to be self-signed when generated).
+//
+// Once a Certificate object is created, a call to Ensure() must be placed to
+// turn the intent of a certificate into physical bytes that can then be
+// accessed by the application.
+//
+// Two kinds of Certificates can be created:
+//  - Named certificates are stored in etcd, and an Ensure call will either
+//    create them, or return a Certificate already stored in etcd. Multiple
+//    concurrent calls to Ensure for a Certificate with the same name are
+//    permitted, even across machines, as long as the Certificate intent data
+//    is the same. If not, it is still safe to perform this action
+//    concurrently, but the first transaction will win, causing the losing
+//    transaction to return the Ensure call with a certificate that was not
+//    based on the same intent.
+//    It is the responsibility of the caller to ensure these cases are handled
+//    gracefully.
+//  - Volatile certificates are not stored in etcd, and have an empty ("") name.
+//    Any time Ensure is called, a new certificate and key are generated; the
+//    result is not cached between calls.
+//    Currently, these certificates live fully in memory, but in the future we
+//    will likely perform audit logging (and revocation) of these certificates
+//    within etcd, too.
+//
+package pki
diff --git a/metropolis/pkg/pki/x509.go b/metropolis/pkg/pki/x509.go
new file mode 100644
index 0000000..d2affe8
--- /dev/null
+++ b/metropolis/pkg/pki/x509.go
@@ -0,0 +1,57 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pki
+
+import (
+	"crypto"
+	"crypto/sha1"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"encoding/asn1"
+	"time"
+)
+
+// unknownNotAfter is 9999-12-31T23:59:59Z, the notAfter value that RFC 5280
+// Section 4.1.2.5 prescribes for certificates that have no well-defined
+// expiration date.
+var unknownNotAfter = time.Unix(253402300799, 0)
+
+// calculateSKID computes a Subject Key Identifier for pubKey: the SHA-1 hash
+// of the subjectPublicKey BIT STRING contents of its PKIX encoding.
+//
+// Workaround for https://github.com/golang/go/issues/26676 in Go's crypto/x509. Specifically Go
+// violates Section 4.2.1.2 of RFC 5280 without this.
+// Fixed for 1.15 in https://go-review.googlesource.com/c/go/+/227098/.
+// Taken from https://github.com/FiloSottile/mkcert/blob/master/cert.go#L295 written by one of Go's
+// crypto engineers.
+// TODO(lorenz): remove this once we migrate to Go 1.15.
+func calculateSKID(pubKey crypto.PublicKey) ([]byte, error) {
+	encoded, err := x509.MarshalPKIXPublicKey(pubKey)
+	if err != nil {
+		return nil, err
+	}
+	// Decode just enough of the SubjectPublicKeyInfo to reach the raw key bits.
+	var info struct {
+		Algorithm        pkix.AlgorithmIdentifier
+		SubjectPublicKey asn1.BitString
+	}
+	if _, err := asn1.Unmarshal(encoded, &info); err != nil {
+		return nil, err
+	}
+	digest := sha1.Sum(info.SubjectPublicKey.Bytes)
+	return digest[:], nil
+}