m/p/pki: implement CRLs

This implements revocation and CRL watching functionality in the main
metropolis PKI library, in preparation for use in the consensus library
(which has full CRL support). In the future, this should also be
extended to be used in Metropolis authentication/authorization.

This also introduces a breaking change by changing the layout of etcd
storage for the PKI library - but we're pre-MVP, so this is fine.

Change-Id: If0775f5447a76949d8498d8853dd7b9c03e0e6dc
Reviewed-on: https://review.monogon.dev/c/monogon/+/465
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/pkg/pki/BUILD.bazel b/metropolis/pkg/pki/BUILD.bazel
index c215ce2..04f02ae 100644
--- a/metropolis/pkg/pki/BUILD.bazel
+++ b/metropolis/pkg/pki/BUILD.bazel
@@ -5,11 +5,15 @@
     srcs = [
         "ca.go",
         "certificate.go",
+        "crl.go",
         "x509.go",
     ],
     importpath = "source.monogon.dev/metropolis/pkg/pki",
     visibility = ["//visibility:public"],
     deps = [
+        "//metropolis/node/core/consensus/client:go_default_library",
+        "//metropolis/pkg/event:go_default_library",
+        "//metropolis/pkg/event/etcd:go_default_library",
         "//metropolis/pkg/fileargs:go_default_library",
         "@io_etcd_go_etcd//clientv3:go_default_library",
     ],
@@ -17,7 +21,13 @@
 
 go_test(
     name = "go_default_test",
-    srcs = ["certificate_test.go"],
+    srcs = [
+        "certificate_test.go",
+        "crl_test.go",
+    ],
     embed = [":go_default_library"],
-    deps = ["@io_etcd_go_etcd//integration:go_default_library"],
+    deps = [
+        "//metropolis/node/core/consensus/client:go_default_library",
+        "@io_etcd_go_etcd//integration:go_default_library",
+    ],
 )
diff --git a/metropolis/pkg/pki/certificate.go b/metropolis/pkg/pki/certificate.go
index e7788b1..f6d480a 100644
--- a/metropolis/pkg/pki/certificate.go
+++ b/metropolis/pkg/pki/certificate.go
@@ -185,7 +185,7 @@
 		}
 	}
 
-	certPath := c.Namespace.etcdPath("%s-cert.der", c.Name)
+	certPath := c.Namespace.etcdPath("issued/%s-cert.der", c.Name)
 
 	// Try loading certificate from etcd.
 	certRes, err := kv.Get(ctx, certPath)
@@ -273,7 +273,7 @@
 	}
 
 	// First, try loading.
-	privPath := c.Namespace.etcdPath("%s-privkey.bin", c.Name)
+	privPath := c.Namespace.etcdPath("keys/%s-privkey.bin", c.Name)
 	privRes, err := kv.Get(ctx, privPath)
 	if err != nil {
 		return fmt.Errorf("failed to get private key from etcd: %w", err)
@@ -307,6 +307,24 @@
 		return fmt.Errorf("key generation transaction failed: concurrent write")
 	}
 
+	crlPath := c.crlPath()
+	emptyCRL, err := c.makeCRL(ctx, kv, nil)
+	if err != nil {
+		return fmt.Errorf("failed to generate empty CRL: %w", err)
+	}
+
+	// Also attempt to emit an empty CRL if one doesn't exist yet.
+	_, err = kv.Txn(ctx).
+		If(
+			clientv3.Compare(clientv3.CreateRevision(crlPath), "=", 0),
+		).
+		Then(
+			clientv3.OpPut(crlPath, string(emptyCRL)),
+		).Commit()
+	if err != nil {
+		return fmt.Errorf("failed to upsert empty CRL")
+	}
+
 	c.PrivateKey = priv
 	c.PublicKey = pub
 	return nil
diff --git a/metropolis/pkg/pki/crl.go b/metropolis/pkg/pki/crl.go
new file mode 100644
index 0000000..2627776
--- /dev/null
+++ b/metropolis/pkg/pki/crl.go
@@ -0,0 +1,184 @@
+package pki
+
+import (
+	"context"
+	"crypto/rand"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"fmt"
+	"math/big"
+	"time"
+
+	"go.etcd.io/etcd/clientv3"
+
+	"source.monogon.dev/metropolis/node/core/consensus/client"
+	"source.monogon.dev/metropolis/pkg/event"
+	"source.monogon.dev/metropolis/pkg/event/etcd"
+)
+
+// crlPath returns the etcd path under which the marshaled X.509 Certificate
+// Revocation List of this certificate is stored, derived from its Name.
+//
+// TODO(q3k): use etcd keyspace API (the original note was left unfinished).
+func (c *Certificate) crlPath() string {
+	return c.Namespace.etcdPath("%s-crl.der", c.Name)
+}
+
+// Revoke performs a CRL-based revocation of a given certificate by this CA,
+// looking it up by DNS name. The revocation is immediately written to the
+// backing etcd store and will be available to consumers through the WatchCRL
+// API. Revoking an already revoked certificate succeeds without a new write.
+//
+// An error is returned if the CRL could not be emitted (eg. due to an etcd
+// communication error, a conflicting CRL write) or if the given hostname
+// matches no emitted certificate.
+//
+// Only Managed and External certificates can be revoked.
+func (c Certificate) Revoke(ctx context.Context, kv clientv3.KV, hostname string) error {
+	crlPath := c.crlPath()
+	issuedCerts := c.Namespace.etcdPath("issued/")
+
+	res, err := kv.Txn(ctx).Then(
+		clientv3.OpGet(crlPath),
+		clientv3.OpGet(issuedCerts, clientv3.WithPrefix())).Commit()
+	if err != nil {
+		return fmt.Errorf("failed to retrieve certificates and CRL from etcd: %w", err)
+	}
+
+	// Parse certs, CRL and CRL ModRevision from state. ModRevision changes on
+	// every write (CreateRevision does not), so the save below detects conflicts.
+	var certs []*x509.Certificate
+	var crlRevision int64
+	var crl *pkix.CertificateList
+	for _, el := range res.Responses {
+		for _, item := range el.GetResponseRange().GetKvs() {
+			if string(item.Key) == crlPath {
+				crl, err = x509.ParseCRL(item.Value)
+				if err != nil {
+					return fmt.Errorf("could not parse CRL from etcd: %w", err)
+				}
+				crlRevision = item.ModRevision
+			} else {
+				cert, err := x509.ParseCertificate(item.Value)
+				if err != nil {
+					return fmt.Errorf("could not parse certificate %q from etcd: %w", string(item.Key), err)
+				}
+				certs = append(certs, cert)
+			}
+		}
+	}
+	if crl == nil {
+		return fmt.Errorf("could not find CRL in etcd")
+	}
+	revoked := crl.TBSCertList.RevokedCertificates
+
+	// Find requested hostname in issued certificates.
+	var serial *big.Int
+	for _, cert := range certs {
+		for _, dnsName := range cert.DNSNames {
+			if dnsName == hostname {
+				serial = cert.SerialNumber
+				break
+			}
+		}
+		if serial != nil {
+			break
+		}
+	}
+	if serial == nil {
+		return fmt.Errorf("could not find requested hostname")
+	}
+
+	// Check if certificate has already been revoked.
+	for _, revokedCert := range revoked {
+		if revokedCert.SerialNumber.Cmp(serial) == 0 {
+			return nil // Already revoked
+		}
+	}
+
+	// Otherwise, revoke and save new CRL.
+	revoked = append(revoked, pkix.RevokedCertificate{
+		SerialNumber:   serial,
+		RevocationTime: time.Now(),
+	})
+	crlRaw, err := c.makeCRL(ctx, kv, revoked)
+	if err != nil {
+		return fmt.Errorf("when generating new CRL for revocation: %w", err)
+	}
+
+	res, err = kv.Txn(ctx).If(
+		clientv3.Compare(clientv3.ModRevision(crlPath), "=", crlRevision),
+	).Then(
+		clientv3.OpPut(crlPath, string(crlRaw)),
+	).Commit()
+	if err != nil {
+		return fmt.Errorf("when saving new CRL: %w", err)
+	}
+	if !res.Succeeded {
+		return fmt.Errorf("CRL save transaction failed, retry possible")
+	}
+
+	return nil
+}
+
+// makeCRL builds a CRL listing the given revoked certificates, signed with this
+// certificate's private key. The etcd client is only passed to c.ensure to
+// obtain the issuing certificate; the CRL itself is returned, never written.
+func (c *Certificate) makeCRL(ctx context.Context, kv clientv3.KV, revoked []pkix.RevokedCertificate) ([]byte, error) {
+	if c.Mode != CertificateManaged {
+		return nil, fmt.Errorf("only managed certificates can issue CRLs")
+	}
+	issuerDER, err := c.ensure(ctx, kv)
+	if err != nil {
+		return nil, fmt.Errorf("when ensuring certificate: %w", err)
+	}
+	issuer, err := x509.ParseCertificate(issuerDER)
+	if err != nil {
+		return nil, fmt.Errorf("when parsing issuing certificate: %w", err)
+	}
+	signed, err := issuer.CreateCRL(rand.Reader, c.PrivateKey, revoked, time.Now(), UnknownNotAfter)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate CRL: %w", err)
+	}
+	return signed, nil
+}
+
+// WatchCRL returns an Event Value compatible CRLWatcher which can be used to
+// retrieve and watch for the newest CRL available from this CA certificate.
+// Each value read from the CRL's etcd path is parsed and delivered as a *CRL.
+func (c *Certificate) WatchCRL(cl client.Namespaced) CRLWatcher {
+	// decode turns the raw bytes stored in etcd into a CRL, keeping the bytes.
+	decode := func(_, raw []byte) (interface{}, error) {
+		list, err := x509.ParseCRL(raw)
+		if err != nil {
+			return nil, fmt.Errorf("could not parse CRL from etcd: %w", err)
+		}
+		return &CRL{Raw: raw, List: list}, nil
+	}
+	value := etcd.NewValue(cl, c.crlPath(), decode)
+	return CRLWatcher{value.Watch()}
+}
+
+// CRLWatcher is an Event Value compatible Watcher updated on CA CRL changes.
+type CRLWatcher struct {
+	event.Watcher
+}
+
+// CRL is a CRL as read from etcd: Raw is the stored data, List its parsed form.
+type CRL struct {
+	Raw  []byte
+	List *pkix.CertificateList
+}
+
+// Get retrieves the newest available CRL from etcd, blocking until one is
+// available or updated. Errors from the underlying Watcher are returned as-is.
+//
+// The first call will block until a CRL is available, which happens the first
+// time a given CA certificate is stored in etcd (eg. through an Ensure call).
+func (c *CRLWatcher) Get(ctx context.Context, opts ...event.GetOption) (*CRL, error) {
+	got, err := c.Watcher.Get(ctx, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return got.(*CRL), nil
+}
diff --git a/metropolis/pkg/pki/crl_test.go b/metropolis/pkg/pki/crl_test.go
new file mode 100644
index 0000000..39a0b0e
--- /dev/null
+++ b/metropolis/pkg/pki/crl_test.go
@@ -0,0 +1,139 @@
+package pki
+
+import (
+	"context"
+	"crypto/x509"
+	"testing"
+
+	"go.etcd.io/etcd/integration"
+
+	"source.monogon.dev/metropolis/node/core/consensus/client"
+)
+
+// TestRevoke exercises the CRL revocation and watching functionality of a CA
+// certificate against a single-member etcd integration cluster.
+func TestRevoke(t *testing.T) {
+	cluster := integration.NewClusterV3(nil, &integration.ClusterConfig{
+		Size: 1,
+	})
+	cl := client.NewLocal(cluster.Client(0))
+	defer cluster.Terminate(nil)
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+	ns := Namespaced("/test-managed/")
+
+	ca := &Certificate{
+		Namespace: &ns,
+		Issuer:    SelfSigned,
+		Name:      "ca",
+		Template:  CA("Test CA"),
+	}
+	sub := &Certificate{
+		Namespace: &ns,
+		Issuer:    ca,
+		Name:      "sub",
+		Template:  Server([]string{"server"}, nil),
+	}
+
+	caCertBytes, err := ca.Ensure(ctx, cl)
+	if err != nil {
+		t.Fatalf("Ensuring ca certificate failed: %v", err)
+	}
+	caCert, err := x509.ParseCertificate(caCertBytes)
+	if err != nil {
+		t.Fatalf("Loading newly emitted CA certificate failed: %v", err)
+	}
+
+	subCertBytes, err := sub.Ensure(ctx, cl)
+	if err != nil {
+		t.Fatalf("Ensuring sub certificate failed: %v", err)
+	}
+	subCert, err := x509.ParseCertificate(subCertBytes)
+	if err != nil {
+		t.Fatalf("Loading newly emitted sub certificate failed: %v", err)
+	}
+
+	// Ensure the initial CRL is signed by the CA and subCert is not yet on it.
+	crlW := ca.WatchCRL(cl)
+	crl, err := crlW.Get(ctx)
+	if err != nil {
+		t.Fatalf("Retrieving initial CRL failed: %v", err)
+	}
+	if err := caCert.CheckCRLSignature(crl.List); err != nil {
+		t.Fatalf("Initial CRL not signed by CA: %v", err)
+	}
+	for _, el := range crl.List.TBSCertList.RevokedCertificates {
+		if el.SerialNumber.Cmp(subCert.SerialNumber) == 0 {
+			t.Fatalf("Newly emitted certificate is already on CRL.")
+		}
+	}
+
+	// Emit yet another certificate. Also shouldn't be on the CRL fetched above.
+	bad := &Certificate{
+		Namespace: &ns,
+		Issuer:    ca,
+		Name:      "bad",
+		Template:  Server([]string{"badserver"}, nil),
+	}
+	badCertBytes, err := bad.Ensure(ctx, cl)
+	if err != nil {
+		t.Fatalf("Ensuring bad certificate failed: %v", err)
+	}
+	badCert, err := x509.ParseCertificate(badCertBytes)
+	if err != nil {
+		t.Fatalf("Loading newly emitted bad certificate failed: %v", err)
+	}
+	for _, el := range crl.List.TBSCertList.RevokedCertificates {
+		if el.SerialNumber.Cmp(badCert.SerialNumber) == 0 {
+			t.Fatalf("Newly emitted bad certificate is already on CRL.")
+		}
+	}
+
+	// Revoke bad certificate by its DNS name. Should now be present in CRL.
+	if err := ca.Revoke(ctx, cl, "badserver"); err != nil {
+		t.Fatalf("Revoke failed: %v", err)
+	}
+	// Get in a loop until found. ctx has no deadline, so this waits as needed.
+	for {
+		crl, err = crlW.Get(ctx)
+		if err != nil {
+			t.Fatalf("Get failed: %v", err)
+		}
+		found := false
+		for _, el := range crl.List.TBSCertList.RevokedCertificates {
+			if el.SerialNumber.Cmp(badCert.SerialNumber) == 0 {
+				found = true
+			}
+			if el.SerialNumber.Cmp(subCert.SerialNumber) == 0 {
+				t.Errorf("Found non-revoked cert in CRL")
+			}
+		}
+		if found {
+			break
+		}
+	}
+	// Now revoke first certificate. Both should be now present in CRL.
+	if err := ca.Revoke(ctx, cl, "server"); err != nil {
+		t.Fatalf("Revoke failed: %v", err)
+	}
+	// Get in a loop until a CRL listing both revoked certificates is seen.
+	for {
+		crl, err = crlW.Get(ctx)
+		if err != nil {
+			t.Fatalf("Get failed: %v", err)
+		}
+		foundSub := false
+		foundBad := false
+		for _, el := range crl.List.TBSCertList.RevokedCertificates {
+			if el.SerialNumber.Cmp(badCert.SerialNumber) == 0 {
+				foundBad = true
+			}
+			if el.SerialNumber.Cmp(subCert.SerialNumber) == 0 {
+				foundSub = true
+			}
+		}
+		if foundBad && foundSub {
+			break
+		}
+	}
+}