core -> metropolis
Smalltown is now called Metropolis!
This is the first commit in a series of cleanup commits that prepare us
for an open source release. This one just some Bazel packages around to
follow a stricter directory layout.
All of Metropolis now lives in `//metropolis`.
All of Metropolis Node code now lives in `//metropolis/node`.
All of the main /init now lives in `//m/n/core`.
All of the Kubernetes functionality/glue now lives in `//m/n/kubernetes`.
Next steps:
- hunt down all references to Smalltown and replace them appropriately
- narrow down visibility rules
- document new code organization
- move `//build/toolchain` to `//monogon/build/toolchain`
- do another cleanup pass between `//golibs` and
`//monogon/node/{core,common}`.
- remove `//delta` and `//anubis`
Fixes T799.
Test Plan: Just a very large refactor. CI should help us out here.
Bug: T799
X-Origin-Diff: phab/D667
GitOrigin-RevId: 6029b8d4edc42325d50042596b639e8b122d0ded
diff --git a/metropolis/node/core/consensus/BUILD.bazel b/metropolis/node/core/consensus/BUILD.bazel
new file mode 100644
index 0000000..cab2c0a
--- /dev/null
+++ b/metropolis/node/core/consensus/BUILD.bazel
@@ -0,0 +1,30 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["consensus.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/core/consensus",
+ visibility = ["//:__subpackages__"],
+ deps = [
+ "//metropolis/node:go_default_library",
+ "//metropolis/node/common/supervisor:go_default_library",
+ "//metropolis/node/core/consensus/ca:go_default_library",
+ "//metropolis/node/core/localstorage:go_default_library",
+ "@io_etcd_go_etcd//clientv3:go_default_library",
+ "@io_etcd_go_etcd//clientv3/namespace:go_default_library",
+ "@io_etcd_go_etcd//embed:go_default_library",
+ "@org_uber_go_atomic//:go_default_library",
+ ],
+)
+
+go_test(
+ name = "go_default_test",
+ srcs = ["consensus_test.go"],
+ embed = [":go_default_library"],
+ deps = [
+ "//golibs/common:go_default_library",
+ "//metropolis/node/common/supervisor:go_default_library",
+ "//metropolis/node/core/localstorage:go_default_library",
+ "//metropolis/node/core/localstorage/declarative:go_default_library",
+ ],
+)
diff --git a/metropolis/node/core/consensus/ca/BUILD.bazel b/metropolis/node/core/consensus/ca/BUILD.bazel
new file mode 100644
index 0000000..fecffa0
--- /dev/null
+++ b/metropolis/node/core/consensus/ca/BUILD.bazel
@@ -0,0 +1,9 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["ca.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/core/consensus/ca",
+ visibility = ["//:__subpackages__"],
+ deps = ["@io_etcd_go_etcd//clientv3:go_default_library"],
+)
diff --git a/metropolis/node/core/consensus/ca/ca.go b/metropolis/node/core/consensus/ca/ca.go
new file mode 100644
index 0000000..9a1b634
--- /dev/null
+++ b/metropolis/node/core/consensus/ca/ca.go
@@ -0,0 +1,440 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// package ca implements a simple standards-compliant certificate authority.
+// It only supports ed25519 keys, and does not maintain any persistent state.
+//
+// The CA is backed by etcd storage, and can also bootstrap itself without a yet running etcd storage (and commit
+// in-memory secrets to etcd at a later date).
+//
+// CA and certificates successfully pass https://github.com/zmap/zlint
+// (minus the CA/B rules that a public CA would adhere to, which requires
+// things like OCSP servers, Certificate Policies and ECDSA/RSA-only keys).
+package ca
+
+// TODO(leo): add zlint test
+
+import (
+ "context"
+ "crypto"
+ "crypto/ed25519"
+ "crypto/rand"
+ "crypto/sha1"
+ "crypto/x509"
+ "crypto/x509/pkix"
+ "encoding/asn1"
+ "encoding/hex"
+ "errors"
+ "fmt"
+ "math/big"
+ "net"
+ "time"
+
+ "go.etcd.io/etcd/clientv3"
+)
+
+const (
+ // TODO(q3k): move this to a declarative storage layer
+ // pathCACertificate is the etcd key holding the DER-encoded CA certificate.
+ pathCACertificate = "/etcd-ca/ca.der"
+ // pathCAKey is the etcd key holding the raw ed25519 private key bytes of the CA.
+ pathCAKey = "/etcd-ca/ca-key.der"
+ // pathCACRL is the etcd key holding the current DER-encoded CRL.
+ pathCACRL = "/etcd-ca/crl.der"
+ // pathIssuedCertificates is the etcd key prefix under which issued certificates are stored, one key per serial.
+ pathIssuedCertificates = "/etcd-ca/certs/"
+)
+
+// pathIssuedCertificate returns the etcd key under which the certificate with the given serial number is stored: the
+// issued-certificates prefix followed by the lowercase hex encoding of the serial's big-endian magnitude.
+func pathIssuedCertificate(serial *big.Int) string {
+	suffix := hex.EncodeToString(serial.Bytes())
+	return pathIssuedCertificates + suffix
+}
+
+var (
+ // unknownNotAfter is 9999-12-31T23:59:59Z, the notAfter value that RFC 5280 Section 4.1.2.5 prescribes for
+ // certificates with no well-defined expiration date. It is also used as the next-update time of generated CRLs.
+ unknownNotAfter = time.Unix(253402300799, 0)
+)
+
+// CA is a certificate authority: its signing key and certificate, plus any certificates that were issued in memory
+// before the CA has been saved to etcd.
+type CA struct {
+ // TODO: Potentially protect the key with memguard
+ // privateKey is the ed25519 key used to sign certificates and CRLs.
+ privateKey *ed25519.PrivateKey
+ // CACert is the CA certificate; it is used as the parent when signing issued certificates.
+ CACert *x509.Certificate
+ // CACertRaw is the DER encoding of the CA certificate.
+ CACertRaw []byte
+
+ // bootstrapIssued are certificates that have been issued by the CA before it has been successfully Saved to etcd.
+ bootstrapIssued [][]byte
+ // canBootstrapIssue is set on CAs that have been created by New and not yet stored to etcd. If not set,
+ // certificates cannot be issued in-memory.
+ canBootstrapIssue bool
+}
+
+// calculateSKID derives a subject key identifier for pubKey: the SHA-1 digest of the contents of the
+// subjectPublicKey BIT STRING (that is, excluding the algorithm identifier).
+//
+// This is a workaround for https://github.com/golang/go/issues/26676 in Go's crypto/x509. Specifically Go
+// violates Section 4.2.1.2 of RFC 5280 without this.
+// Fixed for 1.15 in https://go-review.googlesource.com/c/go/+/227098/.
+//
+// Taken from https://github.com/FiloSottile/mkcert/blob/master/cert.go#L295 written by one of Go's
+// crypto engineers (BSD 3-clause).
+func calculateSKID(pubKey crypto.PublicKey) ([]byte, error) {
+	der, err := x509.MarshalPKIXPublicKey(pubKey)
+	if err != nil {
+		return nil, err
+	}
+
+	// Decode just enough of the SubjectPublicKeyInfo structure to reach the raw key bits.
+	var info struct {
+		Algorithm        pkix.AlgorithmIdentifier
+		SubjectPublicKey asn1.BitString
+	}
+	if _, err := asn1.Unmarshal(der, &info); err != nil {
+		return nil, err
+	}
+
+	digest := sha1.Sum(info.SubjectPublicKey.Bytes)
+	return digest[:], nil
+}
+
+// New creates a new certificate authority with the given common name. The newly created CA will be stored in memory
+// until committed to etcd by calling .Save; until then it may issue certificates in memory (see Issue).
+//
+// The CA certificate is self-signed, uses a fresh ed25519 key and a random serial number, and never expires.
+// An error is returned if key or serial number generation fails, or if the certificate cannot be created.
+func New(name string) (*CA, error) {
+	pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		// Report the failure like every other error in this function instead of panicking in library code.
+		return nil, fmt.Errorf("failed to generate CA key: %w", err)
+	}
+
+	// Serial numbers are drawn uniformly from [0, 2^127).
+	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
+	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate serial number: %w", err)
+	}
+
+	skid, err := calculateSKID(pubKey)
+	if err != nil {
+		return nil, err
+	}
+
+	caCert := &x509.Certificate{
+		SerialNumber: serialNumber,
+		Subject: pkix.Name{
+			CommonName: name,
+		},
+		IsCA:                  true,
+		BasicConstraintsValid: true,
+		NotBefore:             time.Now(),
+		NotAfter:              unknownNotAfter,
+		KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageCRLSign | x509.KeyUsageDigitalSignature,
+		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageOCSPSigning},
+		// Self-signed: the authority and subject key identifiers are the same.
+		AuthorityKeyId: skid,
+		SubjectKeyId:   skid,
+	}
+
+	// The template doubles as the parent, which makes the certificate self-signed.
+	caCertRaw, err := x509.CreateCertificate(rand.Reader, caCert, caCert, pubKey, privKey)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create root certificate: %w", err)
+	}
+
+	ca := &CA{
+		privateKey: &privKey,
+		CACertRaw:  caCertRaw,
+		CACert:     caCert,
+
+		canBootstrapIssue: true,
+	}
+
+	return ca, nil
+}
+
+// Load restores CA state from etcd. The CA certificate, key and CRL are read in a single transaction; an error is
+// returned if any of them is missing, if the key does not have the size of an ed25519 private key, or if the
+// certificate cannot be parsed. A loaded CA cannot issue certificates in memory.
+func Load(ctx context.Context, kv clientv3.KV) (*CA, error) {
+	// The CRL itself is not used here: it is only read to ensure it exists on etcd (and early fail on inconsistency).
+	resp, err := kv.Txn(ctx).Then(
+		clientv3.OpGet(pathCACertificate),
+		clientv3.OpGet(pathCAKey),
+		clientv3.OpGet(pathCACRL),
+	).Commit()
+	if err != nil {
+		return nil, fmt.Errorf("failed to retrieve CA from etcd: %w", err)
+	}
+
+	// Collect the returned values by key.
+	found := make(map[string][]byte, 3)
+	for _, opResp := range resp.Responses {
+		for _, pair := range opResp.GetResponseRange().GetKvs() {
+			found[string(pair.Key)] = pair.Value
+		}
+	}
+	certDER, keyRaw, crlDER := found[pathCACertificate], found[pathCAKey], found[pathCACRL]
+	if certDER == nil || keyRaw == nil || crlDER == nil {
+		return nil, fmt.Errorf("failed to retrieve CA from etcd, missing at least one of {ca key, ca crt, ca crl}")
+	}
+
+	if len(keyRaw) != ed25519.PrivateKeySize {
+		return nil, errors.New("invalid CA private key size")
+	}
+	key := ed25519.PrivateKey(keyRaw)
+
+	parsed, err := x509.ParseCertificate(certDER)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse CA certificate: %w", err)
+	}
+
+	loaded := &CA{
+		privateKey: &key,
+		CACertRaw:  certDER,
+		CACert:     parsed,
+	}
+	return loaded, nil
+}
+
+// Save stores a newly created CA into etcd, committing both the CA data and any certificates issued until then.
+//
+// Everything is written in one transaction that only applies if no CA key is present in etcd yet; if one is, nothing
+// is written and an error is returned. On success the CA can no longer issue certificates in memory.
+func (c *CA) Save(ctx context.Context, kv clientv3.KV) error {
+	// The CA always starts out with an empty CRL.
+	crl, err := c.makeCRL(nil)
+	if err != nil {
+		return fmt.Errorf("failed to generate initial CRL: %w", err)
+	}
+
+	ops := []clientv3.Op{
+		clientv3.OpPut(pathCACertificate, string(c.CACertRaw)),
+		clientv3.OpPut(pathCAKey, string([]byte(*c.privateKey))),
+		clientv3.OpPut(pathCACRL, string(crl)),
+	}
+	for i, certRaw := range c.bootstrapIssued {
+		// Certificates are kept as DER in memory; parse them to recover the serial number used as the etcd key.
+		cert, err := x509.ParseCertificate(certRaw)
+		if err != nil {
+			return fmt.Errorf("failed to parse in-memory certificate %d: %w", i, err)
+		}
+		ops = append(ops, clientv3.OpPut(pathIssuedCertificate(cert.SerialNumber), string(certRaw)))
+	}
+
+	// A create revision of zero means the key does not exist.
+	res, err := kv.Txn(ctx).If(
+		clientv3.Compare(clientv3.CreateRevision(pathCAKey), "=", 0),
+	).Then(ops...).Commit()
+	if err != nil {
+		return fmt.Errorf("failed to store CA to etcd: %w", err)
+	}
+	if !res.Succeeded {
+		// This should pretty much never happen, but we want to catch it just in case.
+		return fmt.Errorf("failed to store CA to etcd: CA already present - cluster-level data inconsistency")
+	}
+	c.bootstrapIssued = nil
+	c.canBootstrapIssue = false
+	return nil
+}
+
+// Issue issues a certificate. If kv is non-nil, the newly issued certificate will be immediately stored to etcd,
+// otherwise it will be kept in memory (until .Save is called). Certificates can only be issued to memory on
+// newly-created CAs that have not been saved to etcd yet.
+//
+// The certificate is issued for a freshly generated ed25519 key, with commonName as both the subject common name and
+// the only DNS name, and externalAddress as the only IP address. It returns the DER-encoded certificate and the
+// PKCS#8-encoded private key. On error, both returned slices are nil.
+func (c *CA) Issue(ctx context.Context, kv clientv3.KV, commonName string, externalAddress net.IP) (cert []byte, privkey []byte, err error) {
+	// Refuse in-memory issuance up front, before any key material is generated or signed.
+	if kv == nil && !c.canBootstrapIssue {
+		return nil, nil, fmt.Errorf("cannot issue new certificate to memory on existing, etcd-backed CA")
+	}
+
+	// Serial numbers are drawn uniformly from [0, 2^127).
+	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
+	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to generate serial number: %w", err)
+	}
+
+	pubKey, privKeyRaw, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to generate key: %w", err)
+	}
+	keyPKCS8, err := x509.MarshalPKCS8PrivateKey(privKeyRaw)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to marshal key: %w", err)
+	}
+
+	etcdCert := &x509.Certificate{
+		SerialNumber: serialNumber,
+		Subject: pkix.Name{
+			CommonName:         commonName,
+			OrganizationalUnit: []string{"etcd"},
+		},
+		IsCA:                  false,
+		BasicConstraintsValid: true,
+		NotBefore:             time.Now(),
+		NotAfter:              unknownNotAfter,
+		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth},
+		DNSNames:              []string{commonName},
+		IPAddresses:           []net.IP{externalAddress},
+	}
+	certDER, err := x509.CreateCertificate(rand.Reader, etcdCert, c.CACert, pubKey, c.privateKey)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to sign new certificate: %w", err)
+	}
+
+	if kv == nil {
+		// Bootstrap mode: remember the certificate so that Save can commit it to etcd later.
+		c.bootstrapIssued = append(c.bootstrapIssued, certDER)
+		return certDER, keyPKCS8, nil
+	}
+
+	if _, err := kv.Put(ctx, pathIssuedCertificate(serialNumber), string(certDER)); err != nil {
+		return nil, nil, fmt.Errorf("failed to commit new certificate to etcd: %w", err)
+	}
+	return certDER, keyPKCS8, nil
+}
+
+// makeCRL returns a DER-encoded CRL, signed with the CA key, that lists the given revoked certificates. The CRL is
+// dated now and its next update is set to unknownNotAfter.
+func (c *CA) makeCRL(revoked []pkix.RevokedCertificate) ([]byte, error) {
+	now := time.Now()
+	der, err := c.CACert.CreateCRL(rand.Reader, c.privateKey, revoked, now, unknownNotAfter)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate CRL: %w", err)
+	}
+	return der, nil
+}
+
+// Revoke revokes a certificate by hostname. The serial number of the first issued certificate whose DNS names contain
+// hostname is added to the CRL stored in etcd. This call might fail (safely) if a simultaneous revoke happened that
+// caused the CRL to be bumped. The call can be then retried safely. Revoking an already revoked certificate is a
+// no-op that returns nil.
+func (c *CA) Revoke(ctx context.Context, kv clientv3.KV, hostname string) error {
+	// Read the CRL and all issued certificates in one transaction, so that both come from the same revision.
+	res, err := kv.Txn(ctx).Then(
+		clientv3.OpGet(pathCACRL),
+		clientv3.OpGet(pathIssuedCertificates, clientv3.WithPrefix())).Commit()
+	if err != nil {
+		return fmt.Errorf("failed to retrieve certificates and CRL from etcd: %w", err)
+	}
+
+	var certs []*x509.Certificate
+	var crlRevision int64
+	var crl *pkix.CertificateList
+	for _, el := range res.Responses {
+		for _, item := range el.GetResponseRange().GetKvs() {
+			if string(item.Key) == pathCACRL {
+				crl, err = x509.ParseCRL(item.Value)
+				if err != nil {
+					return fmt.Errorf("could not parse CRL from etcd: %w", err)
+				}
+				// Remember the modification revision of the CRL that was read. Unlike the create revision, it
+				// changes on every write to the key, which is what the compare-and-swap below relies on.
+				crlRevision = item.ModRevision
+				continue
+			}
+
+			cert, err := x509.ParseCertificate(item.Value)
+			if err != nil {
+				return fmt.Errorf("could not parse certificate %q from etcd: %w", string(item.Key), err)
+			}
+			certs = append(certs, cert)
+		}
+	}
+
+	if crl == nil {
+		return fmt.Errorf("could not find CRL in etcd")
+	}
+	revoked := crl.TBSCertList.RevokedCertificates
+
+	// Find requested hostname in issued certificates.
+	var serial *big.Int
+	for _, cert := range certs {
+		for _, dnsName := range cert.DNSNames {
+			if dnsName == hostname {
+				serial = cert.SerialNumber
+				break
+			}
+		}
+		if serial != nil {
+			break
+		}
+	}
+	if serial == nil {
+		return fmt.Errorf("could not find requested hostname")
+	}
+
+	// Check if certificate has already been revoked.
+	for _, revokedCert := range revoked {
+		if revokedCert.SerialNumber.Cmp(serial) == 0 {
+			return nil // Already revoked
+		}
+	}
+
+	revoked = append(revoked, pkix.RevokedCertificate{
+		SerialNumber:   serial,
+		RevocationTime: time.Now(),
+	})
+
+	crlRaw, err := c.makeCRL(revoked)
+	if err != nil {
+		return fmt.Errorf("when generating new CRL for revocation: %w", err)
+	}
+
+	// Only store the new CRL if the one in etcd is still the one it was derived from. Otherwise a concurrent
+	// revocation would be silently overwritten and lost.
+	res, err = kv.Txn(ctx).If(
+		clientv3.Compare(clientv3.ModRevision(pathCACRL), "=", crlRevision),
+	).Then(
+		clientv3.OpPut(pathCACRL, string(crlRaw)),
+	).Commit()
+	if err != nil {
+		return fmt.Errorf("when saving new CRL: %w", err)
+	}
+	if !res.Succeeded {
+		return fmt.Errorf("CRL save transaction failed, retry possibly")
+	}
+
+	return nil
+}
+
+// WaitCRLChange returns a channel that will receive a CRLUpdate any time the remote CRL changed. Immediately after
+// calling this method, the current CRL is retrieved from the cluster and put into the channel.
+//
+// The channel is closed after an error has been delivered, or once ctx is done. Updates are only ever sent while ctx
+// is live, so a consumer that stops reading must cancel ctx to release the background goroutine.
+func (c *CA) WaitCRLChange(ctx context.Context, kv clientv3.KV, w clientv3.Watcher) <-chan CRLUpdate {
+	C := make(chan CRLUpdate)
+
+	go func() {
+		// Closing the channel on every exit path lets consumers ranging over it terminate.
+		defer close(C)
+
+		// The derived context makes sure the watch is torn down whenever this goroutine returns.
+		ctxC, cancel := context.WithCancel(ctx)
+		defer cancel()
+
+		// send delivers u to the consumer, and reports false if the context ended first.
+		send := func(u CRLUpdate) bool {
+			select {
+			case C <- u:
+				return true
+			case <-ctxC.Done():
+				return false
+			}
+		}
+		fail := func(f string, args ...interface{}) {
+			send(CRLUpdate{Err: fmt.Errorf(f, args...)})
+		}
+
+		initial, err := kv.Get(ctxC, pathCACRL)
+		if err != nil {
+			fail("failed to retrieve initial CRL: %w", err)
+			return
+		}
+		if len(initial.Kvs) == 0 {
+			fail("failed to retrieve initial CRL: not present in etcd")
+			return
+		}
+		if !send(CRLUpdate{CRL: initial.Kvs[0].Value}) {
+			return
+		}
+
+		// Watch from the revision right after the initial read: no change is missed, and no historical CRL (which
+		// might additionally have been compacted away) is replayed.
+		for wr := range w.Watch(ctxC, pathCACRL, clientv3.WithRev(initial.Header.GetRevision()+1)) {
+			if err := wr.Err(); err != nil {
+				fail("failed watching CRL: %w", err)
+				return
+			}
+
+			for _, e := range wr.Events {
+				if string(e.Kv.Key) != pathCACRL {
+					continue
+				}
+				if !send(CRLUpdate{CRL: e.Kv.Value}) {
+					return
+				}
+			}
+		}
+	}()
+
+	return C
+}
+
+// CRLUpdate is emitted for every remote CRL change, and spuriously on every new WaitCRLChange.
+type CRLUpdate struct {
+ // The new (or existing, in the case of the first call) CRL. If nil, Err will be set.
+ CRL []byte
+ // If set, an error occurred and the WaitCRLChange call must be restarted. If set, CRL will be nil.
+ Err error
+}
+
+// GetCurrentCRL returns the current CRL for the CA. This should only be used for one-shot operations like
+// bootstrapping a new node that doesn't yet have access to etcd - otherwise, WaitCRLChange should be used.
+//
+// An error is returned if the CRL cannot be read from etcd or is not present there.
+func (c *CA) GetCurrentCRL(ctx context.Context, kv clientv3.KV) ([]byte, error) {
+	initial, err := kv.Get(ctx, pathCACRL)
+	if err != nil {
+		return nil, fmt.Errorf("failed to retrieve initial CRL: %w", err)
+	}
+	// A successful Get of a missing key returns no key-value pairs; report that instead of indexing out of range.
+	if len(initial.Kvs) == 0 {
+		return nil, fmt.Errorf("failed to retrieve initial CRL: not present in etcd")
+	}
+	return initial.Kvs[0].Value, nil
+}
diff --git a/metropolis/node/core/consensus/consensus.go b/metropolis/node/core/consensus/consensus.go
new file mode 100644
index 0000000..8916164
--- /dev/null
+++ b/metropolis/node/core/consensus/consensus.go
@@ -0,0 +1,429 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package consensus implements a managed etcd cluster member service, with a self-hosted CA system for issuing peer
+// certificates. Currently each Smalltown node runs an etcd member, and connects to the etcd member locally over a unix
+// domain socket.
+//
+// The service supports two modes of startup:
+// - initializing a new cluster, by bootstrapping the CA in memory, starting a cluster, committing the CA to etcd
+// afterwards, and saving the new node's certificate to local storage
+// - joining an existing cluster, using certificates from local storage and loading the CA from etcd. This flow is also
+// used when the node joins a cluster for the first time (then the certificates required must be provisioned
+// externally before starting the consensus service).
+//
+// Regardless of how the etcd member service was started, the resulting running service is further managed and used
+// in the same way.
+//
+package consensus
+
+import (
+ "context"
+ "encoding/pem"
+ "fmt"
+ "net"
+ "net/url"
+ "sync"
+ "time"
+
+ "go.etcd.io/etcd/clientv3"
+ "go.etcd.io/etcd/clientv3/namespace"
+ "go.etcd.io/etcd/embed"
+ "go.uber.org/atomic"
+
+ common "git.monogon.dev/source/nexantic.git/metropolis/node"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/common/supervisor"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/core/consensus/ca"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/core/localstorage"
+)
+
+const (
+ // DefaultClusterToken is the initial cluster token that the embedded etcd is configured with.
+ DefaultClusterToken = "SIGNOS"
+ // DefaultLogger is the name of the logger that the embedded etcd is configured to use.
+ DefaultLogger = "zap"
+)
+
+// Service is the etcd cluster member service. It is created by New and started by running Run.
+type Service struct {
+ // The configuration with which the service was started. This is immutable.
+ config *Config
+
+ // stateMu guards state. This is locked internally on public methods of Service that require access to state. The
+ // state might be recreated on service restart.
+ stateMu sync.Mutex
+ // state is nil until Run is called for the first time; every call to Run replaces it with a fresh state.
+ state *state
+}
+
+// state is the runtime state of a running etcd member.
+type state struct {
+ // etcd is the embedded etcd server, set once it has been started by Run.
+ etcd *embed.Etcd
+ // ready is set by Run once the member is up, the CA is saved or loaded and the sub-runnables have been started.
+ ready atomic.Bool
+
+ // ca is the peer certificate authority: created in memory for new clusters, loaded from etcd otherwise.
+ ca *ca.CA
+ // cl is an etcd client that loops back to the locally running etcd server. This runs over the Client unix domain
+ // socket that etcd starts.
+ cl *clientv3.Client
+}
+
+// Config is the configuration of the etcd cluster member service, as passed to New.
+type Config struct {
+ // Data directory (persistent, encrypted storage) for etcd.
+ Data *localstorage.DataEtcdDirectory
+ // Ephemeral directory for etcd.
+ Ephemeral *localstorage.EphemeralConsensusDirectory
+
+ // Name is the cluster name. This must be the same amongst all etcd members within one cluster.
+ // NOTE(review): Name is also passed to etcd as the member name and used as the common name of the first peer
+ // certificate - confirm whether it really identifies the cluster rather than the member.
+ Name string
+ // NewCluster selects whether the etcd member will start a new cluster and bootstrap a CA and the first member
+ // certificate, or load existing PKI certificates from disk.
+ NewCluster bool
+ // InitialCluster sets the initial cluster peer URLs used to join an existing cluster, ie. when NewCluster is not
+ // set. It is ignored when NewCluster is set, in which case the initial cluster is derived from Name and only
+ // contains the new, single server.
+ InitialCluster string
+ // ExternalHost is the IP address or hostname at which this cluster member is reachable to other cluster members.
+ // When NewCluster is set, this must be an IP address, as it is placed in the first member certificate.
+ ExternalHost string
+ // ListenHost is the IP address or hostname at which this cluster member will listen.
+ ListenHost string
+ // Port is the port at which this cluster member will listen for other members. If zero, defaults to the global
+ // Smalltown setting.
+ Port int
+}
+
+// New returns a Service that will run with a copy of the given configuration. The returned service is inert until
+// its Run method is called.
+func New(config Config) *Service {
+	svc := &Service{}
+	svc.config = &config
+	return svc
+}
+
+// configure transforms the service configuration into an embedded etcd configuration. As a side effect, it creates
+// the ephemeral and data directories if they do not exist yet. The context is currently unused.
+func (s *Service) configure(ctx context.Context) (*embed.Config, error) {
+	conf := s.config
+
+	if err := conf.Ephemeral.MkdirAll(0700); err != nil {
+		return nil, fmt.Errorf("failed to create ephemeral directory: %w", err)
+	}
+	if err := conf.Data.MkdirAll(0700); err != nil {
+		return nil, fmt.Errorf("failed to create data directory: %w", err)
+	}
+
+	// Fall back to the global default when no port has been configured.
+	port := conf.Port
+	if port == 0 {
+		port = common.ConsensusPort
+	}
+	listenHost := fmt.Sprintf("%s:%d", conf.ListenHost, port)
+	advertiseHost := fmt.Sprintf("%s:%d", conf.ExternalHost, port)
+
+	cfg := embed.NewConfig()
+
+	cfg.Name = conf.Name
+	cfg.Dir = conf.Data.Data.FullPath()
+	cfg.InitialClusterToken = DefaultClusterToken
+
+	// Peers authenticate each other with certificates from the peer CA, checked against the on-disk CRL.
+	cfg.PeerTLSInfo.CertFile = conf.Data.PeerPKI.Certificate.FullPath()
+	cfg.PeerTLSInfo.KeyFile = conf.Data.PeerPKI.Key.FullPath()
+	cfg.PeerTLSInfo.TrustedCAFile = conf.Data.PeerPKI.CACertificate.FullPath()
+	cfg.PeerTLSInfo.ClientCertAuth = true
+	cfg.PeerTLSInfo.CRLFile = conf.Data.PeerCRL.FullPath()
+
+	// Clients are only served over the local unix domain socket; no client URL is advertised.
+	cfg.LCUrls = []url.URL{{
+		Scheme: "unix",
+		Path:   conf.Ephemeral.ClientSocket.FullPath() + ":0",
+	}}
+	cfg.ACUrls = []url.URL{}
+	cfg.LPUrls = []url.URL{{
+		Scheme: "https",
+		Host:   listenHost,
+	}}
+	cfg.APUrls = []url.URL{{
+		Scheme: "https",
+		Host:   advertiseHost,
+	}}
+
+	switch {
+	case conf.NewCluster:
+		cfg.ClusterState = "new"
+		cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name)
+	case conf.InitialCluster != "":
+		cfg.ClusterState = "existing"
+		cfg.InitialCluster = conf.InitialCluster
+	}
+
+	// TODO(q3k): pipe logs from etcd to supervisor.RawLogger via a file.
+	cfg.Logger = DefaultLogger
+	cfg.LogOutputs = []string{"stderr"}
+
+	return cfg, nil
+}
+
+// Run is a Supervisor runnable that starts the etcd member service. It will become healthy once the member joins the
+// cluster successfully.
+//
+// When starting a new cluster, a CA and the first peer certificate are created in memory and written to local
+// storage before etcd starts, and the CA is saved to etcd once it is up. Otherwise the certificates must already be
+// present on disk, and the CA is loaded from etcd. Once the member is up, the CRL watcher and the autopromoter are
+// started as sub-runnables. Run then blocks until ctx is done, closes etcd and returns the context error.
+func (s *Service) Run(ctx context.Context) error {
+	st := &state{
+		ready: *atomic.NewBool(false),
+	}
+	s.stateMu.Lock()
+	s.state = st
+	s.stateMu.Unlock()
+
+	if s.config.NewCluster {
+		// Expect certificate to be absent from disk.
+		absent, err := s.config.Data.PeerPKI.AllAbsent()
+		if err != nil {
+			return fmt.Errorf("checking certificate existence: %w", err)
+		}
+		if !absent {
+			return fmt.Errorf("want new cluster, but certificates already exist on disk")
+		}
+
+		// Generate CA, keep in memory, write it down in etcd later.
+		st.ca, err = ca.New("Smalltown etcd peer Root CA")
+		if err != nil {
+			return fmt.Errorf("when creating new cluster's peer CA: %w", err)
+		}
+
+		ip := net.ParseIP(s.config.ExternalHost)
+		if ip == nil {
+			return fmt.Errorf("configured external host is not an IP address (got %q)", s.config.ExternalHost)
+		}
+
+		cert, key, err := st.ca.Issue(ctx, nil, s.config.Name, ip)
+		if err != nil {
+			return fmt.Errorf("when issuing new cluster's first certificate: %w", err)
+		}
+
+		// Directories need the search (execute) bit to be usable, hence 0700 rather than the 0600 used for files.
+		if err := s.config.Data.PeerPKI.MkdirAll(0700); err != nil {
+			return fmt.Errorf("when creating PKI directory: %w", err)
+		}
+		if err := s.config.Data.PeerPKI.CACertificate.Write(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: st.ca.CACertRaw}), 0600); err != nil {
+			return fmt.Errorf("when writing CA certificate to disk: %w", err)
+		}
+		if err := s.config.Data.PeerPKI.Certificate.Write(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: cert}), 0600); err != nil {
+			return fmt.Errorf("when writing certificate to disk: %w", err)
+		}
+		if err := s.config.Data.PeerPKI.Key.Write(pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: key}), 0600); err != nil {
+			return fmt.Errorf("when writing key to disk: %w", err)
+		}
+	} else {
+		// Expect certificate to be present on disk.
+		present, err := s.config.Data.PeerPKI.AllExist()
+		if err != nil {
+			return fmt.Errorf("checking certificate existence: %w", err)
+		}
+		if !present {
+			return fmt.Errorf("want existing cluster, but certificate is missing from disk")
+		}
+	}
+
+	// Same mode as used by configure, so that the directory is traversable regardless of which call creates it.
+	if err := s.config.Data.MkdirAll(0700); err != nil {
+		return fmt.Errorf("failed to create data directory: %w", err)
+	}
+
+	cfg, err := s.configure(ctx)
+	if err != nil {
+		return fmt.Errorf("when configuring etcd: %w", err)
+	}
+
+	server, err := embed.StartEtcd(cfg)
+	// Until startup has fully succeeded (keep is set), make sure that any early return closes the server again.
+	keep := false
+	defer func() {
+		if !keep && server != nil {
+			server.Close()
+		}
+	}()
+	if err != nil {
+		return fmt.Errorf("failed to start etcd: %w", err)
+	}
+	st.etcd = server
+
+	supervisor.Logger(ctx).Info("waiting for etcd...")
+
+	select {
+	case <-st.etcd.Server.ReadyNotify():
+	case <-ctx.Done():
+		supervisor.Logger(ctx).Info("context done, aborting wait")
+		return ctx.Err()
+	}
+
+	socket := s.config.Ephemeral.ClientSocket.FullPath()
+	cl, err := clientv3.New(clientv3.Config{
+		Endpoints:   []string{fmt.Sprintf("unix://%s:0", socket)},
+		DialTimeout: time.Second,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to connect to new etcd instance: %w", err)
+	}
+	// The client is bound to this run of the service: release its connection whenever Run returns, instead of
+	// leaking one client per service restart.
+	defer cl.Close()
+	st.cl = cl
+
+	if s.config.NewCluster {
+		if st.ca == nil {
+			panic("peerCA has not been generated")
+		}
+
+		// Save new CA into etcd.
+		err = st.ca.Save(ctx, cl.KV)
+		if err != nil {
+			return fmt.Errorf("failed to save new CA to etcd: %w", err)
+		}
+	} else {
+		// Load existing CA from etcd.
+		st.ca, err = ca.Load(ctx, cl.KV)
+		if err != nil {
+			return fmt.Errorf("failed to load CA from etcd: %w", err)
+		}
+	}
+
+	// Start CRL watcher.
+	if err := supervisor.Run(ctx, "crl", s.watchCRL); err != nil {
+		return fmt.Errorf("failed to start CRL watcher: %w", err)
+	}
+	// Start autopromoter.
+	if err := supervisor.Run(ctx, "autopromoter", s.autopromoter); err != nil {
+		return fmt.Errorf("failed to start autopromoter: %w", err)
+	}
+
+	supervisor.Logger(ctx).Info("etcd is now ready")
+	keep = true
+	st.ready.Store(true)
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+
+	<-ctx.Done()
+	st.etcd.Close()
+	return ctx.Err()
+}
+
+// watchCRL is a sub-runnable of the etcd cluster member service that updates the on-local-storage CRL to match the
+// newest available version in etcd.
+//
+// It returns an error if watching fails or the CRL cannot be written, and the context error once ctx is done.
+func (s *Service) watchCRL(ctx context.Context) error {
+	s.stateMu.Lock()
+	cl := s.state.cl
+	authority := s.state.ca
+	s.stateMu.Unlock()
+
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+
+	updates := authority.WaitCRLChange(ctx, cl.KV, cl.Watcher)
+	for {
+		select {
+		case <-ctx.Done():
+			// Do not rely on the update channel being closed to notice cancellation.
+			return ctx.Err()
+		case e, ok := <-updates:
+			if !ok {
+				// The producer is gone. That is expected on cancellation only.
+				if err := ctx.Err(); err != nil {
+					return err
+				}
+				return fmt.Errorf("watching CRL: update channel closed unexpectedly")
+			}
+			if e.Err != nil {
+				return fmt.Errorf("watching CRL: %w", e.Err)
+			}
+
+			if err := s.config.Data.PeerCRL.Write(e.CRL, 0600); err != nil {
+				return fmt.Errorf("saving CRL: %w", err)
+			}
+		}
+	}
+}
+
+// autopromoter is a sub-runnable of the etcd cluster member service. Every five seconds, and only while the local
+// member is the cluster leader, it attempts to promote every learner member. It returns the context error once ctx
+// is done.
+func (s *Service) autopromoter(ctx context.Context) error {
+	ticker := time.NewTicker(5 * time.Second)
+	defer ticker.Stop()
+
+	// promoteLearners performs a single promotion pass.
+	promoteLearners := func() {
+		s.stateMu.Lock()
+		st := s.state
+		s.stateMu.Unlock()
+
+		// Only the leader promotes.
+		if st.etcd.Server.Leader() != st.etcd.Server.ID() {
+			return
+		}
+
+		for _, member := range st.etcd.Server.Cluster().Members() {
+			if member.IsLearner {
+				// We always call PromoteMember since the metadata necessary to decide if we should is private.
+				// Luckily etcd already does sanity checks internally and will refuse to promote nodes that aren't
+				// connected or are still behind on transactions.
+				_, err := st.etcd.Server.PromoteMember(ctx, uint64(member.ID))
+				if err == nil {
+					supervisor.Logger(ctx).Infof("Promoted new consensus node %s", member.Name)
+				} else {
+					supervisor.Logger(ctx).Infof("Failed to promote consensus node %s: %v", member.Name, err)
+				}
+			}
+		}
+	}
+
+	for {
+		select {
+		case <-ticker.C:
+			promoteLearners()
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+}
+
+// IsReady returns whether etcd is ready and synced. It returns false if the service has not been run yet.
+func (s *Service) IsReady() bool {
+	s.stateMu.Lock()
+	defer s.stateMu.Unlock()
+	return s.state != nil && s.state.ready.Load()
+}
+
+// WaitReady blocks until the service is ready, polling IsReady every 100 milliseconds. It returns nil once the
+// service is ready, or the context error if ctx is done first.
+func (s *Service) WaitReady(ctx context.Context) error {
+	// TODO(q3k): reimplement the atomic ready flag as an event synchronization mechanism
+	ticker := time.NewTicker(100 * time.Millisecond)
+	defer ticker.Stop()
+
+	for !s.IsReady() {
+		select {
+		case <-ticker.C:
+			// Poll again.
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+	return nil
+}
+
+// KV returns an etcd KV client interface to the etcd member/cluster, with all keys prefixed by "module:space". It
+// must only be called once Run has set up the etcd client; before that it panics on a nil dereference.
+func (s *Service) KV(module, space string) clientv3.KV {
+	prefix := fmt.Sprintf("%s:%s", module, space)
+
+	s.stateMu.Lock()
+	defer s.stateMu.Unlock()
+	root := s.state.cl.KV
+	return namespace.NewKV(root, prefix)
+}
+
+// KVRoot returns the unprefixed etcd KV client interface to the etcd member/cluster. It must only be called once Run
+// has set up the etcd client; before that it panics on a nil dereference.
+func (s *Service) KVRoot() clientv3.KV {
+	s.stateMu.Lock()
+	defer s.stateMu.Unlock()
+	client := s.state.cl
+	return client.KV
+}
+
+// Cluster returns the etcd cluster (membership) client interface to the etcd member/cluster. It must only be called
+// once Run has set up the etcd client; before that it panics on a nil dereference.
+func (s *Service) Cluster() clientv3.Cluster {
+	s.stateMu.Lock()
+	defer s.stateMu.Unlock()
+	client := s.state.cl
+	return client.Cluster
+}
+
+// MemberInfo returns information about this etcd cluster member: its ID and name. This will block until this
+// information is available (ie. the cluster status is Ready), or until ctx is done, in which case an error is
+// returned.
+func (s *Service) MemberInfo(ctx context.Context) (id uint64, name string, err error) {
+	if waitErr := s.WaitReady(ctx); waitErr != nil {
+		return 0, "", fmt.Errorf("when waiting for cluster readiness: %w", waitErr)
+	}
+
+	s.stateMu.Lock()
+	defer s.stateMu.Unlock()
+	return uint64(s.state.etcd.Server.ID()), s.config.Name, nil
+}
diff --git a/metropolis/node/core/consensus/consensus_test.go b/metropolis/node/core/consensus/consensus_test.go
new file mode 100644
index 0000000..e08bd29
--- /dev/null
+++ b/metropolis/node/core/consensus/consensus_test.go
@@ -0,0 +1,261 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package consensus
+
+import (
+ "bytes"
+ "context"
+ "crypto/x509"
+ "io/ioutil"
+ "net"
+ "os"
+ "testing"
+ "time"
+
+ "git.monogon.dev/source/nexantic.git/golibs/common"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/common/supervisor"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/core/localstorage"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/core/localstorage/declarative"
+)
+
+// boilerplate bundles the per-test fixture built by prep and torn down by
+// close.
+type boilerplate struct {
+	// ctx is the cancelable context the test runs under.
+	ctx context.Context
+	// ctxC cancels ctx.
+	ctxC context.CancelFunc
+
+	// tmpdir is the temporary directory backing root; close removes it.
+	tmpdir string
+	// root is the localstorage tree placed under tmpdir.
+	root *localstorage.Root
+}
+
+// prep builds the fixture shared by the tests in this file: a temporary
+// directory, a localstorage.Root placed onto it with declarative.PlaceFS, the
+// etcd data and ephemeral consensus directories created inside it, and a
+// cancelable context. Any failure aborts the calling test and removes the
+// temporary directory again. Callers must call close on the result.
+func prep(t *testing.T) *boilerplate {
+	t.Helper()
+
+	tmp, err := ioutil.TempDir("", "smalltown-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// fail cleans up the temporary directory before aborting, as the caller
+	// never gets a boilerplate to close on these paths.
+	fail := func(err error) {
+		t.Helper()
+		os.RemoveAll(tmp)
+		t.Fatal(err)
+	}
+
+	root := &localstorage.Root{}
+	if err := declarative.PlaceFS(root, tmp); err != nil {
+		fail(err)
+	}
+	if err := os.MkdirAll(root.Data.Etcd.FullPath(), 0700); err != nil {
+		fail(err)
+	}
+	if err := os.MkdirAll(root.Ephemeral.Consensus.FullPath(), 0700); err != nil {
+		fail(err)
+	}
+
+	// Created last so that no failure path above leaves an uncanceled context.
+	ctx, ctxC := context.WithCancel(context.Background())
+	return &boilerplate{
+		ctx:    ctx,
+		ctxC:   ctxC,
+		root:   root,
+		tmpdir: tmp,
+	}
+}
+
+// close tears the fixture down: it cancels the test context first and then
+// removes the temporary directory.
+func (b *boilerplate) close() {
+	b.ctxC()
+	// Best-effort cleanup; a removal error is deliberately not reported.
+	_ = os.RemoveAll(b.tmpdir)
+}
+
+// waitEtcd blocks until s reports ready, polling every 100 milliseconds, and
+// fails the calling test if that does not happen within 5 seconds.
+func waitEtcd(t *testing.T, s *Service) {
+	// Attribute a timeout to the calling test rather than to this helper.
+	t.Helper()
+
+	deadline := time.Now().Add(5 * time.Second)
+	// Readiness is checked before the deadline so that a service which became
+	// ready during the last sleep is not reported as a timeout.
+	for !s.IsReady() {
+		if time.Now().After(deadline) {
+			t.Fatalf("etcd did not start up on time")
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+}
+
+// TestBootstrap starts a new single-member cluster and checks that a Put and
+// a Get through a namespaced KV client both succeed.
+func TestBootstrap(t *testing.T) {
+	b := prep(t)
+	defer b.close()
+
+	cfg := Config{
+		Data:           &b.root.Data.Etcd,
+		Ephemeral:      &b.root.Ephemeral.Consensus,
+		Name:           "test",
+		NewCluster:     true,
+		InitialCluster: "127.0.0.1",
+		ExternalHost:   "127.0.0.1",
+		ListenHost:     "127.0.0.1",
+		Port:           common.MustConsume(common.AllocateTCPPort()),
+	}
+	svc := New(cfg)
+
+	supervisor.New(b.ctx, svc.Run)
+	waitEtcd(t, svc)
+
+	kv := svc.KV("foo", "bar")
+	_, err := kv.Put(b.ctx, "/foo", "bar")
+	if err != nil {
+		t.Fatalf("test key creation failed: %v", err)
+	}
+	_, err = kv.Get(b.ctx, "/foo")
+	if err != nil {
+		t.Fatalf("test key retrieval failed: %v", err)
+	}
+}
+
+// TestMemberInfo starts a new single-member cluster and checks that
+// MemberInfo agrees both with the configured name and with what the etcd
+// Cluster endpoint reports for the sole member.
+func TestMemberInfo(t *testing.T) {
+	b := prep(t)
+	defer b.close()
+	etcd := New(Config{
+		Data:           &b.root.Data.Etcd,
+		Ephemeral:      &b.root.Ephemeral.Consensus,
+		Name:           "test",
+		NewCluster:     true,
+		InitialCluster: "127.0.0.1",
+		ExternalHost:   "127.0.0.1",
+		ListenHost:     "127.0.0.1",
+		Port:           common.MustConsume(common.AllocateTCPPort()),
+	})
+	supervisor.New(b.ctx, etcd.Run)
+	waitEtcd(t, etcd)
+
+	id, name, err := etcd.MemberInfo(b.ctx)
+	if err != nil {
+		t.Fatalf("MemberInfo: %v", err)
+	}
+
+	// Compare name with configured name.
+	if want, got := "test", name; want != got {
+		t.Errorf("MemberInfo returned name %q, wanted %q (per config)", got, want)
+	}
+
+	// Compare name with cluster information.
+	members, err := etcd.Cluster().MemberList(b.ctx)
+	if err != nil {
+		// Must be fatal: members is dereferenced below and cannot be relied
+		// upon when the call failed.
+		t.Fatalf("MemberList: %v", err)
+	}
+
+	if want, got := 1, len(members.Members); want != got {
+		t.Fatalf("expected one cluster member, got %d", got)
+	}
+	if want, got := id, members.Members[0].ID; want != got {
+		t.Errorf("MemberInfo returned ID %d, Cluster endpoint says %d", want, got)
+	}
+	if want, got := name, members.Members[0].Name; want != got {
+		t.Errorf("MemberInfo returned name %q, Cluster endpoint says %q", want, got)
+	}
+}
+
+// TestRestartFromDisk starts a new cluster, stops it, then starts a second
+// instance on the same data directories without bootstrapping a new cluster.
+// It checks that the key written by the first instance can still be read and
+// that both instances ended up with the same CA certificate.
+func TestRestartFromDisk(t *testing.T) {
+	b := prep(t)
+	defer b.close()
+
+	// startEtcd runs one service instance on the shared directories under its
+	// own cancelable context and waits for it to become ready. When bootstrap
+	// is set the instance creates a new cluster and writes the test key; in
+	// both cases the test key must be readable.
+	startEtcd := func(bootstrap bool) (*Service, context.CancelFunc) {
+		etcd := New(Config{
+			Data:           &b.root.Data.Etcd,
+			Ephemeral:      &b.root.Ephemeral.Consensus,
+			Name:           "test",
+			NewCluster:     bootstrap,
+			InitialCluster: "127.0.0.1",
+			ExternalHost:   "127.0.0.1",
+			ListenHost:     "127.0.0.1",
+			Port:           common.MustConsume(common.AllocateTCPPort()),
+		})
+		ctx, ctxC := context.WithCancel(b.ctx)
+		supervisor.New(ctx, etcd.Run)
+		waitEtcd(t, etcd)
+		kv := etcd.KV("foo", "bar")
+		if bootstrap {
+			if _, err := kv.Put(b.ctx, "/foo", "bar"); err != nil {
+				t.Fatalf("test key creation failed: %v", err)
+			}
+		}
+		if _, err := kv.Get(b.ctx, "/foo"); err != nil {
+			t.Fatalf("test key retrieval failed: %v", err)
+		}
+
+		return etcd, ctxC
+	}
+
+	// First run: bootstrap, remember the CA, then stop the instance.
+	etcd, ctxC := startEtcd(true)
+	etcd.stateMu.Lock()
+	firstCA := etcd.state.ca.CACertRaw
+	etcd.stateMu.Unlock()
+	ctxC()
+
+	// Second run: restart from what the first run left on disk.
+	etcd, ctxC = startEtcd(false)
+	etcd.stateMu.Lock()
+	secondCA := etcd.state.ca.CACertRaw
+	etcd.stateMu.Unlock()
+	ctxC()
+
+	if !bytes.Equal(firstCA, secondCA) {
+		t.Fatalf("wanted same, got different CAs accross runs")
+	}
+}
+
+// TestCRL starts a new single-member cluster, issues a certificate from the
+// service's CA, revokes it, and then polls the peer CRL file for up to 5
+// seconds until the revoked certificate's serial number shows up in it.
+func TestCRL(t *testing.T) {
+	b := prep(t)
+	defer b.close()
+
+	cfg := Config{
+		Data:           &b.root.Data.Etcd,
+		Ephemeral:      &b.root.Ephemeral.Consensus,
+		Name:           "test",
+		NewCluster:     true,
+		InitialCluster: "127.0.0.1",
+		ExternalHost:   "127.0.0.1",
+		ListenHost:     "127.0.0.1",
+		Port:           common.MustConsume(common.AllocateTCPPort()),
+	}
+	svc := New(cfg)
+	supervisor.New(b.ctx, svc.Run)
+	waitEtcd(t, svc)
+
+	// Grab the CA and the root KV client under the state lock.
+	svc.stateMu.Lock()
+	ca, kv := svc.state.ca, svc.state.cl.KV
+	svc.stateMu.Unlock()
+
+	issuedRaw, _, err := ca.Issue(b.ctx, kv, "revoketest", net.ParseIP("1.2.3.4"))
+	if err != nil {
+		t.Fatalf("cert issue failed: %v", err)
+	}
+	issued, err := x509.ParseCertificate(issuedRaw)
+	if err != nil {
+		t.Fatalf("cert parse failed: %v", err)
+	}
+
+	err = ca.Revoke(b.ctx, kv, "revoketest")
+	if err != nil {
+		t.Fatalf("cert revoke failed: %v", err)
+	}
+
+	deadline := time.Now().Add(5 * time.Second)
+	for revoked := false; !revoked; {
+		if time.Now().After(deadline) {
+			t.Fatalf("CRL did not get updated in time")
+		}
+		time.Sleep(100 * time.Millisecond)
+
+		// A CRL that cannot be read or parsed yet is not an error here: it
+		// may simply not have been written yet, so just try again.
+		raw, err := b.root.Data.Etcd.PeerCRL.Read()
+		if err != nil {
+			continue
+		}
+		list, err := x509.ParseCRL(raw)
+		if err != nil {
+			continue
+		}
+
+		for _, entry := range list.TBSCertList.RevokedCertificates {
+			if entry.SerialNumber.Cmp(issued.SerialNumber) == 0 {
+				revoked = true
+				break
+			}
+		}
+	}
+}