m/node/core/consensus: always use member name as address

Instead of explicitly giving the consensus service an address to listen
on, we now just give it a hostname. Etcd will resolve the name itself on
startup. It's not yet known if it will re-resolve it and handle changes.
If that's not the case, we will have to implement a restarting
babysitter process instead.

Test Plan: E2e tests should cover this.

X-Origin-Diff: phab/D734
GitOrigin-RevId: c2255b2956137e2bbd705ac1965418c0540d0046
diff --git a/metropolis/node/core/cluster/manager.go b/metropolis/node/core/cluster/manager.go
index 6c85fb3..d7ffe5a 100644
--- a/metropolis/node/core/cluster/manager.go
+++ b/metropolis/node/core/cluster/manager.go
@@ -318,13 +318,12 @@
 	node := NewNode(cuk, ip, *cert.Leaf)
 
 	m.consensus = consensus.New(consensus.Config{
-		Data:           &m.storageRoot.Data.Etcd,
-		Ephemeral:      &m.storageRoot.Ephemeral.Consensus,
-		NewCluster:     true,
-		Name:           node.ID(),
-		InitialCluster: ip.String(),
-		ExternalHost:   ip.String(),
-		ListenHost:     ip.String(),
+		Data:       &m.storageRoot.Data.Etcd,
+		Ephemeral:  &m.storageRoot.Ephemeral.Consensus,
+		NewCluster: true,
+		Name:       node.ID(),
+		// STOPGAP: this will not be used after the manager rewrite.
+		ExternalHost: ip.String(),
 	})
 	if err := supervisor.Run(ctx, "consensus", m.consensus.Run); err != nil {
 		return fmt.Errorf("when starting consensus: %w", err)
diff --git a/metropolis/node/core/consensus/ca/ca.go b/metropolis/node/core/consensus/ca/ca.go
index 9a1b634..6bcc19b 100644
--- a/metropolis/node/core/consensus/ca/ca.go
+++ b/metropolis/node/core/consensus/ca/ca.go
@@ -17,8 +17,13 @@
 // package ca implements a simple standards-compliant certificate authority.
 // It only supports ed25519 keys, and does not maintain any persistent state.
 //
-// The CA is backed by etcd storage, and can also bootstrap itself without a yet running etcd storage (and commit
-// in-memory secrets to etcd at a later date).
+// The CA is backed by etcd storage, and can also bootstrap itself without a
+// yet running etcd storage (and commit in-memory secrets to etcd at a later
+// date).
+//
+// This is different from //metropolis/pkg/pki in that it has to solve the
+// certs-for-etcd-on-etcd bootstrap problem. Perhaps it should be rewritten to
+// implement the Issuer/Ceritifcate interface available there.
 //
 // CA and certificates successfully pass https://github.com/zmap/zlint
 // (minus the CA/B rules that a public CA would adhere to, which requires
@@ -40,7 +45,6 @@
 	"errors"
 	"fmt"
 	"math/big"
-	"net"
 	"time"
 
 	"go.etcd.io/etcd/clientv3"
@@ -232,7 +236,7 @@
 // Issue issues a certificate. If kv is non-nil, the newly issued certificate will be immediately stored to etcd,
 // otherwise it will be kept in memory (until .Save is called). Certificates can only be issued to memory on
 // newly-created CAs that have not been saved to etcd yet.
-func (c *CA) Issue(ctx context.Context, kv clientv3.KV, commonName string, externalAddress net.IP) (cert []byte, privkey []byte, err error) {
+func (c *CA) Issue(ctx context.Context, kv clientv3.KV, commonName string) (cert []byte, privkey []byte, err error) {
 	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
 	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
 	if err != nil {
@@ -261,7 +265,6 @@
 		NotAfter:              unknownNotAfter,
 		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth},
 		DNSNames:              []string{commonName},
-		IPAddresses:           []net.IP{externalAddress},
 	}
 	cert, err = x509.CreateCertificate(rand.Reader, etcdCert, c.CACert, pubKey, c.privateKey)
 	if err != nil {
diff --git a/metropolis/node/core/consensus/consensus.go b/metropolis/node/core/consensus/consensus.go
index 269ff7e..2bbe1ae 100644
--- a/metropolis/node/core/consensus/consensus.go
+++ b/metropolis/node/core/consensus/consensus.go
@@ -34,7 +34,6 @@
 	"context"
 	"encoding/pem"
 	"fmt"
-	"net"
 	"net/url"
 	"sync"
 	"time"
@@ -88,16 +87,13 @@
 	// NewCluster selects whether the etcd member will start a new cluster and bootstrap a CA and the first member
 	// certificate, or load existing PKI certificates from disk.
 	NewCluster bool
-	// InitialCluster sets the initial cluster peer URLs when NewCluster is set, and is ignored otherwise. Usually this
-	// will be just the new, single server, and more members will be added later.
-	InitialCluster string
-	// ExternalHost is the IP address or hostname at which this cluster member is reachable to other cluster members.
-	ExternalHost string
-	// ListenHost is the IP address or hostname at which this cluster member will listen.
-	ListenHost string
 	// Port is the port at which this cluster member will listen for other members. If zero, defaults to the global
 	// Metropolis setting.
 	Port int
+
+	// ExternalHost is used by tests to override the address at which etcd should listen for peer connections.
+	// TODO(q3k): make this unexported once the new cluster manager logic lands.
+	ExternalHost string
 }
 
 func New(config Config) *Service {
@@ -116,6 +112,9 @@
 		return nil, fmt.Errorf("failed to create data directory: %w", err)
 	}
 
+	if s.config.Name == "" {
+		return nil, fmt.Errorf("Name not set")
+	}
 	port := s.config.Port
 	if port == 0 {
 		port = node.ConsensusPort
@@ -140,19 +139,25 @@
 	cfg.ACUrls = []url.URL{}
 	cfg.LPUrls = []url.URL{{
 		Scheme: "https",
-		Host:   fmt.Sprintf("%s:%d", s.config.ListenHost, port),
+		Host:   fmt.Sprintf("[::]:%d", port),
 	}}
+
+	// Always listen on the address pointed to by our name - unless running in
+	// tests, where we can't control our hostname easily.
+	ExternalHost := fmt.Sprintf("%s:%d", s.config.Name, port)
+	if s.config.ExternalHost != "" {
+		ExternalHost = fmt.Sprintf("%s:%d", s.config.ExternalHost, port)
+	}
 	cfg.APUrls = []url.URL{{
 		Scheme: "https",
-		Host:   fmt.Sprintf("%s:%d", s.config.ExternalHost, port),
+		Host:   ExternalHost,
 	}}
 
 	if s.config.NewCluster {
 		cfg.ClusterState = "new"
 		cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name)
-	} else if s.config.InitialCluster != "" {
+	} else {
 		cfg.ClusterState = "existing"
-		cfg.InitialCluster = s.config.InitialCluster
 	}
 
 	// TODO(q3k): pipe logs from etcd to supervisor.RawLogger via a file.
@@ -189,12 +194,7 @@
 				return fmt.Errorf("when creating new cluster's peer CA: %w", err)
 			}
 
-			ip := net.ParseIP(s.config.ExternalHost)
-			if ip == nil {
-				return fmt.Errorf("configued external host is not an IP address (got %q)", s.config.ExternalHost)
-			}
-
-			cert, key, err := st.ca.Issue(ctx, nil, s.config.Name, ip)
+			cert, key, err := st.ca.Issue(ctx, nil, s.config.Name)
 			if err != nil {
 				return fmt.Errorf("when issuing new cluster's first certificate: %w", err)
 			}
@@ -240,8 +240,6 @@
 	}
 	st.etcd = server
 
-	supervisor.Logger(ctx).Info("waiting for etcd...")
-
 	okay := true
 	select {
 	case <-st.etcd.Server.ReadyNotify():
diff --git a/metropolis/node/core/consensus/consensus_test.go b/metropolis/node/core/consensus/consensus_test.go
index 5729bc1..00f4414 100644
--- a/metropolis/node/core/consensus/consensus_test.go
+++ b/metropolis/node/core/consensus/consensus_test.go
@@ -21,7 +21,6 @@
 	"context"
 	"crypto/x509"
 	"io/ioutil"
-	"net"
 	"os"
 	"testing"
 	"time"
@@ -86,14 +85,12 @@
 	b := prep(t)
 	defer b.close()
 	etcd := New(Config{
-		Data:           &b.root.Data.Etcd,
-		Ephemeral:      &b.root.Ephemeral.Consensus,
-		Name:           "test",
-		NewCluster:     true,
-		InitialCluster: "127.0.0.1",
-		ExternalHost:   "127.0.0.1",
-		ListenHost:     "127.0.0.1",
-		Port:           freeport.MustConsume(freeport.AllocateTCPPort()),
+		Data:         &b.root.Data.Etcd,
+		Ephemeral:    &b.root.Ephemeral.Consensus,
+		Name:         "test",
+		NewCluster:   true,
+		Port:         freeport.MustConsume(freeport.AllocateTCPPort()),
+		ExternalHost: "127.0.0.1",
 	})
 
 	supervisor.New(b.ctx, etcd.Run)
@@ -112,14 +109,12 @@
 	b := prep(t)
 	defer b.close()
 	etcd := New(Config{
-		Data:           &b.root.Data.Etcd,
-		Ephemeral:      &b.root.Ephemeral.Consensus,
-		Name:           "test",
-		NewCluster:     true,
-		InitialCluster: "127.0.0.1",
-		ExternalHost:   "127.0.0.1",
-		ListenHost:     "127.0.0.1",
-		Port:           freeport.MustConsume(freeport.AllocateTCPPort()),
+		Data:         &b.root.Data.Etcd,
+		Ephemeral:    &b.root.Ephemeral.Consensus,
+		Name:         "test",
+		NewCluster:   true,
+		Port:         freeport.MustConsume(freeport.AllocateTCPPort()),
+		ExternalHost: "127.0.0.1",
 	})
 	supervisor.New(b.ctx, etcd.Run)
 	waitEtcd(t, etcd)
@@ -157,14 +152,12 @@
 
 	startEtcd := func(new bool) (*Service, context.CancelFunc) {
 		etcd := New(Config{
-			Data:           &b.root.Data.Etcd,
-			Ephemeral:      &b.root.Ephemeral.Consensus,
-			Name:           "test",
-			NewCluster:     new,
-			InitialCluster: "127.0.0.1",
-			ExternalHost:   "127.0.0.1",
-			ListenHost:     "127.0.0.1",
-			Port:           freeport.MustConsume(freeport.AllocateTCPPort()),
+			Data:         &b.root.Data.Etcd,
+			Ephemeral:    &b.root.Ephemeral.Consensus,
+			Name:         "test",
+			NewCluster:   new,
+			Port:         freeport.MustConsume(freeport.AllocateTCPPort()),
+			ExternalHost: "127.0.0.1",
 		})
 		ctx, ctxC := context.WithCancel(b.ctx)
 		supervisor.New(ctx, etcd.Run)
@@ -203,14 +196,12 @@
 	b := prep(t)
 	defer b.close()
 	etcd := New(Config{
-		Data:           &b.root.Data.Etcd,
-		Ephemeral:      &b.root.Ephemeral.Consensus,
-		Name:           "test",
-		NewCluster:     true,
-		InitialCluster: "127.0.0.1",
-		ExternalHost:   "127.0.0.1",
-		ListenHost:     "127.0.0.1",
-		Port:           freeport.MustConsume(freeport.AllocateTCPPort()),
+		Data:         &b.root.Data.Etcd,
+		Ephemeral:    &b.root.Ephemeral.Consensus,
+		Name:         "test",
+		NewCluster:   true,
+		Port:         freeport.MustConsume(freeport.AllocateTCPPort()),
+		ExternalHost: "127.0.0.1",
 	})
 	supervisor.New(b.ctx, etcd.Run)
 	waitEtcd(t, etcd)
@@ -220,7 +211,7 @@
 	kv := etcd.state.cl.KV
 	etcd.stateMu.Unlock()
 
-	certRaw, _, err := ca.Issue(b.ctx, kv, "revoketest", net.ParseIP("1.2.3.4"))
+	certRaw, _, err := ca.Issue(b.ctx, kv, "revoketest")
 	if err != nil {
 		t.Fatalf("cert issue failed: %v", err)
 	}