m/node/core/consensus: always use member name as address
Instead of explicitly giving the consensus service an address to listen
on, we now just give it a hostname. Etcd will resolve the name itself on
startup. It's not yet known if it will re-resolve it and handle changes.
If that's not the case, we will have to implement a restarting
babysitter process instead.
Test Plan: E2e tests should cover this.
X-Origin-Diff: phab/D734
GitOrigin-RevId: c2255b2956137e2bbd705ac1965418c0540d0046
diff --git a/metropolis/node/core/cluster/manager.go b/metropolis/node/core/cluster/manager.go
index 6c85fb3..d7ffe5a 100644
--- a/metropolis/node/core/cluster/manager.go
+++ b/metropolis/node/core/cluster/manager.go
@@ -318,13 +318,12 @@
node := NewNode(cuk, ip, *cert.Leaf)
m.consensus = consensus.New(consensus.Config{
- Data: &m.storageRoot.Data.Etcd,
- Ephemeral: &m.storageRoot.Ephemeral.Consensus,
- NewCluster: true,
- Name: node.ID(),
- InitialCluster: ip.String(),
- ExternalHost: ip.String(),
- ListenHost: ip.String(),
+ Data: &m.storageRoot.Data.Etcd,
+ Ephemeral: &m.storageRoot.Ephemeral.Consensus,
+ NewCluster: true,
+ Name: node.ID(),
+ // STOPGAP: this will not be used after the manager rewrite.
+ ExternalHost: ip.String(),
})
if err := supervisor.Run(ctx, "consensus", m.consensus.Run); err != nil {
return fmt.Errorf("when starting consensus: %w", err)
diff --git a/metropolis/node/core/consensus/ca/ca.go b/metropolis/node/core/consensus/ca/ca.go
index 9a1b634..6bcc19b 100644
--- a/metropolis/node/core/consensus/ca/ca.go
+++ b/metropolis/node/core/consensus/ca/ca.go
@@ -17,8 +17,13 @@
// package ca implements a simple standards-compliant certificate authority.
// It only supports ed25519 keys, and does not maintain any persistent state.
//
-// The CA is backed by etcd storage, and can also bootstrap itself without a yet running etcd storage (and commit
-// in-memory secrets to etcd at a later date).
+// The CA is backed by etcd storage, and can also bootstrap itself without a
+// yet running etcd storage (and commit in-memory secrets to etcd at a later
+// date).
+//
+// This is different from //metropolis/pkg/pki in that it has to solve the
+// certs-for-etcd-on-etcd bootstrap problem. Perhaps it should be rewritten to
+// implement the Issuer/Ceritifcate interface available there.
//
// CA and certificates successfully pass https://github.com/zmap/zlint
// (minus the CA/B rules that a public CA would adhere to, which requires
@@ -40,7 +45,6 @@
"errors"
"fmt"
"math/big"
- "net"
"time"
"go.etcd.io/etcd/clientv3"
@@ -232,7 +236,7 @@
// Issue issues a certificate. If kv is non-nil, the newly issued certificate will be immediately stored to etcd,
// otherwise it will be kept in memory (until .Save is called). Certificates can only be issued to memory on
// newly-created CAs that have not been saved to etcd yet.
-func (c *CA) Issue(ctx context.Context, kv clientv3.KV, commonName string, externalAddress net.IP) (cert []byte, privkey []byte, err error) {
+func (c *CA) Issue(ctx context.Context, kv clientv3.KV, commonName string) (cert []byte, privkey []byte, err error) {
serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
if err != nil {
@@ -261,7 +265,6 @@
NotAfter: unknownNotAfter,
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth},
DNSNames: []string{commonName},
- IPAddresses: []net.IP{externalAddress},
}
cert, err = x509.CreateCertificate(rand.Reader, etcdCert, c.CACert, pubKey, c.privateKey)
if err != nil {
diff --git a/metropolis/node/core/consensus/consensus.go b/metropolis/node/core/consensus/consensus.go
index 269ff7e..2bbe1ae 100644
--- a/metropolis/node/core/consensus/consensus.go
+++ b/metropolis/node/core/consensus/consensus.go
@@ -34,7 +34,6 @@
"context"
"encoding/pem"
"fmt"
- "net"
"net/url"
"sync"
"time"
@@ -88,16 +87,13 @@
// NewCluster selects whether the etcd member will start a new cluster and bootstrap a CA and the first member
// certificate, or load existing PKI certificates from disk.
NewCluster bool
- // InitialCluster sets the initial cluster peer URLs when NewCluster is set, and is ignored otherwise. Usually this
- // will be just the new, single server, and more members will be added later.
- InitialCluster string
- // ExternalHost is the IP address or hostname at which this cluster member is reachable to other cluster members.
- ExternalHost string
- // ListenHost is the IP address or hostname at which this cluster member will listen.
- ListenHost string
// Port is the port at which this cluster member will listen for other members. If zero, defaults to the global
// Metropolis setting.
Port int
+
+ // ExternalHost is used by tests to override the address at which etcd should listen for peer connections.
+ // TODO(q3k): make this unexported once the new cluster manager logic lands.
+ ExternalHost string
}
func New(config Config) *Service {
@@ -116,6 +112,9 @@
return nil, fmt.Errorf("failed to create data directory: %w", err)
}
+ if s.config.Name == "" {
+ return nil, fmt.Errorf("Name not set")
+ }
port := s.config.Port
if port == 0 {
port = node.ConsensusPort
@@ -140,19 +139,25 @@
cfg.ACUrls = []url.URL{}
cfg.LPUrls = []url.URL{{
Scheme: "https",
- Host: fmt.Sprintf("%s:%d", s.config.ListenHost, port),
+ Host: fmt.Sprintf("[::]:%d", port),
}}
+
+ // Always listen on the address pointed to by our name - unless running in
+ // tests, where we can't control our hostname easily.
+ ExternalHost := fmt.Sprintf("%s:%d", s.config.Name, port)
+ if s.config.ExternalHost != "" {
+ ExternalHost = fmt.Sprintf("%s:%d", s.config.ExternalHost, port)
+ }
cfg.APUrls = []url.URL{{
Scheme: "https",
- Host: fmt.Sprintf("%s:%d", s.config.ExternalHost, port),
+ Host: ExternalHost,
}}
if s.config.NewCluster {
cfg.ClusterState = "new"
cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name)
- } else if s.config.InitialCluster != "" {
+ } else {
cfg.ClusterState = "existing"
- cfg.InitialCluster = s.config.InitialCluster
}
// TODO(q3k): pipe logs from etcd to supervisor.RawLogger via a file.
@@ -189,12 +194,7 @@
return fmt.Errorf("when creating new cluster's peer CA: %w", err)
}
- ip := net.ParseIP(s.config.ExternalHost)
- if ip == nil {
- return fmt.Errorf("configued external host is not an IP address (got %q)", s.config.ExternalHost)
- }
-
- cert, key, err := st.ca.Issue(ctx, nil, s.config.Name, ip)
+ cert, key, err := st.ca.Issue(ctx, nil, s.config.Name)
if err != nil {
return fmt.Errorf("when issuing new cluster's first certificate: %w", err)
}
@@ -240,8 +240,6 @@
}
st.etcd = server
- supervisor.Logger(ctx).Info("waiting for etcd...")
-
okay := true
select {
case <-st.etcd.Server.ReadyNotify():
diff --git a/metropolis/node/core/consensus/consensus_test.go b/metropolis/node/core/consensus/consensus_test.go
index 5729bc1..00f4414 100644
--- a/metropolis/node/core/consensus/consensus_test.go
+++ b/metropolis/node/core/consensus/consensus_test.go
@@ -21,7 +21,6 @@
"context"
"crypto/x509"
"io/ioutil"
- "net"
"os"
"testing"
"time"
@@ -86,14 +85,12 @@
b := prep(t)
defer b.close()
etcd := New(Config{
- Data: &b.root.Data.Etcd,
- Ephemeral: &b.root.Ephemeral.Consensus,
- Name: "test",
- NewCluster: true,
- InitialCluster: "127.0.0.1",
- ExternalHost: "127.0.0.1",
- ListenHost: "127.0.0.1",
- Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ Data: &b.root.Data.Etcd,
+ Ephemeral: &b.root.Ephemeral.Consensus,
+ Name: "test",
+ NewCluster: true,
+ Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ ExternalHost: "127.0.0.1",
})
supervisor.New(b.ctx, etcd.Run)
@@ -112,14 +109,12 @@
b := prep(t)
defer b.close()
etcd := New(Config{
- Data: &b.root.Data.Etcd,
- Ephemeral: &b.root.Ephemeral.Consensus,
- Name: "test",
- NewCluster: true,
- InitialCluster: "127.0.0.1",
- ExternalHost: "127.0.0.1",
- ListenHost: "127.0.0.1",
- Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ Data: &b.root.Data.Etcd,
+ Ephemeral: &b.root.Ephemeral.Consensus,
+ Name: "test",
+ NewCluster: true,
+ Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ ExternalHost: "127.0.0.1",
})
supervisor.New(b.ctx, etcd.Run)
waitEtcd(t, etcd)
@@ -157,14 +152,12 @@
startEtcd := func(new bool) (*Service, context.CancelFunc) {
etcd := New(Config{
- Data: &b.root.Data.Etcd,
- Ephemeral: &b.root.Ephemeral.Consensus,
- Name: "test",
- NewCluster: new,
- InitialCluster: "127.0.0.1",
- ExternalHost: "127.0.0.1",
- ListenHost: "127.0.0.1",
- Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ Data: &b.root.Data.Etcd,
+ Ephemeral: &b.root.Ephemeral.Consensus,
+ Name: "test",
+ NewCluster: new,
+ Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ ExternalHost: "127.0.0.1",
})
ctx, ctxC := context.WithCancel(b.ctx)
supervisor.New(ctx, etcd.Run)
@@ -203,14 +196,12 @@
b := prep(t)
defer b.close()
etcd := New(Config{
- Data: &b.root.Data.Etcd,
- Ephemeral: &b.root.Ephemeral.Consensus,
- Name: "test",
- NewCluster: true,
- InitialCluster: "127.0.0.1",
- ExternalHost: "127.0.0.1",
- ListenHost: "127.0.0.1",
- Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ Data: &b.root.Data.Etcd,
+ Ephemeral: &b.root.Ephemeral.Consensus,
+ Name: "test",
+ NewCluster: true,
+ Port: freeport.MustConsume(freeport.AllocateTCPPort()),
+ ExternalHost: "127.0.0.1",
})
supervisor.New(b.ctx, etcd.Run)
waitEtcd(t, etcd)
@@ -220,7 +211,7 @@
kv := etcd.state.cl.KV
etcd.stateMu.Unlock()
- certRaw, _, err := ca.Issue(b.ctx, kv, "revoketest", net.ParseIP("1.2.3.4"))
+ certRaw, _, err := ca.Issue(b.ctx, kv, "revoketest")
if err != nil {
t.Fatalf("cert issue failed: %v", err)
}