Serge Bazanski | f05e80a | 2021-10-12 11:53:34 +0200 | [diff] [blame] | 1 | package consensus |
| 2 | |
| 3 | import ( |
| 4 | "crypto/ed25519" |
| 5 | "crypto/x509" |
| 6 | "fmt" |
| 7 | "net" |
| 8 | "net/url" |
| 9 | "strconv" |
| 10 | "time" |
| 11 | |
| 12 | "go.etcd.io/etcd/clientv3" |
| 13 | "go.etcd.io/etcd/embed" |
| 14 | |
| 15 | "source.monogon.dev/metropolis/node" |
| 16 | "source.monogon.dev/metropolis/node/core/identity" |
| 17 | "source.monogon.dev/metropolis/node/core/localstorage" |
| 18 | "source.monogon.dev/metropolis/pkg/pki" |
| 19 | ) |
| 20 | |
| 21 | // Config describes the startup configuration of a consensus instance. |
| 22 | type Config struct { |
| 23 | // Data directory (persistent, encrypted storage) for etcd. |
| 24 | Data *localstorage.DataEtcdDirectory |
| 25 | // Ephemeral directory for etcd. |
| 26 | Ephemeral *localstorage.EphemeralConsensusDirectory |
| 27 | |
| 28 | // JoinCluster is set if this instance is to join an existing cluster for the |
| 29 | // first time. If not set, it's assumed this instance has ran before and has all |
| 30 | // the state on disk required to become part of whatever cluster it was before. |
| 31 | // If that data is not present, a new cluster will be bootstrapped. |
| 32 | JoinCluster *JoinCluster |
| 33 | |
| 34 | // NodePrivateKey is the node's main private key which is also used for |
| 35 | // Metropolis PKI. The same key will be used to identify consensus nodes, but |
| 36 | // different certificates will be used. |
| 37 | NodePrivateKey ed25519.PrivateKey |
| 38 | |
| 39 | testOverrides testOverrides |
| 40 | } |
| 41 | |
| 42 | // JoinCluster is all the data required for a node to join (for the first time) |
| 43 | // an already running cluster. This data is available from an already running |
| 44 | // consensus member by performing AddNode, which is called by the Curator when |
| 45 | // new etcd nodes are added to the cluster. |
| 46 | type JoinCluster struct { |
| 47 | CACertificate *x509.Certificate |
| 48 | NodeCertificate *x509.Certificate |
| 49 | // ExistingNodes are an arbitrarily ordered list of other consensus members that |
| 50 | // the node should attempt to contact. |
| 51 | ExistingNodes []ExistingNode |
| 52 | // InitialCRL is a certificate revocation list for this cluster. After the node |
| 53 | // starts, a CRL on disk will be maintained reflecting the PKI state within etcd. |
| 54 | InitialCRL *pki.CRL |
| 55 | } |
| 56 | |
// ExistingNode identifies an already running consensus instance by its name
// and peer URL.
type ExistingNode struct {
	Name string
	URL  string
}

// connectionString renders the node as a single "name=url" entry.
func (e *ExistingNode) connectionString() string {
	// All operands are strings, so fmt.Sprint inserts no separators.
	return fmt.Sprint(e.Name, "=", e.URL)
}
| 66 | |
| 67 | func (c *Config) nodePublicKey() ed25519.PublicKey { |
| 68 | return c.NodePrivateKey.Public().(ed25519.PublicKey) |
| 69 | } |
| 70 | |
// testOverrides holds knobs that test code can set to make running in a test
// environment easier.
type testOverrides struct {
	// externalPort, when non-zero, replaces the default port used by the node.
	externalPort int
	// externalAddress, when non-empty, replaces the address of the node, which
	// is usually its ID.
	externalAddress string
}
| 79 | |
| 80 | // build takes a Config and returns an etcd embed.Config. |
| 81 | // |
| 82 | // enablePeers selects whether the etcd instance will listen for peer traffic |
| 83 | // over TLS. This requires TLS credentials to be present on disk, and will be |
| 84 | // disabled for bootstrapping the instance. |
| 85 | func (c *Config) build(enablePeers bool) *embed.Config { |
| 86 | nodeID := identity.NodeID(c.nodePublicKey()) |
| 87 | port := int(node.ConsensusPort) |
| 88 | if p := c.testOverrides.externalPort; p != 0 { |
| 89 | port = p |
| 90 | } |
| 91 | host := nodeID |
| 92 | var extraNames []string |
| 93 | if c.testOverrides.externalAddress != "" { |
| 94 | host = c.testOverrides.externalAddress |
| 95 | extraNames = append(extraNames, host) |
| 96 | } |
| 97 | |
| 98 | cfg := embed.NewConfig() |
| 99 | |
| 100 | cfg.Name = nodeID |
| 101 | cfg.ClusterState = "existing" |
| 102 | cfg.InitialClusterToken = "METROPOLIS" |
| 103 | cfg.Logger = "zap" |
| 104 | cfg.LogOutputs = []string{c.Ephemeral.ServerLogsFIFO.FullPath()} |
| 105 | |
| 106 | cfg.Dir = c.Data.Data.FullPath() |
| 107 | |
| 108 | // Client URL, ie. local UNIX socket to listen on for trusted, unauthenticated |
| 109 | // traffic. |
| 110 | cfg.LCUrls = []url.URL{{ |
| 111 | Scheme: "unix", |
| 112 | Path: c.Ephemeral.ClientSocket.FullPath() + ":0", |
| 113 | }} |
| 114 | |
| 115 | if enablePeers { |
| 116 | cfg.PeerTLSInfo.CertFile = c.Data.PeerPKI.Certificate.FullPath() |
| 117 | cfg.PeerTLSInfo.KeyFile = c.Data.PeerPKI.Key.FullPath() |
| 118 | cfg.PeerTLSInfo.TrustedCAFile = c.Data.PeerPKI.CACertificate.FullPath() |
| 119 | cfg.PeerTLSInfo.ClientCertAuth = true |
| 120 | cfg.PeerTLSInfo.CRLFile = c.Data.PeerCRL.FullPath() |
| 121 | |
| 122 | cfg.LPUrls = []url.URL{{ |
| 123 | Scheme: "https", |
| 124 | Host: fmt.Sprintf("[::]:%d", port), |
| 125 | }} |
| 126 | cfg.APUrls = []url.URL{{ |
| 127 | Scheme: "https", |
| 128 | Host: net.JoinHostPort(host, strconv.Itoa(port)), |
| 129 | }} |
| 130 | } else { |
| 131 | // When not enabling peer traffic, listen on loopback. We would not listen at |
| 132 | // all, but etcd seems to prevent us from doing that. |
| 133 | cfg.LPUrls = []url.URL{{ |
| 134 | Scheme: "http", |
| 135 | Host: fmt.Sprintf("127.0.0.1:%d", port), |
| 136 | }} |
| 137 | cfg.APUrls = []url.URL{{ |
| 138 | Scheme: "http", |
| 139 | Host: fmt.Sprintf("127.0.0.1:%d", port), |
| 140 | }} |
| 141 | } |
| 142 | |
| 143 | cfg.InitialCluster = cfg.InitialClusterFromName(nodeID) |
| 144 | if c.JoinCluster != nil { |
| 145 | for _, n := range c.JoinCluster.ExistingNodes { |
| 146 | cfg.InitialCluster += "," + n.connectionString() |
| 147 | } |
| 148 | } |
| 149 | return cfg |
| 150 | } |
| 151 | |
| 152 | // localClient returns an etcd client connected to the socket as configured in |
| 153 | // Config. |
| 154 | func (c *Config) localClient() (*clientv3.Client, error) { |
| 155 | socket := c.Ephemeral.ClientSocket.FullPath() |
| 156 | return clientv3.New(clientv3.Config{ |
| 157 | Endpoints: []string{fmt.Sprintf("unix://%s:0", socket)}, |
| 158 | DialTimeout: time.Second, |
| 159 | }) |
| 160 | } |