Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
// Package consensus manages the embedded etcd cluster.
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 18 | package consensus |
| 19 | |
| 20 | import ( |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 21 | "bytes" |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 22 | "context" |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 23 | "crypto/x509" |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 24 | "encoding/binary" |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 25 | "encoding/hex" |
| 26 | "encoding/pem" |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 27 | "fmt" |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 28 | "io/ioutil" |
| 29 | "math/rand" |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 30 | "net" |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 31 | "net/url" |
| 32 | "os" |
| 33 | "path" |
| 34 | "path/filepath" |
| 35 | "strings" |
| 36 | "time" |
| 37 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 38 | "github.com/pkg/errors" |
| 39 | "go.etcd.io/etcd/clientv3" |
| 40 | "go.etcd.io/etcd/clientv3/namespace" |
| 41 | "go.etcd.io/etcd/embed" |
| 42 | "go.etcd.io/etcd/etcdserver/api/membership" |
| 43 | "go.etcd.io/etcd/pkg/types" |
| 44 | "go.etcd.io/etcd/proxy/grpcproxy/adapter" |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 45 | "go.uber.org/atomic" |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 46 | "go.uber.org/zap" |
Lorenz Brun | 60febd9 | 2020-05-07 14:08:18 +0200 | [diff] [blame] | 47 | "go.uber.org/zap/zapcore" |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 48 | "golang.org/x/sys/unix" |
Hendrik Hofstadt | 8efe51e | 2020-02-28 12:53:41 +0100 | [diff] [blame] | 49 | |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 50 | "git.monogon.dev/source/nexantic.git/core/generated/api" |
| 51 | "git.monogon.dev/source/nexantic.git/core/internal/common" |
| 52 | "git.monogon.dev/source/nexantic.git/core/internal/common/service" |
Hendrik Hofstadt | 8efe51e | 2020-02-28 12:53:41 +0100 | [diff] [blame] | 53 | "git.monogon.dev/source/nexantic.git/core/internal/consensus/ca" |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 54 | ) |
| 55 | |
// DefaultClusterToken is the initial cluster token that OnStart assigns to the
// embedded etcd configuration.
const DefaultClusterToken = "SIGNOS"

// DefaultLogger is the name of the etcd logger implementation that OnStart selects.
const DefaultLogger = "zap"
| 60 | |
// The following are file names, relative to Config.DataDir, of the PKI material
// used for etcd peer TLS.

// CAPath is the file holding the trusted CA certificate (PEM).
const CAPath = "ca.pem"

// CertPath is the file holding this node's certificate (PEM).
const CertPath = "cert.pem"

// KeyPath is the file holding this node's private key (PEM).
const KeyPath = "cert-key.pem"

// CRLPath is the file holding the certificate revocation list consumed by etcd.
const CRLPath = "ca-crl.der"

// CRLSwapPath is the temporary file that watchCRL writes an updated CRL to
// before renaming it over CRLPath.
const CRLSwapPath = "ca-crl.der.swp"
| 68 | |
// LocalListenerURL is the client listener URL (a UNIX socket) on which OnStart
// exposes etcd to local processes.
const LocalListenerURL = "unix:///consensus/listener.sock:0"
| 72 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 73 | type ( |
| 74 | Service struct { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 75 | *service.BaseService |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 76 | |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 77 | etcd *embed.Etcd |
| 78 | kv clientv3.KV |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 79 | ready atomic.Bool |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 80 | |
| 81 | // bootstrapCA and bootstrapCert cache the etcd cluster CA data during bootstrap. |
| 82 | bootstrapCA *ca.CA |
| 83 | bootstrapCert []byte |
| 84 | |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 85 | watchCRLTicker *time.Ticker |
| 86 | lastCRL []byte |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 87 | |
| 88 | config *Config |
| 89 | } |
| 90 | |
| 91 | Config struct { |
| 92 | Name string |
| 93 | DataDir string |
| 94 | InitialCluster string |
| 95 | NewCluster bool |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 96 | ExternalHost string |
| 97 | ListenHost string |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 98 | } |
| 99 | |
| 100 | Member struct { |
| 101 | ID uint64 |
| 102 | Name string |
| 103 | Address string |
| 104 | Synced bool |
| 105 | } |
| 106 | ) |
| 107 | |
| 108 | func NewConsensusService(config Config, logger *zap.Logger) (*Service, error) { |
| 109 | consensusServer := &Service{ |
| 110 | config: &config, |
| 111 | } |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 112 | consensusServer.BaseService = service.NewBaseService("consensus", logger, consensusServer) |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 113 | |
| 114 | return consensusServer, nil |
| 115 | } |
| 116 | |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 117 | func peerURL(host string) url.URL { |
| 118 | return url.URL{Scheme: "https", Host: fmt.Sprintf("%s:%d", host, common.ConsensusPort)} |
| 119 | } |
| 120 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 121 | func (s *Service) OnStart() error { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 122 | // See: https://godoc.org/github.com/coreos/etcd/embed#Config |
| 123 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 124 | if s.config == nil { |
| 125 | return errors.New("config for consensus is nil") |
| 126 | } |
| 127 | |
| 128 | cfg := embed.NewConfig() |
| 129 | |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 130 | cfg.PeerTLSInfo.CertFile = filepath.Join(s.config.DataDir, CertPath) |
| 131 | cfg.PeerTLSInfo.KeyFile = filepath.Join(s.config.DataDir, KeyPath) |
| 132 | cfg.PeerTLSInfo.TrustedCAFile = filepath.Join(s.config.DataDir, CAPath) |
| 133 | cfg.PeerTLSInfo.ClientCertAuth = true |
| 134 | cfg.PeerTLSInfo.CRLFile = filepath.Join(s.config.DataDir, CRLPath) |
| 135 | |
| 136 | lastCRL, err := ioutil.ReadFile(cfg.PeerTLSInfo.CRLFile) |
| 137 | if err != nil { |
| 138 | return fmt.Errorf("failed to read etcd CRL: %w", err) |
| 139 | } |
| 140 | s.lastCRL = lastCRL |
| 141 | |
Lorenz Brun | 6e8f69c | 2019-11-18 10:44:24 +0100 | [diff] [blame] | 142 | // Expose etcd to local processes |
| 143 | if err := os.MkdirAll("/consensus", 0700); err != nil { |
| 144 | return fmt.Errorf("Failed to create consensus runtime state directory: %w", err) |
| 145 | } |
| 146 | listenerURL, err := url.Parse(LocalListenerURL) |
| 147 | if err != nil { |
| 148 | panic(err) |
| 149 | } |
| 150 | cfg.LCUrls = []url.URL{*listenerURL} |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 151 | |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 152 | cfg.APUrls = []url.URL{peerURL(s.config.ExternalHost)} |
| 153 | cfg.LPUrls = []url.URL{peerURL(s.config.ListenHost)} |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 154 | cfg.ACUrls = []url.URL{} |
| 155 | |
| 156 | cfg.Dir = s.config.DataDir |
| 157 | cfg.InitialClusterToken = DefaultClusterToken |
| 158 | cfg.Name = s.config.Name |
| 159 | |
| 160 | // Only relevant if creating or joining a cluster; otherwise settings will be ignored |
| 161 | if s.config.NewCluster { |
| 162 | cfg.ClusterState = "new" |
| 163 | cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name) |
| 164 | } else if s.config.InitialCluster != "" { |
| 165 | cfg.ClusterState = "existing" |
| 166 | cfg.InitialCluster = s.config.InitialCluster |
| 167 | } |
| 168 | |
| 169 | cfg.Logger = DefaultLogger |
Lorenz Brun | 60febd9 | 2020-05-07 14:08:18 +0200 | [diff] [blame] | 170 | cfg.ZapLoggerBuilder = embed.NewZapCoreLoggerBuilder( |
| 171 | s.Logger.With(zap.String("component", "etcd")).WithOptions(zap.IncreaseLevel(zapcore.WarnLevel)), |
| 172 | s.Logger.Core(), |
| 173 | nil, |
| 174 | ) |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 175 | |
| 176 | server, err := embed.StartEtcd(cfg) |
| 177 | if err != nil { |
| 178 | return err |
| 179 | } |
| 180 | s.etcd = server |
| 181 | |
| 182 | // Override the logger |
| 183 | //*server.GetLogger() = *s.Logger.With(zap.String("component", "etcd")) |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 184 | // TODO(leo): can we uncomment this? |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 185 | |
| 186 | go func() { |
| 187 | s.Logger.Info("waiting for etcd to become ready") |
| 188 | <-s.etcd.Server.ReadyNotify() |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 189 | s.ready.Store(true) |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 190 | s.Logger.Info("etcd is now ready") |
| 191 | }() |
| 192 | |
| 193 | // Inject kv client |
| 194 | s.kv = clientv3.NewKVFromKVClient(adapter.KvServerToKvClient(s.etcd.Server), nil) |
| 195 | |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 196 | // Start CRL watcher |
| 197 | go s.watchCRL() |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 198 | ctx := context.TODO() |
| 199 | go s.autoPromote(ctx) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 200 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 201 | return nil |
| 202 | } |
| 203 | |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 204 | // WriteCertificateFiles writes the given node certificate data to local storage |
| 205 | // such that it can be used by the embedded etcd server. |
| 206 | // Unfortunately, we cannot pass the certificates directly to etcd. |
| 207 | func (s *Service) WriteCertificateFiles(certs *api.ConsensusCertificates) error { |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 208 | if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CRLPath), certs.Crl, 0600); err != nil { |
| 209 | return err |
| 210 | } |
| 211 | if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CertPath), |
| 212 | pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certs.Cert}), 0600); err != nil { |
| 213 | return err |
| 214 | } |
| 215 | if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, KeyPath), |
| 216 | pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: certs.Key}), 0600); err != nil { |
| 217 | return err |
| 218 | } |
| 219 | if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CAPath), |
| 220 | pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certs.Ca}), 0600); err != nil { |
| 221 | return err |
| 222 | } |
| 223 | return nil |
| 224 | } |
| 225 | |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 226 | // PrecreateCA generates the etcd cluster certificate authority and writes it to local storage. |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 227 | func (s *Service) PrecreateCA(extIP net.IP) error { |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 228 | // Provision an etcd CA |
| 229 | etcdRootCA, err := ca.New("Smalltown etcd Root CA") |
| 230 | if err != nil { |
| 231 | return err |
| 232 | } |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 233 | cert, privkey, err := etcdRootCA.IssueCertificate(s.config.ExternalHost, extIP) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 234 | if err != nil { |
| 235 | return fmt.Errorf("failed to self-issue a certificate: %w", err) |
| 236 | } |
| 237 | if err := os.MkdirAll(s.config.DataDir, 0700); err != nil { |
| 238 | return fmt.Errorf("failed to create consensus data dir: %w", err) |
| 239 | } |
| 240 | // Preserve certificate for later injection |
| 241 | s.bootstrapCert = cert |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 242 | if err := s.WriteCertificateFiles(&api.ConsensusCertificates{ |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 243 | Ca: etcdRootCA.CACertRaw, |
| 244 | Crl: etcdRootCA.CRLRaw, |
| 245 | Cert: cert, |
| 246 | Key: privkey, |
| 247 | }); err != nil { |
| 248 | return fmt.Errorf("failed to setup certificates: %w", err) |
| 249 | } |
| 250 | s.bootstrapCA = etcdRootCA |
| 251 | return nil |
| 252 | } |
| 253 | |
// etcd keys under which the consensus CA material is stored.

// caPathEtcd is the key of the CA certificate.
const caPathEtcd = "/etcd-ca/ca.der"

// caKeyPathEtcd is the key of the CA private key.
const caKeyPathEtcd = "/etcd-ca/ca-key.der"

// crlPathEtcd is the key of the current certificate revocation list.
const crlPathEtcd = "/etcd-ca/crl.der"

// certPrefixEtcd is the prefix under which the individual certs the etcd CA
// has issued are stored, keyed by hex-encoded serial number.
const certPrefixEtcd = "/etcd-ca/certs"
| 262 | |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 263 | // InjectCA copies the CA from data cached during PrecreateCA to etcd. |
| 264 | // Requires a previous call to PrecreateCA. |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 265 | func (s *Service) InjectCA() error { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 266 | if s.bootstrapCA == nil || s.bootstrapCert == nil { |
| 267 | panic("bootstrapCA or bootstrapCert are nil - missing PrecreateCA call?") |
| 268 | } |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 269 | if _, err := s.kv.Put(context.Background(), caPathEtcd, string(s.bootstrapCA.CACertRaw)); err != nil { |
| 270 | return err |
| 271 | } |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 272 | // TODO(lorenz): Should be wrapped by the master key |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 273 | if _, err := s.kv.Put(context.Background(), caKeyPathEtcd, string([]byte(*s.bootstrapCA.PrivateKey))); err != nil { |
| 274 | return err |
| 275 | } |
| 276 | if _, err := s.kv.Put(context.Background(), crlPathEtcd, string(s.bootstrapCA.CRLRaw)); err != nil { |
| 277 | return err |
| 278 | } |
| 279 | certVal, err := x509.ParseCertificate(s.bootstrapCert) |
| 280 | if err != nil { |
| 281 | return err |
| 282 | } |
| 283 | serial := hex.EncodeToString(certVal.SerialNumber.Bytes()) |
| 284 | if _, err := s.kv.Put(context.Background(), path.Join(certPrefixEtcd, serial), string(s.bootstrapCert)); err != nil { |
| 285 | return fmt.Errorf("failed to persist certificate: %w", err) |
| 286 | } |
| 287 | // Clear out bootstrap CA after injecting |
| 288 | s.bootstrapCA = nil |
| 289 | s.bootstrapCert = []byte{} |
| 290 | return nil |
| 291 | } |
| 292 | |
| 293 | func (s *Service) etcdGetSingle(path string) ([]byte, int64, error) { |
| 294 | res, err := s.kv.Get(context.Background(), path) |
| 295 | if err != nil { |
| 296 | return nil, -1, fmt.Errorf("failed to get key from etcd: %w", err) |
| 297 | } |
| 298 | if len(res.Kvs) != 1 { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 299 | return nil, -1, errors.New("key not available or multiple keys returned") |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 300 | } |
| 301 | return res.Kvs[0].Value, res.Kvs[0].ModRevision, nil |
| 302 | } |
| 303 | |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 304 | func (s *Service) getCAFromEtcd() (*ca.CA, int64, error) { |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 305 | // TODO: Technically this could be done in a single request, but it's more logic |
| 306 | caCert, _, err := s.etcdGetSingle(caPathEtcd) |
| 307 | if err != nil { |
| 308 | return nil, -1, fmt.Errorf("failed to get CA certificate from etcd: %w", err) |
| 309 | } |
| 310 | caKey, _, err := s.etcdGetSingle(caKeyPathEtcd) |
| 311 | if err != nil { |
| 312 | return nil, -1, fmt.Errorf("failed to get CA key from etcd: %w", err) |
| 313 | } |
| 314 | // TODO: Unwrap CA key once wrapping is implemented |
| 315 | crl, crlRevision, err := s.etcdGetSingle(crlPathEtcd) |
| 316 | if err != nil { |
| 317 | return nil, -1, fmt.Errorf("failed to get CRL from etcd: %w", err) |
| 318 | } |
| 319 | idCA, err := ca.FromCertificates(caCert, caKey, crl) |
| 320 | if err != nil { |
| 321 | return nil, -1, fmt.Errorf("failed to take CA online: %w", err) |
| 322 | } |
| 323 | return idCA, crlRevision, nil |
| 324 | } |
| 325 | |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 326 | // ProvisionMember sets up and returns provisioning data to join another node into the consensus. |
| 327 | // It issues PKI material, creates a static cluster bootstrap specification string (known as initial-cluster in etcd) |
| 328 | // and adds the new node as a learner (non-voting) member to the cluster. Once the new node has caught up with the |
| 329 | // cluster it is automatically promoted to a voting member by the autoPromote process. |
| 330 | func (s *Service) ProvisionMember(name string, ip net.IP) (*api.ConsensusCertificates, string, error) { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 331 | idCA, _, err := s.getCAFromEtcd() |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 332 | if err != nil { |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 333 | return nil, "", fmt.Errorf("failed to get consensus CA: %w", err) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 334 | } |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 335 | cert, key, err := idCA.IssueCertificate(name, ip) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 336 | if err != nil { |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 337 | return nil, "", fmt.Errorf("failed to issue certificate: %w", err) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 338 | } |
| 339 | certVal, err := x509.ParseCertificate(cert) |
| 340 | if err != nil { |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 341 | return nil, "", fmt.Errorf("failed to parse just-issued consensus cert: %w", err) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 342 | } |
| 343 | serial := hex.EncodeToString(certVal.SerialNumber.Bytes()) |
| 344 | if _, err := s.kv.Put(context.Background(), path.Join(certPrefixEtcd, serial), string(cert)); err != nil { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 345 | // We issued a certificate, but failed to persist it. Return an error and forget it ever happened. |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 346 | return nil, "", fmt.Errorf("failed to persist certificate: %w", err) |
| 347 | } |
| 348 | |
| 349 | currentMembers := s.etcd.Server.Cluster().Members() |
| 350 | var memberStrs []string |
| 351 | for _, member := range currentMembers { |
| 352 | memberStrs = append(memberStrs, fmt.Sprintf("%v=%v", member.Name, member.PickPeerURL())) |
| 353 | } |
| 354 | apURL := peerURL(ip.String()) |
| 355 | memberStrs = append(memberStrs, fmt.Sprintf("%s=%s", name, apURL.String())) |
| 356 | |
| 357 | pubKeyPrefix, err := common.IDKeyPrefixFromName(name) |
| 358 | if err != nil { |
| 359 | return nil, "", fmt.Errorf("invalid new node name: %v", err) |
| 360 | } |
| 361 | |
| 362 | crl, _, err := s.etcdGetSingle(crlPathEtcd) |
| 363 | |
| 364 | _, err = s.etcd.Server.AddMember(context.Background(), membership.Member{ |
| 365 | RaftAttributes: membership.RaftAttributes{ |
| 366 | PeerURLs: types.URLs{apURL}.StringSlice(), |
| 367 | IsLearner: true, |
| 368 | }, |
| 369 | Attributes: membership.Attributes{Name: name}, |
| 370 | ID: types.ID(binary.BigEndian.Uint64(pubKeyPrefix[:8])), |
| 371 | }) |
| 372 | if err != nil { |
| 373 | return nil, "", fmt.Errorf("failed to provision member: %w", err) |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 374 | } |
| 375 | return &api.ConsensusCertificates{ |
| 376 | Ca: idCA.CACertRaw, |
| 377 | Cert: cert, |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 378 | Crl: crl, |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 379 | Key: key, |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 380 | }, strings.Join(memberStrs, ","), nil |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 381 | } |
| 382 | |
| 383 | func (s *Service) RevokeCertificate(hostname string) error { |
| 384 | rand.Seed(time.Now().UnixNano()) |
| 385 | for { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 386 | idCA, crlRevision, err := s.getCAFromEtcd() |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 387 | if err != nil { |
| 388 | return err |
| 389 | } |
| 390 | allIssuedCerts, err := s.kv.Get(context.Background(), certPrefixEtcd, clientv3.WithPrefix()) |
| 391 | for _, cert := range allIssuedCerts.Kvs { |
| 392 | certVal, err := x509.ParseCertificate(cert.Value) |
| 393 | if err != nil { |
| 394 | s.Logger.Error("Failed to parse previously issued certificate, this is a security risk", zap.Error(err)) |
| 395 | continue |
| 396 | } |
| 397 | for _, dnsName := range certVal.DNSNames { |
| 398 | if dnsName == hostname { |
| 399 | // Revoke this |
| 400 | if err := idCA.Revoke(certVal.SerialNumber); err != nil { |
| 401 | // We need to fail if any single revocation fails otherwise outer applications |
| 402 | // have no chance of calling this safely |
| 403 | return err |
| 404 | } |
| 405 | } |
| 406 | } |
| 407 | } |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 408 | // TODO(leo): this needs a test |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 409 | cmp := clientv3.Compare(clientv3.ModRevision(crlPathEtcd), "=", crlRevision) |
| 410 | op := clientv3.OpPut(crlPathEtcd, string(idCA.CRLRaw)) |
| 411 | res, err := s.kv.Txn(context.Background()).If(cmp).Then(op).Commit() |
| 412 | if err != nil { |
| 413 | return fmt.Errorf("failed to persist new CRL in etcd: %w", err) |
| 414 | } |
| 415 | if res.Succeeded { // Transaction has succeeded |
| 416 | break |
| 417 | } |
| 418 | // Sleep a random duration between 0 and 300ms to reduce serialization failures |
| 419 | time.Sleep(time.Duration(rand.Intn(300)) * time.Millisecond) |
| 420 | } |
| 421 | return nil |
| 422 | } |
| 423 | |
| 424 | func (s *Service) watchCRL() { |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 425 | // TODO(lorenz): Change etcd client to WatchableKV and make this an actual watch |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 426 | // This needs changes in more places, so leaving it now |
| 427 | s.watchCRLTicker = time.NewTicker(30 * time.Second) |
| 428 | for range s.watchCRLTicker.C { |
| 429 | crl, _, err := s.etcdGetSingle(crlPathEtcd) |
| 430 | if err != nil { |
| 431 | s.Logger.Warn("Failed to check for new CRL", zap.Error(err)) |
| 432 | continue |
| 433 | } |
| 434 | // This is cryptographic material but not secret, so no constant time compare necessary here |
| 435 | if !bytes.Equal(crl, s.lastCRL) { |
| 436 | if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CRLSwapPath), crl, 0600); err != nil { |
| 437 | s.Logger.Warn("Failed to write updated CRL", zap.Error(err)) |
| 438 | } |
| 439 | // This uses unix.Rename to guarantee a particular atomic update behavior |
| 440 | if err := unix.Rename(filepath.Join(s.config.DataDir, CRLSwapPath), filepath.Join(s.config.DataDir, CRLPath)); err != nil { |
| 441 | s.Logger.Warn("Failed to atomically swap updated CRL", zap.Error(err)) |
| 442 | } |
| 443 | } |
| 444 | } |
| 445 | } |
| 446 | |
Lorenz Brun | 52f7f29 | 2020-06-24 16:42:02 +0200 | [diff] [blame] | 447 | // autoPromote automatically promotes learning (non-voting) members to voting members. etcd currently lacks auto-promote |
| 448 | // capabilities (https://github.com/etcd-io/etcd/issues/10537) so we need to do this ourselves. |
| 449 | func (s *Service) autoPromote(ctx context.Context) { |
| 450 | promoteTicker := time.NewTicker(5 * time.Second) |
| 451 | go func() { |
| 452 | <-ctx.Done() |
| 453 | promoteTicker.Stop() |
| 454 | }() |
| 455 | for range promoteTicker.C { |
| 456 | if s.etcd.Server.Leader() != s.etcd.Server.ID() { |
| 457 | continue |
| 458 | } |
| 459 | for _, member := range s.etcd.Server.Cluster().Members() { |
| 460 | if member.IsLearner { |
| 461 | // We always call PromoteMember since the metadata necessary to decide if we should is private. |
| 462 | // Luckily etcd already does sanity checks internally and will refuse to promote nodes that aren't |
| 463 | // connected or are still behind on transactions. |
| 464 | if _, err := s.etcd.Server.PromoteMember(context.Background(), uint64(member.ID)); err != nil { |
| 465 | s.Logger.Info("Failed to promote consensus node", zap.String("node", member.Name), zap.Error(err)) |
| 466 | } |
| 467 | s.Logger.Info("Promoted new consensus node", zap.String("node", member.Name)) |
| 468 | } |
| 469 | } |
| 470 | } |
| 471 | } |
| 472 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 473 | func (s *Service) OnStop() error { |
Lorenz Brun | a4ea9d0 | 2019-10-31 11:40:30 +0100 | [diff] [blame] | 474 | s.watchCRLTicker.Stop() |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 475 | s.etcd.Close() |
| 476 | |
| 477 | return nil |
| 478 | } |
| 479 | |
| 480 | // IsProvisioned returns whether the node has been setup before and etcd has a data directory |
| 481 | func (s *Service) IsProvisioned() bool { |
| 482 | _, err := os.Stat(s.config.DataDir) |
| 483 | |
| 484 | return !os.IsNotExist(err) |
| 485 | } |
| 486 | |
| 487 | // IsReady returns whether etcd is ready and synced |
| 488 | func (s *Service) IsReady() bool { |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 489 | return s.ready.Load() |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 490 | } |
| 491 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 492 | // GetConfig returns the current consensus config |
| 493 | func (s *Service) GetConfig() Config { |
| 494 | return *s.config |
| 495 | } |
| 496 | |
| 497 | // SetConfig sets the consensus config. Changes are only applied when the service is restarted. |
| 498 | func (s *Service) SetConfig(config Config) { |
| 499 | s.config = &config |
| 500 | } |
| 501 | |
Hendrik Hofstadt | 0d7c91e | 2019-10-23 21:44:47 +0200 | [diff] [blame] | 502 | func (s *Service) GetStore(module, space string) clientv3.KV { |
| 503 | return namespace.NewKV(s.kv, fmt.Sprintf("%s:%s", module, space)) |
| 504 | } |