blob: 77a8a11e09d7008983a9699b84654db6ac971c98 [file] [log] [blame]
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
// Package consensus manages the embedded etcd cluster.
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020018package consensus
19
20import (
Lorenz Bruna4ea9d02019-10-31 11:40:30 +010021 "bytes"
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020022 "context"
Lorenz Bruna4ea9d02019-10-31 11:40:30 +010023 "crypto/x509"
24 "encoding/hex"
25 "encoding/pem"
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020026 "fmt"
Lorenz Bruna4ea9d02019-10-31 11:40:30 +010027 "io/ioutil"
28 "math/rand"
29 "net/url"
30 "os"
31 "path"
32 "path/filepath"
33 "strings"
34 "time"
35
Lorenz Brunaa6b7342019-12-12 02:55:02 +010036 "git.monogon.dev/source/nexantic.git/core/internal/common"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010037 "git.monogon.dev/source/nexantic.git/core/internal/common/service"
38
Lorenz Bruna4ea9d02019-10-31 11:40:30 +010039 "git.monogon.dev/source/nexantic.git/core/generated/api"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010040
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020041 "github.com/pkg/errors"
42 "go.etcd.io/etcd/clientv3"
43 "go.etcd.io/etcd/clientv3/namespace"
44 "go.etcd.io/etcd/embed"
45 "go.etcd.io/etcd/etcdserver/api/membership"
46 "go.etcd.io/etcd/pkg/types"
47 "go.etcd.io/etcd/proxy/grpcproxy/adapter"
48 "go.uber.org/zap"
Lorenz Bruna4ea9d02019-10-31 11:40:30 +010049 "golang.org/x/sys/unix"
Hendrik Hofstadt8efe51e2020-02-28 12:53:41 +010050
51 "git.monogon.dev/source/nexantic.git/core/internal/consensus/ca"
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020052)
53
// Defaults applied to the embedded etcd configuration.
const (
	// DefaultClusterToken is used as the etcd initial cluster token and as the
	// cluster name when new members are created.
	DefaultClusterToken = "SIGNOS"

	// DefaultLogger is the logger name handed to the etcd configuration.
	DefaultLogger = "zap"
)
58
// File names, relative to the consensus data directory, of the TLS material
// consumed by the embedded etcd server.
const (
	// CAPath holds the PEM-encoded CA certificate trusted for peer connections.
	CAPath = "ca.pem"

	// CertPath holds the PEM-encoded certificate of this node.
	CertPath = "cert.pem"

	// KeyPath holds the PEM-encoded private key belonging to CertPath.
	KeyPath = "cert-key.pem"

	// CRLPath holds the certificate revocation list in the form it was received.
	CRLPath = "ca-crl.der"

	// CRLSwapPath is the temporary file a new CRL is written to before it is
	// renamed over CRLPath.
	CRLSwapPath = "ca-crl.der.swp"
)
66
const (
	// LocalListenerURL is the unix socket URL on which the embedded etcd
	// listens for client connections from local processes.
	LocalListenerURL = "unix:///consensus/listener.sock:0"
)
70
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020071type (
72 Service struct {
Leopold Schabel68c58752019-11-14 21:00:59 +010073 *service.BaseService
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020074
Leopold Schabel68c58752019-11-14 21:00:59 +010075 etcd *embed.Etcd
76 kv clientv3.KV
77 ready bool
78
79 // bootstrapCA and bootstrapCert cache the etcd cluster CA data during bootstrap.
80 bootstrapCA *ca.CA
81 bootstrapCert []byte
82
Lorenz Bruna4ea9d02019-10-31 11:40:30 +010083 watchCRLTicker *time.Ticker
84 lastCRL []byte
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020085
86 config *Config
87 }
88
89 Config struct {
90 Name string
91 DataDir string
92 InitialCluster string
93 NewCluster bool
Leopold Schabel68c58752019-11-14 21:00:59 +010094 ExternalHost string
95 ListenHost string
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020096 }
97
98 Member struct {
99 ID uint64
100 Name string
101 Address string
102 Synced bool
103 }
104)
105
106func NewConsensusService(config Config, logger *zap.Logger) (*Service, error) {
107 consensusServer := &Service{
108 config: &config,
109 }
Leopold Schabel68c58752019-11-14 21:00:59 +0100110 consensusServer.BaseService = service.NewBaseService("consensus", logger, consensusServer)
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200111
112 return consensusServer, nil
113}
114
115func (s *Service) OnStart() error {
Leopold Schabel68c58752019-11-14 21:00:59 +0100116 // See: https://godoc.org/github.com/coreos/etcd/embed#Config
117
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200118 if s.config == nil {
119 return errors.New("config for consensus is nil")
120 }
121
122 cfg := embed.NewConfig()
123
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100124 cfg.PeerTLSInfo.CertFile = filepath.Join(s.config.DataDir, CertPath)
125 cfg.PeerTLSInfo.KeyFile = filepath.Join(s.config.DataDir, KeyPath)
126 cfg.PeerTLSInfo.TrustedCAFile = filepath.Join(s.config.DataDir, CAPath)
127 cfg.PeerTLSInfo.ClientCertAuth = true
128 cfg.PeerTLSInfo.CRLFile = filepath.Join(s.config.DataDir, CRLPath)
129
130 lastCRL, err := ioutil.ReadFile(cfg.PeerTLSInfo.CRLFile)
131 if err != nil {
132 return fmt.Errorf("failed to read etcd CRL: %w", err)
133 }
134 s.lastCRL = lastCRL
135
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100136 // Expose etcd to local processes
137 if err := os.MkdirAll("/consensus", 0700); err != nil {
138 return fmt.Errorf("Failed to create consensus runtime state directory: %w", err)
139 }
140 listenerURL, err := url.Parse(LocalListenerURL)
141 if err != nil {
142 panic(err)
143 }
144 cfg.LCUrls = []url.URL{*listenerURL}
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200145
Leopold Schabel68c58752019-11-14 21:00:59 +0100146 // Advertise Peer URLs
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100147 apURL, err := url.Parse(fmt.Sprintf("https://%s:%d", s.config.ExternalHost, common.ConsensusPort))
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200148 if err != nil {
Leopold Schabel68c58752019-11-14 21:00:59 +0100149 return fmt.Errorf("invalid external_host or listen_port: %w", err)
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200150 }
151
Leopold Schabel68c58752019-11-14 21:00:59 +0100152 // Listen Peer URLs
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100153 lpURL, err := url.Parse(fmt.Sprintf("https://%s:%d", s.config.ListenHost, common.ConsensusPort))
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200154 if err != nil {
Leopold Schabel68c58752019-11-14 21:00:59 +0100155 return fmt.Errorf("invalid listen_host or listen_port: %w", err)
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200156 }
157 cfg.APUrls = []url.URL{*apURL}
158 cfg.LPUrls = []url.URL{*lpURL}
159 cfg.ACUrls = []url.URL{}
160
161 cfg.Dir = s.config.DataDir
162 cfg.InitialClusterToken = DefaultClusterToken
163 cfg.Name = s.config.Name
164
165 // Only relevant if creating or joining a cluster; otherwise settings will be ignored
166 if s.config.NewCluster {
167 cfg.ClusterState = "new"
168 cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name)
169 } else if s.config.InitialCluster != "" {
170 cfg.ClusterState = "existing"
171 cfg.InitialCluster = s.config.InitialCluster
172 }
173
174 cfg.Logger = DefaultLogger
175
176 server, err := embed.StartEtcd(cfg)
177 if err != nil {
178 return err
179 }
180 s.etcd = server
181
182 // Override the logger
183 //*server.GetLogger() = *s.Logger.With(zap.String("component", "etcd"))
Leopold Schabel68c58752019-11-14 21:00:59 +0100184 // TODO(leo): can we uncomment this?
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200185
186 go func() {
187 s.Logger.Info("waiting for etcd to become ready")
188 <-s.etcd.Server.ReadyNotify()
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200189 s.Logger.Info("etcd is now ready")
190 }()
191
192 // Inject kv client
193 s.kv = clientv3.NewKVFromKVClient(adapter.KvServerToKvClient(s.etcd.Server), nil)
194
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100195 // Start CRL watcher
196 go s.watchCRL()
197
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200198 return nil
199}
200
Leopold Schabel68c58752019-11-14 21:00:59 +0100201// WriteCertificateFiles writes the given node certificate data to local storage
202// such that it can be used by the embedded etcd server.
203// Unfortunately, we cannot pass the certificates directly to etcd.
204func (s *Service) WriteCertificateFiles(certs *api.ConsensusCertificates) error {
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100205 if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CRLPath), certs.Crl, 0600); err != nil {
206 return err
207 }
208 if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CertPath),
209 pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certs.Cert}), 0600); err != nil {
210 return err
211 }
212 if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, KeyPath),
213 pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: certs.Key}), 0600); err != nil {
214 return err
215 }
216 if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CAPath),
217 pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certs.Ca}), 0600); err != nil {
218 return err
219 }
220 return nil
221}
222
Leopold Schabel68c58752019-11-14 21:00:59 +0100223// PrecreateCA generates the etcd cluster certificate authority and writes it to local storage.
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100224func (s *Service) PrecreateCA() error {
225 // Provision an etcd CA
226 etcdRootCA, err := ca.New("Smalltown etcd Root CA")
227 if err != nil {
228 return err
229 }
230 cert, privkey, err := etcdRootCA.IssueCertificate(s.config.ExternalHost)
231 if err != nil {
232 return fmt.Errorf("failed to self-issue a certificate: %w", err)
233 }
234 if err := os.MkdirAll(s.config.DataDir, 0700); err != nil {
235 return fmt.Errorf("failed to create consensus data dir: %w", err)
236 }
237 // Preserve certificate for later injection
238 s.bootstrapCert = cert
Leopold Schabel68c58752019-11-14 21:00:59 +0100239 if err := s.WriteCertificateFiles(&api.ConsensusCertificates{
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100240 Ca: etcdRootCA.CACertRaw,
241 Crl: etcdRootCA.CRLRaw,
242 Cert: cert,
243 Key: privkey,
244 }); err != nil {
245 return fmt.Errorf("failed to setup certificates: %w", err)
246 }
247 s.bootstrapCA = etcdRootCA
248 return nil
249}
250
// Keys under which the etcd CA material is stored inside etcd itself.
const (
	// caPathEtcd stores the CA certificate.
	caPathEtcd = "/etcd-ca/ca.der"
	// caKeyPathEtcd stores the CA private key.
	caKeyPathEtcd = "/etcd-ca/ca-key.der"
	// crlPathEtcd stores the current certificate revocation list.
	crlPathEtcd = "/etcd-ca/crl.der"

	// This prefix stores the individual certs the etcd CA has issued.
	certPrefixEtcd = "/etcd-ca/certs"
)
259
Leopold Schabel68c58752019-11-14 21:00:59 +0100260// InjectCA copies the CA from data cached during PrecreateCA to etcd.
261// Requires a previous call to PrecreateCA.
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100262func (s *Service) InjectCA() error {
Leopold Schabel68c58752019-11-14 21:00:59 +0100263 if s.bootstrapCA == nil || s.bootstrapCert == nil {
264 panic("bootstrapCA or bootstrapCert are nil - missing PrecreateCA call?")
265 }
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100266 if _, err := s.kv.Put(context.Background(), caPathEtcd, string(s.bootstrapCA.CACertRaw)); err != nil {
267 return err
268 }
Leopold Schabel68c58752019-11-14 21:00:59 +0100269 // TODO(lorenz): Should be wrapped by the master key
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100270 if _, err := s.kv.Put(context.Background(), caKeyPathEtcd, string([]byte(*s.bootstrapCA.PrivateKey))); err != nil {
271 return err
272 }
273 if _, err := s.kv.Put(context.Background(), crlPathEtcd, string(s.bootstrapCA.CRLRaw)); err != nil {
274 return err
275 }
276 certVal, err := x509.ParseCertificate(s.bootstrapCert)
277 if err != nil {
278 return err
279 }
280 serial := hex.EncodeToString(certVal.SerialNumber.Bytes())
281 if _, err := s.kv.Put(context.Background(), path.Join(certPrefixEtcd, serial), string(s.bootstrapCert)); err != nil {
282 return fmt.Errorf("failed to persist certificate: %w", err)
283 }
284 // Clear out bootstrap CA after injecting
285 s.bootstrapCA = nil
286 s.bootstrapCert = []byte{}
287 return nil
288}
289
290func (s *Service) etcdGetSingle(path string) ([]byte, int64, error) {
291 res, err := s.kv.Get(context.Background(), path)
292 if err != nil {
293 return nil, -1, fmt.Errorf("failed to get key from etcd: %w", err)
294 }
295 if len(res.Kvs) != 1 {
Leopold Schabel68c58752019-11-14 21:00:59 +0100296 return nil, -1, errors.New("key not available or multiple keys returned")
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100297 }
298 return res.Kvs[0].Value, res.Kvs[0].ModRevision, nil
299}
300
Leopold Schabel68c58752019-11-14 21:00:59 +0100301func (s *Service) getCAFromEtcd() (*ca.CA, int64, error) {
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100302 // TODO: Technically this could be done in a single request, but it's more logic
303 caCert, _, err := s.etcdGetSingle(caPathEtcd)
304 if err != nil {
305 return nil, -1, fmt.Errorf("failed to get CA certificate from etcd: %w", err)
306 }
307 caKey, _, err := s.etcdGetSingle(caKeyPathEtcd)
308 if err != nil {
309 return nil, -1, fmt.Errorf("failed to get CA key from etcd: %w", err)
310 }
311 // TODO: Unwrap CA key once wrapping is implemented
312 crl, crlRevision, err := s.etcdGetSingle(crlPathEtcd)
313 if err != nil {
314 return nil, -1, fmt.Errorf("failed to get CRL from etcd: %w", err)
315 }
316 idCA, err := ca.FromCertificates(caCert, caKey, crl)
317 if err != nil {
318 return nil, -1, fmt.Errorf("failed to take CA online: %w", err)
319 }
320 return idCA, crlRevision, nil
321}
322
323func (s *Service) IssueCertificate(hostname string) (*api.ConsensusCertificates, error) {
Leopold Schabel68c58752019-11-14 21:00:59 +0100324 idCA, _, err := s.getCAFromEtcd()
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100325 if err != nil {
326 return nil, err
327 }
328 cert, key, err := idCA.IssueCertificate(hostname)
329 if err != nil {
330 return nil, fmt.Errorf("failed to issue certificate: %w", err)
331 }
332 certVal, err := x509.ParseCertificate(cert)
333 if err != nil {
334 return nil, err
335 }
336 serial := hex.EncodeToString(certVal.SerialNumber.Bytes())
337 if _, err := s.kv.Put(context.Background(), path.Join(certPrefixEtcd, serial), string(cert)); err != nil {
Leopold Schabel68c58752019-11-14 21:00:59 +0100338 // We issued a certificate, but failed to persist it. Return an error and forget it ever happened.
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100339 return nil, fmt.Errorf("failed to persist certificate: %w", err)
340 }
341 return &api.ConsensusCertificates{
342 Ca: idCA.CACertRaw,
343 Cert: cert,
344 Crl: idCA.CRLRaw,
345 Key: key,
346 }, nil
347}
348
349func (s *Service) RevokeCertificate(hostname string) error {
350 rand.Seed(time.Now().UnixNano())
351 for {
Leopold Schabel68c58752019-11-14 21:00:59 +0100352 idCA, crlRevision, err := s.getCAFromEtcd()
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100353 if err != nil {
354 return err
355 }
356 allIssuedCerts, err := s.kv.Get(context.Background(), certPrefixEtcd, clientv3.WithPrefix())
357 for _, cert := range allIssuedCerts.Kvs {
358 certVal, err := x509.ParseCertificate(cert.Value)
359 if err != nil {
360 s.Logger.Error("Failed to parse previously issued certificate, this is a security risk", zap.Error(err))
361 continue
362 }
363 for _, dnsName := range certVal.DNSNames {
364 if dnsName == hostname {
365 // Revoke this
366 if err := idCA.Revoke(certVal.SerialNumber); err != nil {
367 // We need to fail if any single revocation fails otherwise outer applications
368 // have no chance of calling this safely
369 return err
370 }
371 }
372 }
373 }
Leopold Schabel68c58752019-11-14 21:00:59 +0100374 // TODO(leo): this needs a test
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100375 cmp := clientv3.Compare(clientv3.ModRevision(crlPathEtcd), "=", crlRevision)
376 op := clientv3.OpPut(crlPathEtcd, string(idCA.CRLRaw))
377 res, err := s.kv.Txn(context.Background()).If(cmp).Then(op).Commit()
378 if err != nil {
379 return fmt.Errorf("failed to persist new CRL in etcd: %w", err)
380 }
381 if res.Succeeded { // Transaction has succeeded
382 break
383 }
384 // Sleep a random duration between 0 and 300ms to reduce serialization failures
385 time.Sleep(time.Duration(rand.Intn(300)) * time.Millisecond)
386 }
387 return nil
388}
389
390func (s *Service) watchCRL() {
Leopold Schabel68c58752019-11-14 21:00:59 +0100391 // TODO(lorenz): Change etcd client to WatchableKV and make this an actual watch
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100392 // This needs changes in more places, so leaving it now
393 s.watchCRLTicker = time.NewTicker(30 * time.Second)
394 for range s.watchCRLTicker.C {
395 crl, _, err := s.etcdGetSingle(crlPathEtcd)
396 if err != nil {
397 s.Logger.Warn("Failed to check for new CRL", zap.Error(err))
398 continue
399 }
400 // This is cryptographic material but not secret, so no constant time compare necessary here
401 if !bytes.Equal(crl, s.lastCRL) {
402 if err := ioutil.WriteFile(filepath.Join(s.config.DataDir, CRLSwapPath), crl, 0600); err != nil {
403 s.Logger.Warn("Failed to write updated CRL", zap.Error(err))
404 }
405 // This uses unix.Rename to guarantee a particular atomic update behavior
406 if err := unix.Rename(filepath.Join(s.config.DataDir, CRLSwapPath), filepath.Join(s.config.DataDir, CRLPath)); err != nil {
407 s.Logger.Warn("Failed to atomically swap updated CRL", zap.Error(err))
408 }
409 }
410 }
411}
412
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200413func (s *Service) OnStop() error {
Lorenz Bruna4ea9d02019-10-31 11:40:30 +0100414 s.watchCRLTicker.Stop()
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +0200415 s.etcd.Close()
416
417 return nil
418}
419
420// IsProvisioned returns whether the node has been setup before and etcd has a data directory
421func (s *Service) IsProvisioned() bool {
422 _, err := os.Stat(s.config.DataDir)
423
424 return !os.IsNotExist(err)
425}
426
427// IsReady returns whether etcd is ready and synced
428func (s *Service) IsReady() bool {
429 return s.ready
430}
431
432// AddMember adds a new etcd member to the cluster
433func (s *Service) AddMember(ctx context.Context, name string, url string) (uint64, error) {
434 urls, err := types.NewURLs([]string{url})
435 if err != nil {
436 return 0, err
437 }
438
439 member := membership.NewMember(name, urls, DefaultClusterToken, nil)
440
441 _, err = s.etcd.Server.AddMember(ctx, *member)
442 if err != nil {
443 return 0, err
444 }
445
446 return uint64(member.ID), nil
447}
448
449// RemoveMember removes a member from the etcd cluster
450func (s *Service) RemoveMember(ctx context.Context, id uint64) error {
451 _, err := s.etcd.Server.RemoveMember(ctx, id)
452 return err
453}
454
455// Health returns the current cluster health
456func (s *Service) Health() {
457}
458
459// GetConfig returns the current consensus config
460func (s *Service) GetConfig() Config {
461 return *s.config
462}
463
464// SetConfig sets the consensus config. Changes are only applied when the service is restarted.
465func (s *Service) SetConfig(config Config) {
466 s.config = &config
467}
468
469// GetInitialClusterString returns the InitialCluster string that can be used to bootstrap a consensus node
470func (s *Service) GetInitialClusterString() string {
471 members := s.etcd.Server.Cluster().Members()
472 clusterString := strings.Builder{}
473
474 for i, m := range members {
475 if i != 0 {
476 clusterString.WriteString(",")
477 }
478 clusterString.WriteString(m.Name)
479 clusterString.WriteString("=")
480 clusterString.WriteString(m.PickPeerURL())
481 }
482
483 return clusterString.String()
484}
485
486// GetNodes returns a list of consensus nodes
487func (s *Service) GetNodes() []Member {
488 members := s.etcd.Server.Cluster().Members()
489 cMembers := make([]Member, len(members))
490 for i, m := range members {
491 cMembers[i] = Member{
492 ID: uint64(m.ID),
493 Name: m.Name,
494 Address: m.PickPeerURL(),
495 Synced: !m.IsLearner,
496 }
497 }
498
499 return cMembers
500}
501
502func (s *Service) GetStore(module, space string) clientv3.KV {
503 return namespace.NewKV(s.kv, fmt.Sprintf("%s:%s", module, space))
504}