| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 1 | package curator |
| 2 | |
| 3 | import ( |
| 4 | "context" |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 5 | "crypto/x509" |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 6 | "fmt" |
| 7 | |
| Lorenz Brun | d13c1c6 | 2022-03-30 19:58:58 +0200 | [diff] [blame] | 8 | clientv3 "go.etcd.io/etcd/client/v3" |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 9 | "google.golang.org/protobuf/proto" |
| 10 | |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 11 | "source.monogon.dev/metropolis/node/core/consensus" |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 12 | "source.monogon.dev/metropolis/node/core/consensus/client" |
| Serge Bazanski | 0375871 | 2021-08-17 12:52:11 +0200 | [diff] [blame] | 13 | ppb "source.monogon.dev/metropolis/node/core/curator/proto/private" |
| Serge Bazanski | 3379a5d | 2021-09-09 12:56:40 +0200 | [diff] [blame] | 14 | "source.monogon.dev/metropolis/node/core/identity" |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 15 | "source.monogon.dev/metropolis/pkg/pki" |
| 16 | ) |
| 17 | |
| 18 | // bootstrap.go contains functions specific for integration between the curator |
| 19 | // and cluster bootstrap code (//metropolis/node/core/cluster). |
| 20 | // |
| 21 | // These functions must only be called by the bootstrap code, and are |
| 22 | // effectively well-controlled abstraction leaks. An alternative would be to |
| 23 | // rework the curator API to explicitly support a well-contained and |
| 24 | // well-defined bootstrap procedure, formalized within bootstrap-specific types. |
| 25 | // However, that seems to not be worth the effort for a tightly coupled single |
| 26 | // consumer like the bootstrap code. |
| 27 | |
| Serge Bazanski | 0375871 | 2021-08-17 12:52:11 +0200 | [diff] [blame] | 28 | // BootstrapFinish saves the given Node and initial cluster owner pubkey into |
| 29 | // etcd, without regard for any other cluster state and directly using a given |
| 30 | // etcd client. |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 31 | // |
| Serge Bazanski | 0375871 | 2021-08-17 12:52:11 +0200 | [diff] [blame] | 32 | // This is ran by the cluster bootstrap workflow to finish bootstrapping a |
| 33 | // cluster - afterwards, this cluster will be ready to serve. |
| 34 | // |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 35 | // This must only be used by the cluster bootstrap logic. It is idempotent, thus |
| 36 | // can be called repeatedly in case of intermittent failures in the bootstrap |
| 37 | // logic. |
| 38 | func BootstrapNodeFinish(ctx context.Context, etcd client.Namespaced, node *Node, ownerKey []byte) (caCertBytes, nodeCertBytes []byte, err error) { |
| 39 | // Workaround for pkiCA being a global, but BootstrapNodeFinish being called for |
| 40 | // multiple, different etcd instances in tests. Doing this ensures that we |
| 41 | // always resynchronize with etcd, ie. not keep certificates loaded in memory |
| 42 | // even though the underlying certificate in etcd changed. |
| 43 | // |
| 44 | // TODO(q3k): pass pkiCA explicitly, eg. within a curator object? |
| 45 | pkiCA.PrivateKey = nil |
| 46 | pkiCA.PublicKey = nil |
| 47 | |
| 48 | // Issue CA and node certificates. |
| 49 | caCertBytes, err = pkiCA.Ensure(ctx, etcd) |
| Serge Bazanski | 080f7ff | 2021-09-09 13:01:00 +0200 | [diff] [blame] | 50 | if err != nil { |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 51 | return nil, nil, fmt.Errorf("when ensuring CA: %w", err) |
| Serge Bazanski | 080f7ff | 2021-09-09 13:01:00 +0200 | [diff] [blame] | 52 | } |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 53 | nodeCert := &pki.Certificate{ |
| 54 | Namespace: &pkiNamespace, |
| 55 | Issuer: pkiCA, |
| 56 | Template: identity.NodeCertificate(node.pubkey), |
| 57 | Mode: pki.CertificateExternal, |
| 58 | PublicKey: node.pubkey, |
| 59 | Name: fmt.Sprintf("node-%s", node.ID()), |
| 60 | } |
| 61 | nodeCertBytes, err = nodeCert.Ensure(ctx, etcd) |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 62 | if err != nil { |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 63 | err = fmt.Errorf("when ensuring node cert: %w", err) |
| 64 | return |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 65 | } |
| 66 | |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 67 | nodeCertX509, err := x509.ParseCertificate(nodeCertBytes) |
| Serge Bazanski | 0375871 | 2021-08-17 12:52:11 +0200 | [diff] [blame] | 68 | if err != nil { |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 69 | err = fmt.Errorf("when parsing node cert: %w", err) |
| 70 | return |
| Serge Bazanski | 0375871 | 2021-08-17 12:52:11 +0200 | [diff] [blame] | 71 | } |
| 72 | |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 73 | caCertX509, err := x509.ParseCertificate(caCertBytes) |
| 74 | if err != nil { |
| 75 | err = fmt.Errorf("when parsing CA cert: %w", err) |
| 76 | return |
| 77 | } |
| 78 | |
| 79 | w := pkiCA.WatchCRL(etcd) |
| 80 | defer w.Close() |
| 81 | crl, err := w.Get(ctx) |
| 82 | if err != nil { |
| 83 | err = fmt.Errorf("when retreiving CRL: %w", err) |
| 84 | return |
| 85 | } |
| 86 | |
| 87 | node.EnableConsensusMember(&consensus.JoinCluster{ |
| 88 | CACertificate: caCertX509, |
| 89 | NodeCertificate: nodeCertX509, |
| 90 | ExistingNodes: nil, |
| 91 | InitialCRL: crl, |
| 92 | }) |
| 93 | |
| Mateusz Zalega | 312a227 | 2022-04-25 12:03:58 +0200 | [diff] [blame] | 94 | nodePath, err := node.etcdNodePath() |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 95 | if err != nil { |
| 96 | return nil, nil, fmt.Errorf("failed to get node key: %w", err) |
| 97 | } |
| 98 | nodeRaw, err := proto.Marshal(node.proto()) |
| 99 | if err != nil { |
| 100 | return nil, nil, fmt.Errorf("failed to marshal node: %w", err) |
| 101 | } |
| 102 | ownerRaw, err := proto.Marshal(&ppb.InitialOwner{ |
| 103 | PublicKey: ownerKey, |
| 104 | }) |
| 105 | if err != nil { |
| 106 | return nil, nil, fmt.Errorf("failed to marshal initial owner: %w", err) |
| 107 | } |
| Mateusz Zalega | 2930e99 | 2022-04-25 12:52:35 +0200 | [diff] [blame] | 108 | joinKeyPath, err := node.etcdJoinKeyPath() |
| 109 | if err != nil { |
| 110 | return nil, nil, fmt.Errorf("failed to get join key: %w", err) |
| 111 | } |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 112 | |
| 113 | // We don't care about the result's success - this is idempotent. |
| 114 | _, err = etcd.Txn(ctx).If( |
| 115 | clientv3.Compare(clientv3.CreateRevision(nodePath), "=", 0), |
| 116 | clientv3.Compare(clientv3.CreateRevision(initialOwnerEtcdPath), "=", 0), |
| Mateusz Zalega | 2930e99 | 2022-04-25 12:52:35 +0200 | [diff] [blame] | 117 | clientv3.Compare(clientv3.CreateRevision(joinKeyPath), "=", 0), |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 118 | ).Then( |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 119 | clientv3.OpPut(nodePath, string(nodeRaw)), |
| 120 | clientv3.OpPut(initialOwnerEtcdPath, string(ownerRaw)), |
| Mateusz Zalega | 2930e99 | 2022-04-25 12:52:35 +0200 | [diff] [blame] | 121 | clientv3.OpPut(joinKeyPath, node.ID()), |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 122 | ).Commit() |
| 123 | if err != nil { |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 124 | return nil, nil, fmt.Errorf("failed to store initial cluster state: %w", err) |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 125 | } |
| 126 | |
| Serge Bazanski | 5839e97 | 2021-11-16 15:46:19 +0100 | [diff] [blame] | 127 | return |
| Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 128 | } |