blob: 4b0b743b1a5349354a0be4e6bb3c4a8de1dab445 [file] [log] [blame]
Serge Bazanskif0b4da52021-06-21 20:05:59 +02001package curator
2
3import (
4 "context"
Serge Bazanski5839e972021-11-16 15:46:19 +01005 "crypto/x509"
Serge Bazanskif0b4da52021-06-21 20:05:59 +02006 "fmt"
7
Lorenz Brund13c1c62022-03-30 19:58:58 +02008 clientv3 "go.etcd.io/etcd/client/v3"
Serge Bazanskif0b4da52021-06-21 20:05:59 +02009 "google.golang.org/protobuf/proto"
10
Serge Bazanski5839e972021-11-16 15:46:19 +010011 "source.monogon.dev/metropolis/node/core/consensus"
Serge Bazanskif0b4da52021-06-21 20:05:59 +020012 "source.monogon.dev/metropolis/node/core/consensus/client"
Serge Bazanski03758712021-08-17 12:52:11 +020013 ppb "source.monogon.dev/metropolis/node/core/curator/proto/private"
Serge Bazanski3379a5d2021-09-09 12:56:40 +020014 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanskif0b4da52021-06-21 20:05:59 +020015 "source.monogon.dev/metropolis/pkg/pki"
16)
17
// bootstrap.go contains functions specific for integration between the curator
// and cluster bootstrap code (//metropolis/node/core/cluster).
//
// These functions must only be called by the bootstrap code, and are
// effectively well-controlled abstraction leaks. An alternative would be to
// rework the curator API to explicitly support a well-contained and
// well-defined bootstrap procedure, formalized within bootstrap-specific types.
// However, that does not seem to be worth the effort for a tightly coupled
// single consumer like the bootstrap code.
27
Serge Bazanski03758712021-08-17 12:52:11 +020028// BootstrapFinish saves the given Node and initial cluster owner pubkey into
29// etcd, without regard for any other cluster state and directly using a given
30// etcd client.
Serge Bazanskif0b4da52021-06-21 20:05:59 +020031//
Serge Bazanski03758712021-08-17 12:52:11 +020032// This is ran by the cluster bootstrap workflow to finish bootstrapping a
33// cluster - afterwards, this cluster will be ready to serve.
34//
Serge Bazanski5839e972021-11-16 15:46:19 +010035// This must only be used by the cluster bootstrap logic. It is idempotent, thus
36// can be called repeatedly in case of intermittent failures in the bootstrap
37// logic.
38func BootstrapNodeFinish(ctx context.Context, etcd client.Namespaced, node *Node, ownerKey []byte) (caCertBytes, nodeCertBytes []byte, err error) {
39 // Workaround for pkiCA being a global, but BootstrapNodeFinish being called for
40 // multiple, different etcd instances in tests. Doing this ensures that we
41 // always resynchronize with etcd, ie. not keep certificates loaded in memory
42 // even though the underlying certificate in etcd changed.
43 //
44 // TODO(q3k): pass pkiCA explicitly, eg. within a curator object?
45 pkiCA.PrivateKey = nil
46 pkiCA.PublicKey = nil
47
48 // Issue CA and node certificates.
49 caCertBytes, err = pkiCA.Ensure(ctx, etcd)
Serge Bazanski080f7ff2021-09-09 13:01:00 +020050 if err != nil {
Serge Bazanski5839e972021-11-16 15:46:19 +010051 return nil, nil, fmt.Errorf("when ensuring CA: %w", err)
Serge Bazanski080f7ff2021-09-09 13:01:00 +020052 }
Serge Bazanski5839e972021-11-16 15:46:19 +010053 nodeCert := &pki.Certificate{
54 Namespace: &pkiNamespace,
55 Issuer: pkiCA,
56 Template: identity.NodeCertificate(node.pubkey),
57 Mode: pki.CertificateExternal,
58 PublicKey: node.pubkey,
59 Name: fmt.Sprintf("node-%s", node.ID()),
60 }
61 nodeCertBytes, err = nodeCert.Ensure(ctx, etcd)
Serge Bazanskif0b4da52021-06-21 20:05:59 +020062 if err != nil {
Serge Bazanski5839e972021-11-16 15:46:19 +010063 err = fmt.Errorf("when ensuring node cert: %w", err)
64 return
Serge Bazanskif0b4da52021-06-21 20:05:59 +020065 }
66
Serge Bazanski5839e972021-11-16 15:46:19 +010067 nodeCertX509, err := x509.ParseCertificate(nodeCertBytes)
Serge Bazanski03758712021-08-17 12:52:11 +020068 if err != nil {
Serge Bazanski5839e972021-11-16 15:46:19 +010069 err = fmt.Errorf("when parsing node cert: %w", err)
70 return
Serge Bazanski03758712021-08-17 12:52:11 +020071 }
72
Serge Bazanski5839e972021-11-16 15:46:19 +010073 caCertX509, err := x509.ParseCertificate(caCertBytes)
74 if err != nil {
75 err = fmt.Errorf("when parsing CA cert: %w", err)
76 return
77 }
78
79 w := pkiCA.WatchCRL(etcd)
80 defer w.Close()
81 crl, err := w.Get(ctx)
82 if err != nil {
83 err = fmt.Errorf("when retreiving CRL: %w", err)
84 return
85 }
86
87 node.EnableConsensusMember(&consensus.JoinCluster{
88 CACertificate: caCertX509,
89 NodeCertificate: nodeCertX509,
90 ExistingNodes: nil,
91 InitialCRL: crl,
92 })
93
Mateusz Zalega312a2272022-04-25 12:03:58 +020094 nodePath, err := node.etcdNodePath()
Serge Bazanski5839e972021-11-16 15:46:19 +010095 if err != nil {
96 return nil, nil, fmt.Errorf("failed to get node key: %w", err)
97 }
98 nodeRaw, err := proto.Marshal(node.proto())
99 if err != nil {
100 return nil, nil, fmt.Errorf("failed to marshal node: %w", err)
101 }
102 ownerRaw, err := proto.Marshal(&ppb.InitialOwner{
103 PublicKey: ownerKey,
104 })
105 if err != nil {
106 return nil, nil, fmt.Errorf("failed to marshal initial owner: %w", err)
107 }
Mateusz Zalega2930e992022-04-25 12:52:35 +0200108 joinKeyPath, err := node.etcdJoinKeyPath()
109 if err != nil {
110 return nil, nil, fmt.Errorf("failed to get join key: %w", err)
111 }
Serge Bazanski5839e972021-11-16 15:46:19 +0100112
113 // We don't care about the result's success - this is idempotent.
114 _, err = etcd.Txn(ctx).If(
115 clientv3.Compare(clientv3.CreateRevision(nodePath), "=", 0),
116 clientv3.Compare(clientv3.CreateRevision(initialOwnerEtcdPath), "=", 0),
Mateusz Zalega2930e992022-04-25 12:52:35 +0200117 clientv3.Compare(clientv3.CreateRevision(joinKeyPath), "=", 0),
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200118 ).Then(
Serge Bazanski5839e972021-11-16 15:46:19 +0100119 clientv3.OpPut(nodePath, string(nodeRaw)),
120 clientv3.OpPut(initialOwnerEtcdPath, string(ownerRaw)),
Mateusz Zalega2930e992022-04-25 12:52:35 +0200121 clientv3.OpPut(joinKeyPath, node.ID()),
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200122 ).Commit()
123 if err != nil {
Serge Bazanski5839e972021-11-16 15:46:19 +0100124 return nil, nil, fmt.Errorf("failed to store initial cluster state: %w", err)
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200125 }
126
Serge Bazanski5839e972021-11-16 15:46:19 +0100127 return
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200128}