blob: 841529dddbcb94ffa24ac8fbf19727990dd5a750 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package cluster
18
19import (
20 "context"
21 "crypto/ed25519"
22 "crypto/x509"
23 "encoding/hex"
24 "fmt"
25 "net"
26
27 "github.com/golang/protobuf/proto"
28 "go.etcd.io/etcd/clientv3"
29 "golang.org/x/sys/unix"
30
31 "git.monogon.dev/source/nexantic.git/core/internal/localstorage"
32 ipb "git.monogon.dev/source/nexantic.git/core/proto/internal"
33)
34
// Node is a Smalltown cluster member. A node is a virtual or physical machine running Smalltown. This object represents
// a node only as part of a Cluster - i.e., this object will never be available outside of //core/internal/cluster if
// the Node is not part of a Cluster.
// Nodes are inherently tied to their long term storage, which is etcd. As such, methods on this object relate heavily
// to the Node's expected lifecycle on etcd.
type Node struct {
	// clusterUnlockKey is half of the unlock key required to mount the node's data partition. It's stored in etcd, and
	// will only be provided to the Node if it can prove its identity via an integrity mechanism (e.g. via TPM), or
	// when the Node was just created (as the key is generated locally by localstorage on first format/mount).
	// The other part of the unlock key is the LocalUnlockKey that's present on the node's ESP partition.
	clusterUnlockKey []byte
	// certificate is the node's TLS certificate, used to authenticate Smalltown gRPC calls/services (but not
	// consensus/etcd). The certificate for a node is permanent (and never expires). It's self-signed by the node on
	// startup, and contains the node's IP address in its SAN. Callers/services should check directly against the
	// expected certificate, and not against a CA.
	// The certificate's public key is also what the node's ID is derived from (see IDBare).
	certificate x509.Certificate
	// address is the management IP address of the node. The management IP address of a node is permanent.
	address net.IP

	// A Node can have multiple Roles. Each Role is represented by the presence of NodeRole* structures in this
	// structure, with a nil pointer representing the lack of a role.

	// consensusMember is non-nil if the node is a consensus (etcd) cluster member.
	consensusMember *NodeRoleConsensusMember
	// kubernetesWorker is non-nil if the node has the Kubernetes worker role.
	kubernetesWorker *NodeRoleKubernetesWorker
}
60
61// NewNode creates a new Node. This is only called when a New node is supposed to be created as part of a cluster,
62// otherwise it should be loaded from Etcd.
63func NewNode(cuk []byte, address net.IP, certificate x509.Certificate) *Node {
64 if certificate.Raw == nil {
65 panic("new node must contain raw certificate")
66 }
67 return &Node{
68 clusterUnlockKey: cuk,
69 certificate: certificate,
70 address: address,
71 }
72}
73
// NodeRoleConsensusMember defines that the Node is a consensus (etcd) cluster member.
type NodeRoleConsensusMember struct {
	// etcdMemberName is the name of the node as a member of the etcd cluster. This is for now usually the same as the
	// ID() of the Node.
	etcdMemberName string
}
79
// NodeRoleKubernetesWorker defines that the Node should be running the Kubernetes control and data plane.
type NodeRoleKubernetesWorker struct {
	// nodeName is the name of the node in Kubernetes. This is for now usually the same as the ID() of the Node.
	nodeName string
}
85
86// ID returns the name of this node, which is `smalltown-{pubkeyHash}`. This name should be the primary way to refer to
87// Smalltown nodes within a cluster, and is guaranteed to be unique by relying on cryptographic randomness.
88func (n *Node) ID() string {
89 return fmt.Sprintf("smalltown-%s", n.IDBare())
90}
91
92// IDBare returns the `{pubkeyHash}` part of the node ID.
93func (n Node) IDBare() string {
94 pubKey, ok := n.certificate.PublicKey.(ed25519.PublicKey)
95 if !ok {
96 panic("node has non-ed25519 public key")
97 }
98 return hex.EncodeToString(pubKey[:16])
99}
100
// String returns the node's ID, making *Node usable wherever a fmt.Stringer is expected (e.g. in log messages).
// It panics under the same conditions as ID.
func (n *Node) String() string {
	return n.ID()
}
104
105// ConsensusMember returns a copy of the NodeRoleConsensusMember struct if the Node is a consensus member, otherwise
106// nil.
107func (n *Node) ConsensusMember() *NodeRoleConsensusMember {
108 if n.consensusMember == nil {
109 return nil
110 }
111 cm := *n.consensusMember
112 return &cm
113}
114
115// KubernetesWorker returns a copy of the NodeRoleKubernetesWorker struct if the Node is a kubernetes worker, otherwise
116// nil.
117func (n *Node) KubernetesWorker() *NodeRoleKubernetesWorker {
118 if n.kubernetesWorker == nil {
119 return nil
120 }
121 kw := *n.kubernetesWorker
122 return &kw
123}
124
125// etcdPath builds the etcd path in which this node's protobuf-serialized state is stored in etcd.
126func (n *Node) etcdPath() string {
127 return fmt.Sprintf("/nodes/%s", n.ID())
128}
129
130// proto serializes the Node object into protobuf, to be used for saving to etcd.
131func (n *Node) proto() *ipb.Node {
132 msg := &ipb.Node{
133 Certificate: n.certificate.Raw,
134 ClusterUnlockKey: n.clusterUnlockKey,
135 Address: n.address.String(),
136 Roles: &ipb.Node_Roles{},
137 }
138 if n.consensusMember != nil {
139 msg.Roles.ConsensusMember = &ipb.Node_Roles_ConsensusMember{
140 EtcdMemberName: n.consensusMember.etcdMemberName,
141 }
142 }
143 if n.kubernetesWorker != nil {
144 msg.Roles.KubernetesWorker = &ipb.Node_Roles_KubernetesWorker{
145 NodeName: n.kubernetesWorker.nodeName,
146 }
147 }
148 return msg
149}
150
151// Store saves the Node into etcd. This should be called only once per Node (ie. when the Node has been created).
152func (n *Node) Store(ctx context.Context, kv clientv3.KV) error {
153 // Currently the only flow to store a node to etcd is a write-once flow: once a node is created, it cannot be
154 // deleted or updated. In the future, flows to change cluster node roles might be introduced (ie. to promote nodes
155 // to consensus members, etc).
156 key := n.etcdPath()
157 msg := n.proto()
158 nodeRaw, err := proto.Marshal(msg)
159 if err != nil {
160 return fmt.Errorf("failed to marshal node: %w", err)
161 }
162
163 res, err := kv.Txn(ctx).If(
164 clientv3.Compare(clientv3.CreateRevision(key), "=", 0),
165 ).Then(
166 clientv3.OpPut(key, string(nodeRaw)),
167 ).Commit()
168 if err != nil {
169 return fmt.Errorf("failed to store node: %w", err)
170 }
171
172 if !res.Succeeded {
173 return fmt.Errorf("attempted to re-register node (unsupported flow)")
174 }
175 return nil
176}
177
178// MakeConsensusMember turns the node into a consensus member with a given name. This only configures internal fields,
179// and does not actually start any services.
180func (n *Node) MakeConsensusMember(etcdMemberName string) error {
181 if n.consensusMember != nil {
182 return fmt.Errorf("node already is consensus member")
183 }
184 n.consensusMember = &NodeRoleConsensusMember{
185 etcdMemberName: etcdMemberName,
186 }
187 return nil
188}
189
190// MakeKubernetesWorker turns the node into a kubernetes worker with a given name. This only configures internal fields,
191// and does not actually start any services.
192func (n *Node) MakeKubernetesWorker(name string) error {
193 if n.kubernetesWorker != nil {
194 return fmt.Errorf("node is already kubernetes worker")
195 }
196 n.kubernetesWorker = &NodeRoleKubernetesWorker{
197 nodeName: name,
198 }
199 return nil
200}
201
202func (n *Node) Address() net.IP {
203 return n.address
204}
205
206// ConfigureLocalHostname uses the node's ID as a hostname, and sets the current hostname, and local files like hosts
207// and machine-id accordingly.
208func (n *Node) ConfigureLocalHostname(etc *localstorage.EtcDirectory) error {
209 if err := unix.Sethostname([]byte(n.ID())); err != nil {
210 return fmt.Errorf("failed to set runtime hostname: %w", err)
211 }
212 if err := etc.Hosts.Write([]byte(fmt.Sprintf("%s %s", "127.0.0.1", n.ID())), 0644); err != nil {
213 return fmt.Errorf("failed to write /etc/hosts: %w", err)
214 }
215 if err := etc.MachineID.Write([]byte(n.IDBare()), 0644); err != nil {
216 return fmt.Errorf("failed to write /etc/machine-id: %w", err)
217 }
218 return nil
219}