blob: 9a84bb754a6b3c6da7f867bf4bcb2ea9adbd3186 [file] [log] [blame]
Serge Bazanskif0b4da52021-06-21 20:05:59 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package curator
18
19import (
20 "context"
Serge Bazanskif0b4da52021-06-21 20:05:59 +020021 "fmt"
22 "net"
23 "strings"
24
25 "golang.org/x/sys/unix"
26 "google.golang.org/protobuf/proto"
27
28 ppb "source.monogon.dev/metropolis/node/core/curator/proto/private"
Serge Bazanski3379a5d2021-09-09 12:56:40 +020029 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanskif0b4da52021-06-21 20:05:59 +020030 "source.monogon.dev/metropolis/node/core/localstorage"
31 "source.monogon.dev/metropolis/pkg/supervisor"
32 cpb "source.monogon.dev/metropolis/proto/common"
33)
34
35// Node is a Metropolis cluster member. A node is a virtual or physical machine
36// running Metropolis. This object represents a node only as part of a cluster.
37// A machine running Metropolis that is not yet (attempting to be) part of a
38// cluster is not considered a Node.
39//
40// This object is used internally within the curator code. Curator clients do
41// not have access to this object and instead rely on protobuf representations
42// of objects from the Curator gRPC API. An exception is the cluster bootstrap
43// code which needs to bring up a new curator from scratch alongside the rest of
44// the cluster.
45type Node struct {
46 // clusterUnlockKey is half of the unlock key required to mount the node's
47 // data partition. It's stored in etcd, and will only be provided to the
48 // Node if it can prove its identity via an integrity mechanism (ie. via
49 // TPM), or when the Node was just created (as the key is generated locally
50 // by localstorage on first format/mount).
51 //
52 // The other part of the unlock key is the LocalUnlockKey that's present on the
53 // node's ESP partition.
54 clusterUnlockKey []byte
55
56 // pubkey is the ED25519 public key corresponding to the node's private key
57 // which it stores on its local data partition. The private part of the key
58 // never leaves the node.
59 //
60 // The public key is used to generate the Node's canonical ID.
61 pubkey []byte
62
63 // state is the state of this node as seen from the point of view of the
64 // cluster. See //metropolis/proto:common.proto for more information.
65 state cpb.NodeState
66
Serge Bazanski2893e982021-09-09 13:06:16 +020067 status *cpb.NodeStatus
68
Serge Bazanskif0b4da52021-06-21 20:05:59 +020069 // A Node can have multiple Roles. Each Role is represented by the presence
70 // of NodeRole* structures in this structure, with a nil pointer
71 // representing the lack of a role.
72
73 // kubernetesWorker is set if this node is a Kubernetes worker, ie. running the
74 // Kubernetes control plan and workload elements.
75 // In the future, this will be split into a separate worker and control plane
76 // role.
77 kubernetesWorker *NodeRoleKubernetesWorker
78}
79
80// NewNodeForBootstrap creates a brand new node without regard for any other
81// cluster state.
82//
83// This can only be used by the cluster bootstrap logic.
84func NewNodeForBootstrap(cuk, pubkey []byte) Node {
85 return Node{
86 clusterUnlockKey: cuk,
87 pubkey: pubkey,
88 state: cpb.NodeState_NODE_STATE_UP,
89 // TODO(q3k): make this configurable.
90 kubernetesWorker: &NodeRoleKubernetesWorker{},
91 }
92}
93
// NodeRoleKubernetesWorker marks a Node as one that should run the Kubernetes
// control and data plane. It currently carries no configuration of its own.
type NodeRoleKubernetesWorker struct{}
98
Serge Bazanskif0b4da52021-06-21 20:05:59 +020099// ID returns the name of this node. See NodeID for more information.
100func (n *Node) ID() string {
Serge Bazanski3379a5d2021-09-09 12:56:40 +0200101 return identity.NodeID(n.pubkey)
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200102}
103
104func (n *Node) String() string {
105 return n.ID()
106}
107
108// KubernetesWorker returns a copy of the NodeRoleKubernetesWorker struct if
109// the Node is a kubernetes worker, otherwise nil.
110func (n *Node) KubernetesWorker() *NodeRoleKubernetesWorker {
111 if n.kubernetesWorker == nil {
112 return nil
113 }
114 kw := *n.kubernetesWorker
115 return &kw
116}
117
Serge Bazanski080f7ff2021-09-09 13:01:00 +0200118var (
119 nodeEtcdPrefix = mustNewEtcdPrefix("/nodes/")
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200120)
121
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200122// etcdPath builds the etcd path in which this node's protobuf-serialized state
123// is stored in etcd.
Serge Bazanski080f7ff2021-09-09 13:01:00 +0200124func (n *Node) etcdPath() (string, error) {
125 return nodeEtcdPrefix.Key(n.ID())
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200126}
127
128// proto serializes the Node object into protobuf, to be used for saving to
129// etcd.
130func (n *Node) proto() *ppb.Node {
131 msg := &ppb.Node{
132 ClusterUnlockKey: n.clusterUnlockKey,
133 PublicKey: n.pubkey,
134 FsmState: n.state,
135 Roles: &cpb.NodeRoles{},
Serge Bazanski2893e982021-09-09 13:06:16 +0200136 Status: n.status,
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200137 }
138 if n.kubernetesWorker != nil {
139 msg.Roles.KubernetesWorker = &cpb.NodeRoles_KubernetesWorker{}
140 }
141 return msg
142}
143
144func nodeUnmarshal(data []byte) (*Node, error) {
145 msg := ppb.Node{}
146 if err := proto.Unmarshal(data, &msg); err != nil {
147 return nil, fmt.Errorf("could not unmarshal proto: %w", err)
148 }
149 n := &Node{
150 clusterUnlockKey: msg.ClusterUnlockKey,
151 pubkey: msg.PublicKey,
152 state: msg.FsmState,
Serge Bazanski2893e982021-09-09 13:06:16 +0200153 status: msg.Status,
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200154 }
155 if msg.Roles.KubernetesWorker != nil {
156 n.kubernetesWorker = &NodeRoleKubernetesWorker{}
157 }
158 return n, nil
159}
160
161// ConfigureLocalHostname uses the node's ID as a hostname, and sets the
162// current hostname, and local files like hosts and machine-id accordingly.
163//
164// TODO(q3k): move this to roleserver?
165func (n *Node) ConfigureLocalHostname(ctx context.Context, ephemeral *localstorage.EphemeralDirectory, address net.IP) error {
166 if err := unix.Sethostname([]byte(n.ID())); err != nil {
167 return fmt.Errorf("failed to set runtime hostname: %w", err)
168 }
169 hosts := []string{
170 "127.0.0.1 localhost",
171 "::1 localhost",
172 fmt.Sprintf("%s %s", address.String(), n.ID()),
173 }
174 if err := ephemeral.Hosts.Write([]byte(strings.Join(hosts, "\n")), 0644); err != nil {
175 return fmt.Errorf("failed to write /ephemeral/hosts: %w", err)
176 }
Serge Bazanski3379a5d2021-09-09 12:56:40 +0200177 if err := ephemeral.MachineID.Write([]byte(identity.NodeIDBare(n.pubkey)), 0644); err != nil {
Serge Bazanskif0b4da52021-06-21 20:05:59 +0200178 return fmt.Errorf("failed to write /ephemeral/machine-id: %w", err)
179 }
180
181 // Check that we are self-resolvable.
182 ip, err := net.ResolveIPAddr("ip", n.ID())
183 if err != nil {
184 return fmt.Errorf("failed to self-resolve: %w", err)
185 }
186 supervisor.Logger(ctx).Infof("This is node %s at %v", n.ID(), ip)
187 return nil
188}