Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
| 17 | package curator |
| 18 | |
| 19 | import ( |
| 20 | "context" |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 21 | "fmt" |
| 22 | "net" |
| 23 | "strings" |
| 24 | |
| 25 | "golang.org/x/sys/unix" |
| 26 | "google.golang.org/protobuf/proto" |
| 27 | |
| 28 | ppb "source.monogon.dev/metropolis/node/core/curator/proto/private" |
Serge Bazanski | 3379a5d | 2021-09-09 12:56:40 +0200 | [diff] [blame] | 29 | "source.monogon.dev/metropolis/node/core/identity" |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 30 | "source.monogon.dev/metropolis/node/core/localstorage" |
| 31 | "source.monogon.dev/metropolis/pkg/supervisor" |
| 32 | cpb "source.monogon.dev/metropolis/proto/common" |
| 33 | ) |
| 34 | |
| 35 | // Node is a Metropolis cluster member. A node is a virtual or physical machine |
| 36 | // running Metropolis. This object represents a node only as part of a cluster. |
| 37 | // A machine running Metropolis that is not yet (attempting to be) part of a |
| 38 | // cluster is not considered a Node. |
| 39 | // |
| 40 | // This object is used internally within the curator code. Curator clients do |
| 41 | // not have access to this object and instead rely on protobuf representations |
| 42 | // of objects from the Curator gRPC API. An exception is the cluster bootstrap |
| 43 | // code which needs to bring up a new curator from scratch alongside the rest of |
| 44 | // the cluster. |
| 45 | type Node struct { |
| 46 | // clusterUnlockKey is half of the unlock key required to mount the node's |
| 47 | // data partition. It's stored in etcd, and will only be provided to the |
| 48 | // Node if it can prove its identity via an integrity mechanism (ie. via |
| 49 | // TPM), or when the Node was just created (as the key is generated locally |
| 50 | // by localstorage on first format/mount). |
| 51 | // |
| 52 | // The other part of the unlock key is the LocalUnlockKey that's present on the |
| 53 | // node's ESP partition. |
| 54 | clusterUnlockKey []byte |
| 55 | |
| 56 | // pubkey is the ED25519 public key corresponding to the node's private key |
| 57 | // which it stores on its local data partition. The private part of the key |
| 58 | // never leaves the node. |
| 59 | // |
| 60 | // The public key is used to generate the Node's canonical ID. |
| 61 | pubkey []byte |
| 62 | |
| 63 | // state is the state of this node as seen from the point of view of the |
| 64 | // cluster. See //metropolis/proto:common.proto for more information. |
| 65 | state cpb.NodeState |
| 66 | |
Serge Bazanski | 2893e98 | 2021-09-09 13:06:16 +0200 | [diff] [blame] | 67 | status *cpb.NodeStatus |
| 68 | |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 69 | // A Node can have multiple Roles. Each Role is represented by the presence |
| 70 | // of NodeRole* structures in this structure, with a nil pointer |
| 71 | // representing the lack of a role. |
| 72 | |
| 73 | // kubernetesWorker is set if this node is a Kubernetes worker, ie. running the |
| 74 | // Kubernetes control plan and workload elements. |
| 75 | // In the future, this will be split into a separate worker and control plane |
| 76 | // role. |
| 77 | kubernetesWorker *NodeRoleKubernetesWorker |
| 78 | } |
| 79 | |
| 80 | // NewNodeForBootstrap creates a brand new node without regard for any other |
| 81 | // cluster state. |
| 82 | // |
| 83 | // This can only be used by the cluster bootstrap logic. |
| 84 | func NewNodeForBootstrap(cuk, pubkey []byte) Node { |
| 85 | return Node{ |
| 86 | clusterUnlockKey: cuk, |
| 87 | pubkey: pubkey, |
| 88 | state: cpb.NodeState_NODE_STATE_UP, |
| 89 | // TODO(q3k): make this configurable. |
| 90 | kubernetesWorker: &NodeRoleKubernetesWorker{}, |
| 91 | } |
| 92 | } |
| 93 | |
// NodeRoleKubernetesWorker is the role marking a Node as one that should run
// the Kubernetes control and data plane. It currently carries no
// configuration of its own.
type NodeRoleKubernetesWorker struct{}
| 98 | |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 99 | // ID returns the name of this node. See NodeID for more information. |
| 100 | func (n *Node) ID() string { |
Serge Bazanski | 3379a5d | 2021-09-09 12:56:40 +0200 | [diff] [blame] | 101 | return identity.NodeID(n.pubkey) |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 102 | } |
| 103 | |
| 104 | func (n *Node) String() string { |
| 105 | return n.ID() |
| 106 | } |
| 107 | |
| 108 | // KubernetesWorker returns a copy of the NodeRoleKubernetesWorker struct if |
| 109 | // the Node is a kubernetes worker, otherwise nil. |
| 110 | func (n *Node) KubernetesWorker() *NodeRoleKubernetesWorker { |
| 111 | if n.kubernetesWorker == nil { |
| 112 | return nil |
| 113 | } |
| 114 | kw := *n.kubernetesWorker |
| 115 | return &kw |
| 116 | } |
| 117 | |
Serge Bazanski | 080f7ff | 2021-09-09 13:01:00 +0200 | [diff] [blame] | 118 | var ( |
| 119 | nodeEtcdPrefix = mustNewEtcdPrefix("/nodes/") |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 120 | ) |
| 121 | |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 122 | // etcdPath builds the etcd path in which this node's protobuf-serialized state |
| 123 | // is stored in etcd. |
Serge Bazanski | 080f7ff | 2021-09-09 13:01:00 +0200 | [diff] [blame] | 124 | func (n *Node) etcdPath() (string, error) { |
| 125 | return nodeEtcdPrefix.Key(n.ID()) |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 126 | } |
| 127 | |
| 128 | // proto serializes the Node object into protobuf, to be used for saving to |
| 129 | // etcd. |
| 130 | func (n *Node) proto() *ppb.Node { |
| 131 | msg := &ppb.Node{ |
| 132 | ClusterUnlockKey: n.clusterUnlockKey, |
| 133 | PublicKey: n.pubkey, |
| 134 | FsmState: n.state, |
| 135 | Roles: &cpb.NodeRoles{}, |
Serge Bazanski | 2893e98 | 2021-09-09 13:06:16 +0200 | [diff] [blame] | 136 | Status: n.status, |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 137 | } |
| 138 | if n.kubernetesWorker != nil { |
| 139 | msg.Roles.KubernetesWorker = &cpb.NodeRoles_KubernetesWorker{} |
| 140 | } |
| 141 | return msg |
| 142 | } |
| 143 | |
| 144 | func nodeUnmarshal(data []byte) (*Node, error) { |
| 145 | msg := ppb.Node{} |
| 146 | if err := proto.Unmarshal(data, &msg); err != nil { |
| 147 | return nil, fmt.Errorf("could not unmarshal proto: %w", err) |
| 148 | } |
| 149 | n := &Node{ |
| 150 | clusterUnlockKey: msg.ClusterUnlockKey, |
| 151 | pubkey: msg.PublicKey, |
| 152 | state: msg.FsmState, |
Serge Bazanski | 2893e98 | 2021-09-09 13:06:16 +0200 | [diff] [blame] | 153 | status: msg.Status, |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 154 | } |
| 155 | if msg.Roles.KubernetesWorker != nil { |
| 156 | n.kubernetesWorker = &NodeRoleKubernetesWorker{} |
| 157 | } |
| 158 | return n, nil |
| 159 | } |
| 160 | |
| 161 | // ConfigureLocalHostname uses the node's ID as a hostname, and sets the |
| 162 | // current hostname, and local files like hosts and machine-id accordingly. |
| 163 | // |
| 164 | // TODO(q3k): move this to roleserver? |
| 165 | func (n *Node) ConfigureLocalHostname(ctx context.Context, ephemeral *localstorage.EphemeralDirectory, address net.IP) error { |
| 166 | if err := unix.Sethostname([]byte(n.ID())); err != nil { |
| 167 | return fmt.Errorf("failed to set runtime hostname: %w", err) |
| 168 | } |
| 169 | hosts := []string{ |
| 170 | "127.0.0.1 localhost", |
| 171 | "::1 localhost", |
| 172 | fmt.Sprintf("%s %s", address.String(), n.ID()), |
| 173 | } |
| 174 | if err := ephemeral.Hosts.Write([]byte(strings.Join(hosts, "\n")), 0644); err != nil { |
| 175 | return fmt.Errorf("failed to write /ephemeral/hosts: %w", err) |
| 176 | } |
Serge Bazanski | 3379a5d | 2021-09-09 12:56:40 +0200 | [diff] [blame] | 177 | if err := ephemeral.MachineID.Write([]byte(identity.NodeIDBare(n.pubkey)), 0644); err != nil { |
Serge Bazanski | f0b4da5 | 2021-06-21 20:05:59 +0200 | [diff] [blame] | 178 | return fmt.Errorf("failed to write /ephemeral/machine-id: %w", err) |
| 179 | } |
| 180 | |
| 181 | // Check that we are self-resolvable. |
| 182 | ip, err := net.ResolveIPAddr("ip", n.ID()) |
| 183 | if err != nil { |
| 184 | return fmt.Errorf("failed to self-resolve: %w", err) |
| 185 | } |
| 186 | supervisor.Logger(ctx).Infof("This is node %s at %v", n.ID(), ip) |
| 187 | return nil |
| 188 | } |