// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package metropolis.proto.common;

import "google/protobuf/timestamp.proto";

option go_package = "source.monogon.dev/metropolis/proto/common";

// NodeRoles are the possible roles that a Metropolis Node should run within the
// cluster. These are configured by the cluster and can be retrieved through the
// Curator.
//
// Fields contained within each individual role are publicly available, so while
// they can be used to carry required data to start up services for a given
// role, this must not be confidential/private data.
message NodeRoles {
    // KubernetesController is the data attached to the Kubernetes controller
    // role. It currently carries no fields.
    message KubernetesController {
    }
    // KubernetesWorker is the data attached to the Kubernetes worker role. It
    // currently carries no fields.
    message KubernetesWorker {
    }
    // ConsensusMember is the data attached to the consensus (etcd) member
    // role.
    message ConsensusMember {
        // Peer is a single entry of the peers list of a ConsensusMember.
        //
        // NOTE(review): the fields presumably hold the etcd member name and
        // peer URL of another consensus member - confirm against the
        // consumer. The capitalized field names do not follow the usual
        // lower_snake_case convention, but are kept as-is because renaming
        // them would change generated code and JSON keys.
        message Peer {
            string Name = 1;
            string URL = 2;
        }

        // ca_certificate is a DER-encoded x509 certificate of the etcd
        // cluster's CA. The member must use this certificate to verify the
        // identity of the cluster it's connecting to.
        bytes ca_certificate = 1;
        // peer_certificate is a DER-encoded x509 certificate of this node's
        // etcd peer listener. The member must serve member traffic using this
        // certificate. The private key corresponding to this certificate is
        // the same as the node's primary private keypair.
        bytes peer_certificate = 2;
        // initial_crl is a certificate revocation list that the etcd member
        // should be started with. After startup, the member will maintain its
        // own CRL by updating it from its primary storage location, an etcd
        // value.
        //
        // TODO(q3k): don't pass this here, instead pass this over an etcd
        // watcher and curator.Watch.
        bytes initial_crl = 3;
        // peers is the list of Peer entries handed to this member.
        repeated Peer peers = 4;
    }

    // Per-role data. Each field uses the nested message type of the same
    // name.
    KubernetesWorker kubernetes_worker = 1;
    ConsensusMember consensus_member = 2;
    KubernetesController kubernetes_controller = 3;
}

// NodeState is the state of a Metropolis node from the point of view of the
// cluster it is a part of (or intending to be a part of).
enum NodeState {
    // INVALID: the zero value, which is what an unset NodeState field reads
    // as. It does not describe an actual node state.
    NODE_STATE_INVALID = 0;

    // NEW: the node has established a first contact with the cluster and
    // intends to register into it. The node's identity has not been verified
    // and no hardware attestation of the new node was performed.
    // The node has generated a CUK/NUK and set up storage encrypted with the
    // combination of both keys.
    // The node has generated a private/public keypair, and that keypair has
    // been used to contact the already running Cluster.
    NODE_STATE_NEW = 1;
    // STANDBY: the node has successfully passed identity and hardware
    // attestation checks as defined by the cluster policy. The node still isn't
    // part of the cluster, as it itself might perform checks against the
    // running Cluster.
    NODE_STATE_STANDBY = 2;
    // UP: the node has passed all preconditions for STANDBY and has also
    // performed a commit into the cluster by exchanging its CUK for a
    // certificate issued by the cluster.
    // The node is now ready to serve, and its certificate can be used to
    // authenticate its identity cryptographically.
    NODE_STATE_UP = 3;
    // DECOMMISSIONED: the node has successfully been decommissioned and can be
    // deleted.
    //
    // TODO(q3k): add missing -ING states.
    NODE_STATE_DECOMMISSIONED = 4;
}

// ClusterState is the state of the cluster from the point of view of a node.
// Different subsystems can watch this state and depend on it for behaviour
// (eg. start serving when HOME, maybe self-fence on SPLIT, etc.).
enum ClusterState {
    // INVALID: the zero value, which is what an unset ClusterState field reads
    // as. It does not describe an actual cluster state.
    CLUSTER_STATE_INVALID = 0;

    // UNKNOWN: the node has not yet determined the existence of a cluster it
    // should join or start. This is a transient, initial state that should only
    // manifest during boot.
    CLUSTER_STATE_UNKNOWN = 1;
    // FOREIGN: the node is attempting to register into an already existing
    // cluster with which it managed to make preliminary contact, but the
    // cluster has not yet fully productionized the node (eg. the node is still
    // being hardware attested, or the operator needs to confirm the
    // registration of this node).
    CLUSTER_STATE_FOREIGN = 2;
    // TRUSTED: the node is attempting to register into an already registered
    // cluster, and has been trusted by it. The node is now attempting to fully
    // commit to registering into the cluster.
    CLUSTER_STATE_TRUSTED = 3;
    // HOME: the node is part of this cluster. This is the state in which the
    // node will spend the bulk of its time.
    CLUSTER_STATE_HOME = 4;
    // DISOWNING: the node has been disowned (ie. removed) by the cluster. It
    // will never be part of any cluster again, and it will be decommissioned
    // by the operator.
    CLUSTER_STATE_DISOWNING = 5;
    // SPLIT: the node would usually be HOME in a cluster, but has been split
    // from the consensus of the cluster. This can happen for nodes running
    // consensus when consensus is lost (eg. when there is no quorum or this
    // node has been netsplit), and for other nodes if they have lost network
    // connectivity to the consensus nodes. Clients should make their own
    // decision what action to perform in this state, depending on the level of
    // consistency required and whether it makes sense for the node to fence its
    // services off.
    CLUSTER_STATE_SPLIT = 6;
}

// NodeStatus is the set of fields that a node reports about itself. Because
// every node, including a compromised one, can update these to any value, this
// data is inherently less trusted than other data available about a node.
message NodeStatus {
    // RunningCurator describes the curator service running on a node.
    message RunningCurator {
        // port is the TCP port on which the curator is listening.
        int32 port = 1;
    }

    // external_address is the IP address at which the node expects
    // management, cluster and user traffic to arrive (ie. the address on
    // which it is listening for gRPC, and role-specific services like etcd
    // and Kubernetes).
    string external_address = 1;
    // running_curator contains information about the curator service running
    // on this node, or is nil if the service is not running.
    RunningCurator running_curator = 3;
    // timestamp is associated with the last status update. It's set with a
    // nanosecond granularity.
    google.protobuf.Timestamp timestamp = 2;
}

// The Cluster Directory is information about the network addressing of nodes
// in a cluster. It is a serialized snapshot of some of the state within the
// etcd cluster, and can be used by external processes (like a node Registering
// into the cluster) to know how to reach this cluster over the network. It can
// be thought of as a phonebook, or a static name/address configuration that
// could live in /etc/hosts.
//
// The directory explicitly doesn't carry any information about the cluster's
// identity or security - these should be configured and checked by higher
// level configuration and processes. The directory can be stored and
// transmitted in cleartext and without any integrity checks (like saved to the
// EFI system partition across reboots) and any malicious change to it will
// cause no more than a denial of service against the consumer of this
// directory. This is because all nodes contacted must present a valid cluster
// identity/certificate before they are trusted by the consumers of this
// directory.
message ClusterDirectory {
    // Node is a single entry of the directory.
    message Node {
        // Address is one network address of a node.
        message Address {
            // NOTE(review): presumably an IP address or hostname without a
            // port - confirm against the producer of the directory.
            string host = 1;
        }

        // public_key of the node this entry describes.
        // NOTE(review): key type and encoding are not visible here - confirm.
        bytes public_key = 1;
        // addresses at which the node can be reached.
        repeated Address addresses = 2;
    }

    // nodes is the list of all entries in this directory.
    repeated Node nodes = 1;
}

// NodeClusterNetworking describes how a node is connected to the cluster
// networking (ie. the WireGuard mesh).
message NodeClusterNetworking {
    // Prefix is a single route exported by a node.
    message Prefix {
        // NOTE(review): presumably the route in CIDR notation (address/length)
        // - confirm accepted formats against the consumer.
        string cidr = 1;
    }

    // wireguard_pubkey is the base64-encoded public key used by the node.
    string wireguard_pubkey = 1;
    // prefixes are networking routes exported by the node to the cluster
    // networking mesh, and are programmed by other nodes into their wireguard
    // peer config.
    repeated Prefix prefixes = 2;
}

// LeveledLogSeverity is the severity level of a leveled log entry. It
// corresponds to //metropolis/pkg/logtree.Severity.
//
// The value names carry no enum-type prefix; they are part of the published
// interface and must stay as they are.
enum LeveledLogSeverity {
    // Zero value, which is what an unset severity field reads as.
    INVALID = 0;
    INFO = 1;
    WARNING = 2;
    ERROR = 3;
    FATAL = 4;
}

// LogFilter is a filter set when requesting logs for a given DN. The message
// is equivalent to the following GADT enum:
//   data LogFilter = WithChildren
//                  | OnlyRaw
//                  | OnlyLeveled
//                  | LeveledWithMinimumSeverity(Severity)
//
// When requesting logs, multiple LogFilters can be chained/combined as long as
// they do not conflict.
message LogFilter {
    // Return entries not only for the given DN, but for all child DNs as
    // well. For instance, if the requested DN is foo, entries logged to foo,
    // foo.bar and foo.bar.baz will all be returned.
    message WithChildren {
    }
    // Return only raw logging entries. Conflicts with OnlyLeveled filters.
    message OnlyRaw {
    }
    // Return only leveled logging entries. Conflicts with OnlyRaw filters.
    message OnlyLeveled {
    }
    // If leveled logs are returned, discard all entries at severity lower
    // than `minimum`.
    message LeveledWithMinimumSeverity {
        LeveledLogSeverity minimum = 1;
    }

    // filter selects exactly one of the filter kinds above.
    //
    // NOTE(review): field number 2 is not used here. If it once belonged to a
    // removed filter it should be marked `reserved` - confirm with history.
    oneof filter {
        WithChildren with_children = 1;
        OnlyRaw only_raw = 3;
        OnlyLeveled only_leveled = 4;
        LeveledWithMinimumSeverity leveled_with_minimum_severity = 5;
    }
}

// LogEntry corresponding to logtree.LogEntry in //metropolis/pkg/logtree.
message LogEntry {
    // A leveled log entry emitted from a compatible system, eg. Metropolis
    // code or a klog-parsed line.
    message Leveled {
        // Text of the entry, one element per line.
        repeated string lines = 1;
        // Time associated with the entry.
        // NOTE(review): presumably the time of emission - confirm.
        google.protobuf.Timestamp timestamp = 2;
        // Severity at which the entry was logged.
        LeveledLogSeverity severity = 3;
        // Source of the error, expressed as file:line.
        string location = 4;
    }
    // Raw log entry, captured from an external system without parsing. Might
    // contain some timestamp/level/origin information embedded in data. Data
    // contained within should be treated as unsanitized external data.
    message Raw {
        // The captured line, possibly truncated (see original_length).
        string data = 1;
        // Original length of line, set if data was truncated.
        int64 original_length = 2;
    }

    // Origin DN.
    string dn = 1;
    // kind holds the payload of the entry: either leveled or raw.
    oneof kind {
        Leveled leveled = 2;
        Raw raw = 3;
    }
}

// ClusterConfiguration contains the entirety of the user-configurable behaviour
// of the cluster that is scoped to the entirety of the cluster (vs. per-node
// configuration, which is kept alongside Node).
//
// It can be set initially when a cluster is being bootstrapped (in
// NodeParameters.ClusterBootstrap), and then can be partially managed by
// management calls to the curator.
message ClusterConfiguration {
    // TPMMode defines the TPM usage policy for cluster nodes. When nodes
    // register into the cluster (and then join into it) they will report their
    // TPM availability, and in return the cluster will respond whether they
    // should use that TPM or not.
    //
    // If a node is instructed to use its TPM, it will use it to encrypt its part
    // of the disk encryption key when saving it to the EFI system partition.
    // That means that the node will only be able to re-join the cluster if its
    // secure boot configuration doesn't change.
    //
    // If a node is instructed to not use its TPM, it will save its part of the
    // disk encryption key straight onto the EFI system partition without any
    // further encryption. It still needs to connect to a working cluster to
    // retrieve the other part of the key. This means that the configuration is
    // secure vs. offline disk decryption attempts, but not secure if an
    // attacker can connect to a cluster and impersonate the node in order to
    // retrieve the other part of its key.
    enum TPMMode {
        // Zero value, which is what an unset tpm_mode reads as.
        TPM_MODE_INVALID = 0;
        // Nodes need to join with a TPM2.0 device and will be instructed to
        // use it.
        TPM_MODE_REQUIRED = 1;
        // Nodes will be allowed to join regardless of TPM2.0 presence, and will
        // be instructed to use it if they have one.
        TPM_MODE_BEST_EFFORT = 2;
        // Regardless of the node's local TPM presence it will be instructed to
        // not use it.
        TPM_MODE_DISABLED = 3;
    }
    // tpm_mode is the TPM usage policy of this cluster.
    TPMMode tpm_mode = 1;

    // StorageSecurityPolicy defines which node storage security settings are
    // accepted by the cluster. Nodes are informed of the cluster policy when
    // registering into the cluster, alongside a cluster-recommended storage
    // security setting. The node then reports its selected node storage setting
    // during its Commit call which the cluster verifies against its policy.
    enum StorageSecurityPolicy {
        // Zero value, which is what an unset storage_security_policy reads as.
        STORAGE_SECURITY_POLICY_INVALID = 0;
        // The cluster accepts any storage security.
        STORAGE_SECURITY_POLICY_PERMISSIVE = 1;
        // The cluster accepts any storage security that offers encryption.
        STORAGE_SECURITY_POLICY_NEEDS_ENCRYPTION = 2;
        // The cluster accepts any storage security that offers encryption and
        // authentication.
        STORAGE_SECURITY_POLICY_NEEDS_ENCRYPTION_AND_AUTHENTICATION = 3;
        // The cluster only accepts unencrypted and unauthenticated node storage.
        STORAGE_SECURITY_POLICY_NEEDS_INSECURE = 4;
    }
    // storage_security_policy is the storage security policy of this cluster.
    StorageSecurityPolicy storage_security_policy = 2;
}

// NodeTPMUsage tells whether a node has a TPM2.0, and whether that TPM is (or
// should be) actively used to seal secrets before they are saved to the node's
// EFI system partition.
enum NodeTPMUsage {
    // Zero value, which is what an unset NodeTPMUsage field reads as.
    NODE_TPM_INVALID = 0;
    // The node does not have a TPM 2.0.
    NODE_TPM_NOT_PRESENT = 1;
    // The node has a TPM 2.0, but the cluster configuration mandates not using
    // it.
    NODE_TPM_PRESENT_BUT_UNUSED = 2;
    // The node has a TPM 2.0 and is actively using it.
    NODE_TPM_PRESENT_AND_USED = 3;
}

// NodeStorageSecurity is the way in which a node encrypts and/or authenticates
// its local storage. Put differently, it is the configuration setting for disk
// encryption (ie. via dm-crypt) and disk integrity (ie. via dm-integrity) of
// the Metropolis data partition.
enum NodeStorageSecurity {
    // Zero value, which is what an unset NodeStorageSecurity field reads as.
    NODE_STORAGE_SECURITY_INVALID = 0;
    // Disk storage is neither encrypted nor authenticated. The data partition
    // is a plain XFS partition, and the node's credentials are stored on it
    // directly.
    NODE_STORAGE_SECURITY_INSECURE = 1;
    // Disk storage is encrypted but not authenticated. The data partition is
    // an XFS partition mounted through dm-crypt.
    NODE_STORAGE_SECURITY_ENCRYPTED = 2;
    // Disk storage is both encrypted and authenticated. The data partition is
    // an XFS partition mounted through dm-integrity and dm-crypt.
    NODE_STORAGE_SECURITY_AUTHENTICATED_ENCRYPTED = 3;
}