blob: 9d6b0c4311c6a5c17111d2683d6572eaa0f17851 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020017syntax = "proto3";
Serge Bazanski662b5b32020-12-21 13:49:00 +010018package metropolis.proto.common;
Serge Bazanski31370b02021-01-07 16:31:14 +010019option go_package = "source.monogon.dev/metropolis/proto/common";
Hendrik Hofstadt0d7c91e2019-10-23 21:44:47 +020020
Mateusz Zalega28800ad2022-07-08 14:56:02 +020021import "google/protobuf/timestamp.proto";
Serge Bazanski0ccc85b2023-11-20 12:59:20 +010022import "version/spec/spec.proto";
Mateusz Zalega28800ad2022-07-08 14:56:02 +020023
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020024import "osbase/logtree/proto/logtree.proto";
Tim Windelschmidt8814f522024-05-08 00:41:13 +020025
// NodeRoles are the possible roles that a Metropolis Node should run within the
// cluster. These are configured by the cluster and can be retrieved through the
// Curator.
//
// Fields contained within each individual role message are publicly available,
// so while they can be used to carry required data to start up services for a
// given role, this must not be confidential/private data.
message NodeRoles {
  // KubernetesController currently carries no role-specific data.
  message KubernetesController {
  }
  // KubernetesWorker currently carries no role-specific data.
  message KubernetesWorker {
  }
  // ConsensusMember carries the data a node needs to run as an etcd member.
  message ConsensusMember {
    // ca_certificate is a DER-encoded x509 certificate of the etcd
    // cluster's CA. The member must use this certificate to verify the
    // identity of the cluster it's connecting to.
    bytes ca_certificate = 1;
    // peer_certificate is a DER-encoded x509 certificate of this node's
    // etcd peer listener. The member must serve member traffic using this
    // certificate. The private key corresponding to this certificate is
    // the same as the node's primary private keypair.
    bytes peer_certificate = 2;
    // initial_crl is a certificate revocation list that the etcd member
    // should be started with. After startup, the member will maintain its
    // own CRL by updating it from its primary storage location, and etcd
    // value.
    //
    // TODO(q3k): don't pass this here, instead pass this over an etcd
    // watcher and curator.Watch.
    bytes initial_crl = 3;
    // Peer is a name/URL pair.
    // NOTE(review): presumably describes another etcd member and the URL it
    // can be reached at - confirm against the consumer of this message.
    message Peer {
      string name = 1;
      string url = 2;
    }
    repeated Peer peers = 4;
  }
  KubernetesWorker kubernetes_worker = 1;
  ConsensusMember consensus_member = 2;
  KubernetesController kubernetes_controller = 3;
}
66
// NodeLabels are labels assigned to a node.
//
// Labels are string key/value pairs modeled after the Kubernetes label concept.
// They can be used to assign user-specific metadata to nodes like IDs from other
// systems or geographical location. They are treated like opaque strings by
// Metropolis itself.
//
// Every key and value must be a string between 1 and 63 characters long
// (inclusive). Each character must be a valid ASCII character from the following
// range: a-z, A-Z, 0-9, '-', '_' or '.'. The first character must be a-z, A-Z or
// 0-9. This is close but not exact to DNS label requirements (for example, '.'
// or '_' are generally not valid DNS label parts... but that's a discussion for
// another day).
//
// Keys must not repeat across node labels - that is, NodeLabels must be
// convertible to/from a string/string map in Go. Pair ordering is not preserved,
// but pair order in labels received from Metropolis API calls is stable (however
// it is arbitrary).
//
// A node cannot have more than 128 labels.
message NodeLabels {
  // Pair is a single label: one key and its value, both subject to the
  // constraints described on NodeLabels.
  message Pair {
    string key = 1;
    string value = 2;
  }
  repeated Pair pairs = 1;
}
94
// NodeState is the state of a Metropolis node from the point of view of the
// cluster it is a part of (or intending to be a part of).
enum NodeState {
  // INVALID: the zero value of this enum.
  NODE_STATE_INVALID = 0;

  // NEW: the node has established a first contact with the cluster and
  // intends to register into it. The node's identity has not been verified
  // and no hardware attestation of the new node was performed.
  // The node has generated a CUK/NUK and set up storage encrypted with the
  // combination of both keys.
  // The node has generated a private/public keypair, and that keypair has
  // been used to contact the already running Cluster.
  NODE_STATE_NEW = 1;
  // STANDBY: the node has successfully passed identity and hardware
  // attestation checks as defined by the cluster policy. The node still isn't
  // part of the cluster, as it itself might perform checks against the
  // running Cluster.
  NODE_STATE_STANDBY = 2;
  // UP: the node has passed all preconditions for STANDBY and has also
  // performed a commit into the cluster by exchanging its CUK for a
  // certificate issued by the cluster.
  // The node is now ready to serve, and its certificate can be used to
  // authenticate its identity cryptographically.
  NODE_STATE_UP = 3;
  // DECOMMISSIONED: The node has successfully been decommissioned and can be
  // deleted.
  //
  // TODO(q3k): add missing -ING states.
  NODE_STATE_DECOMMISSIONED = 4;
}
125
// ClusterState is the state of the cluster from the point of view of a node.
// Different subsystems can watch this state and depend on it for behaviour
// (eg. start serving when HOME, maybe self-fence on SPLIT, etc.).
enum ClusterState {
  // INVALID: the zero value of this enum.
  CLUSTER_STATE_INVALID = 0;

  // UNKNOWN: the node has not yet determined the existence of a cluster it
  // should join or start. This is a transient, initial state that should only
  // manifest during boot.
  CLUSTER_STATE_UNKNOWN = 1;
  // FOREIGN: the node is attempting to register into an already existing
  // cluster with which it managed to make preliminary contact, but the
  // cluster has not yet fully productionized the node (eg. the node is still
  // being hardware attested, or the operator needs to confirm the
  // registration of this node).
  CLUSTER_STATE_FOREIGN = 2;
  // TRUSTED: the node is attempting to register into an already registered
  // cluster, and has been trusted by it. The node is now attempting to fully
  // commit to registering into the cluster.
  CLUSTER_STATE_TRUSTED = 3;
  // HOME: the node is part of this cluster. This is the state in which the
  // node will spend the bulk of its time.
  CLUSTER_STATE_HOME = 4;
  // DISOWNING: the node has been disowned (ie., removed) by the cluster, will
  // never be part of any cluster again, and will be decommissioned by the
  // operator.
  CLUSTER_STATE_DISOWNING = 5;
  // SPLIT: the node would usually be HOME in a cluster, but has been split
  // from the consensus of the cluster. This can happen for nodes running
  // consensus when consensus is lost (eg. when there is no quorum or this
  // node has been netsplit), and for other nodes if they have lost network
  // connectivity to the consensus nodes. Clients should make their own
  // decision what action to perform in this state, depending on the level of
  // consistency required and whether it makes sense for the node to fence its
  // services off.
  CLUSTER_STATE_SPLIT = 6;
}
Serge Bazanski2893e982021-09-09 13:06:16 +0200163
// NodeStatus contains all fields self-reported by nodes. This data is
// inherently less trusted than other data available about a node, as it can be
// updated to any value by each node individually, including compromised nodes.
message NodeStatus {
  // external_address is the IP address that the node expects management,
  // cluster and user traffic to arrive at (ie. the address on which it is
  // listening for gRPC, and role-specific services like etcd and
  // Kubernetes).
  string external_address = 1;
  // RunningCurator describes a curator service running on a node.
  message RunningCurator {
    // port is the TCP port on which the curator is listening.
    int32 port = 1;
  }
  // running_curator contains information about the curator service running
  // on this node, or is nil (unset) if the service is not running.
  RunningCurator running_curator = 3;
  // timestamp is the point in time associated with the last status update.
  // It's set with a nanosecond granularity.
  google.protobuf.Timestamp timestamp = 2;
  // version is the Metropolis version that this node is running.
  version.spec.Version version = 4;
  // boot_id is a random value chosen for each kernel start.
  // If this value changes, a new kernel instance is running on the node.
  bytes boot_id = 5;
}
189
// The Cluster Directory is information about the network addressing of nodes
// in a cluster. It is a serialized snapshot of some of the state within the
// etcd cluster, and can be used by external processes (like a node Registering
// into the cluster) to know how to reach this cluster over the network. It can
// be thought of as a phonebook, or a static name/address configuration that
// could live in /etc/hosts.
//
// The directory explicitly doesn't carry any information about the cluster's
// identity or security - these should be configured and checked by higher
// level configuration and processes. The directory can be stored and
// transmitted in cleartext and without any integrity checks (like saved to the
// EFI system partition across reboots) and any malicious change to it will
// cause no more than a denial of service against the consumer of this
// directory. This is because all nodes contacted must present a valid cluster
// identity/certificate before they are trusted by the consumers of this
// directory.
message ClusterDirectory {
  // Node is a single directory entry: a node and the addresses listed for it.
  message Node {
    // id identifies the node this entry is about.
    // NOTE(review): the exact format of the ID is not visible here - confirm.
    string id = 3;
    // Field number 1 is reserved and must not be reused.
    reserved 1;
    // Address is one network address of a node.
    message Address {
      // NOTE(review): presumably an IP address or hostname - confirm.
      string host = 1;
    }
    repeated Address addresses = 2;
  }
  repeated Node nodes = 1;
}
Serge Bazanskie6bc2272023-03-28 16:28:13 +0200217
218
// NodeClusterNetworking carries information about the cluster networking (ie.
// WireGuard mesh) connectivity of a node.
message NodeClusterNetworking {
  // Prefix is a single network prefix exported by a node.
  message Prefix {
    // NOTE(review): presumably the prefix in CIDR notation, as the field name
    // suggests - confirm accepted address families against the consumer.
    string cidr = 1;
  }
  // wireguard_pubkey is the base64-encoded public key used by the node.
  string wireguard_pubkey = 1;
  // prefixes are networking routes exported by the node to the cluster
  // networking mesh, and are programmed by other nodes into their wireguard
  // peer config.
  repeated Prefix prefixes = 2;
}
Serge Bazanskida114862023-03-29 17:46:42 +0200231
// Filter set when requesting logs for a given DN. This message is equivalent to
// the following GADT enum:
//    data LogFilter = WithChildren
//                   | OnlyRaw
//                   | OnlyLeveled
//                   | LeveledWithMinimumSeverity(Severity)
//
// Multiple LogFilters can be chained/combined when requesting logs, as long as
// they do not conflict.
message LogFilter {
  // Entries will be returned not only for the given DN, but all child DNs as
  // well. For instance, if the requested DN is foo, entries logged to foo,
  // foo.bar and foo.bar.baz will all be returned.
  message WithChildren {
  }
  // Only raw logging entries will be returned. Conflicts with OnlyLeveled
  // filters.
  message OnlyRaw {
  }
  // Only leveled logging entries will be returned. Conflicts with OnlyRaw
  // filters.
  message OnlyLeveled {
  }
  // If leveled logs are returned, all entries at severity lower than `minimum`
  // will be discarded.
  message LeveledWithMinimumSeverity {
    osbase.logtree.proto.LeveledLogSeverity minimum = 1;
  }
  // Exactly one kind of filter is carried per LogFilter message; combine
  // filters by sending several LogFilter messages.
  oneof filter {
    WithChildren with_children = 1;
    OnlyRaw only_raw = 3;
    OnlyLeveled only_leveled = 4;
    LeveledWithMinimumSeverity leveled_with_minimum_severity = 5;
  }
}
267
// ClusterConfiguration contains the entirety of the user-configurable behaviour
// of the cluster that is scoped to the entirety of the cluster (vs. per-node
// configuration, which is kept alongside Node).
//
// It can be set initially when a cluster is being bootstrapped (in
// NodeParameters.ClusterBootstrap), and then can be partially managed by
// management calls to the curator.
message ClusterConfiguration {
  // cluster_domain is the domain name which identifies the cluster.
  // It should be unique, and ideally a public DNS name, but one under
  // .internal works too. The cluster domain is used for different purposes:
  //
  //  - To identify the cluster in clients like metroctl.
  //  - To resolve control plane endpoints with DNS in clients.
  //  - As the SPIFFE trust domain name of the cluster. Every identity
  //    issued by the cluster is rooted under `spiffe://cluster_domain/`.
  //  - As the issuer of OpenID Connect identity tokens. The discovery
  //    document is thus hosted at
  //    https://cluster_domain/.well-known/openid-configuration
  string cluster_domain = 4;

  // tpm_mode defines the TPM usage policy for cluster nodes. When nodes
  // register into the cluster (and then join into it) they will report their
  // TPM availability, and in return the cluster will respond whether they
  // should use that TPM or not.
  //
  // If a node is instructed to use its TPM, it will use it to encrypt its part
  // of the disk encryption key when saving it to the EFI system partition.
  // That means that the node will only be able to re-join the cluster if its
  // secure boot configuration doesn't change.
  //
  // If a node is instructed to not use its TPM, it will save its part of the
  // disk encryption key straight onto the EFI system partition without any
  // further encryption. It still needs to connect to a working cluster to
  // retrieve the other part of the key. This means that the configuration is
  // secure vs. offline disk decryption attempts, but not secure if an
  // attacker can connect to a cluster and impersonate the node in order to
  // retrieve the other part of its key.
  enum TPMMode {
    TPM_MODE_INVALID = 0;
    // Nodes need to join with a TPM2.0 device and will be instructed to
    // use it.
    TPM_MODE_REQUIRED = 1;
    // Nodes will be allowed to join regardless of TPM2.0 presence, and will
    // be instructed to use it if they have one.
    TPM_MODE_BEST_EFFORT = 2;
    // Regardless of the node's local TPM presence it will be instructed to
    // not use it.
    TPM_MODE_DISABLED = 3;
  }
  TPMMode tpm_mode = 1;

  // storage_security_policy defines which node storage security settings are
  // accepted by the cluster. Nodes are informed of the cluster policy when
  // registering into the cluster, alongside a cluster-recommended storage
  // security setting. The node then reports its selected node storage setting
  // during its Commit call which the cluster verifies against its policy.
  enum StorageSecurityPolicy {
    STORAGE_SECURITY_POLICY_INVALID = 0;
    // The cluster accepts any storage security.
    STORAGE_SECURITY_POLICY_PERMISSIVE = 1;
    // The cluster accepts any storage security that offers encryption.
    STORAGE_SECURITY_POLICY_NEEDS_ENCRYPTION = 2;
    // The cluster accepts any storage security that offers encryption and
    // authentication.
    STORAGE_SECURITY_POLICY_NEEDS_ENCRYPTION_AND_AUTHENTICATION = 3;
    // The cluster only accepts unencrypted and unauthenticated node storage.
    STORAGE_SECURITY_POLICY_NEEDS_INSECURE = 4;
  }
  StorageSecurityPolicy storage_security_policy = 2;

  // Kubernetes groups the cluster-wide settings related to Kubernetes.
  message Kubernetes {
    // NodeLabelsToSynchronize is a single label-matching rule.
    message NodeLabelsToSynchronize {
      // Node labels matching this regexp will be synchronized.
      //
      // For example, the following regex: `^[^/]*foo$` would match:
      //   - foo: bar
      //   - bar-foo: baz
      // But wouldn't match:
      //   - example.com/foo: bar
      //
      // Regexes are compiled using Go's regexp library, and must be anchored
      // (with ^ and $) by the user. An invalid regexp matches no label.
      string regexp = 1;
    }

    // Rules to match Node labels that should be synchronized into Kubernetes
    // node labels. A label matching any rule will be synchronized and managed
    // by Metropolis. If a label stops matching a rule (ie., the rules get
    // modified so they don't match some label, or the label gets removed from
    // the Node in Metropolis), the label will also be removed from the
    // Kubernetes node.
    //
    // Users should be careful about not synchronizing labels that will collide
    // with other Kubernetes node labels, as then that node's labels will not be
    // synchronized at all as a safety precaution.
    //
    // Note: there are certain labels that Metropolis will always add to
    // Kubernetes nodes, such as node-role.kubernetes.io/... . These are not
    // influenced by these rules.
    repeated NodeLabelsToSynchronize node_labels_to_synchronize = 3;
  }
  Kubernetes kubernetes = 3;
}
Serge Bazanskie4a4ce12023-03-22 18:29:54 +0100370
// NodeTPMUsage describes whether a node has a TPM2.0 and if it is/should be
// actively used to seal secrets before saving them to its EFI system partition.
enum NodeTPMUsage {
  // The zero value of this enum.
  NODE_TPM_USAGE_INVALID = 0;
  // This node has no TPM 2.0.
  NODE_TPM_USAGE_NOT_PRESENT = 1;
  // This node has a TPM 2.0 but the cluster configuration mandates not using
  // it.
  NODE_TPM_USAGE_PRESENT_BUT_UNUSED = 2;
  // This node has a TPM 2.0 and it is being actively used.
  NODE_TPM_USAGE_PRESENT_AND_USED = 3;
}
383
// NodeStorageSecurity describes how a node encrypts and/or authenticates its
// local storage. In other words, it's a configuration setting for disk
// encryption (ie. via dm-crypt) and disk integrity (ie. via dm-integrity) of
// the Metropolis data partition.
enum NodeStorageSecurity {
  // The zero value of this enum.
  NODE_STORAGE_SECURITY_INVALID = 0;
  // The node has unencrypted and unauthenticated disk storage. Its data
  // partition is a plain XFS partition, and the node's credentials are stored
  // on it directly.
  NODE_STORAGE_SECURITY_INSECURE = 1;
  // The node has encrypted but unauthenticated disk storage. Its data
  // partition is an XFS partition mounted through dm-crypt.
  NODE_STORAGE_SECURITY_ENCRYPTED = 2;
  // The node has encrypted and authenticated storage. Its data
  // partition is an XFS partition mounted through dm-integrity and dm-crypt.
  NODE_STORAGE_SECURITY_AUTHENTICATED_ENCRYPTED = 3;
}