// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package metropolis.proto.common;

// Well-known Timestamp type, used by NodeStatus and LogEntry.Leveled.
import "google/protobuf/timestamp.proto";

option go_package = "source.monogon.dev/metropolis/proto/common";

// NodeRoles describes the roles that a Metropolis node should run within the
// cluster. Roles are configured by the cluster and can be retrieved through
// the Curator.
//
// The fields contained within each individual role are publicly available.
// They may carry whatever data is required to start up the services for a
// given role, but that data must never be confidential or private.
message NodeRoles {
  // KubernetesController is the Kubernetes controller role. It currently
  // carries no role-specific data.
  message KubernetesController {}

  // KubernetesWorker is the Kubernetes worker role. It currently carries no
  // role-specific data.
  message KubernetesWorker {}

  // ConsensusMember is the role of a node running as an etcd member, and
  // carries the data the member needs to start.
  message ConsensusMember {
    // Peer is a single entry of the peers list below.
    //
    // NOTE(review): presumably the etcd member name and peer URL of another
    // consensus member - confirm against the consumers of this message.
    // The capitalized field names deviate from lower_snake_case, but are kept
    // as-is because renaming them would change generated accessors and JSON
    // keys.
    message Peer {
      string Name = 1;
      string URL = 2;
    }

    // ca_certificate is a DER-encoded x509 certificate of the etcd cluster's
    // CA. The member must use this certificate to verify the identity of the
    // cluster it's connecting to.
    bytes ca_certificate = 1;

    // peer_certificate is a DER-encoded x509 certificate of this node's etcd
    // peer listener. The member must serve member traffic using this
    // certificate. The private key corresponding to this certificate is the
    // same as the node's primary private keypair.
    bytes peer_certificate = 2;

    // initial_crl is a certificate revocation list that the etcd member
    // should be started with. After startup, the member will maintain its
    // own CRL by updating it from its primary storage location, and etcd
    // value.
    //
    // TODO(q3k): don't pass this here, instead pass this over an etcd
    // watcher and curator.Watch.
    bytes initial_crl = 3;

    // peers is the list of Peer entries handed to this member.
    repeated Peer peers = 4;
  }

  // Role slots of the node, one per role message declared above.
  KubernetesWorker kubernetes_worker = 1;
  ConsensusMember consensus_member = 2;
  KubernetesController kubernetes_controller = 3;
}

// NodeState is the state of a Metropolis node as seen by the cluster that it
// is a part of (or that it intends to be a part of).
enum NodeState {
  // Zero value; not a valid node state.
  NODE_STATE_INVALID = 0;

  // NEW: the node has established first contact with the cluster and intends
  // to register into it. Its identity has not been verified, and no hardware
  // attestation of the new node has been performed.
  // The node has generated a CUK/NUK and set up storage encrypted with the
  // combination of both keys.
  // The node has also generated a private/public keypair, and that keypair
  // has been used to contact the already running Cluster.
  NODE_STATE_NEW = 1;

  // STANDBY: the node has successfully passed the identity and hardware
  // attestation checks defined by the cluster policy. It still isn't part of
  // the cluster, as the node itself might perform checks against the running
  // Cluster.
  NODE_STATE_STANDBY = 2;

  // UP: the node has passed all preconditions for STANDBY and has also
  // performed a commit into the cluster by exchanging its CUK for a
  // certificate issued by the cluster.
  // The node is now ready to serve, and its certificate can be used to
  // authenticate its identity cryptographically.
  NODE_STATE_UP = 3;

  // DISOWNED: the node has been rejected or decommissioned by the cluster.
  // Any further contact from the node to the cluster will be rejected.
  NODE_STATE_DISOWNED = 4;
}

// ClusterState is the state of the cluster as seen by a node. Different
// subsystems can watch this state and make their behaviour depend on it (eg.
// start serving when HOME, maybe self-fence on SPLIT, etc.).
enum ClusterState {
  // Zero value; not a valid cluster state.
  CLUSTER_STATE_INVALID = 0;

  // UNKNOWN: the node has not yet determined whether there is a cluster that
  // it should join or start. This is a transient, initial state that should
  // only manifest during boot.
  CLUSTER_STATE_UNKNOWN = 1;

  // FOREIGN: the node is attempting to register into an already existing
  // cluster. It has managed to make preliminary contact with that cluster,
  // but the cluster has not yet fully productionized the node (eg. the node
  // is still being hardware attested, or the operator needs to confirm the
  // registration of this node).
  CLUSTER_STATE_FOREIGN = 2;

  // TRUSTED: the node is still in the process of registering into a cluster,
  // and that cluster has now trusted it. The node is attempting to fully
  // commit to registering into the cluster.
  CLUSTER_STATE_TRUSTED = 3;

  // HOME: the node is part of this cluster. This is the state in which the
  // node will spend the bulk of its time.
  CLUSTER_STATE_HOME = 4;

  // DISOWNING: the node has been disowned (ie., removed) by the cluster. It
  // will never be part of any cluster again, and it will be decommissioned
  // by the operator.
  CLUSTER_STATE_DISOWNING = 5;

  // SPLIT: the node would usually be HOME in a cluster, but has been split
  // from the consensus of the cluster. For nodes running consensus this can
  // happen when consensus is lost (eg. when there is no quorum or this node
  // has been netsplit); for other nodes it can happen when they have lost
  // network connectivity to the consensus nodes. Clients should make their
  // own decision on what action to perform in this state, depending on the
  // level of consistency required and on whether it makes sense for the node
  // to fence its services off.
  CLUSTER_STATE_SPLIT = 6;
}

// NodeStatus groups all the fields that nodes report about themselves. This
// data is inherently less trusted than other data available about a node: each
// node, including a compromised one, can update it to any value.
message NodeStatus {
  // RunningCurator describes the curator service running on this node.
  message RunningCurator {
    // port is the TCP port on which the curator is listening.
    int32 port = 1;
  }

  // external_address is the IP address at which the node expects management,
  // cluster and user traffic to arrive (ie. the address on which it is
  // listening for gRPC, and for role-specific services like etcd and
  // Kubernetes).
  string external_address = 1;

  // running_curator contains information about the curator service running
  // on this node, or is nil if the service is not running.
  RunningCurator running_curator = 3;

  // timestamp is an epoch number associated with the last status update.
  // It's set with a nanosecond granularity.
  google.protobuf.Timestamp timestamp = 2;
}

// ClusterDirectory holds information about the network addressing of the nodes
// in a cluster. It is a serialized snapshot of some of the state within the
// etcd cluster, and external processes (like a node Registering into the
// cluster) can use it to find out how to reach this cluster over the network.
// It can be thought of as a phonebook, or as a static name/address
// configuration that could live in /etc/hosts.
//
// The directory explicitly doesn't carry any information about the cluster's
// identity or security - these should be configured and checked by higher
// level configuration and processes. The directory can be stored and
// transmitted in cleartext and without any integrity checks (like saved to
// the EFI system partition across reboots), and any malicious change to it
// will cause no more than a denial of service against the consumer of this
// directory. This is because all nodes contacted must present a valid cluster
// identity/certificate before they are trusted by the consumers of this
// directory.
message ClusterDirectory {
  // Node is a single directory entry: a public key and the addresses listed
  // for it.
  message Node {
    // Address is one network location listed for a node.
    message Address {
      // NOTE(review): presumably an IP address or hostname at which the node
      // can be reached - the exact format is not stated here; confirm.
      string host = 1;
    }

    // NOTE(review): presumably the public key identifying the node - the
    // encoding is not stated here; confirm against producers.
    bytes public_key = 1;

    // addresses are the Address entries listed for this node.
    repeated Address addresses = 2;
  }

  // nodes are the entries of this directory.
  repeated Node nodes = 1;
}

// NodeClusterNetworking describes a node's connectivity to the cluster
// networking (ie. the WireGuard mesh).
message NodeClusterNetworking {
  // Prefix is a single route exported by the node.
  message Prefix {
    // NOTE(review): presumably an IP prefix in CIDR notation - confirm.
    string cidr = 1;
  }

  // wireguard_pubkey is the base64-encoded public key used by the node.
  string wireguard_pubkey = 1;

  // prefixes are the networking routes that the node exports to the cluster
  // networking mesh. Other nodes program them into their wireguard peer
  // config.
  repeated Prefix prefixes = 2;
}

// LeveledLogSeverity is the severity level of a leveled log entry. It
// corresponds to //metropolis/pkg/logtree.Severity.
//
// NOTE(review): the values are not prefixed with the enum name. They are kept
// as-is because renaming them would change generated identifiers.
enum LeveledLogSeverity {
  // Zero value; not a valid severity.
  INVALID = 0;

  INFO = 1;
  WARNING = 2;
  ERROR = 3;
  FATAL = 4;
}

// LogFilter is a filter that is set when requesting logs for a given DN. This
// message is equivalent to the following GADT enum:
//   data LogFilter = WithChildren
//                  | OnlyRaw
//                  | OnlyLeveled
//                  | LeveledWithMinimumSeverity(Severity)
//
// Several LogFilters can be chained/combined when requesting logs, as long as
// they do not conflict with each other.
message LogFilter {
  // WithChildren: entries will be returned not only for the given DN, but for
  // all of its child DNs as well. For instance, if the requested DN is foo,
  // then entries logged to foo, foo.bar and foo.bar.baz will all be returned.
  message WithChildren {}

  // OnlyRaw: only raw logging entries will be returned. Conflicts with
  // OnlyLeveled filters.
  message OnlyRaw {}

  // OnlyLeveled: only leveled logging entries will be returned. Conflicts
  // with OnlyRaw filters.
  message OnlyLeveled {}

  // LeveledWithMinimumSeverity: if leveled logs are returned, all entries at
  // a severity lower than `minimum` will be discarded.
  message LeveledWithMinimumSeverity {
    LeveledLogSeverity minimum = 1;
  }

  // filter selects which one of the filters above this message represents.
  //
  // NOTE(review): field number 2 is not used by this oneof. If it belonged to
  // a filter that has since been removed, it should be marked `reserved`.
  oneof filter {
    WithChildren with_children = 1;
    OnlyRaw only_raw = 3;
    OnlyLeveled only_leveled = 4;
    LeveledWithMinimumSeverity leveled_with_minimum_severity = 5;
  }
}

// LogEntry is a single log entry. It corresponds to logtree.LogEntry in
// //metropolis/pkg/logtree.
message LogEntry {
  // Leveled is a leveled log entry emitted from a compatible system, eg.
  // Metropolis code or a klog-parsed line.
  message Leveled {
    // lines are the text lines of this entry.
    repeated string lines = 1;

    // timestamp is the time associated with this entry.
    google.protobuf.Timestamp timestamp = 2;

    // severity is the level of this entry.
    LeveledLogSeverity severity = 3;

    // location is the source of the error, expressed as file:line.
    string location = 4;
  }

  // Raw is a raw log entry, captured from an external system without parsing.
  // It might contain some timestamp/level/origin information embedded in
  // data. The data contained within should be treated as unsanitized
  // external data.
  message Raw {
    // data is the captured log line.
    string data = 1;

    // original_length is the original length of the line, set if data was
    // truncated.
    int64 original_length = 2;
  }

  // dn is the origin DN of this entry.
  string dn = 1;

  // kind holds the payload of the entry: either leveled or raw.
  oneof kind {
    Leveled leveled = 2;
    Raw raw = 3;
  }
}

// ClusterConfiguration holds all of the user-configurable behaviour that is
// scoped to the cluster as a whole (as opposed to per-node configuration,
// which is kept alongside Node).
//
// It can be set initially when a cluster is being bootstrapped (in
// NodeParameters.ClusterBootstrap), and can afterwards be partially managed
// through management calls to the curator.
message ClusterConfiguration {
  // TPMMode is the TPM usage policy for cluster nodes. When nodes register
  // into the cluster (and then join into it) they report their TPM
  // availability, and in return the cluster responds whether they should use
  // that TPM or not.
  //
  // A node that is instructed to use its TPM will use it to encrypt its part
  // of the disk encryption key when saving it to the EFI system partition.
  // That means that the node will only be able to re-join the cluster if its
  // secure boot configuration doesn't change.
  //
  // A node that is instructed to not use its TPM will save its part of the
  // disk encryption key straight onto the EFI system partition, without any
  // further encryption. It still needs to connect to a working cluster to
  // retrieve the other part of the key. This means that the configuration is
  // secure vs. offline disk decryption attempts, but not secure if an
  // attacker can connect to a cluster and impersonate the node in order to
  // retrieve the other part of its key.
  enum TPMMode {
    // Zero value; not a valid TPM mode.
    TPM_MODE_INVALID = 0;

    // Nodes need to join with a TPM2.0 device and will be instructed to use
    // it.
    TPM_MODE_REQUIRED = 1;

    // Nodes will be allowed to join regardless of TPM2.0 presence, and will
    // be instructed to use it if they have one.
    TPM_MODE_BEST_EFFORT = 2;

    // Regardless of the node's local TPM presence, it will be instructed to
    // not use it.
    TPM_MODE_DISABLED = 3;
  }

  // tpm_mode is the TPM usage policy applied to the nodes of this cluster.
  TPMMode tpm_mode = 1;
}