| syntax = "proto3"; |
| package metropolis.proto.api; |
| option go_package = "source.monogon.dev/metropolis/proto/api"; |
| |
| import "google/protobuf/duration.proto"; |
| import "google/protobuf/timestamp.proto"; |
| |
| import "metropolis/proto/common/common.proto"; |
| import "metropolis/proto/ext/authorization.proto"; |
| |
| // Management is the service exposed to Cluster Managers for carrying out |
| // operational work on the cluster, such as adding nodes or retrieving |
| // information about a running cluster. |
| service Management { |
|   // GetRegisterTicket returns the current RegisterTicket, which new nodes |
|   // must present in order to register into the cluster. The ticket by itself |
|   // does not grant the ability to register arbitrary nodes; its purpose is to |
|   // shield the API surface of the Register RPC from potential denial of |
|   // service attacks. Should it leak, it can be regenerated at any time. |
|   rpc GetRegisterTicket(GetRegisterTicketRequest) returns (GetRegisterTicketResponse) { |
|     option (metropolis.proto.ext.authorization) = { need: PERMISSION_GET_REGISTER_TICKET }; |
|   } |
| |
|   // GetClusterInfo returns publicly available summary information about this |
|   // cluster, notably the data that nodes need in order to register into the |
|   // cluster or join it. The Register Ticket is not part of it, as that is |
|   // gated by an additional permission. |
|   rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) { |
|     option (metropolis.proto.ext.authorization) = { need: PERMISSION_READ_CLUSTER_STATUS }; |
|   } |
| |
|   // GetNodes streams information about nodes in the cluster. Currently, all |
|   // available data is returned for every node (optionally narrowed down by |
|   // GetNodesRequest.filter). |
|   rpc GetNodes(GetNodesRequest) returns (stream Node) { |
|     option (metropolis.proto.ext.authorization) = { need: PERMISSION_READ_CLUSTER_STATUS }; |
|   } |
| |
|   // ApproveNode advances a node's registration process by moving its cluster |
|   // state from NEW to STANDBY, unless it is STANDBY already. A manager has to |
|   // call it manually before the node can fully become part of the cluster |
|   // (ie. reach the UP state). |
|   // |
|   // Nodes awaiting approval can be found by performing a GetNodes call and |
|   // filtering for nodes in the NEW state. The call is idempotent: it can be |
|   // executed multiple times, and is a no-op if the node is already in the |
|   // STANDBY or even UP state. |
|   // |
|   // In the future, the approval process will be governed by cluster policy. |
|   // For now, any node can be approved by a manager, and the manager is |
|   // responsible for performing an out-of-band attestation of the node being |
|   // approved (eg. by verifying that its public key is the same as the one the |
|   // registering node displays in its startup logs). |
|   rpc ApproveNode(ApproveNodeRequest) returns (ApproveNodeResponse) { |
|     option (metropolis.proto.ext.authorization) = { need: PERMISSION_APPROVE_NODE }; |
|   } |
| |
|   // UpdateNodeRoles changes the roles of a single node. |
|   rpc UpdateNodeRoles(UpdateNodeRolesRequest) returns (UpdateNodeRolesResponse) { |
|     option (metropolis.proto.ext.authorization) = { need: PERMISSION_UPDATE_NODE_ROLES }; |
|   } |
| } |
| |
| // GetRegisterTicketRequest is the request of Management.GetRegisterTicket. It |
| // currently carries no fields. |
| message GetRegisterTicketRequest {} |
| |
| // GetRegisterTicketResponse is the response of Management.GetRegisterTicket. |
| message GetRegisterTicketResponse { |
|   // ticket holds the RegisterTicket, as opaque bytes. |
|   bytes ticket = 1; |
| } |
| |
| // GetClusterInfoRequest is the request of Management.GetClusterInfo. It |
| // currently carries no fields. |
| message GetClusterInfoRequest {} |
| |
| // GetClusterInfoResponse is the response of Management.GetClusterInfo. |
| message GetClusterInfoResponse { |
|   // cluster_directory describes the individual nodes of the cluster in a way |
|   // that can be used to dial the cluster's services. |
|   metropolis.proto.common.ClusterDirectory cluster_directory = 1; |
| |
|   // ca_certificate is the CA certificate of the cluster, as DER-encoded x509. |
|   bytes ca_certificate = 2; |
| } |
| |
| // GetNodesRequest is the request of Management.GetNodes. |
| message GetNodesRequest { |
|   // filter is a CEL expression that narrows down the GetNodes results. Every |
|   // node protobuf message being processed is made available to the expression |
|   // as the "node" variable, and the related state and health enum constants |
|   // are anchored in the root namespace (eg. NODE_STATE_UP, or |
|   // HEARTBEAT_TIMEOUT). A node is returned whenever the expression evaluates |
|   // to true. An empty filter returns all nodes. |
|   string filter = 1; |
| } |
| |
| // Node describes a single node of a Metropolis cluster. Management.GetNodes |
| // streams one such message for each node in the cluster. |
| // |
| // Only fields that PERMISSION_READ_CLUSTER_STATUS allows access to are |
| // included, ie. 'non-private' fields: ones that might be internal to the |
| // cluster and possibly considered sensitive information about the |
| // infrastructure, but whose knowledge does not allow privileges within the |
| // cluster to be escalated. |
| message Node { |
|   // Health describes node's health as seen from the cluster perspective. |
|   enum Health { |
|     INVALID = 0; |
|     // UNKNOWN is reported when no heartbeats were received from the node AND |
|     // too little time has passed since the last Curator leader election to |
|     // tell whether the node is actually timing out. It is also reported for |
|     // nodes whose NodeState is not NODE_STATE_UP. |
|     UNKNOWN = 1; |
|     // HEALTHY is reported for nodes that have sent a heartbeat recently. |
|     HEALTHY = 2; |
|     // HEARTBEAT_TIMEOUT is reported for nodes that have not sent a heartbeat |
|     // within the interval specified by curator.HeartbeatTimeout. |
|     HEARTBEAT_TIMEOUT = 3; |
|   } |
| |
|   // pubkey is the raw Ed25519 public key of this node, from which the node's |
|   // ID can be generated. Always set. |
|   bytes pubkey = 1; |
|   // id is the node ID calculated from pubkey, ie. 'metropolis-123456'. |
|   string id = 7; |
|   // state is the state of the node from the point of view of the cluster. |
|   // Always set. |
|   metropolis.proto.common.NodeState state = 2; |
|   // status is the status last reported by the node. Absent if the node has |
|   // not reported its status yet. |
|   metropolis.proto.common.NodeStatus status = 3; |
|   // roles are the roles assigned to the node by the cluster. Always set. |
|   metropolis.proto.common.NodeRoles roles = 4; |
| |
|   // health is the node's health as seen from the cluster perspective. |
|   Health health = 5; |
|   // time_since_heartbeat is the time elapsed since the most recent heartbeat |
|   // of the node was received, carried as a Duration (ie. with nanosecond |
|   // resolution). It is only valid when health is either HEALTHY or |
|   // HEARTBEAT_TIMEOUT. |
|   google.protobuf.Duration time_since_heartbeat = 6; |
| } |
| |
| // ApproveNodeRequest is the request of Management.ApproveNode. |
| message ApproveNodeRequest { |
|   // pubkey is the raw public key of the node being approved. It has to |
|   // correspond to a node that is currently in the cluster. |
|   bytes pubkey = 1; |
| } |
| |
| // ApproveNodeResponse is the response of Management.ApproveNode. It currently |
| // carries no fields. |
| message ApproveNodeResponse {} |
| |
| // UpdateNodeRolesRequest updates roles of a single node, selected either by |
| // its public key or by its ID. All role fields are optional, and no change |
| // will result if they're either unset or if their value matches existing |
| // state. |
| // |
| // The role fields are named in camelCase rather than snake_case. They are |
| // kept as they are, since renaming them would change generated code and JSON |
| // field names. |
| message UpdateNodeRolesRequest { |
|   // node uniquely identifies the node subject to this request. |
|   oneof node { |
|     // pubkey is the Ed25519 public key of this node, which can be used to |
|     // generate the node's ID. |
|     bytes pubkey = 1; |
|     // id is the human-readable identifier of the node, based on its public |
|     // key. |
|     string id = 4; |
|   } |
| |
|   // kubernetesWorker adjusts the appropriate role when set. |
|   optional bool kubernetesWorker = 2; |
|   // kubernetesController adjusts the appropriate role when set. Nodes |
|   // performing this role must also be consensus members. |
|   optional bool kubernetesController = 5; |
|   // consensusMember adjusts the appropriate role when set. |
|   optional bool consensusMember = 3; |
| } |
| |
| // UpdateNodeRolesResponse is the response of Management.UpdateNodeRoles. It |
| // currently carries no fields. |
| message UpdateNodeRolesResponse {} |
| |
| // NodeManagement runs on every node of the cluster and provides management |
| // and troubleshooting RPCs to operators. All requests must be authenticated. |
| service NodeManagement { |
|   // Logs returns a stream of GetLogsResponse messages for the given |
|   // GetLogsRequest. Callers need PERMISSION_READ_NODE_LOGS. |
|   rpc Logs(GetLogsRequest) returns (stream GetLogsResponse) { |
|     option (metropolis.proto.ext.authorization) = { need: PERMISSION_READ_NODE_LOGS }; |
|   } |
| } |
| |
| |
| // LeveledLogSeverity is the severity level of a leveled log entry. It |
| // corresponds to //metropolis/pkg/logtree.Severity. |
| enum LeveledLogSeverity { |
|   // INVALID is the zero value. |
|   // NOTE(review): presumably never a legitimate severity - confirm. |
|   INVALID = 0; |
|   INFO = 1; |
|   WARNING = 2; |
|   ERROR = 3; |
|   FATAL = 4; |
| } |
| |
| // LogFilter is a filter set when requesting logs for a given DN. This message |
| // is equivalent to the following GADT enum: |
| //     data LogFilter = WithChildren |
| //                    | OnlyRaw |
| //                    | OnlyLeveled |
| //                    | LeveledWithMinimumSeverity(Severity) |
| // |
| // Multiple LogFilters can be chained/combined when requesting logs, as long as |
| // they do not conflict. |
| message LogFilter { |
|   // Field number 2 is not used by the filter oneof below. It is reserved so |
|   // that a new field cannot pick it up by accident. |
|   // NOTE(review): presumably 2 belonged to a filter that has since been |
|   // removed - confirm against history. |
|   reserved 2; |
| |
|   // WithChildren makes entries be returned not only for the given DN, but for |
|   // all child DNs as well. For instance, if the requested DN is foo, entries |
|   // logged to foo, foo.bar and foo.bar.baz will all be returned. |
|   message WithChildren {} |
| |
|   // OnlyRaw makes only raw logging entries be returned. Conflicts with |
|   // OnlyLeveled filters. |
|   message OnlyRaw {} |
| |
|   // OnlyLeveled makes only leveled logging entries be returned. Conflicts |
|   // with OnlyRaw filters. |
|   message OnlyLeveled {} |
| |
|   // LeveledWithMinimumSeverity discards, if leveled logs are returned, all |
|   // entries at severity lower than `minimum`. |
|   message LeveledWithMinimumSeverity { |
|     LeveledLogSeverity minimum = 1; |
|   } |
| |
|   // filter is the kind of filter this message represents. |
|   oneof filter { |
|     WithChildren with_children = 1; |
|     OnlyRaw only_raw = 3; |
|     OnlyLeveled only_leveled = 4; |
|     LeveledWithMinimumSeverity leveled_with_minimum_severity = 5; |
|   } |
| } |
| |
| // GetLogsRequest is the request of NodeManagement.Logs. |
| message GetLogsRequest { |
|   // BacklogMode selects how much historic data is returned. |
|   enum BacklogMode { |
|     BACKLOG_INVALID = 0; |
|     // No historic data will be returned. |
|     BACKLOG_DISABLE = 1; |
|     // All available historic data will be returned. |
|     BACKLOG_ALL = 2; |
|     // At most backlog_count entries will be returned, if available. |
|     BACKLOG_COUNT = 3; |
|   } |
| |
|   // StreamMode selects whether entries are streamed once the backlog data has |
|   // been served. |
|   enum StreamMode { |
|     STREAM_INVALID = 0; |
|     // No streaming entries, gRPC stream will be closed as soon as all backlog |
|     // data is served. |
|     STREAM_DISABLE = 1; |
|     // Entries will be streamed as early as available right after all backlog |
|     // data is served. |
|     STREAM_UNBUFFERED = 2; |
|   } |
| |
|   // dn is the DN from which to request logs. All supervised runnables live at |
|   // `root.`, the init code lives at `init.`. |
|   string dn = 1; |
|   // filters are the filters to apply to returned data. |
|   repeated LogFilter filters = 2; |
| |
|   // backlog_mode selects the historic data to return, see BacklogMode. |
|   BacklogMode backlog_mode = 3; |
|   // backlog_count is the maximum number of historic entries returned in |
|   // BACKLOG_COUNT mode. |
|   // NOTE(review): presumably ignored in the other backlog modes - confirm. |
|   int64 backlog_count = 4; |
| |
|   // stream_mode selects the streaming behaviour, see StreamMode. |
|   StreamMode stream_mode = 5; |
| } |
| |
| // LogEntry is a single log entry, which is either leveled or raw. |
| message LogEntry { |
|   // Leveled is the payload of a leveled entry: one or more lines together |
|   // with a timestamp, a severity and a location. |
|   message Leveled { |
|     repeated string lines = 1; |
|     google.protobuf.Timestamp timestamp = 2; |
|     LeveledLogSeverity severity = 3; |
|     // NOTE(review): presumably the source code location that emitted the |
|     // entry - confirm. |
|     string location = 4; |
|   } |
| |
|   // Raw is the payload of a raw entry. |
|   message Raw { |
|     string data = 1; |
|     // NOTE(review): presumably the length of the entry before any |
|     // truncation of data - confirm. |
|     int64 original_length = 2; |
|   } |
| |
|   // dn of this entry. |
|   // NOTE(review): presumably the DN the entry was logged to - confirm. |
|   string dn = 1; |
|   // kind holds the payload of the entry, either leveled or raw. |
|   oneof kind { |
|     Leveled leveled = 2; |
|     Raw raw = 3; |
|   } |
| } |
| |
| // GetLogsResponse is a single message of the stream returned by |
| // NodeManagement.Logs. |
| message GetLogsResponse { |
|   // Entries from the requested historical entries (as selected by |
|   // GetLogsRequest.backlog_mode). They will all be served before the first |
|   // stream_entries are served (if any). |
|   repeated LogEntry backlog_entries = 1; |
|   // Entries streamed as they arrive. Currently no server-side buffering is |
|   // enabled, instead every line is served as early as it arrives. However, |
|   // this might change in the future, so this behaviour cannot be depended |
|   // upon. |
|   repeated LogEntry stream_entries = 2; |
| } |