Serge Bazanski | 6bd4159 | 2021-08-23 13:18:37 +0200 | [diff] [blame] | 1 | syntax = "proto3"; |
| 2 | package metropolis.proto.api; |
| 3 | option go_package = "source.monogon.dev/metropolis/proto/api"; |
| 4 | |
Mateusz Zalega | 944cb53 | 2022-06-20 16:54:17 +0200 | [diff] [blame] | 5 | import "google/protobuf/duration.proto"; |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 6 | import "google/protobuf/timestamp.proto"; |
Mateusz Zalega | 944cb53 | 2022-06-20 16:54:17 +0200 | [diff] [blame] | 7 | |
Serge Bazanski | bc671d0 | 2021-10-05 17:53:32 +0200 | [diff] [blame] | 8 | import "metropolis/proto/common/common.proto"; |
Serge Bazanski | 9ffa1f9 | 2021-09-01 15:42:23 +0200 | [diff] [blame] | 9 | import "metropolis/proto/ext/authorization.proto"; |
| 10 | |
Serge Bazanski | 5611447 | 2021-10-11 14:47:54 +0200 | [diff] [blame] | 11 | // Management service available to Cluster Managers, allowing operational work |
| 12 | // to be performed on the cluster (eg. adding nodes, retrieving information |
| 13 | // about a running cluster, etc.). |
Serge Bazanski | 6bd4159 | 2021-08-23 13:18:37 +0200 | [diff] [blame] | 14 | service Management { |
| 15 | // GetRegisterTicket retrieves the current RegisterTicket which is required |
| 16 | // for new nodes to register into the cluster. Presenting this ticket on |
| 17 | // registration does not automatically grant access to arbitrary node |
| 18 | // registration. Instead, it is used to guard the API surface of the |
| 19 | // Register RPC from potential denial of service attacks, and can be |
| 20 | // regenerated at any time in case it leaks. |
Serge Bazanski | 9ffa1f9 | 2021-09-01 15:42:23 +0200 | [diff] [blame] | 21 | rpc GetRegisterTicket(GetRegisterTicketRequest) returns (GetRegisterTicketResponse) { |
| 22 | option (metropolis.proto.ext.authorization) = { |
| 23 | need: PERMISSION_GET_REGISTER_TICKET |
| 24 | }; |
| 25 | } |
Serge Bazanski | 5611447 | 2021-10-11 14:47:54 +0200 | [diff] [blame] | 26 | |
Serge Bazanski | bc671d0 | 2021-10-05 17:53:32 +0200 | [diff] [blame] | 27 | // GetClusterInfo retrieves publicly available summary information about |
| 28 | // this cluster, notably data required for nodes to register into a cluster |
| 29 | // or join it (other than the Register Ticket, which is gated by an |
| 30 | // additional permission). |
| 31 | rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) { |
| 32 | option (metropolis.proto.ext.authorization) = { |
| 33 | need: PERMISSION_READ_CLUSTER_STATUS |
| 34 | }; |
| 35 | } |
Serge Bazanski | 5611447 | 2021-10-11 14:47:54 +0200 | [diff] [blame] | 36 | |
| 37 | // GetNodes retrieves information about nodes in the cluster, streaming one |
| 38 | // Node message per node that matches GetNodesRequest.filter (all nodes if the filter is empty). |
| 39 | rpc GetNodes(GetNodesRequest) returns (stream Node) { |
| 40 | option (metropolis.proto.ext.authorization) = { |
| 41 | need: PERMISSION_READ_CLUSTER_STATUS |
| 42 | }; |
| 43 | } |
Serge Bazanski | 1612d4b | 2021-11-12 13:54:15 +0100 | [diff] [blame] | 44 | |
| 45 | // ApproveNode progresses a node's registration process by changing its state |
| 46 | // in the cluster from NEW to STANDBY, if not yet STANDBY. This is required |
| 47 | // for the node to fully become part of the cluster (ie. have an UP state), |
| 48 | // and is required to be called by a manager manually. |
| 49 | // |
| 50 | // Managers can find out what nodes require approval by performing |
| 51 | // a GetNodes call and filtering for nodes in the NEW state. This call is |
| 52 | // idempotent and can be executed multiple times, and is a no-op if the node |
| 53 | // is already in the STANDBY or even UP states. |
| 54 | // |
| 55 | // In the future, approval process will be governed by cluster policy, but |
| 56 | // currently any node can be approved by a manager, and the manager is |
| 57 | // responsible for performing an out-of-band attestation of the node being |
| 58 | // approved (eg. by verifying that the node that is being approved has the |
| 59 | // same public key as what the registering node displays in its startup |
| 60 | // logs). |
| 61 | rpc ApproveNode(ApproveNodeRequest) returns (ApproveNodeResponse) { |
| 62 | option (metropolis.proto.ext.authorization) = { |
| 63 | need: PERMISSION_APPROVE_NODE |
| 64 | }; |
| 65 | } |
Mateusz Zalega | bb2edbe | 2022-06-08 11:57:09 +0200 | [diff] [blame] | 66 | |
| 67 | // UpdateNodeRoles updates a single node's roles. |
| 68 | rpc UpdateNodeRoles(UpdateNodeRolesRequest) returns (UpdateNodeRolesResponse) { |
| 69 | option (metropolis.proto.ext.authorization) = { |
| 70 | need: PERMISSION_UPDATE_NODE_ROLES |
| 71 | }; |
| 72 | } |
Serge Bazanski | 6bd4159 | 2021-08-23 13:18:37 +0200 | [diff] [blame] | 73 | } |
| 74 | |
// GetRegisterTicketRequest is the request message of
// Management.GetRegisterTicket. It currently carries no fields.
| 75 | message GetRegisterTicketRequest { |
| 76 | } |
| 77 | |
// GetRegisterTicketResponse is the response message of
// Management.GetRegisterTicket.
| 78 | message GetRegisterTicketResponse { |
| 79 | // Opaque bytes that comprise the RegisterTicket. |
| 80 | bytes ticket = 1; |
Serge Bazanski | 2893e98 | 2021-09-09 13:06:16 +0200 | [diff] [blame] | 81 | } |
Serge Bazanski | bc671d0 | 2021-10-05 17:53:32 +0200 | [diff] [blame] | 82 | |
// GetClusterInfoRequest is the request message of Management.GetClusterInfo.
// It currently carries no fields.
| 83 | message GetClusterInfoRequest { |
| 84 | } |
| 85 | |
// GetClusterInfoResponse is the response message of Management.GetClusterInfo.
| 86 | message GetClusterInfoResponse { |
| 87 | // cluster_directory contains information about individual nodes in the |
| 88 | // cluster that can be used to dial the cluster's services. |
| 89 | metropolis.proto.common.ClusterDirectory cluster_directory = 1; |
Serge Bazanski | 2f58ac0 | 2021-10-05 11:47:20 +0200 | [diff] [blame] | 90 | |
Serge Bazanski | fbd38e2 | 2021-10-08 14:41:16 +0200 | [diff] [blame] | 91 | // ca_certificate is the x509 DER encoded CA certificate of the cluster. |
| 92 | bytes ca_certificate = 2; |
Serge Bazanski | bc671d0 | 2021-10-05 17:53:32 +0200 | [diff] [blame] | 93 | } |
Serge Bazanski | 5611447 | 2021-10-11 14:47:54 +0200 | [diff] [blame] | 94 | |
// GetNodesRequest is the request message of Management.GetNodes.
| 95 | message GetNodesRequest { |
Mateusz Zalega | 955e46e | 2022-05-27 18:00:50 +0200 | [diff] [blame] | 96 | // filter is a CEL expression used to limit which nodes GetNodes returns. |
| 97 | // Each processed node protobuf message is exposed to the filter as the |
| 98 | // "node" variable, while related state and health enum constants are |
| 99 | // anchored in the root namespace, eg. NODE_STATE_UP, or HEARTBEAT_TIMEOUT. |
| 100 | // A node is returned whenever the expression evaluates to true for it. If |
| 101 | // empty, all nodes are returned. |
| 102 | string filter = 1; |
Serge Bazanski | 5611447 | 2021-10-11 14:47:54 +0200 | [diff] [blame] | 103 | } |
| 104 | |
| 105 | // Node in a Metropolis cluster, streamed by Management.GetNodes. For each node |
| 106 | // in the cluster, this message will be emitted and will contain information |
| 107 | // about that node. |
| 108 | // |
| 109 | // The fields contained are node fields that PERMISSION_READ_CLUSTER_STATUS |
| 110 | // allows access to, ie. 'non-private' fields, ones that might be internal to |
| 111 | // the cluster and possibly considered sensitive information about the |
| 112 | // infrastructure, but whose knowledge does not allow escalating privileges |
| 113 | // within the cluster. |
| 114 | message Node { |
| 115 | // Raw Ed25519 public key of this node, which can be used to generate |
| 116 | // the node's ID. This is always set. |
| 117 | bytes pubkey = 1; |
Serge Bazanski | 30fd154 | 2023-03-29 14:19:02 +0200 | [diff] [blame] | 118 | // Node ID calculated from pubkey, eg. 'metropolis-123456'. |
| 119 | string id = 7; |
Serge Bazanski | 5611447 | 2021-10-11 14:47:54 +0200 | [diff] [blame] | 120 | // State of the node from the point of view of the cluster. This is |
| 121 | // always set. |
| 122 | metropolis.proto.common.NodeState state = 2; |
| 123 | // Last reported status by the Node, absent if a node hasn't yet reported |
| 124 | // its status. |
| 125 | metropolis.proto.common.NodeStatus status = 3; |
| 126 | // Roles assigned by the cluster. This is always set. |
| 127 | metropolis.proto.common.NodeRoles roles = 4; |
Serge Bazanski | 1612d4b | 2021-11-12 13:54:15 +0100 | [diff] [blame] | 128 | |
Mateusz Zalega | 32b1929 | 2022-05-17 13:26:55 +0200 | [diff] [blame] | 129 | // Health describes node's health as seen from the cluster perspective. |
| 130 | enum Health { |
| 131 | INVALID = 0; |
| 132 | // UNKNOWN is used whenever there were no heartbeats received from a |
| 133 | // given node AND too little time has passed since last Curator leader |
| 134 | // election to know whether the node is actually timing out. UNKNOWN |
| 135 | // is also returned for nodes whose NodeState does not equal |
| 136 | // NODE_STATE_UP. |
| 137 | UNKNOWN = 1; |
| 138 | // HEALTHY describes nodes that have sent a heartbeat recently. |
| 139 | HEALTHY = 2; |
| 140 | // HEARTBEAT_TIMEOUT describes nodes that have not sent a heartbeat in |
| 141 | // the interval specified by curator.HeartbeatTimeout. |
| 142 | HEARTBEAT_TIMEOUT = 3; |
| 143 | } |
// health is this node's health as seen from the cluster perspective.
| 144 | Health health = 5; |
Mateusz Zalega | 2175ec9 | 2022-06-13 09:29:09 +0200 | [diff] [blame] | 145 | // time_since_heartbeat is the duration since the last of the node's |
| 146 | // heartbeats was received, expressed as a google.protobuf.Duration. It is only valid with |
| 147 | // the health status of either HEALTHY or HEARTBEAT_TIMEOUT. |
Mateusz Zalega | 944cb53 | 2022-06-20 16:54:17 +0200 | [diff] [blame] | 148 | google.protobuf.Duration time_since_heartbeat = 6; |
Mateusz Zalega | 32b1929 | 2022-05-17 13:26:55 +0200 | [diff] [blame] | 149 | } |
Serge Bazanski | 1612d4b | 2021-11-12 13:54:15 +0100 | [diff] [blame] | 150 | |
// ApproveNodeRequest is the request message of Management.ApproveNode.
| 151 | message ApproveNodeRequest { |
| 152 | // Raw public key of the node being approved, has to correspond to a node |
| 153 | // currently in the cluster. |
| 154 | bytes pubkey = 1; |
| 155 | } |
| 156 | |
// ApproveNodeResponse is the response message of Management.ApproveNode. It
// currently carries no fields.
| 157 | message ApproveNodeResponse { |
Mateusz Zalega | 32b1929 | 2022-05-17 13:26:55 +0200 | [diff] [blame] | 158 | } |
Mateusz Zalega | bb2edbe | 2022-06-08 11:57:09 +0200 | [diff] [blame] | 159 | |
| 160 | // UpdateNodeRolesRequest updates roles of a single node matching pubkey or id. All |
| 161 | // role fields (kubernetesWorker, kubernetesController, consensusMember) are optional, and no change will result if they're either unset |
| 162 | // or if their value matches existing state. |
| 163 | message UpdateNodeRolesRequest { |
Mateusz Zalega | 9c315f1 | 2022-08-11 16:31:22 +0200 | [diff] [blame] | 164 | // node uniquely identifies the node subject to this request. |
| 165 | oneof node { |
| 166 | // pubkey is the Ed25519 public key of this node, which can be used to |
| 167 | // generate the node's ID. |
| 168 | bytes pubkey = 1; |
| 169 | // id is the human-readable identifier of the node, based on its public |
| 170 | // key. |
| 171 | string id = 4; |
| 172 | } |
Mateusz Zalega | bb2edbe | 2022-06-08 11:57:09 +0200 | [diff] [blame] | 173 | |
Serge Bazanski | 15f7f63 | 2023-03-14 17:17:20 +0100 | [diff] [blame] | 174 | // kubernetesWorker adjusts the appropriate role when set. |
Mateusz Zalega | bb2edbe | 2022-06-08 11:57:09 +0200 | [diff] [blame] | 175 | optional bool kubernetesWorker = 2; |
Serge Bazanski | 15f7f63 | 2023-03-14 17:17:20 +0100 | [diff] [blame] | 176 | // kubernetesController adjusts the appropriate role when set. Nodes performing |
| 177 | // this role must also be consensus members. |
| 178 | optional bool kubernetesController = 5; |
// consensusMember adjusts the appropriate role when set.
Mateusz Zalega | bb2edbe | 2022-06-08 11:57:09 +0200 | [diff] [blame] | 179 | optional bool consensusMember = 3; |
| 180 | } |
| 181 | |
// UpdateNodeRolesResponse is the response message of
// Management.UpdateNodeRoles. It currently carries no fields.
| 182 | message UpdateNodeRolesResponse { |
| 183 | } |
Serge Bazanski | b40c008 | 2023-03-29 14:28:04 +0200 | [diff] [blame] | 184 | |
| 185 | // NodeManagement runs on every node of the cluster and provides management |
| 186 | // and troubleshooting RPCs to operators. All requests must be authenticated. |
| 187 | service NodeManagement { |
Serge Bazanski | da11486 | 2023-03-29 17:46:42 +0200 | [diff] [blame^] | 188 | // Logs returns historical and/or streaming logs for a given DN with given |
| 189 | // filters from the system global LogTree. |
| 190 | // |
| 191 | // For more information about this API, see //metropolis/pkg/logtree. But, in |
| 192 | // summary: |
| 193 | // - All logging is performed to a DN (distinguished name), which is a |
| 194 | // dot-delimited string like foo.bar.baz. |
| 195 | // - Log entries can be either raw (coming from unstructured logging from |
| 196 | // an external service, like a running process) or leveled (emitted by |
| 197 | // Metropolis code with a source line, timestamp, and severity). |
| 198 | // - The DNs form a tree of logging nodes - and when requesting logs, a |
| 199 | // given subtree of DNs can be requested, instead of just a given DN. |
| 200 | // - All supervised processes live at `root.<supervisor DN>`. For more |
| 201 | // example paths, see the console logs of a running Metropolis node, or |
| 202 | // request all logs (at DN ""). |
| 203 | // |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 204 | rpc Logs(GetLogsRequest) returns (stream GetLogsResponse) { |
| 205 | option (metropolis.proto.ext.authorization) = { |
| 206 | need: PERMISSION_READ_NODE_LOGS |
| 207 | }; |
| 208 | } |
| 209 | } |
| 210 | |
// GetLogsRequest is the request message of NodeManagement.Logs.
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 211 | message GetLogsRequest { |
| 212 | // DN from which to request logs. All supervised runnables live at `root.`, |
| 213 | // the init code lives at `init.`. |
| 214 | string dn = 1; |
| 215 | // Filters to apply to returned data. |
Serge Bazanski | da11486 | 2023-03-29 17:46:42 +0200 | [diff] [blame^] | 216 | repeated metropolis.proto.common.LogFilter filters = 2; |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 217 | |
// BacklogMode selects how much historic (backlog) data is returned.
| 218 | enum BacklogMode { |
| 219 | BACKLOG_INVALID = 0; |
| 220 | // No historic data will be returned. |
| 221 | BACKLOG_DISABLE = 1; |
| 222 | // All available historic data will be returned. |
| 223 | BACKLOG_ALL = 2; |
| 224 | // At most backlog_count entries will be returned, if available. |
| 225 | BACKLOG_COUNT = 3; |
| 226 | } |
// backlog_mode selects the backlog behaviour for this request.
| 227 | BacklogMode backlog_mode = 3; |
// backlog_count is the maximum number of historic entries to return when
// backlog_mode is BACKLOG_COUNT.
| 228 | int64 backlog_count = 4; |
| 229 | |
// StreamMode selects whether new entries are streamed once the backlog has
// been served.
| 230 | enum StreamMode { |
| 231 | STREAM_INVALID = 0; |
Serge Bazanski | da11486 | 2023-03-29 17:46:42 +0200 | [diff] [blame^] | 232 | // No streaming entries, gRPC stream will be closed as soon as all backlog |
| 233 | // data is served. |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 234 | STREAM_DISABLE = 1; |
Serge Bazanski | da11486 | 2023-03-29 17:46:42 +0200 | [diff] [blame^] | 235 | // Entries will be streamed as early as available right after all backlog |
| 236 | // data is served. |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 237 | STREAM_UNBUFFERED = 2; |
| 238 | } |
// stream_mode selects the streaming behaviour for this request.
| 239 | StreamMode stream_mode = 5; |
| 240 | } |
| 241 | |
// GetLogsResponse is the message streamed back by NodeManagement.Logs.
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 242 | message GetLogsResponse { |
Serge Bazanski | da11486 | 2023-03-29 17:46:42 +0200 | [diff] [blame^] | 243 | // Entries from the requested historical entries (as selected by GetLogsRequest.backlog_mode). They will |
| 244 | // all be served before the first stream_entries are served (if any). |
| 245 | repeated metropolis.proto.common.LogEntry backlog_entries = 1; |
| 246 | // Entries streamed as they arrive. Currently no server-side buffering is |
| 247 | // enabled, instead every line is served as early as it arrives. However, this |
| 248 | // might change in the future, so this behaviour cannot be depended upon. |
| 249 | repeated metropolis.proto.common.LogEntry stream_entries = 2; |
Serge Bazanski | b40c008 | 2023-03-29 14:28:04 +0200 | [diff] [blame] | 250 | } |