syntax = "proto3";
package metropolis.proto.api;
option go_package = "source.monogon.dev/metropolis/proto/api";

import "metropolis/proto/common/common.proto";
import "metropolis/proto/ext/authorization.proto";

// Management service available to Cluster Managers, allowing operational work
// to be performed on the cluster (eg. adding nodes, retrieving information
// about a running cluster, etc.).
service Management {
  // GetRegisterTicket retrieves the current RegisterTicket which is required
  // for new nodes to register into the cluster. Presenting this ticket on
  // registration does not automatically grant access to arbitrary node
  // registration. Instead, it is used to guard the API surface of the
  // Register RPC from potential denial of service attacks, and can be
  // regenerated at any time in case it leaks.
  rpc GetRegisterTicket(GetRegisterTicketRequest) returns (GetRegisterTicketResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_GET_REGISTER_TICKET
    };
  }

  // GetClusterInfo retrieves publicly available summary information about
  // this cluster, notably data required for nodes to register into a cluster
  // or join it (other than the Register Ticket, which is gated by an
  // additional permission).
  rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_CLUSTER_STATUS
    };
  }

  // GetNodes retrieves information about nodes in the cluster. Currently,
  // it returns all available data about every node matched by the optional
  // filter in GetNodesRequest; if the filter is empty, all nodes are
  // returned. One Node message is streamed per returned node.
  rpc GetNodes(GetNodesRequest) returns (stream Node) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_CLUSTER_STATUS
    };
  }

  // ApproveNode progresses a node's registration process by changing its state
  // in the cluster from NEW to STANDBY, if not yet STANDBY. This is required
  // for the node to fully become part of the cluster (ie. have an UP state),
  // and is required to be called by a manager manually.
  //
  // Managers can find out what nodes require approval by performing
  // a GetNodes call and filtering for nodes in the NEW state. This call is
  // idempotent and can be executed multiple times, and is a no-op if the node
  // is already in the STANDBY or even UP states.
  //
  // In the future, the approval process will be governed by cluster policy,
  // but currently any node can be approved by a manager, and the manager is
  // responsible for performing an out-of-band attestation of the node being
  // approved (eg. by verifying that the node that is being approved has the
  // same public key as what the registering node displays in its startup
  // logs).
  rpc ApproveNode(ApproveNodeRequest) returns (ApproveNodeResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_APPROVE_NODE
    };
  }
}

// Request message for Management.GetRegisterTicket. It currently carries no
// fields.
message GetRegisterTicketRequest {
}

// Response message for Management.GetRegisterTicket.
message GetRegisterTicketResponse {
  // Opaque bytes that comprise the RegisterTicket.
  bytes ticket = 1;
}

// Request message for Management.GetClusterInfo. It currently carries no
// fields.
message GetClusterInfoRequest {
}

// Response message for Management.GetClusterInfo.
message GetClusterInfoResponse {
  // cluster_directory contains information about individual nodes in the
  // cluster that can be used to dial the cluster's services.
  metropolis.proto.common.ClusterDirectory cluster_directory = 1;

  // ca_certificate is the DER-encoded X.509 CA certificate of the cluster.
  bytes ca_certificate = 2;
}

// Request message for Management.GetNodes.
message GetNodesRequest {
  // filter is a CEL expression used to limit the set of GetNodes results.
  // Each processed node protobuf message is exposed to the filter as the
  // "node" variable, while related state and health enum constants are
  // anchored in the root namespace, eg. NODE_STATE_UP, or HEARTBEAT_TIMEOUT.
  // A node is returned whenever the expression evaluates to true for it. If
  // the filter is empty, all nodes are returned.
  string filter = 1;
}

// Node in a Metropolis cluster, streamed by Management.GetNodes. For each node
// in the cluster, this message will be emitted and will contain information
// about that node.
//
// The fields contained are node fields that PERMISSION_READ_CLUSTER_STATUS
// allows access to, ie. 'non-private' fields, ones that might be internal to
// the cluster and possibly considered sensitive information about the
// infrastructure, but whose knowledge does not allow escalating privileges
// within the cluster.
message Node {
  // Raw Ed25519 public key of this node, which can be used to generate
  // the node's ID. This is always set.
  bytes pubkey = 1;
  // State of the node from the point of view of the cluster. This is
  // always set.
  metropolis.proto.common.NodeState state = 2;
  // Last reported status by the Node, absent if a node hasn't yet reported
  // its status.
  metropolis.proto.common.NodeStatus status = 3;
  // Roles assigned by the cluster. This is always set.
  metropolis.proto.common.NodeRoles roles = 4;

  // Health describes node's health as seen from the cluster perspective.
  enum Health {
    // INVALID is the enum's zero value, ie. the proto3 default for a field
    // that was not set.
    INVALID = 0;
    // UNKNOWN is used whenever there were no heartbeats received from a
    // given node AND too little time has passed since last Curator leader
    // election to know whether the node is actually timing out. UNKNOWN
    // is also returned for nodes whose NodeState does not equal
    // NODE_STATE_UP.
    UNKNOWN = 1;
    // HEALTHY describes nodes that have sent a heartbeat recently.
    HEALTHY = 2;
    // HEARTBEAT_TIMEOUT describes nodes that have not sent a heartbeat in
    // the interval specified by curator.HeartbeatTimeout.
    HEARTBEAT_TIMEOUT = 3;
  }
  // health is this node's health, expressed as one of the Health values
  // above.
  Health health = 5;
  // heartbeat_timestamp is the duration since the last of the node's
  // heartbeats was received, expressed in nanoseconds. It equals zero if no
  // heartbeats were received.
  //
  // NOTE: despite its name, this field is documented as an elapsed duration,
  // not as an absolute point in time.
  int64 heartbeat_timestamp = 6;
}

// Request message for Management.ApproveNode.
message ApproveNodeRequest {
  // Raw public key of the node being approved, has to correspond to a node
  // currently in the cluster.
  bytes pubkey = 1;
}

// Response message for Management.ApproveNode. It currently carries no
// fields.
message ApproveNodeResponse {
}