blob: 3c869a91e5e956fc73577af0a2ac6f53ea136889 [file] [log] [blame]
// Management API definitions for Metropolis: the cluster-wide Management
// service, the per-node NodeManagement service and the messages they use.
syntax = "proto3";

package metropolis.proto.api;

option go_package = "source.monogon.dev/metropolis/proto/api";
4
Mateusz Zalega944cb532022-06-20 16:54:17 +02005import "google/protobuf/duration.proto";
Serge Bazanskib91938f2023-03-29 14:31:22 +02006import "google/protobuf/timestamp.proto";
Mateusz Zalega944cb532022-06-20 16:54:17 +02007
Serge Bazanskibc671d02021-10-05 17:53:32 +02008import "metropolis/proto/common/common.proto";
Serge Bazanski9ffa1f92021-09-01 15:42:23 +02009import "metropolis/proto/ext/authorization.proto";
10
// Management is the service exposed to Cluster Managers. It lets them perform
// operational work on the cluster, eg. adding nodes or retrieving information
// about a running cluster.
service Management {
  // GetRegisterTicket returns the current RegisterTicket, which new nodes
  // must present in order to register into the cluster. Presenting the
  // ticket does not by itself grant access to arbitrary node registration:
  // the ticket only guards the API surface of the Register RPC against
  // potential denial of service attacks. It can be regenerated at any time
  // should it leak.
  rpc GetRegisterTicket(GetRegisterTicketRequest)
      returns (GetRegisterTicketResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_GET_REGISTER_TICKET
    };
  }

  // GetClusterInfo returns publicly available summary information about this
  // cluster. Notably, this includes the data nodes need to register into or
  // join a cluster, with the exception of the Register Ticket, which is
  // gated by an additional permission.
  rpc GetClusterInfo(GetClusterInfoRequest)
      returns (GetClusterInfoResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_CLUSTER_STATUS
    };
  }

  // GetNodes returns information about nodes in the cluster. Currently, all
  // available data about all nodes is returned.
  rpc GetNodes(GetNodesRequest) returns (stream Node) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_CLUSTER_STATUS
    };
  }

  // ApproveNode advances the registration process of a node by moving its
  // cluster state from NEW to STANDBY, unless it is STANDBY already. A node
  // cannot fully become part of the cluster (ie. reach the UP state) without
  // this step, which a manager has to perform manually.
  //
  // To find nodes awaiting approval, managers can call GetNodes and filter
  // for nodes in the NEW state. The call is idempotent: it may be executed
  // multiple times, and it is a no-op for nodes that are already in the
  // STANDBY or even UP states.
  //
  // In the future, the approval process will be governed by cluster policy.
  // For now, a manager can approve any node, and it is the manager's
  // responsibility to perform an out-of-band attestation of the node being
  // approved (eg. by verifying that the node being approved has the same
  // public key as the one the registering node displays in its startup
  // logs).
  rpc ApproveNode(ApproveNodeRequest) returns (ApproveNodeResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_APPROVE_NODE
    };
  }

  // UpdateNodeRoles changes the roles of a single node.
  rpc UpdateNodeRoles(UpdateNodeRolesRequest)
      returns (UpdateNodeRolesResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_UPDATE_NODE_ROLES
    };
  }
}
74
// GetRegisterTicketRequest is the request of Management.GetRegisterTicket. It
// currently carries no fields.
message GetRegisterTicketRequest {}
77
// GetRegisterTicketResponse is the response of Management.GetRegisterTicket.
message GetRegisterTicketResponse {
  // ticket holds the RegisterTicket as opaque bytes.
  bytes ticket = 1;
}
Serge Bazanskibc671d02021-10-05 17:53:32 +020082
// GetClusterInfoRequest is the request of Management.GetClusterInfo. It
// currently carries no fields.
message GetClusterInfoRequest {}
85
// GetClusterInfoResponse is the response of Management.GetClusterInfo.
message GetClusterInfoResponse {
  // cluster_directory describes the individual nodes of the cluster, in a
  // form that can be used to dial the cluster's services.
  metropolis.proto.common.ClusterDirectory cluster_directory = 1;

  // ca_certificate is the CA certificate of the cluster, as x509 DER.
  bytes ca_certificate = 2;
}
Serge Bazanski56114472021-10-11 14:47:54 +020094
// GetNodesRequest is the request of Management.GetNodes.
message GetNodesRequest {
  // filter is a CEL expression that restricts which nodes GetNodes returns.
  // The expression sees each processed node protobuf message as the "node"
  // variable. The related state and health enum constants are anchored in
  // the root namespace, eg. NODE_STATE_UP, or HEARTBEAT_TIMEOUT. A node is
  // returned whenever the expression evaluates to true for it. An empty
  // filter returns all nodes.
  string filter = 1;
}
104
// Node describes a single node of a Metropolis cluster. It is the message
// streamed by Management.GetNodes, which emits one Node per cluster node,
// filled with information about that node.
//
// Only node fields that PERMISSION_READ_CLUSTER_STATUS grants access to are
// present. These are 'non-private' fields: they may be internal to the
// cluster and might be considered sensitive information about the
// infrastructure, but knowing them does not allow escalating privileges
// within the cluster.
message Node {
  // pubkey is the raw Ed25519 public key of this node, from which the node's
  // ID can be generated. Always set.
  bytes pubkey = 1;
  // id is the node ID calculated from pubkey, ie. 'metropolis-123456'.
  string id = 7;
  // state is the state of the node as seen by the cluster. Always set.
  metropolis.proto.common.NodeState state = 2;
  // status is the status last reported by the node. It is absent for nodes
  // which have not reported their status yet.
  metropolis.proto.common.NodeStatus status = 3;
  // roles are the roles the cluster assigned to the node. Always set.
  metropolis.proto.common.NodeRoles roles = 4;

  // Health is the health of a node from the perspective of the cluster.
  enum Health {
    INVALID = 0;
    // UNKNOWN means that no heartbeats were received from the node AND that
    // too little time has passed since the last Curator leader election to
    // tell whether the node is actually timing out. It is also returned for
    // nodes whose NodeState is not NODE_STATE_UP.
    UNKNOWN = 1;
    // HEALTHY means that the node has sent a heartbeat recently.
    HEALTHY = 2;
    // HEARTBEAT_TIMEOUT means that the node has not sent a heartbeat within
    // the interval specified by curator.HeartbeatTimeout.
    HEARTBEAT_TIMEOUT = 3;
  }
  // health is the health of this node, see Health.
  Health health = 5;
  // time_since_heartbeat is the time elapsed since the most recent of the
  // node's heartbeats was received. It is only valid when health is either
  // HEALTHY or HEARTBEAT_TIMEOUT.
  google.protobuf.Duration time_since_heartbeat = 6;
}
Serge Bazanski1612d4b2021-11-12 13:54:15 +0100150
// ApproveNodeRequest is the request of Management.ApproveNode.
message ApproveNodeRequest {
  // pubkey is the raw public key of the node to approve. It must belong to a
  // node that is currently in the cluster.
  bytes pubkey = 1;
}
156
// ApproveNodeResponse is the response of Management.ApproveNode. It currently
// carries no fields.
message ApproveNodeResponse {}
Mateusz Zalegabb2edbe2022-06-08 11:57:09 +0200159
// UpdateNodeRolesRequest updates the roles of a single node, selected either
// by its public key or by its ID. All role fields are optional: no change
// results from a role field that is unset, or whose value matches the
// existing state.
message UpdateNodeRolesRequest {
  // node uniquely identifies the node subject to this request.
  oneof node {
    // pubkey is the Ed25519 public key of this node, which can be used to
    // generate the node's ID.
    bytes pubkey = 1;
    // id is the human-readable identifier of the node, based on its public
    // key.
    string id = 4;
  }

  // kubernetesWorker adjusts the appropriate role when set.
  optional bool kubernetesWorker = 2;
  // kubernetesController adjusts the appropriate role when set. Nodes
  // performing this role must also be consensus members.
  optional bool kubernetesController = 5;
  // consensusMember adjusts the appropriate role when set.
  optional bool consensusMember = 3;
}
181
// UpdateNodeRolesResponse is the response of Management.UpdateNodeRoles. It
// currently carries no fields.
message UpdateNodeRolesResponse {}
Serge Bazanskib40c0082023-03-29 14:28:04 +0200184
// NodeManagement is served by every node of the cluster. It provides
// management and troubleshooting RPCs to operators. All requests must be
// authenticated.
service NodeManagement {
  // Logs streams log entries selected by the request: backlog (historic)
  // entries and/or entries streamed as they arrive, depending on the modes
  // set in GetLogsRequest.
  rpc Logs(GetLogsRequest) returns (stream GetLogsResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_NODE_LOGS
    };
  }
}
194
195
// LeveledLogSeverity is the severity of a leveled log entry. It corresponds
// to //metropolis/pkg/logtree.Severity.
enum LeveledLogSeverity {
  INVALID = 0;
  INFO = 1;
  WARNING = 2;
  ERROR = 3;
  FATAL = 4;
}
204
// LogFilter is a filter applied when requesting logs for a given DN. The
// message is equivalent to the following GADT enum:
//
//   data LogFilter = WithChildren
//                  | OnlyRaw
//                  | OnlyLeveled
//                  | LeveledWithMinimumSeverity(Severity)
//
// A log request can chain/combine multiple LogFilters, provided they do not
// conflict with each other.
message LogFilter {
  // WithChildren returns entries for the given DN and for all of its child
  // DNs. For instance, requesting the DN foo returns entries logged to foo,
  // foo.bar and foo.bar.baz.
  message WithChildren {}
  // OnlyRaw returns raw logging entries only. Conflicts with OnlyLeveled
  // filters.
  message OnlyRaw {}
  // OnlyLeveled returns leveled logging entries only. Conflicts with OnlyRaw
  // filters.
  message OnlyLeveled {}
  // LeveledWithMinimumSeverity discards, whenever leveled logs are returned,
  // all entries whose severity is lower than `minimum`.
  message LeveledWithMinimumSeverity {
    LeveledLogSeverity minimum = 1;
  }

  // filter selects which one of the above filters this LogFilter represents.
  oneof filter {
    WithChildren with_children = 1;
    OnlyRaw only_raw = 3;
    OnlyLeveled only_leveled = 4;
    LeveledWithMinimumSeverity leveled_with_minimum_severity = 5;
  }
}
240
// GetLogsRequest is the request of NodeManagement.Logs. It selects the DN to
// read logs from, the filters to apply, and whether backlog and/or streamed
// entries are wanted.
message GetLogsRequest {
  // dn is the DN to request logs from. All supervised runnables live at
  // `root.`, while the init code lives at `init.`.
  string dn = 1;
  // filters are applied to the returned data.
  repeated LogFilter filters = 2;

  // BacklogMode selects how much historic data is returned.
  enum BacklogMode {
    BACKLOG_INVALID = 0;
    // Return no historic data.
    BACKLOG_DISABLE = 1;
    // Return all available historic data.
    BACKLOG_ALL = 2;
    // Return at most backlog_count entries, if available.
    BACKLOG_COUNT = 3;
  }
  // backlog_mode is the requested BacklogMode.
  BacklogMode backlog_mode = 3;
  // backlog_count is the maximum number of historic entries returned when
  // backlog_mode is BACKLOG_COUNT.
  int64 backlog_count = 4;

  // StreamMode selects whether entries are streamed after the backlog.
  enum StreamMode {
    STREAM_INVALID = 0;
    // Do not stream entries: the gRPC stream is closed as soon as all
    // backlog data has been served.
    STREAM_DISABLE = 1;
    // Stream entries as early as they are available, right after all backlog
    // data has been served.
    STREAM_UNBUFFERED = 2;
  }
  // stream_mode is the requested StreamMode.
  StreamMode stream_mode = 5;
}
269
// LogEntry is a single log entry, either leveled or raw, together with the DN
// it is associated with. It is carried by GetLogsResponse.
message LogEntry {
  // Leveled is the payload of a leveled log entry.
  message Leveled {
    // Text lines of the entry.
    repeated string lines = 1;
    // Timestamp of the entry.
    google.protobuf.Timestamp timestamp = 2;
    // Severity of the entry.
    LeveledLogSeverity severity = 3;
    // NOTE(review): presumably the source code location that emitted the
    // entry - confirm against logtree.
    string location = 4;
  }
  // Raw is the payload of a raw log entry.
  message Raw {
    // Data of the entry.
    string data = 1;
    // NOTE(review): presumably the length of the data before any truncation
    // - confirm against logtree.
    int64 original_length = 2;
  }

  // DN of the entry.
  string dn = 1;
  // kind holds the payload of the entry, which is either leveled or raw.
  oneof kind {
    Leveled leveled = 2;
    Raw raw = 3;
  }
}
288
// GetLogsResponse is the message streamed by NodeManagement.Logs.
message GetLogsResponse {
  // backlog_entries are the historical entries that were requested (see
  // GetLogsRequest.backlog_mode). All of them are served before the first
  // stream_entries are served (if any).
  repeated LogEntry backlog_entries = 1;
  // stream_entries are entries streamed as they arrive. Currently there is
  // no server-side buffering, and every line is served as early as it
  // arrives. This might change in the future, so this behaviour cannot be
  // depended upon.
  repeated LogEntry stream_entries = 2;
}