blob: 3c869a91e5e956fc73577af0a2ac6f53ea136889 [file] [log] [blame]
syntax = "proto3";
package metropolis.proto.api;
option go_package = "source.monogon.dev/metropolis/proto/api";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "metropolis/proto/common/common.proto";
import "metropolis/proto/ext/authorization.proto";
// Management service available to Cluster Managers, allowing operational work
// to be performed on the cluster (eg. adding nodes, retrieving information
// about a running cluster, etc.).
//
// Every RPC declares the permission it needs through the
// metropolis.proto.ext.authorization method option.
service Management {
// GetRegisterTicket retrieves the current RegisterTicket which is required
// for new nodes to register into the cluster. Presenting this ticket on
// registration does not automatically grant access to arbitrary node
// registration. Instead, it is used to guard the API surface of the
// Register RPC from potential denial of service attacks, and can be
// regenerated at any time in case it leaks.
rpc GetRegisterTicket(GetRegisterTicketRequest) returns (GetRegisterTicketResponse) {
option (metropolis.proto.ext.authorization) = {
need: PERMISSION_GET_REGISTER_TICKET
};
}
// GetClusterInfo retrieves publicly available summary information about
// this cluster, notably data required for nodes to register into a cluster
// or join it (other than the Register Ticket, which is gated by an
// additional permission).
//
// NOTE(review): the data is described as publicly available, yet the call
// declares a need for PERMISSION_READ_CLUSTER_STATUS - confirm which of the
// two is intended.
rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) {
option (metropolis.proto.ext.authorization) = {
need: PERMISSION_READ_CLUSTER_STATUS
};
}
// GetNodes retrieves information about nodes in the cluster, streaming one
// Node message per returned node. The set of returned nodes can be narrowed
// down with GetNodesRequest.filter; if no filter is given, all nodes are
// returned. Currently, all available data is returned for each node.
rpc GetNodes(GetNodesRequest) returns (stream Node) {
option (metropolis.proto.ext.authorization) = {
need: PERMISSION_READ_CLUSTER_STATUS
};
}
// ApproveNode progresses a node's registration process by changing its state
// in the cluster from NEW to STANDBY, if not yet STANDBY. This is required
// for the node to fully become part of the cluster (ie. have an UP state),
// and is required to be called by a manager manually.
//
// Managers can find out what nodes require approval by performing
// a GetNodes call and filtering for nodes in the NEW state. This call is
// idempotent and can be executed multiple times, and is a no-op if the node
// is already in the STANDBY or even UP states.
//
// In the future, approval process will be governed by cluster policy, but
// currently any node can be approved by a manager, and the manager is
// responsible for performing an out-of-band attestation of the node being
// approved (eg. by verifying that the node that is being approved has the
// same public key as what the registering node displays in its startup
// logs).
rpc ApproveNode(ApproveNodeRequest) returns (ApproveNodeResponse) {
option (metropolis.proto.ext.authorization) = {
need: PERMISSION_APPROVE_NODE
};
}
// UpdateNodeRoles updates a single node's roles. The node is selected by
// either its public key or its ID, see UpdateNodeRolesRequest.
rpc UpdateNodeRoles(UpdateNodeRolesRequest) returns (UpdateNodeRolesResponse) {
option (metropolis.proto.ext.authorization) = {
need: PERMISSION_UPDATE_NODE_ROLES
};
}
}
// GetRegisterTicketRequest is the request message of
// Management.GetRegisterTicket. It currently carries no fields.
message GetRegisterTicketRequest {
}
// GetRegisterTicketResponse is the response message of
// Management.GetRegisterTicket.
message GetRegisterTicketResponse {
// Opaque bytes that comprise the RegisterTicket.
bytes ticket = 1;
}
// GetClusterInfoRequest is the request message of Management.GetClusterInfo.
// It currently carries no fields.
message GetClusterInfoRequest {
}
// GetClusterInfoResponse is the response message of
// Management.GetClusterInfo.
message GetClusterInfoResponse {
// cluster_directory contains information about individual nodes in the
// cluster that can be used to dial the cluster's services.
metropolis.proto.common.ClusterDirectory cluster_directory = 1;
// ca_certificate is the x509 DER encoded CA certificate of the cluster.
bytes ca_certificate = 2;
}
// GetNodesRequest is the request message of Management.GetNodes.
message GetNodesRequest {
// filter is a CEL expression used to limit the count of GetNodes results.
// Each processed node protobuf message is exposed to the filter as the
// "node" variable, while related state and health enum constants are
// anchored in the root namespace, eg. NODE_STATE_UP, or HEARTBEAT_TIMEOUT.
// A node is returned if the expression evaluates to true for it. If the
// filter is empty, all nodes are returned.
string filter = 1;
}
// Node in a Metropolis cluster, streamed by Management.GetNodes. For each node
// in the cluster, this message will be emitted and will contain information
// about that node.
//
// The fields contained are node fields that PERMISSION_READ_CLUSTER_STATUS
// allows access to, ie. 'non-private' fields, ones that might be internal to
// the cluster and possibly considered sensitive information about the
// infrastructure, but whose knowledge does not allow to escalate privileges
// within the cluster.
message Node {
// Raw Ed25519 public key of this node, which can be used to generate
// the node's ID. This is always set.
bytes pubkey = 1;
// Node ID calculated from pubkey, eg. 'metropolis-123456'.
string id = 7;
// State of the node from the point of view of the cluster. This is
// always set.
metropolis.proto.common.NodeState state = 2;
// Last reported status by the Node, absent if a node hasn't yet reported
// its status.
metropolis.proto.common.NodeStatus status = 3;
// Roles assigned by the cluster. This is always set.
metropolis.proto.common.NodeRoles roles = 4;
// Health describes node's health as seen from the cluster perspective.
enum Health {
// INVALID is the zero value of this enum, ie. what an unset health field
// reads as.
INVALID = 0;
// UNKNOWN is used whenever there were no heartbeats received from a
// given node AND too little time has passed since last Curator leader
// election to know whether the node is actually timing out. UNKNOWN
// is also returned for nodes which NodeState does not equal
// NODE_STATE_UP.
UNKNOWN = 1;
// HEALTHY describes nodes that have sent a heartbeat recently.
HEALTHY = 2;
// HEARTBEAT_TIMEOUT describes nodes that have not sent a heartbeat in
// the interval specified by curator.HeartbeatTimeout.
HEARTBEAT_TIMEOUT = 3;
}
// health is the node's health as seen from the cluster perspective, see
// Health.
Health health = 5;
// time_since_heartbeat is the duration since the last of the node's
// heartbeats was received, expressed in nanoseconds. It is only valid with
// the health status of either HEALTHY or HEARTBEAT_TIMEOUT.
google.protobuf.Duration time_since_heartbeat = 6;
}
// ApproveNodeRequest is the request message of Management.ApproveNode.
message ApproveNodeRequest {
// Raw public key of the node being approved, has to correspond to a node
// currently in the cluster.
bytes pubkey = 1;
}
// ApproveNodeResponse is the response message of Management.ApproveNode. It
// currently carries no fields.
message ApproveNodeResponse {
}
// UpdateNodeRolesRequest updates roles of a single node, identified either by
// its public key or by its ID. All role fields are optional, and no change
// will result if they're either unset or if their value matches existing
// state.
message UpdateNodeRolesRequest {
// node uniquely identifies the node subject to this request.
oneof node {
// pubkey is the Ed25519 public key of this node, which can be used to
// generate the node's ID.
bytes pubkey = 1;
// id is the human-readable identifier of the node, based on its public
// key.
string id = 4;
}
// kubernetesWorker adjusts the appropriate role when set.
optional bool kubernetesWorker = 2;
// kubernetesController adjusts the appropriate role when set. Nodes performing
// this role must also be consensus members.
optional bool kubernetesController = 5;
// consensusMember adjusts the appropriate role when set.
optional bool consensusMember = 3;
}
// UpdateNodeRolesResponse is the response message of
// Management.UpdateNodeRoles. It currently carries no fields.
message UpdateNodeRolesResponse {
}
// NodeManagement runs on every node of the cluster and provides management
// and troubleshooting RPCs to operators. All requests must be authenticated.
service NodeManagement {
// Logs returns log entries for the DN given in the request, as a stream of
// GetLogsResponse messages. Which entries are returned is controlled by the
// request's filters, backlog_mode and stream_mode fields.
rpc Logs(GetLogsRequest) returns (stream GetLogsResponse) {
option (metropolis.proto.ext.authorization) = {
need: PERMISSION_READ_NODE_LOGS
};
}
}
// Severity level corresponding to //metropolis/pkg/logtree.Severity.
enum LeveledLogSeverity {
// INVALID is the zero value of this enum, ie. what an unset severity field
// reads as.
INVALID = 0;
INFO = 1;
WARNING = 2;
ERROR = 3;
FATAL = 4;
}
// Filter set when requesting logs for a given DN. This message is equivalent to
// the following GADT enum:
// data LogFilter = WithChildren
// | OnlyRaw
// | OnlyLeveled
// | LeveledWithMinimumSeverity(Severity)
//
// Multiple LogFilters can be chained/combined when requesting logs, as long as
// they do not conflict.
message LogFilter {
// Entries will be returned not only for the given DN, but all child DNs as
// well. For instance, if the requested DN is foo, entries logged to foo,
// foo.bar and foo.bar.baz will all be returned.
message WithChildren {
}
// Only raw logging entries will be returned. Conflicts with OnlyLeveled
// filters.
message OnlyRaw {
}
// Only leveled logging entries will be returned. Conflicts with OnlyRaw
// filters.
message OnlyLeveled {
}
// If leveled logs are returned, all entries at severity lower than `minimum`
// will be discarded.
message LeveledWithMinimumSeverity {
// minimum is the lowest severity that is still returned.
LeveledLogSeverity minimum = 1;
}
// filter selects which one of the filter kinds above this LogFilter
// represents.
//
// NOTE(review): field number 2 is not used in this oneof - if it belonged to
// a since-removed member, consider marking it as reserved.
oneof filter {
WithChildren with_children = 1;
OnlyRaw only_raw = 3;
OnlyLeveled only_leveled = 4;
LeveledWithMinimumSeverity leveled_with_minimum_severity = 5;
}
}
// GetLogsRequest is the request message of NodeManagement.Logs. It selects the
// DN to read logs from, the filters to apply, and whether historic (backlog)
// and/or newly arriving (streamed) entries should be returned.
message GetLogsRequest {
// DN from which to request logs. All supervised runnables live at `root.`,
// the init code lives at `init.`.
string dn = 1;
// Filters to apply to returned data.
repeated LogFilter filters = 2;
// BacklogMode selects how much historic data is returned.
enum BacklogMode {
// BACKLOG_INVALID is the zero value of this enum, ie. what an unset
// backlog_mode reads as.
BACKLOG_INVALID = 0;
// No historic data will be returned.
BACKLOG_DISABLE = 1;
// All available historic data will be returned.
BACKLOG_ALL = 2;
// At most backlog_count entries will be returned, if available.
BACKLOG_COUNT = 3;
}
// backlog_mode selects how much historic data is returned, see BacklogMode.
BacklogMode backlog_mode = 3;
// backlog_count is the maximum number of historic entries returned when
// backlog_mode is BACKLOG_COUNT.
// NOTE(review): presumably ignored for other backlog modes - confirm.
int64 backlog_count = 4;
// StreamMode selects whether entries are streamed after the backlog has
// been served.
enum StreamMode {
// STREAM_INVALID is the zero value of this enum, ie. what an unset
// stream_mode reads as.
STREAM_INVALID = 0;
// No streaming entries, gRPC stream will be closed as soon as all backlog
// data is served.
STREAM_DISABLE = 1;
// Entries will be streamed as early as available right after all backlog
// data is served.
STREAM_UNBUFFERED = 2;
}
// stream_mode selects whether entries are streamed after the backlog has
// been served, see StreamMode.
StreamMode stream_mode = 5;
}
// LogEntry is a single log entry as returned within GetLogsResponse. It is
// either a leveled or a raw entry, see the kind oneof.
message LogEntry {
// Leveled is a log entry carrying a severity, a timestamp and a location in
// addition to its text lines.
message Leveled {
// lines are the text lines of this entry.
repeated string lines = 1;
// timestamp of this entry.
// NOTE(review): presumably the time at which the entry was emitted -
// confirm.
google.protobuf.Timestamp timestamp = 2;
// severity of this entry.
LeveledLogSeverity severity = 3;
// location of this entry.
// NOTE(review): presumably the source code location that emitted the
// entry - confirm format.
string location = 4;
}
// Raw is a log entry consisting of unstructured data only.
message Raw {
// data is the text of this entry.
string data = 1;
// original_length of this entry.
// NOTE(review): presumably the length of the entry before any truncation
// of data - confirm.
int64 original_length = 2;
}
// dn of this entry.
// NOTE(review): presumably the DN that this entry was logged to, which may
// be a child of the requested DN when a WithChildren filter is used -
// confirm.
string dn = 1;
// kind is the payload of this entry, either leveled or raw.
oneof kind {
Leveled leveled = 2;
Raw raw = 3;
}
}
// GetLogsResponse is the message streamed back by NodeManagement.Logs.
message GetLogsResponse {
// Entries from the requested historical backlog (see
// GetLogsRequest.backlog_mode). They will all be served before the first
// stream_entries are served (if any).
repeated LogEntry backlog_entries = 1;
// Entries streamed as they arrive. Currently no server-side buffering is
// enabled, instead every line is served as early as it arrives. However,
// this might change in the future, so this behaviour cannot be depended
// upon.
repeated LogEntry stream_entries = 2;
}