blob: 80cb19529370700f0a4466d1880c64e2f8d10c0e [file] [log] [blame]
syntax = "proto3";
package metropolis.proto.api;
option go_package = "source.monogon.dev/metropolis/proto/api";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "metropolis/proto/common/common.proto";
import "metropolis/proto/ext/authorization.proto";
// Management is the service exposed to Cluster Managers. It lets them carry
// out operational work on the cluster (eg. adding nodes, retrieving
// information about a running cluster, etc.).
service Management {
  // GetRegisterTicket returns the current RegisterTicket, which new nodes
  // must present in order to register into the cluster. Holding the ticket
  // does not by itself grant the ability to register arbitrary nodes: its
  // purpose is to shield the API surface of the Register RPC from potential
  // denial of service attacks. The ticket can be regenerated at any time
  // should it leak.
  rpc GetRegisterTicket(GetRegisterTicketRequest)
      returns (GetRegisterTicketResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_GET_REGISTER_TICKET
    };
  }

  // GetClusterInfo returns publicly available summary information about this
  // cluster, notably the data nodes need in order to register into a cluster
  // or join it. The Register Ticket is not part of this data, as it is gated
  // by an additional permission.
  rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_CLUSTER_STATUS
    };
  }

  // GetNodes streams information about nodes in the cluster, one Node
  // message per node. Currently, all available data is returned for each
  // node. The set of returned nodes can be narrowed with
  // GetNodesRequest.filter; with an empty filter all nodes are returned.
  rpc GetNodes(GetNodesRequest) returns (stream Node) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_CLUSTER_STATUS
    };
  }

  // ApproveNode advances a node's registration process by moving its state
  // in the cluster from NEW to STANDBY, if it is not yet STANDBY. Without
  // this step the node cannot fully become part of the cluster (ie. reach the
  // UP state), and the call has to be made manually by a manager.
  //
  // To find out which nodes are awaiting approval, managers can perform a
  // GetNodes call and filter for nodes in the NEW state. This call is
  // idempotent: it can be executed multiple times, and is a no-op if the node
  // is already in the STANDBY or even UP states.
  //
  // In the future, the approval process will be governed by cluster policy.
  // Currently, any node can be approved by a manager, and the manager is
  // responsible for performing an out-of-band attestation of the node being
  // approved (eg. by verifying that the node that is being approved has the
  // same public key as what the registering node displays in its startup
  // logs).
  rpc ApproveNode(ApproveNodeRequest) returns (ApproveNodeResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_APPROVE_NODE
    };
  }

  // UpdateNodeRoles updates the roles of a single node.
  rpc UpdateNodeRoles(UpdateNodeRolesRequest)
      returns (UpdateNodeRolesResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_UPDATE_NODE_ROLES
    };
  }
}
// GetRegisterTicketRequest is the request of Management.GetRegisterTicket.
// It currently carries no fields.
message GetRegisterTicketRequest {}
// GetRegisterTicketResponse is the response of Management.GetRegisterTicket.
message GetRegisterTicketResponse {
  // ticket is the RegisterTicket, as opaque bytes.
  bytes ticket = 1;
}
// GetClusterInfoRequest is the request of Management.GetClusterInfo. It
// currently carries no fields.
message GetClusterInfoRequest {}
// GetClusterInfoResponse is the response of Management.GetClusterInfo.
message GetClusterInfoResponse {
  // cluster_directory holds information about individual nodes in the
  // cluster, which can be used to dial the cluster's services.
  metropolis.proto.common.ClusterDirectory cluster_directory = 1;

  // ca_certificate is the cluster's CA certificate, x509 DER encoded.
  bytes ca_certificate = 2;

  // cluster_configuration carries the cluster's ClusterConfiguration; see
  // metropolis.proto.common.ClusterConfiguration for its contents.
  metropolis.proto.common.ClusterConfiguration cluster_configuration = 3;
}
// GetNodesRequest is the request of Management.GetNodes.
message GetNodesRequest {
  // filter is a CEL expression that limits which nodes GetNodes returns.
  // The expression sees each processed node protobuf message as the "node"
  // variable. Related state and health enum constants are anchored in the
  // root namespace, eg. NODE_STATE_UP, or HEARTBEAT_TIMEOUT. A node is
  // returned whenever the expression evaluates to true for it. An empty
  // filter returns all nodes.
  string filter = 1;
}
// Node describes a single node of a Metropolis cluster. It is the message
// streamed by Management.GetNodes, which emits one such message, filled with
// information about the node, for each node in the cluster.
//
// Only node fields that PERMISSION_READ_CLUSTER_STATUS grants access to are
// included, ie. 'non-private' fields: ones that might be internal to the
// cluster and possibly considered sensitive information about the
// infrastructure, but whose knowledge does not allow to escalate privileges
// within the cluster.
message Node {
  // Health describes a node's health as seen from the cluster perspective.
  enum Health {
    // INVALID is the zero value, ie. what an unset health field reads as.
    INVALID = 0;
    // UNKNOWN is used whenever no heartbeats were received from a given node
    // AND too little time has passed since the last Curator leader election
    // to know whether the node is actually timing out. UNKNOWN is also
    // returned for nodes whose NodeState does not equal NODE_STATE_UP.
    UNKNOWN = 1;
    // HEALTHY describes nodes that have sent a heartbeat recently.
    HEALTHY = 2;
    // HEARTBEAT_TIMEOUT describes nodes that have not sent a heartbeat in
    // the interval specified by curator.HeartbeatTimeout.
    HEARTBEAT_TIMEOUT = 3;
  }

  // pubkey is the raw Ed25519 public key of this node, from which the node's
  // ID can be generated. This is always set.
  bytes pubkey = 1;

  // id is the node ID calculated from pubkey, eg. 'metropolis-123456'.
  string id = 7;

  // state is the state of the node from the point of view of the cluster.
  // This is always set.
  metropolis.proto.common.NodeState state = 2;

  // status is the last status reported by the node. It is absent if the node
  // hasn't yet reported its status.
  metropolis.proto.common.NodeStatus status = 3;

  // roles are the roles assigned to the node by the cluster. This is always
  // set.
  metropolis.proto.common.NodeRoles roles = 4;

  // health is the node's health as seen from the cluster perspective; see
  // the Health enum for the meaning of each value.
  Health health = 5;

  // time_since_heartbeat is the time elapsed since the last of the node's
  // heartbeats was received. It is carried as a google.protobuf.Duration
  // (seconds plus nanoseconds), not as a raw nanosecond count. It is only
  // valid with the health status of either HEALTHY or HEARTBEAT_TIMEOUT.
  google.protobuf.Duration time_since_heartbeat = 6;

  // tpm_usage describes whether this node has a TPM 2.0 and whether it is
  // being actively used as part of its membership in the Metropolis cluster.
  //
  // Currently, the TPM 2.0 is only used to seal the local part of the disk
  // encryption key and the early join credentials of the node. Depending on
  // future cluster configuration settings, this might also indicate that the
  // node has actually passed high assurance hardware attestation against the
  // cluster.
  metropolis.proto.common.NodeTPMUsage tpm_usage = 8;
}
// ApproveNodeRequest is the request of Management.ApproveNode.
message ApproveNodeRequest {
  // pubkey is the raw public key of the node being approved. It has to
  // correspond to a node currently in the cluster.
  bytes pubkey = 1;
}
// ApproveNodeResponse is the response of Management.ApproveNode. It
// currently carries no fields.
message ApproveNodeResponse {}
// UpdateNodeRolesRequest updates the roles of a single node, selected either
// by its public key or by its ID. All role fields are optional, and no change
// will result if they're either unset or if their value matches existing
// state.
//
// NOTE(review): the role fields use lowerCamelCase names rather than the
// usual lower_snake_case. They are kept as-is, since renaming them would
// change generated code and text-format names for existing users.
message UpdateNodeRolesRequest {
  // node uniquely identifies the node subject to this request.
  oneof node {
    // pubkey is the Ed25519 public key of the node, which can be used to
    // generate the node's ID.
    bytes pubkey = 1;
    // id is the human-readable identifier of the node, based on its public
    // key.
    string id = 4;
  }

  // kubernetesWorker adjusts the appropriate role when set.
  optional bool kubernetesWorker = 2;

  // kubernetesController adjusts the appropriate role when set. Nodes
  // performing this role must also be consensus members.
  optional bool kubernetesController = 5;

  // consensusMember adjusts the appropriate role when set.
  optional bool consensusMember = 3;
}
// UpdateNodeRolesResponse is the response of Management.UpdateNodeRoles. It
// currently carries no fields.
message UpdateNodeRolesResponse {}
// NodeManagement runs on every node of the cluster and provides management
// and troubleshooting RPCs to operators. All requests must be authenticated.
service NodeManagement {
  // Logs returns historical and/or streaming logs for a given DN with given
  // filters from the system global LogTree. (The RPC itself is named Logs;
  // only its request and response messages carry the GetLogs prefix.)
  //
  // For more information about this API, see //metropolis/pkg/logtree. But,
  // in summary:
  //   - All logging is performed to a DN (distinguished name), which is a
  //     dot-delimited string like foo.bar.baz.
  //   - Log entries can be either raw (coming from unstructured logging from
  //     an external service, like a running process) or leveled (emitted by
  //     Metropolis code with a source line, timestamp, and severity).
  //   - The DNs form a tree of logging nodes - and when requesting logs, a
  //     given subtree of DNs can be requested, instead of just a given DN.
  //   - All supervised processes live at `root.<supervisor DN>`. For more
  //     example paths, see the console logs of a running Metropolis node, or
  //     request all logs (at DN "").
  rpc Logs(GetLogsRequest) returns (stream GetLogsResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_READ_NODE_LOGS
    };
  }

  // UpdateNode updates the node operating system to a new version.
  //
  // Metropolis uses a side-by-side (A/B) update process. This method
  // installs the OS from the given bundle into the inactive slot, activates
  // that slot and then (optionally) reboots to activate it.
  rpc UpdateNode(UpdateNodeRequest) returns (UpdateNodeResponse) {
    option (metropolis.proto.ext.authorization) = {
      need: PERMISSION_UPDATE_NODE
    };
  }
}
// GetLogsRequest is the request of NodeManagement.Logs. It selects which
// logs are returned, and whether they are served from the backlog, streamed
// as they arrive, or both.
message GetLogsRequest {
  // BacklogMode selects how much historic data is returned.
  enum BacklogMode {
    // BACKLOG_INVALID is the zero value, ie. what an unset backlog_mode
    // reads as.
    BACKLOG_INVALID = 0;
    // No historic data will be returned.
    BACKLOG_DISABLE = 1;
    // All available historic data will be returned.
    BACKLOG_ALL = 2;
    // At most backlog_count entries will be returned, if available.
    BACKLOG_COUNT = 3;
  }

  // StreamMode selects whether new entries are streamed after the backlog.
  enum StreamMode {
    // STREAM_INVALID is the zero value, ie. what an unset stream_mode reads
    // as.
    STREAM_INVALID = 0;
    // No streaming entries, gRPC stream will be closed as soon as all
    // backlog data is served.
    STREAM_DISABLE = 1;
    // Entries will be streamed as early as available right after all backlog
    // data is served.
    STREAM_UNBUFFERED = 2;
  }

  // dn is the DN from which to request logs. All supervised runnables live
  // at `root.`, the init code lives at `init.`.
  string dn = 1;

  // filters are the filters to apply to returned data.
  repeated metropolis.proto.common.LogFilter filters = 2;

  // backlog_mode selects how much historic data is returned; see
  // BacklogMode.
  BacklogMode backlog_mode = 3;

  // backlog_count is the upper bound on returned historic entries when
  // backlog_mode is BACKLOG_COUNT. Presumably ignored in other modes -
  // TODO confirm against the server implementation.
  int64 backlog_count = 4;

  // stream_mode selects whether entries are streamed once the backlog has
  // been served; see StreamMode.
  StreamMode stream_mode = 5;
}
// GetLogsResponse is a single message of the stream returned by
// NodeManagement.Logs.
message GetLogsResponse {
  // backlog_entries are entries from the requested historical entries
  // (via WithBackLog; presumably what GetLogsRequest.backlog_mode selects -
  // TODO confirm). They will all be served before the first stream_entries
  // are served (if any).
  repeated metropolis.proto.common.LogEntry backlog_entries = 1;

  // stream_entries are entries streamed as they arrive. Currently no
  // server-side buffering is enabled; instead, every line is served as early
  // as it arrives. However, this might change in the future, so this
  // behaviour cannot be depended upon.
  repeated metropolis.proto.common.LogEntry stream_entries = 2;
}
// UpdateNodeRequest is the request of NodeManagement.UpdateNode.
message UpdateNodeRequest {
  // bundle_url is an HTTPS URL to a Metropolis bundle containing the new OS
  // to install.
  string bundle_url = 1;

  // no_reboot, if set, keeps the node from rebooting after installation.
  // The updated version will then not be active until the node has been
  // rebooted via another method.
  bool no_reboot = 2;
}
// UpdateNodeResponse is the response of NodeManagement.UpdateNode. It
// currently carries no fields.
message UpdateNodeResponse {
}