m/n/core: implement node heartbeats

This change introduces cluster member node health monitoring by
implementing a bidirectional RPC stream through which nodes
periodically send their heartbeat updates. The Management.GetNodes
call was modified to include the new node health information.

The relevant data available through the management API is
non-persistent and stored within the current Curator leader's local
state. As such, it will become briefly unavailable in the event of a
leader re-election. The information returned, however, is guaranteed
to be correct.

Change-Id: I916ac48f496941a7decc09d672ecf72a914b0d88
Reviewed-on: https://review.monogon.dev/c/monogon/+/694
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/proto/api/management.proto b/metropolis/proto/api/management.proto
index ba50849..88cab77 100644
--- a/metropolis/proto/api/management.proto
+++ b/metropolis/proto/api/management.proto
@@ -106,8 +106,28 @@
     metropolis.proto.common.NodeStatus status = 3;
     // Roles assigned by the cluster. This is always set.
     metropolis.proto.common.NodeRoles roles = 4;
-}
 
+    // Health describes a node's health as seen from the cluster's perspective.
+    enum Health {
+      INVALID = 0;
+      // UNKNOWN is used whenever no heartbeats have been received from a
+      // given node AND too little time has passed since the last Curator
+      // leader election to know whether the node is actually timing out.
+      // UNKNOWN is also returned for nodes whose NodeState does not equal
+      // NODE_STATE_UP.
+      UNKNOWN = 1;
+      // HEALTHY describes nodes that have sent a heartbeat recently.
+      HEALTHY = 2;
+      // HEARTBEAT_TIMEOUT describes nodes that have not sent a heartbeat
+      // within the interval specified by curator.HeartbeatTimeout.
+      HEARTBEAT_TIMEOUT = 3;
+    }
+    Health health = 5;
+    // heartbeat_timestamp is, despite its name, not an absolute timestamp but
+    // the time elapsed since the node's last heartbeat was received, expressed
+    // in nanoseconds. It equals zero if no heartbeats were received.
+    int64 heartbeat_timestamp = 6;
+}
 
 message ApproveNodeRequest {
     // Raw public key of the node being approved, has to correspond to a node
@@ -116,4 +136,4 @@
 }
 
 message ApproveNodeResponse {
-}
\ No newline at end of file
+}