osbase/supervisor: implement Metrics API

This is a base building block for exporting per-DN/runnable status from
the supervisor into an external system. A sample implementation is
provided which can be used in simple debug facilities to inspect the
current supervision tree.

A follow-up change will use the same API to implement Prometheus
metrics.

Change-Id: I0d586b03a397a3ccf8dac2d8043b9dd2f319be4e
Reviewed-on: https://review.monogon.dev/c/monogon/+/3290
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/osbase/supervisor/supervisor_processor.go b/osbase/supervisor/supervisor_processor.go
index 2a01cf7..6304b09 100644
--- a/osbase/supervisor/supervisor_processor.go
+++ b/osbase/supervisor/supervisor_processor.go
@@ -230,6 +230,10 @@
 	defer s.mu.Unlock()
 
 	n := s.nodeByDN(r.dn)
+	if n.state != NodeStateNew {
+		panic("programming error: scheduled node not new")
+	}
+	s.metrics.NotifyNodeState(r.dn, n.state)
 	go func() {
 		if !s.propagatePanic {
 			defer func() {
@@ -268,6 +272,7 @@
 
 	// Simple case: it was marked as Done and quit with no error.
 	if n.state == NodeStateDone && r.err == nil {
+		s.metrics.NotifyNodeState(r.dn, n.state)
 		// Do nothing. This was supposed to happen. Keep the process as DONE.
 		return
 	}
@@ -277,6 +282,7 @@
 	if r.err != nil && ctx.Err() != nil && errors.Is(r.err, ctx.Err()) {
 		// Mark the node as canceled successfully.
 		n.state = NodeStateCanceled
+		s.metrics.NotifyNodeState(r.dn, n.state)
 		return
 	}
 
@@ -291,6 +297,7 @@
 	s.ilogger.Errorf("%s: %v", n.dn(), err)
 	// Mark as dead.
 	n.state = NodeStateDead
+	s.metrics.NotifyNodeState(r.dn, n.state)
 
 	// Cancel that node's context, just in case something still depends on it.
 	n.ctxC()