osbase/supervisor: implement Prometheus metrics
This is a simple implementation of supervisor.Metrics which will be used
in Metropolis.
Change-Id: I8d47b3aa631dec5b07295d5498b3b0d1ad36c0f7
Reviewed-on: https://review.monogon.dev/c/monogon/+/3291
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/osbase/supervisor/supervisor_metrics_prometheus.go b/osbase/supervisor/supervisor_metrics_prometheus.go
new file mode 100644
index 0000000..49ee973
--- /dev/null
+++ b/osbase/supervisor/supervisor_metrics_prometheus.go
@@ -0,0 +1,88 @@
+package supervisor
+
+import (
+ "fmt"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+// MetricsPrometheus is a Metrics implementation which exports the supervisor
+// metrics over some prometheus registry.
+//
+// This structure must be constructed with NewMetricsPrometheus.
+//
+// The metrics exported are:
+// - monogon_supervisor_dn_state_total
+// - monogon_supervisor_dn_state_transition_count
+type MetricsPrometheus struct {
+ exportedState *prometheus.GaugeVec // gauge labelled by dn and state
+ exportedEdge *prometheus.CounterVec // counter labelled by dn, old_state and new_state
+ cachedState map[string]*NodeState // last state passed to NotifyNodeState, keyed by DN
+}
+
+// NewMetricsPrometheus registers the supervisor gauge and counter vectors in
+// the given prometheus registry and returns a Metrics instance to be used with
+// WithMetrics. A failed registration is returned as a wrapped error. This
+// should only be called once for a given registry.
+func NewMetricsPrometheus(registry *prometheus.Registry) (*MetricsPrometheus, error) {
+	stateGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: "monogon",
+		Subsystem: "supervisor",
+		Name:      "dn_state_total",
+		Help:      "Total count of supervisor runnables, broken up by DN and state",
+	}, []string{"dn", "state"})
+	edgeCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "monogon",
+		Subsystem: "supervisor",
+		Name:      "dn_state_transition_count",
+		Help:      "Total count of supervisor runnable state transitions, broken up by DN and (old_state, new_state) tuple",
+	}, []string{"dn", "old_state", "new_state"})
+	if err := registry.Register(stateGauge); err != nil {
+		return nil, fmt.Errorf("when registering dn_state_total: %w", err)
+	}
+	if err := registry.Register(edgeCounter); err != nil {
+		return nil, fmt.Errorf("when registering dn_state_transition_count: %w", err)
+	}
+	return &MetricsPrometheus{
+		exportedState: stateGauge,
+		exportedEdge:  edgeCounter,
+		cachedState:   make(map[string]*NodeState),
+	}, nil
+}
+
+// exportState sets the exported gauge for the given DN and state to value.
+func (m *MetricsPrometheus) exportState(dn string, state NodeState, value float64) {
+	labels := prometheus.Labels{"dn": dn, "state": state.String()}
+	gauge := m.exportedState.With(labels)
+	gauge.Set(value)
+}
+
+// exportEdge increments the exported transition counter for the given DN and
+// (oldState, newState) pair.
+func (m *MetricsPrometheus) exportEdge(dn string, oldState, newState NodeState) {
+	from, to := oldState.String(), newState.String()
+	labels := prometheus.Labels{"dn": dn, "old_state": from, "new_state": to}
+	m.exportedEdge.With(labels).Inc()
+}
+
+// NotifyNodeState exports the given state as the only active state of dn and
+// counts the transition from the previously notified state of that DN.
+func (m *MetricsPrometheus) NotifyNodeState(dn string, state NodeState) {
+	// Zero every other state first so that a given DN is only ever reported in
+	// a single state, then raise the gauge of the new state.
+	for _, other := range NodeStates {
+		if other != state {
+			m.exportState(dn, other, 0.0)
+		}
+	}
+	m.exportState(dn, state, 1.0)
+
+	// A DN seen for the first time is assumed to have previously been Dead.
+	from := NodeStateDead
+	if last := m.cachedState[dn]; last != nil {
+		from = *last
+	}
+	m.exportEdge(dn, from, state)
+	// Remember the new state as the origin of the next transition.
+	m.cachedState[dn] = &state
+}