blob: 87f3104a46619229d6ffe199a0342f543b584bf7 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +00004package supervisor
5
6import (
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +00007 "github.com/prometheus/client_golang/prometheus"
Tim Windelschmidt3c6183f2024-12-16 02:42:21 +01008 "github.com/prometheus/client_golang/prometheus/promauto"
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +00009)
10
11// MetricsPrometheus is a Metrics implementation which exports the supervisor
12// metrics over some prometheus registry.
13//
14// This structure must be constructed with NewMetricsPrometheus.
15//
16// The metrics exported are:
17// - monogon_supervisor_dn_state_total
//   - monogon_supervisor_dn_state_transition_count
19type MetricsPrometheus struct {
20 exportedState *prometheus.GaugeVec
21 exportedEdge *prometheus.CounterVec
22 cachedState map[string]*NodeState
23}
24
25// NewMetricsPrometheus initializes Supervisor metrics in a prometheus registry
26// and return a Metrics instance to be used with WithMetrics.
27//
28// This should only be called once for a given registry.
Tim Windelschmidt3c6183f2024-12-16 02:42:21 +010029func NewMetricsPrometheus(registry *prometheus.Registry) *MetricsPrometheus {
30 factory := promauto.With(registry)
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +000031 res := &MetricsPrometheus{
Tim Windelschmidt3c6183f2024-12-16 02:42:21 +010032 exportedState: factory.NewGaugeVec(prometheus.GaugeOpts{
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +000033 Namespace: "monogon",
34 Subsystem: "supervisor",
35 Name: "dn_state_total",
36 Help: "Total count of supervisor runnables, broken up by DN and state",
37 }, []string{"dn", "state"}),
Tim Windelschmidt3c6183f2024-12-16 02:42:21 +010038 exportedEdge: factory.NewCounterVec(prometheus.CounterOpts{
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +000039 Namespace: "monogon",
40 Subsystem: "supervisor",
41 Name: "dn_state_transition_count",
42 Help: "Total count of supervisor runnable state transitions, broken up by DN and (old_state, new_state) tuple",
43 ConstLabels: nil,
44 }, []string{"dn", "old_state", "new_state"}),
45 cachedState: make(map[string]*NodeState),
46 }
Tim Windelschmidt3c6183f2024-12-16 02:42:21 +010047 return res
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +000048}
49
50func (m *MetricsPrometheus) exportState(dn string, state NodeState, value float64) {
51 m.exportedState.With(map[string]string{
52 "state": state.String(),
53 "dn": dn,
54 }).Set(value)
55}
56
57func (m *MetricsPrometheus) exportEdge(dn string, oldState, newState NodeState) {
58 m.exportedEdge.With(map[string]string{
59 "old_state": oldState.String(),
60 "new_state": newState.String(),
61 "dn": dn,
62 }).Inc()
63}
64
65func (m *MetricsPrometheus) NotifyNodeState(dn string, state NodeState) {
66 // Set all other exported states to zero, so that a given DN is only in a single
67 // state.
68 for _, st := range NodeStates {
69 if st == state {
70 continue
71 }
72 m.exportState(dn, st, 0.0)
73 }
74 // Export new state.
75 m.exportState(dn, state, 1.0)
76
77 // Export edge transition (assume previous state was Dead if this is the first
78 // time we see this DN).
79 previous := NodeStateDead
80 if m.cachedState[dn] != nil {
81 previous = *m.cachedState[dn]
82 }
83 m.exportEdge(dn, previous, state)
84 m.cachedState[dn] = &state
85}