Serge Bazanski | c50f694 | 2023-04-24 18:27:22 +0200 | [diff] [blame] | 1 | // Package metrics implements a Prometheus metrics submission interface for BMDB |
// client components. A MetricsSet object can be attached to a BMDB object,
// which will make all BMDB sessions/transactions/work statistics be submitted
// to that MetricsSet object.
| 5 | package metrics |
| 6 | |
| 7 | import ( |
| 8 | "github.com/prometheus/client_golang/prometheus" |
| 9 | |
| 10 | "source.monogon.dev/cloud/bmaas/bmdb/model" |
| 11 | ) |
| 12 | |
// Processor identifies the cloud component (and, where applicable, its
// sub-component) that is acting upon the BMDB. It can be supplied when a BMDB
// session is started so that the metrics emitted by that session are attributed
// to that component.
//
// Because the Processor ends up as a Prometheus label value it must remain
// low-cardinality, which is why every permitted value is enumerated below. A
// Session that is not configured with a Processor uses ProcessorUnknown.
type Processor string

const (
	// ProcessorUnknown is the zero value, used when no Processor has been
	// configured. String reports it as "unknown".
	ProcessorUnknown Processor = ""

	// The remaining processors are reported under their literal string value.
	ProcessorShepherdInitializer Processor = "shepherd-initializer"
	ProcessorShepherdProvisioner Processor = "shepherd-provisioner"
	ProcessorShepherdRecoverer   Processor = "shepherd-recoverer"
	ProcessorShepherdUpdater     Processor = "shepherd-updater"
	ProcessorBMSRV               Processor = "bmsrv"
)

// String returns the value to be used for the 'processor' Prometheus label:
// "unknown" for ProcessorUnknown, and the underlying string for anything else.
func (p Processor) String() string {
	if p == ProcessorUnknown {
		return "unknown"
	}
	return string(p)
}
| 41 | |
| 42 | // MetricsSet contains all the Prometheus metrics objects related to a BMDB |
| 43 | // client. |
| 44 | // |
| 45 | // The MetricsSet object is goroutine-safe. |
| 46 | // |
| 47 | // An empty MetricsSet object is not valid, and should be instead constructed |
| 48 | // using New. |
| 49 | // |
| 50 | // A nil MetricsSet object is valid and represents a no-op metrics recorder |
| 51 | // that's never collected. |
| 52 | type MetricsSet struct { |
| 53 | sessionStarted *prometheus.CounterVec |
| 54 | transactionExecuted *prometheus.CounterVec |
| 55 | transactionRetried *prometheus.CounterVec |
| 56 | transactionFailed *prometheus.CounterVec |
| 57 | workStarted *prometheus.CounterVec |
| 58 | workFinished *prometheus.CounterVec |
| 59 | } |
| 60 | |
| 61 | func processorCounter(name, help string, labels ...string) *prometheus.CounterVec { |
| 62 | labels = append([]string{"processor"}, labels...) |
| 63 | return prometheus.NewCounterVec( |
| 64 | prometheus.CounterOpts{ |
| 65 | Name: name, |
| 66 | Help: help, |
| 67 | }, |
| 68 | labels, |
| 69 | ) |
| 70 | } |
| 71 | |
| 72 | // New creates a new BMDB MetricsSet object which can be then attached to a BMDB |
| 73 | // object by calling BMDB.EnableMetrics on the MetricsSet object. |
| 74 | // |
| 75 | // The given registry must be a valid Prometheus registry, and all metrics |
| 76 | // contained in this MetricsSet object will be registered into it. |
| 77 | // |
| 78 | // The MetricsSet object can be shared between multiple BMDB object. |
| 79 | // |
| 80 | // The MetricsSet object is goroutine-safe. |
| 81 | func New(registry *prometheus.Registry) *MetricsSet { |
| 82 | m := &MetricsSet{ |
| 83 | sessionStarted: processorCounter("bmdb_session_started", "How many sessions this worker started"), |
| 84 | transactionExecuted: processorCounter("bmdb_transaction_executed", "How many transactions were performed by this worker"), |
| 85 | transactionRetried: processorCounter("bmdb_transaction_retried", "How many transaction retries were performed by this worker"), |
| 86 | transactionFailed: processorCounter("bmdb_transaction_failed", "How many transactions failed permanently on this worker"), |
| 87 | workStarted: processorCounter("bmdb_work_started", "How many work items were performed by this worker, partitioned by process", "process"), |
| 88 | workFinished: processorCounter("bmdb_work_finished", "How many work items were finished by this worker, partitioned by process and result", "process", "result"), |
| 89 | } |
| 90 | registry.MustRegister( |
| 91 | m.sessionStarted, |
| 92 | m.transactionExecuted, |
| 93 | m.transactionRetried, |
| 94 | m.transactionFailed, |
| 95 | m.workStarted, |
| 96 | m.workFinished, |
| 97 | ) |
| 98 | return m |
| 99 | } |
| 100 | |
| 101 | // ProcessorRecorder wraps a MetricsSet object with the context of some |
| 102 | // Processor. It exposes methods that record specific events into the managed |
| 103 | // Metrics. |
| 104 | // |
| 105 | // The ProcessorRecorder object is goroutine safe. |
| 106 | // |
| 107 | // An empty ProcessorRecorder object is not valid, and should be instead |
| 108 | // constructed using Metrics.Recorder. |
| 109 | // |
| 110 | // A nil ProcessorRecorder object is valid and represents a no-op metrics |
| 111 | // recorder. |
| 112 | type ProcessorRecorder struct { |
| 113 | m *MetricsSet |
| 114 | labels prometheus.Labels |
| 115 | } |
| 116 | |
| 117 | // Recorder builds a ProcessorRecorder for the given Metrics and a given |
| 118 | // Processor. |
| 119 | func (m *MetricsSet) Recorder(p Processor) *ProcessorRecorder { |
| 120 | if m == nil { |
| 121 | return nil |
| 122 | } |
| 123 | return &ProcessorRecorder{ |
| 124 | m: m, |
| 125 | labels: prometheus.Labels{ |
| 126 | "processor": p.String(), |
| 127 | }, |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | // OnTransactionStarted should be called any time a BMDB client starts or |
| 132 | // re-starts a BMDB Transaction. The attempt should either be '1' (for the first |
| 133 | // attempt) or a number larger than 1 for any subsequent attempt (i.e. retry) of |
| 134 | // a transaction. |
| 135 | func (r *ProcessorRecorder) OnTransactionStarted(attempt int64) { |
| 136 | if r == nil { |
| 137 | return |
| 138 | } |
| 139 | if attempt == 1 { |
| 140 | r.m.transactionExecuted.With(r.labels).Inc() |
| 141 | } else { |
| 142 | r.m.transactionRetried.With(r.labels).Inc() |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | // OnTransactionFailed should be called any time a BMDB client fails a |
| 147 | // BMDB Transaction permanently. |
| 148 | func (r *ProcessorRecorder) OnTransactionFailed() { |
| 149 | if r == nil { |
| 150 | return |
| 151 | } |
| 152 | r.m.transactionFailed.With(r.labels).Inc() |
| 153 | } |
| 154 | |
| 155 | // OnSessionStarted should be called any time a BMDB client opens a new BMDB |
| 156 | // Session. |
| 157 | func (r *ProcessorRecorder) OnSessionStarted() { |
| 158 | if r == nil { |
| 159 | return |
| 160 | } |
| 161 | r.m.sessionStarted.With(r.labels).Inc() |
| 162 | } |
| 163 | |
| 164 | // ProcessRecorder wraps a ProcessorRecorder with an additional model.Process. |
| 165 | // The resulting object can then record work-specific events. |
| 166 | // |
| 167 | // The PusherWithProcess object is goroutine-safe. |
| 168 | type ProcessRecorder struct { |
| 169 | *ProcessorRecorder |
| 170 | labels prometheus.Labels |
| 171 | } |
| 172 | |
| 173 | // WithProcess wraps a given Pusher with a Process. |
| 174 | // |
| 175 | // The resulting PusherWithProcess object is goroutine-safe. |
| 176 | func (r *ProcessorRecorder) WithProcess(process model.Process) *ProcessRecorder { |
| 177 | if r == nil { |
| 178 | return nil |
| 179 | } |
| 180 | return &ProcessRecorder{ |
| 181 | ProcessorRecorder: r, |
| 182 | labels: prometheus.Labels{ |
| 183 | "processor": r.labels["processor"], |
| 184 | "process": string(process), |
| 185 | }, |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | // OnWorkStarted should be called any time a BMDB client starts a new Work item. |
| 190 | func (r *ProcessRecorder) OnWorkStarted() { |
| 191 | if r == nil { |
| 192 | return |
| 193 | } |
| 194 | r.m.workStarted.With(r.labels).Inc() |
| 195 | } |
| 196 | |
// WorkResult describes how a Work item ended. Its string value is used by
// OnWorkFinished as the value of the 'result' label.
type WorkResult string

// Outcomes that can be reported to OnWorkFinished.
const (
	WorkResultFinished WorkResult = "finished"
	WorkResultCanceled WorkResult = "canceled"
	WorkResultFailed   WorkResult = "failed"
)
| 204 | |
| 205 | // OnWorkFinished should be called any time a BMDB client finishes, cancels or |
| 206 | // fails a Work item. |
| 207 | func (r *ProcessRecorder) OnWorkFinished(result WorkResult) { |
| 208 | if r == nil { |
| 209 | return |
| 210 | } |
| 211 | r.m.workFinished.MustCurryWith(r.labels).With(prometheus.Labels{"result": string(result)}).Inc() |
| 212 | } |