blob: 242ac2d0745abba3323f86a2b1cbdef9f39bb9ca [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
// Package metrics implements a Prometheus metrics submission interface for BMDB
// client components. A MetricsSet object can be attached to a BMDB object, which
// will make all BMDB sessions/transactions/work statistics be submitted to that
// MetricsSet object.
8package metrics
9
10import (
11 "github.com/prometheus/client_golang/prometheus"
12
13 "source.monogon.dev/cloud/bmaas/bmdb/model"
14)
15
// Processor names the cloud component (and optionally sub-component) that is
// acting upon the BMDB. It can be supplied when a BMDB session is started so
// that the metrics emitted by that session are attributed to it.
//
// The value is used verbatim as a Prometheus label value, so it has to stay
// low-cardinality: every permitted value is enumerated below. A Session that
// is not configured with a Processor uses the default, ProcessorUnknown.
type Processor string

const (
	// ProcessorUnknown is the zero value, used when no Processor was
	// configured.
	ProcessorUnknown             Processor = ""
	ProcessorShepherdInitializer Processor = "shepherd-initializer"
	ProcessorShepherdProvisioner Processor = "shepherd-provisioner"
	ProcessorShepherdRecoverer   Processor = "shepherd-recoverer"
	ProcessorShepherdUpdater     Processor = "shepherd-updater"
	ProcessorBMSRV               Processor = "bmsrv"
	ProcessorScruffyStats        Processor = "scruffy-stats"
)
34
35// String returns the Prometheus label value for use with the 'processor' label
36// key.
37func (p Processor) String() string {
38 switch p {
39 case ProcessorUnknown:
40 return "unknown"
41 default:
42 return string(p)
43 }
44}
45
46// MetricsSet contains all the Prometheus metrics objects related to a BMDB
47// client.
48//
49// The MetricsSet object is goroutine-safe.
50//
51// An empty MetricsSet object is not valid, and should be instead constructed
52// using New.
53//
54// A nil MetricsSet object is valid and represents a no-op metrics recorder
55// that's never collected.
56type MetricsSet struct {
57 sessionStarted *prometheus.CounterVec
58 transactionExecuted *prometheus.CounterVec
59 transactionRetried *prometheus.CounterVec
60 transactionFailed *prometheus.CounterVec
61 workStarted *prometheus.CounterVec
62 workFinished *prometheus.CounterVec
63}
64
65func processorCounter(name, help string, labels ...string) *prometheus.CounterVec {
66 labels = append([]string{"processor"}, labels...)
67 return prometheus.NewCounterVec(
68 prometheus.CounterOpts{
69 Name: name,
70 Help: help,
71 },
72 labels,
73 )
74}
75
76// New creates a new BMDB MetricsSet object which can be then attached to a BMDB
77// object by calling BMDB.EnableMetrics on the MetricsSet object.
78//
79// The given registry must be a valid Prometheus registry, and all metrics
80// contained in this MetricsSet object will be registered into it.
81//
82// The MetricsSet object can be shared between multiple BMDB object.
83//
84// The MetricsSet object is goroutine-safe.
85func New(registry *prometheus.Registry) *MetricsSet {
86 m := &MetricsSet{
87 sessionStarted: processorCounter("bmdb_session_started", "How many sessions this worker started"),
88 transactionExecuted: processorCounter("bmdb_transaction_executed", "How many transactions were performed by this worker"),
89 transactionRetried: processorCounter("bmdb_transaction_retried", "How many transaction retries were performed by this worker"),
90 transactionFailed: processorCounter("bmdb_transaction_failed", "How many transactions failed permanently on this worker"),
91 workStarted: processorCounter("bmdb_work_started", "How many work items were performed by this worker, partitioned by process", "process"),
92 workFinished: processorCounter("bmdb_work_finished", "How many work items were finished by this worker, partitioned by process and result", "process", "result"),
93 }
94 registry.MustRegister(
95 m.sessionStarted,
96 m.transactionExecuted,
97 m.transactionRetried,
98 m.transactionFailed,
99 m.workStarted,
100 m.workFinished,
101 )
102 return m
103}
104
105// ProcessorRecorder wraps a MetricsSet object with the context of some
106// Processor. It exposes methods that record specific events into the managed
107// Metrics.
108//
109// The ProcessorRecorder object is goroutine safe.
110//
111// An empty ProcessorRecorder object is not valid, and should be instead
112// constructed using Metrics.Recorder.
113//
114// A nil ProcessorRecorder object is valid and represents a no-op metrics
115// recorder.
116type ProcessorRecorder struct {
117 m *MetricsSet
118 labels prometheus.Labels
119}
120
121// Recorder builds a ProcessorRecorder for the given Metrics and a given
122// Processor.
123func (m *MetricsSet) Recorder(p Processor) *ProcessorRecorder {
124 if m == nil {
125 return nil
126 }
127 return &ProcessorRecorder{
128 m: m,
129 labels: prometheus.Labels{
130 "processor": p.String(),
131 },
132 }
133}
134
135// OnTransactionStarted should be called any time a BMDB client starts or
136// re-starts a BMDB Transaction. The attempt should either be '1' (for the first
137// attempt) or a number larger than 1 for any subsequent attempt (i.e. retry) of
138// a transaction.
139func (r *ProcessorRecorder) OnTransactionStarted(attempt int64) {
140 if r == nil {
141 return
142 }
143 if attempt == 1 {
144 r.m.transactionExecuted.With(r.labels).Inc()
145 } else {
146 r.m.transactionRetried.With(r.labels).Inc()
147 }
148}
149
150// OnTransactionFailed should be called any time a BMDB client fails a
151// BMDB Transaction permanently.
152func (r *ProcessorRecorder) OnTransactionFailed() {
153 if r == nil {
154 return
155 }
156 r.m.transactionFailed.With(r.labels).Inc()
157}
158
159// OnSessionStarted should be called any time a BMDB client opens a new BMDB
160// Session.
161func (r *ProcessorRecorder) OnSessionStarted() {
162 if r == nil {
163 return
164 }
165 r.m.sessionStarted.With(r.labels).Inc()
166}
167
168// ProcessRecorder wraps a ProcessorRecorder with an additional model.Process.
169// The resulting object can then record work-specific events.
170//
171// The PusherWithProcess object is goroutine-safe.
172type ProcessRecorder struct {
173 *ProcessorRecorder
174 labels prometheus.Labels
175}
176
177// WithProcess wraps a given Pusher with a Process.
178//
179// The resulting PusherWithProcess object is goroutine-safe.
180func (r *ProcessorRecorder) WithProcess(process model.Process) *ProcessRecorder {
181 if r == nil {
182 return nil
183 }
184 return &ProcessRecorder{
185 ProcessorRecorder: r,
186 labels: prometheus.Labels{
187 "processor": r.labels["processor"],
188 "process": string(process),
189 },
190 }
191}
192
193// OnWorkStarted should be called any time a BMDB client starts a new Work item.
194func (r *ProcessRecorder) OnWorkStarted() {
195 if r == nil {
196 return
197 }
198 r.m.workStarted.With(r.labels).Inc()
199}
200
// WorkResult is the outcome of a Work item as passed to OnWorkFinished. Its
// string value is used as the 'result' Prometheus label value.
type WorkResult string

// Possible outcomes of a Work item.
const (
	WorkResultFinished WorkResult = "finished"
	WorkResultCanceled WorkResult = "canceled"
	WorkResultFailed   WorkResult = "failed"
)
208
209// OnWorkFinished should be called any time a BMDB client finishes, cancels or
210// fails a Work item.
211func (r *ProcessRecorder) OnWorkFinished(result WorkResult) {
212 if r == nil {
213 return
214 }
215 r.m.workFinished.MustCurryWith(r.labels).With(prometheus.Labels{"result": string(result)}).Inc()
216}