blob: f6cabcb56383d9babbbcbda5c1f614042b1777f1 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0
3
Serge Bazanskidea7cd02023-04-26 13:58:17 +02004package wrapngo
5
6import (
7 "context"
8 "errors"
9 "fmt"
10 "net/http"
11 "regexp"
12 "strings"
13 "time"
14
15 "github.com/prometheus/client_golang/prometheus"
16 "k8s.io/klog/v2"
17)
18
19// metricsSet contains all the Prometheus metrics collected by wrapngo.
20type metricsSet struct {
21 requestLatencies *prometheus.HistogramVec
22 waiting prometheus.GaugeFunc
23 inFlight prometheus.GaugeFunc
24}
25
26func newMetricsSet(ser *serializer) *metricsSet {
27 return &metricsSet{
28 requestLatencies: prometheus.NewHistogramVec(
29 prometheus.HistogramOpts{
30 Name: "equinix_api_latency",
31 Help: "Equinix API request latency in seconds, partitioned by endpoint status code",
32 },
33 []string{"endpoint", "status_code"},
34 ),
35 waiting: prometheus.NewGaugeFunc(
36 prometheus.GaugeOpts{
37 Name: "equinix_api_waiting",
38 Help: "Number of API requests pending to be sent to Equinix but waiting on semaphore",
39 },
40 func() float64 {
41 _, waiting := ser.stats()
42 return float64(waiting)
43 },
44 ),
45 inFlight: prometheus.NewGaugeFunc(
46 prometheus.GaugeOpts{
47 Name: "equinix_api_in_flight",
48 Help: "Number of API requests currently being processed by Equinix",
49 },
50 func() float64 {
51 inFlight, _ := ser.stats()
52 return float64(inFlight)
53 },
54 ),
55 }
56}
57
58// getEndpointForPath converts from an Equinix API method and path (eg.
59// /metal/v1/devices/deadbeef) into an 'endpoint' name, which is an imaginary,
60// Monogon-specific name for the API endpoint accessed by this call.
61//
62// If the given path is unknown and thus cannot be converted to an endpoint name,
63// 'Unknown' is return and a warning is logged.
64//
65// We use this function to partition request statistics per API 'endpoint'. An
66// alternative to this would be to record high-level packngo function names, but
67// one packngo function call might actually emit multiple HTTP API requests - so
68// we're stuck recording the low-level requests and gathering statistics from
69// there instead.
70func getEndpointForPath(method, path string) string {
71 path = strings.TrimPrefix(path, "/metal/v1")
72 for name, match := range endpointNames {
73 if match.matches(method, path) {
74 return name
75 }
76 }
77 klog.Warningf("Unknown Equinix API %s %s - cannot determine metric endpoint name", method, path)
78 return "Unknown"
79}
80
// requestMatch describes one kind of HTTP request: an exact method plus a
// regular expression that the request path has to satisfy.
type requestMatch struct {
	// method is the HTTP method, compared verbatim against the request's.
	method string
	// regexp is the pattern applied to the request path.
	regexp *regexp.Regexp
}

// matches reports whether the given method equals r.method and the given path
// is matched by r.regexp. The pattern is only consulted when the method
// already agrees.
func (r *requestMatch) matches(method, path string) bool {
	return r.method == method && r.regexp.MatchString(path)
}
93
94var (
95 endpointNames = map[string]requestMatch{
96 "GetDevice": {"GET", regexp.MustCompile(`^/devices/[^/]+$`)},
97 "ListDevices": {"GET", regexp.MustCompile(`^/(organizations|projects)/[^/]+/devices$`)},
98 "CreateDevice": {"POST", regexp.MustCompile(`^/projects/[^/]+/devices$`)},
Tim Windelschmidt7e1c4892023-06-20 12:07:02 +020099 "ListReservations": {"GET", regexp.MustCompile(`^/projects/[^/]+/hardware-reservations$`)},
Serge Bazanskidea7cd02023-04-26 13:58:17 +0200100 "ListSSHKeys": {"GET", regexp.MustCompile(`^/ssh-keys$`)},
101 "CreateSSHKey": {"POST", regexp.MustCompile(`^/project/[^/]+/ssh-keys$`)},
102 "GetSSHKey": {"GET", regexp.MustCompile(`^/ssh-keys/[^/]+$`)},
103 "UpdateSSHKey": {"PATCH", regexp.MustCompile(`^/ssh-keys/[^/]+$`)},
104 "PerformDeviceAction": {"POST", regexp.MustCompile(`^/devices/[^/]+/actions$`)},
105 }
106)
107
108// onAPIRequestDone is called by the wrapngo code on every API response from
109// Equinix, and records the given parameters into metrics.
110func (m *metricsSet) onAPIRequestDone(req *http.Request, res *http.Response, err error, latency time.Duration) {
111 if m == nil {
112 return
113 }
114
115 code := "unknown"
116 if err == nil {
117 code = fmt.Sprintf("%d", res.StatusCode)
118 } else {
119 switch {
120 case errors.Is(err, context.Canceled):
121 code = "ctx canceled"
122 case errors.Is(err, context.DeadlineExceeded):
123 code = "deadline exceeded"
124 }
125 }
126 if code == "unknown" {
127 klog.Warningf("Unexpected HTTP result: req %s %s, error: %v", req.Method, req.URL.Path, res)
128 }
129
130 endpoint := getEndpointForPath(req.Method, req.URL.Path)
131 m.requestLatencies.With(prometheus.Labels{"endpoint": endpoint, "status_code": code}).Observe(latency.Seconds())
132}