metropolis/node: export core/supervisor metrics
Change-Id: Ibe3be27f9a5b3fc5e36babecc74d7d784d1f5e10
Reviewed-on: https://review.monogon.dev/c/monogon/+/3292
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/cli/metroctl/cmd_node_metrics.go b/metropolis/cli/metroctl/cmd_node_metrics.go
index e445086..f94f020 100644
--- a/metropolis/cli/metroctl/cmd_node_metrics.go
+++ b/metropolis/cli/metroctl/cmd_node_metrics.go
@@ -28,6 +28,8 @@
A node ID and exporter must be provided. Currently available exporters are:
+ - core: metrics from the core process of the node (which contains the
+ supervision tree)
- node: node_exporter metrics for the node
- etcd: etcd metrics, if the node is running the cluster control plane
- kubernetes-scheduler, kubernetes-controller-manager, kubernetes-apiserver:
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index 47ee2c1..f881f04 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -28,6 +28,7 @@
"//metropolis/node/core/devmgr",
"//metropolis/node/core/localstorage",
"//metropolis/node/core/localstorage/declarative",
+ "//metropolis/node/core/metrics",
"//metropolis/node/core/mgmt",
"//metropolis/node/core/network",
"//metropolis/node/core/roleserve",
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index e9d1ad1..ede3478 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -31,6 +31,7 @@
"source.monogon.dev/metropolis/node/core/devmgr"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/localstorage/declarative"
+ "source.monogon.dev/metropolis/node/core/metrics"
"source.monogon.dev/metropolis/node/core/network"
"source.monogon.dev/metropolis/node/core/roleserve"
"source.monogon.dev/metropolis/node/core/rpc/resolver"
@@ -221,6 +222,12 @@
return m.Run(ctx)
}
+ pm, err := supervisor.NewMetricsPrometheus(metrics.CoreRegistry)
+ if err != nil {
+ // Fatal, because this generally shouldn't happen.
+ logger.Fatalf("Failed to register supervisor metrics: %v", err)
+ }
+
// Start the init function in a one-shot runnable. Smuggle out any errors from
// the init function and stuff them into the fatal channel. This is where the
// system supervisor takes over as the main process management system.
@@ -232,11 +239,11 @@
select {}
}
return nil
- }, supervisor.WithExistingLogtree(lt))
+ }, supervisor.WithExistingLogtree(lt), supervisor.WithMetrics(pm))
// Meanwhile, wait for any fatal error from the init process, and handle it
// accordingly.
- err := <-fatal
+ err = <-fatal
// Log error with primary logging mechanism still active.
logger.Infof("Node startup failed: %v", err)
// Start shutting down the supervision tree...
diff --git a/metropolis/node/core/metrics/BUILD.bazel b/metropolis/node/core/metrics/BUILD.bazel
index 6385bb8..30d08a7 100644
--- a/metropolis/node/core/metrics/BUILD.bazel
+++ b/metropolis/node/core/metrics/BUILD.bazel
@@ -16,6 +16,8 @@
"//metropolis/node/core/curator/watcher",
"//metropolis/node/core/identity",
"//osbase/supervisor",
+ "@com_github_prometheus_client_golang//prometheus",
+ "@com_github_prometheus_client_golang//prometheus/promhttp",
],
)
diff --git a/metropolis/node/core/metrics/exporters.go b/metropolis/node/core/metrics/exporters.go
index 2dd2cfc..2e8c1df 100644
--- a/metropolis/node/core/metrics/exporters.go
+++ b/metropolis/node/core/metrics/exporters.go
@@ -5,21 +5,37 @@
"io"
"net/http"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+
"source.monogon.dev/metropolis/node"
"source.monogon.dev/osbase/supervisor"
)
-// An Exporter is a Prometheus binary running under the Metrics service which
-// collects some metrics and exposes them on a locally bound TCP port.
+// An Exporter is a source of Prometheus metrics. There are two possible kinds of
+// exporters:
+//
+// 1. A binary running under the Metrics service which collects some metrics and
+// exposes them on a locally bound TCP port (either started by the Exporter or
+// already running as part of Metropolis).
+//
+// 2. An in-memory Prometheus registry/gatherer for metrics generated by the
+// Metropolis core process.
//
// The Metrics Service will forward requests from /metrics/<name> to the
// exporter.
type Exporter struct {
// Name of the exporter, which becomes part of the metrics URL for this exporter.
Name string
- // Port on which this exporter will be running.
+ // Gatherer, if provided, is a Prometheus registry (or other Gatherer) that will
+ // be queried for metrics for this exporter. Exactly one of Gatherer or Port must
+ // be set.
+ Gatherer prometheus.Gatherer
+ // Port on which an exporter is/will be running, and to which metrics requests
+ // will be proxied. Exactly one of Gatherer or Port must be set.
Port node.Port
- // Executable to run to start the exporter.
+ // Executable to run to start the exporter. If empty, no executable will be
+ // started.
Executable string
// Arguments to start the exporter. The exporter should listen at 127.0.0.1 and
// the port specified by Port, and serve its metrics on /metrics.
@@ -28,9 +44,17 @@
Path string
}
+// CoreRegistry is the metrics registry that will be served at /metrics/core.
+// All Prometheus metrics exported by the node core should register here.
+var CoreRegistry = prometheus.NewRegistry()
+
// DefaultExporters are the exporters which we run by default in Metropolis.
var DefaultExporters = []*Exporter{
{
+ Name: "core",
+ Gatherer: CoreRegistry,
+ },
+ {
Name: "node",
Port: node.MetricsNodeListenerPort,
Executable: "/metrics/bin/node_exporter",
@@ -71,12 +95,7 @@
},
}
-func (e *Exporter) ServeHTTP(w http.ResponseWriter, r *http.Request) {
- if r.Method != http.MethodGet {
- http.Error(w, fmt.Sprintf("method %q not allowed", r.Method), http.StatusMethodNotAllowed)
- return
- }
-
+func (e *Exporter) serveHTTPForward(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
// We are supplying the http.Server with a BaseContext that contains the
@@ -113,6 +132,27 @@
}
}
+func (e *Exporter) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if r.Method != http.MethodGet {
+ http.Error(w, fmt.Sprintf("method %q not allowed", r.Method), http.StatusMethodNotAllowed)
+ return
+ }
+
+ if e.Port != 0 {
+ e.serveHTTPForward(w, r)
+ return
+ }
+
+ if e.Gatherer != nil {
+ h := promhttp.HandlerFor(e.Gatherer, promhttp.HandlerOpts{})
+ h.ServeHTTP(w, r)
+ return
+ }
+
+ w.WriteHeader(500)
+ fmt.Fprintf(w, "invalid exporter configuration (no port, no gatherer)")
+}
+
func copyHeader(dst, src http.Header) {
for k, vv := range src {
for _, v := range vv {