metropolis: implement Metrics Service
This is the first pass at a Metrics Service. It currently consists of an
HTTP reverse proxy which authenticates incoming connections using the
Cluster CA and certificates, and passes these connections over to a
locally running node_exporter.
In the future more exporters will be added, and we will likely also run
our own exporter for Metropolis-specific metrics.
Change-Id: Ibab52aa303965dd7d975f5035f411d1c56ad73e6
Reviewed-on: https://review.monogon.dev/c/monogon/+/1816
Tested-by: Jenkins CI
Reviewed-by: Leopold Schabel <leo@monogon.tech>
diff --git a/metropolis/node/core/roleserve/BUILD.bazel b/metropolis/node/core/roleserve/BUILD.bazel
index ab84395..3e68818 100644
--- a/metropolis/node/core/roleserve/BUILD.bazel
+++ b/metropolis/node/core/roleserve/BUILD.bazel
@@ -10,6 +10,7 @@
"worker_heartbeat.go",
"worker_hostsfile.go",
"worker_kubernetes.go",
+ "worker_metrics.go",
"worker_nodemgmt.go",
"worker_rolefetch.go",
"worker_statuspush.go",
@@ -24,6 +25,7 @@
"//metropolis/node/core/curator/proto/api",
"//metropolis/node/core/identity",
"//metropolis/node/core/localstorage",
+ "//metropolis/node/core/metrics",
"//metropolis/node/core/mgmt",
"//metropolis/node/core/network",
"//metropolis/node/core/network/hostsfile",
diff --git a/metropolis/node/core/roleserve/roleserve.go b/metropolis/node/core/roleserve/roleserve.go
index 68b9f59..f0fa273 100644
--- a/metropolis/node/core/roleserve/roleserve.go
+++ b/metropolis/node/core/roleserve/roleserve.go
@@ -93,6 +93,7 @@
nodeMgmt *workerNodeMgmt
clusternet *workerClusternet
hostsfile *workerHostsfile
+ metrics *workerMetrics
}
// New creates a Role Server services from a Config.
@@ -163,6 +164,10 @@
clusterDirectorySaved: &s.clusterDirectorySaved,
}
+ s.metrics = &workerMetrics{
+ curatorConnection: &s.CuratorConnection,
+ }
+
return s
}
@@ -232,6 +237,7 @@
supervisor.Run(ctx, "nodemgmt", s.nodeMgmt.run)
supervisor.Run(ctx, "clusternet", s.clusternet.run)
supervisor.Run(ctx, "hostsfile", s.hostsfile.run)
+ supervisor.Run(ctx, "metrics", s.metrics.run)
supervisor.Signal(ctx, supervisor.SignalHealthy)
<-ctx.Done()
diff --git a/metropolis/node/core/roleserve/worker_metrics.go b/metropolis/node/core/roleserve/worker_metrics.go
new file mode 100644
index 0000000..78c62d6
--- /dev/null
+++ b/metropolis/node/core/roleserve/worker_metrics.go
@@ -0,0 +1,34 @@
+package roleserve
+
+import (
+ "context"
+
+ "source.monogon.dev/metropolis/node/core/metrics"
+ "source.monogon.dev/metropolis/pkg/event/memory"
+ "source.monogon.dev/metropolis/pkg/supervisor"
+)
+
+// workerMetrics runs the Metrics Service, which runs local Prometheus collectors
+// (themselves usually instances of existing Prometheus Exporters running as
+// sub-processes), and a forwarding service that lets external users access them
+// over HTTPS using the Cluster CA.
+type workerMetrics struct {
+ curatorConnection *memory.Value[*curatorConnection] // watched by run; the credentials of the connection it yields are passed to metrics.Service
+}
+// run fetches a curator connection from the watched value, then runs metrics.Service with that connection's credentials.
+func (s *workerMetrics) run(ctx context.Context) error {
+ w := s.curatorConnection.Watch()
+ defer w.Close()
+// NOTE(review): Get presumably blocks until a connection is available or ctx is done - confirm against memory.Value; its error is returned unwrapped.
+ supervisor.Logger(ctx).Infof("Waiting for curator connection")
+ cc, err := w.Get(ctx)
+ if err != nil {
+ return err
+ }
+ supervisor.Logger(ctx).Infof("Got curator connection, starting...")
+// Only the credentials field of the connection is handed to the service; whatever svc.Run returns is returned to the caller.
+ svc := metrics.Service{
+ Credentials: cc.credentials,
+ }
+ return svc.Run(ctx)
+}