m/n/kubernetes: add metricsprovider

Kubernetes has a metrics provider interface; add an adapter so that these
metrics end up in our Prometheus registry. Similar code exists inside K8s,
but it targets their custom metrics architecture rather than plain
Prometheus.

As these metrics are shared across all workqueues, we follow K8s in
implementing this with a singleton/global. It's not the prettiest, but
otherwise we may run into issues with Prometheus and duplicate metrics.

Change-Id: I0b6d608d14793e44859166a5a59d446c8f662a25
Reviewed-on: https://review.monogon.dev/c/monogon/+/3829
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/kubernetes/BUILD.bazel b/metropolis/node/kubernetes/BUILD.bazel
index 16f773c..636295c 100644
--- a/metropolis/node/kubernetes/BUILD.bazel
+++ b/metropolis/node/kubernetes/BUILD.bazel
@@ -27,9 +27,11 @@
         "//metropolis/node/core/curator/watcher",
         "//metropolis/node/core/identity",
         "//metropolis/node/core/localstorage",
+        "//metropolis/node/core/metrics",
         "//metropolis/node/core/network",
         "//metropolis/node/kubernetes/authproxy",
         "//metropolis/node/kubernetes/clusternet",
+        "//metropolis/node/kubernetes/metricsprovider",
         "//metropolis/node/kubernetes/metricsproxy",
         "//metropolis/node/kubernetes/nfproxy",
         "//metropolis/node/kubernetes/pki",
diff --git a/metropolis/node/kubernetes/metricsprovider/BUILD.bazel b/metropolis/node/kubernetes/metricsprovider/BUILD.bazel
new file mode 100644
index 0000000..4bba439
--- /dev/null
+++ b/metropolis/node/kubernetes/metricsprovider/BUILD.bazel
@@ -0,0 +1,12 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "metricsprovider",
+    srcs = ["metricsprovider.go"],
+    importpath = "source.monogon.dev/metropolis/node/kubernetes/metricsprovider",
+    visibility = ["//visibility:public"],
+    deps = [
+        "@com_github_prometheus_client_golang//prometheus",
+        "@io_k8s_client_go//util/workqueue",
+    ],
+)
diff --git a/metropolis/node/kubernetes/metricsprovider/metricsprovider.go b/metropolis/node/kubernetes/metricsprovider/metricsprovider.go
new file mode 100644
index 0000000..23aa254
--- /dev/null
+++ b/metropolis/node/kubernetes/metricsprovider/metricsprovider.go
@@ -0,0 +1,114 @@
+// Copyright The Monogon Project Authors.
+// Copyright 2019 The Kubernetes Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package metricsprovider provides a Prometheus registry for code in K8s
+// client-go capable of providing metrics. Currently it only registers itself
+// as a metrics backend for workqueues; more can be added in the future.
+// The registry with all the metrics is available as `Registry`.
+package metricsprovider
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/client-go/util/workqueue"
+)
+
+// Metrics subsystem and keys used by the workqueue.
+const (
+	WorkQueueSubsystem         = "workqueue"
+	DepthKey                   = "depth"
+	AddsKey                    = "adds_total"
+	QueueLatencyKey            = "queue_duration_seconds"
+	WorkDurationKey            = "work_duration_seconds"
+	UnfinishedWorkKey          = "unfinished_work_seconds"
+	LongestRunningProcessorKey = "longest_running_processor_seconds"
+	RetriesKey                 = "retries_total"
+)
+
+var Registry = prometheus.NewRegistry()
+
+var (
+	depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      DepthKey,
+		Help:      "Current depth of workqueue",
+	}, []string{"name"})
+
+	adds = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      AddsKey,
+		Help:      "Total number of adds handled by workqueue",
+	}, []string{"name"})
+
+	latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      QueueLatencyKey,
+		Help:      "How long in seconds an item stays in the workqueue before being requested.",
+		Buckets:   prometheus.ExponentialBuckets(10e-9, 10, 10),
+	}, []string{"name"})
+
+	workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      WorkDurationKey,
+		Help:      "How long in seconds processing an item from workqueue takes.",
+		Buckets:   prometheus.ExponentialBuckets(10e-9, 10, 10),
+	}, []string{"name"})
+
+	unfinished = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      UnfinishedWorkKey,
+		Help: "How many seconds of work has done that " +
+			"is in progress and hasn't been observed by work_duration. Large " +
+			"values indicate stuck threads. One can deduce the number of stuck " +
+			"threads by observing the rate at which this increases.",
+	}, []string{"name"})
+
+	longestRunningProcessor = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      LongestRunningProcessorKey,
+		Help: "How many seconds has the longest running " +
+			"processor for workqueue been running.",
+	}, []string{"name"})
+
+	retries = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Subsystem: WorkQueueSubsystem,
+		Name:      RetriesKey,
+		Help:      "Total number of retries handled by workqueue",
+	}, []string{"name"})
+)
+
+func init() {
+	Registry.MustRegister(depth, adds, latency, workDuration, unfinished, longestRunningProcessor, retries)
+	workqueue.SetProvider(&promProvider{})
+}
+
+type promProvider struct {
+}
+
+func (promProvider) NewDepthMetric(name string) workqueue.GaugeMetric {
+	return depth.WithLabelValues(name)
+}
+
+func (promProvider) NewAddsMetric(name string) workqueue.CounterMetric {
+	return adds.WithLabelValues(name)
+}
+
+func (promProvider) NewLatencyMetric(name string) workqueue.HistogramMetric {
+	return latency.WithLabelValues(name)
+}
+
+func (promProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric {
+	return workDuration.WithLabelValues(name)
+}
+
+func (promProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric {
+	return unfinished.WithLabelValues(name)
+}
+
+func (promProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric {
+	return longestRunningProcessor.WithLabelValues(name)
+}
+
+func (promProvider) NewRetriesMetric(name string) workqueue.CounterMetric {
+	return retries.WithLabelValues(name)
+}
diff --git a/metropolis/node/kubernetes/service_worker.go b/metropolis/node/kubernetes/service_worker.go
index e65d39b..6f6633b 100644
--- a/metropolis/node/kubernetes/service_worker.go
+++ b/metropolis/node/kubernetes/service_worker.go
@@ -19,8 +19,10 @@
 	"source.monogon.dev/metropolis/node"
 	oclusternet "source.monogon.dev/metropolis/node/core/clusternet"
 	"source.monogon.dev/metropolis/node/core/localstorage"
+	"source.monogon.dev/metropolis/node/core/metrics"
 	"source.monogon.dev/metropolis/node/core/network"
 	"source.monogon.dev/metropolis/node/kubernetes/clusternet"
+	"source.monogon.dev/metropolis/node/kubernetes/metricsprovider"
 	"source.monogon.dev/metropolis/node/kubernetes/nfproxy"
 	kpki "source.monogon.dev/metropolis/node/kubernetes/pki"
 	"source.monogon.dev/metropolis/node/kubernetes/plugins/kvmdevice"
@@ -56,6 +58,8 @@
 }
 
 func (s *Worker) Run(ctx context.Context) error {
+	metrics.CoreRegistry.MustRegister(metricsprovider.Registry)
+	defer metrics.CoreRegistry.Unregister(metricsprovider.Registry)
 	// Run apiproxy, which load-balances connections from worker components to this
 	// cluster's api servers. This is necessary as we want to round-robin across all
 	// available apiservers, and Kubernetes components do not implement client-side