osbase/net/dns/kubernetes: add Kubernetes DNS handler

This adds a DNS server handler for Kubernetes DNS service discovery. It
is partially based on the CoreDNS Kubernetes plugin. The query handler
however is written completely from scratch. The handler in the CoreDNS
plugin is very weird; it first handles each query type separately, and
generates msg.Service objects which then need to be converted to dns
records. The new implementation is much simpler, and also more correct:
It handles ANY queries, and follows the rules for NXDOMAIN (If a name is
NXDOMAIN for one qtype, it is NXDOMAIN for all qtypes, and subdomains of
the name are also NXDOMAIN.)

Change-Id: Id1d498ca5384a3b047587ed73e95e4871d82d499
Reviewed-on: https://review.monogon.dev/c/monogon/+/3259
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/osbase/net/dns/kubernetes/metrics.go b/osbase/net/dns/kubernetes/metrics.go
new file mode 100644
index 0000000..83deeee
--- /dev/null
+++ b/osbase/net/dns/kubernetes/metrics.go
@@ -0,0 +1,44 @@
+package kubernetes
+
+// Taken and modified from the Kubernetes plugin of CoreDNS, under Apache 2.0.
+
+import (
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	"source.monogon.dev/osbase/net/dns"
+)
+
+var (
+	// dnsProgrammingLatency is defined as the time it took to program a DNS
+	// instance - from the time a service or pod has changed to the time the
+	// change was propagated and was available to be served by a DNS server.
+	// The definition of this SLI can be found at https://github.com/kubernetes/community/blob/master/sig-scalability/slos/dns_programming_latency.md
+	// Note that the metric is partially based on the time exported by the
+	// endpoints controller on the master machine. The measurement may be
+	// inaccurate if there is clock drift between the node and master machine.
+	// The service_kind label can be one of:
+	//   * cluster_ip
+	//   * headless_with_selector
+	//   * headless_without_selector
+	dnsProgrammingLatency = dns.MetricsFactory.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: "dnsserver",
+		Subsystem: "kubernetes",
+		Name:      "dns_programming_duration_seconds",
+		// Upper bounds from 1 millisecond to ~8.7 minutes (0.001 * 2^19 s).
+		Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
+		Help:    "In Cluster DNS Programming Latency in seconds",
+	}, []string{"service_kind"})
+)
+
+// recordDNSProgrammingLatency observes the time since lastChangeTriggerTime.
+// A zero time is skipped; a non-zero one presumably comes from the annotation
+// set by the endpoints-controller, i.e. a headless service with selector.
+func recordDNSProgrammingLatency(lastChangeTriggerTime time.Time) {
+	if lastChangeTriggerTime.IsZero() {
+		return
+	}
+	observer := dnsProgrammingLatency.WithLabelValues("headless_with_selector")
+	observer.Observe(time.Since(lastChangeTriggerTime).Seconds())
+}