metropolis/node/core/metrics: expose etcd metrics
Change-Id: Ie916d497b44c05ab51b13d0bb14f4e850291a77e
Reviewed-on: https://review.monogon.dev/c/monogon/+/1950
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/consensus/BUILD.bazel b/metropolis/node/core/consensus/BUILD.bazel
index 693f789..359b6dc 100644
--- a/metropolis/node/core/consensus/BUILD.bazel
+++ b/metropolis/node/core/consensus/BUILD.bazel
@@ -48,6 +48,7 @@
         "//metropolis/pkg/logbuffer",
         "//metropolis/pkg/logtree",
         "//metropolis/pkg/supervisor",
+        "//metropolis/test/util",
         "@com_github_google_go_cmp//cmp",
     ],
 )
diff --git a/metropolis/node/core/consensus/configuration.go b/metropolis/node/core/consensus/configuration.go
index 6c07ee7..f7fe954 100644
--- a/metropolis/node/core/consensus/configuration.go
+++ b/metropolis/node/core/consensus/configuration.go
@@ -75,6 +75,8 @@
 	externalPort int
 	// externalAddress overrides the address of the node, which is usually its ID.
 	externalAddress string
+	// etcdMetricsPort overrides the default etcd metrics port used by the node.
+	etcdMetricsPort int
 }
 
 // build takes a Config and returns an etcd embed.Config.
@@ -94,6 +96,10 @@
 		host = c.testOverrides.externalAddress
 		extraNames = append(extraNames, host)
 	}
+	etcdPort := int(node.MetricsEtcdListenerPort)
+	if p := c.testOverrides.etcdMetricsPort; p != 0 {
+		etcdPort = p
+	}
 
 	cfg := embed.NewConfig()
 
@@ -102,6 +108,9 @@
 	cfg.InitialClusterToken = "METROPOLIS"
 	cfg.Logger = "zap"
 	cfg.LogOutputs = []string{c.Ephemeral.ServerLogsFIFO.FullPath()}
+	cfg.ListenMetricsUrls = []url.URL{
+		{Scheme: "http", Host: net.JoinHostPort("127.0.0.1", fmt.Sprintf("%d", etcdPort))},
+	}
 
 	cfg.Dir = c.Data.Data.FullPath()
 
diff --git a/metropolis/node/core/consensus/consensus_test.go b/metropolis/node/core/consensus/consensus_test.go
index b11a053..06d5d1b 100644
--- a/metropolis/node/core/consensus/consensus_test.go
+++ b/metropolis/node/core/consensus/consensus_test.go
@@ -21,12 +21,16 @@
 	"context"
 	"crypto/ed25519"
 	"crypto/rand"
+	"fmt"
+	"net/http"
 	"os"
 	"testing"
+	"time"
 
 	"source.monogon.dev/metropolis/node/core/localstorage"
 	"source.monogon.dev/metropolis/node/core/localstorage/declarative"
 	"source.monogon.dev/metropolis/pkg/supervisor"
+	"source.monogon.dev/metropolis/test/util"
 )
 
 type boilerplate struct {
@@ -74,6 +78,43 @@
 	os.RemoveAll(b.tmpdir)
 }
 
+// TestEtcdMetrics runs etcd with an overridden metrics port and checks that
+// GET /metrics on 127.0.0.1 (the address the listener binds to) returns 200.
+func TestEtcdMetrics(t *testing.T) {
+	const metricsPort = 4100 // shared by the etcd config and the probed URL
+	b := prep(t)
+	defer b.close()
+	etcd := New(Config{
+		Data:           &b.root.Data.Etcd,
+		Ephemeral:      &b.root.Ephemeral.Consensus,
+		NodePrivateKey: b.privkey,
+		testOverrides: testOverrides{
+			externalPort:    2345,
+			etcdMetricsPort: metricsPort,
+		},
+	})
+	stopEtcd, _ := supervisor.TestHarness(t, etcd.Run)
+	defer stopEtcd()
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	metricsURL := fmt.Sprintf("http://127.0.0.1:%d/metrics", metricsPort)
+	util.TestEventual(t, "metrics-reachable", ctx, 10*time.Second, func(ctx context.Context) error {
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, metricsURL, nil)
+		if err != nil {
+			return err
+		}
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return fmt.Errorf("Get: %w", err)
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != http.StatusOK {
+			return fmt.Errorf("StatusCode: wanted 200, got %d", resp.StatusCode)
+		}
+		return nil
+	})
+}
+
 func TestBootstrap(t *testing.T) {
 	b := prep(t)
 	defer b.close()
@@ -214,6 +255,7 @@
 		testOverrides: testOverrides{
 			externalPort:    3000,
 			externalAddress: "localhost",
+			etcdMetricsPort: 3100,
 		},
 	})
 	ctxC, _ := supervisor.TestHarness(t, etcd.Run)
@@ -253,6 +295,7 @@
 		testOverrides: testOverrides{
 			externalPort:    3001,
 			externalAddress: "localhost",
+			etcdMetricsPort: 3101,
 		},
 	})
 	ctxC, _ = supervisor.TestHarness(t, etcd2.Run)
diff --git a/metropolis/node/core/metrics/exporters.go b/metropolis/node/core/metrics/exporters.go
index 2d2a74b..2cbe18c 100644
--- a/metropolis/node/core/metrics/exporters.go
+++ b/metropolis/node/core/metrics/exporters.go
@@ -42,6 +42,10 @@
 			"--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|data/kubernetes/kubelet/pods/.+|tmp/.+|ephermal/containerd/.+)($|/)",
 		},
 	},
+	{
+		Name: "etcd",
+		Port: node.MetricsEtcdListenerPort,
+	},
 }
 
 // forward a given HTTP request to this exporter.
diff --git a/metropolis/node/ports.go b/metropolis/node/ports.go
index afa4b1a..40c106f 100644
--- a/metropolis/node/ports.go
+++ b/metropolis/node/ports.go
@@ -47,6 +47,10 @@
 	// runs, bound to 127.0.0.1. The Metrics Service proxies traffic to it from the
 	// public MetricsPort.
 	MetricsNodeListenerPort Port = 7841
+	// MetricsEtcdListenerPort is the TCP port on which etcd serves its own
+	// metrics, bound to 127.0.0.1. The Metrics Service proxies traffic to it from
+	// the public MetricsPort.
+	MetricsEtcdListenerPort Port = 7842
 	// KubernetesAPIPort is the TCP port on which the Kubernetes API is
 	// exposed.
 	KubernetesAPIPort Port = 6443
@@ -70,6 +74,7 @@
 	NodeManagement,
 	MetricsPort,
 	MetricsNodeListenerPort,
+	MetricsEtcdListenerPort,
 	KubernetesAPIPort,
 	KubernetesAPIWrappedPort,
 	KubernetesWorkerLocalAPIPort,