metroctl: add metrics command

This is a little helper command to access Node metrics easily for people
(e.g. developers!) who don't have a metrics collection infrastructure set
up.

Change-Id: Ibe3b4356db88e31c3156289ab8d8ca2985266b4b
Reviewed-on: https://review.monogon.dev/c/monogon/+/3288
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/cli/metroctl/BUILD.bazel b/metropolis/cli/metroctl/BUILD.bazel
index 7b36452..2b6b31a 100644
--- a/metropolis/cli/metroctl/BUILD.bazel
+++ b/metropolis/cli/metroctl/BUILD.bazel
@@ -25,6 +25,7 @@
         "cmd_node.go",
         "cmd_node_approve.go",
         "cmd_node_logs.go",
+        "cmd_node_metrics.go",
         "cmd_node_set.go",
         "cmd_takeownership.go",
         "main.go",
@@ -43,6 +44,7 @@
     deps = [
         "//go/clitable",
         "//metropolis/cli/metroctl/core",
+        "//metropolis/node",
         "//metropolis/node/core/identity",
         "//metropolis/node/core/rpc",
         "//metropolis/node/core/rpc/resolver",
@@ -59,6 +61,7 @@
         "@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
         "@io_k8s_client_go//pkg/apis/clientauthentication/v1:clientauthentication",
         "@org_golang_google_grpc//:grpc",
+        "@org_golang_x_net//proxy",
         "@org_golang_x_sync//semaphore",
     ],
 )
diff --git a/metropolis/cli/metroctl/cmd_node_metrics.go b/metropolis/cli/metroctl/cmd_node_metrics.go
new file mode 100644
index 0000000..e445086
--- /dev/null
+++ b/metropolis/cli/metroctl/cmd_node_metrics.go
@@ -0,0 +1,97 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"os"
+
+	"github.com/spf13/cobra"
+
+	"source.monogon.dev/metropolis/cli/metroctl/core"
+	common "source.monogon.dev/metropolis/node"
+	"source.monogon.dev/metropolis/proto/api"
+)
+
+// nodeMetricsCmd implements the "metrics" subcommand of the node command. It
+// looks up the node's external address through the management API, fetches
+// /metrics/<exporter> from the node's metrics port over HTTPS and copies the
+// response body to stdout.
+var nodeMetricsCmd = &cobra.Command{
+	Short: "Get metrics from node",
+	Long: `Get metrics from node.
+
+Node metrics are exported in the Prometheus format, and can be collected by any
+number of metrics collection software compatible with said format.
+
+This helper tool can be used to manually fetch metrics from a node using the same
+credentials as used to manage the cluster, and is designed to be used as a
+troubleshooting tool when a proper metrics collection system has not been set up
+for the cluster.
+
+A node ID and exporter must be provided. Currently available exporters are:
+
+  - node: node_exporter metrics for the node
+  - etcd: etcd metrics, if the node is running the cluster control plane
+  - kubernetes-scheduler, kubernetes-controller-manager, kubernetes-apiserver:
+    metrics for kubernetes control plane components, if the node runs the
+    Kubernetes control plane
+  - containerd: containerd metrics, if the node is a Kubernetes worker
+
+`,
+	Use:  "metrics [node-id] [exporter]",
+	Args: cobra.MinimumNArgs(2),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		ctx := cmd.Context()
+
+		// First connect to the main management service and figure out the node's IP
+		// address.
+		cc := dialAuthenticated(ctx)
+		mgmt := api.NewManagementClient(cc)
+		nodes, err := core.GetNodes(ctx, mgmt, fmt.Sprintf("node.id == %q", args[0]))
+		if err != nil {
+			return fmt.Errorf("when getting node info: %w", err)
+		}
+
+		if len(nodes) == 0 {
+			return fmt.Errorf("no such node")
+		}
+		if len(nodes) > 1 {
+			return fmt.Errorf("expression matched more than one node")
+		}
+		n := nodes[0]
+		if n.Status == nil || n.Status.ExternalAddress == "" {
+			return fmt.Errorf("node has no external address")
+		}
+
+		client := http.Client{
+			Transport: newAuthenticatedNodeHTTPTransport(ctx, n.Id),
+		}
+		addr := net.JoinHostPort(n.Status.ExternalAddress, common.MetricsPort.PortString())
+		target := fmt.Sprintf("https://%s/metrics/%s", addr, args[1])
+		// Bind the request to the command context so that cancelling ctx aborts
+		// the fetch; client.Get would not propagate it.
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
+		if err != nil {
+			return fmt.Errorf("could not build metrics HTTP request: %w", err)
+		}
+		res, err := client.Do(req)
+		if err != nil {
+			return fmt.Errorf("metrics HTTP request failed: %w", err)
+		}
+		defer res.Body.Close()
+		if res.StatusCode != http.StatusOK {
+			// Keep stdout reserved for metrics: relay the server's response on stderr
+			// (best effort, hence the ignored error) and report the status.
+			_, _ = io.Copy(os.Stderr, res.Body)
+			return fmt.Errorf("metrics HTTP request failed: server returned %s", res.Status)
+		}
+		_, err = io.Copy(os.Stdout, res.Body)
+		return err
+	},
+}
+
+func init() {
+	nodeCmd.AddCommand(nodeMetricsCmd)
+}
diff --git a/metropolis/cli/metroctl/rpc.go b/metropolis/cli/metroctl/rpc.go
index cab6d4f..512cb98 100644
--- a/metropolis/cli/metroctl/rpc.go
+++ b/metropolis/cli/metroctl/rpc.go
@@ -6,7 +6,10 @@
 	"crypto/x509"
 	"errors"
 	"log"
+	"net"
+	"net/http"
 
+	"golang.org/x/net/proxy"
 	"google.golang.org/grpc"
 
 	"source.monogon.dev/metropolis/cli/metroctl/core"
@@ -21,6 +24,9 @@
 	if errors.Is(err, core.ErrNoCredentials) {
 		log.Fatalf("You have to take ownership of the cluster first: %v", err)
 	}
+	if err != nil {
+		log.Fatalf("Failed to get owner credentials: %v", err)
+	}
 	if len(flags.clusterEndpoints) == 0 {
 		log.Fatal("Please provide at least one cluster endpoint using the --endpoint parameter.")
 	}
@@ -61,3 +67,50 @@
 	}
 	return cc
 }
+
+// newAuthenticatedNodeHTTPTransport returns an http.Transport for talking to the
+// node with the given ID over TLS. The transport presents the owner credentials
+// as its client certificate and is configured, via rpc.WantRemoteCluster and
+// rpc.WantRemoteNode, with the cluster CA and the expected node ID. If a proxy
+// address was configured, connections are dialed through that SOCKS5 proxy.
+//
+// Any failure to load the CA certificate, the owner credentials or to set up
+// the proxy dialer terminates the process via log.Fatalf.
+func newAuthenticatedNodeHTTPTransport(ctx context.Context, id string) *http.Transport {
+	cacert, err := core.GetClusterCAWithTOFU(ctx, connectOptions())
+	if err != nil {
+		log.Fatalf("Could not get CA certificate: %v", err)
+	}
+	ocert, opkey, err := core.GetOwnerCredentials(flags.configPath)
+	if errors.Is(err, core.ErrNoCredentials) {
+		log.Fatalf("You have to take ownership of the cluster first: %v", err)
+	}
+	// On any other error ocert cannot be relied upon; bail out before using it.
+	if err != nil {
+		log.Fatalf("Failed to get owner credentials: %v", err)
+	}
+	tlsc := tls.Certificate{
+		Certificate: [][]byte{ocert.Raw},
+		PrivateKey:  opkey,
+	}
+	tlsconf := rpc.NewAuthenticatedTLSConfig(tlsc, rpc.WantRemoteCluster(cacert), rpc.WantRemoteNode(id))
+	transport := &http.Transport{
+		TLSClientConfig: tlsconf,
+	}
+	if flags.proxyAddr != "" {
+		dialer, err := proxy.SOCKS5("tcp", flags.proxyAddr, nil, proxy.Direct)
+		if err != nil {
+			log.Fatalf("Failed to create proxy dialer: %v", err)
+		}
+		if cd, ok := dialer.(proxy.ContextDialer); ok {
+			// Context-aware dialer: the per-request context is passed to the dial.
+			transport.DialContext = cd.DialContext
+		} else {
+			// Fallback for dialers without context support; ctx is not honoured.
+			transport.DialContext = func(_ context.Context, network, addr string) (net.Conn, error) {
+				return dialer.Dial(network, addr)
+			}
+		}
+	}
+	return transport
+}
diff --git a/metropolis/node/core/rpc/client.go b/metropolis/node/core/rpc/client.go
index 4b64654..5fc76e3 100644
--- a/metropolis/node/core/rpc/client.go
+++ b/metropolis/node/core/rpc/client.go
@@ -147,9 +147,8 @@
 	}
 }
 
-// NewAuthenticatedCredentials returns gRPC TransportCredentials that can be used
-// to dial a cluster with a given TLS certificate (from node or manager
-// credentials).
+// NewAuthenticatedTLSConfig returns a tls.Config that can be used to dial a
+// cluster with a given TLS certificate (from node or manager credentials).
 //
 // The provided CredentialsOpt specify the verification of the remote side of the
 // connection. When connecting to a cluster (any node), use WantRemoteCluster. If
@@ -158,7 +157,7 @@
 // WantInsecure.
 //
 // The given options are parsed on a first-wins basis.
-func NewAuthenticatedCredentials(cert tls.Certificate, opts ...CredentialsOpt) credentials.TransportCredentials {
+func NewAuthenticatedTLSConfig(cert tls.Certificate, opts ...CredentialsOpt) *tls.Config {
 	config := &tls.Config{
 		Certificates:       []tls.Certificate{cert},
 		InsecureSkipVerify: true,
@@ -188,7 +187,22 @@
 		}
 	}
 
-	return credentials.NewTLS(config)
+	return config
+}
+
+// NewAuthenticatedCredentials returns gRPC TransportCredentials that can be used
+// to dial a cluster with a given TLS certificate (from node or manager
+// credentials), by wrapping NewAuthenticatedTLSConfig in credentials.NewTLS.
+//
+// The provided CredentialsOpt specify the verification of the remote side of the
+// connection. When connecting to a cluster (any node), use WantRemoteCluster. If
+// you also want to verify the connection to a particular node, specify
+// WantRemoteNode alongside it. If no verification should be performed use
+// WantInsecure.
+//
+// The given options are parsed on a first-wins basis.
+func NewAuthenticatedCredentials(cert tls.Certificate, opts ...CredentialsOpt) credentials.TransportCredentials {
+	return credentials.NewTLS(NewAuthenticatedTLSConfig(cert, opts...))
 }
 
 // RetrieveOwnerCertificate uses AAA.Escrow to retrieve a cluster manager