blob: f94f020433e4475e81fc09dcd373420bab87af6d [file] [log] [blame]
Serge Bazanskia3e38cf2024-07-31 14:40:04 +00001package main
2
3import (
4 "fmt"
5 "io"
6 "net"
7 "net/http"
8 "os"
9
10 "github.com/spf13/cobra"
11
12 "source.monogon.dev/metropolis/cli/metroctl/core"
13 common "source.monogon.dev/metropolis/node"
14 "source.monogon.dev/metropolis/proto/api"
15)
16
17var nodeMetricsCmd = &cobra.Command{
18 Short: "Get metrics from node",
19 Long: `Get metrics from node.
20
21Node metrics are exported in the Prometheus format, and can be collected by any
22number of metrics collection software compatible with said format.
23
24This helper tool can be used to manually fetch metrics from a node using the same
25credentials as used to manage the cluster, and is designed to be used as a
26troubleshooting tool when a proper metrics collection system has not been set up
27for the cluster.
28
29A node ID and exporter must be provided. Currently available exporters are:
30
Serge Bazanskiefbde192024-07-31 14:53:20 +000031 - core: metrics from the core process of the node (which contains the
32 supervision tree)
Serge Bazanskia3e38cf2024-07-31 14:40:04 +000033 - node: node_exporter metrics for the node
34 - etcd: etcd metrics, if the node is running the cluster control plane
35 - kubernetes-scheduler, kubernetes-controller-manager, kubernetes-apiserver:
36 metrics for kubernetes control plane components, if the node runs the
37 Kubernetes control plane
38 - containerd: containerd metrics, if the node is a Kubernetes worker
39
40`,
41 Use: "metrics [node-id] [exporter]",
42 Args: cobra.MinimumNArgs(2),
43 RunE: func(cmd *cobra.Command, args []string) error {
44 ctx := cmd.Context()
45
46 // First connect to the main management service and figure out the node's IP
47 // address.
48 cc := dialAuthenticated(ctx)
49 mgmt := api.NewManagementClient(cc)
50 nodes, err := core.GetNodes(ctx, mgmt, fmt.Sprintf("node.id == %q", args[0]))
51 if err != nil {
52 return fmt.Errorf("when getting node info: %w", err)
53 }
54
55 if len(nodes) == 0 {
56 return fmt.Errorf("no such node")
57 }
58 if len(nodes) > 1 {
59 return fmt.Errorf("expression matched more than one node")
60 }
61 n := nodes[0]
62 if n.Status == nil || n.Status.ExternalAddress == "" {
63 return fmt.Errorf("node has no external address")
64 }
65
66 client := http.Client{
67 Transport: newAuthenticatedNodeHTTPTransport(ctx, n.Id),
68 }
69 res, err := client.Get(fmt.Sprintf("https://%s/metrics/%s", net.JoinHostPort(n.Status.ExternalAddress, common.MetricsPort.PortString()), args[1]))
70 if err != nil {
71 return fmt.Errorf("metrics HTTP request failed: %v", err)
72 }
73 defer res.Body.Close()
74 _, err = io.Copy(os.Stdout, res.Body)
75 return err
76 },
77}
78
79func init() {
80 nodeCmd.AddCommand(nodeMetricsCmd)
81}