| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
| 4 | "fmt" |
| 5 | "io" |
| 6 | "net" |
| 7 | "net/http" |
| 8 | "os" |
| 9 | |
| 10 | "github.com/spf13/cobra" |
| 11 | |
| 12 | "source.monogon.dev/metropolis/cli/metroctl/core" |
| 13 | common "source.monogon.dev/metropolis/node" |
| 14 | "source.monogon.dev/metropolis/proto/api" |
| 15 | ) |
| 16 | |
| 17 | var nodeMetricsCmd = &cobra.Command{ |
| 18 | Short: "Get metrics from node", |
| 19 | Long: `Get metrics from node. |
| 20 | |
| 21 | Node metrics are exported in the Prometheus format, and can be collected by any |
| 22 | number of metrics collection software compatible with said format. |
| 23 | |
| 24 | This helper tool can be used to manually fetch metrics from a node using the same |
| 25 | credentials as used to manage the cluster, and is designed to be used as a |
| 26 | troubleshooting tool when a proper metrics collection system has not been set up |
| 27 | for the cluster. |
| 28 | |
| 29 | A node ID and exporter must be provided. Currently available exporters are: |
| 30 | |
| Serge Bazanski | efbde19 | 2024-07-31 14:53:20 +0000 | [diff] [blame] | 31 | - core: metrics from the core process of the node (which contains the |
| 32 | supervision tree) |
| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 33 | - node: node_exporter metrics for the node |
| 34 | - etcd: etcd metrics, if the node is running the cluster control plane |
| 35 | - kubernetes-scheduler, kubernetes-controller-manager, kubernetes-apiserver: |
| 36 | metrics for kubernetes control plane components, if the node runs the |
| 37 | Kubernetes control plane |
| 38 | - containerd: containerd metrics, if the node is a Kubernetes worker |
| 39 | |
| 40 | `, |
| 41 | Use: "metrics [node-id] [exporter]", |
| Tim Windelschmidt | fc6e1cf | 2024-09-18 17:34:07 +0200 | [diff] [blame] | 42 | Args: PrintUsageOnWrongArgs(cobra.MinimumNArgs(2)), |
| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 43 | RunE: func(cmd *cobra.Command, args []string) error { |
| 44 | ctx := cmd.Context() |
| 45 | |
| 46 | // First connect to the main management service and figure out the node's IP |
| 47 | // address. |
| Tim Windelschmidt | 0b4fb8c | 2024-09-18 17:34:23 +0200 | [diff] [blame] | 48 | cc, err := dialAuthenticated(ctx) |
| 49 | if err != nil { |
| 50 | return fmt.Errorf("while dialing node: %w", err) |
| 51 | } |
| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 52 | mgmt := api.NewManagementClient(cc) |
| 53 | nodes, err := core.GetNodes(ctx, mgmt, fmt.Sprintf("node.id == %q", args[0])) |
| 54 | if err != nil { |
| 55 | return fmt.Errorf("when getting node info: %w", err) |
| 56 | } |
| 57 | |
| 58 | if len(nodes) == 0 { |
| 59 | return fmt.Errorf("no such node") |
| 60 | } |
| 61 | if len(nodes) > 1 { |
| 62 | return fmt.Errorf("expression matched more than one node") |
| 63 | } |
| 64 | n := nodes[0] |
| 65 | if n.Status == nil || n.Status.ExternalAddress == "" { |
| 66 | return fmt.Errorf("node has no external address") |
| 67 | } |
| 68 | |
| Tim Windelschmidt | 0b4fb8c | 2024-09-18 17:34:23 +0200 | [diff] [blame] | 69 | transport, err := newAuthenticatedNodeHTTPTransport(ctx, n.Id) |
| 70 | if err != nil { |
| 71 | return err |
| 72 | } |
| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 73 | client := http.Client{ |
| Tim Windelschmidt | 0b4fb8c | 2024-09-18 17:34:23 +0200 | [diff] [blame] | 74 | Transport: transport, |
| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 75 | } |
| 76 | res, err := client.Get(fmt.Sprintf("https://%s/metrics/%s", net.JoinHostPort(n.Status.ExternalAddress, common.MetricsPort.PortString()), args[1])) |
| 77 | if err != nil { |
| Tim Windelschmidt | 5f1a7de | 2024-09-19 02:00:14 +0200 | [diff] [blame] | 78 | return fmt.Errorf("metrics HTTP request failed: %w", err) |
| Serge Bazanski | a3e38cf | 2024-07-31 14:40:04 +0000 | [diff] [blame] | 79 | } |
| 80 | defer res.Body.Close() |
| 81 | _, err = io.Copy(os.Stdout, res.Body) |
| 82 | return err |
| 83 | }, |
| 84 | } |
| 85 | |
| 86 | func init() { |
| 87 | nodeCmd.AddCommand(nodeMetricsCmd) |
| 88 | } |