Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 1 | package main |
| 2 | |
| 3 | import ( |
| 4 | "crypto/x509" |
| 5 | "errors" |
| 6 | "fmt" |
| 7 | "io" |
| 8 | |
| 9 | "github.com/spf13/cobra" |
| 10 | |
| 11 | "source.monogon.dev/metropolis/cli/metroctl/core" |
| 12 | "source.monogon.dev/metropolis/pkg/logtree" |
| 13 | "source.monogon.dev/metropolis/proto/api" |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 14 | |
Serge Bazanski | da11486 | 2023-03-29 17:46:42 +0200 | [diff] [blame] | 15 | cpb "source.monogon.dev/metropolis/proto/common" |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 16 | ) |
| 17 | |
// metroctlLogFlags holds the values of the command line flags accepted by the
// node logs command.
type metroctlLogFlags struct {
	// dn is the distinguished name to query.
	dn string
	// exact limits the query to exactly dn, i.e. without children/recursion.
	exact bool
	// backlog selects how much historical data is returned: >0 for a concrete
	// limit, -1 for all, 0 for none.
	backlog int
	// follow enables following (ie. streaming) logs live.
	follow bool
	// concise selects the concise logging output format.
	concise bool
}

// logFlags stores the flag values of the node logs command. The flags are
// bound to its fields in init.
var logFlags metroctlLogFlags
| 32 | |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 33 | var nodeLogsCmd = &cobra.Command{ |
| 34 | Short: "Get/stream logs from node", |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 35 | Long: `Get or stream logs from node. |
| 36 | |
| 37 | Node logs are structured in a 'log tree' structure, in which different subsystems |
| 38 | log to DNs (distinguished names). For example, service 'foo' might log to |
| 39 | root.role.foo, while service 'bar' might log to root.role.bar. |
| 40 | |
| 41 | To set the DN you want to request logs from, use --dn. The default is to return |
| 42 | all logs. The default output is also also a good starting point to figure out |
| 43 | what DNs are active in the system. |
| 44 | |
| 45 | When requesting logs for a DN by default all sub-DNs will also be returned (ie. |
| 46 | with the above example, when requesting DN 'root.role' logs at root.role.foo and |
| 47 | root.role.bar would also be returned). This behaviour can be disabled by setting |
| 48 | --exact. |
| 49 | |
| 50 | To stream logs, use --follow. |
| 51 | |
| 52 | By default, all available logs are returned. To limit the number of historical |
| 53 | log lines (a.k.a. 'backlog') to return, set --backlog. This similar to requesting |
| 54 | all lines and then piping the result through 'tail' - but more efficient, as no |
| 55 | unnecessary lines are fetched. |
| 56 | `, |
| 57 | Use: "logs [node-id]", |
| 58 | Args: cobra.MinimumNArgs(1), |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 59 | RunE: func(cmd *cobra.Command, args []string) error { |
| 60 | ctx := cmd.Context() |
| 61 | |
| 62 | // First connect to the main management service and figure out the node's IP |
| 63 | // address. |
| 64 | cc := dialAuthenticated(ctx) |
| 65 | mgmt := api.NewManagementClient(cc) |
| 66 | nodes, err := core.GetNodes(ctx, mgmt, fmt.Sprintf("node.id == %q", args[0])) |
| 67 | if err != nil { |
| 68 | return fmt.Errorf("when getting node info: %w", err) |
| 69 | } |
| 70 | |
| 71 | if len(nodes) == 0 { |
| 72 | return fmt.Errorf("no such node") |
| 73 | } |
| 74 | if len(nodes) > 1 { |
| 75 | return fmt.Errorf("expression matched more than one node") |
| 76 | } |
| 77 | n := nodes[0] |
| 78 | if n.Status == nil || n.Status.ExternalAddress == "" { |
| 79 | return fmt.Errorf("node has no external address") |
| 80 | } |
| 81 | |
| 82 | // TODO(q3k): save CA certificate on takeover |
| 83 | info, err := mgmt.GetClusterInfo(ctx, &api.GetClusterInfoRequest{}) |
| 84 | if err != nil { |
| 85 | return fmt.Errorf("couldn't get cluster info: %w", err) |
| 86 | } |
| 87 | cacert, err := x509.ParseCertificate(info.CaCertificate) |
| 88 | if err != nil { |
| 89 | return fmt.Errorf("remote CA certificate invalid: %w", err) |
| 90 | } |
| 91 | |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 92 | fmt.Printf("=== Logs from %s (%s):\n", n.Id, n.Status.ExternalAddress) |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 93 | // Dial the actual node at its management port. |
| 94 | cl := dialAuthenticatedNode(ctx, n.Id, n.Status.ExternalAddress, cacert) |
| 95 | nmgmt := api.NewNodeManagementClient(cl) |
| 96 | |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 97 | streamMode := api.GetLogsRequest_STREAM_DISABLE |
| 98 | if logFlags.follow { |
| 99 | streamMode = api.GetLogsRequest_STREAM_UNBUFFERED |
| 100 | } |
| 101 | var filters []*cpb.LogFilter |
| 102 | if !logFlags.exact { |
| 103 | filters = append(filters, &cpb.LogFilter{ |
| 104 | Filter: &cpb.LogFilter_WithChildren_{ |
| 105 | WithChildren: &cpb.LogFilter_WithChildren{}, |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 106 | }, |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 107 | }) |
| 108 | } |
| 109 | backlogMode := api.GetLogsRequest_BACKLOG_ALL |
| 110 | var backlogCount int64 |
| 111 | switch { |
| 112 | case logFlags.backlog > 0: |
| 113 | backlogMode = api.GetLogsRequest_BACKLOG_COUNT |
| 114 | backlogCount = int64(logFlags.backlog) |
| 115 | case logFlags.backlog == 0: |
| 116 | backlogMode = api.GetLogsRequest_BACKLOG_DISABLE |
| 117 | } |
| 118 | |
| 119 | srv, err := nmgmt.Logs(ctx, &api.GetLogsRequest{ |
| 120 | Dn: logFlags.dn, |
| 121 | BacklogMode: backlogMode, |
| 122 | BacklogCount: backlogCount, |
| 123 | StreamMode: streamMode, |
| 124 | Filters: filters, |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 125 | }) |
| 126 | if err != nil { |
| 127 | return fmt.Errorf("failed to get logs: %w", err) |
| 128 | } |
| 129 | for { |
| 130 | res, err := srv.Recv() |
| 131 | if errors.Is(err, io.EOF) { |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 132 | fmt.Println("=== Done.") |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 133 | break |
| 134 | } |
| 135 | if err != nil { |
| 136 | return fmt.Errorf("log stream failed: %w", err) |
| 137 | } |
| 138 | for _, entry := range res.BacklogEntries { |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 139 | printEntry(entry) |
| 140 | } |
| 141 | for _, entry := range res.StreamEntries { |
| 142 | printEntry(entry) |
Serge Bazanski | b91938f | 2023-03-29 14:31:22 +0200 | [diff] [blame] | 143 | } |
| 144 | } |
| 145 | |
| 146 | return nil |
| 147 | }, |
| 148 | } |
Serge Bazanski | e012b72 | 2023-03-29 17:49:04 +0200 | [diff] [blame] | 149 | |
| 150 | func printEntry(e *cpb.LogEntry) { |
| 151 | entry, err := logtree.LogEntryFromProto(e) |
| 152 | if err != nil { |
| 153 | fmt.Printf("invalid stream entry: %v\n", err) |
| 154 | return |
| 155 | } |
| 156 | if logFlags.concise { |
| 157 | fmt.Println(entry.ConciseString(logtree.MetropolisShortenDict, 0)) |
| 158 | } else { |
| 159 | fmt.Println(entry.String()) |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | func init() { |
| 164 | nodeLogsCmd.Flags().BoolVarP(&logFlags.follow, "follow", "f", false, "Continue streaming logs after fetching backlog.") |
| 165 | nodeLogsCmd.Flags().StringVar(&logFlags.dn, "dn", "", "Distinguished Name to get logs from (and children, if --exact is not set). If not set, defaults to '', which is the top-level DN.") |
| 166 | nodeLogsCmd.Flags().BoolVarP(&logFlags.exact, "exact", "e", false, "Only show logs for exactly the DN, do not recurse down the tree.") |
| 167 | nodeLogsCmd.Flags().BoolVarP(&logFlags.concise, "concise", "c", false, "Output concise logs.") |
| 168 | nodeLogsCmd.Flags().IntVar(&logFlags.backlog, "backlog", -1, "How many lines of historical log data to return. The default (-1) returns all available lines. Zero value means no backlog is returned (useful when using --follow).") |
| 169 | nodeCmd.AddCommand(nodeLogsCmd) |
| 170 | } |