blob: 30f7ac77c68fa160195d16a8d0d1bd03b00144e8 [file] [log] [blame]
Serge Bazanski1ebd1e12020-07-13 19:17:16 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package main
18
19import (
Lorenz Brun09c275b2021-03-30 12:47:09 +020020 "bufio"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020021 "context"
Lorenz Brun09c275b2021-03-30 12:47:09 +020022 "fmt"
23 "io/ioutil"
24 "os"
25 "regexp"
26 "strings"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020027
28 "google.golang.org/grpc/codes"
29 "google.golang.org/grpc/status"
Serge Bazanskib0272182020-11-02 18:39:44 +010030
Serge Bazanskif9edf522021-06-17 15:57:13 +020031 "source.monogon.dev/metropolis/node/core/roleserve"
Serge Bazanski31370b02021-01-07 16:31:14 +010032 "source.monogon.dev/metropolis/pkg/logtree"
Serge Bazanskif9edf522021-06-17 15:57:13 +020033 apb "source.monogon.dev/metropolis/proto/api"
Serge Bazanskib0272182020-11-02 18:39:44 +010034)
35
// logFilterMax is the maximum number of filters accepted in a single GetLogs
// request; requests carrying more are rejected with InvalidArgument.
const logFilterMax = 1000
39
Serge Bazanski662b5b32020-12-21 13:49:00 +010040// debugService implements the Metropolis node debug API.
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020041type debugService struct {
Serge Bazanskif9edf522021-06-17 15:57:13 +020042 roleserve *roleserve.Service
43 logtree *logtree.LogTree
Serge Bazanski216fe7b2021-05-21 18:36:16 +020044 // traceLock provides exclusive access to the Linux tracing infrastructure
45 // (ftrace)
46 // This is a channel because Go's mutexes can't be cancelled or be acquired
47 // in a non-blocking way.
Lorenz Brun09c275b2021-03-30 12:47:09 +020048 traceLock chan struct{}
Serge Bazanskib0272182020-11-02 18:39:44 +010049}
50
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020051func (s *debugService) GetDebugKubeconfig(ctx context.Context, req *apb.GetDebugKubeconfigRequest) (*apb.GetDebugKubeconfigResponse, error) {
Serge Bazanskif9edf522021-06-17 15:57:13 +020052 w := s.roleserve.Watch()
53 defer w.Close()
54 for {
55 v, err := w.Get(ctx)
56 if err != nil {
57 return nil, status.Errorf(codes.Unavailable, "could not get roleserve status: %v", err)
58 }
59 if v.Kubernetes == nil {
60 continue
61 }
62 return v.Kubernetes.GetDebugKubeconfig(ctx, req)
63 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020064}
65
Serge Bazanskib0272182020-11-02 18:39:44 +010066func (s *debugService) GetLogs(req *apb.GetLogsRequest, srv apb.NodeDebugService_GetLogsServer) error {
67 if len(req.Filters) > logFilterMax {
68 return status.Errorf(codes.InvalidArgument, "requested %d filters, maximum permitted is %d", len(req.Filters), logFilterMax)
69 }
70 dn := logtree.DN(req.Dn)
71 _, err := dn.Path()
72 switch err {
73 case nil:
74 case logtree.ErrInvalidDN:
75 return status.Errorf(codes.InvalidArgument, "invalid DN")
76 default:
77 return status.Errorf(codes.Unavailable, "could not parse DN: %v", err)
78 }
79
80 var options []logtree.LogReadOption
81
82 // Turn backlog mode into logtree option(s).
83 switch req.BacklogMode {
84 case apb.GetLogsRequest_BACKLOG_DISABLE:
85 case apb.GetLogsRequest_BACKLOG_ALL:
86 options = append(options, logtree.WithBacklog(logtree.BacklogAllAvailable))
87 case apb.GetLogsRequest_BACKLOG_COUNT:
88 count := int(req.BacklogCount)
89 if count <= 0 {
90 return status.Errorf(codes.InvalidArgument, "backlog_count must be > 0 if backlog_mode is BACKLOG_COUNT")
91 }
92 options = append(options, logtree.WithBacklog(count))
93 default:
94 return status.Errorf(codes.InvalidArgument, "unknown backlog_mode %d", req.BacklogMode)
95 }
96
97 // Turn stream mode into logtree option(s).
98 streamEnable := false
99 switch req.StreamMode {
100 case apb.GetLogsRequest_STREAM_DISABLE:
101 case apb.GetLogsRequest_STREAM_UNBUFFERED:
102 streamEnable = true
103 options = append(options, logtree.WithStream())
104 }
105
106 // Parse proto filters into logtree options.
107 for i, filter := range req.Filters {
108 switch inner := filter.Filter.(type) {
109 case *apb.LogFilter_WithChildren_:
110 options = append(options, logtree.WithChildren())
111 case *apb.LogFilter_OnlyRaw_:
112 options = append(options, logtree.OnlyRaw())
113 case *apb.LogFilter_OnlyLeveled_:
114 options = append(options, logtree.OnlyLeveled())
115 case *apb.LogFilter_LeveledWithMinimumSeverity_:
116 severity, err := logtree.SeverityFromProto(inner.LeveledWithMinimumSeverity.Minimum)
117 if err != nil {
118 return status.Errorf(codes.InvalidArgument, "filter %d has invalid severity: %v", i, err)
119 }
120 options = append(options, logtree.LeveledWithMinimumSeverity(severity))
121 }
122 }
123
124 reader, err := s.logtree.Read(logtree.DN(req.Dn), options...)
125 switch err {
126 case nil:
127 case logtree.ErrRawAndLeveled:
128 return status.Errorf(codes.InvalidArgument, "requested only raw and only leveled logs simultaneously")
129 default:
130 return status.Errorf(codes.Unavailable, "could not retrieve logs: %v", err)
131 }
132 defer reader.Close()
133
134 // Default protobuf message size limit is 64MB. We want to limit ourselves
135 // to 10MB.
136 // Currently each raw log line can be at most 1024 unicode codepoints (or
137 // 4096 bytes). To cover extra metadata and proto overhead, let's round
138 // this up to 4500 bytes. This in turn means we can store a maximum of
139 // (10e6/4500) == 2222 entries.
140 // Currently each leveled log line can also be at most 1024 unicode
141 // codepoints (or 4096 bytes). To cover extra metadata and proto overhead
142 // let's round this up to 2000 bytes. This in turn means we can store a
143 // maximum of (10e6/5000) == 2000 entries.
144 // The lowever of these numbers, ie the worst case scenario, is 2000
145 // maximum entries.
146 maxChunkSize := 2000
147
148 // Serve all backlog entries in chunks.
149 chunk := make([]*apb.LogEntry, 0, maxChunkSize)
150 for _, entry := range reader.Backlog {
151 p := entry.Proto()
152 if p == nil {
153 // TODO(q3k): log this once we have logtree/gRPC compatibility.
154 continue
155 }
156 chunk = append(chunk, p)
157
158 if len(chunk) >= maxChunkSize {
159 err := srv.Send(&apb.GetLogsResponse{
160 BacklogEntries: chunk,
161 })
162 if err != nil {
163 return err
164 }
165 chunk = make([]*apb.LogEntry, 0, maxChunkSize)
166 }
167 }
168
169 // Send last chunk of backlog, if present..
170 if len(chunk) > 0 {
171 err := srv.Send(&apb.GetLogsResponse{
172 BacklogEntries: chunk,
173 })
174 if err != nil {
175 return err
176 }
177 chunk = make([]*apb.LogEntry, 0, maxChunkSize)
178 }
179
180 // Start serving streaming data, if streaming has been requested.
181 if !streamEnable {
182 return nil
183 }
184
185 for {
186 entry, ok := <-reader.Stream
187 if !ok {
188 // Streaming has been ended by logtree - tell the client and return.
189 return status.Error(codes.Unavailable, "log streaming aborted by system")
190 }
191 p := entry.Proto()
192 if p == nil {
193 // TODO(q3k): log this once we have logtree/gRPC compatibility.
194 continue
195 }
196 err := srv.Send(&apb.GetLogsResponse{
197 StreamEntries: []*apb.LogEntry{p},
198 })
199 if err != nil {
200 return err
201 }
202 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200203}
Lorenz Brun09c275b2021-03-30 12:47:09 +0200204
// safeTracingPropertyNamesRe describes the only tracing property names that
// are accepted: non-empty and made up solely of lowercase ASCII letters,
// digits and underscores. Names end up inside a filesystem path, so anything
// that could lead to path traversal must be rejected.
var safeTracingPropertyNamesRe = regexp.MustCompile("^[a-z0-9_]+$")

// writeTracingProperty sets the tracing property called name to value by
// writing value followed by a newline to the file of that name under
// /sys/kernel/tracing/. An error is returned if name is not a permitted
// property name or if the write itself fails.
func writeTracingProperty(name string, value string) error {
	if safeTracingPropertyNamesRe.MatchString(name) {
		target := "/sys/kernel/tracing/" + name
		payload := []byte(value + "\n")
		return ioutil.WriteFile(target, payload, 0)
	}
	return fmt.Errorf("disallowed tracing property name received: \"%v\"", name)
}
215
216func (s *debugService) Trace(req *apb.TraceRequest, srv apb.NodeDebugService_TraceServer) error {
217 // Don't allow more than one trace as the kernel doesn't support this.
218 select {
219 case s.traceLock <- struct{}{}:
220 defer func() {
221 <-s.traceLock
222 }()
223 default:
224 return status.Error(codes.FailedPrecondition, "a trace is already in progress")
225 }
226
227 if len(req.FunctionFilter) == 0 {
228 req.FunctionFilter = []string{"*"} // For reset purposes
229 }
230 if len(req.GraphFunctionFilter) == 0 {
231 req.GraphFunctionFilter = []string{"*"} // For reset purposes
232 }
233
234 defer writeTracingProperty("current_tracer", "nop")
235 if err := writeTracingProperty("current_tracer", req.Tracer); err != nil {
236 return status.Errorf(codes.InvalidArgument, "requested tracer not available: %v", err)
237 }
238
239 if err := writeTracingProperty("set_ftrace_filter", strings.Join(req.FunctionFilter, " ")); err != nil {
240 return status.Errorf(codes.InvalidArgument, "setting ftrace filter failed: %v", err)
241 }
242 if err := writeTracingProperty("set_graph_function", strings.Join(req.GraphFunctionFilter, " ")); err != nil {
243 return status.Errorf(codes.InvalidArgument, "setting graph filter failed: %v", err)
244 }
245 tracePipe, err := os.Open("/sys/kernel/tracing/trace_pipe")
246 if err != nil {
247 return status.Errorf(codes.Unavailable, "cannot open trace output pipe: %v", err)
248 }
249 defer tracePipe.Close()
250
251 defer writeTracingProperty("tracing_on", "0")
252 if err := writeTracingProperty("tracing_on", "1"); err != nil {
253 return status.Errorf(codes.InvalidArgument, "requested tracer not available: %v", err)
254 }
255
256 go func() {
257 <-srv.Context().Done()
258 tracePipe.Close()
259 }()
260
261 eventScanner := bufio.NewScanner(tracePipe)
262 for eventScanner.Scan() {
263 if err := eventScanner.Err(); err != nil {
264 return status.Errorf(codes.Unavailable, "event pipe read error: %v", err)
265 }
266 err := srv.Send(&apb.TraceEvent{
267 RawLine: eventScanner.Text(),
268 })
269 if err != nil {
270 return err
271 }
272 }
273 return nil
274}