Add ftrace support to DebugService
This allows us to do ad-hoc kernel-level tracing on a running Metropolis node.
Useful for tracking down complex bugs.
Example: `bazel run //metropolis/cli/dbg -- trace -function_graph_filter blkdev_* function_graph`
Test Plan: Debug utility, manually tested
X-Origin-Diff: phab/D748
GitOrigin-RevId: 924eb795250412a73eb30c0eef4a8c1cc726e5fd
diff --git a/metropolis/cli/dbg/main.go b/metropolis/cli/dbg/main.go
index eb9070f..75685e9 100644
--- a/metropolis/cli/dbg/main.go
+++ b/metropolis/cli/dbg/main.go
@@ -24,6 +24,7 @@
"io/ioutil"
"math/rand"
"os"
+ "strings"
"time"
"github.com/spf13/pflag"
@@ -69,6 +70,14 @@
fmt.Fprintf(os.Stderr, "Example:\n %s %s IPAssigned\n", os.Args[0], os.Args[1])
}
+ traceCmd := flag.NewFlagSet("trace", flag.ExitOnError)
+ traceCmd.Usage = func() {
+ fmt.Fprintf(os.Stderr, "Usage: %v %v [options] tracer\n", os.Args[0], os.Args[1])
+ flag.PrintDefaults()
+ }
+ functionFilter := traceCmd.String("function_filter", "", "Only trace functions matched by this filter (comma-separated, supports wildcards via *)")
+ functionGraphFilter := traceCmd.String("function_graph_filter", "", "Only trace functions matched by this filter and their children (syntax same as function_filter)")
+
switch os.Args[1] {
case "logs":
logsCmd.Parse(os.Args[2:])
@@ -157,5 +166,37 @@
if err := command.Execute(); err != nil {
os.Exit(1)
}
+ case "trace":
+ traceCmd.Parse(os.Args[2:])
+ tracer := traceCmd.Arg(0)
+ var fgf []string
+ var ff []string
+ if len(*functionGraphFilter) > 0 {
+ fgf = strings.Split(*functionGraphFilter, ",")
+ }
+ if len(*functionFilter) > 0 {
+ ff = strings.Split(*functionFilter, ",")
+ }
+ req := apb.TraceRequest{
+ GraphFunctionFilter: fgf,
+ FunctionFilter: ff,
+ Tracer: tracer,
+ }
+ traceEvents, err := debugClient.Trace(ctx, &req)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "failed to trace: %v", err)
+ os.Exit(1)
+ }
+ for {
+ traceEvent, err := traceEvents.Recv()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ fmt.Fprintf(os.Stderr, "stream aborted unexpectedly: %v", err)
+ os.Exit(1)
+ }
+ fmt.Println(traceEvent.RawLine)
+ }
}
}
diff --git a/metropolis/node/core/debug_service.go b/metropolis/node/core/debug_service.go
index 6dee3d9..964642b 100644
--- a/metropolis/node/core/debug_service.go
+++ b/metropolis/node/core/debug_service.go
@@ -17,15 +17,22 @@
package main
import (
+ "bufio"
"context"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "regexp"
+ "strings"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
+ apb "source.monogon.dev/metropolis/proto/api"
+
"source.monogon.dev/metropolis/node/core/cluster"
"source.monogon.dev/metropolis/node/kubernetes"
"source.monogon.dev/metropolis/pkg/logtree"
- apb "source.monogon.dev/metropolis/proto/api"
)
const (
@@ -37,6 +44,9 @@
cluster *cluster.Manager
kubernetes *kubernetes.Service
logtree *logtree.LogTree
+ // traceLock provides exclusive access to the Linux tracing infrastructure (ftrace)
+ // This is a channel because Go's mutexes can't be cancelled or be acquired in a non-blocking way.
+ traceLock chan struct{}
}
func (s *debugService) GetDebugKubeconfig(ctx context.Context, req *apb.GetDebugKubeconfigRequest) (*apb.GetDebugKubeconfigResponse, error) {
@@ -181,3 +191,73 @@
}
}
}
+
+// Validate property names as they are used in path construction and we really don't want a path traversal vulnerability
+var safeTracingPropertyNamesRe = regexp.MustCompile("^[a-z0-9_]+$")
+
+func writeTracingProperty(name string, value string) error {
+ if !safeTracingPropertyNamesRe.MatchString(name) {
+ return fmt.Errorf("disallowed tracing property name received: \"%v\"", name)
+ }
+ return ioutil.WriteFile("/sys/kernel/tracing/"+name, []byte(value+"\n"), 0)
+}
+
+func (s *debugService) Trace(req *apb.TraceRequest, srv apb.NodeDebugService_TraceServer) error {
+ // Don't allow more than one trace as the kernel doesn't support this.
+ select {
+ case s.traceLock <- struct{}{}:
+ defer func() {
+ <-s.traceLock
+ }()
+ default:
+ return status.Error(codes.FailedPrecondition, "a trace is already in progress")
+ }
+
+ if len(req.FunctionFilter) == 0 {
+ req.FunctionFilter = []string{"*"} // For reset purposes
+ }
+ if len(req.GraphFunctionFilter) == 0 {
+ req.GraphFunctionFilter = []string{"*"} // For reset purposes
+ }
+
+ defer writeTracingProperty("current_tracer", "nop")
+ if err := writeTracingProperty("current_tracer", req.Tracer); err != nil {
+ return status.Errorf(codes.InvalidArgument, "requested tracer not available: %v", err)
+ }
+
+ if err := writeTracingProperty("set_ftrace_filter", strings.Join(req.FunctionFilter, " ")); err != nil {
+ return status.Errorf(codes.InvalidArgument, "setting ftrace filter failed: %v", err)
+ }
+ if err := writeTracingProperty("set_graph_function", strings.Join(req.GraphFunctionFilter, " ")); err != nil {
+ return status.Errorf(codes.InvalidArgument, "setting graph filter failed: %v", err)
+ }
+ tracePipe, err := os.Open("/sys/kernel/tracing/trace_pipe")
+ if err != nil {
+ return status.Errorf(codes.Unavailable, "cannot open trace output pipe: %v", err)
+ }
+ defer tracePipe.Close()
+
+ defer writeTracingProperty("tracing_on", "0")
+ if err := writeTracingProperty("tracing_on", "1"); err != nil {
+ return status.Errorf(codes.InvalidArgument, "requested tracer not available: %v", err)
+ }
+
+ go func() {
+ <-srv.Context().Done()
+ tracePipe.Close()
+ }()
+
+ eventScanner := bufio.NewScanner(tracePipe)
+ for eventScanner.Scan() {
+ if err := eventScanner.Err(); err != nil {
+ return status.Errorf(codes.Unavailable, "event pipe read error: %v", err)
+ }
+ err := srv.Send(&apb.TraceEvent{
+ RawLine: eventScanner.Text(),
+ })
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index 4b19b7f..442102f 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -208,6 +208,7 @@
cluster: m,
logtree: lt,
kubernetes: kubeSvc,
+ traceLock: make(chan struct{}, 1),
}
dbgSrv := grpc.NewServer()
apb.RegisterNodeDebugServiceServer(dbgSrv, dbg)
diff --git a/metropolis/node/core/mounts.go b/metropolis/node/core/mounts.go
index c6a626f..7a15c30 100644
--- a/metropolis/node/core/mounts.go
+++ b/metropolis/node/core/mounts.go
@@ -37,6 +37,7 @@
flags uintptr
}{
{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/sys/kernel/tracing", "tracefs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
diff --git a/metropolis/proto/api/debug.proto b/metropolis/proto/api/debug.proto
index 25e369e..6cbe32b 100644
--- a/metropolis/proto/api/debug.proto
+++ b/metropolis/proto/api/debug.proto
@@ -39,6 +39,9 @@
//
// TODO(q3k): move method and its related messages to the non-debug node endpoint once we have one.
rpc GetLogs(GetLogsRequest) returns (stream GetLogsResponse);
+
+ // Trace enables tracing of Metropolis using the Linux ftrace infrastructure.
+ rpc Trace(TraceRequest) returns (stream TraceEvent);
}
@@ -146,3 +149,21 @@
Raw raw = 3;
}
}
+
+message TraceRequest {
+ // Name of the tracer to use. Defined in https://www.kernel.org/doc/html/latest/trace/ftrace.html#the-tracers.
+ // Useful ones enabled in Metropolis: function_graph, function.
+ // Gets reset to nop automatically after the stream is terminated.
+ string tracer = 1;
+
+ // List of functions to trace. Accepts wildcards using the '*' character. If left empty traces all functions.
+ repeated string function_filter = 2;
+
+ // List of functions and their descendants to trace with the function_graph tracer.
+ repeated string graph_function_filter = 3;
+}
+
+message TraceEvent {
+ // Currently we do not parse the event data and just return what the kernel outputs, line-by-line.
+ string raw_line = 1;
+}
\ No newline at end of file