// This uses the unstable overrideWrite interface to also emit all runtime
// writes to a dedicated runtime file descriptor to catch and debug crash dumps.
// See https://go-review.googlesource.com/c/go/+/278792 for details about the
// interface. This interface is relatively special, refrain from using most Go
// features in here as it might cause unexpected behavior. Especially yielding
// is a bad idea as the scheduler might be in an inconsistent state. But using
// this interface was judged to be vastly more maintenance-friendly than
// attempting to parse out this information from a combined stderr.
9package main
10
11import (
12 "io"
13 "os"
14 "unsafe"
15
16 "golang.org/x/sys/unix"
17
18 "source.monogon.dev/metropolis/pkg/logtree"
19)
20
// overrideWrite hooks into the global variable of the same name in package
// runtime, which is checked by runtime.write and used instead of
// runtime.write1 if populated. It stays nil until initPanicHandler assigns
// runtimeWrite to it.
//
//go:linkname overrideWrite runtime.overrideWrite
var overrideWrite func(fd uintptr, p unsafe.Pointer, n int32) int32
25
// runtimeFd contains the file into which runtime logs and crashes are
// written. It holds the zero os.File until initPanicHandler stores the file
// created at runtimeLogPath in it; runtimeWrite obtains the raw descriptor
// from it via Fd().
var runtimeFd os.File
28
29// This is essentially a reimplementation of the assembly function
30// runtime.write1, just with a hardcoded file descriptor and using the assembly
31// function unix.RawSyscall to not get a dependency on Go's calling convention
32// and needing an implementation for every architecture.
33//go:nosplit
34func runtimeWrite(fd uintptr, p unsafe.Pointer, n int32) int32 {
35 _, _, err := unix.RawSyscall(unix.SYS_WRITE, runtimeFd.Fd(), uintptr(p), uintptr(n))
36 if err != 0 {
37 return int32(err)
38 }
39 // Also write to original FD
40 _, _, err = unix.RawSyscall(unix.SYS_WRITE, fd, uintptr(p), uintptr(n))
41 return int32(err)
42}
43
// runtimeLogPath is the file that persistently receives the runtime output.
// On startup initPanicHandler replays and removes any file found at this path
// before creating a fresh one.
// NOTE(review): "/esp" is presumably the mounted EFI System Partition —
// confirm.
const runtimeLogPath = "/esp/core_runtime.log"
45
46func initPanicHandler(lt *logtree.LogTree) {
47 rl := lt.MustRawFor("panichandler")
48 l := lt.MustLeveledFor("panichandler")
49 runtimeLogFile, err := os.Open(runtimeLogPath)
50 if err != nil && !os.IsNotExist(err) {
51 l.Errorf("Failed to open runtimeLogFile: %v", err)
52 }
53 if err == nil {
54 if _, err := io.Copy(rl, runtimeLogFile); err != nil {
55 l.Errorf("Failed to log old persistent crash: %v", err)
56 }
57 runtimeLogFile.Close()
58 if err := os.Remove(runtimeLogPath); err != nil {
59 l.Errorf("Failed to delete old persistent runtime crash log: %v", err)
60 }
61 }
62
63 file, err := os.Create(runtimeLogPath)
64 if err != nil {
65 l.Errorf("Failed to open core runtime log file: %w", err)
66 l.Warningf("Continuing without persistent panic storage.")
67 return
68 }
69 runtimeFd = *file
70 // Make sure the Fd is in blocking mode. Go's runtime opens all FDs in non-
71 // blocking mode by default and switches them back once you get a reference
72 // to the raw file descriptor to not break existing code. This switching
73 // back is done on the first Fd() call and involves calls into the runtime
74 // scheduler as it issues non-raw syscalls. Calling Fd() here makes sure
75 // that these calls happen in a sane environment before any actual panic.
76 // After this Fd() performs only memory accesses which is safe even when
77 // panicing the runtime.
78 // Keeping the raw fd is not possible as Go's runtime would eventually
79 // garbage-collect the backing os.File and close it, so we must keep around
80 // the actual os.File.
81 _ = runtimeFd.Fd()
82 // This could cause a data race if the runtime crashed while we're
83 // initializing the crash handler, but there is no locking infrastructure
84 // for this so we have to take that risk.
85 overrideWrite = runtimeWrite
86 return
87}