Blame - metropolis/node/core/panichandler.go - monogon

blob: 97174f4b52ba6e1c77d2ac96adaa88dd25ec6e39 [file] [log] [blame]

Lorenz Brun	4025c9b	2022-06-16 16:12:53 +0000	[diff] [blame]	1	// This uses the unstable overrideWrite interface to also emit all runtime
				2	// writes to a dedicated runtime file descriptor to catch and debug crash dumps.
				3	// See https://go-review.googlesource.com/c/go/+/278792 for details about the
				4	// interface. This interface is relatively special, refrain from using most Go
				5	// features in here as it might cause unexpected behavior. Especially yielding
				6	// is a bad idea as the scheduler might be in an inconsistent state. But using
				7	// this interface was judged to be vastly more maintenance-friendly than
				8	// attempting to parse out this information from a combined stderr.
				9	package main
				10
				11	import (
				12	"io"
				13	"os"
				14	"unsafe"
				15
				16	"golang.org/x/sys/unix"
				17
				18	"source.monogon.dev/metropolis/pkg/logtree"
				19	)
				20
				21	// overrideWrite hooks into a global variable of the Go runtime which is
				22	// checked by runtime.write and used instead of runtime.write1 if populated.
				// The hook receives the target file descriptor, a pointer to the data and
				// the number of bytes to write. It stays nil until initPanicHandler points
				// it at runtimeWrite.
				23	//go:linkname overrideWrite runtime.overrideWrite
				24	var overrideWrite func(fd uintptr, p unsafe.Pointer, n int32) int32
				25
				26	// runtimeFd contains the file into which runtime logs and crashes are written.
				// It is populated by initPanicHandler with the file created at
				// runtimeLogPath and read by runtimeWrite on every runtime write. The
				// os.File itself is kept (rather than a raw descriptor) so that the file
				// is not garbage-collected and closed behind our back.
				27	var runtimeFd os.File
				28
				29	// This is essentially a reimplementation of the assembly function
				30	// runtime.write1, just with a hardcoded file descriptor and using the assembly
				31	// function unix.RawSyscall to not get a dependency on Go's calling convention
				32	// and needing an implementation for every architecture.
				33	//go:nosplit
				34	func runtimeWrite(fd uintptr, p unsafe.Pointer, n int32) int32 {
				35	_, _, err := unix.RawSyscall(unix.SYS_WRITE, runtimeFd.Fd(), uintptr(p), uintptr(n))
				36	if err != 0 {
				37	return int32(err)
				38	}
				39	// Also write to original FD
				40	_, _, err = unix.RawSyscall(unix.SYS_WRITE, fd, uintptr(p), uintptr(n))
				41	return int32(err)
				42	}
				43
				// runtimeLogPath is the file used to persist runtime output.
				// initPanicHandler replays and removes any file already present at this
				// path, then creates it anew as the target for runtimeWrite.
				44	const runtimeLogPath = "/esp/core_runtime.log"
				45
				46	func initPanicHandler(lt *logtree.LogTree) {
				47	rl := lt.MustRawFor("panichandler")
				48	l := lt.MustLeveledFor("panichandler")
				49	runtimeLogFile, err := os.Open(runtimeLogPath)
				50	if err != nil && !os.IsNotExist(err) {
				51	l.Errorf("Failed to open runtimeLogFile: %v", err)
				52	}
				53	if err == nil {
				54	if _, err := io.Copy(rl, runtimeLogFile); err != nil {
				55	l.Errorf("Failed to log old persistent crash: %v", err)
				56	}
				57	runtimeLogFile.Close()
				58	if err := os.Remove(runtimeLogPath); err != nil {
				59	l.Errorf("Failed to delete old persistent runtime crash log: %v", err)
				60	}
				61	}
				62
				63	file, err := os.Create(runtimeLogPath)
				64	if err != nil {
				65	l.Errorf("Failed to open core runtime log file: %w", err)
				66	l.Warningf("Continuing without persistent panic storage.")
				67	return
				68	}
				69	runtimeFd = *file
				70	// Make sure the Fd is in blocking mode. Go's runtime opens all FDs in non-
				71	// blocking mode by default and switches them back once you get a reference
				72	// to the raw file descriptor to not break existing code. This switching
				73	// back is done on the first Fd() call and involves calls into the runtime
				74	// scheduler as it issues non-raw syscalls. Calling Fd() here makes sure
				75	// that these calls happen in a sane environment before any actual panic.
				76	// After this Fd() performs only memory accesses which is safe even when
				77	// panicing the runtime.
				78	// Keeping the raw fd is not possible as Go's runtime would eventually
				79	// garbage-collect the backing os.File and close it, so we must keep around
				80	// the actual os.File.
				81	_ = runtimeFd.Fd()
				82	// This could cause a data race if the runtime crashed while we're
				83	// initializing the crash handler, but there is no locking infrastructure
				84	// for this so we have to take that risk.
				85	overrideWrite = runtimeWrite
				86	return
				87	}