m/node: refactor panic handling
This change substantially reworks how we handle panics and runtime
errors in our core process. The explicit panic handler is gone; it has
been replaced by a file that stores the panic persistently, and
the informational message has been moved out to minit.
The runtime log file is stored on the ESP to allow for debugging if the
node crashes before unlocking; the file is reset on every boot. Before the
reset, its previous contents are dumped into the logtree so administrators
can look into these errors without launching another OS to dump the file.
Change-Id: I3503eeced2da0bbcb6301a6c39e502bbb9afa827
Reviewed-on: https://review.monogon.dev/c/monogon/+/772
Tested-by: Jenkins CI
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index f62982f..26ff5d5 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -6,6 +6,7 @@
     srcs = [
         "main.go",
         "mounts.go",
+        "panichandler.go",
         "pstore.go",
     ] + select({
         "//metropolis/node:debug_build": [
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index eb674ae..ca71fa7 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -21,7 +21,6 @@
 	"fmt"
 	"io"
 	"os"
-	"runtime/debug"
 
 	"golang.org/x/sys/unix"
 
@@ -38,22 +37,6 @@
 )
 
 func main() {
-	defer func() {
-		if r := recover(); r != nil {
-			fmt.Fprintf(os.Stderr, "\n\n")
-			fmt.Fprintf(os.Stderr, "  Metropolis encountered an uncorrectable error and this node must be restarted.\n")
-			fmt.Fprintf(os.Stderr, "  Core panicked: %v\n\n", r)
-			debug.PrintStack()
-		}
-		unix.Sync()
-		// TODO(lorenz): Switch this to Reboot when init panics are less likely.
-		if err := unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF); err != nil {
-			// Best effort, nothing we can do if this fails except printing the error to the
-			// console.
-			panic(fmt.Sprintf("failed to halt node: %v\n", err))
-		}
-	}()
-
 	// Set up basic mounts (like /dev, /sys...).
 	if err := setupMounts(); err != nil {
 		panic(fmt.Errorf("could not set up basic mounts: %w", err))
@@ -81,6 +64,8 @@
 			}
 		}(p, f)
 	}
+	// Initialize persistent panic handler early
+	initPanicHandler(lt)
 
 	// Initial logger. Used until we get to a supervisor.
 	logger := lt.MustLeveledFor("init")
diff --git a/metropolis/node/core/minit/main.c b/metropolis/node/core/minit/main.c
index cee0878..4677c8e 100644
--- a/metropolis/node/core/minit/main.c
+++ b/metropolis/node/core/minit/main.c
@@ -157,19 +157,18 @@
             cprintf("child status not EXITED nor SIGNALED: %d\n", status);
             exit_status = 1;
         }
-    }
 
-    // Direct child exited, let's also exit.
-    if (exit_status >= 0) {
-        cprintf("\n  Metropolis core exited with status: %d\n", exit_status);
-        sync();
-        if (exit_status != 0) {
-            cprintf("  Disks synced, rebooting in 30 seconds...\n", exit_status);
-            sleep(30);
-            cprintf("  Rebooting...\n\n", exit_status);
-        } else {
+        // Direct child exited, let's also exit.
+        if (exit_status >= 0) {
+            if (exit_status == 0) {
+                reboot(LINUX_REBOOT_CMD_RESTART);
+                return;
+            }
+            cprintf("\n Metropolis encountered an uncorrectable error and this node must be restarted.\n");
+            cprintf("core exit status: %d\n", exit_status);
+            sync();
             cprintf("  Disks synced, rebooting...\n\n");
+            reboot(LINUX_REBOOT_CMD_RESTART);
         }
-        reboot(LINUX_REBOOT_CMD_RESTART);
     }
 }
diff --git a/metropolis/node/core/panichandler.go b/metropolis/node/core/panichandler.go
new file mode 100644
index 0000000..97174f4
--- /dev/null
+++ b/metropolis/node/core/panichandler.go
@@ -0,0 +1,87 @@
+// This uses the unstable overrideWrite interface to also emit all runtime
+// writes to a dedicated runtime file descriptor to catch and debug crash dumps.
+// See https://go-review.googlesource.com/c/go/+/278792 for details about the
+// interface. This interface is rather special: refrain from using most Go
+// features in here, as they might cause unexpected behavior. Yielding in
+// particular is a bad idea, as the scheduler might be in an inconsistent state.
+// Still, using this interface was judged to be vastly more maintenance-friendly
+// than attempting to parse out this information from a combined stderr.
+package main
+
+import (
+	"io"
+	"os"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+
+	"source.monogon.dev/metropolis/pkg/logtree"
+)
+
+// This hooks into a global variable which is checked by runtime.write and used
+// instead of runtime.write1 if populated.
+//go:linkname overrideWrite runtime.overrideWrite
+var overrideWrite func(fd uintptr, p unsafe.Pointer, n int32) int32
+
+// Contains the file into which runtime logs and crashes are written.
+var runtimeFd os.File
+
+// runtimeWrite replaces the assembly function runtime.write1: it copies the
+// buffer to the runtime log file via unix.RawSyscall (no dependency on Go's
+// calling convention, no per-architecture code), then to the original fd.
+// Like write1, it returns the byte count written to fd or a negated errno.
+//go:nosplit
+func runtimeWrite(fd uintptr, p unsafe.Pointer, n int32) int32 {
+	// Best effort: a failed log write must not swallow the real output.
+	_, _, _ = unix.RawSyscall(unix.SYS_WRITE, runtimeFd.Fd(), uintptr(p), uintptr(n))
+	written, _, errno := unix.RawSyscall(unix.SYS_WRITE, fd, uintptr(p), uintptr(n))
+	if errno != 0 {
+		return -int32(errno)
+	}
+	return int32(written)
+}
+
+const runtimeLogPath = "/esp/core_runtime.log"
+
+// initPanicHandler dumps the runtime log left behind by the previous boot (if
+// any) into lt under "panichandler", then recreates the file at runtimeLogPath
+// and installs runtimeWrite as the runtime's write override. Failures are only
+// logged; without a log file, the node continues without persistent storage.
+func initPanicHandler(lt *logtree.LogTree) {
+	rl := lt.MustRawFor("panichandler")
+	l := lt.MustLeveledFor("panichandler")
+	runtimeLogFile, err := os.Open(runtimeLogPath)
+	if err != nil && !os.IsNotExist(err) {
+		l.Errorf("Failed to open runtimeLogFile: %v", err)
+	}
+	if err == nil {
+		if _, err := io.Copy(rl, runtimeLogFile); err != nil {
+			l.Errorf("Failed to log old persistent crash: %v", err)
+		}
+		runtimeLogFile.Close()
+		if err := os.Remove(runtimeLogPath); err != nil {
+			l.Errorf("Failed to delete old persistent runtime crash log: %v", err)
+		}
+	}
+
+	file, err := os.Create(runtimeLogPath)
+	if err != nil {
+		l.Errorf("Failed to open core runtime log file: %v", err)
+		l.Warningf("Continuing without persistent panic storage.")
+		return
+	}
+	runtimeFd = *file
+	// Make sure the Fd is in blocking mode. Go's runtime opens all FDs in
+	// non-blocking mode and switches them back on the first Fd() call, which
+	// issues non-raw syscalls and thus calls into the scheduler. Calling Fd()
+	// here makes that happen in a sane environment before any actual panic;
+	// afterwards Fd() performs only memory accesses, which is safe even while
+	// the runtime is panicking. Keeping just the raw fd is not possible, as
+	// the backing os.File would eventually be garbage-collected and closed,
+	// so the actual os.File is kept around in runtimeFd.
+	_ = runtimeFd.Fd()
+	// This could cause a data race if the runtime crashed while we're
+	// initializing the crash handler, but there is no locking infrastructure
+	// for this so we have to take that risk.
+	overrideWrite = runtimeWrite
+}