m/n/core: fix panic logging

We fix a few issues:

 1. Logging to the runtime file descriptors didn't work for some
    reason. Opening the FD(s) manually works.
 2. We didn't write runtime panic output to the consoles.
 3. We didn't return errors/results correctly. RawSyscall performs its
    own success/failure check on the raw syscall result and routes it to
    either the first (result) or the last (errno) return value. We need
    to undo this split and return the single unified value that
    runtime.write expects and runtime.write1 provides.

Change-Id: Ie718a47139dd0f700d53466a1250593025c9dcbd
Reviewed-on: https://review.monogon.dev/c/monogon/+/809
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index c6b0bd3..5fde55b 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -45,10 +45,9 @@
 
 	// Set up logger for Metropolis. Currently logs everything to /dev/tty0 and
 	// /dev/ttyS0.
+	consoles := []string{"/dev/tty0", "/dev/ttyS0"}
 	lt := logtree.New()
-	for _, p := range []string{
-		"/dev/tty0", "/dev/ttyS0",
-	} {
+	for _, p := range consoles {
 		f, err := os.OpenFile(p, os.O_WRONLY, 0)
 		if err != nil {
 			continue
@@ -66,7 +65,7 @@
 		}(p, f)
 	}
 	// Initialize persistent panic handler early
-	initPanicHandler(lt)
+	initPanicHandler(lt, consoles)
 
 	// Initial logger. Used until we get to a supervisor.
 	logger := lt.MustLeveledFor("init")
diff --git a/metropolis/node/core/panichandler.go b/metropolis/node/core/panichandler.go
index 97174f4..fe0e2e1 100644
--- a/metropolis/node/core/panichandler.go
+++ b/metropolis/node/core/panichandler.go
@@ -23,8 +23,8 @@
 //go:linkname overrideWrite runtime.overrideWrite
 var overrideWrite func(fd uintptr, p unsafe.Pointer, n int32) int32
 
-// Contains the file into which runtime logs and crashes are written.
-var runtimeFd os.File
+// Contains the files into which runtime logs and crashes are written.
+var runtimeFds []int
 
 // This is essentially a reimplementation of the assembly function
 // runtime.write1, just with a hardcoded file descriptor and using the assembly
@@ -32,20 +32,33 @@
 // and needing an implementation for every architecture.
 //go:nosplit
 func runtimeWrite(fd uintptr, p unsafe.Pointer, n int32) int32 {
-	_, _, err := unix.RawSyscall(unix.SYS_WRITE, runtimeFd.Fd(), uintptr(p), uintptr(n))
-	if err != 0 {
+	// Only redirect writes to stderr.
+	if fd != 2 {
+		a, _, err := unix.RawSyscall(unix.SYS_WRITE, fd, uintptr(p), uintptr(n))
+		if err == 0 {
+			return int32(a)
+		}
 		return int32(err)
 	}
-	// Also write to original FD
-	_, _, err = unix.RawSyscall(unix.SYS_WRITE, fd, uintptr(p), uintptr(n))
+	// Write to the runtime panic FDs.
+	for _, f := range runtimeFds {
+		_, _, _ = unix.RawSyscall(unix.SYS_WRITE, uintptr(f), uintptr(p), uintptr(n))
+	}
+
+	// Finally, write to original FD
+	a, _, err := unix.RawSyscall(unix.SYS_WRITE, fd, uintptr(p), uintptr(n))
+	if err == 0 {
+		return int32(a)
+	}
 	return int32(err)
 }
 
 const runtimeLogPath = "/esp/core_runtime.log"
 
-func initPanicHandler(lt *logtree.LogTree) {
+func initPanicHandler(lt *logtree.LogTree, consoles []string) {
 	rl := lt.MustRawFor("panichandler")
 	l := lt.MustLeveledFor("panichandler")
+
 	runtimeLogFile, err := os.Open(runtimeLogPath)
 	if err != nil && !os.IsNotExist(err) {
 		l.Errorf("Failed to open runtimeLogFile: %v", err)
@@ -60,25 +73,23 @@
 		}
 	}
 
-	file, err := os.Create(runtimeLogPath)
+	// Setup ESP file.
+	fd, err := unix.Open(runtimeLogPath, os.O_CREATE|os.O_WRONLY, 0)
 	if err != nil {
-		l.Errorf("Failed to open core runtime log file: %w", err)
+		l.Errorf("Failed to open core runtime log file: %v", err)
 		l.Warningf("Continuing without persistent panic storage.")
-		return
+	} else {
+		runtimeFds = append(runtimeFds, fd)
 	}
-	runtimeFd = *file
-	// Make sure the Fd is in blocking mode. Go's runtime opens all FDs in non-
-	// blocking mode by default and switches them back once you get a reference
-	// to the raw file descriptor to not break existing code. This switching
-	// back is done on the first Fd() call and involves calls into the runtime
-	// scheduler as it issues non-raw syscalls. Calling Fd() here makes sure
-	// that these calls happen in a sane environment before any actual panic.
-	// After this Fd() performs only memory accesses which is safe even when
-	// panicing the runtime.
-	// Keeping the raw fd is not possible as Go's runtime would eventually
-	// garbage-collect the backing os.File and close it, so we must keep around
-	// the actual os.File.
-	_ = runtimeFd.Fd()
+
+	for _, s := range consoles {
+		fd, err := unix.Open(s, os.O_WRONLY, 0)
+		if err == nil {
+			runtimeFds = append(runtimeFds, fd)
+			l.Infof("Panic console: %s", s)
+		}
+	}
+
 	// This could cause a data race if the runtime crashed while we're
 	// initializing the crash handler, but there is no locking infrastructure
 	// for this so we have to take that risk.