m/node: use pstore for panic storage

The old solution never worked as the ESP was not mounted at that stage.
In general storing crash data there is suboptimal as it makes lots of
assumptions about the system state.

For kernel crashes we already use pstore and there is an interface for
storing userspace messages in pstore as well. Set up the panic handler
to put its logs in there and extend the pstore cleanup runnable to also
dump that part of pstore into the logtree after reboot.

In most cases this also requires a kernel patch as most pstore backends
do not allow userspace messages, probably to preserve limited space.
Since we always clean pstore after reboot, this should be fine.

Change-Id: I011109112e7bfd24d1772d5853a1d491c0cfd026
Reviewed-on: https://review.monogon.dev/c/monogon/+/2753
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/panichandler.go b/metropolis/node/core/panichandler.go
index 3f17ffc..7a6534b 100644
--- a/metropolis/node/core/panichandler.go
+++ b/metropolis/node/core/panichandler.go
@@ -9,7 +9,6 @@
 package main
 
 import (
-	"io"
 	"os"
 	"unsafe"
 
@@ -55,30 +54,13 @@
 	return int32(err)
 }
 
-const runtimeLogPath = "/esp/core_runtime.log"
-
 func initPanicHandler(lt *logtree.LogTree, consoles []console) {
-	rl := lt.MustRawFor("panichandler")
 	l := lt.MustLeveledFor("panichandler")
 
-	runtimeLogFile, err := os.Open(runtimeLogPath)
-	if err != nil && !os.IsNotExist(err) {
-		l.Errorf("Failed to open runtimeLogFile: %v", err)
-	}
-	if err == nil {
-		if _, err := io.Copy(rl, runtimeLogFile); err != nil {
-			l.Errorf("Failed to log old persistent crash: %v", err)
-		}
-		runtimeLogFile.Close()
-		if err := os.Remove(runtimeLogPath); err != nil {
-			l.Errorf("Failed to delete old persistent runtime crash log: %v", err)
-		}
-	}
-
-	// Setup ESP file.
-	fd, err := unix.Open(runtimeLogPath, os.O_CREATE|os.O_WRONLY, 0)
+	// Setup pstore userspace message buffer
+	fd, err := unix.Open("/dev/pmsg0", os.O_WRONLY, 0)
 	if err != nil {
-		l.Errorf("Failed to open core runtime log file: %v", err)
+		l.Errorf("Failed to open pstore userspace device (pstore probably unavailable): %v", err)
 		l.Warningf("Continuing without persistent panic storage.")
 	} else {
 		runtimeFds = append(runtimeFds, fd)
diff --git a/metropolis/node/core/pstore.go b/metropolis/node/core/pstore.go
index 857488a..c8863e5 100644
--- a/metropolis/node/core/pstore.go
+++ b/metropolis/node/core/pstore.go
@@ -23,6 +23,13 @@
 			logger.Warning(line)
 		}
 	}
+	userspaceLines, err := pstore.GetPmsgDump()
+	if err != nil {
+		logger.Errorf("Failed to recover userspace logs from pstore: %v", err)
+	}
+	for _, line := range userspaceLines {
+		logger.Warning(line)
+	}
 	cleanErr := pstore.ClearAll()
 	if cleanErr != nil {
 		logger.Errorf("Failed to clear pstore: %v", err)
diff --git a/metropolis/pkg/pstore/pstore.go b/metropolis/pkg/pstore/pstore.go
index b553c3c..1bfe586 100644
--- a/metropolis/pkg/pstore/pstore.go
+++ b/metropolis/pkg/pstore/pstore.go
@@ -73,6 +73,8 @@
 
 var dmesgFileRegexp = regexp.MustCompile("^dmesg-.*-([0-9]+)")
 
+var pmsgFileRegexp = regexp.MustCompile("^pmsg-.*-([0-9]+)")
+
 type pstoreDmesgFile struct {
 	hdr   pstoreDmesgHeader
 	ctime time.Time
@@ -85,6 +87,32 @@
 	return getKmsgDumpsFromFS(os.DirFS(CanonicalMountPath))
 }
 
+// GetPmsgDump returns the lines stored in pstore's pmsg-* entries (see /dev/pmsg0).
+func GetPmsgDump() ([]string, error) {
+	var lines []string
+	pstoreEntries, err := os.ReadDir(CanonicalMountPath)
+	if err != nil {
+		return []string{}, fmt.Errorf("failed to list files in pstore: %w", err)
+	}
+	for _, entry := range pstoreEntries {
+		if !pmsgFileRegexp.MatchString(entry.Name()) {
+			continue
+		}
+		f, err := os.Open(filepath.Join(CanonicalMountPath, entry.Name()))
+		if err != nil {
+			return lines, fmt.Errorf("failed to open pstore entry file: %w", err)
+		}
+		s := bufio.NewScanner(f)
+		for s.Scan() {
+			lines = append(lines, s.Text())
+		}
+		if f.Close(); s.Err() != nil { // close now (no defer in loop), then report scan errors
+			return lines, fmt.Errorf("failed to read pstore entry file: %w", s.Err())
+		}
+	}
+	return lines, nil
+}
+
 // f is injected here for testing
 func getKmsgDumpsFromFS(f fs.FS) ([]KmsgDump, error) {
 	var events []KmsgDump