m/node: use pstore for panic storage
The old solution never worked as the ESP was not mounted at that stage.
In general storing crash data there is suboptimal as it makes lots of
assumptions about the system state.
For kernel crashes we already use pstore and there is an interface for
storing userspace messages in pstore as well. Set up the panic handler
to put its logs in there and extend the pstore cleanup runnable to also
dump that part of pstore into the logtree after reboot.
In most cases this also requires a kernel patch as most pstore backends
do not allow userspace messages, probably to preserve limited space.
Since we always clean pstore after reboot, this should be fine.
Change-Id: I011109112e7bfd24d1772d5853a1d491c0cfd026
Reviewed-on: https://review.monogon.dev/c/monogon/+/2753
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/panichandler.go b/metropolis/node/core/panichandler.go
index 3f17ffc..7a6534b 100644
--- a/metropolis/node/core/panichandler.go
+++ b/metropolis/node/core/panichandler.go
@@ -9,7 +9,6 @@
package main
import (
- "io"
"os"
"unsafe"
@@ -55,30 +54,13 @@
return int32(err)
}
-const runtimeLogPath = "/esp/core_runtime.log"
-
func initPanicHandler(lt *logtree.LogTree, consoles []console) {
- rl := lt.MustRawFor("panichandler")
l := lt.MustLeveledFor("panichandler")
- runtimeLogFile, err := os.Open(runtimeLogPath)
- if err != nil && !os.IsNotExist(err) {
- l.Errorf("Failed to open runtimeLogFile: %v", err)
- }
- if err == nil {
- if _, err := io.Copy(rl, runtimeLogFile); err != nil {
- l.Errorf("Failed to log old persistent crash: %v", err)
- }
- runtimeLogFile.Close()
- if err := os.Remove(runtimeLogPath); err != nil {
- l.Errorf("Failed to delete old persistent runtime crash log: %v", err)
- }
- }
-
- // Setup ESP file.
- fd, err := unix.Open(runtimeLogPath, os.O_CREATE|os.O_WRONLY, 0)
+ // Setup pstore userspace message buffer
+ fd, err := unix.Open("/dev/pmsg0", os.O_WRONLY, 0)
if err != nil {
- l.Errorf("Failed to open core runtime log file: %v", err)
+ l.Errorf("Failed to open pstore userspace device (pstore probably unavailable): %v", err)
l.Warningf("Continuing without persistent panic storage.")
} else {
runtimeFds = append(runtimeFds, fd)
diff --git a/metropolis/node/core/pstore.go b/metropolis/node/core/pstore.go
index 857488a..c8863e5 100644
--- a/metropolis/node/core/pstore.go
+++ b/metropolis/node/core/pstore.go
@@ -23,6 +23,13 @@
logger.Warning(line)
}
}
+ userspaceLines, err := pstore.GetPmsgDump()
+ if err != nil {
+ logger.Errorf("Failed to recover userspace logs from pstore: %v", err)
+ }
+ for _, line := range userspaceLines {
+ logger.Warning(line)
+ }
cleanErr := pstore.ClearAll()
if cleanErr != nil {
logger.Errorf("Failed to clear pstore: %v", err)