m/n/c/update: implement kexec-based activation
As we've had some issues with EFI-based slot activation and enterprise
server firmware is extremely slow, this implements kexec-based
activation. This just kexecs into the freshly-installed slot instead of
rebooting. It still updates the BootOrder on successful boot to allow
cold-boots if the server crashes or loses power, but when kexec is
requested it no longer uses the BootNext mechanism to boot into the new
slot once (this is taken care of by kexec).
Change-Id: I6092c47d988634ba39fb6bdd7fd7ccd41ceb02ef
Reviewed-on: https://review.monogon.dev/c/monogon/+/2021
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/update/update.go b/metropolis/node/core/update/update.go
index 107f9cc..84a5db9 100644
--- a/metropolis/node/core/update/update.go
+++ b/metropolis/node/core/update/update.go
@@ -4,6 +4,7 @@
"archive/zip"
"bytes"
"context"
+ "debug/pe"
"errors"
"fmt"
"io"
@@ -12,8 +13,10 @@
"path/filepath"
"regexp"
"strconv"
+ "strings"
"github.com/cenkalti/backoff/v4"
+ "golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -21,6 +24,7 @@
"source.monogon.dev/metropolis/pkg/blockdev"
"source.monogon.dev/metropolis/pkg/efivarfs"
"source.monogon.dev/metropolis/pkg/gpt"
+ "source.monogon.dev/metropolis/pkg/kexec"
"source.monogon.dev/metropolis/pkg/logtree"
)
@@ -177,6 +181,7 @@
efivarfs.FilePath(slot.EFIBootPath()),
},
}
+ s.Logger.Infof("Recreated boot entry %s", newEntry.Description)
newIdx, err := efivarfs.AddBootEntry(newEntry)
if err == nil {
existing[newIdx] = newEntry
@@ -274,7 +279,7 @@
// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
// inactive slot and sets that slot to boot next. If it doesn't return an error,
// a reboot boots into the new slot.
-func (s *Service) InstallBundle(ctx context.Context, bundleURL string) error {
+func (s *Service) InstallBundle(ctx context.Context, bundleURL string, withKexec bool) error {
if s.ESPPath == "" {
return errors.New("no ESP information provided to update service, cannot continue")
}
@@ -353,8 +358,14 @@
return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
}
- if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
- return fmt.Errorf("failed to set BootNext variable: %w", err)
+ if withKexec {
+ if err := s.stageKexec(bootFile, targetSlot); err != nil {
+ return fmt.Errorf("while kexec staging: %w", err)
+ }
+ } else {
+ if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
+ return fmt.Errorf("failed to set BootNext variable: %w", err)
+ }
}
return nil
@@ -388,3 +399,62 @@
}
return nil
}
+
+// newMemfile wraps a fresh memfd_create(2) file descriptor in an *os.File.
+// Both name and flags are passed through to the syscall unmodified.
+func newMemfile(name string, flags int) (*os.File, error) {
+	memfd, err := unix.MemfdCreate(name, flags)
+	if err != nil {
+		return nil, fmt.Errorf("memfd_create: %w", err)
+	}
+	return os.NewFile(uintptr(memfd), name), nil
+}
+
+// stageKexec stages the kernel, command line and initramfs (if any) found in
+// the EFI executable bootFile for a later kexec into targetSlot.
+func (s *Service) stageKexec(bootFile io.ReaderAt, targetSlot Slot) error {
+	bootPE, err := pe.NewFile(bootFile)
+	if err != nil {
+		return fmt.Errorf("unable to open bootFile as PE: %w", err)
+	}
+	cmdlineSection := bootPE.Section(".cmdline")
+	if cmdlineSection == nil {
+		return errors.New("no .cmdline section in boot PE")
+	}
+	cmdlineRaw, err := cmdlineSection.Data()
+	if err != nil {
+		return fmt.Errorf("while reading .cmdline PE section: %w", err)
+	}
+	// Drop trailing NULs, then point the slot placeholder at targetSlot.
+	cmdline := string(bytes.TrimRight(cmdlineRaw, "\x00"))
+	cmdline = strings.ReplaceAll(cmdline, "METROPOLIS-SYSTEM-X", fmt.Sprintf("METROPOLIS-SYSTEM-%s", targetSlot))
+	kernelSection := bootPE.Section(".linux")
+	if kernelSection == nil {
+		return errors.New("no .linux section in boot PE")
+	}
+	// MFD_CLOEXEC: do not leak the memfds to processes exec'd in the meantime.
+	kernelFile, err := newMemfile("kernel", unix.MFD_CLOEXEC)
+	if err != nil {
+		return fmt.Errorf("failed to create kernel memfile: %w", err)
+	}
+	defer kernelFile.Close()
+	if _, err := io.Copy(kernelFile, kernelSection.Open()); err != nil {
+		return fmt.Errorf("while copying .linux PE section: %w", err)
+	}
+	// initramfsFile stays nil when the image carries no (or an empty) initramfs.
+	var initramfsFile *os.File
+	if initrd := bootPE.Section(".initrd"); initrd != nil && initrd.Size > 0 {
+		initramfsFile, err = newMemfile("initramfs", unix.MFD_CLOEXEC)
+		if err != nil {
+			return fmt.Errorf("failed to create initramfs memfile: %w", err)
+		}
+		defer initramfsFile.Close()
+		if _, err := io.Copy(initramfsFile, initrd.Open()); err != nil {
+			return fmt.Errorf("while copying .initrd PE section: %w", err)
+		}
+	}
+	if err := kexec.FileLoad(kernelFile, initramfsFile, cmdline); err != nil {
+		return fmt.Errorf("while staging new kexec kernel: %w", err)
+	}
+	return nil
+}