metropolis: implement and use A/B preloader
This switches over from using the EFI built-in bootloader for A/B
updates to using our own EFI preloader due to significant issues with
in-the-wild EFI implementations. It is a very minimal design relying
on a single Protobuf state file instead of EFI variables.
Change-Id: Ieebd0a8172ebe3f44c69b3e8c278c53d3fe2eeb4
Reviewed-on: https://review.monogon.dev/c/monogon/+/2203
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/update/update.go b/metropolis/node/core/update/update.go
index 75fe752..6486768 100644
--- a/metropolis/node/core/update/update.go
+++ b/metropolis/node/core/update/update.go
@@ -19,8 +19,10 @@
"golang.org/x/sys/unix"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
+ "google.golang.org/protobuf/proto"
"source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ abloaderpb "source.monogon.dev/metropolis/node/core/abloader/spec"
"source.monogon.dev/metropolis/pkg/blockdev"
"source.monogon.dev/metropolis/pkg/efivarfs"
"source.monogon.dev/metropolis/pkg/gpt"
@@ -143,61 +145,6 @@
return res, nil
}
-func (s *Service) getOrMakeBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, error) {
- idx, ok := s.findBootEntry(existing, slot)
- if ok {
- return idx, nil
- }
- newEntry := &efivarfs.LoadOption{
- Description: fmt.Sprintf("Metropolis Slot %s", slot),
- FilePath: efivarfs.DevicePath{
- &efivarfs.HardDrivePath{
- PartitionNumber: s.ESPPartNumber,
- PartitionStartBlock: s.ESPPart.FirstBlock,
- PartitionSizeBlocks: s.ESPPart.SizeBlocks(),
- PartitionMatch: efivarfs.PartitionGPT{
- PartitionUUID: s.ESPPart.ID,
- },
- },
- efivarfs.FilePath(slot.EFIBootPath()),
- },
- }
- s.Logger.Infof("Recreated boot entry %s", newEntry.Description)
- newIdx, err := efivarfs.AddBootEntry(newEntry)
- if err == nil {
- existing[newIdx] = newEntry
- }
- return newIdx, err
-}
-
-func (s *Service) findBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, bool) {
- for idx, e := range existing {
- if len(e.FilePath) != 2 {
- // Not our entry
- continue
- }
- switch p := e.FilePath[0].(type) {
- case *efivarfs.HardDrivePath:
- gptMatch, ok := p.PartitionMatch.(efivarfs.PartitionGPT)
- if !(ok && gptMatch.PartitionUUID == s.ESPPart.ID) {
- // Not related to our ESP
- continue
- }
- default:
- continue
- }
- switch p := e.FilePath[1].(type) {
- case efivarfs.FilePath:
- if string(p) == slot.EFIBootPath() {
- return idx, true
- }
- default:
- continue
- }
- }
- return 0, false
-}
-
// MarkBootSuccessful must be called after each boot if some implementation-
// defined criteria for a successful boot are met. If an update has been
// installed and booted and this function is called, the updated version is
@@ -207,64 +154,24 @@
if s.ESPPath == "" {
return errors.New("no ESP information provided to update service, cannot continue")
}
- bootEntries, err := s.getAllBootEntries()
- if err != nil {
- return fmt.Errorf("while getting boot entries: %w", err)
- }
- aIdx, err := s.getOrMakeBootEntry(bootEntries, SlotA)
- if err != nil {
- return fmt.Errorf("while ensuring slot A boot entry: %w", err)
- }
- bIdx, err := s.getOrMakeBootEntry(bootEntries, SlotB)
- if err != nil {
- return fmt.Errorf("while ensuring slot B boot entry: %w", err)
- }
-
activeSlot := s.CurrentlyRunningSlot()
- firstSlot := SlotInvalid
-
- ord, err := efivarfs.GetBootOrder()
+ abState, err := s.getABState()
if err != nil {
- return fmt.Errorf("failed to get boot order: %w", err)
- }
-
- for _, e := range ord {
- if int(e) == aIdx {
- firstSlot = SlotA
- break
+ s.Logger.Warningf("Error while getting A/B loader state, recreating: %v", err)
+ abState = &abloaderpb.ABLoaderData{
+ ActiveSlot: abloaderpb.Slot(activeSlot),
}
- if int(e) == bIdx {
- firstSlot = SlotB
- break
+ err := s.setABState(abState)
+ if err != nil {
+ return fmt.Errorf("while recreating A/B loader state: %w", err)
}
}
-
- if firstSlot == SlotInvalid {
- bootOrder := make(efivarfs.BootOrder, 2)
- switch activeSlot {
- case SlotA:
- bootOrder[0], bootOrder[1] = uint16(aIdx), uint16(bIdx)
- case SlotB:
- bootOrder[0], bootOrder[1] = uint16(bIdx), uint16(aIdx)
- default:
- return fmt.Errorf("invalid active slot")
- }
- efivarfs.SetBootOrder(bootOrder)
- s.Logger.Infof("Metropolis missing from boot order, recreated it")
- } else if activeSlot != firstSlot {
- var aPos, bPos int
- for i, e := range ord {
- if int(e) == aIdx {
- aPos = i
- }
- if int(e) == bIdx {
- bPos = i
- }
- }
- // swap A and B slots in boot order
- ord[aPos], ord[bPos] = ord[bPos], ord[aPos]
- if err := efivarfs.SetBootOrder(ord); err != nil {
- return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
+ if Slot(abState.ActiveSlot) != activeSlot {
+ err := s.setABState(&abloaderpb.ABLoaderData{
+ ActiveSlot: abloaderpb.Slot(activeSlot),
+ })
+ if err != nil {
+ return fmt.Errorf("while setting next A/B slot: %w", err)
}
s.Logger.Infof("Permanently activated slot %v", activeSlot)
} else {
@@ -285,6 +192,29 @@
}
}
+func (s *Service) getABState() (*abloaderpb.ABLoaderData, error) {
+ abDataRaw, err := os.ReadFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"))
+ if err != nil {
+ return nil, err
+ }
+ var abData abloaderpb.ABLoaderData
+ if err := proto.Unmarshal(abDataRaw, &abData); err != nil {
+ return nil, err
+ }
+ return &abData, nil
+}
+
+func (s *Service) setABState(d *abloaderpb.ABLoaderData) error {
+ abDataRaw, err := proto.Marshal(d)
+ if err != nil {
+ return fmt.Errorf("while marshaling: %w", err)
+ }
+ if err := os.WriteFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"), abDataRaw, 0666); err != nil {
+ return err
+ }
+ return nil
+}
+
// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
// inactive slot and sets that slot to boot next. If it doesn't return an error,
// a reboot boots into the new slot.
@@ -326,22 +256,6 @@
}
targetSlot := activeSlot.Other()
- bootEntries, err := s.getAllBootEntries()
- if err != nil {
- return fmt.Errorf("while getting boot entries: %w", err)
- }
- targetSlotBootEntryIdx, err := s.getOrMakeBootEntry(bootEntries, targetSlot)
- if err != nil {
- return fmt.Errorf("while ensuring target slot boot entry: %w", err)
- }
- targetSlotBootEntry := bootEntries[targetSlotBootEntryIdx]
-
- // Disable boot entry while the corresponding slot is being modified.
- targetSlotBootEntry.Inactive = true
- if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
- return fmt.Errorf("failed setting boot entry %d inactive: %w", targetSlotBootEntryIdx, err)
- }
-
systemPart, err := openSystemSlot(targetSlot)
if err != nil {
return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
@@ -360,20 +274,17 @@
return fmt.Errorf("failed to write boot file: %w", err)
}
- // Reenable target slot boot entry after boot and system have been written
- // fully. The slot should now be bootable again.
- targetSlotBootEntry.Inactive = false
- if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
- return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
- }
-
if withKexec {
if err := s.stageKexec(bootFile, targetSlot); err != nil {
return fmt.Errorf("while kexec staging: %w", err)
}
} else {
- if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
- return fmt.Errorf("failed to set BootNext variable: %w", err)
+ err := s.setABState(&abloaderpb.ABLoaderData{
+ ActiveSlot: abloaderpb.Slot(activeSlot),
+ NextSlot: abloaderpb.Slot(targetSlot),
+ })
+ if err != nil {
+ return fmt.Errorf("while setting next A/B slot: %w", err)
}
}