metropolis: implement and use A/B preloader

This switches A/B updates over from relying on the EFI firmware's
built-in boot manager (boot entries, BootOrder and BootNext) to our own
EFI preloader, due to significant issues with in-the-wild EFI
implementations. The preloader is a very minimal design: slot selection
is kept in a single Protobuf state file on the ESP
(EFI/metropolis/loader_state.pb) instead of in EFI variables.

Change-Id: Ieebd0a8172ebe3f44c69b3e8c278c53d3fe2eeb4
Reviewed-on: https://review.monogon.dev/c/monogon/+/2203
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/update/BUILD.bazel b/metropolis/node/core/update/BUILD.bazel
index e506984..3be122e 100644
--- a/metropolis/node/core/update/BUILD.bazel
+++ b/metropolis/node/core/update/BUILD.bazel
@@ -1,4 +1,4 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
 
 go_library(
     name = "update",
@@ -7,6 +7,7 @@
     visibility = ["//visibility:public"],
     deps = [
         "//metropolis/node/build/mkimage/osimage",
+        "//metropolis/node/core/abloader/spec",
         "//metropolis/pkg/blockdev",
         "//metropolis/pkg/efivarfs",
         "//metropolis/pkg/gpt",
@@ -15,17 +16,7 @@
         "@com_github_cenkalti_backoff_v4//:backoff",
         "@org_golang_google_grpc//codes",
         "@org_golang_google_grpc//status",
+        "@org_golang_google_protobuf//proto",
         "@org_golang_x_sys//unix",
     ],
 )
-
-go_test(
-    name = "update_test",
-    srcs = ["update_test.go"],
-    embed = [":update"],
-    deps = [
-        "//metropolis/pkg/efivarfs",
-        "//metropolis/pkg/gpt",
-        "@com_github_google_uuid//:uuid",
-    ],
-)
diff --git a/metropolis/node/core/update/e2e/BUILD.bazel b/metropolis/node/core/update/e2e/BUILD.bazel
index 3905036..5f2b15f 100644
--- a/metropolis/node/core/update/e2e/BUILD.bazel
+++ b/metropolis/node/core/update/e2e/BUILD.bazel
@@ -9,6 +9,7 @@
         # For the initial image creation
         "//metropolis/node/core/update/e2e/testos:verity_rootfs_x",
         "//metropolis/node/core/update/e2e/testos:kernel_efi_x",
+        "//metropolis/node/core/abloader",
         # For the two update tests
         "//metropolis/node/core/update/e2e/testos:testos_bundle_y",
         "//metropolis/node/core/update/e2e/testos:testos_bundle_z",
diff --git a/metropolis/node/core/update/e2e/e2e_test.go b/metropolis/node/core/update/e2e/e2e_test.go
index 491259a..b5de3da 100644
--- a/metropolis/node/core/update/e2e/e2e_test.go
+++ b/metropolis/node/core/update/e2e/e2e_test.go
@@ -207,8 +207,18 @@
 	}
 	defer system.Close()
 
+	abloaderPath, err := datafile.ResolveRunfile("metropolis/node/core/abloader/abloader_bin.efi")
+	if err != nil {
+		t.Fatal(err)
+	}
+	loader, err := blkio.NewFileReader(abloaderPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+
 	if _, err := osimage.Create(&osimage.Params{
 		Output:      rootDisk,
+		ABLoader:    loader,
 		EFIPayload:  boot,
 		SystemImage: system,
 		PartitionSize: osimage.PartitionSizeInfo{
diff --git a/metropolis/node/core/update/update.go b/metropolis/node/core/update/update.go
index 75fe752..6486768 100644
--- a/metropolis/node/core/update/update.go
+++ b/metropolis/node/core/update/update.go
@@ -19,8 +19,10 @@
 	"golang.org/x/sys/unix"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/proto"
 
 	"source.monogon.dev/metropolis/node/build/mkimage/osimage"
+	abloaderpb "source.monogon.dev/metropolis/node/core/abloader/spec"
 	"source.monogon.dev/metropolis/pkg/blockdev"
 	"source.monogon.dev/metropolis/pkg/efivarfs"
 	"source.monogon.dev/metropolis/pkg/gpt"
@@ -143,61 +145,6 @@
 	return res, nil
 }
 
-func (s *Service) getOrMakeBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, error) {
-	idx, ok := s.findBootEntry(existing, slot)
-	if ok {
-		return idx, nil
-	}
-	newEntry := &efivarfs.LoadOption{
-		Description: fmt.Sprintf("Metropolis Slot %s", slot),
-		FilePath: efivarfs.DevicePath{
-			&efivarfs.HardDrivePath{
-				PartitionNumber:     s.ESPPartNumber,
-				PartitionStartBlock: s.ESPPart.FirstBlock,
-				PartitionSizeBlocks: s.ESPPart.SizeBlocks(),
-				PartitionMatch: efivarfs.PartitionGPT{
-					PartitionUUID: s.ESPPart.ID,
-				},
-			},
-			efivarfs.FilePath(slot.EFIBootPath()),
-		},
-	}
-	s.Logger.Infof("Recreated boot entry %s", newEntry.Description)
-	newIdx, err := efivarfs.AddBootEntry(newEntry)
-	if err == nil {
-		existing[newIdx] = newEntry
-	}
-	return newIdx, err
-}
-
-func (s *Service) findBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, bool) {
-	for idx, e := range existing {
-		if len(e.FilePath) != 2 {
-			// Not our entry
-			continue
-		}
-		switch p := e.FilePath[0].(type) {
-		case *efivarfs.HardDrivePath:
-			gptMatch, ok := p.PartitionMatch.(efivarfs.PartitionGPT)
-			if !(ok && gptMatch.PartitionUUID == s.ESPPart.ID) {
-				// Not related to our ESP
-				continue
-			}
-		default:
-			continue
-		}
-		switch p := e.FilePath[1].(type) {
-		case efivarfs.FilePath:
-			if string(p) == slot.EFIBootPath() {
-				return idx, true
-			}
-		default:
-			continue
-		}
-	}
-	return 0, false
-}
-
 // MarkBootSuccessful must be called after each boot if some implementation-
 // defined criteria for a successful boot are met. If an update has been
 // installed and booted and this function is called, the updated version is
@@ -207,64 +154,24 @@
 	if s.ESPPath == "" {
 		return errors.New("no ESP information provided to update service, cannot continue")
 	}
-	bootEntries, err := s.getAllBootEntries()
-	if err != nil {
-		return fmt.Errorf("while getting boot entries: %w", err)
-	}
-	aIdx, err := s.getOrMakeBootEntry(bootEntries, SlotA)
-	if err != nil {
-		return fmt.Errorf("while ensuring slot A boot entry: %w", err)
-	}
-	bIdx, err := s.getOrMakeBootEntry(bootEntries, SlotB)
-	if err != nil {
-		return fmt.Errorf("while ensuring slot B boot entry: %w", err)
-	}
-
 	activeSlot := s.CurrentlyRunningSlot()
-	firstSlot := SlotInvalid
-
-	ord, err := efivarfs.GetBootOrder()
+	abState, err := s.getABState()
 	if err != nil {
-		return fmt.Errorf("failed to get boot order: %w", err)
-	}
-
-	for _, e := range ord {
-		if int(e) == aIdx {
-			firstSlot = SlotA
-			break
+		s.Logger.Warningf("Error while getting A/B loader state, recreating: %v", err)
+		abState = &abloaderpb.ABLoaderData{
+			ActiveSlot: abloaderpb.Slot(activeSlot),
 		}
-		if int(e) == bIdx {
-			firstSlot = SlotB
-			break
+		err := s.setABState(abState)
+		if err != nil {
+			return fmt.Errorf("while recreating A/B loader state: %w", err)
 		}
 	}
-
-	if firstSlot == SlotInvalid {
-		bootOrder := make(efivarfs.BootOrder, 2)
-		switch activeSlot {
-		case SlotA:
-			bootOrder[0], bootOrder[1] = uint16(aIdx), uint16(bIdx)
-		case SlotB:
-			bootOrder[0], bootOrder[1] = uint16(bIdx), uint16(aIdx)
-		default:
-			return fmt.Errorf("invalid active slot")
-		}
-		efivarfs.SetBootOrder(bootOrder)
-		s.Logger.Infof("Metropolis missing from boot order, recreated it")
-	} else if activeSlot != firstSlot {
-		var aPos, bPos int
-		for i, e := range ord {
-			if int(e) == aIdx {
-				aPos = i
-			}
-			if int(e) == bIdx {
-				bPos = i
-			}
-		}
-		// swap A and B slots in boot order
-		ord[aPos], ord[bPos] = ord[bPos], ord[aPos]
-		if err := efivarfs.SetBootOrder(ord); err != nil {
-			return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
+	if Slot(abState.ActiveSlot) != activeSlot {
+		err := s.setABState(&abloaderpb.ABLoaderData{
+			ActiveSlot: abloaderpb.Slot(activeSlot),
+		})
+		if err != nil {
+			return fmt.Errorf("while setting next A/B slot: %w", err)
 		}
 		s.Logger.Infof("Permanently activated slot %v", activeSlot)
 	} else {
@@ -285,6 +192,29 @@
 	}
 }
 
+func (s *Service) getABState() (*abloaderpb.ABLoaderData, error) {
+	abDataRaw, err := os.ReadFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"))
+	if err != nil {
+		return nil, err
+	}
+	var abData abloaderpb.ABLoaderData
+	if err := proto.Unmarshal(abDataRaw, &abData); err != nil {
+		return nil, err
+	}
+	return &abData, nil
+}
+
+func (s *Service) setABState(d *abloaderpb.ABLoaderData) error {
+	abDataRaw, err := proto.Marshal(d)
+	if err != nil {
+		return fmt.Errorf("while marshaling: %w", err)
+	}
+	if err := os.WriteFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"), abDataRaw, 0666); err != nil {
+		return err
+	}
+	return nil
+}
+
 // InstallBundle installs the bundle at the given HTTP(S) URL into the currently
 // inactive slot and sets that slot to boot next. If it doesn't return an error,
 // a reboot boots into the new slot.
@@ -326,22 +256,6 @@
 	}
 	targetSlot := activeSlot.Other()
 
-	bootEntries, err := s.getAllBootEntries()
-	if err != nil {
-		return fmt.Errorf("while getting boot entries: %w", err)
-	}
-	targetSlotBootEntryIdx, err := s.getOrMakeBootEntry(bootEntries, targetSlot)
-	if err != nil {
-		return fmt.Errorf("while ensuring target slot boot entry: %w", err)
-	}
-	targetSlotBootEntry := bootEntries[targetSlotBootEntryIdx]
-
-	// Disable boot entry while the corresponding slot is being modified.
-	targetSlotBootEntry.Inactive = true
-	if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
-		return fmt.Errorf("failed setting boot entry %d inactive: %w", targetSlotBootEntryIdx, err)
-	}
-
 	systemPart, err := openSystemSlot(targetSlot)
 	if err != nil {
 		return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
@@ -360,20 +274,17 @@
 		return fmt.Errorf("failed to write boot file: %w", err)
 	}
 
-	// Reenable target slot boot entry after boot and system have been written
-	// fully. The slot should now be bootable again.
-	targetSlotBootEntry.Inactive = false
-	if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
-		return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
-	}
-
 	if withKexec {
 		if err := s.stageKexec(bootFile, targetSlot); err != nil {
 			return fmt.Errorf("while kexec staging: %w", err)
 		}
 	} else {
-		if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
-			return fmt.Errorf("failed to set BootNext variable: %w", err)
+		err := s.setABState(&abloaderpb.ABLoaderData{
+			ActiveSlot: abloaderpb.Slot(activeSlot),
+			NextSlot:   abloaderpb.Slot(targetSlot),
+		})
+		if err != nil {
+			return fmt.Errorf("while setting next A/B slot: %w", err)
 		}
 	}
 
diff --git a/metropolis/node/core/update/update_test.go b/metropolis/node/core/update/update_test.go
deleted file mode 100644
index 8206a22..0000000
--- a/metropolis/node/core/update/update_test.go
+++ /dev/null
@@ -1,127 +0,0 @@
-package update
-
-import (
-	"testing"
-
-	"github.com/google/uuid"
-
-	"source.monogon.dev/metropolis/pkg/efivarfs"
-	"source.monogon.dev/metropolis/pkg/gpt"
-)
-
-func TestFindBootEntry(t *testing.T) {
-	testUUID1 := uuid.MustParse("85cb7a0c-d31d-4b65-1111-919b069915f1")
-	testUUID2 := uuid.MustParse("d3086aa2-0327-4634-2222-5c6c8488cae3")
-	cases := []struct {
-		name        string
-		slot        Slot
-		espid       uuid.UUID
-		entries     map[int]*efivarfs.LoadOption
-		expectedOk  bool
-		expectedIdx int
-	}{
-		{
-			name:       "NoEntries",
-			slot:       SlotA,
-			espid:      testUUID1,
-			entries:    make(map[int]*efivarfs.LoadOption),
-			expectedOk: false,
-		},
-		{
-			name:  "FindSimple",
-			slot:  SlotB,
-			espid: testUUID1,
-			entries: map[int]*efivarfs.LoadOption{
-				5: &efivarfs.LoadOption{
-					Description: "Other Entry",
-					FilePath: efivarfs.DevicePath{
-						&efivarfs.HardDrivePath{
-							PartitionNumber: 1,
-							PartitionMatch: efivarfs.PartitionMBR{
-								DiskSignature: [4]byte{1, 2, 3, 4},
-							},
-						},
-						efivarfs.FilePath("EFI/something/else.efi"),
-					},
-				},
-				6: &efivarfs.LoadOption{
-					Description: "Completely different entry",
-					FilePath: efivarfs.DevicePath{
-						&efivarfs.UnknownPath{
-							// Vendor-specific subtype
-							TypeVal:    1,
-							SubTypeVal: 4,
-							DataVal:    []byte{1, 2, 3, 4},
-						},
-						efivarfs.FilePath("EFI/something"),
-						efivarfs.FilePath("else.efi"),
-					},
-				},
-				16: &efivarfs.LoadOption{
-					Description: "Target Entry",
-					FilePath: efivarfs.DevicePath{
-						&efivarfs.HardDrivePath{
-							PartitionNumber: 2,
-							PartitionMatch: efivarfs.PartitionGPT{
-								PartitionUUID: testUUID1,
-							},
-						},
-						efivarfs.FilePath("/EFI/metropolis/boot-b.efi"),
-					},
-				},
-			},
-			expectedOk:  true,
-			expectedIdx: 16,
-		},
-		{
-			name:  "FindViaESPUUID",
-			slot:  SlotA,
-			espid: testUUID1,
-			entries: map[int]*efivarfs.LoadOption{
-				6: &efivarfs.LoadOption{
-					Description: "Other ESP UUID",
-					FilePath: efivarfs.DevicePath{
-						&efivarfs.HardDrivePath{
-							PartitionNumber: 2,
-							PartitionMatch: efivarfs.PartitionGPT{
-								PartitionUUID: testUUID2,
-							},
-						},
-						efivarfs.FilePath("/EFI/metropolis/boot-a.efi"),
-					},
-				},
-				7: &efivarfs.LoadOption{
-					Description: "Target Entry",
-					FilePath: efivarfs.DevicePath{
-						&efivarfs.HardDrivePath{
-							PartitionNumber: 2,
-							PartitionMatch: efivarfs.PartitionGPT{
-								PartitionUUID: testUUID1,
-							},
-						},
-						efivarfs.FilePath("/EFI/metropolis/boot-a.efi"),
-					},
-				},
-			},
-			expectedOk:  true,
-			expectedIdx: 7,
-		},
-	}
-
-	for _, c := range cases {
-		t.Run(c.name, func(t *testing.T) {
-			s := Service{
-				ESPPart: &gpt.Partition{
-					ID: c.espid,
-				},
-			}
-			idx, ok := s.findBootEntry(c.entries, c.slot)
-			if ok != c.expectedOk {
-				t.Fatalf("expected ok %t, got %t", c.expectedOk, ok)
-			}
-			if idx != c.expectedIdx {
-				t.Fatalf("expected idx %d, got %d", c.expectedIdx, idx)
-			}
-		})
-	}
-}