metropolis: implement A/B updates
This implements an A/B update mechanism using two slots, A and B.
This is realized with two system partitions as well as two EFI
loaders/kernels.
The A/B system relies on two EFI loader entries. This has the advantage
that there is no preloader required, which makes the system more
reliable as well as avoiding the complexity of having an un-updatable
preloader (CoreOS has this issue where their GRUB2 crashed booting newer
kernels, sadly the issue seems lost with the migration to Fedora
CoreOS). It also means that the operator can easily override the slot
being booted via the boot loader entries. Primary disadvantage is that
it relies on EFI working somewhat to spec.
New versions are booted into only once by setting BootNext. If the
bootup doesn't succeed, i.e. if the boot doesn't get to a cluster rejoin,
the next boot will be the old slot. Once it gets to this stage, the
permanent BootOrder is changed.
The EFI loaders don't know if they are slot A or B because they are
identical, and relying on OptionalData in the boot entry to indicate the
slot would mean that if the EFI boot entries go away, recovering is very
hard. Thus the loaders look at their own file name to determine what slot
they are in. If no slot could be determined, they default to booting slot A.
It is planned to eventually use Authenticode Stamping (passing data in
fake certificates) to stamp the slot into the loader without affecting
the TPM hash logged.
Change-Id: I40de2df8ff7ff660c17d2c97f3d9eb1bd4ddf5bc
Reviewed-on: https://review.monogon.dev/c/monogon/+/1874
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/update/BUILD.bazel b/metropolis/node/core/update/BUILD.bazel
new file mode 100644
index 0000000..4bb7915
--- /dev/null
+++ b/metropolis/node/core/update/BUILD.bazel
@@ -0,0 +1,18 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+# Go library implementing the A/B slot update service (update.go).
+go_library(
+ name = "update",
+ srcs = ["update.go"],
+ importpath = "source.monogon.dev/metropolis/node/core/update",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//metropolis/node/build/mkimage/osimage",
+ "//metropolis/pkg/blockdev",
+ "//metropolis/pkg/efivarfs",
+ "//metropolis/pkg/logtree",
+ "@com_github_cenkalti_backoff_v4//:backoff",
+ "@com_github_google_uuid//:uuid",
+ "@org_golang_google_grpc//codes",
+ "@org_golang_google_grpc//status",
+ ],
+)
diff --git a/metropolis/node/core/update/e2e/BUILD.bazel b/metropolis/node/core/update/e2e/BUILD.bazel
new file mode 100644
index 0000000..3905036
--- /dev/null
+++ b/metropolis/node/core/update/e2e/BUILD.bazel
@@ -0,0 +1,22 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_test")
+
+# End-to-end test of the update service. It needs the firmware and the TestOS
+# images/bundles listed under data at runtime.
+go_test(
+ name = "e2e_test",
+ srcs = ["e2e_test.go"],
+ data = [
+ # For emulation
+ "//third_party/edk2:firmware",
+ # For the initial image creation
+ "//metropolis/node/core/update/e2e/testos:verity_rootfs_x",
+ "//metropolis/node/core/update/e2e/testos:kernel_efi_x",
+ # For the two update tests
+ "//metropolis/node/core/update/e2e/testos:testos_bundle_y",
+ "//metropolis/node/core/update/e2e/testos:testos_bundle_z",
+ ],
+ deps = [
+ "//metropolis/cli/pkg/datafile",
+ "//metropolis/node/build/mkimage/osimage",
+ "//metropolis/pkg/blkio",
+ "//metropolis/pkg/blockdev",
+ ],
+)
diff --git a/metropolis/node/core/update/e2e/e2e_test.go b/metropolis/node/core/update/e2e/e2e_test.go
new file mode 100644
index 0000000..7524d2a
--- /dev/null
+++ b/metropolis/node/core/update/e2e/e2e_test.go
@@ -0,0 +1,231 @@
+package e2e
+
+import (
+ "bufio"
+ "context"
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+
+ "source.monogon.dev/metropolis/cli/pkg/datafile"
+ "source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ "source.monogon.dev/metropolis/pkg/blkio"
+ "source.monogon.dev/metropolis/pkg/blockdev"
+)
+
+// Mi is the number of bytes in one mebibyte.
+const Mi = 1024 * 1024
+
+// variantRegexp extracts the single upper-case variant letter from a
+// "TESTOS_VARIANT=<letter>" marker in the VM's output.
+var variantRegexp = regexp.MustCompile(`TESTOS_VARIANT=([A-Z])`)
+
+func runAndCheckVariant(t *testing.T, expectedVariant string, qemuArgs []string) {
+ t.Helper()
+ ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+ defer cancel()
+ qemuCmdLaunch := exec.CommandContext(ctx, "qemu-system-x86_64", qemuArgs...)
+ stdoutPipe, err := qemuCmdLaunch.StdoutPipe()
+ if err != nil {
+ t.Fatal(err)
+ }
+ stderrPipe, err := qemuCmdLaunch.StderrPipe()
+ if err != nil {
+ t.Fatal(err)
+ }
+ testosStarted := make(chan string, 1)
+ go func() {
+ s := bufio.NewScanner(stdoutPipe)
+ for s.Scan() {
+ if strings.HasPrefix(s.Text(), "[") {
+ continue
+ }
+ errIdx := strings.Index(s.Text(), "Error installing new bundle")
+ if errIdx != -1 {
+ t.Error(s.Text()[errIdx:])
+ }
+ t.Log("vm: " + s.Text())
+ if m := variantRegexp.FindStringSubmatch(s.Text()); len(m) == 2 {
+ select {
+ case testosStarted <- m[1]:
+ default:
+ }
+ }
+ }
+ }()
+ go func() {
+ s := bufio.NewScanner(stderrPipe)
+ for s.Scan() {
+ if strings.HasPrefix(s.Text(), "[") {
+ continue
+ }
+ t.Log("qemu: " + s.Text())
+ }
+ }()
+ if err := qemuCmdLaunch.Start(); err != nil {
+ t.Fatal(err)
+ }
+ procExit := make(chan error)
+ go func() {
+ procExit <- qemuCmdLaunch.Wait()
+ close(procExit)
+ }()
+ select {
+ case variant := <-testosStarted:
+ if variant != expectedVariant {
+ t.Fatalf("expected variant %s to launch, got %s", expectedVariant, variant)
+ }
+ select {
+ case <-procExit:
+ return
+ case <-ctx.Done():
+ t.Log("Canceled VM")
+ cancel()
+ <-procExit
+ return
+ }
+ case err := <-procExit:
+ t.Fatalf("QEMU exited unexpectedly: %v", err)
+ return
+ case <-ctx.Done():
+ t.Fatalf("Waiting for TestOS variant %s launch timed out", expectedVariant)
+ }
+}
+
+// TestABUpdateSequence runs three consecutive boots of the same disk image and
+// EFI variable store in QEMU:
+//
+//  1. The initial image (variant X) boots and is offered bundle Y.
+//  2. The next boot is expected to report variant Y and is offered bundle Z.
+//  3. The final boot is expected to report variant Z.
+//
+// Bundles are served by a local HTTP server which is forwarded into the guest
+// network at 10.42.0.5:80 via QEMU's guestfwd.
+func TestABUpdateSequence(t *testing.T) {
+	blobAddr := net.TCPAddr{
+		IP:   net.IPv4(10, 42, 0, 5),
+		Port: 80,
+	}
+
+	// Path of the bundle served at /bundle.bin, guarded by nbpMutex as it is
+	// read from HTTP handler goroutines.
+	var nextBundlePathToInstall string
+	var nbpMutex sync.Mutex
+
+	m := http.NewServeMux()
+	bundleYPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/testos_bundle_y.zip")
+	if err != nil {
+		t.Fatal(err)
+	}
+	bundleZPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/testos_bundle_z.zip")
+	if err != nil {
+		t.Fatal(err)
+	}
+	m.HandleFunc("/bundle.bin", func(w http.ResponseWriter, req *http.Request) {
+		nbpMutex.Lock()
+		bundleFilePath := nextBundlePathToInstall
+		nbpMutex.Unlock()
+		if bundleFilePath == "" {
+			w.WriteHeader(http.StatusBadRequest)
+			w.Write([]byte("No next bundle set in the test harness"))
+			// Do not fall through to ServeFile with an empty path.
+			return
+		}
+		http.ServeFile(w, req, bundleFilePath)
+	})
+	blobLis, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Closing the listener also makes http.Serve return.
+	defer blobLis.Close()
+	blobListenAddr := blobLis.Addr().(*net.TCPAddr)
+	go http.Serve(blobLis, m)
+
+	rootDevPath := filepath.Join(t.TempDir(), "root.img")
+	// Make a 512 bytes * 2Mi = 1Gi file-backed block device
+	rootDisk, err := blockdev.CreateFile(rootDevPath, 512, 2097152)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(rootDevPath)
+	defer rootDisk.Close()
+
+	ovmfVarsPath, err := datafile.ResolveRunfile("external/edk2/OVMF_VARS.fd")
+	if err != nil {
+		t.Fatal(err)
+	}
+	ovmfCodePath, err := datafile.ResolveRunfile("external/edk2/OVMF_CODE.fd")
+	if err != nil {
+		t.Fatal(err)
+	}
+	bootPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/kernel_efi_x.efi")
+	if err != nil {
+		t.Fatal(err)
+	}
+	boot, err := blkio.NewFileReader(bootPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer boot.Close()
+	systemXPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/verity_rootfs_x.img")
+	if err != nil {
+		t.Fatal(err)
+	}
+	system, err := os.Open(systemXPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer system.Close()
+
+	if _, err := osimage.Create(&osimage.Params{
+		Output:      rootDisk,
+		EFIPayload:  boot,
+		SystemImage: system,
+		PartitionSize: osimage.PartitionSizeInfo{
+			ESP:    128,
+			System: 256,
+			Data:   10,
+		},
+	}); err != nil {
+		t.Fatalf("unable to generate starting point image: %v", err)
+	}
+	// Close the image before QEMU opens it by path.
+	rootDisk.Close()
+
+	blobGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", blobAddr.String(), blobListenAddr.Port)
+
+	// Work on a private, writable copy of the EFI variable store so that boot
+	// entries persist across the three boots. It lives in a test temporary
+	// directory and is thus removed when the test finishes.
+	ovmfVarsTmpl, err := os.Open(ovmfVarsPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ovmfVarsTmpl.Close()
+	ovmfVars, err := os.CreateTemp(t.TempDir(), "agent-ovmf-vars")
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, copyErr := io.Copy(ovmfVars, ovmfVarsTmpl)
+	closeErr := ovmfVars.Close()
+	if copyErr != nil {
+		t.Fatal(copyErr)
+	}
+	if closeErr != nil {
+		t.Fatal(closeErr)
+	}
+
+	qemuArgs := []string{
+		"-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "1024",
+		"-cpu", "max", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4",
+		"-drive", "if=pflash,format=raw,readonly=on,file=" + ovmfCodePath,
+		"-drive", "if=pflash,format=raw,file=" + ovmfVars.Name(),
+		"-drive", "if=virtio,format=raw,cache=unsafe,file=" + rootDevPath,
+		"-netdev", fmt.Sprintf("user,id=net0,net=10.42.0.0/24,dhcpstart=10.42.0.10,%s", blobGuestFwd),
+		"-device", "virtio-net-pci,netdev=net0,mac=22:d5:8e:76:1d:07",
+		"-device", "virtio-rng-pci",
+		"-serial", "stdio",
+		"-trace", "pflash*",
+		"-no-reboot",
+	}
+	// Install Bundle Y next
+	nbpMutex.Lock()
+	nextBundlePathToInstall = bundleYPath
+	nbpMutex.Unlock()
+
+	t.Log("Launching X image to install Y")
+	runAndCheckVariant(t, "X", qemuArgs)
+
+	// Install Bundle Z next
+	nbpMutex.Lock()
+	nextBundlePathToInstall = bundleZPath
+	nbpMutex.Unlock()
+
+	t.Log("Launching Y on slot B to install Z on slot A")
+	runAndCheckVariant(t, "Y", qemuArgs)
+
+	t.Log("Launching Z on slot A")
+	runAndCheckVariant(t, "Z", qemuArgs)
+}
diff --git a/metropolis/node/core/update/e2e/testos/BUILD.bazel b/metropolis/node/core/update/e2e/testos/BUILD.bazel
new file mode 100644
index 0000000..79fd0f9
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/BUILD.bazel
@@ -0,0 +1,31 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load(":testos.bzl", "testos")
+
+# Three TestOS variants; the e2e test starts from x and updates to y, then z.
+testos(variant = "x")
+
+testos(variant = "y")
+
+testos(variant = "z")
+
+# Shared TestOS sources, embedded by the go_binary below and by the per-variant
+# binaries declared in the testos macro.
+go_library(
+ name = "testos_lib",
+ srcs = ["main.go"],
+ importpath = "source.monogon.dev/metropolis/node/core/update/e2e/testos",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//metropolis/node/build/mkimage/osimage",
+ "//metropolis/node/core/network",
+ "//metropolis/node/core/update",
+ "//metropolis/pkg/blockdev",
+ "//metropolis/pkg/gpt",
+ "//metropolis/pkg/logtree",
+ "//metropolis/pkg/supervisor",
+ "@org_golang_x_sys//unix",
+ ],
+)
+
+# TestOS binary without a variant stamped in via x_defs.
+go_binary(
+ name = "testos",
+ embed = [":testos_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/core/update/e2e/testos/main.go b/metropolis/node/core/update/e2e/testos/main.go
new file mode 100644
index 0000000..b780d17
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/main.go
@@ -0,0 +1,137 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "time"
+
+ "golang.org/x/sys/unix"
+
+ "source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ "source.monogon.dev/metropolis/node/core/network"
+ "source.monogon.dev/metropolis/node/core/update"
+ "source.monogon.dev/metropolis/pkg/blockdev"
+ "source.monogon.dev/metropolis/pkg/gpt"
+ "source.monogon.dev/metropolis/pkg/logtree"
+ "source.monogon.dev/metropolis/pkg/supervisor"
+)
+
+// Variant identifies this TestOS build and is announced on startup as
+// "TESTOS_VARIANT=<Variant>". The per-variant binaries override it at link
+// time through x_defs in testos.bzl; "U" is the value when nothing is stamped.
+var Variant = "U"
+
+// mkdirAndMount creates dir (including missing parents) with mode 0755 and
+// then mounts a filesystem of type fs onto it. The filesystem type is also
+// passed as the mount source and no mount data is given.
+func mkdirAndMount(dir, fs string, flags uintptr) error {
+	mkdirErr := os.MkdirAll(dir, 0o755)
+	if mkdirErr != nil {
+		return fmt.Errorf("could not make %s: %w", dir, mkdirErr)
+	}
+	mountErr := unix.Mount(fs, dir, fs, flags, "")
+	if mountErr != nil {
+		return fmt.Errorf("could not mount %s on %s: %w", fs, dir, mountErr)
+	}
+	return nil
+}
+
+// setupMounts sets up basic mounts like sysfs, procfs, devtmpfs and cgroups.
+// This should be called early during init as a lot of processes depend on this
+// being available.
+func setupMounts() error {
+ // Set up target filesystems.
+ for _, el := range []struct {
+ dir string
+ fs string
+ flags uintptr
+ }{
+ {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/sys/kernel/tracing", "tracefs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/sys/fs/pstore", "pstore", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/sys/firmware/efi/efivars", "efivarfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+ {"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+ {"/tmp", "tmpfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ } {
+ if err := mkdirAndMount(el.dir, el.fs, el.flags); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// main is the init process of TestOS. It sets up the basic mounts, starts the
+// supervisor with testosRunnable as its root and then forwards every log entry
+// from the logtree to the serial console at /dev/ttyS0, forever. Failures
+// before the supervisor is started are printed and power the machine off.
+func main() {
+	// abort reports an early init failure and requests a power-off. The
+	// caller returns right afterwards.
+	abort := func(cause error) {
+		fmt.Printf("early init error, stopping: %v\n", cause)
+		unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF)
+	}
+
+	if err := setupMounts(); err != nil {
+		abort(err)
+		return
+	}
+	lt := logtree.New()
+	console, err := os.OpenFile("/dev/ttyS0", os.O_WRONLY, 0)
+	if err != nil {
+		abort(err)
+		return
+	}
+	logs, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
+	if err != nil {
+		abort(err)
+		return
+	}
+
+	supervisor.New(context.Background(), testosRunnable, supervisor.WithExistingLogtree(lt))
+
+	for {
+		entry := <-logs.Stream
+		fmt.Fprintf(console, "%s\n", entry.String())
+	}
+}
+
+// testosRunnable is the root runnable of TestOS. It announces the variant,
+// starts networking, locates the ESP and the two system partitions on
+// /dev/vda, marks the current boot as successful and, unless this is variant
+// Z, installs the bundle offered at http://10.42.0.5:80/bundle.bin. Finally it
+// powers the machine off.
+//
+// Failures of MarkBootSuccessful and InstallBundle are logged but do not
+// prevent the power-off. The "Error installing new bundle" log prefix is
+// what the e2e test harness looks for in the VM output.
+func testosRunnable(ctx context.Context) error {
+	supervisor.Logger(ctx).Info("TESTOS_VARIANT=" + Variant)
+	networkSvc := network.New(nil)
+	networkSvc.DHCPVendorClassID = "dev.monogon.testos.v1"
+	supervisor.Run(ctx, "networking", networkSvc.Run)
+
+	vda, err := blockdev.Open("/dev/vda")
+	if err != nil {
+		return fmt.Errorf("unable to open root device: %w", err)
+	}
+	defer vda.Close()
+	vdaParts, err := gpt.Read(vda)
+	if err != nil {
+		return fmt.Errorf("unable to read GPT from root device: %w", err)
+	}
+
+	updateSvc := update.Service{
+		Logger: supervisor.MustSubLogger(ctx, "update"),
+	}
+	for pn, p := range vdaParts.Partitions {
+		// The entry at index pn is addressed as partition number pn+1.
+		partNum := pn + 1
+		partDev := fmt.Sprintf("/dev/vda%d", partNum)
+		switch p.Type {
+		case gpt.PartitionTypeEFISystem:
+			if err := unix.Mount(partDev, "/esp", "vfat", unix.MS_SYNC, ""); err != nil {
+				return fmt.Errorf("unable to mount ESP: %w", err)
+			}
+			updateSvc.ProvideESP("/esp", p.ID, uint32(partNum))
+		case osimage.SystemAType:
+			if err := unix.Symlink(partDev, "/dev/system-a"); err != nil {
+				return fmt.Errorf("failed to symlink system-a: %w", err)
+			}
+		case osimage.SystemBType:
+			if err := unix.Symlink(partDev, "/dev/system-b"); err != nil {
+				return fmt.Errorf("failed to symlink system-b: %w", err)
+			}
+		}
+	}
+	if err := updateSvc.MarkBootSuccessful(); err != nil {
+		// %w is only meaningful to fmt.Errorf, so format with %v here.
+		supervisor.Logger(ctx).Errorf("error marking boot successful: %v", err)
+	}
+	installed := false
+	if Variant != "Z" {
+		if err := updateSvc.InstallBundle(ctx, "http://10.42.0.5:80/bundle.bin"); err != nil {
+			supervisor.Logger(ctx).Errorf("Error installing new bundle: %v", err)
+		} else {
+			installed = true
+		}
+	}
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+	if installed {
+		supervisor.Logger(ctx).Info("Installed bundle successfully, powering off")
+	} else {
+		supervisor.Logger(ctx).Info("No bundle installed, powering off")
+	}
+	unix.Sync()
+	// Give the log forwarder a moment to flush the lines above to the console.
+	time.Sleep(1 * time.Second)
+	unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF)
+	return nil
+}
diff --git a/metropolis/node/core/update/e2e/testos/rootfs.fsspec b/metropolis/node/core/update/e2e/testos/rootfs.fsspec
new file mode 100644
index 0000000..03ec153
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/rootfs.fsspec
@@ -0,0 +1,20 @@
+directory <
+ path: "/sys"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/proc"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/dev"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/esp"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/tmp"
+ mode: 0555 uid: 0 gid: 0
+>
diff --git a/metropolis/node/core/update/e2e/testos/testos.bzl b/metropolis/node/core/update/e2e/testos/testos.bzl
new file mode 100644
index 0000000..a123ea6
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/testos.bzl
@@ -0,0 +1,63 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary")
+load("//metropolis/node/build:def.bzl", "erofs_image", "verity_image")
+load("//metropolis/node/build:efi.bzl", "efi_unified_kernel_image")
+load("@rules_pkg//:pkg.bzl", "pkg_zip")
+load("@rules_pkg//:mappings.bzl", "pkg_files")
+
+# Macro for generating multiple TestOS instances to check if the updater works.
+#
+# For a given variant (for example "x") this declares the following targets:
+#   rootfs_<variant>: EROFS root filesystem with the TestOS binary as /init
+#   verity_rootfs_<variant>: verity image built from that root filesystem
+#   kernel_efi_<variant>: unified kernel EFI image using that verity image
+#   testos_bundle_files_<variant>, testos_bundle_<variant>: ZIP bundle holding
+#     kernel_efi.efi and verity_rootfs.img
+#   testos_<variant>: TestOS binary with Variant set to the upper-cased variant
+def testos(variant):
+ erofs_image(
+ name = "rootfs_" + variant,
+ files = {
+ ":testos_" + variant: "/init",
+ "//metropolis/node/core/network/dns:resolv.conf": "/etc/resolv.conf",
+ "@com_github_coredns_coredns//:coredns": "/kubernetes/bin/coredns",
+ },
+ fsspecs = [
+ "//metropolis/node/build:earlydev.fsspec",
+ ":rootfs.fsspec",
+ ],
+ )
+
+ verity_image(
+ name = "verity_rootfs_" + variant,
+ source = ":rootfs_" + variant,
+ visibility = ["//metropolis/node/core/update/e2e:__pkg__"],
+ )
+
+ efi_unified_kernel_image(
+ name = "kernel_efi_" + variant,
+ cmdline = "console=ttyS0 init=/init",
+ kernel = "//third_party/linux",
+ verity = ":verity_rootfs_" + variant,
+ visibility = ["//metropolis/node/core/update/e2e:__pkg__"],
+ )
+
+ # An intermediary "bundle" format until we finalize the actual bundle format. This is NOT stable until migrated
+ # to the actual bundle format.
+ # TODO(lorenz): Replace this
+ pkg_files(
+ name = "testos_bundle_files_" + variant,
+ srcs = [
+ ":kernel_efi_" + variant,
+ ":verity_rootfs_" + variant,
+ ],
+ # File names inside the bundle; the update service opens exactly these.
+ renames = {
+ ":kernel_efi_" + variant: "kernel_efi.efi",
+ ":verity_rootfs_" + variant: "verity_rootfs.img",
+ },
+ )
+ pkg_zip(
+ name = "testos_bundle_" + variant,
+ srcs = [
+ ":testos_bundle_files_" + variant,
+ ],
+ visibility = ["//metropolis/node/core/update/e2e:__pkg__"],
+ )
+
+ # Stamp the upper-cased variant into the binary's Variant variable.
+ go_binary(
+ name = "testos_" + variant,
+ embed = [":testos_lib"],
+ visibility = ["//visibility:public"],
+ x_defs = {"source.monogon.dev/metropolis/node/core/update/e2e/testos.Variant": variant.upper()},
+ )
diff --git a/metropolis/node/core/update/update.go b/metropolis/node/core/update/update.go
new file mode 100644
index 0000000..92e2e88
--- /dev/null
+++ b/metropolis/node/core/update/update.go
@@ -0,0 +1,387 @@
+package update
+
+import (
+ "archive/zip"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strconv"
+
+ "github.com/cenkalti/backoff/v4"
+ "github.com/google/uuid"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+
+ "source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ "source.monogon.dev/metropolis/pkg/blockdev"
+ "source.monogon.dev/metropolis/pkg/efivarfs"
+ "source.monogon.dev/metropolis/pkg/logtree"
+)
+
+// Service contains data and functionality to perform A/B updates on a
+// Metropolis node.
+//
+// The ESP fields must be populated, either directly or via ProvideESP, before
+// MarkBootSuccessful or InstallBundle are called; both return an error while
+// ESPPath is empty.
+type Service struct {
+ // Path to the mount point of the EFI System Partition (ESP).
+ ESPPath string
+ // Partition UUID of the ESP. It is used to recognize and create the EFI
+ // boot entries pointing at the ESP.
+ ESPUUID uuid.UUID
+ // Partition number (1-based) of the ESP in the GPT partitions array.
+ ESPPartNumber uint32
+ // Logger service for the update service.
+ Logger logtree.LeveledLogger
+}
+
+// Slot identifies one of the two update slots. The zero value is SlotInvalid.
+type Slot int
+
+const (
+ // SlotInvalid is returned when no slot could be determined.
+ SlotInvalid Slot = 0
+ // SlotA is the first slot.
+ SlotA Slot = 1
+ // SlotB is the second slot.
+ SlotB Slot = 2
+)
+
+// Other returns the opposite slot: SlotB for SlotA and SlotA for SlotB. Any
+// other value yields SlotInvalid.
+func (s Slot) Other() Slot {
+	if s == SlotA {
+		return SlotB
+	}
+	if s == SlotB {
+		return SlotA
+	}
+	return SlotInvalid
+}
+
+// String returns "A" or "B" for the respective slot and "<invalid slot>" for
+// anything else.
+func (s Slot) String() string {
+	if s == SlotA {
+		return "A"
+	}
+	if s == SlotB {
+		return "B"
+	}
+	return "<invalid slot>"
+}
+
+// EFIBootPath returns the path of the slot's EFI loader as defined by the
+// osimage package, or an empty string if s is neither SlotA nor SlotB.
+func (s Slot) EFIBootPath() string {
+	if s == SlotA {
+		return osimage.EFIBootAPath
+	}
+	if s == SlotB {
+		return osimage.EFIBootBPath
+	}
+	return ""
+}
+
+// slotRegexp extracts the slot letter from a
+// "PARTLABEL=METROPOLIS-SYSTEM-<A|B>" reference; CurrentlyRunningSlot applies
+// it to the kernel command line.
+var slotRegexp = regexp.MustCompile(`PARTLABEL=METROPOLIS-SYSTEM-([AB])`)
+
+// ProvideESP records where the ESP is mounted, its partition UUID and its
+// partition number. It is a convenience for supplying this information after
+// the Service has been instantiated.
+func (s *Service) ProvideESP(path string, partUUID uuid.UUID, partNum uint32) {
+	s.ESPPath, s.ESPUUID, s.ESPPartNumber = path, partUUID, partNum
+}
+
+// CurrentlyRunningSlot returns the slot the current system is booted from, as
+// indicated by the system partition label found in /proc/cmdline. It returns
+// SlotInvalid if the command line cannot be read or names no slot.
+func (s *Service) CurrentlyRunningSlot() Slot {
+	raw, err := os.ReadFile("/proc/cmdline")
+	if err != nil {
+		return SlotInvalid
+	}
+	match := slotRegexp.FindStringSubmatch(string(raw))
+	if len(match) != 2 {
+		return SlotInvalid
+	}
+	letter := match[1]
+	if letter == "A" {
+		return SlotA
+	}
+	if letter == "B" {
+		return SlotB
+	}
+	// The regexp only ever captures "A" or "B".
+	panic("unreachable")
+}
+
+// bootVarRegexp matches EFI variable names of the form BootXXXX, where XXXX
+// are exactly four hexadecimal digits, and captures those digits.
+var bootVarRegexp = regexp.MustCompile(`^Boot([0-9A-Fa-f]{4})$`)
+
+// getAllBootEntries loads every EFI boot entry, i.e. every global-scope EFI
+// variable named BootXXXX, and returns them keyed by their numeric index. It
+// fails if the variables cannot be listed or any single entry cannot be read.
+func (s *Service) getAllBootEntries() (map[int]*efivarfs.LoadOption, error) {
+	names, err := efivarfs.List(efivarfs.ScopeGlobal)
+	if err != nil {
+		return nil, fmt.Errorf("failed to list EFI variables: %w", err)
+	}
+	entries := make(map[int]*efivarfs.LoadOption)
+	for _, name := range names {
+		match := bootVarRegexp.FindStringSubmatch(name)
+		if match == nil {
+			// Not a BootXXXX variable.
+			continue
+		}
+		num, err := strconv.ParseUint(match[1], 16, 16)
+		if err != nil {
+			// This cannot be hit as all regexp matches are parseable.
+			panic(err)
+		}
+		entry, err := efivarfs.GetBootEntry(int(num))
+		if err != nil {
+			return nil, fmt.Errorf("failed to get boot entry %d: %w", num, err)
+		}
+		entries[int(num)] = entry
+	}
+	return entries, nil
+}
+
+// getOrMakeBootEntry returns the index of the EFI boot entry which boots the
+// loader of the given slot from our ESP. If existing contains no such entry, a
+// new one is added to the firmware, stored in existing and its index returned.
+//
+// An entry is considered ours if its device path consists of exactly two
+// nodes: a hard drive node matching the ESP by GPT partition UUID, followed by
+// a file path equal to the slot's EFI boot path.
+//
+// It returns an error if slot is neither SlotA nor SlotB or if the new entry
+// cannot be added.
+func (s *Service) getOrMakeBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, error) {
+	bootPath := slot.EFIBootPath()
+	if bootPath == "" {
+		return 0, errors.New("invalid slot identifier given")
+	}
+	for idx, e := range existing {
+		if len(e.FilePath) != 2 {
+			// Not our entry
+			continue
+		}
+		hd, ok := e.FilePath[0].(*efivarfs.HardDrivePath)
+		if !ok {
+			continue
+		}
+		// Entries are created below with a PartitionGPT value, so accept both
+		// the value and the pointer form. Anything which is not a GPT match
+		// cannot be verified to point at our ESP and is skipped.
+		onOurESP := false
+		switch m := hd.PartitionMatch.(type) {
+		case efivarfs.PartitionGPT:
+			onOurESP = m.PartitionUUID == s.ESPUUID
+		case *efivarfs.PartitionGPT:
+			onOurESP = m != nil && m.PartitionUUID == s.ESPUUID
+		}
+		if !onOurESP {
+			// Not related to our ESP
+			continue
+		}
+		fp, ok := e.FilePath[1].(efivarfs.FilePath)
+		if !ok || string(fp) != bootPath {
+			continue
+		}
+		return idx, nil
+	}
+	newEntry := &efivarfs.LoadOption{
+		Description: fmt.Sprintf("Metropolis Slot %s", slot),
+		FilePath: efivarfs.DevicePath{
+			&efivarfs.HardDrivePath{
+				PartitionNumber: s.ESPPartNumber,
+				PartitionMatch: efivarfs.PartitionGPT{
+					PartitionUUID: s.ESPUUID,
+				},
+			},
+			efivarfs.FilePath(bootPath),
+		},
+	}
+	newIdx, err := efivarfs.AddBootEntry(newEntry)
+	if err != nil {
+		return newIdx, err
+	}
+	existing[newIdx] = newEntry
+	return newIdx, nil
+}
+
+// MarkBootSuccessful must be called after each boot if some implementation-
+// defined criteria for a successful boot are met. If an update has been
+// installed and booted and this function is called, the updated version is
+// marked as default. If an issue occurs during boot and so this function is
+// not called the old version will be started again on next boot.
+//
+// Concretely, it makes sure boot entries for both slots exist and that the
+// entry of the currently running slot comes before the entry of the other slot
+// in the EFI BootOrder:
+//   - If neither entry is part of BootOrder, BootOrder is replaced by the two
+//     slot entries, running slot first.
+//   - If the other slot's entry comes first (or is the only one present), the
+//     running slot's entry takes its position and the other slot's entry
+//     follows it.
+//   - Otherwise nothing is changed.
+//
+// It returns an error if no ESP information was provided, the running slot
+// cannot be determined, or any EFI variable cannot be read or written.
+func (s *Service) MarkBootSuccessful() error {
+	if s.ESPPath == "" {
+		return errors.New("no ESP information provided to update service, cannot continue")
+	}
+	bootEntries, err := s.getAllBootEntries()
+	if err != nil {
+		return fmt.Errorf("while getting boot entries: %w", err)
+	}
+	aIdx, err := s.getOrMakeBootEntry(bootEntries, SlotA)
+	if err != nil {
+		return fmt.Errorf("while ensuring slot A boot entry: %w", err)
+	}
+	bIdx, err := s.getOrMakeBootEntry(bootEntries, SlotB)
+	if err != nil {
+		return fmt.Errorf("while ensuring slot B boot entry: %w", err)
+	}
+
+	activeSlot := s.CurrentlyRunningSlot()
+	var activeIdx, otherIdx int
+	switch activeSlot {
+	case SlotA:
+		activeIdx, otherIdx = aIdx, bIdx
+	case SlotB:
+		activeIdx, otherIdx = bIdx, aIdx
+	default:
+		// Without knowing the running slot, no ordering decision is safe.
+		return errors.New("invalid active slot")
+	}
+
+	ord, err := efivarfs.GetBootOrder()
+	if err != nil {
+		return fmt.Errorf("failed to get boot order: %w", err)
+	}
+
+	// First position of each slot's entry in the boot order, -1 if absent.
+	activePos, otherPos := -1, -1
+	for i, e := range ord {
+		if int(e) == activeIdx && activePos == -1 {
+			activePos = i
+		}
+		if int(e) == otherIdx && otherPos == -1 {
+			otherPos = i
+		}
+	}
+
+	switch {
+	case activePos == -1 && otherPos == -1:
+		bootOrder := efivarfs.BootOrder{uint16(activeIdx), uint16(otherIdx)}
+		if err := efivarfs.SetBootOrder(bootOrder); err != nil {
+			return fmt.Errorf("failed to recreate boot order: %w", err)
+		}
+		s.Logger.Infof("Metropolis missing from boot order, recreated it")
+	case activePos == -1:
+		// Only the other slot is part of the boot order. Put the running slot
+		// in its place and keep the other slot right behind it as fallback.
+		newOrd := make(efivarfs.BootOrder, 0, len(ord)+1)
+		newOrd = append(newOrd, ord[:otherPos]...)
+		newOrd = append(newOrd, uint16(activeIdx), uint16(otherIdx))
+		newOrd = append(newOrd, ord[otherPos+1:]...)
+		if err := efivarfs.SetBootOrder(newOrd); err != nil {
+			return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
+		}
+		s.Logger.Infof("Permanently activated slot %v", activeSlot)
+	case otherPos != -1 && otherPos < activePos:
+		// swap A and B slots in boot order
+		ord[activePos], ord[otherPos] = ord[otherPos], ord[activePos]
+		if err := efivarfs.SetBootOrder(ord); err != nil {
+			return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
+		}
+		s.Logger.Infof("Permanently activated slot %v", activeSlot)
+	default:
+		s.Logger.Infof("Normal boot from slot %v", activeSlot)
+	}
+
+	return nil
+}
+
+func openSystemSlot(slot Slot) (*blockdev.Device, error) {
+ switch slot {
+ case SlotA:
+ return blockdev.Open("/dev/system-a")
+ case SlotB:
+ return blockdev.Open("/dev/system-b")
+ default:
+ return nil, errors.New("invalid slot identifier given")
+ }
+}
+
+// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
+// inactive slot and sets that slot to boot next. If it doesn't return an error,
+// a reboot boots into the new slot.
+//
+// The bundle is a ZIP file containing kernel_efi.efi (written to the target
+// slot's loader path on the ESP) and verity_rootfs.img (written to the target
+// slot's system partition). While the slot is being written its boot entry is
+// marked inactive; if writing fails the entry stays inactive.
+//
+// It returns an error if no ESP information was provided, the running slot
+// cannot be determined, the download fails permanently or ctx is canceled, the
+// bundle is malformed, or any write to disk or EFI variables fails.
+func (s *Service) InstallBundle(ctx context.Context, bundleURL string) error {
+	if s.ESPPath == "" {
+		return errors.New("no ESP information provided to update service, cannot continue")
+	}
+	// Determine the target slot before downloading anything, so that a node
+	// which cannot be updated does not fetch the whole bundle first.
+	activeSlot := s.CurrentlyRunningSlot()
+	if activeSlot == SlotInvalid {
+		return errors.New("unable to determine active slot, cannot continue")
+	}
+	targetSlot := activeSlot.Other()
+
+	// Download into a buffer as ZIP files cannot efficiently be read from
+	// HTTP in Go as the ReaderAt has no way of indicating continuous sections,
+	// thus a ton of small range requests would need to be used, causing
+	// a huge latency penalty as well as costing a lot of money on typical
+	// object storages. This should go away when we switch to a better bundle
+	// format which can be streamed.
+	var bundleRaw bytes.Buffer
+	b := backoff.NewExponentialBackOff()
+	err := backoff.Retry(func() error {
+		return s.tryDownloadBundle(ctx, bundleURL, &bundleRaw)
+	}, backoff.WithContext(b, ctx))
+	if err != nil {
+		return fmt.Errorf("error downloading Metropolis bundle: %w", err)
+	}
+	bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
+	if err != nil {
+		return fmt.Errorf("failed to open node bundle: %w", err)
+	}
+	efiPayload, err := bundle.Open("kernel_efi.efi")
+	if err != nil {
+		return fmt.Errorf("invalid bundle: %w", err)
+	}
+	defer efiPayload.Close()
+	systemImage, err := bundle.Open("verity_rootfs.img")
+	if err != nil {
+		return fmt.Errorf("invalid bundle: %w", err)
+	}
+	defer systemImage.Close()
+
+	bootEntries, err := s.getAllBootEntries()
+	if err != nil {
+		return fmt.Errorf("while getting boot entries: %w", err)
+	}
+	targetSlotBootEntryIdx, err := s.getOrMakeBootEntry(bootEntries, targetSlot)
+	if err != nil {
+		return fmt.Errorf("while ensuring target slot boot entry: %w", err)
+	}
+	targetSlotBootEntry := bootEntries[targetSlotBootEntryIdx]
+
+	// Disable boot entry while the corresponding slot is being modified.
+	targetSlotBootEntry.Inactive = true
+	if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
+		return fmt.Errorf("failed setting boot entry %d inactive: %w", targetSlotBootEntryIdx, err)
+	}
+
+	systemPart, err := openSystemSlot(targetSlot)
+	if err != nil {
+		return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
+	}
+	defer systemPart.Close()
+	if _, err := io.Copy(blockdev.NewRWS(systemPart), systemImage); err != nil {
+		return status.Errorf(codes.Unavailable, "Failed to copy system image: %v", err)
+	}
+
+	bootFile, err := os.Create(filepath.Join(s.ESPPath, targetSlot.EFIBootPath()))
+	if err != nil {
+		return fmt.Errorf("failed to open boot file: %w", err)
+	}
+	if _, err := io.Copy(bootFile, efiPayload); err != nil {
+		bootFile.Close()
+		return fmt.Errorf("failed to write boot file: %w", err)
+	}
+	// Make sure the loader has actually been persisted before the slot is
+	// advertised as bootable again; write errors may only surface here.
+	if err := bootFile.Sync(); err != nil {
+		bootFile.Close()
+		return fmt.Errorf("failed to sync boot file: %w", err)
+	}
+	if err := bootFile.Close(); err != nil {
+		return fmt.Errorf("failed to close boot file: %w", err)
+	}
+
+	// Reenable target slot boot entry after boot and system have been written
+	// fully. The slot should now be bootable again.
+	targetSlotBootEntry.Inactive = false
+	if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
+		return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
+	}
+
+	// Boot the new slot exactly once; MarkBootSuccessful makes it permanent.
+	if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
+		return fmt.Errorf("failed to set BootNext variable: %w", err)
+	}
+
+	return nil
+}
+
+// tryDownloadBundle performs a single attempt at downloading bundleURL into
+// bundleRaw. It is meant to be driven by backoff.Retry:
+//   - Request failures, the HTTP statuses 425, 429, 500, 502, 503 and 504, the
+//     status range 520-598 and errors while reading the body are returned as
+//     plain errors and thus retried.
+//   - An invalid request (for example a malformed URL) and any other non-200
+//     status are returned as permanent errors, which stops retrying.
+//
+// bundleRaw is reset if reading the body fails, so a later attempt starts from
+// an empty buffer.
+func (*Service) tryDownloadBundle(ctx context.Context, bundleURL string, bundleRaw *bytes.Buffer) error {
+	bundleReq, err := http.NewRequestWithContext(ctx, "GET", bundleURL, nil)
+	if err != nil {
+		// Retrying cannot fix a request which cannot even be constructed.
+		return backoff.Permanent(fmt.Errorf("invalid bundle request: %w", err))
+	}
+	bundleRes, err := http.DefaultClient.Do(bundleReq)
+	if err != nil {
+		return fmt.Errorf("HTTP request failed: %w", err)
+	}
+	defer bundleRes.Body.Close()
+	switch bundleRes.StatusCode {
+	case http.StatusTooEarly, http.StatusTooManyRequests,
+		http.StatusInternalServerError, http.StatusBadGateway,
+		http.StatusServiceUnavailable, http.StatusGatewayTimeout:
+		return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
+	default:
+		// Non-standard code range used for proxy-related issue by various
+		// vendors. Treat as non-permanent error.
+		if bundleRes.StatusCode >= 520 && bundleRes.StatusCode < 599 {
+			return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
+		}
+		if bundleRes.StatusCode != 200 {
+			return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
+		}
+	}
+	if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
+		// Drop the partial download so that a retry does not append to it.
+		bundleRaw.Reset()
+		return err
+	}
+	return nil
+}