metropolis: implement A/B updates
This implements an A/B update mechanism using two slots, A and B.
This is realized with two system partitions as well as two EFI
loaders/kernels.
The A/B system relies on two EFI loader entries. This has the advantage
that no preloader is required, which makes the system more reliable and
avoids the complexity of having an un-updatable preloader (CoreOS had
this issue where their GRUB2 crashed when booting newer kernels; sadly,
the issue seems to have been lost with the migration to Fedora CoreOS).
It also means that the operator can easily override the slot being
booted via the boot loader entries. The primary disadvantage is that it
relies on EFI working somewhat to spec.
New versions are booted into only once by setting BootNext. If the
bootup doesn't succeed, i.e. if the boot doesn't get to a cluster rejoin,
the next boot will be the old slot. Once the boot does get to this stage,
the permanent BootOrder is changed.
The EFI loaders don't know if they are slot A or B because they are
identical and relying on OptionalData in the boot entry to indicate the
slot means that if the EFI boot entries go away, recovering is very hard.
Thus the loaders look at their own file name to determine what slot they
are in. If no slot could be determined, they default to booting slot A.
It is planned to eventually use Authenticode Stamping (passing data in
fake certificates) to stamp the slot into the loader without affecting
the TPM hash logged.
Change-Id: I40de2df8ff7ff660c17d2c97f3d9eb1bd4ddf5bc
Reviewed-on: https://review.monogon.dev/c/monogon/+/1874
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index 09f99a7..e8367f9 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -32,6 +32,7 @@
"//metropolis/node/core/roleserve",
"//metropolis/node/core/rpc/resolver",
"//metropolis/node/core/time",
+ "//metropolis/node/core/update",
"//metropolis/pkg/logtree",
"//metropolis/pkg/pstore",
"//metropolis/pkg/supervisor",
diff --git a/metropolis/node/core/cluster/BUILD.bazel b/metropolis/node/core/cluster/BUILD.bazel
index 93cad93..e002a31 100644
--- a/metropolis/node/core/cluster/BUILD.bazel
+++ b/metropolis/node/core/cluster/BUILD.bazel
@@ -19,6 +19,7 @@
"//metropolis/node/core/roleserve",
"//metropolis/node/core/rpc",
"//metropolis/node/core/rpc/resolver",
+ "//metropolis/node/core/update",
"//metropolis/pkg/supervisor",
"//metropolis/proto/api",
"//metropolis/proto/common",
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index 6049c17..323c8ca 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -33,6 +33,7 @@
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/network"
"source.monogon.dev/metropolis/node/core/roleserve"
+ "source.monogon.dev/metropolis/node/core/update"
"source.monogon.dev/metropolis/pkg/supervisor"
apb "source.monogon.dev/metropolis/proto/api"
cpb "source.monogon.dev/metropolis/proto/common"
@@ -42,6 +43,7 @@
storageRoot *localstorage.Root
networkService *network.Service
roleServer *roleserve.Service
+ updateService *update.Service
nodeParams *apb.NodeParameters
haveTPM bool
@@ -51,11 +53,12 @@
// NewManager creates a new cluster Manager. The given localstorage Root must
// be places, but not yet started (and will be started as the Manager makes
// progress). The given network Service must already be running.
-func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service, nodeParams *apb.NodeParameters, haveTPM bool) *Manager {
+func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service, updateService *update.Service, nodeParams *apb.NodeParameters, haveTPM bool) *Manager {
return &Manager{
storageRoot: storageRoot,
networkService: networkService,
roleServer: rs,
+ updateService: updateService,
nodeParams: nodeParams,
haveTPM: haveTPM,
oneway: make(chan struct{}),
diff --git a/metropolis/node/core/cluster/cluster_join.go b/metropolis/node/core/cluster/cluster_join.go
index ac226c1..9a940a2 100644
--- a/metropolis/node/core/cluster/cluster_join.go
+++ b/metropolis/node/core/cluster/cluster_join.go
@@ -110,6 +110,13 @@
}
m.roleServer.ProvideJoinData(creds, cd)
+ // After successfully joining cluster, mark boot as successful.
+ // This allows the update service to mark the currently-booted slot as good
+ // if an update has been performed.
+ if err := m.updateService.MarkBootSuccessful(); err != nil {
+ supervisor.Logger(ctx).Errorf("Failed to mark boot as successful: %v", err)
+ }
+
supervisor.Logger(ctx).Infof("Joined the cluster.")
supervisor.Signal(ctx, supervisor.SignalHealthy)
supervisor.Signal(ctx, supervisor.SignalDone)
diff --git a/metropolis/node/core/localstorage/BUILD.bazel b/metropolis/node/core/localstorage/BUILD.bazel
index 3d4b352..075a07c 100644
--- a/metropolis/node/core/localstorage/BUILD.bazel
+++ b/metropolis/node/core/localstorage/BUILD.bazel
@@ -14,6 +14,7 @@
deps = [
"//metropolis/node/core/localstorage/crypt",
"//metropolis/node/core/localstorage/declarative",
+ "//metropolis/node/core/update",
"//metropolis/pkg/tpm",
"//metropolis/proto/api",
"//metropolis/proto/common",
diff --git a/metropolis/node/core/localstorage/crypt/BUILD.bazel b/metropolis/node/core/localstorage/crypt/BUILD.bazel
index 44188d1..d8e9881 100644
--- a/metropolis/node/core/localstorage/crypt/BUILD.bazel
+++ b/metropolis/node/core/localstorage/crypt/BUILD.bazel
@@ -13,6 +13,7 @@
importpath = "source.monogon.dev/metropolis/node/core/localstorage/crypt",
visibility = ["//metropolis/node/core/localstorage:__subpackages__"],
deps = [
+ "//metropolis/node/core/update",
"//metropolis/pkg/blockdev",
"//metropolis/pkg/devicemapper",
"//metropolis/pkg/efivarfs",
diff --git a/metropolis/node/core/localstorage/crypt/blockdev.go b/metropolis/node/core/localstorage/crypt/blockdev.go
index 0dadb6d..532033e 100644
--- a/metropolis/node/core/localstorage/crypt/blockdev.go
+++ b/metropolis/node/core/localstorage/crypt/blockdev.go
@@ -27,6 +27,7 @@
"github.com/google/uuid"
"golang.org/x/sys/unix"
+ "source.monogon.dev/metropolis/node/core/update"
"source.monogon.dev/metropolis/pkg/blockdev"
"source.monogon.dev/metropolis/pkg/efivarfs"
"source.monogon.dev/metropolis/pkg/gpt"
@@ -38,9 +39,16 @@
// data partition.
var NodeDataPartitionType = uuid.MustParse("9eeec464-6885-414a-b278-4305c51f7966")
+var (
+ SystemAType = uuid.MustParse("ee96054b-f6d0-4267-aaaa-724b2afea74c")
+ SystemBType = uuid.MustParse("ee96054b-f6d0-4267-bbbb-724b2afea74c")
+)
+
const (
- ESPDevicePath = "/dev/esp"
- NodeDataRawPath = "/dev/data-raw"
+ ESPDevicePath = "/dev/esp"
+ NodeDataRawPath = "/dev/data-raw"
+ SystemADevicePath = "/dev/system-a"
+ SystemBDevicePath = "/dev/system-b"
)
// nodePathForPartitionType returns the device node path
@@ -51,6 +59,10 @@
return ESPDevicePath
case NodeDataPartitionType:
return NodeDataRawPath
+ case SystemAType:
+ return SystemADevicePath
+ case SystemBType:
+ return SystemBDevicePath
}
return ""
}
@@ -58,7 +70,7 @@
// MakeBlockDevices looks for the ESP and the node data partition and maps them
// to ESPDevicePath and NodeDataCryptPath respectively. This doesn't fail if it
// doesn't find the partitions, only if something goes catastrophically wrong.
-func MakeBlockDevices(ctx context.Context) error {
+func MakeBlockDevices(ctx context.Context, updateSvc *update.Service) error {
espUUID, err := efivarfs.ReadLoaderDevicePartUUID()
if err != nil {
supervisor.Logger(ctx).Warningf("No EFI variable for the loader device partition UUID present")
@@ -70,7 +82,7 @@
}
for _, blockDev := range blockDevs {
- if err := handleBlockDevice(blockDev.Name(), blockDevs, espUUID); err != nil {
+ if err := handleBlockDevice(blockDev.Name(), blockDevs, espUUID, updateSvc); err != nil {
supervisor.Logger(ctx).Errorf("Failed to create block device %s: %w", blockDev.Name(), err)
}
}
@@ -80,7 +92,7 @@
// handleBlockDevice reads the uevent data and continues to iterate over all
// partitions to create all required device nodes.
-func handleBlockDevice(diskBlockDev string, blockDevs []os.DirEntry, espUUID uuid.UUID) error {
+func handleBlockDevice(diskBlockDev string, blockDevs []os.DirEntry, espUUID uuid.UUID, updateSvc *update.Service) error {
data, err := readUEvent(diskBlockDev)
if err != nil {
return err
@@ -120,7 +132,7 @@
seenTypes := make(map[uuid.UUID]bool)
for _, dev := range blockDevs {
- if err := handlePartition(diskBlockDev, dev.Name(), table, seenTypes); err != nil {
+ if err := handlePartition(diskBlockDev, dev.Name(), table, seenTypes, updateSvc); err != nil {
return fmt.Errorf("when creating partition %s: %w", dev.Name(), err)
}
}
@@ -128,7 +140,7 @@
return nil
}
-func handlePartition(diskBlockDev string, partBlockDev string, table *gpt.Table, seenTypes map[uuid.UUID]bool) error {
+func handlePartition(diskBlockDev string, partBlockDev string, table *gpt.Table, seenTypes map[uuid.UUID]bool, updateSvc *update.Service) error {
// Skip all blockdev that dont share the same name/prefix,
// also skip the blockdev itself.
if !strings.HasPrefix(partBlockDev, diskBlockDev) || partBlockDev == diskBlockDev {
@@ -152,6 +164,8 @@
part := table.Partitions[pi.partNumber-1]
+ updateSvc.ProvideESP("/esp", part.ID, uint32(pi.partNumber))
+
nodePath := nodePathForPartitionType(part.Type)
if nodePath == "" {
// Ignore partitions with an unknown type.
diff --git a/metropolis/node/core/localstorage/directory_root.go b/metropolis/node/core/localstorage/directory_root.go
index c385f70..98eede9 100644
--- a/metropolis/node/core/localstorage/directory_root.go
+++ b/metropolis/node/core/localstorage/directory_root.go
@@ -25,16 +25,17 @@
"source.monogon.dev/metropolis/node/core/localstorage/crypt"
"source.monogon.dev/metropolis/node/core/localstorage/declarative"
+ "source.monogon.dev/metropolis/node/core/update"
)
-func (r *Root) Start(ctx context.Context) error {
+func (r *Root) Start(ctx context.Context, updateSvc *update.Service) error {
r.Data.flagLock.Lock()
defer r.Data.flagLock.Unlock()
if r.Data.canMount {
return fmt.Errorf("cannot re-start root storage")
}
// TODO(q3k): turn this into an Ensure call
- err := crypt.MakeBlockDevices(ctx)
+ err := crypt.MakeBlockDevices(ctx, updateSvc)
if err != nil {
return fmt.Errorf("MakeBlockDevices: %w", err)
}
diff --git a/metropolis/node/core/localstorage/storage_esp.go b/metropolis/node/core/localstorage/storage_esp.go
index c1779e9..8991359 100644
--- a/metropolis/node/core/localstorage/storage_esp.go
+++ b/metropolis/node/core/localstorage/storage_esp.go
@@ -37,6 +37,23 @@
type ESPDirectory struct {
declarative.Directory
Metropolis ESPMetropolisDirectory `dir:"metropolis"`
+ // EFI is the top-level EFI directory of the ESP. It contains the BOOT
+ // and metropolis subdirectories described by ESPEFIDirectory.
+ EFI ESPEFIDirectory `dir:"EFI"`
+}
+
+// ESPEFIDirectory is the directory referenced by the EFI field of
+// ESPDirectory. It contains the BOOT and metropolis subdirectories.
+type ESPEFIDirectory struct {
+ declarative.Directory
+ Boot ESPBootDirectory `dir:"BOOT"`
+ Metropolis ESPEFIMetropolisDirectory `dir:"metropolis"`
+}
+
+// ESPEFIMetropolisDirectory is the metropolis subdirectory of
+// ESPEFIDirectory. It declares two files, boot-a.efi and boot-b.efi, one for
+// each of the A and B slots.
+type ESPEFIMetropolisDirectory struct {
+ declarative.Directory
+ BootA declarative.File `file:"boot-a.efi"`
+ BootB declarative.File `file:"boot-b.efi"`
+}
+
+// ESPBootDirectory is the BOOT subdirectory of ESPEFIDirectory. No files
+// inside it are declared here.
+type ESPBootDirectory struct {
+ declarative.Directory
}
// ESPMetropolisDirectory is the directory inside the EFI System Partition where
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index d40942d..a3b7f0a 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -36,6 +36,7 @@
"source.monogon.dev/metropolis/node/core/roleserve"
"source.monogon.dev/metropolis/node/core/rpc/resolver"
timesvc "source.monogon.dev/metropolis/node/core/time"
+ "source.monogon.dev/metropolis/node/core/update"
"source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/supervisor"
"source.monogon.dev/metropolis/pkg/tpm"
@@ -132,6 +133,10 @@
panic(fmt.Errorf("when placing root FS: %w", err))
}
+ updateSvc := &update.Service{
+ Logger: lt.MustLeveledFor("update"),
+ }
+
// Make context for supervisor. We cancel it when we reach the trapdoor.
ctxS, ctxC := context.WithCancel(context.Background())
@@ -145,7 +150,7 @@
// services related to the node's roles.
init := func(ctx context.Context) error {
// Start storage and network - we need this to get anything else done.
- if err := root.Start(ctx); err != nil {
+ if err := root.Start(ctx, updateSvc); err != nil {
return fmt.Errorf("cannot start root FS: %w", err)
}
nodeParams, err := getNodeParams(ctx, root)
@@ -187,6 +192,7 @@
Network: networkSvc,
Resolver: res,
LogTree: lt,
+ Update: updateSvc,
})
if err := supervisor.Run(ctx, "role", rs.Run); err != nil {
return fmt.Errorf("failed to start role service: %w", err)
@@ -198,7 +204,7 @@
// Start cluster manager. This kicks off cluster membership machinery,
// which will either start a new cluster, enroll into one or join one.
- m := cluster.NewManager(root, networkSvc, rs, nodeParams, haveTPM)
+ m := cluster.NewManager(root, networkSvc, rs, updateSvc, nodeParams, haveTPM)
return m.Run(ctx)
}
diff --git a/metropolis/node/core/mgmt/BUILD.bazel b/metropolis/node/core/mgmt/BUILD.bazel
index dff5bac..e683eee 100644
--- a/metropolis/node/core/mgmt/BUILD.bazel
+++ b/metropolis/node/core/mgmt/BUILD.bazel
@@ -5,6 +5,7 @@
srcs = [
"mgmt.go",
"svc_logs.go",
+ "update.go",
],
importpath = "source.monogon.dev/metropolis/node/core/mgmt",
visibility = ["//visibility:public"],
@@ -12,6 +13,7 @@
"//metropolis/node",
"//metropolis/node/core/identity",
"//metropolis/node/core/rpc",
+ "//metropolis/node/core/update",
"//metropolis/pkg/logtree",
"//metropolis/pkg/supervisor",
"//metropolis/proto/api",
@@ -19,6 +21,7 @@
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_grpc//codes",
"@org_golang_google_grpc//status",
+ "@org_golang_x_sys//unix",
],
)
diff --git a/metropolis/node/core/mgmt/mgmt.go b/metropolis/node/core/mgmt/mgmt.go
index 2572764..a9f5973 100644
--- a/metropolis/node/core/mgmt/mgmt.go
+++ b/metropolis/node/core/mgmt/mgmt.go
@@ -12,6 +12,7 @@
"source.monogon.dev/metropolis/node"
"source.monogon.dev/metropolis/node/core/identity"
"source.monogon.dev/metropolis/node/core/rpc"
+ "source.monogon.dev/metropolis/node/core/update"
"source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/supervisor"
@@ -24,6 +25,8 @@
NodeCredentials *identity.NodeCredentials
// LogTree from which NodeManagement.Logs will be served.
LogTree *logtree.LogTree
+ // Update service handle for performing updates via the API.
+ UpdateService *update.Service
// Automatically populated on Run.
LogService
diff --git a/metropolis/node/core/mgmt/update.go b/metropolis/node/core/mgmt/update.go
new file mode 100644
index 0000000..28a2a0a
--- /dev/null
+++ b/metropolis/node/core/mgmt/update.go
@@ -0,0 +1,27 @@
+package mgmt
+
+import (
+ "context"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+
+ apb "source.monogon.dev/metropolis/proto/api"
+)
+
+// UpdateNode installs the update bundle referenced by the request's BundleUrl
+// through the update service. Unless the request sets NoReboot, a reboot of
+// the node is scheduled in the background before the response is returned.
+// Installation failures are reported to the caller as codes.Unavailable.
+func (s *Service) UpdateNode(ctx context.Context, req *apb.UpdateNodeRequest) (*apb.UpdateNodeResponse, error) {
+	installErr := s.UpdateService.InstallBundle(ctx, req.BundleUrl)
+	if installErr != nil {
+		return nil, status.Errorf(codes.Unavailable, "error installing update: %v", installErr)
+	}
+	resp := &apb.UpdateNodeResponse{}
+	if req.NoReboot {
+		return resp, nil
+	}
+	// TODO(#253): Tell Supervisor to shut down gracefully and reboot
+	restartLater := func() {
+		// NOTE(review): the delay presumably leaves time for the RPC
+		// response to reach the caller - confirm.
+		time.Sleep(10 * time.Second)
+		unix.Sync()
+		unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
+	}
+	go restartLater()
+	return resp, nil
+}
diff --git a/metropolis/node/core/roleserve/BUILD.bazel b/metropolis/node/core/roleserve/BUILD.bazel
index abdf8b3..ce0b5cc 100644
--- a/metropolis/node/core/roleserve/BUILD.bazel
+++ b/metropolis/node/core/roleserve/BUILD.bazel
@@ -31,6 +31,7 @@
"//metropolis/node/core/network/hostsfile",
"//metropolis/node/core/rpc",
"//metropolis/node/core/rpc/resolver",
+ "//metropolis/node/core/update",
"//metropolis/node/kubernetes",
"//metropolis/node/kubernetes/containerd",
"//metropolis/node/kubernetes/pki",
diff --git a/metropolis/node/core/roleserve/roleserve.go b/metropolis/node/core/roleserve/roleserve.go
index 0c486d1..3140ea8 100644
--- a/metropolis/node/core/roleserve/roleserve.go
+++ b/metropolis/node/core/roleserve/roleserve.go
@@ -46,6 +46,7 @@
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/network"
"source.monogon.dev/metropolis/node/core/rpc/resolver"
+ "source.monogon.dev/metropolis/node/core/update"
"source.monogon.dev/metropolis/pkg/event/memory"
"source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/supervisor"
@@ -69,6 +70,9 @@
// information from the ProvideXXX methods.
Resolver *resolver.Resolver
+ // Update is a handle to the update service, used by workloads.
+ Update *update.Service
+
LogTree *logtree.LogTree
}
@@ -148,6 +152,7 @@
s.nodeMgmt = &workerNodeMgmt{
curatorConnection: &s.CuratorConnection,
logTree: s.LogTree,
+ updateService: s.Update,
}
s.clusternet = &workerClusternet{
diff --git a/metropolis/node/core/roleserve/worker_nodemgmt.go b/metropolis/node/core/roleserve/worker_nodemgmt.go
index 7516f2d..17fd0d4 100644
--- a/metropolis/node/core/roleserve/worker_nodemgmt.go
+++ b/metropolis/node/core/roleserve/worker_nodemgmt.go
@@ -4,6 +4,7 @@
"context"
"source.monogon.dev/metropolis/node/core/mgmt"
+ "source.monogon.dev/metropolis/node/core/update"
"source.monogon.dev/metropolis/pkg/event/memory"
"source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/supervisor"
@@ -12,6 +13,7 @@
type workerNodeMgmt struct {
curatorConnection *memory.Value[*curatorConnection]
logTree *logtree.LogTree
+ updateService *update.Service
}
func (s *workerNodeMgmt) run(ctx context.Context) error {
@@ -27,6 +29,7 @@
srv := mgmt.Service{
NodeCredentials: cc.credentials,
LogTree: s.logTree,
+ UpdateService: s.updateService,
}
return srv.Run(ctx)
}
diff --git a/metropolis/node/core/update/BUILD.bazel b/metropolis/node/core/update/BUILD.bazel
new file mode 100644
index 0000000..4bb7915
--- /dev/null
+++ b/metropolis/node/core/update/BUILD.bazel
@@ -0,0 +1,18 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "update",
+ srcs = ["update.go"],
+ importpath = "source.monogon.dev/metropolis/node/core/update",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//metropolis/node/build/mkimage/osimage",
+ "//metropolis/pkg/blockdev",
+ "//metropolis/pkg/efivarfs",
+ "//metropolis/pkg/logtree",
+ "@com_github_cenkalti_backoff_v4//:backoff",
+ "@com_github_google_uuid//:uuid",
+ "@org_golang_google_grpc//codes",
+ "@org_golang_google_grpc//status",
+ ],
+)
diff --git a/metropolis/node/core/update/e2e/BUILD.bazel b/metropolis/node/core/update/e2e/BUILD.bazel
new file mode 100644
index 0000000..3905036
--- /dev/null
+++ b/metropolis/node/core/update/e2e/BUILD.bazel
@@ -0,0 +1,22 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_test")
+
+go_test(
+ name = "e2e_test",
+ srcs = ["e2e_test.go"],
+ data = [
+ # For emulation
+ "//third_party/edk2:firmware",
+ # For the initial image creation
+ "//metropolis/node/core/update/e2e/testos:verity_rootfs_x",
+ "//metropolis/node/core/update/e2e/testos:kernel_efi_x",
+ # For the two update tests
+ "//metropolis/node/core/update/e2e/testos:testos_bundle_y",
+ "//metropolis/node/core/update/e2e/testos:testos_bundle_z",
+ ],
+ deps = [
+ "//metropolis/cli/pkg/datafile",
+ "//metropolis/node/build/mkimage/osimage",
+ "//metropolis/pkg/blkio",
+ "//metropolis/pkg/blockdev",
+ ],
+)
diff --git a/metropolis/node/core/update/e2e/e2e_test.go b/metropolis/node/core/update/e2e/e2e_test.go
new file mode 100644
index 0000000..7524d2a
--- /dev/null
+++ b/metropolis/node/core/update/e2e/e2e_test.go
@@ -0,0 +1,231 @@
+package e2e
+
+import (
+ "bufio"
+ "context"
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+
+ "source.monogon.dev/metropolis/cli/pkg/datafile"
+ "source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ "source.monogon.dev/metropolis/pkg/blkio"
+ "source.monogon.dev/metropolis/pkg/blockdev"
+)
+
+const Mi = 1024 * 1024
+
+var variantRegexp = regexp.MustCompile(`TESTOS_VARIANT=([A-Z])`)
+
+// runAndCheckVariant boots QEMU with the given arguments and checks that the
+// first TESTOS_VARIANT=<letter> marker printed on the VM's serial console
+// (QEMU's stdout) matches expectedVariant. After a matching marker it waits
+// for QEMU to exit, or kills it once the 15 second deadline passes.
+//
+// The test is failed if the marker does not match, if QEMU exits before
+// printing a marker, if the deadline passes first, or if the VM logs
+// "Error installing new bundle".
+func runAndCheckVariant(t *testing.T, expectedVariant string, qemuArgs []string) {
+	t.Helper()
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+	qemuCmdLaunch := exec.CommandContext(ctx, "qemu-system-x86_64", qemuArgs...)
+	stdoutPipe, err := qemuCmdLaunch.StdoutPipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	stderrPipe, err := qemuCmdLaunch.StderrPipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Buffered so that the reader never blocks on reporting the variant;
+	// only the first marker is kept.
+	testosStarted := make(chan string, 1)
+	// readers tracks the two pipe-draining goroutines. os/exec requires all
+	// reads from the pipes to complete before Wait is called.
+	var readers sync.WaitGroup
+	readers.Add(2)
+	go func() {
+		defer readers.Done()
+		s := bufio.NewScanner(stdoutPipe)
+		for s.Scan() {
+			// Lines starting with "[" are skipped to keep the test log
+			// readable.
+			if strings.HasPrefix(s.Text(), "[") {
+				continue
+			}
+			errIdx := strings.Index(s.Text(), "Error installing new bundle")
+			if errIdx != -1 {
+				t.Error(s.Text()[errIdx:])
+			}
+			t.Log("vm: " + s.Text())
+			if m := variantRegexp.FindStringSubmatch(s.Text()); len(m) == 2 {
+				select {
+				case testosStarted <- m[1]:
+				default:
+				}
+			}
+		}
+	}()
+	go func() {
+		defer readers.Done()
+		s := bufio.NewScanner(stderrPipe)
+		for s.Scan() {
+			if strings.HasPrefix(s.Text(), "[") {
+				continue
+			}
+			t.Log("qemu: " + s.Text())
+		}
+	}()
+	if err := qemuCmdLaunch.Start(); err != nil {
+		t.Fatal(err)
+	}
+	// Buffered so that the waiter goroutine can always deliver its result and
+	// terminate, even when this function has already bailed out via t.Fatalf.
+	procExit := make(chan error, 1)
+	go func() {
+		// Let both readers hit EOF first; Wait closes the pipes.
+		readers.Wait()
+		procExit <- qemuCmdLaunch.Wait()
+		close(procExit)
+	}()
+	select {
+	case variant := <-testosStarted:
+		if variant != expectedVariant {
+			t.Fatalf("expected variant %s to launch, got %s", expectedVariant, variant)
+		}
+		select {
+		case <-procExit:
+			return
+		case <-ctx.Done():
+			t.Log("Canceled VM")
+			cancel()
+			<-procExit
+			return
+		}
+	case err := <-procExit:
+		t.Fatalf("QEMU exited unexpectedly: %v", err)
+	case <-ctx.Done():
+		t.Fatalf("Waiting for TestOS variant %s launch timed out", expectedVariant)
+	}
+}
+
+// TestABUpdateSequence exercises a full A/B update cycle in QEMU. It builds a
+// disk image containing TestOS variant X, serves update bundles to the guest
+// over HTTP (forwarded via QEMU guestfwd), and then boots the same disk three
+// times, expecting variant X, then Y, then Z to come up.
+func TestABUpdateSequence(t *testing.T) {
+	// Address under which the guest reaches the bundle server.
+	blobAddr := net.TCPAddr{
+		IP:   net.IPv4(10, 42, 0, 5),
+		Port: 80,
+	}
+
+	// nextBundlePathToInstall is the bundle served on the next request. It is
+	// shared with the HTTP handler and guarded by nbpMutex.
+	var nextBundlePathToInstall string
+	var nbpMutex sync.Mutex
+
+	m := http.NewServeMux()
+	bundleYPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/testos_bundle_y.zip")
+	if err != nil {
+		t.Fatal(err)
+	}
+	bundleZPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/testos_bundle_z.zip")
+	if err != nil {
+		t.Fatal(err)
+	}
+	m.HandleFunc("/bundle.bin", func(w http.ResponseWriter, req *http.Request) {
+		nbpMutex.Lock()
+		bundleFilePath := nextBundlePathToInstall
+		nbpMutex.Unlock()
+		if bundleFilePath == "" {
+			w.WriteHeader(http.StatusBadRequest)
+			w.Write([]byte("No next bundle set in the test harness"))
+			// Do not fall through to ServeFile with an empty path.
+			return
+		}
+		http.ServeFile(w, req, bundleFilePath)
+	})
+	blobLis, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Closing the listener also makes the http.Serve goroutine below return.
+	defer blobLis.Close()
+	blobListenAddr := blobLis.Addr().(*net.TCPAddr)
+	go http.Serve(blobLis, m)
+
+	rootDevPath := filepath.Join(t.TempDir(), "root.img")
+	// Make a 512 bytes * 2Mi = 1Gi file-backed block device
+	rootDisk, err := blockdev.CreateFile(rootDevPath, 512, 2097152)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(rootDevPath)
+	defer rootDisk.Close()
+
+	ovmfVarsPath, err := datafile.ResolveRunfile("external/edk2/OVMF_VARS.fd")
+	if err != nil {
+		t.Fatal(err)
+	}
+	ovmfCodePath, err := datafile.ResolveRunfile("external/edk2/OVMF_CODE.fd")
+	if err != nil {
+		t.Fatal(err)
+	}
+	bootPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/kernel_efi_x.efi")
+	if err != nil {
+		t.Fatal(err)
+	}
+	boot, err := blkio.NewFileReader(bootPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer boot.Close()
+	systemXPath, err := datafile.ResolveRunfile("metropolis/node/core/update/e2e/testos/verity_rootfs_x.img")
+	if err != nil {
+		t.Fatal(err)
+	}
+	system, err := os.Open(systemXPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer system.Close()
+
+	if _, err := osimage.Create(&osimage.Params{
+		Output:      rootDisk,
+		EFIPayload:  boot,
+		SystemImage: system,
+		PartitionSize: osimage.PartitionSizeInfo{
+			ESP:    128,
+			System: 256,
+			Data:   10,
+		},
+	}); err != nil {
+		t.Fatalf("unable to generate starting point image: %v", err)
+	}
+	rootDisk.Close()
+
+	blobGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", blobAddr.String(), blobListenAddr.Port)
+
+	// QEMU gets a private, writable copy of the OVMF variable store, which is
+	// reused across all three boots below.
+	ovmfVars, err := os.CreateTemp("", "agent-ovmf-vars")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(ovmfVars.Name())
+	defer ovmfVars.Close()
+	ovmfVarsTmpl, err := os.Open(ovmfVarsPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ovmfVarsTmpl.Close()
+	if _, err := io.Copy(ovmfVars, ovmfVarsTmpl); err != nil {
+		t.Fatal(err)
+	}
+
+	qemuArgs := []string{
+		"-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "1024",
+		"-cpu", "max", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4",
+		"-drive", "if=pflash,format=raw,readonly=on,file=" + ovmfCodePath,
+		"-drive", "if=pflash,format=raw,file=" + ovmfVars.Name(),
+		"-drive", "if=virtio,format=raw,cache=unsafe,file=" + rootDevPath,
+		"-netdev", fmt.Sprintf("user,id=net0,net=10.42.0.0/24,dhcpstart=10.42.0.10,%s", blobGuestFwd),
+		"-device", "virtio-net-pci,netdev=net0,mac=22:d5:8e:76:1d:07",
+		"-device", "virtio-rng-pci",
+		"-serial", "stdio",
+		"-trace", "pflash*",
+		"-no-reboot",
+	}
+	// Install Bundle Y next
+	nbpMutex.Lock()
+	nextBundlePathToInstall = bundleYPath
+	nbpMutex.Unlock()
+
+	t.Log("Launching X image to install Y")
+	runAndCheckVariant(t, "X", qemuArgs)
+
+	// Install Bundle Z next
+	nbpMutex.Lock()
+	nextBundlePathToInstall = bundleZPath
+	nbpMutex.Unlock()
+
+	t.Log("Launching Y on slot B to install Z on slot A")
+	runAndCheckVariant(t, "Y", qemuArgs)
+
+	t.Log("Launching Z on slot A")
+	runAndCheckVariant(t, "Z", qemuArgs)
+}
diff --git a/metropolis/node/core/update/e2e/testos/BUILD.bazel b/metropolis/node/core/update/e2e/testos/BUILD.bazel
new file mode 100644
index 0000000..79fd0f9
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/BUILD.bazel
@@ -0,0 +1,31 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load(":testos.bzl", "testos")
+
+testos(variant = "x")
+
+testos(variant = "y")
+
+testos(variant = "z")
+
+go_library(
+ name = "testos_lib",
+ srcs = ["main.go"],
+ importpath = "source.monogon.dev/metropolis/node/core/update/e2e/testos",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//metropolis/node/build/mkimage/osimage",
+ "//metropolis/node/core/network",
+ "//metropolis/node/core/update",
+ "//metropolis/pkg/blockdev",
+ "//metropolis/pkg/gpt",
+ "//metropolis/pkg/logtree",
+ "//metropolis/pkg/supervisor",
+ "@org_golang_x_sys//unix",
+ ],
+)
+
+go_binary(
+ name = "testos",
+ embed = [":testos_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/core/update/e2e/testos/main.go b/metropolis/node/core/update/e2e/testos/main.go
new file mode 100644
index 0000000..b780d17
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/main.go
@@ -0,0 +1,137 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "time"
+
+ "golang.org/x/sys/unix"
+
+ "source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ "source.monogon.dev/metropolis/node/core/network"
+ "source.monogon.dev/metropolis/node/core/update"
+ "source.monogon.dev/metropolis/pkg/blockdev"
+ "source.monogon.dev/metropolis/pkg/gpt"
+ "source.monogon.dev/metropolis/pkg/logtree"
+ "source.monogon.dev/metropolis/pkg/supervisor"
+)
+
+var Variant = "U"
+
+// mkdirAndMount creates dir (including missing parents, mode 0755) and then
+// mounts a filesystem of type fs on it with the given mount flags. The
+// filesystem type doubles as the mount source. Either failure is returned
+// wrapped with the path involved.
+func mkdirAndMount(dir, fs string, flags uintptr) error {
+	mkdirErr := os.MkdirAll(dir, 0o755)
+	if mkdirErr != nil {
+		return fmt.Errorf("could not make %s: %w", dir, mkdirErr)
+	}
+	mountErr := unix.Mount(fs, dir, fs, flags, "")
+	if mountErr != nil {
+		return fmt.Errorf("could not mount %s on %s: %w", fs, dir, mountErr)
+	}
+	return nil
+}
+
+// setupMounts mounts the pseudo-filesystems TestOS relies on: sysfs, tracefs,
+// pstore, efivarfs, procfs, devtmpfs, devpts and a tmpfs on /tmp. Each mount
+// point is created before mounting. Mounts happen in list order and the first
+// failure aborts the sequence and is returned.
+// This should be called early during init as a lot of processes depend on
+// these being available.
+func setupMounts() error {
+	type mountPoint struct {
+		dir   string
+		fs    string
+		flags uintptr
+	}
+	const (
+		noExecSUID    uintptr = unix.MS_NOEXEC | unix.MS_NOSUID
+		noExecSUIDDev uintptr = noExecSUID | unix.MS_NODEV
+	)
+	// Set up target filesystems.
+	mounts := []mountPoint{
+		{dir: "/sys", fs: "sysfs", flags: noExecSUIDDev},
+		{dir: "/sys/kernel/tracing", fs: "tracefs", flags: noExecSUIDDev},
+		{dir: "/sys/fs/pstore", fs: "pstore", flags: noExecSUIDDev},
+		{dir: "/sys/firmware/efi/efivars", fs: "efivarfs", flags: noExecSUIDDev},
+		{dir: "/proc", fs: "proc", flags: noExecSUIDDev},
+		{dir: "/dev", fs: "devtmpfs", flags: noExecSUID},
+		{dir: "/dev/pts", fs: "devpts", flags: noExecSUID},
+		{dir: "/tmp", fs: "tmpfs", flags: noExecSUIDDev},
+	}
+	for i := range mounts {
+		mp := mounts[i]
+		if err := mkdirAndMount(mp.dir, mp.fs, mp.flags); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// main is the init process of TestOS. It sets up the basic mounts, opens the
+// serial console, starts the supervisor with testosRunnable as its root and
+// then forwards every logtree entry to the serial console forever. Any
+// failure during this early setup is printed and powers the machine off.
+func main() {
+	// bail reports an unrecoverable early-init failure and powers off.
+	bail := func(err error) {
+		fmt.Printf("early init error, stopping: %v\n", err)
+		unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF)
+	}
+
+	if err := setupMounts(); err != nil {
+		bail(err)
+		return
+	}
+	lt := logtree.New()
+	console, err := os.OpenFile("/dev/ttyS0", os.O_WRONLY, 0)
+	if err != nil {
+		bail(err)
+		return
+	}
+	reader, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
+	if err != nil {
+		bail(err)
+		return
+	}
+
+	sCtx := context.Background()
+	supervisor.New(sCtx, testosRunnable, supervisor.WithExistingLogtree(lt))
+
+	// Pump log entries to the serial console; this loop never returns.
+	for {
+		entry := <-reader.Stream
+		fmt.Fprintf(console, "%s\n", entry.String())
+	}
+}
+
+// testosRunnable is the supervisor root runnable of TestOS. It announces the
+// running variant on the log, starts networking, mounts the ESP and creates
+// the /dev/system-a and /dev/system-b symlinks from the GPT on /dev/vda,
+// marks the current boot as successful, installs the next bundle (all
+// variants except Z) and finally powers the machine off.
+//
+// An error is returned if networking cannot be started, the root device or
+// its GPT cannot be read, the ESP cannot be mounted or a symlink cannot be
+// created. Failures to mark the boot successful or to install the bundle are
+// only logged.
+func testosRunnable(ctx context.Context) error {
+	// The e2e test harness looks for this exact marker on the console.
+	supervisor.Logger(ctx).Info("TESTOS_VARIANT=" + Variant)
+	networkSvc := network.New(nil)
+	networkSvc.DHCPVendorClassID = "dev.monogon.testos.v1"
+	if err := supervisor.Run(ctx, "networking", networkSvc.Run); err != nil {
+		return fmt.Errorf("failed to start networking: %w", err)
+	}
+
+	vda, err := blockdev.Open("/dev/vda")
+	if err != nil {
+		return fmt.Errorf("unable to open root device: %w", err)
+	}
+	defer vda.Close()
+	vdaParts, err := gpt.Read(vda)
+	if err != nil {
+		return fmt.Errorf("unable to read GPT from root device: %w", err)
+	}
+
+	updateSvc := update.Service{
+		Logger: supervisor.MustSubLogger(ctx, "update"),
+	}
+	for pn, p := range vdaParts.Partitions {
+		// Partition numbers are 1-based, slice indices are 0-based.
+		switch p.Type {
+		case gpt.PartitionTypeEFISystem:
+			if err := unix.Mount(fmt.Sprintf("/dev/vda%d", pn+1), "/esp", "vfat", unix.MS_SYNC, ""); err != nil {
+				return fmt.Errorf("unable to mount ESP: %w", err)
+			}
+			updateSvc.ProvideESP("/esp", p.ID, uint32(pn+1))
+		case osimage.SystemAType:
+			if err := unix.Symlink(fmt.Sprintf("/dev/vda%d", pn+1), "/dev/system-a"); err != nil {
+				return fmt.Errorf("failed to symlink system-a: %w", err)
+			}
+		case osimage.SystemBType:
+			if err := unix.Symlink(fmt.Sprintf("/dev/vda%d", pn+1), "/dev/system-b"); err != nil {
+				return fmt.Errorf("failed to symlink system-b: %w", err)
+			}
+		}
+	}
+	if err := updateSvc.MarkBootSuccessful(); err != nil {
+		// %v, not %w: only fmt.Errorf understands the wrapping verb.
+		supervisor.Logger(ctx).Errorf("error marking boot successful: %v", err)
+	}
+	if Variant != "Z" {
+		if err := updateSvc.InstallBundle(ctx, "http://10.42.0.5:80/bundle.bin"); err != nil {
+			// The e2e test harness matches on this message prefix.
+			supervisor.Logger(ctx).Errorf("Error installing new bundle: %v", err)
+		} else {
+			supervisor.Logger(ctx).Info("Installed bundle successfully")
+		}
+	}
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+	supervisor.Logger(ctx).Info("Powering off")
+	unix.Sync()
+	time.Sleep(1 * time.Second)
+	unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF)
+	return nil
+}
diff --git a/metropolis/node/core/update/e2e/testos/rootfs.fsspec b/metropolis/node/core/update/e2e/testos/rootfs.fsspec
new file mode 100644
index 0000000..03ec153
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/rootfs.fsspec
@@ -0,0 +1,20 @@
+directory <
+ path: "/sys"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/proc"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/dev"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/esp"
+ mode: 0555 uid: 0 gid: 0
+>
+directory <
+ path: "/tmp"
+ mode: 0555 uid: 0 gid: 0
+>
diff --git a/metropolis/node/core/update/e2e/testos/testos.bzl b/metropolis/node/core/update/e2e/testos/testos.bzl
new file mode 100644
index 0000000..a123ea6
--- /dev/null
+++ b/metropolis/node/core/update/e2e/testos/testos.bzl
@@ -0,0 +1,63 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary")
+load("//metropolis/node/build:def.bzl", "erofs_image", "verity_image")
+load("//metropolis/node/build:efi.bzl", "efi_unified_kernel_image")
+load("@rules_pkg//:pkg.bzl", "pkg_zip")
+load("@rules_pkg//:mappings.bzl", "pkg_files")
+
+# Macro that instantiates every target belonging to one TestOS variant. It is
+# called once per variant so that several TestOS instances exist to check if
+# the updater works.
+#
+# For a given variant it declares: the root filesystem image, its verity
+# image, the unified kernel EFI image, the intermediary bundle (files + zip)
+# and the init binary with the variant name stamped in via x_defs.
+def testos(variant):
+    init_binary = "testos_" + variant
+    rootfs = "rootfs_" + variant
+    verity_rootfs = "verity_rootfs_" + variant
+    kernel_efi = "kernel_efi_" + variant
+    bundle_files = "testos_bundle_files_" + variant
+    bundle = "testos_bundle_" + variant
+
+    erofs_image(
+        name = rootfs,
+        files = {
+            ":" + init_binary: "/init",
+            "//metropolis/node/core/network/dns:resolv.conf": "/etc/resolv.conf",
+            "@com_github_coredns_coredns//:coredns": "/kubernetes/bin/coredns",
+        },
+        fsspecs = [
+            "//metropolis/node/build:earlydev.fsspec",
+            ":rootfs.fsspec",
+        ],
+    )
+
+    verity_image(
+        name = verity_rootfs,
+        source = ":" + rootfs,
+        visibility = ["//metropolis/node/core/update/e2e:__pkg__"],
+    )
+
+    efi_unified_kernel_image(
+        name = kernel_efi,
+        cmdline = "console=ttyS0 init=/init",
+        kernel = "//third_party/linux",
+        verity = ":" + verity_rootfs,
+        visibility = ["//metropolis/node/core/update/e2e:__pkg__"],
+    )
+
+    # Intermediary "bundle" format, used until the actual bundle format is
+    # finalized. It is NOT stable until migrated to the actual bundle format.
+    # TODO(lorenz): Replace this
+    pkg_files(
+        name = bundle_files,
+        srcs = [
+            ":" + kernel_efi,
+            ":" + verity_rootfs,
+        ],
+        renames = {
+            ":" + kernel_efi: "kernel_efi.efi",
+            ":" + verity_rootfs: "verity_rootfs.img",
+        },
+    )
+    pkg_zip(
+        name = bundle,
+        srcs = [
+            ":" + bundle_files,
+        ],
+        visibility = ["//metropolis/node/core/update/e2e:__pkg__"],
+    )
+
+    go_binary(
+        name = init_binary,
+        embed = [":testos_lib"],
+        visibility = ["//visibility:public"],
+        x_defs = {"source.monogon.dev/metropolis/node/core/update/e2e/testos.Variant": variant.upper()},
+    )
diff --git a/metropolis/node/core/update/update.go b/metropolis/node/core/update/update.go
new file mode 100644
index 0000000..92e2e88
--- /dev/null
+++ b/metropolis/node/core/update/update.go
@@ -0,0 +1,387 @@
+package update
+
+import (
+ "archive/zip"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strconv"
+
+ "github.com/cenkalti/backoff/v4"
+ "github.com/google/uuid"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+
+ "source.monogon.dev/metropolis/node/build/mkimage/osimage"
+ "source.monogon.dev/metropolis/pkg/blockdev"
+ "source.monogon.dev/metropolis/pkg/efivarfs"
+ "source.monogon.dev/metropolis/pkg/logtree"
+)
+
+// Service contains data and functionality to perform A/B updates on a
+// Metropolis node.
+//
+// The ESP fields may be filled in after construction via ProvideESP.
+// MarkBootSuccessful and InstallBundle return an error while ESPPath is empty.
+type Service struct {
+ // Path to the mount point of the EFI System Partition (ESP).
+ ESPPath string
+ // UUID of the EFI System Partition (ESP).
+ ESPUUID uuid.UUID
+ // Partition number (1-based) of the ESP in the GPT partitions array.
+ ESPPartNumber uint32
+ // Logger service for the update service.
+ Logger logtree.LeveledLogger
+}
+
+// Slot identifies one of the two A/B update slots.
+type Slot int
+
+const (
+ // SlotInvalid is the zero value of Slot and does not identify any slot.
+ SlotInvalid Slot = 0
+ // SlotA identifies slot A.
+ SlotA Slot = 1
+ // SlotB identifies slot B.
+ SlotB Slot = 2
+)
+
+// Other yields the opposite slot: SlotB when called on SlotA and SlotA when
+// called on SlotB. Every other value, including SlotInvalid, maps to
+// SlotInvalid.
+func (s Slot) Other() Slot {
+	if s == SlotA {
+		return SlotB
+	}
+	if s == SlotB {
+		return SlotA
+	}
+	return SlotInvalid
+}
+
+// String renders the slot as "A" or "B". Any other value is rendered as
+// "<invalid slot>".
+func (s Slot) String() string {
+	if s == SlotA {
+		return "A"
+	}
+	if s == SlotB {
+		return "B"
+	}
+	return "<invalid slot>"
+}
+
+// EFIBootPath returns the osimage EFI boot path constant belonging to the
+// slot (osimage.EFIBootAPath for SlotA, osimage.EFIBootBPath for SlotB). For
+// any other value it returns the empty string.
+func (s Slot) EFIBootPath() string {
+	if s == SlotA {
+		return osimage.EFIBootAPath
+	}
+	if s == SlotB {
+		return osimage.EFIBootBPath
+	}
+	return ""
+}
+
+// slotRegexp matches a PARTLABEL=METROPOLIS-SYSTEM-<slot> token and captures
+// the slot letter (A or B). CurrentlyRunningSlot applies it to the contents
+// of /proc/cmdline.
+var slotRegexp = regexp.MustCompile(`PARTLABEL=METROPOLIS-SYSTEM-([AB])`)
+
+// ProvideESP records where the ESP is mounted (path), its partition UUID
+// (partUUID) and its partition number (partNum) on the Service. It exists so
+// that this information can be supplied after the update service has already
+// been instantiated.
+func (s *Service) ProvideESP(path string, partUUID uuid.UUID, partNum uint32) {
+	s.ESPPath, s.ESPUUID, s.ESPPartNumber = path, partUUID, partNum
+}
+
+// CurrentlyRunningSlot reports which slot the running system was booted from
+// by looking for a PARTLABEL=METROPOLIS-SYSTEM-<slot> token in /proc/cmdline.
+// It returns SlotInvalid if /proc/cmdline cannot be read or contains no such
+// token.
+func (s *Service) CurrentlyRunningSlot() Slot {
+	raw, err := os.ReadFile("/proc/cmdline")
+	if err != nil {
+		return SlotInvalid
+	}
+	match := slotRegexp.FindStringSubmatch(string(raw))
+	if len(match) != 2 {
+		return SlotInvalid
+	}
+	if match[1] == "A" {
+		return SlotA
+	}
+	if match[1] == "B" {
+		return SlotB
+	}
+	// The capture group of slotRegexp only admits "A" or "B".
+	panic("unreachable")
+}
+
+// bootVarRegexp matches variable names of the form BootXXXX, where XXXX are
+// exactly four hexadecimal digits, and captures those digits.
+var bootVarRegexp = regexp.MustCompile(`^Boot([0-9A-Fa-f]{4})$`)
+
+// getAllBootEntries lists the global-scope EFI variables, picks out those
+// named BootXXXX and loads the boot entry for each of them. The result maps
+// the numeric value of XXXX to the loaded entry. Failing to list the
+// variables or to load any single entry aborts with an error.
+func (s *Service) getAllBootEntries() (map[int]*efivarfs.LoadOption, error) {
+	names, err := efivarfs.List(efivarfs.ScopeGlobal)
+	if err != nil {
+		return nil, fmt.Errorf("failed to list EFI variables: %w", err)
+	}
+	entries := make(map[int]*efivarfs.LoadOption)
+	for _, name := range names {
+		match := bootVarRegexp.FindStringSubmatch(name)
+		if match == nil {
+			// Not a BootXXXX variable.
+			continue
+		}
+		num, err := strconv.ParseUint(match[1], 16, 16)
+		if err != nil {
+			// This cannot be hit as all regexp matches are parseable.
+			panic(err)
+		}
+		entry, err := efivarfs.GetBootEntry(int(num))
+		if err != nil {
+			return nil, fmt.Errorf("failed to get boot entry %d: %w", num, err)
+		}
+		entries[int(num)] = entry
+	}
+	return entries, nil
+}
+
+// getOrMakeBootEntry returns the index of the EFI boot entry which boots the
+// given slot from our ESP, creating that entry if none exists yet.
+//
+// An existing entry is considered ours only if its device path consists of
+// exactly two elements: a hard drive path matching a GPT partition whose UUID
+// equals s.ESPUUID, followed by a file path equal to slot.EFIBootPath().
+// Entries using any other kind of partition match are skipped.
+//
+// A newly created entry is also inserted into existing under its new index.
+// An error is returned if slot has no EFI boot path (i.e. is not SlotA or
+// SlotB) or if adding the new entry fails.
+func (s *Service) getOrMakeBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, error) {
+	bootPath := slot.EFIBootPath()
+	if bootPath == "" {
+		return 0, fmt.Errorf("cannot get or make boot entry for slot %v", slot)
+	}
+	for idx, e := range existing {
+		if len(e.FilePath) != 2 {
+			// Not our entry
+			continue
+		}
+		hd, ok := e.FilePath[0].(*efivarfs.HardDrivePath)
+		if !ok {
+			continue
+		}
+		// Accept both the value and the pointer form of the GPT match, as
+		// new entries below are built with the value form. Anything which is
+		// not a GPT match cannot refer to our ESP.
+		var partUUID uuid.UUID
+		switch m := hd.PartitionMatch.(type) {
+		case efivarfs.PartitionGPT:
+			partUUID = m.PartitionUUID
+		case *efivarfs.PartitionGPT:
+			if m == nil {
+				continue
+			}
+			partUUID = m.PartitionUUID
+		default:
+			continue
+		}
+		if partUUID != s.ESPUUID {
+			// Not related to our ESP
+			continue
+		}
+		fp, ok := e.FilePath[1].(efivarfs.FilePath)
+		if !ok {
+			continue
+		}
+		if string(fp) == bootPath {
+			return idx, nil
+		}
+	}
+	newEntry := &efivarfs.LoadOption{
+		Description: fmt.Sprintf("Metropolis Slot %s", slot),
+		FilePath: efivarfs.DevicePath{
+			&efivarfs.HardDrivePath{
+				PartitionNumber: s.ESPPartNumber,
+				PartitionMatch: efivarfs.PartitionGPT{
+					PartitionUUID: s.ESPUUID,
+				},
+			},
+			efivarfs.FilePath(bootPath),
+		},
+	}
+	newIdx, err := efivarfs.AddBootEntry(newEntry)
+	if err != nil {
+		return 0, fmt.Errorf("failed to add boot entry for slot %v: %w", slot, err)
+	}
+	existing[newIdx] = newEntry
+	return newIdx, nil
+}
+
+// MarkBootSuccessful must be called after each boot if some implementation-
+// defined criteria for a successful boot are met. If an update has been
+// installed and booted and this function is called, the updated version is
+// marked as default. If an issue occurs during boot and so this function is
+// not called the old version will be started again on next boot.
+func (s *Service) MarkBootSuccessful() error {
+ if s.ESPPath == "" {
+ return errors.New("no ESP information provided to update service, cannot continue")
+ }
+ bootEntries, err := s.getAllBootEntries()
+ if err != nil {
+ return fmt.Errorf("while getting boot entries: %w", err)
+ }
+ aIdx, err := s.getOrMakeBootEntry(bootEntries, SlotA)
+ if err != nil {
+ return fmt.Errorf("while ensuring slot A boot entry: %w", err)
+ }
+ bIdx, err := s.getOrMakeBootEntry(bootEntries, SlotB)
+ if err != nil {
+ return fmt.Errorf("while ensuring slot B boot entry: %w", err)
+ }
+
+ activeSlot := s.CurrentlyRunningSlot()
+ firstSlot := SlotInvalid
+
+ ord, err := efivarfs.GetBootOrder()
+ if err != nil {
+ return fmt.Errorf("failed to get boot order: %w", err)
+ }
+
+ for _, e := range ord {
+ if int(e) == aIdx {
+ firstSlot = SlotA
+ break
+ }
+ if int(e) == bIdx {
+ firstSlot = SlotB
+ break
+ }
+ }
+
+ if firstSlot == SlotInvalid {
+ bootOrder := make(efivarfs.BootOrder, 2)
+ switch activeSlot {
+ case SlotA:
+ bootOrder[0], bootOrder[1] = uint16(aIdx), uint16(bIdx)
+ case SlotB:
+ bootOrder[0], bootOrder[1] = uint16(bIdx), uint16(aIdx)
+ default:
+ return fmt.Errorf("invalid active slot")
+ }
+ efivarfs.SetBootOrder(bootOrder)
+ s.Logger.Infof("Metropolis missing from boot order, recreated it")
+ } else if activeSlot != firstSlot {
+ var aPos, bPos int
+ for i, e := range ord {
+ if int(e) == aIdx {
+ aPos = i
+ }
+ if int(e) == bIdx {
+ bPos = i
+ }
+ }
+ // swap A and B slots in boot order
+ ord[aPos], ord[bPos] = ord[bPos], ord[aPos]
+ if err := efivarfs.SetBootOrder(ord); err != nil {
+ return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
+ }
+ s.Logger.Infof("Permanently activated slot %v", activeSlot)
+ } else {
+ s.Logger.Infof("Normal boot from slot %v", activeSlot)
+ }
+
+ return nil
+}
+
+// openSystemSlot opens the block device of the system partition belonging to
+// the given slot: /dev/system-a for SlotA and /dev/system-b for SlotB. Any
+// other slot value yields an error.
+func openSystemSlot(slot Slot) (*blockdev.Device, error) {
+	if slot == SlotA {
+		return blockdev.Open("/dev/system-a")
+	}
+	if slot == SlotB {
+		return blockdev.Open("/dev/system-b")
+	}
+	return nil, errors.New("invalid slot identifier given")
+}
+
+// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
+// inactive slot and sets that slot to boot next. If it doesn't return an error,
+// a reboot boots into the new slot.
+//
+// The bundle is a ZIP archive which must contain the files "kernel_efi.efi"
+// and "verity_rootfs.img". The download is retried with exponential backoff
+// until it succeeds, fails permanently or ctx is canceled.
+//
+// While the target slot is being written its boot entry is marked inactive.
+// It is only reactivated and selected via BootNext once both the system image
+// and the EFI payload have been written. If an error is returned after
+// writing has started, the target slot's boot entry stays inactive.
+func (s *Service) InstallBundle(ctx context.Context, bundleURL string) error {
+	if s.ESPPath == "" {
+		return errors.New("no ESP information provided to update service, cannot continue")
+	}
+	// Determine the target slot up front so that an unusable system fails
+	// before the whole bundle is downloaded.
+	activeSlot := s.CurrentlyRunningSlot()
+	if activeSlot == SlotInvalid {
+		return errors.New("unable to determine active slot, cannot continue")
+	}
+	targetSlot := activeSlot.Other()
+
+	// Download into a buffer as ZIP files cannot efficiently be read from
+	// HTTP in Go as the ReaderAt has no way of indicating continuous sections,
+	// thus a ton of small range requests would need to be used, causing
+	// a huge latency penalty as well as costing a lot of money on typical
+	// object storages. This should go away when we switch to a better bundle
+	// format which can be streamed.
+	var bundleRaw bytes.Buffer
+	b := backoff.NewExponentialBackOff()
+	err := backoff.Retry(func() error {
+		return s.tryDownloadBundle(ctx, bundleURL, &bundleRaw)
+	}, backoff.WithContext(b, ctx))
+	if err != nil {
+		return fmt.Errorf("error downloading Metropolis bundle: %w", err)
+	}
+	bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
+	if err != nil {
+		return fmt.Errorf("failed to open node bundle: %w", err)
+	}
+	efiPayload, err := bundle.Open("kernel_efi.efi")
+	if err != nil {
+		return fmt.Errorf("invalid bundle: %w", err)
+	}
+	defer efiPayload.Close()
+	systemImage, err := bundle.Open("verity_rootfs.img")
+	if err != nil {
+		return fmt.Errorf("invalid bundle: %w", err)
+	}
+	defer systemImage.Close()
+
+	bootEntries, err := s.getAllBootEntries()
+	if err != nil {
+		return fmt.Errorf("while getting boot entries: %w", err)
+	}
+	targetSlotBootEntryIdx, err := s.getOrMakeBootEntry(bootEntries, targetSlot)
+	if err != nil {
+		return fmt.Errorf("while ensuring target slot boot entry: %w", err)
+	}
+	targetSlotBootEntry := bootEntries[targetSlotBootEntryIdx]
+
+	// Disable boot entry while the corresponding slot is being modified.
+	targetSlotBootEntry.Inactive = true
+	if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
+		return fmt.Errorf("failed setting boot entry %d inactive: %w", targetSlotBootEntryIdx, err)
+	}
+
+	systemPart, err := openSystemSlot(targetSlot)
+	if err != nil {
+		return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
+	}
+	defer systemPart.Close()
+	if _, err := io.Copy(blockdev.NewRWS(systemPart), systemImage); err != nil {
+		return status.Errorf(codes.Unavailable, "Failed to copy system image: %v", err)
+	}
+
+	bootFile, err := os.Create(filepath.Join(s.ESPPath, targetSlot.EFIBootPath()))
+	if err != nil {
+		return fmt.Errorf("failed to open boot file: %w", err)
+	}
+	// Covers the error paths below; on success the file is closed
+	// explicitly and this second Close is a harmless no-op error.
+	defer bootFile.Close()
+	if _, err := io.Copy(bootFile, efiPayload); err != nil {
+		return fmt.Errorf("failed to write boot file: %w", err)
+	}
+	// The boot entry must only be reactivated once the loader has actually
+	// been written out, so flush and close it and check both for errors.
+	if err := bootFile.Sync(); err != nil {
+		return fmt.Errorf("failed to sync boot file: %w", err)
+	}
+	if err := bootFile.Close(); err != nil {
+		return fmt.Errorf("failed to close boot file: %w", err)
+	}
+
+	// Reenable target slot boot entry after boot and system have been written
+	// fully. The slot should now be bootable again.
+	targetSlotBootEntry.Inactive = false
+	if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
+		return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
+	}
+
+	if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
+		return fmt.Errorf("failed to set BootNext variable: %w", err)
+	}
+
+	return nil
+}
+
+// tryDownloadBundle performs a single attempt at downloading bundleURL into
+// bundleRaw. It is meant to be called from within backoff.Retry.
+//
+// Errors which are not expected to go away on retry (a request which cannot
+// be constructed, or an unexpected HTTP status code) are returned wrapped in
+// backoff.Permanent. Transport errors, the listed transient HTTP status codes
+// and errors while reading the body are returned as plain errors and thus
+// retried. bundleRaw only contains the complete body if nil is returned; it
+// is emptied before the body is read and again if reading fails.
+func (*Service) tryDownloadBundle(ctx context.Context, bundleURL string, bundleRaw *bytes.Buffer) error {
+	bundleReq, err := http.NewRequestWithContext(ctx, "GET", bundleURL, nil)
+	if err != nil {
+		// An unparseable URL will not fix itself, and passing the resulting
+		// nil request to the client would crash.
+		return backoff.Permanent(fmt.Errorf("failed to create HTTP request: %w", err))
+	}
+	bundleRes, err := http.DefaultClient.Do(bundleReq)
+	if err != nil {
+		return fmt.Errorf("HTTP request failed: %w", err)
+	}
+	defer bundleRes.Body.Close()
+	switch bundleRes.StatusCode {
+	case http.StatusTooEarly, http.StatusTooManyRequests,
+		http.StatusInternalServerError, http.StatusBadGateway,
+		http.StatusServiceUnavailable, http.StatusGatewayTimeout:
+		return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
+	default:
+		// Non-standard code range used for proxy-related issue by various
+		// vendors. Treat as non-permanent error.
+		if bundleRes.StatusCode >= 520 && bundleRes.StatusCode <= 599 {
+			return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
+		}
+		if bundleRes.StatusCode != http.StatusOK {
+			return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
+		}
+	}
+	// Never append to leftovers of an earlier attempt.
+	bundleRaw.Reset()
+	if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
+		bundleRaw.Reset()
+		return err
+	}
+	return nil
+}