m/n/c/update: implement kexec-based activation
We have had some issues with EFI-based slot activation, and enterprise
server firmware is extremely slow, so this change implements kexec-based
activation. With it, the node kexecs directly into the freshly-installed
slot instead of rebooting. The BootOrder is still updated on a successful
boot, so that cold boots land in the new slot if the server crashes or
loses power, but the NextBoot mechanism is no longer used to boot into
the new slot once, as kexec takes care of that.
Change-Id: I6092c47d988634ba39fb6bdd7fd7ccd41ceb02ef
Reviewed-on: https://review.monogon.dev/c/monogon/+/2021
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/mgmt/update.go b/metropolis/node/core/mgmt/update.go
index 28a2a0a..ce8b26b 100644
--- a/metropolis/node/core/mgmt/update.go
+++ b/metropolis/node/core/mgmt/update.go
@@ -12,16 +12,30 @@
)
func (s *Service) UpdateNode(ctx context.Context, req *apb.UpdateNodeRequest) (*apb.UpdateNodeResponse, error) {
- if err := s.UpdateService.InstallBundle(ctx, req.BundleUrl); err != nil {
+ ok := s.updateMutex.TryLock()
+ if ok {
+ defer s.updateMutex.Unlock()
+ } else {
+ return nil, status.Error(codes.Aborted, "another UpdateNode RPC is in progress on this node")
+ }
+ if req.ActivationMode == apb.ActivationMode_ACTIVATION_INVALID {
+ return nil, status.Errorf(codes.InvalidArgument, "activation_mode needs to be explicitly specified")
+ }
+ if err := s.UpdateService.InstallBundle(ctx, req.BundleUrl, req.ActivationMode == apb.ActivationMode_ACTIVATION_KEXEC); err != nil {
return nil, status.Errorf(codes.Unavailable, "error installing update: %v", err)
}
- if !req.NoReboot {
+ if req.ActivationMode != apb.ActivationMode_ACTIVATION_NONE {
// TODO(#253): Tell Supervisor to shut down gracefully and reboot
go func() {
time.Sleep(10 * time.Second)
unix.Sync()
- unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
+ if req.ActivationMode == apb.ActivationMode_ACTIVATION_KEXEC {
+ unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC)
+ } else {
+ unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
+ }
}()
}
+
return &apb.UpdateNodeResponse{}, nil
}