blob: 28b2381cae2cb3f4d79a7f9afec0bd24ce4f6fb1 [file] [log] [blame]
Lorenz Brun35fcf032023-06-29 04:15:58 +02001package update
2
3import (
4 "archive/zip"
5 "bytes"
6 "context"
Lorenz Brund79881d2023-11-30 19:02:06 +01007 "crypto/sha256"
Lorenz Brund14be0e2023-07-31 16:46:14 +02008 "debug/pe"
Lorenz Brund79881d2023-11-30 19:02:06 +01009 _ "embed"
Lorenz Brun35fcf032023-06-29 04:15:58 +020010 "errors"
11 "fmt"
12 "io"
13 "net/http"
14 "os"
15 "path/filepath"
16 "regexp"
17 "strconv"
Lorenz Brund14be0e2023-07-31 16:46:14 +020018 "strings"
Lorenz Brun35fcf032023-06-29 04:15:58 +020019
20 "github.com/cenkalti/backoff/v4"
Lorenz Brund14be0e2023-07-31 16:46:14 +020021 "golang.org/x/sys/unix"
Lorenz Brun35fcf032023-06-29 04:15:58 +020022 "google.golang.org/grpc/codes"
23 "google.golang.org/grpc/status"
Lorenz Brun54a5a052023-10-02 16:40:11 +020024 "google.golang.org/protobuf/proto"
Lorenz Brun35fcf032023-06-29 04:15:58 +020025
26 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
Lorenz Brun54a5a052023-10-02 16:40:11 +020027 abloaderpb "source.monogon.dev/metropolis/node/core/abloader/spec"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020028 "source.monogon.dev/osbase/blockdev"
29 "source.monogon.dev/osbase/efivarfs"
30 "source.monogon.dev/osbase/gpt"
31 "source.monogon.dev/osbase/kexec"
32 "source.monogon.dev/osbase/logtree"
Lorenz Brun35fcf032023-06-29 04:15:58 +020033)
34
// Service contains data and functionality to perform A/B updates on a
// Metropolis node. Information about the EFI System Partition (ESP) has to be
// provided (for example through ProvideESP) before MarkBootSuccessful or
// InstallBundle can be used.
type Service struct {
	// ESPPath is the path to the mount point of the EFI System Partition
	// (ESP). MarkBootSuccessful and InstallBundle refuse to run while it is
	// empty.
	ESPPath string
	// ESPPart is the gpt.Partition describing the ESP.
	ESPPart *gpt.Partition
	// ESPPartNumber is the partition number (1-based) of the ESP in the GPT
	// partitions array.
	ESPPartNumber uint32

	// Logger is the logger used by the update service.
	Logger logtree.LeveledLogger
}
48
// Slot identifies one of the two system slots used for A/B updates.
type Slot int

// Valid values of Slot. The zero value of Slot is SlotInvalid.
const (
	SlotInvalid Slot = iota
	SlotA
	SlotB
)

// Other returns the opposite slot: SlotB for SlotA and SlotA for SlotB. Any
// other value, including SlotInvalid, yields SlotInvalid.
func (s Slot) Other() Slot {
	if s == SlotA {
		return SlotB
	}
	if s == SlotB {
		return SlotA
	}
	return SlotInvalid
}

// String returns the single-letter name of the slot ("A" or "B"), or
// "<invalid slot>" for every other value.
func (s Slot) String() string {
	if s == SlotA {
		return "A"
	}
	if s == SlotB {
		return "B"
	}
	return "<invalid slot>"
}
80
81func (s Slot) EFIBootPath() string {
82 switch s {
83 case SlotA:
84 return osimage.EFIBootAPath
85 case SlotB:
86 return osimage.EFIBootBPath
87 default:
88 return ""
89 }
90}
91
// slotRegexp matches the PARTLABEL reference to a Metropolis system partition
// and captures the slot letter (A or B) as its only submatch. It is applied to
// the kernel command line by CurrentlyRunningSlot.
var slotRegexp = regexp.MustCompile(`PARTLABEL=METROPOLIS-SYSTEM-([AB])`)
93
94// ProvideESP is a convenience function for providing information about the
95// ESP after the update service has been instantiated.
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000096func (s *Service) ProvideESP(path string, partNum uint32, part *gpt.Partition) {
Lorenz Brun35fcf032023-06-29 04:15:58 +020097 s.ESPPath = path
98 s.ESPPartNumber = partNum
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000099 s.ESPPart = part
Lorenz Brun35fcf032023-06-29 04:15:58 +0200100}
101
102// CurrentlyRunningSlot returns the slot the current system is booted from.
103func (s *Service) CurrentlyRunningSlot() Slot {
104 cmdline, err := os.ReadFile("/proc/cmdline")
105 if err != nil {
106 return SlotInvalid
107 }
108 slotMatches := slotRegexp.FindStringSubmatch(string(cmdline))
109 if len(slotMatches) != 2 {
110 return SlotInvalid
111 }
112 switch slotMatches[1] {
113 case "A":
114 return SlotA
115 case "B":
116 return SlotB
117 default:
118 panic("unreachable")
119 }
120}
121
// bootVarRegexp matches EFI variable names of the form BootXXXX, where XXXX
// are exactly four hexadecimal digits, and captures those digits. fixupEFI
// uses it to pick boot entries out of the list of global EFI variables.
var bootVarRegexp = regexp.MustCompile(`^Boot([0-9A-Fa-f]{4})$`)
123
Lorenz Brun35fcf032023-06-29 04:15:58 +0200124// MarkBootSuccessful must be called after each boot if some implementation-
125// defined criteria for a successful boot are met. If an update has been
126// installed and booted and this function is called, the updated version is
127// marked as default. If an issue occurs during boot and so this function is
128// not called the old version will be started again on next boot.
129func (s *Service) MarkBootSuccessful() error {
130 if s.ESPPath == "" {
131 return errors.New("no ESP information provided to update service, cannot continue")
132 }
Lorenz Brund79881d2023-11-30 19:02:06 +0100133 if err := s.fixupEFI(); err != nil {
134 s.Logger.Errorf("Error when checking boot entry configuration: %v", err)
135 }
136 if err := s.fixupPreloader(); err != nil {
137 s.Logger.Errorf("Error when fixing A/B preloader: %v", err)
138 }
Lorenz Brun35fcf032023-06-29 04:15:58 +0200139 activeSlot := s.CurrentlyRunningSlot()
Lorenz Brun54a5a052023-10-02 16:40:11 +0200140 abState, err := s.getABState()
Lorenz Brun35fcf032023-06-29 04:15:58 +0200141 if err != nil {
Lorenz Brun54a5a052023-10-02 16:40:11 +0200142 s.Logger.Warningf("Error while getting A/B loader state, recreating: %v", err)
143 abState = &abloaderpb.ABLoaderData{
144 ActiveSlot: abloaderpb.Slot(activeSlot),
Lorenz Brun35fcf032023-06-29 04:15:58 +0200145 }
Lorenz Brun54a5a052023-10-02 16:40:11 +0200146 err := s.setABState(abState)
147 if err != nil {
148 return fmt.Errorf("while recreating A/B loader state: %w", err)
Lorenz Brun35fcf032023-06-29 04:15:58 +0200149 }
150 }
Lorenz Brun54a5a052023-10-02 16:40:11 +0200151 if Slot(abState.ActiveSlot) != activeSlot {
152 err := s.setABState(&abloaderpb.ABLoaderData{
153 ActiveSlot: abloaderpb.Slot(activeSlot),
154 })
155 if err != nil {
156 return fmt.Errorf("while setting next A/B slot: %w", err)
Lorenz Brun35fcf032023-06-29 04:15:58 +0200157 }
158 s.Logger.Infof("Permanently activated slot %v", activeSlot)
159 } else {
160 s.Logger.Infof("Normal boot from slot %v", activeSlot)
161 }
162
163 return nil
164}
165
166func openSystemSlot(slot Slot) (*blockdev.Device, error) {
167 switch slot {
168 case SlotA:
169 return blockdev.Open("/dev/system-a")
170 case SlotB:
171 return blockdev.Open("/dev/system-b")
172 default:
173 return nil, errors.New("invalid slot identifier given")
174 }
175}
176
Lorenz Brun54a5a052023-10-02 16:40:11 +0200177func (s *Service) getABState() (*abloaderpb.ABLoaderData, error) {
178 abDataRaw, err := os.ReadFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"))
179 if err != nil {
180 return nil, err
181 }
182 var abData abloaderpb.ABLoaderData
183 if err := proto.Unmarshal(abDataRaw, &abData); err != nil {
184 return nil, err
185 }
186 return &abData, nil
187}
188
189func (s *Service) setABState(d *abloaderpb.ABLoaderData) error {
190 abDataRaw, err := proto.Marshal(d)
191 if err != nil {
192 return fmt.Errorf("while marshaling: %w", err)
193 }
194 if err := os.WriteFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"), abDataRaw, 0666); err != nil {
195 return err
196 }
197 return nil
198}
199
Lorenz Brun35fcf032023-06-29 04:15:58 +0200200// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
201// inactive slot and sets that slot to boot next. If it doesn't return an error,
202// a reboot boots into the new slot.
Lorenz Brund14be0e2023-07-31 16:46:14 +0200203func (s *Service) InstallBundle(ctx context.Context, bundleURL string, withKexec bool) error {
Lorenz Brun35fcf032023-06-29 04:15:58 +0200204 if s.ESPPath == "" {
205 return errors.New("no ESP information provided to update service, cannot continue")
206 }
207 // Download into a buffer as ZIP files cannot efficiently be read from
208 // HTTP in Go as the ReaderAt has no way of indicating continuous sections,
209 // thus a ton of small range requests would need to be used, causing
210 // a huge latency penalty as well as costing a lot of money on typical
211 // object storages. This should go away when we switch to a better bundle
212 // format which can be streamed.
213 var bundleRaw bytes.Buffer
214 b := backoff.NewExponentialBackOff()
215 err := backoff.Retry(func() error {
216 return s.tryDownloadBundle(ctx, bundleURL, &bundleRaw)
217 }, backoff.WithContext(b, ctx))
218 if err != nil {
219 return fmt.Errorf("error downloading Metropolis bundle: %v", err)
220 }
221 bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
222 if err != nil {
223 return fmt.Errorf("failed to open node bundle: %w", err)
224 }
225 efiPayload, err := bundle.Open("kernel_efi.efi")
226 if err != nil {
227 return fmt.Errorf("invalid bundle: %w", err)
228 }
229 defer efiPayload.Close()
230 systemImage, err := bundle.Open("verity_rootfs.img")
231 if err != nil {
232 return fmt.Errorf("invalid bundle: %w", err)
233 }
234 defer systemImage.Close()
235 activeSlot := s.CurrentlyRunningSlot()
236 if activeSlot == SlotInvalid {
237 return errors.New("unable to determine active slot, cannot continue")
238 }
239 targetSlot := activeSlot.Other()
240
Lorenz Brun35fcf032023-06-29 04:15:58 +0200241 systemPart, err := openSystemSlot(targetSlot)
242 if err != nil {
243 return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
244 }
245 defer systemPart.Close()
246 if _, err := io.Copy(blockdev.NewRWS(systemPart), systemImage); err != nil {
247 return status.Errorf(codes.Unavailable, "Failed to copy system image: %v", err)
248 }
249
250 bootFile, err := os.Create(filepath.Join(s.ESPPath, targetSlot.EFIBootPath()))
251 if err != nil {
252 return fmt.Errorf("failed to open boot file: %w", err)
253 }
254 defer bootFile.Close()
255 if _, err := io.Copy(bootFile, efiPayload); err != nil {
256 return fmt.Errorf("failed to write boot file: %w", err)
257 }
258
Lorenz Brund14be0e2023-07-31 16:46:14 +0200259 if withKexec {
260 if err := s.stageKexec(bootFile, targetSlot); err != nil {
261 return fmt.Errorf("while kexec staging: %w", err)
262 }
263 } else {
Lorenz Brun54a5a052023-10-02 16:40:11 +0200264 err := s.setABState(&abloaderpb.ABLoaderData{
265 ActiveSlot: abloaderpb.Slot(activeSlot),
266 NextSlot: abloaderpb.Slot(targetSlot),
267 })
268 if err != nil {
269 return fmt.Errorf("while setting next A/B slot: %w", err)
Lorenz Brund14be0e2023-07-31 16:46:14 +0200270 }
Lorenz Brun35fcf032023-06-29 04:15:58 +0200271 }
272
273 return nil
274}
275
276func (*Service) tryDownloadBundle(ctx context.Context, bundleURL string, bundleRaw *bytes.Buffer) error {
277 bundleReq, err := http.NewRequestWithContext(ctx, "GET", bundleURL, nil)
Tim Windelschmidt096654a2024-04-18 23:10:19 +0200278 if err != nil {
279 return fmt.Errorf("failed to create request: %w", err)
280 }
Lorenz Brun35fcf032023-06-29 04:15:58 +0200281 bundleRes, err := http.DefaultClient.Do(bundleReq)
282 if err != nil {
283 return fmt.Errorf("HTTP request failed: %w", err)
284 }
285 defer bundleRes.Body.Close()
286 switch bundleRes.StatusCode {
287 case http.StatusTooEarly, http.StatusTooManyRequests,
288 http.StatusInternalServerError, http.StatusBadGateway,
289 http.StatusServiceUnavailable, http.StatusGatewayTimeout:
290 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
291 default:
292 // Non-standard code range used for proxy-related issue by various
293 // vendors. Treat as non-permanent error.
294 if bundleRes.StatusCode >= 520 && bundleRes.StatusCode < 599 {
295 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
296 }
297 if bundleRes.StatusCode != 200 {
298 return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
299 }
300 }
301 if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
302 bundleRaw.Reset()
303 return err
304 }
305 return nil
306}
Lorenz Brund14be0e2023-07-31 16:46:14 +0200307
308// newMemfile creates a new file which is not located on a specific filesystem,
309// but is instead backed by anonymous memory.
310func newMemfile(name string, flags int) (*os.File, error) {
311 fd, err := unix.MemfdCreate(name, flags)
312 if err != nil {
313 return nil, fmt.Errorf("memfd_create: %w", err)
314 }
315 return os.NewFile(uintptr(fd), name), nil
316}
317
318// stageKexec stages the kernel, command line and initramfs if available for
319// a future kexec. It extracts the relevant data from the EFI boot executable.
320func (s *Service) stageKexec(bootFile io.ReaderAt, targetSlot Slot) error {
321 bootPE, err := pe.NewFile(bootFile)
322 if err != nil {
323 return fmt.Errorf("unable to open bootFile as PE: %w", err)
324 }
325 var cmdlineRaw []byte
326 cmdlineSection := bootPE.Section(".cmdline")
327 if cmdlineSection == nil {
328 return fmt.Errorf("no .cmdline section in boot PE")
329 }
330 cmdlineRaw, err = cmdlineSection.Data()
331 if err != nil {
332 return fmt.Errorf("while reading .cmdline PE section: %w", err)
333 }
334 cmdline := string(bytes.TrimRight(cmdlineRaw, "\x00"))
335 cmdline = strings.ReplaceAll(cmdline, "METROPOLIS-SYSTEM-X", fmt.Sprintf("METROPOLIS-SYSTEM-%s", targetSlot))
336 kernelFile, err := newMemfile("kernel", 0)
337 if err != nil {
338 return fmt.Errorf("failed to create kernel memfile: %w", err)
339 }
340 defer kernelFile.Close()
341 kernelSection := bootPE.Section(".linux")
342 if kernelSection == nil {
343 return fmt.Errorf("no .linux section in boot PE")
344 }
345 if _, err := io.Copy(kernelFile, kernelSection.Open()); err != nil {
346 return fmt.Errorf("while copying .linux PE section: %w", err)
347 }
348
349 initramfsSection := bootPE.Section(".initrd")
350 var initramfsFile *os.File
351 if initramfsSection != nil && initramfsSection.Size > 0 {
352 initramfsFile, err = newMemfile("initramfs", 0)
353 if err != nil {
354 return fmt.Errorf("failed to create initramfs memfile: %w", err)
355 }
356 defer initramfsFile.Close()
357 if _, err := io.Copy(initramfsFile, initramfsSection.Open()); err != nil {
358 return fmt.Errorf("while copying .initrd PE section: %w", err)
359 }
360 }
361 if err := kexec.FileLoad(kernelFile, initramfsFile, cmdline); err != nil {
362 return fmt.Errorf("while staging new kexec kernel: %w", err)
363 }
364 return nil
365}
Lorenz Brund79881d2023-11-30 19:02:06 +0100366
367//go:embed metropolis/node/core/abloader/abloader_bin.efi
368var abloader []byte
369
370func (s *Service) fixupPreloader() error {
371 abLoaderFile, err := os.Open(filepath.Join(s.ESPPath, osimage.EFIPayloadPath))
372 if err != nil {
373 s.Logger.Warningf("A/B preloader not available, attempting to restore: %v", err)
374 } else {
375 expectedSum := sha256.Sum256(abloader)
376 h := sha256.New()
377 _, err := io.Copy(h, abLoaderFile)
378 abLoaderFile.Close()
379 if err == nil {
380 if bytes.Equal(h.Sum(nil), expectedSum[:]) {
381 // A/B Preloader is present and has correct hash
382 return nil
383 } else {
384 s.Logger.Infof("Replacing A/B preloader with current version: %x %x", h.Sum(nil), expectedSum[:])
385 }
386 } else {
387 s.Logger.Warningf("Error while reading A/B preloader, restoring: %v", err)
388 }
389 }
390 preloader, err := os.Create(filepath.Join(s.ESPPath, "preloader.swp"))
391 if err != nil {
392 return fmt.Errorf("while creating preloader swap file: %w", err)
393 }
394 if _, err := preloader.Write(abloader); err != nil {
395 return fmt.Errorf("while writing preloader swap file: %w", err)
396 }
397 if err := preloader.Sync(); err != nil {
398 return fmt.Errorf("while sync'ing preloader swap file: %w", err)
399 }
400 preloader.Close()
401 if err := os.Rename(filepath.Join(s.ESPPath, "preloader.swp"), filepath.Join(s.ESPPath, osimage.EFIPayloadPath)); err != nil {
402 return fmt.Errorf("while swapping preloader: %w", err)
403 }
404 s.Logger.Info("Successfully wrote current preloader")
405 return nil
406}
407
408// fixupEFI checks for the existence and correctness of the EFI boot entry
409// repairs/recreates it if needed.
410func (s *Service) fixupEFI() error {
411 varNames, err := efivarfs.List(efivarfs.ScopeGlobal)
412 if err != nil {
413 return fmt.Errorf("failed to list EFI variables: %w", err)
414 }
Tim Windelschmidt5e460a92024-04-11 01:33:09 +0200415 var validBootEntryIdx = -1
Lorenz Brund79881d2023-11-30 19:02:06 +0100416 for _, varName := range varNames {
417 m := bootVarRegexp.FindStringSubmatch(varName)
418 if m == nil {
419 continue
420 }
421 idx, err := strconv.ParseUint(m[1], 16, 16)
422 if err != nil {
423 // This cannot be hit as all regexp matches are parseable.
424 panic(err)
425 }
426 e, err := efivarfs.GetBootEntry(int(idx))
427 if err != nil {
428 s.Logger.Warningf("Unable to get boot entry %d, skipping: %v", idx, err)
429 continue
430 }
431 if len(e.FilePath) != 2 {
432 // Not our entry, ours always have two parts
433 continue
434 }
435 switch p := e.FilePath[0].(type) {
436 case *efivarfs.HardDrivePath:
437 gptMatch, ok := p.PartitionMatch.(*efivarfs.PartitionGPT)
438 if ok && gptMatch.PartitionUUID != s.ESPPart.ID {
439 // Not related to our ESP
440 continue
441 }
442 default:
443 continue
444 }
445 switch p := e.FilePath[1].(type) {
446 case efivarfs.FilePath:
447 if string(p) == osimage.EFIPayloadPath {
448 if validBootEntryIdx == -1 {
449 validBootEntryIdx = int(idx)
450 } else {
451 // Another valid boot entry already exists, delete this one
452 err := efivarfs.DeleteBootEntry(int(idx))
453 if err == nil {
454 s.Logger.Infof("Deleted duplicate boot entry %q", e.Description)
455 } else {
456 s.Logger.Warningf("Error while deleting duplicate boot entry %q: %v", e.Description, err)
457 }
458 }
459 } else if strings.Contains(e.Description, "Metropolis") {
460 err := efivarfs.DeleteBootEntry(int(idx))
461 if err == nil {
462 s.Logger.Infof("Deleted orphaned boot entry %q", e.Description)
463 } else {
464 s.Logger.Warningf("Error while deleting orphaned boot entry %q: %v", e.Description, err)
465 }
466 }
467 default:
468 continue
469 }
470 }
471 if validBootEntryIdx == -1 {
472 validBootEntryIdx, err = efivarfs.AddBootEntry(&efivarfs.LoadOption{
473 Description: "Metropolis",
474 FilePath: efivarfs.DevicePath{
475 &efivarfs.HardDrivePath{
476 PartitionNumber: 1,
477 PartitionStartBlock: s.ESPPart.FirstBlock,
478 PartitionSizeBlocks: s.ESPPart.SizeBlocks(),
479 PartitionMatch: efivarfs.PartitionGPT{
480 PartitionUUID: s.ESPPart.ID,
481 },
482 },
483 efivarfs.FilePath(osimage.EFIPayloadPath),
484 },
485 })
486 if err == nil {
487 s.Logger.Infof("Restored missing EFI boot entry for Metropolis")
488 } else {
489 return fmt.Errorf("while restoring missing EFI boot entry for Metropolis: %v", err)
490 }
491 }
492 bootOrder, err := efivarfs.GetBootOrder()
493 if err != nil {
494 return fmt.Errorf("failed to get EFI boot order: %v", err)
495 }
496 for _, bentry := range bootOrder {
497 if bentry == uint16(validBootEntryIdx) {
498 // Our boot entry is in the boot order, everything's ok
499 return nil
500 }
501 }
502 newBootOrder := append(efivarfs.BootOrder{uint16(validBootEntryIdx)}, bootOrder...)
503 if err := efivarfs.SetBootOrder(newBootOrder); err != nil {
504 return fmt.Errorf("while setting EFI boot order: %w", err)
505 }
506 return nil
507}