blob: ce0d02136a78b3b1e04a45db3a4c80d0eff23ae2 [file] [log] [blame]
Lorenz Brun35fcf032023-06-29 04:15:58 +02001package update
2
3import (
4 "archive/zip"
5 "bytes"
6 "context"
Lorenz Brund79881d2023-11-30 19:02:06 +01007 "crypto/sha256"
Lorenz Brund14be0e2023-07-31 16:46:14 +02008 "debug/pe"
Lorenz Brund79881d2023-11-30 19:02:06 +01009 _ "embed"
Lorenz Brun35fcf032023-06-29 04:15:58 +020010 "errors"
11 "fmt"
12 "io"
13 "net/http"
14 "os"
15 "path/filepath"
16 "regexp"
17 "strconv"
Lorenz Brund14be0e2023-07-31 16:46:14 +020018 "strings"
Lorenz Brun35fcf032023-06-29 04:15:58 +020019
20 "github.com/cenkalti/backoff/v4"
Lorenz Brund14be0e2023-07-31 16:46:14 +020021 "golang.org/x/sys/unix"
Lorenz Brun35fcf032023-06-29 04:15:58 +020022 "google.golang.org/grpc/codes"
23 "google.golang.org/grpc/status"
Lorenz Brun54a5a052023-10-02 16:40:11 +020024 "google.golang.org/protobuf/proto"
Lorenz Brun35fcf032023-06-29 04:15:58 +020025
26 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
Lorenz Brun54a5a052023-10-02 16:40:11 +020027 abloaderpb "source.monogon.dev/metropolis/node/core/abloader/spec"
Lorenz Brun35fcf032023-06-29 04:15:58 +020028 "source.monogon.dev/metropolis/pkg/blockdev"
29 "source.monogon.dev/metropolis/pkg/efivarfs"
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000030 "source.monogon.dev/metropolis/pkg/gpt"
Lorenz Brund14be0e2023-07-31 16:46:14 +020031 "source.monogon.dev/metropolis/pkg/kexec"
Lorenz Brun35fcf032023-06-29 04:15:58 +020032 "source.monogon.dev/metropolis/pkg/logtree"
33)
34
35// Service contains data and functionality to perform A/B updates on a
36// Metropolis node.
37type Service struct {
38 // Path to the mount point of the EFI System Partition (ESP).
39 ESPPath string
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000040 // gpt.Partition of the ESP System Partition.
41 ESPPart *gpt.Partition
Lorenz Brun35fcf032023-06-29 04:15:58 +020042 // Partition number (1-based) of the ESP in the GPT partitions array.
43 ESPPartNumber uint32
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000044
Lorenz Brun35fcf032023-06-29 04:15:58 +020045 // Logger service for the update service.
46 Logger logtree.LeveledLogger
47}
48
// Slot identifies one of the two system slots of an A/B-updated node.
type Slot int

// The numeric values are spelled out explicitly because slots get converted
// to and from other integer-based slot types elsewhere in this package.
const (
	// SlotInvalid is the zero value and does not refer to any slot.
	SlotInvalid Slot = 0
	// SlotA refers to system slot A.
	SlotA Slot = 1
	// SlotB refers to system slot B.
	SlotB Slot = 2
)

// Other returns the opposite slot: SlotB for SlotA and SlotA for SlotB.
// Any other value yields SlotInvalid.
func (s Slot) Other() Slot {
	if s == SlotA {
		return SlotB
	}
	if s == SlotB {
		return SlotA
	}
	return SlotInvalid
}

// String returns the single-letter name of the slot ("A" or "B"), or
// "<invalid slot>" for any value which is not SlotA or SlotB.
func (s Slot) String() string {
	if s == SlotA {
		return "A"
	}
	if s == SlotB {
		return "B"
	}
	return "<invalid slot>"
}
80
81func (s Slot) EFIBootPath() string {
82 switch s {
83 case SlotA:
84 return osimage.EFIBootAPath
85 case SlotB:
86 return osimage.EFIBootBPath
87 default:
88 return ""
89 }
90}
91
// slotRegexp extracts the slot letter (A or B) from a PARTLABEL reference
// to a METROPOLIS-SYSTEM partition, as found on the kernel command line.
var slotRegexp = regexp.MustCompile(`PARTLABEL=METROPOLIS-SYSTEM-([AB])`)
93
94// ProvideESP is a convenience function for providing information about the
95// ESP after the update service has been instantiated.
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000096func (s *Service) ProvideESP(path string, partNum uint32, part *gpt.Partition) {
Lorenz Brun35fcf032023-06-29 04:15:58 +020097 s.ESPPath = path
98 s.ESPPartNumber = partNum
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000099 s.ESPPart = part
Lorenz Brun35fcf032023-06-29 04:15:58 +0200100}
101
102// CurrentlyRunningSlot returns the slot the current system is booted from.
103func (s *Service) CurrentlyRunningSlot() Slot {
104 cmdline, err := os.ReadFile("/proc/cmdline")
105 if err != nil {
106 return SlotInvalid
107 }
108 slotMatches := slotRegexp.FindStringSubmatch(string(cmdline))
109 if len(slotMatches) != 2 {
110 return SlotInvalid
111 }
112 switch slotMatches[1] {
113 case "A":
114 return SlotA
115 case "B":
116 return SlotB
117 default:
118 panic("unreachable")
119 }
120}
121
// bootVarRegexp matches names of the form BootXXXX, where XXXX are exactly
// four hexadecimal digits, and captures those digits.
var bootVarRegexp = regexp.MustCompile(`^Boot([0-9A-Fa-f]{4})$`)
123
Lorenz Brun35fcf032023-06-29 04:15:58 +0200124// MarkBootSuccessful must be called after each boot if some implementation-
125// defined criteria for a successful boot are met. If an update has been
126// installed and booted and this function is called, the updated version is
127// marked as default. If an issue occurs during boot and so this function is
128// not called the old version will be started again on next boot.
129func (s *Service) MarkBootSuccessful() error {
130 if s.ESPPath == "" {
131 return errors.New("no ESP information provided to update service, cannot continue")
132 }
Lorenz Brund79881d2023-11-30 19:02:06 +0100133 if err := s.fixupEFI(); err != nil {
134 s.Logger.Errorf("Error when checking boot entry configuration: %v", err)
135 }
136 if err := s.fixupPreloader(); err != nil {
137 s.Logger.Errorf("Error when fixing A/B preloader: %v", err)
138 }
Lorenz Brun35fcf032023-06-29 04:15:58 +0200139 activeSlot := s.CurrentlyRunningSlot()
Lorenz Brun54a5a052023-10-02 16:40:11 +0200140 abState, err := s.getABState()
Lorenz Brun35fcf032023-06-29 04:15:58 +0200141 if err != nil {
Lorenz Brun54a5a052023-10-02 16:40:11 +0200142 s.Logger.Warningf("Error while getting A/B loader state, recreating: %v", err)
143 abState = &abloaderpb.ABLoaderData{
144 ActiveSlot: abloaderpb.Slot(activeSlot),
Lorenz Brun35fcf032023-06-29 04:15:58 +0200145 }
Lorenz Brun54a5a052023-10-02 16:40:11 +0200146 err := s.setABState(abState)
147 if err != nil {
148 return fmt.Errorf("while recreating A/B loader state: %w", err)
Lorenz Brun35fcf032023-06-29 04:15:58 +0200149 }
150 }
Lorenz Brun54a5a052023-10-02 16:40:11 +0200151 if Slot(abState.ActiveSlot) != activeSlot {
152 err := s.setABState(&abloaderpb.ABLoaderData{
153 ActiveSlot: abloaderpb.Slot(activeSlot),
154 })
155 if err != nil {
156 return fmt.Errorf("while setting next A/B slot: %w", err)
Lorenz Brun35fcf032023-06-29 04:15:58 +0200157 }
158 s.Logger.Infof("Permanently activated slot %v", activeSlot)
159 } else {
160 s.Logger.Infof("Normal boot from slot %v", activeSlot)
161 }
162
163 return nil
164}
165
166func openSystemSlot(slot Slot) (*blockdev.Device, error) {
167 switch slot {
168 case SlotA:
169 return blockdev.Open("/dev/system-a")
170 case SlotB:
171 return blockdev.Open("/dev/system-b")
172 default:
173 return nil, errors.New("invalid slot identifier given")
174 }
175}
176
Lorenz Brun54a5a052023-10-02 16:40:11 +0200177func (s *Service) getABState() (*abloaderpb.ABLoaderData, error) {
178 abDataRaw, err := os.ReadFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"))
179 if err != nil {
180 return nil, err
181 }
182 var abData abloaderpb.ABLoaderData
183 if err := proto.Unmarshal(abDataRaw, &abData); err != nil {
184 return nil, err
185 }
186 return &abData, nil
187}
188
189func (s *Service) setABState(d *abloaderpb.ABLoaderData) error {
190 abDataRaw, err := proto.Marshal(d)
191 if err != nil {
192 return fmt.Errorf("while marshaling: %w", err)
193 }
194 if err := os.WriteFile(filepath.Join(s.ESPPath, "EFI/metropolis/loader_state.pb"), abDataRaw, 0666); err != nil {
195 return err
196 }
197 return nil
198}
199
Lorenz Brun35fcf032023-06-29 04:15:58 +0200200// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
201// inactive slot and sets that slot to boot next. If it doesn't return an error,
202// a reboot boots into the new slot.
Lorenz Brund14be0e2023-07-31 16:46:14 +0200203func (s *Service) InstallBundle(ctx context.Context, bundleURL string, withKexec bool) error {
Lorenz Brun35fcf032023-06-29 04:15:58 +0200204 if s.ESPPath == "" {
205 return errors.New("no ESP information provided to update service, cannot continue")
206 }
207 // Download into a buffer as ZIP files cannot efficiently be read from
208 // HTTP in Go as the ReaderAt has no way of indicating continuous sections,
209 // thus a ton of small range requests would need to be used, causing
210 // a huge latency penalty as well as costing a lot of money on typical
211 // object storages. This should go away when we switch to a better bundle
212 // format which can be streamed.
213 var bundleRaw bytes.Buffer
214 b := backoff.NewExponentialBackOff()
215 err := backoff.Retry(func() error {
216 return s.tryDownloadBundle(ctx, bundleURL, &bundleRaw)
217 }, backoff.WithContext(b, ctx))
218 if err != nil {
219 return fmt.Errorf("error downloading Metropolis bundle: %v", err)
220 }
221 bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
222 if err != nil {
223 return fmt.Errorf("failed to open node bundle: %w", err)
224 }
225 efiPayload, err := bundle.Open("kernel_efi.efi")
226 if err != nil {
227 return fmt.Errorf("invalid bundle: %w", err)
228 }
229 defer efiPayload.Close()
230 systemImage, err := bundle.Open("verity_rootfs.img")
231 if err != nil {
232 return fmt.Errorf("invalid bundle: %w", err)
233 }
234 defer systemImage.Close()
235 activeSlot := s.CurrentlyRunningSlot()
236 if activeSlot == SlotInvalid {
237 return errors.New("unable to determine active slot, cannot continue")
238 }
239 targetSlot := activeSlot.Other()
240
Lorenz Brun35fcf032023-06-29 04:15:58 +0200241 systemPart, err := openSystemSlot(targetSlot)
242 if err != nil {
243 return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
244 }
245 defer systemPart.Close()
246 if _, err := io.Copy(blockdev.NewRWS(systemPart), systemImage); err != nil {
247 return status.Errorf(codes.Unavailable, "Failed to copy system image: %v", err)
248 }
249
250 bootFile, err := os.Create(filepath.Join(s.ESPPath, targetSlot.EFIBootPath()))
251 if err != nil {
252 return fmt.Errorf("failed to open boot file: %w", err)
253 }
254 defer bootFile.Close()
255 if _, err := io.Copy(bootFile, efiPayload); err != nil {
256 return fmt.Errorf("failed to write boot file: %w", err)
257 }
258
Lorenz Brund14be0e2023-07-31 16:46:14 +0200259 if withKexec {
260 if err := s.stageKexec(bootFile, targetSlot); err != nil {
261 return fmt.Errorf("while kexec staging: %w", err)
262 }
263 } else {
Lorenz Brun54a5a052023-10-02 16:40:11 +0200264 err := s.setABState(&abloaderpb.ABLoaderData{
265 ActiveSlot: abloaderpb.Slot(activeSlot),
266 NextSlot: abloaderpb.Slot(targetSlot),
267 })
268 if err != nil {
269 return fmt.Errorf("while setting next A/B slot: %w", err)
Lorenz Brund14be0e2023-07-31 16:46:14 +0200270 }
Lorenz Brun35fcf032023-06-29 04:15:58 +0200271 }
272
273 return nil
274}
275
276func (*Service) tryDownloadBundle(ctx context.Context, bundleURL string, bundleRaw *bytes.Buffer) error {
277 bundleReq, err := http.NewRequestWithContext(ctx, "GET", bundleURL, nil)
278 bundleRes, err := http.DefaultClient.Do(bundleReq)
279 if err != nil {
280 return fmt.Errorf("HTTP request failed: %w", err)
281 }
282 defer bundleRes.Body.Close()
283 switch bundleRes.StatusCode {
284 case http.StatusTooEarly, http.StatusTooManyRequests,
285 http.StatusInternalServerError, http.StatusBadGateway,
286 http.StatusServiceUnavailable, http.StatusGatewayTimeout:
287 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
288 default:
289 // Non-standard code range used for proxy-related issue by various
290 // vendors. Treat as non-permanent error.
291 if bundleRes.StatusCode >= 520 && bundleRes.StatusCode < 599 {
292 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
293 }
294 if bundleRes.StatusCode != 200 {
295 return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
296 }
297 }
298 if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
299 bundleRaw.Reset()
300 return err
301 }
302 return nil
303}
Lorenz Brund14be0e2023-07-31 16:46:14 +0200304
305// newMemfile creates a new file which is not located on a specific filesystem,
306// but is instead backed by anonymous memory.
307func newMemfile(name string, flags int) (*os.File, error) {
308 fd, err := unix.MemfdCreate(name, flags)
309 if err != nil {
310 return nil, fmt.Errorf("memfd_create: %w", err)
311 }
312 return os.NewFile(uintptr(fd), name), nil
313}
314
315// stageKexec stages the kernel, command line and initramfs if available for
316// a future kexec. It extracts the relevant data from the EFI boot executable.
317func (s *Service) stageKexec(bootFile io.ReaderAt, targetSlot Slot) error {
318 bootPE, err := pe.NewFile(bootFile)
319 if err != nil {
320 return fmt.Errorf("unable to open bootFile as PE: %w", err)
321 }
322 var cmdlineRaw []byte
323 cmdlineSection := bootPE.Section(".cmdline")
324 if cmdlineSection == nil {
325 return fmt.Errorf("no .cmdline section in boot PE")
326 }
327 cmdlineRaw, err = cmdlineSection.Data()
328 if err != nil {
329 return fmt.Errorf("while reading .cmdline PE section: %w", err)
330 }
331 cmdline := string(bytes.TrimRight(cmdlineRaw, "\x00"))
332 cmdline = strings.ReplaceAll(cmdline, "METROPOLIS-SYSTEM-X", fmt.Sprintf("METROPOLIS-SYSTEM-%s", targetSlot))
333 kernelFile, err := newMemfile("kernel", 0)
334 if err != nil {
335 return fmt.Errorf("failed to create kernel memfile: %w", err)
336 }
337 defer kernelFile.Close()
338 kernelSection := bootPE.Section(".linux")
339 if kernelSection == nil {
340 return fmt.Errorf("no .linux section in boot PE")
341 }
342 if _, err := io.Copy(kernelFile, kernelSection.Open()); err != nil {
343 return fmt.Errorf("while copying .linux PE section: %w", err)
344 }
345
346 initramfsSection := bootPE.Section(".initrd")
347 var initramfsFile *os.File
348 if initramfsSection != nil && initramfsSection.Size > 0 {
349 initramfsFile, err = newMemfile("initramfs", 0)
350 if err != nil {
351 return fmt.Errorf("failed to create initramfs memfile: %w", err)
352 }
353 defer initramfsFile.Close()
354 if _, err := io.Copy(initramfsFile, initramfsSection.Open()); err != nil {
355 return fmt.Errorf("while copying .initrd PE section: %w", err)
356 }
357 }
358 if err := kexec.FileLoad(kernelFile, initramfsFile, cmdline); err != nil {
359 return fmt.Errorf("while staging new kexec kernel: %w", err)
360 }
361 return nil
362}
Lorenz Brund79881d2023-11-30 19:02:06 +0100363
364//go:embed metropolis/node/core/abloader/abloader_bin.efi
365var abloader []byte
366
367func (s *Service) fixupPreloader() error {
368 abLoaderFile, err := os.Open(filepath.Join(s.ESPPath, osimage.EFIPayloadPath))
369 if err != nil {
370 s.Logger.Warningf("A/B preloader not available, attempting to restore: %v", err)
371 } else {
372 expectedSum := sha256.Sum256(abloader)
373 h := sha256.New()
374 _, err := io.Copy(h, abLoaderFile)
375 abLoaderFile.Close()
376 if err == nil {
377 if bytes.Equal(h.Sum(nil), expectedSum[:]) {
378 // A/B Preloader is present and has correct hash
379 return nil
380 } else {
381 s.Logger.Infof("Replacing A/B preloader with current version: %x %x", h.Sum(nil), expectedSum[:])
382 }
383 } else {
384 s.Logger.Warningf("Error while reading A/B preloader, restoring: %v", err)
385 }
386 }
387 preloader, err := os.Create(filepath.Join(s.ESPPath, "preloader.swp"))
388 if err != nil {
389 return fmt.Errorf("while creating preloader swap file: %w", err)
390 }
391 if _, err := preloader.Write(abloader); err != nil {
392 return fmt.Errorf("while writing preloader swap file: %w", err)
393 }
394 if err := preloader.Sync(); err != nil {
395 return fmt.Errorf("while sync'ing preloader swap file: %w", err)
396 }
397 preloader.Close()
398 if err := os.Rename(filepath.Join(s.ESPPath, "preloader.swp"), filepath.Join(s.ESPPath, osimage.EFIPayloadPath)); err != nil {
399 return fmt.Errorf("while swapping preloader: %w", err)
400 }
401 s.Logger.Info("Successfully wrote current preloader")
402 return nil
403}
404
405// fixupEFI checks for the existence and correctness of the EFI boot entry
406// repairs/recreates it if needed.
407func (s *Service) fixupEFI() error {
408 varNames, err := efivarfs.List(efivarfs.ScopeGlobal)
409 if err != nil {
410 return fmt.Errorf("failed to list EFI variables: %w", err)
411 }
412 var validBootEntryIdx int = -1
413 for _, varName := range varNames {
414 m := bootVarRegexp.FindStringSubmatch(varName)
415 if m == nil {
416 continue
417 }
418 idx, err := strconv.ParseUint(m[1], 16, 16)
419 if err != nil {
420 // This cannot be hit as all regexp matches are parseable.
421 panic(err)
422 }
423 e, err := efivarfs.GetBootEntry(int(idx))
424 if err != nil {
425 s.Logger.Warningf("Unable to get boot entry %d, skipping: %v", idx, err)
426 continue
427 }
428 if len(e.FilePath) != 2 {
429 // Not our entry, ours always have two parts
430 continue
431 }
432 switch p := e.FilePath[0].(type) {
433 case *efivarfs.HardDrivePath:
434 gptMatch, ok := p.PartitionMatch.(*efivarfs.PartitionGPT)
435 if ok && gptMatch.PartitionUUID != s.ESPPart.ID {
436 // Not related to our ESP
437 continue
438 }
439 default:
440 continue
441 }
442 switch p := e.FilePath[1].(type) {
443 case efivarfs.FilePath:
444 if string(p) == osimage.EFIPayloadPath {
445 if validBootEntryIdx == -1 {
446 validBootEntryIdx = int(idx)
447 } else {
448 // Another valid boot entry already exists, delete this one
449 err := efivarfs.DeleteBootEntry(int(idx))
450 if err == nil {
451 s.Logger.Infof("Deleted duplicate boot entry %q", e.Description)
452 } else {
453 s.Logger.Warningf("Error while deleting duplicate boot entry %q: %v", e.Description, err)
454 }
455 }
456 } else if strings.Contains(e.Description, "Metropolis") {
457 err := efivarfs.DeleteBootEntry(int(idx))
458 if err == nil {
459 s.Logger.Infof("Deleted orphaned boot entry %q", e.Description)
460 } else {
461 s.Logger.Warningf("Error while deleting orphaned boot entry %q: %v", e.Description, err)
462 }
463 }
464 default:
465 continue
466 }
467 }
468 if validBootEntryIdx == -1 {
469 validBootEntryIdx, err = efivarfs.AddBootEntry(&efivarfs.LoadOption{
470 Description: "Metropolis",
471 FilePath: efivarfs.DevicePath{
472 &efivarfs.HardDrivePath{
473 PartitionNumber: 1,
474 PartitionStartBlock: s.ESPPart.FirstBlock,
475 PartitionSizeBlocks: s.ESPPart.SizeBlocks(),
476 PartitionMatch: efivarfs.PartitionGPT{
477 PartitionUUID: s.ESPPart.ID,
478 },
479 },
480 efivarfs.FilePath(osimage.EFIPayloadPath),
481 },
482 })
483 if err == nil {
484 s.Logger.Infof("Restored missing EFI boot entry for Metropolis")
485 } else {
486 return fmt.Errorf("while restoring missing EFI boot entry for Metropolis: %v", err)
487 }
488 }
489 bootOrder, err := efivarfs.GetBootOrder()
490 if err != nil {
491 return fmt.Errorf("failed to get EFI boot order: %v", err)
492 }
493 for _, bentry := range bootOrder {
494 if bentry == uint16(validBootEntryIdx) {
495 // Our boot entry is in the boot order, everything's ok
496 return nil
497 }
498 }
499 newBootOrder := append(efivarfs.BootOrder{uint16(validBootEntryIdx)}, bootOrder...)
500 if err := efivarfs.SetBootOrder(newBootOrder); err != nil {
501 return fmt.Errorf("while setting EFI boot order: %w", err)
502 }
503 return nil
504}