blob: 84a5db9c5b38ccee4ee8842defb9f304ed7be339 [file] [log] [blame]
Lorenz Brun35fcf032023-06-29 04:15:58 +02001package update
2
3import (
4 "archive/zip"
5 "bytes"
6 "context"
Lorenz Brund14be0e2023-07-31 16:46:14 +02007 "debug/pe"
Lorenz Brun35fcf032023-06-29 04:15:58 +02008 "errors"
9 "fmt"
10 "io"
11 "net/http"
12 "os"
13 "path/filepath"
14 "regexp"
15 "strconv"
Lorenz Brund14be0e2023-07-31 16:46:14 +020016 "strings"
Lorenz Brun35fcf032023-06-29 04:15:58 +020017
18 "github.com/cenkalti/backoff/v4"
Lorenz Brund14be0e2023-07-31 16:46:14 +020019 "golang.org/x/sys/unix"
Lorenz Brun35fcf032023-06-29 04:15:58 +020020 "google.golang.org/grpc/codes"
21 "google.golang.org/grpc/status"
22
23 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
24 "source.monogon.dev/metropolis/pkg/blockdev"
25 "source.monogon.dev/metropolis/pkg/efivarfs"
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000026 "source.monogon.dev/metropolis/pkg/gpt"
Lorenz Brund14be0e2023-07-31 16:46:14 +020027 "source.monogon.dev/metropolis/pkg/kexec"
Lorenz Brun35fcf032023-06-29 04:15:58 +020028 "source.monogon.dev/metropolis/pkg/logtree"
29)
30
31// Service contains data and functionality to perform A/B updates on a
32// Metropolis node.
33type Service struct {
34 // Path to the mount point of the EFI System Partition (ESP).
35 ESPPath string
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000036 // gpt.Partition of the ESP System Partition.
37 ESPPart *gpt.Partition
Lorenz Brun35fcf032023-06-29 04:15:58 +020038 // Partition number (1-based) of the ESP in the GPT partitions array.
39 ESPPartNumber uint32
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000040
Lorenz Brun35fcf032023-06-29 04:15:58 +020041 // Logger service for the update service.
42 Logger logtree.LeveledLogger
43}
44
// Slot identifies one of the two system slots used for A/B updates.
type Slot int

const (
	// SlotInvalid is the zero value of Slot and stands for "no valid slot".
	SlotInvalid Slot = 0
	// SlotA is the first of the two system slots.
	SlotA Slot = 1
	// SlotB is the second of the two system slots.
	SlotB Slot = 2
)

// Other returns the "other" slot, i.e. returns slot A for B and B for A.
// It returns SlotInvalid for any s which is not SlotA or SlotB.
func (s Slot) Other() Slot {
	switch s {
	case SlotA:
		return SlotB
	case SlotB:
		return SlotA
	default:
		return SlotInvalid
	}
}

// String returns "A" for SlotA, "B" for SlotB and "<invalid slot>" for every
// other value.
func (s Slot) String() string {
	switch s {
	case SlotA:
		return "A"
	case SlotB:
		return "B"
	default:
		return "<invalid slot>"
	}
}
76
77func (s Slot) EFIBootPath() string {
78 switch s {
79 case SlotA:
80 return osimage.EFIBootAPath
81 case SlotB:
82 return osimage.EFIBootBPath
83 default:
84 return ""
85 }
86}
87
// slotRegexp matches a reference to the system partition label of slot A or B
// (e.g. on a kernel command line) and captures the slot letter.
var slotRegexp = regexp.MustCompile(`PARTLABEL=METROPOLIS-SYSTEM-([AB])`)
89
90// ProvideESP is a convenience function for providing information about the
91// ESP after the update service has been instantiated.
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000092func (s *Service) ProvideESP(path string, partNum uint32, part *gpt.Partition) {
Lorenz Brun35fcf032023-06-29 04:15:58 +020093 s.ESPPath = path
94 s.ESPPartNumber = partNum
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000095 s.ESPPart = part
Lorenz Brun35fcf032023-06-29 04:15:58 +020096}
97
98// CurrentlyRunningSlot returns the slot the current system is booted from.
99func (s *Service) CurrentlyRunningSlot() Slot {
100 cmdline, err := os.ReadFile("/proc/cmdline")
101 if err != nil {
102 return SlotInvalid
103 }
104 slotMatches := slotRegexp.FindStringSubmatch(string(cmdline))
105 if len(slotMatches) != 2 {
106 return SlotInvalid
107 }
108 switch slotMatches[1] {
109 case "A":
110 return SlotA
111 case "B":
112 return SlotB
113 default:
114 panic("unreachable")
115 }
116}
117
// bootVarRegexp matches EFI variable names of the form BootXXXX, where XXXX
// are exactly four hexadecimal digits, and captures those digits.
var bootVarRegexp = regexp.MustCompile(`^Boot([0-9A-Fa-f]{4})$`)
119
120func (s *Service) getAllBootEntries() (map[int]*efivarfs.LoadOption, error) {
121 res := make(map[int]*efivarfs.LoadOption)
122 varNames, err := efivarfs.List(efivarfs.ScopeGlobal)
123 if err != nil {
124 return nil, fmt.Errorf("failed to list EFI variables: %w", err)
125 }
126 for _, varName := range varNames {
127 m := bootVarRegexp.FindStringSubmatch(varName)
128 if m == nil {
129 continue
130 }
131 idx, err := strconv.ParseUint(m[1], 16, 16)
132 if err != nil {
133 // This cannot be hit as all regexp matches are parseable.
134 panic(err)
135 }
136 e, err := efivarfs.GetBootEntry(int(idx))
137 if err != nil {
138 return nil, fmt.Errorf("failed to get boot entry %d: %w", idx, err)
139 }
140 res[int(idx)] = e
141 }
142 return res, nil
143}
144
145func (s *Service) getOrMakeBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, error) {
146 for idx, e := range existing {
147 if len(e.FilePath) != 2 {
148 // Not our entry
149 continue
150 }
151 switch p := e.FilePath[0].(type) {
152 case *efivarfs.HardDrivePath:
153 gptMatch, ok := p.PartitionMatch.(*efivarfs.PartitionGPT)
Tim Windelschmidt8e87a062023-07-31 01:33:10 +0000154 if ok && gptMatch.PartitionUUID != s.ESPPart.ID {
Lorenz Brun35fcf032023-06-29 04:15:58 +0200155 // Not related to our ESP
156 continue
157 }
158 default:
159 continue
160 }
161 switch p := e.FilePath[1].(type) {
162 case efivarfs.FilePath:
163 if string(p) == slot.EFIBootPath() {
164 return idx, nil
165 }
166 default:
167 continue
168 }
169 }
170 newEntry := &efivarfs.LoadOption{
171 Description: fmt.Sprintf("Metropolis Slot %s", slot),
172 FilePath: efivarfs.DevicePath{
173 &efivarfs.HardDrivePath{
Tim Windelschmidt8e87a062023-07-31 01:33:10 +0000174 PartitionNumber: s.ESPPartNumber,
175 PartitionStartBlock: s.ESPPart.FirstBlock,
176 PartitionSizeBlocks: s.ESPPart.SizeBlocks(),
Lorenz Brun35fcf032023-06-29 04:15:58 +0200177 PartitionMatch: efivarfs.PartitionGPT{
Tim Windelschmidt8e87a062023-07-31 01:33:10 +0000178 PartitionUUID: s.ESPPart.ID,
Lorenz Brun35fcf032023-06-29 04:15:58 +0200179 },
180 },
181 efivarfs.FilePath(slot.EFIBootPath()),
182 },
183 }
Lorenz Brund14be0e2023-07-31 16:46:14 +0200184 s.Logger.Infof("Recreated boot entry %s", newEntry.Description)
Lorenz Brun35fcf032023-06-29 04:15:58 +0200185 newIdx, err := efivarfs.AddBootEntry(newEntry)
186 if err == nil {
187 existing[newIdx] = newEntry
188 }
189 return newIdx, err
190}
191
192// MarkBootSuccessful must be called after each boot if some implementation-
193// defined criteria for a successful boot are met. If an update has been
194// installed and booted and this function is called, the updated version is
195// marked as default. If an issue occurs during boot and so this function is
196// not called the old version will be started again on next boot.
197func (s *Service) MarkBootSuccessful() error {
198 if s.ESPPath == "" {
199 return errors.New("no ESP information provided to update service, cannot continue")
200 }
201 bootEntries, err := s.getAllBootEntries()
202 if err != nil {
203 return fmt.Errorf("while getting boot entries: %w", err)
204 }
205 aIdx, err := s.getOrMakeBootEntry(bootEntries, SlotA)
206 if err != nil {
207 return fmt.Errorf("while ensuring slot A boot entry: %w", err)
208 }
209 bIdx, err := s.getOrMakeBootEntry(bootEntries, SlotB)
210 if err != nil {
211 return fmt.Errorf("while ensuring slot B boot entry: %w", err)
212 }
213
214 activeSlot := s.CurrentlyRunningSlot()
215 firstSlot := SlotInvalid
216
217 ord, err := efivarfs.GetBootOrder()
218 if err != nil {
219 return fmt.Errorf("failed to get boot order: %w", err)
220 }
221
222 for _, e := range ord {
223 if int(e) == aIdx {
224 firstSlot = SlotA
225 break
226 }
227 if int(e) == bIdx {
228 firstSlot = SlotB
229 break
230 }
231 }
232
233 if firstSlot == SlotInvalid {
234 bootOrder := make(efivarfs.BootOrder, 2)
235 switch activeSlot {
236 case SlotA:
237 bootOrder[0], bootOrder[1] = uint16(aIdx), uint16(bIdx)
238 case SlotB:
239 bootOrder[0], bootOrder[1] = uint16(bIdx), uint16(aIdx)
240 default:
241 return fmt.Errorf("invalid active slot")
242 }
243 efivarfs.SetBootOrder(bootOrder)
244 s.Logger.Infof("Metropolis missing from boot order, recreated it")
245 } else if activeSlot != firstSlot {
246 var aPos, bPos int
247 for i, e := range ord {
248 if int(e) == aIdx {
249 aPos = i
250 }
251 if int(e) == bIdx {
252 bPos = i
253 }
254 }
255 // swap A and B slots in boot order
256 ord[aPos], ord[bPos] = ord[bPos], ord[aPos]
257 if err := efivarfs.SetBootOrder(ord); err != nil {
258 return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
259 }
260 s.Logger.Infof("Permanently activated slot %v", activeSlot)
261 } else {
262 s.Logger.Infof("Normal boot from slot %v", activeSlot)
263 }
264
265 return nil
266}
267
268func openSystemSlot(slot Slot) (*blockdev.Device, error) {
269 switch slot {
270 case SlotA:
271 return blockdev.Open("/dev/system-a")
272 case SlotB:
273 return blockdev.Open("/dev/system-b")
274 default:
275 return nil, errors.New("invalid slot identifier given")
276 }
277}
278
279// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
280// inactive slot and sets that slot to boot next. If it doesn't return an error,
281// a reboot boots into the new slot.
Lorenz Brund14be0e2023-07-31 16:46:14 +0200282func (s *Service) InstallBundle(ctx context.Context, bundleURL string, withKexec bool) error {
Lorenz Brun35fcf032023-06-29 04:15:58 +0200283 if s.ESPPath == "" {
284 return errors.New("no ESP information provided to update service, cannot continue")
285 }
286 // Download into a buffer as ZIP files cannot efficiently be read from
287 // HTTP in Go as the ReaderAt has no way of indicating continuous sections,
288 // thus a ton of small range requests would need to be used, causing
289 // a huge latency penalty as well as costing a lot of money on typical
290 // object storages. This should go away when we switch to a better bundle
291 // format which can be streamed.
292 var bundleRaw bytes.Buffer
293 b := backoff.NewExponentialBackOff()
294 err := backoff.Retry(func() error {
295 return s.tryDownloadBundle(ctx, bundleURL, &bundleRaw)
296 }, backoff.WithContext(b, ctx))
297 if err != nil {
298 return fmt.Errorf("error downloading Metropolis bundle: %v", err)
299 }
300 bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
301 if err != nil {
302 return fmt.Errorf("failed to open node bundle: %w", err)
303 }
304 efiPayload, err := bundle.Open("kernel_efi.efi")
305 if err != nil {
306 return fmt.Errorf("invalid bundle: %w", err)
307 }
308 defer efiPayload.Close()
309 systemImage, err := bundle.Open("verity_rootfs.img")
310 if err != nil {
311 return fmt.Errorf("invalid bundle: %w", err)
312 }
313 defer systemImage.Close()
314 activeSlot := s.CurrentlyRunningSlot()
315 if activeSlot == SlotInvalid {
316 return errors.New("unable to determine active slot, cannot continue")
317 }
318 targetSlot := activeSlot.Other()
319
320 bootEntries, err := s.getAllBootEntries()
321 if err != nil {
322 return fmt.Errorf("while getting boot entries: %w", err)
323 }
324 targetSlotBootEntryIdx, err := s.getOrMakeBootEntry(bootEntries, targetSlot)
325 if err != nil {
326 return fmt.Errorf("while ensuring target slot boot entry: %w", err)
327 }
328 targetSlotBootEntry := bootEntries[targetSlotBootEntryIdx]
329
330 // Disable boot entry while the corresponding slot is being modified.
331 targetSlotBootEntry.Inactive = true
332 if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
333 return fmt.Errorf("failed setting boot entry %d inactive: %w", targetSlotBootEntryIdx, err)
334 }
335
336 systemPart, err := openSystemSlot(targetSlot)
337 if err != nil {
338 return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
339 }
340 defer systemPart.Close()
341 if _, err := io.Copy(blockdev.NewRWS(systemPart), systemImage); err != nil {
342 return status.Errorf(codes.Unavailable, "Failed to copy system image: %v", err)
343 }
344
345 bootFile, err := os.Create(filepath.Join(s.ESPPath, targetSlot.EFIBootPath()))
346 if err != nil {
347 return fmt.Errorf("failed to open boot file: %w", err)
348 }
349 defer bootFile.Close()
350 if _, err := io.Copy(bootFile, efiPayload); err != nil {
351 return fmt.Errorf("failed to write boot file: %w", err)
352 }
353
354 // Reenable target slot boot entry after boot and system have been written
355 // fully. The slot should now be bootable again.
356 targetSlotBootEntry.Inactive = false
357 if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
358 return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
359 }
360
Lorenz Brund14be0e2023-07-31 16:46:14 +0200361 if withKexec {
362 if err := s.stageKexec(bootFile, targetSlot); err != nil {
363 return fmt.Errorf("while kexec staging: %w", err)
364 }
365 } else {
366 if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
367 return fmt.Errorf("failed to set BootNext variable: %w", err)
368 }
Lorenz Brun35fcf032023-06-29 04:15:58 +0200369 }
370
371 return nil
372}
373
374func (*Service) tryDownloadBundle(ctx context.Context, bundleURL string, bundleRaw *bytes.Buffer) error {
375 bundleReq, err := http.NewRequestWithContext(ctx, "GET", bundleURL, nil)
376 bundleRes, err := http.DefaultClient.Do(bundleReq)
377 if err != nil {
378 return fmt.Errorf("HTTP request failed: %w", err)
379 }
380 defer bundleRes.Body.Close()
381 switch bundleRes.StatusCode {
382 case http.StatusTooEarly, http.StatusTooManyRequests,
383 http.StatusInternalServerError, http.StatusBadGateway,
384 http.StatusServiceUnavailable, http.StatusGatewayTimeout:
385 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
386 default:
387 // Non-standard code range used for proxy-related issue by various
388 // vendors. Treat as non-permanent error.
389 if bundleRes.StatusCode >= 520 && bundleRes.StatusCode < 599 {
390 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
391 }
392 if bundleRes.StatusCode != 200 {
393 return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
394 }
395 }
396 if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
397 bundleRaw.Reset()
398 return err
399 }
400 return nil
401}
Lorenz Brund14be0e2023-07-31 16:46:14 +0200402
403// newMemfile creates a new file which is not located on a specific filesystem,
404// but is instead backed by anonymous memory.
405func newMemfile(name string, flags int) (*os.File, error) {
406 fd, err := unix.MemfdCreate(name, flags)
407 if err != nil {
408 return nil, fmt.Errorf("memfd_create: %w", err)
409 }
410 return os.NewFile(uintptr(fd), name), nil
411}
412
413// stageKexec stages the kernel, command line and initramfs if available for
414// a future kexec. It extracts the relevant data from the EFI boot executable.
415func (s *Service) stageKexec(bootFile io.ReaderAt, targetSlot Slot) error {
416 bootPE, err := pe.NewFile(bootFile)
417 if err != nil {
418 return fmt.Errorf("unable to open bootFile as PE: %w", err)
419 }
420 var cmdlineRaw []byte
421 cmdlineSection := bootPE.Section(".cmdline")
422 if cmdlineSection == nil {
423 return fmt.Errorf("no .cmdline section in boot PE")
424 }
425 cmdlineRaw, err = cmdlineSection.Data()
426 if err != nil {
427 return fmt.Errorf("while reading .cmdline PE section: %w", err)
428 }
429 cmdline := string(bytes.TrimRight(cmdlineRaw, "\x00"))
430 cmdline = strings.ReplaceAll(cmdline, "METROPOLIS-SYSTEM-X", fmt.Sprintf("METROPOLIS-SYSTEM-%s", targetSlot))
431 kernelFile, err := newMemfile("kernel", 0)
432 if err != nil {
433 return fmt.Errorf("failed to create kernel memfile: %w", err)
434 }
435 defer kernelFile.Close()
436 kernelSection := bootPE.Section(".linux")
437 if kernelSection == nil {
438 return fmt.Errorf("no .linux section in boot PE")
439 }
440 if _, err := io.Copy(kernelFile, kernelSection.Open()); err != nil {
441 return fmt.Errorf("while copying .linux PE section: %w", err)
442 }
443
444 initramfsSection := bootPE.Section(".initrd")
445 var initramfsFile *os.File
446 if initramfsSection != nil && initramfsSection.Size > 0 {
447 initramfsFile, err = newMemfile("initramfs", 0)
448 if err != nil {
449 return fmt.Errorf("failed to create initramfs memfile: %w", err)
450 }
451 defer initramfsFile.Close()
452 if _, err := io.Copy(initramfsFile, initramfsSection.Open()); err != nil {
453 return fmt.Errorf("while copying .initrd PE section: %w", err)
454 }
455 }
456 if err := kexec.FileLoad(kernelFile, initramfsFile, cmdline); err != nil {
457 return fmt.Errorf("while staging new kexec kernel: %w", err)
458 }
459 return nil
460}