blob: 107f9cc60d48af31d152158fc361c7ee5300f03a [file] [log] [blame]
Lorenz Brun35fcf032023-06-29 04:15:58 +02001package update
2
3import (
4 "archive/zip"
5 "bytes"
6 "context"
7 "errors"
8 "fmt"
9 "io"
10 "net/http"
11 "os"
12 "path/filepath"
13 "regexp"
14 "strconv"
15
16 "github.com/cenkalti/backoff/v4"
Lorenz Brun35fcf032023-06-29 04:15:58 +020017 "google.golang.org/grpc/codes"
18 "google.golang.org/grpc/status"
19
20 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
21 "source.monogon.dev/metropolis/pkg/blockdev"
22 "source.monogon.dev/metropolis/pkg/efivarfs"
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000023 "source.monogon.dev/metropolis/pkg/gpt"
Lorenz Brun35fcf032023-06-29 04:15:58 +020024 "source.monogon.dev/metropolis/pkg/logtree"
25)
26
27// Service contains data and functionality to perform A/B updates on a
28// Metropolis node.
29type Service struct {
30 // Path to the mount point of the EFI System Partition (ESP).
31 ESPPath string
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000032 // gpt.Partition of the ESP System Partition.
33 ESPPart *gpt.Partition
Lorenz Brun35fcf032023-06-29 04:15:58 +020034 // Partition number (1-based) of the ESP in the GPT partitions array.
35 ESPPartNumber uint32
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000036
Lorenz Brun35fcf032023-06-29 04:15:58 +020037 // Logger service for the update service.
38 Logger logtree.LeveledLogger
39}
40
// Slot identifies one of the two system slots used for A/B updates.
type Slot int

const (
	// SlotInvalid is the zero value and does not refer to any slot.
	SlotInvalid Slot = iota
	// SlotA is the first system slot.
	SlotA
	// SlotB is the second system slot.
	SlotB
)

// Other returns the opposite slot: SlotB for SlotA and SlotA for SlotB.
// Every other value of s yields SlotInvalid.
func (s Slot) Other() Slot {
	if s == SlotA {
		return SlotB
	}
	if s == SlotB {
		return SlotA
	}
	return SlotInvalid
}

// String returns "A" or "B" for the two valid slots and "<invalid slot>" for
// anything else.
func (s Slot) String() string {
	if s == SlotA {
		return "A"
	}
	if s == SlotB {
		return "B"
	}
	return "<invalid slot>"
}
72
73func (s Slot) EFIBootPath() string {
74 switch s {
75 case SlotA:
76 return osimage.EFIBootAPath
77 case SlotB:
78 return osimage.EFIBootBPath
79 default:
80 return ""
81 }
82}
83
// slotRegexp finds a PARTLABEL=METROPOLIS-SYSTEM-<slot> reference anywhere
// in a string and captures the slot letter (A or B).
var slotRegexp = regexp.MustCompile(`PARTLABEL=METROPOLIS-SYSTEM-([AB])`)
85
86// ProvideESP is a convenience function for providing information about the
87// ESP after the update service has been instantiated.
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000088func (s *Service) ProvideESP(path string, partNum uint32, part *gpt.Partition) {
Lorenz Brun35fcf032023-06-29 04:15:58 +020089 s.ESPPath = path
90 s.ESPPartNumber = partNum
Tim Windelschmidt8e87a062023-07-31 01:33:10 +000091 s.ESPPart = part
Lorenz Brun35fcf032023-06-29 04:15:58 +020092}
93
94// CurrentlyRunningSlot returns the slot the current system is booted from.
95func (s *Service) CurrentlyRunningSlot() Slot {
96 cmdline, err := os.ReadFile("/proc/cmdline")
97 if err != nil {
98 return SlotInvalid
99 }
100 slotMatches := slotRegexp.FindStringSubmatch(string(cmdline))
101 if len(slotMatches) != 2 {
102 return SlotInvalid
103 }
104 switch slotMatches[1] {
105 case "A":
106 return SlotA
107 case "B":
108 return SlotB
109 default:
110 panic("unreachable")
111 }
112}
113
// bootVarRegexp matches variable names of the form BootXXXX, where XXXX is
// exactly four hexadecimal digits, and captures those digits.
var bootVarRegexp = regexp.MustCompile(`^Boot([0-9A-Fa-f]{4})$`)
115
116func (s *Service) getAllBootEntries() (map[int]*efivarfs.LoadOption, error) {
117 res := make(map[int]*efivarfs.LoadOption)
118 varNames, err := efivarfs.List(efivarfs.ScopeGlobal)
119 if err != nil {
120 return nil, fmt.Errorf("failed to list EFI variables: %w", err)
121 }
122 for _, varName := range varNames {
123 m := bootVarRegexp.FindStringSubmatch(varName)
124 if m == nil {
125 continue
126 }
127 idx, err := strconv.ParseUint(m[1], 16, 16)
128 if err != nil {
129 // This cannot be hit as all regexp matches are parseable.
130 panic(err)
131 }
132 e, err := efivarfs.GetBootEntry(int(idx))
133 if err != nil {
134 return nil, fmt.Errorf("failed to get boot entry %d: %w", idx, err)
135 }
136 res[int(idx)] = e
137 }
138 return res, nil
139}
140
141func (s *Service) getOrMakeBootEntry(existing map[int]*efivarfs.LoadOption, slot Slot) (int, error) {
142 for idx, e := range existing {
143 if len(e.FilePath) != 2 {
144 // Not our entry
145 continue
146 }
147 switch p := e.FilePath[0].(type) {
148 case *efivarfs.HardDrivePath:
149 gptMatch, ok := p.PartitionMatch.(*efivarfs.PartitionGPT)
Tim Windelschmidt8e87a062023-07-31 01:33:10 +0000150 if ok && gptMatch.PartitionUUID != s.ESPPart.ID {
Lorenz Brun35fcf032023-06-29 04:15:58 +0200151 // Not related to our ESP
152 continue
153 }
154 default:
155 continue
156 }
157 switch p := e.FilePath[1].(type) {
158 case efivarfs.FilePath:
159 if string(p) == slot.EFIBootPath() {
160 return idx, nil
161 }
162 default:
163 continue
164 }
165 }
166 newEntry := &efivarfs.LoadOption{
167 Description: fmt.Sprintf("Metropolis Slot %s", slot),
168 FilePath: efivarfs.DevicePath{
169 &efivarfs.HardDrivePath{
Tim Windelschmidt8e87a062023-07-31 01:33:10 +0000170 PartitionNumber: s.ESPPartNumber,
171 PartitionStartBlock: s.ESPPart.FirstBlock,
172 PartitionSizeBlocks: s.ESPPart.SizeBlocks(),
Lorenz Brun35fcf032023-06-29 04:15:58 +0200173 PartitionMatch: efivarfs.PartitionGPT{
Tim Windelschmidt8e87a062023-07-31 01:33:10 +0000174 PartitionUUID: s.ESPPart.ID,
Lorenz Brun35fcf032023-06-29 04:15:58 +0200175 },
176 },
177 efivarfs.FilePath(slot.EFIBootPath()),
178 },
179 }
180 newIdx, err := efivarfs.AddBootEntry(newEntry)
181 if err == nil {
182 existing[newIdx] = newEntry
183 }
184 return newIdx, err
185}
186
187// MarkBootSuccessful must be called after each boot if some implementation-
188// defined criteria for a successful boot are met. If an update has been
189// installed and booted and this function is called, the updated version is
190// marked as default. If an issue occurs during boot and so this function is
191// not called the old version will be started again on next boot.
192func (s *Service) MarkBootSuccessful() error {
193 if s.ESPPath == "" {
194 return errors.New("no ESP information provided to update service, cannot continue")
195 }
196 bootEntries, err := s.getAllBootEntries()
197 if err != nil {
198 return fmt.Errorf("while getting boot entries: %w", err)
199 }
200 aIdx, err := s.getOrMakeBootEntry(bootEntries, SlotA)
201 if err != nil {
202 return fmt.Errorf("while ensuring slot A boot entry: %w", err)
203 }
204 bIdx, err := s.getOrMakeBootEntry(bootEntries, SlotB)
205 if err != nil {
206 return fmt.Errorf("while ensuring slot B boot entry: %w", err)
207 }
208
209 activeSlot := s.CurrentlyRunningSlot()
210 firstSlot := SlotInvalid
211
212 ord, err := efivarfs.GetBootOrder()
213 if err != nil {
214 return fmt.Errorf("failed to get boot order: %w", err)
215 }
216
217 for _, e := range ord {
218 if int(e) == aIdx {
219 firstSlot = SlotA
220 break
221 }
222 if int(e) == bIdx {
223 firstSlot = SlotB
224 break
225 }
226 }
227
228 if firstSlot == SlotInvalid {
229 bootOrder := make(efivarfs.BootOrder, 2)
230 switch activeSlot {
231 case SlotA:
232 bootOrder[0], bootOrder[1] = uint16(aIdx), uint16(bIdx)
233 case SlotB:
234 bootOrder[0], bootOrder[1] = uint16(bIdx), uint16(aIdx)
235 default:
236 return fmt.Errorf("invalid active slot")
237 }
238 efivarfs.SetBootOrder(bootOrder)
239 s.Logger.Infof("Metropolis missing from boot order, recreated it")
240 } else if activeSlot != firstSlot {
241 var aPos, bPos int
242 for i, e := range ord {
243 if int(e) == aIdx {
244 aPos = i
245 }
246 if int(e) == bIdx {
247 bPos = i
248 }
249 }
250 // swap A and B slots in boot order
251 ord[aPos], ord[bPos] = ord[bPos], ord[aPos]
252 if err := efivarfs.SetBootOrder(ord); err != nil {
253 return fmt.Errorf("failed to set boot order to permanently switch slot: %w", err)
254 }
255 s.Logger.Infof("Permanently activated slot %v", activeSlot)
256 } else {
257 s.Logger.Infof("Normal boot from slot %v", activeSlot)
258 }
259
260 return nil
261}
262
263func openSystemSlot(slot Slot) (*blockdev.Device, error) {
264 switch slot {
265 case SlotA:
266 return blockdev.Open("/dev/system-a")
267 case SlotB:
268 return blockdev.Open("/dev/system-b")
269 default:
270 return nil, errors.New("invalid slot identifier given")
271 }
272}
273
274// InstallBundle installs the bundle at the given HTTP(S) URL into the currently
275// inactive slot and sets that slot to boot next. If it doesn't return an error,
276// a reboot boots into the new slot.
277func (s *Service) InstallBundle(ctx context.Context, bundleURL string) error {
278 if s.ESPPath == "" {
279 return errors.New("no ESP information provided to update service, cannot continue")
280 }
281 // Download into a buffer as ZIP files cannot efficiently be read from
282 // HTTP in Go as the ReaderAt has no way of indicating continuous sections,
283 // thus a ton of small range requests would need to be used, causing
284 // a huge latency penalty as well as costing a lot of money on typical
285 // object storages. This should go away when we switch to a better bundle
286 // format which can be streamed.
287 var bundleRaw bytes.Buffer
288 b := backoff.NewExponentialBackOff()
289 err := backoff.Retry(func() error {
290 return s.tryDownloadBundle(ctx, bundleURL, &bundleRaw)
291 }, backoff.WithContext(b, ctx))
292 if err != nil {
293 return fmt.Errorf("error downloading Metropolis bundle: %v", err)
294 }
295 bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
296 if err != nil {
297 return fmt.Errorf("failed to open node bundle: %w", err)
298 }
299 efiPayload, err := bundle.Open("kernel_efi.efi")
300 if err != nil {
301 return fmt.Errorf("invalid bundle: %w", err)
302 }
303 defer efiPayload.Close()
304 systemImage, err := bundle.Open("verity_rootfs.img")
305 if err != nil {
306 return fmt.Errorf("invalid bundle: %w", err)
307 }
308 defer systemImage.Close()
309 activeSlot := s.CurrentlyRunningSlot()
310 if activeSlot == SlotInvalid {
311 return errors.New("unable to determine active slot, cannot continue")
312 }
313 targetSlot := activeSlot.Other()
314
315 bootEntries, err := s.getAllBootEntries()
316 if err != nil {
317 return fmt.Errorf("while getting boot entries: %w", err)
318 }
319 targetSlotBootEntryIdx, err := s.getOrMakeBootEntry(bootEntries, targetSlot)
320 if err != nil {
321 return fmt.Errorf("while ensuring target slot boot entry: %w", err)
322 }
323 targetSlotBootEntry := bootEntries[targetSlotBootEntryIdx]
324
325 // Disable boot entry while the corresponding slot is being modified.
326 targetSlotBootEntry.Inactive = true
327 if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
328 return fmt.Errorf("failed setting boot entry %d inactive: %w", targetSlotBootEntryIdx, err)
329 }
330
331 systemPart, err := openSystemSlot(targetSlot)
332 if err != nil {
333 return status.Errorf(codes.Internal, "Inactive system slot unavailable: %v", err)
334 }
335 defer systemPart.Close()
336 if _, err := io.Copy(blockdev.NewRWS(systemPart), systemImage); err != nil {
337 return status.Errorf(codes.Unavailable, "Failed to copy system image: %v", err)
338 }
339
340 bootFile, err := os.Create(filepath.Join(s.ESPPath, targetSlot.EFIBootPath()))
341 if err != nil {
342 return fmt.Errorf("failed to open boot file: %w", err)
343 }
344 defer bootFile.Close()
345 if _, err := io.Copy(bootFile, efiPayload); err != nil {
346 return fmt.Errorf("failed to write boot file: %w", err)
347 }
348
349 // Reenable target slot boot entry after boot and system have been written
350 // fully. The slot should now be bootable again.
351 targetSlotBootEntry.Inactive = false
352 if err := efivarfs.SetBootEntry(targetSlotBootEntryIdx, targetSlotBootEntry); err != nil {
353 return fmt.Errorf("failed setting boot entry %d active: %w", targetSlotBootEntryIdx, err)
354 }
355
356 if err := efivarfs.SetBootNext(uint16(targetSlotBootEntryIdx)); err != nil {
357 return fmt.Errorf("failed to set BootNext variable: %w", err)
358 }
359
360 return nil
361}
362
363func (*Service) tryDownloadBundle(ctx context.Context, bundleURL string, bundleRaw *bytes.Buffer) error {
364 bundleReq, err := http.NewRequestWithContext(ctx, "GET", bundleURL, nil)
365 bundleRes, err := http.DefaultClient.Do(bundleReq)
366 if err != nil {
367 return fmt.Errorf("HTTP request failed: %w", err)
368 }
369 defer bundleRes.Body.Close()
370 switch bundleRes.StatusCode {
371 case http.StatusTooEarly, http.StatusTooManyRequests,
372 http.StatusInternalServerError, http.StatusBadGateway,
373 http.StatusServiceUnavailable, http.StatusGatewayTimeout:
374 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
375 default:
376 // Non-standard code range used for proxy-related issue by various
377 // vendors. Treat as non-permanent error.
378 if bundleRes.StatusCode >= 520 && bundleRes.StatusCode < 599 {
379 return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
380 }
381 if bundleRes.StatusCode != 200 {
382 return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
383 }
384 }
385 if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
386 bundleRaw.Reset()
387 return err
388 }
389 return nil
390}