blob: 520bc895771b6d0f17ba5e1cde4986e65d8ed313 [file] [log] [blame]
Mateusz Zalega43e21072021-10-08 18:05:29 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// Installer creates a Metropolis image at a suitable block device based on the
18// installer bundle present in the installation medium's ESP, after which it
19// reboots. It's meant to be used as an init process.
20package main
21
22import (
Lorenz Brun0b93c8d2021-11-09 03:58:40 +010023 "archive/zip"
Lorenz Brun57d06a72022-01-13 14:12:27 +010024 "errors"
Mateusz Zalega43e21072021-10-08 18:05:29 +020025 "fmt"
26 "io"
Mateusz Zalega43e21072021-10-08 18:05:29 +020027 "os"
28 "path/filepath"
29 "strings"
30 "syscall"
Lorenz Brun57d06a72022-01-13 14:12:27 +010031 "time"
Mateusz Zalega43e21072021-10-08 18:05:29 +020032
33 "golang.org/x/sys/unix"
Serge Bazanski97783222021-12-14 16:04:26 +010034
Mateusz Zalega43e21072021-10-08 18:05:29 +020035 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
36 "source.monogon.dev/metropolis/pkg/efivarfs"
37 "source.monogon.dev/metropolis/pkg/sysfs"
38)
39
// mib is the number of bytes in one mebibyte.
const mib = 1 << 20
41
42// mountPseudoFS mounts efivarfs, devtmpfs and sysfs, used by the installer in
43// the block device discovery stage.
44func mountPseudoFS() error {
45 for _, m := range []struct {
46 dir string
47 fs string
48 flags uintptr
49 }{
50 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
51 {efivarfs.Path, "efivarfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
52 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
53 } {
54 if err := unix.Mkdir(m.dir, 0700); err != nil && !os.IsExist(err) {
55 return fmt.Errorf("couldn't create the mountpoint at %q: %w", m.dir, err)
56 }
57 if err := unix.Mount(m.fs, m.dir, m.fs, m.flags, ""); err != nil {
58 return fmt.Errorf("couldn't mount %q at %q: %w", m.fs, m.dir, err)
59 }
60 }
61 return nil
62}
63
64// mountInstallerESP mounts the filesystem the installer was loaded from based
65// on espPath, which must point to the appropriate partition block device. The
66// filesystem is mounted at /installer.
67func mountInstallerESP(espPath string) error {
68 // Create the mountpoint.
69 if err := unix.Mkdir("/installer", 0700); err != nil {
70 return fmt.Errorf("couldn't create the installer mountpoint: %w", err)
71 }
72 // Mount the filesystem.
73 if err := unix.Mount(espPath, "/installer", "vfat", unix.MS_NOEXEC|unix.MS_RDONLY, ""); err != nil {
74 return fmt.Errorf("couldn't mount the installer ESP (%q -> %q): %w", espPath, "/installer", err)
75 }
76 return nil
77}
78
79// findInstallableBlockDevices returns names of all the block devices suitable
80// for hosting a Metropolis installation, limited by the size expressed in
81// bytes minSize. The install medium espDev will be excluded from the result.
82func findInstallableBlockDevices(espDev string, minSize uint64) ([]string, error) {
83 // Use the partition's name to find and return the name of its parent
84 // device. It will be excluded from the list of suitable target devices.
85 srcDev, err := sysfs.ParentBlockDevice(espDev)
86 // Build the exclusion list containing forbidden handle prefixes.
87 exclude := []string{"dm-", "zram", "ram", "loop", srcDev}
88
89 // Get the block device handles by looking up directory contents.
90 const blkDirPath = "/sys/class/block"
91 blkDevs, err := os.ReadDir(blkDirPath)
92 if err != nil {
93 return nil, fmt.Errorf("couldn't read %q: %w", blkDirPath, err)
94 }
95 // Iterate over the handles, skipping any block device that either points to
96 // a partition, matches the exclusion list, or is smaller than minSize.
97 var suitable []string
98probeLoop:
99 for _, devInfo := range blkDevs {
100 // Skip devices according to the exclusion list.
101 for _, prefix := range exclude {
102 if strings.HasPrefix(devInfo.Name(), prefix) {
103 continue probeLoop
104 }
105 }
106
107 // Skip partition symlinks.
108 if _, err := os.Stat(filepath.Join(blkDirPath, devInfo.Name(), "partition")); err == nil {
109 continue
110 } else if !os.IsNotExist(err) {
111 return nil, fmt.Errorf("while probing sysfs: %w", err)
112 }
113
114 // Skip devices of insufficient size.
115 devPath := filepath.Join("/dev", devInfo.Name())
116 dev, err := os.Open(devPath)
117 if err != nil {
118 return nil, fmt.Errorf("couldn't open a block device at %q: %w", devPath, err)
119 }
120 size, err := unix.IoctlGetInt(int(dev.Fd()), unix.BLKGETSIZE64)
121 dev.Close()
122 if err != nil {
123 return nil, fmt.Errorf("couldn't probe the size of %q: %w", devPath, err)
124 }
125 if uint64(size) < minSize {
126 continue
127 }
128
129 suitable = append(suitable, devInfo.Name())
130 }
131 return suitable, nil
132}
133
134// rereadPartitionTable causes the kernel to read the partition table present
135// in the block device at blkdevPath. It may return an error.
136func rereadPartitionTable(blkdevPath string) error {
137 dev, err := os.Open(blkdevPath)
138 if err != nil {
139 return fmt.Errorf("couldn't open the block device at %q: %w", blkdevPath, err)
140 }
141 defer dev.Close()
142 ret, err := unix.IoctlRetInt(int(dev.Fd()), unix.BLKRRPART)
143 if err != nil {
144 return fmt.Errorf("while doing an ioctl: %w", err)
145 }
146 if syscall.Errno(ret) == unix.EINVAL {
147 return fmt.Errorf("got an EINVAL from BLKRRPART ioctl")
148 }
149 return nil
150}
151
152// initializeSystemPartition writes image contents to the node's system
153// partition using the block device abstraction layer as opposed to slower
154// go-diskfs. tgtBlkdev must contain a path pointing to the block device
155// associated with the system partition. It may return an error.
156func initializeSystemPartition(image io.Reader, tgtBlkdev string) error {
157 // Check that tgtBlkdev points at an actual block device.
158 info, err := os.Stat(tgtBlkdev)
159 if err != nil {
160 return fmt.Errorf("couldn't stat the system partition at %q: %w", tgtBlkdev, err)
161 }
162 if info.Mode()&os.ModeDevice == 0 {
163 return fmt.Errorf("system partition path %q doesn't point to a block device", tgtBlkdev)
164 }
165
166 // Get the system partition's file descriptor.
167 sys, err := os.OpenFile(tgtBlkdev, os.O_WRONLY, 0600)
168 if err != nil {
169 return fmt.Errorf("couldn't open the system partition at %q: %w", tgtBlkdev, err)
170 }
171 defer sys.Close()
172 // Copy the system partition contents. Use a bigger buffer to optimize disk
173 // writes.
174 buf := make([]byte, mib)
175 if _, err := io.CopyBuffer(sys, image, buf); err != nil {
176 return fmt.Errorf("couldn't copy partition contents: %w", err)
177 }
178 return nil
179}
180
// panicf stands in for log.Panicf: it formats its arguments the way
// fmt.Sprintf does and panics with the resulting string, without printing
// anything itself.
func panicf(format string, v ...interface{}) {
	panic(fmt.Sprintf(format, v...))
}
187
Mateusz Zalega43e21072021-10-08 18:05:29 +0200188func main() {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100189 // Reboot on panic after a delay. The error string will have been printed
190 // before recover is called.
191 defer func() {
192 if r := recover(); r != nil {
193 fmt.Println(r)
194 fmt.Println("The installation could not be finalized. Please reboot to continue.")
195 syscall.Pause()
196 }
197 }()
198
Mateusz Zalega43e21072021-10-08 18:05:29 +0200199 // Mount sysfs, devtmpfs and efivarfs.
200 if err := mountPseudoFS(); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100201 panicf("While mounting pseudo-filesystems: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200202 }
203 // Read the installer ESP UUID from efivarfs.
204 espUuid, err := efivarfs.ReadLoaderDevicePartUUID()
205 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100206 panicf("While reading the installer ESP UUID: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200207 }
Lorenz Brun57d06a72022-01-13 14:12:27 +0100208 // Wait for up to 30 tries @ 1s (30s) for the ESP to show up
209 var espDev string
210 var retries = 30
211 for {
212 // Look up the installer partition based on espUuid.
213 espDev, err = sysfs.DeviceByPartUUID(espUuid)
214 if err == nil {
215 break
216 } else if errors.Is(err, sysfs.ErrDevNotFound) && retries > 0 {
217 time.Sleep(1 * time.Second)
218 retries--
219 } else {
220 panicf("While resolving the installer device handle: %v", err)
221 }
Mateusz Zalega43e21072021-10-08 18:05:29 +0200222 }
Lorenz Brun57d06a72022-01-13 14:12:27 +0100223 espPath := filepath.Join("/dev", espDev)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200224 // Mount the installer partition. The installer bundle will be read from it.
225 if err := mountInstallerESP(espPath); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100226 panicf("While mounting the installer ESP: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200227 }
228
Lorenz Brun6c35e972021-12-14 03:08:23 +0100229 nodeParameters, err := os.Open("/installer/metropolis-installer/nodeparams.pb")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100230 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100231 panicf("Failed to open node parameters from ESP: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100232 }
233
234 // TODO(lorenz): Replace with proper bundles
Lorenz Brun6c35e972021-12-14 03:08:23 +0100235 bundle, err := zip.OpenReader("/installer/metropolis-installer/bundle.bin")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100236 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100237 panicf("Failed to open node bundle from ESP: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100238 }
239 defer bundle.Close()
240 efiPayload, err := bundle.Open("kernel_efi.efi")
241 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100242 panicf("Cannot open EFI payload in bundle: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100243 }
244 defer efiPayload.Close()
Mateusz Zalega8c2c7712022-01-25 19:42:21 +0100245 systemImage, err := bundle.Open("verity_rootfs.img")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100246 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100247 panicf("Cannot open system image in bundle: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100248 }
249 defer systemImage.Close()
250
Mateusz Zalega43e21072021-10-08 18:05:29 +0200251 // Build the osimage parameters.
252 installParams := osimage.Params{
253 PartitionSize: osimage.PartitionSizeInfo{
254 // ESP is the size of the node ESP partition, expressed in mebibytes.
255 ESP: 128,
256 // System is the size of the node system partition, expressed in
257 // mebibytes.
258 System: 4096,
259 // Data must be nonzero in order for the data partition to be created.
260 // osimage will extend the data partition to fill all the available space
261 // whenever it's writing to block devices, such as now.
262 Data: 128,
263 },
264 // Due to a bug in go-diskfs causing slow writes, SystemImage is explicitly
265 // marked unused here, as system partition contents will be written using
266 // a workaround below instead.
267 // TODO(mateusz@monogon.tech): Address that bug either by patching go-diskfs
268 // or rewriting osimage.
269 SystemImage: nil,
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100270
271 EFIPayload: efiPayload,
272 NodeParameters: nodeParameters,
Mateusz Zalega43e21072021-10-08 18:05:29 +0200273 }
274 // Calculate the minimum target size based on the installation parameters.
275 minSize := uint64((installParams.PartitionSize.ESP +
276 installParams.PartitionSize.System +
277 installParams.PartitionSize.Data + 1) * mib)
278
279 // Look for suitable block devices, given the minimum size.
280 blkDevs, err := findInstallableBlockDevices(espDev, minSize)
281 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100282 panicf(err.Error())
Mateusz Zalega43e21072021-10-08 18:05:29 +0200283 }
284 if len(blkDevs) == 0 {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100285 panicf("Couldn't find a suitable block device.")
Mateusz Zalega43e21072021-10-08 18:05:29 +0200286 }
287 // Set the first suitable block device found as the installation target.
288 tgtBlkdevName := blkDevs[0]
289 // Update the osimage parameters with a path pointing at the target device.
290 tgtBlkdevPath := filepath.Join("/dev", tgtBlkdevName)
291 installParams.OutputPath = tgtBlkdevPath
292
293 // Use osimage to partition the target block device and set up its ESP.
294 // Create will return an EFI boot entry on success.
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100295 fmt.Printf("Installing to %s\n", tgtBlkdevPath)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200296 be, err := osimage.Create(&installParams)
297 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100298 panicf("While installing: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200299 }
300 // The target device's partition table has just been updated. Re-read it to
301 // make the node system partition reachable through /dev.
302 if err := rereadPartitionTable(tgtBlkdevPath); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100303 panicf("While re-reading the partition table of %q: %v", tgtBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200304 }
305 // Look up the node's system partition path to be later used in the
306 // initialization step. It's always the second partition, right after
307 // the ESP.
308 sysBlkdevName, err := sysfs.PartitionBlockDevice(tgtBlkdevName, 2)
309 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100310 panicf("While looking up the system partition: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200311 }
312 sysBlkdevPath := filepath.Join("/dev", sysBlkdevName)
313 // Copy the system partition contents.
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100314 if err := initializeSystemPartition(systemImage, sysBlkdevPath); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100315 panicf("While initializing the system partition at %q: %v", sysBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200316 }
317
318 // Create an EFI boot entry for Metropolis.
319 en, err := efivarfs.CreateBootEntry(be)
320 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100321 panicf("While creating a boot entry: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200322 }
323 // Erase the preexisting boot order, leaving Metropolis as the only option.
324 if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(en)}); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100325 panicf("While adjusting the boot order: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200326 }
327
328 // Reboot.
329 unix.Sync()
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100330 fmt.Println("Installation completed. Rebooting.")
Mateusz Zalega43e21072021-10-08 18:05:29 +0200331 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
332}