blob: e3a4896b1c9c915461d3bf42e62a29974d2c4222 [file] [log] [blame]
Mateusz Zalega43e21072021-10-08 18:05:29 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// Installer creates a Metropolis image at a suitable block device based on the
18// installer bundle present in the installation medium's ESP, after which it
19// reboots. It's meant to be used as an init process.
20package main
21
22import (
Lorenz Brun0b93c8d2021-11-09 03:58:40 +010023 "archive/zip"
Mateusz Zalega43e21072021-10-08 18:05:29 +020024 "fmt"
25 "io"
Mateusz Zalega43e21072021-10-08 18:05:29 +020026 "os"
27 "path/filepath"
28 "strings"
29 "syscall"
30
31 "golang.org/x/sys/unix"
Serge Bazanski97783222021-12-14 16:04:26 +010032
Mateusz Zalega43e21072021-10-08 18:05:29 +020033 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
34 "source.monogon.dev/metropolis/pkg/efivarfs"
35 "source.monogon.dev/metropolis/pkg/sysfs"
36)
37
// mib is the number of bytes in one mebibyte. It converts the partition
// sizes used by the installer, which are expressed in mebibytes, into bytes.
const mib = 1 << 20
39
40// mountPseudoFS mounts efivarfs, devtmpfs and sysfs, used by the installer in
41// the block device discovery stage.
42func mountPseudoFS() error {
43 for _, m := range []struct {
44 dir string
45 fs string
46 flags uintptr
47 }{
48 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
49 {efivarfs.Path, "efivarfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
50 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
51 } {
52 if err := unix.Mkdir(m.dir, 0700); err != nil && !os.IsExist(err) {
53 return fmt.Errorf("couldn't create the mountpoint at %q: %w", m.dir, err)
54 }
55 if err := unix.Mount(m.fs, m.dir, m.fs, m.flags, ""); err != nil {
56 return fmt.Errorf("couldn't mount %q at %q: %w", m.fs, m.dir, err)
57 }
58 }
59 return nil
60}
61
62// mountInstallerESP mounts the filesystem the installer was loaded from based
63// on espPath, which must point to the appropriate partition block device. The
64// filesystem is mounted at /installer.
65func mountInstallerESP(espPath string) error {
66 // Create the mountpoint.
67 if err := unix.Mkdir("/installer", 0700); err != nil {
68 return fmt.Errorf("couldn't create the installer mountpoint: %w", err)
69 }
70 // Mount the filesystem.
71 if err := unix.Mount(espPath, "/installer", "vfat", unix.MS_NOEXEC|unix.MS_RDONLY, ""); err != nil {
72 return fmt.Errorf("couldn't mount the installer ESP (%q -> %q): %w", espPath, "/installer", err)
73 }
74 return nil
75}
76
77// findInstallableBlockDevices returns names of all the block devices suitable
78// for hosting a Metropolis installation, limited by the size expressed in
79// bytes minSize. The install medium espDev will be excluded from the result.
80func findInstallableBlockDevices(espDev string, minSize uint64) ([]string, error) {
81 // Use the partition's name to find and return the name of its parent
82 // device. It will be excluded from the list of suitable target devices.
83 srcDev, err := sysfs.ParentBlockDevice(espDev)
84 // Build the exclusion list containing forbidden handle prefixes.
85 exclude := []string{"dm-", "zram", "ram", "loop", srcDev}
86
87 // Get the block device handles by looking up directory contents.
88 const blkDirPath = "/sys/class/block"
89 blkDevs, err := os.ReadDir(blkDirPath)
90 if err != nil {
91 return nil, fmt.Errorf("couldn't read %q: %w", blkDirPath, err)
92 }
93 // Iterate over the handles, skipping any block device that either points to
94 // a partition, matches the exclusion list, or is smaller than minSize.
95 var suitable []string
96probeLoop:
97 for _, devInfo := range blkDevs {
98 // Skip devices according to the exclusion list.
99 for _, prefix := range exclude {
100 if strings.HasPrefix(devInfo.Name(), prefix) {
101 continue probeLoop
102 }
103 }
104
105 // Skip partition symlinks.
106 if _, err := os.Stat(filepath.Join(blkDirPath, devInfo.Name(), "partition")); err == nil {
107 continue
108 } else if !os.IsNotExist(err) {
109 return nil, fmt.Errorf("while probing sysfs: %w", err)
110 }
111
112 // Skip devices of insufficient size.
113 devPath := filepath.Join("/dev", devInfo.Name())
114 dev, err := os.Open(devPath)
115 if err != nil {
116 return nil, fmt.Errorf("couldn't open a block device at %q: %w", devPath, err)
117 }
118 size, err := unix.IoctlGetInt(int(dev.Fd()), unix.BLKGETSIZE64)
119 dev.Close()
120 if err != nil {
121 return nil, fmt.Errorf("couldn't probe the size of %q: %w", devPath, err)
122 }
123 if uint64(size) < minSize {
124 continue
125 }
126
127 suitable = append(suitable, devInfo.Name())
128 }
129 return suitable, nil
130}
131
132// rereadPartitionTable causes the kernel to read the partition table present
133// in the block device at blkdevPath. It may return an error.
134func rereadPartitionTable(blkdevPath string) error {
135 dev, err := os.Open(blkdevPath)
136 if err != nil {
137 return fmt.Errorf("couldn't open the block device at %q: %w", blkdevPath, err)
138 }
139 defer dev.Close()
140 ret, err := unix.IoctlRetInt(int(dev.Fd()), unix.BLKRRPART)
141 if err != nil {
142 return fmt.Errorf("while doing an ioctl: %w", err)
143 }
144 if syscall.Errno(ret) == unix.EINVAL {
145 return fmt.Errorf("got an EINVAL from BLKRRPART ioctl")
146 }
147 return nil
148}
149
// initializeSystemPartition writes image contents to the node's system
// partition using the block device abstraction layer as opposed to slower
// go-diskfs. tgtBlkdev must contain a path pointing to the block device
// associated with the system partition.
//
// An error is returned if tgtBlkdev can't be inspected or opened, if it isn't
// a block device (regular files and character devices are rejected), or if
// copying, flushing or closing the device fails.
func initializeSystemPartition(image io.Reader, tgtBlkdev string) error {
	// copyBufSize is the size of the copy buffer: 1 MiB, well above io.Copy's
	// default, to optimize disk writes.
	const copyBufSize = 1024 * 1024

	// Check that tgtBlkdev points at an actual block device. os.ModeDevice is
	// set for character devices too, so those are ruled out explicitly.
	info, err := os.Stat(tgtBlkdev)
	if err != nil {
		return fmt.Errorf("couldn't stat the system partition at %q: %w", tgtBlkdev, err)
	}
	if mode := info.Mode(); mode&os.ModeDevice == 0 || mode&os.ModeCharDevice != 0 {
		return fmt.Errorf("system partition path %q doesn't point to a block device", tgtBlkdev)
	}

	// Get the system partition's file descriptor.
	sys, err := os.OpenFile(tgtBlkdev, os.O_WRONLY, 0600)
	if err != nil {
		return fmt.Errorf("couldn't open the system partition at %q: %w", tgtBlkdev, err)
	}

	// Copy the system partition contents. *os.File implements io.ReaderFrom,
	// which makes io.CopyBuffer bypass the supplied buffer; wrapping the file
	// in a plain io.Writer hides that method so the bigger buffer is used.
	buf := make([]byte, copyBufSize)
	if _, err := io.CopyBuffer(struct{ io.Writer }{sys}, image, buf); err != nil {
		sys.Close()
		return fmt.Errorf("couldn't copy partition contents: %w", err)
	}
	// Flush before closing so that write-back failures are reported here
	// rather than being silently lost.
	if err := sys.Sync(); err != nil {
		sys.Close()
		return fmt.Errorf("couldn't flush the system partition at %q: %w", tgtBlkdev, err)
	}
	if err := sys.Close(); err != nil {
		return fmt.Errorf("couldn't close the system partition at %q: %w", tgtBlkdev, err)
	}
	return nil
}
178
// panicf formats its arguments like fmt.Sprintf and panics with the resulting
// string. It stands in for log.Panicf, which would also print the message
// before panicking; here printing is left to whoever recovers the panic.
func panicf(format string, v ...interface{}) {
	panic(fmt.Sprintf(format, v...))
}
185
Mateusz Zalega43e21072021-10-08 18:05:29 +0200186func main() {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100187 // Reboot on panic after a delay. The error string will have been printed
188 // before recover is called.
189 defer func() {
190 if r := recover(); r != nil {
191 fmt.Println(r)
192 fmt.Println("The installation could not be finalized. Please reboot to continue.")
193 syscall.Pause()
194 }
195 }()
196
Mateusz Zalega43e21072021-10-08 18:05:29 +0200197 // Mount sysfs, devtmpfs and efivarfs.
198 if err := mountPseudoFS(); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100199 panicf("While mounting pseudo-filesystems: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200200 }
201 // Read the installer ESP UUID from efivarfs.
202 espUuid, err := efivarfs.ReadLoaderDevicePartUUID()
203 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100204 panicf("While reading the installer ESP UUID: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200205 }
206 // Look up the installer partition based on espUuid.
207 espDev, err := sysfs.DeviceByPartUUID(espUuid)
208 espPath := filepath.Join("/dev", espDev)
209 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100210 panicf("While resolving the installer device handle: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200211 }
212 // Mount the installer partition. The installer bundle will be read from it.
213 if err := mountInstallerESP(espPath); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100214 panicf("While mounting the installer ESP: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200215 }
216
Lorenz Brun6c35e972021-12-14 03:08:23 +0100217 nodeParameters, err := os.Open("/installer/metropolis-installer/nodeparams.pb")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100218 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100219 panicf("Failed to open node parameters from ESP: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100220 }
221
222 // TODO(lorenz): Replace with proper bundles
Lorenz Brun6c35e972021-12-14 03:08:23 +0100223 bundle, err := zip.OpenReader("/installer/metropolis-installer/bundle.bin")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100224 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100225 panicf("Failed to open node bundle from ESP: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100226 }
227 defer bundle.Close()
228 efiPayload, err := bundle.Open("kernel_efi.efi")
229 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100230 panicf("Cannot open EFI payload in bundle: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100231 }
232 defer efiPayload.Close()
233 systemImage, err := bundle.Open("rootfs.img")
234 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100235 panicf("Cannot open system image in bundle: %v", err)
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100236 }
237 defer systemImage.Close()
238
Mateusz Zalega43e21072021-10-08 18:05:29 +0200239 // Build the osimage parameters.
240 installParams := osimage.Params{
241 PartitionSize: osimage.PartitionSizeInfo{
242 // ESP is the size of the node ESP partition, expressed in mebibytes.
243 ESP: 128,
244 // System is the size of the node system partition, expressed in
245 // mebibytes.
246 System: 4096,
247 // Data must be nonzero in order for the data partition to be created.
248 // osimage will extend the data partition to fill all the available space
249 // whenever it's writing to block devices, such as now.
250 Data: 128,
251 },
252 // Due to a bug in go-diskfs causing slow writes, SystemImage is explicitly
253 // marked unused here, as system partition contents will be written using
254 // a workaround below instead.
255 // TODO(mateusz@monogon.tech): Address that bug either by patching go-diskfs
256 // or rewriting osimage.
257 SystemImage: nil,
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100258
259 EFIPayload: efiPayload,
260 NodeParameters: nodeParameters,
Mateusz Zalega43e21072021-10-08 18:05:29 +0200261 }
262 // Calculate the minimum target size based on the installation parameters.
263 minSize := uint64((installParams.PartitionSize.ESP +
264 installParams.PartitionSize.System +
265 installParams.PartitionSize.Data + 1) * mib)
266
267 // Look for suitable block devices, given the minimum size.
268 blkDevs, err := findInstallableBlockDevices(espDev, minSize)
269 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100270 panicf(err.Error())
Mateusz Zalega43e21072021-10-08 18:05:29 +0200271 }
272 if len(blkDevs) == 0 {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100273 panicf("Couldn't find a suitable block device.")
Mateusz Zalega43e21072021-10-08 18:05:29 +0200274 }
275 // Set the first suitable block device found as the installation target.
276 tgtBlkdevName := blkDevs[0]
277 // Update the osimage parameters with a path pointing at the target device.
278 tgtBlkdevPath := filepath.Join("/dev", tgtBlkdevName)
279 installParams.OutputPath = tgtBlkdevPath
280
281 // Use osimage to partition the target block device and set up its ESP.
282 // Create will return an EFI boot entry on success.
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100283 fmt.Printf("Installing to %s\n", tgtBlkdevPath)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200284 be, err := osimage.Create(&installParams)
285 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100286 panicf("While installing: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200287 }
288 // The target device's partition table has just been updated. Re-read it to
289 // make the node system partition reachable through /dev.
290 if err := rereadPartitionTable(tgtBlkdevPath); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100291 panicf("While re-reading the partition table of %q: %v", tgtBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200292 }
293 // Look up the node's system partition path to be later used in the
294 // initialization step. It's always the second partition, right after
295 // the ESP.
296 sysBlkdevName, err := sysfs.PartitionBlockDevice(tgtBlkdevName, 2)
297 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100298 panicf("While looking up the system partition: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200299 }
300 sysBlkdevPath := filepath.Join("/dev", sysBlkdevName)
301 // Copy the system partition contents.
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100302 if err := initializeSystemPartition(systemImage, sysBlkdevPath); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100303 panicf("While initializing the system partition at %q: %v", sysBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200304 }
305
306 // Create an EFI boot entry for Metropolis.
307 en, err := efivarfs.CreateBootEntry(be)
308 if err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100309 panicf("While creating a boot entry: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200310 }
311 // Erase the preexisting boot order, leaving Metropolis as the only option.
312 if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(en)}); err != nil {
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100313 panicf("While adjusting the boot order: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200314 }
315
316 // Reboot.
317 unix.Sync()
Mateusz Zalegacdcc7392021-12-08 15:34:53 +0100318 fmt.Println("Installation completed. Rebooting.")
Mateusz Zalega43e21072021-10-08 18:05:29 +0200319 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
320}