blob: 075dbb5697fda47375e943f481bed2de10abaf78 [file] [log] [blame]
Mateusz Zalega43e21072021-10-08 18:05:29 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// Installer creates a Metropolis image at a suitable block device based on the
18// installer bundle present in the installation medium's ESP, after which it
19// reboots. It's meant to be used as an init process.
20package main
21
22import (
Lorenz Brun0b93c8d2021-11-09 03:58:40 +010023 "archive/zip"
Mateusz Zalega43e21072021-10-08 18:05:29 +020024 "fmt"
25 "io"
26 "log"
27 "os"
28 "path/filepath"
29 "strings"
30 "syscall"
31
32 "golang.org/x/sys/unix"
33 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
34 "source.monogon.dev/metropolis/pkg/efivarfs"
35 "source.monogon.dev/metropolis/pkg/sysfs"
36)
37
// mib is the number of bytes in one mebibyte. It is used both to size copy
// buffers and to convert partition sizes expressed in mebibytes into bytes.
const mib = 1 << 20
39
40// mountPseudoFS mounts efivarfs, devtmpfs and sysfs, used by the installer in
41// the block device discovery stage.
42func mountPseudoFS() error {
43 for _, m := range []struct {
44 dir string
45 fs string
46 flags uintptr
47 }{
48 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
49 {efivarfs.Path, "efivarfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
50 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
51 } {
52 if err := unix.Mkdir(m.dir, 0700); err != nil && !os.IsExist(err) {
53 return fmt.Errorf("couldn't create the mountpoint at %q: %w", m.dir, err)
54 }
55 if err := unix.Mount(m.fs, m.dir, m.fs, m.flags, ""); err != nil {
56 return fmt.Errorf("couldn't mount %q at %q: %w", m.fs, m.dir, err)
57 }
58 }
59 return nil
60}
61
62// mountInstallerESP mounts the filesystem the installer was loaded from based
63// on espPath, which must point to the appropriate partition block device. The
64// filesystem is mounted at /installer.
65func mountInstallerESP(espPath string) error {
66 // Create the mountpoint.
67 if err := unix.Mkdir("/installer", 0700); err != nil {
68 return fmt.Errorf("couldn't create the installer mountpoint: %w", err)
69 }
70 // Mount the filesystem.
71 if err := unix.Mount(espPath, "/installer", "vfat", unix.MS_NOEXEC|unix.MS_RDONLY, ""); err != nil {
72 return fmt.Errorf("couldn't mount the installer ESP (%q -> %q): %w", espPath, "/installer", err)
73 }
74 return nil
75}
76
77// findInstallableBlockDevices returns names of all the block devices suitable
78// for hosting a Metropolis installation, limited by the size expressed in
79// bytes minSize. The install medium espDev will be excluded from the result.
80func findInstallableBlockDevices(espDev string, minSize uint64) ([]string, error) {
81 // Use the partition's name to find and return the name of its parent
82 // device. It will be excluded from the list of suitable target devices.
83 srcDev, err := sysfs.ParentBlockDevice(espDev)
84 // Build the exclusion list containing forbidden handle prefixes.
85 exclude := []string{"dm-", "zram", "ram", "loop", srcDev}
86
87 // Get the block device handles by looking up directory contents.
88 const blkDirPath = "/sys/class/block"
89 blkDevs, err := os.ReadDir(blkDirPath)
90 if err != nil {
91 return nil, fmt.Errorf("couldn't read %q: %w", blkDirPath, err)
92 }
93 // Iterate over the handles, skipping any block device that either points to
94 // a partition, matches the exclusion list, or is smaller than minSize.
95 var suitable []string
96probeLoop:
97 for _, devInfo := range blkDevs {
98 // Skip devices according to the exclusion list.
99 for _, prefix := range exclude {
100 if strings.HasPrefix(devInfo.Name(), prefix) {
101 continue probeLoop
102 }
103 }
104
105 // Skip partition symlinks.
106 if _, err := os.Stat(filepath.Join(blkDirPath, devInfo.Name(), "partition")); err == nil {
107 continue
108 } else if !os.IsNotExist(err) {
109 return nil, fmt.Errorf("while probing sysfs: %w", err)
110 }
111
112 // Skip devices of insufficient size.
113 devPath := filepath.Join("/dev", devInfo.Name())
114 dev, err := os.Open(devPath)
115 if err != nil {
116 return nil, fmt.Errorf("couldn't open a block device at %q: %w", devPath, err)
117 }
118 size, err := unix.IoctlGetInt(int(dev.Fd()), unix.BLKGETSIZE64)
119 dev.Close()
120 if err != nil {
121 return nil, fmt.Errorf("couldn't probe the size of %q: %w", devPath, err)
122 }
123 if uint64(size) < minSize {
124 continue
125 }
126
127 suitable = append(suitable, devInfo.Name())
128 }
129 return suitable, nil
130}
131
132// rereadPartitionTable causes the kernel to read the partition table present
133// in the block device at blkdevPath. It may return an error.
134func rereadPartitionTable(blkdevPath string) error {
135 dev, err := os.Open(blkdevPath)
136 if err != nil {
137 return fmt.Errorf("couldn't open the block device at %q: %w", blkdevPath, err)
138 }
139 defer dev.Close()
140 ret, err := unix.IoctlRetInt(int(dev.Fd()), unix.BLKRRPART)
141 if err != nil {
142 return fmt.Errorf("while doing an ioctl: %w", err)
143 }
144 if syscall.Errno(ret) == unix.EINVAL {
145 return fmt.Errorf("got an EINVAL from BLKRRPART ioctl")
146 }
147 return nil
148}
149
150// initializeSystemPartition writes image contents to the node's system
151// partition using the block device abstraction layer as opposed to slower
152// go-diskfs. tgtBlkdev must contain a path pointing to the block device
153// associated with the system partition. It may return an error.
154func initializeSystemPartition(image io.Reader, tgtBlkdev string) error {
155 // Check that tgtBlkdev points at an actual block device.
156 info, err := os.Stat(tgtBlkdev)
157 if err != nil {
158 return fmt.Errorf("couldn't stat the system partition at %q: %w", tgtBlkdev, err)
159 }
160 if info.Mode()&os.ModeDevice == 0 {
161 return fmt.Errorf("system partition path %q doesn't point to a block device", tgtBlkdev)
162 }
163
164 // Get the system partition's file descriptor.
165 sys, err := os.OpenFile(tgtBlkdev, os.O_WRONLY, 0600)
166 if err != nil {
167 return fmt.Errorf("couldn't open the system partition at %q: %w", tgtBlkdev, err)
168 }
169 defer sys.Close()
170 // Copy the system partition contents. Use a bigger buffer to optimize disk
171 // writes.
172 buf := make([]byte, mib)
173 if _, err := io.CopyBuffer(sys, image, buf); err != nil {
174 return fmt.Errorf("couldn't copy partition contents: %w", err)
175 }
176 return nil
177}
178
179func main() {
180 // Mount sysfs, devtmpfs and efivarfs.
181 if err := mountPseudoFS(); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100182 log.Fatalf("while mounting pseudo-filesystems: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200183 }
184 // Read the installer ESP UUID from efivarfs.
185 espUuid, err := efivarfs.ReadLoaderDevicePartUUID()
186 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100187 log.Fatalf("while reading the installer ESP UUID: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200188 }
189 // Look up the installer partition based on espUuid.
190 espDev, err := sysfs.DeviceByPartUUID(espUuid)
191 espPath := filepath.Join("/dev", espDev)
192 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100193 log.Fatalf("while resolving the installer device handle: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200194 }
195 // Mount the installer partition. The installer bundle will be read from it.
196 if err := mountInstallerESP(espPath); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100197 log.Fatalf("while mounting the installer ESP: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200198 }
199
Lorenz Brun6c35e972021-12-14 03:08:23 +0100200 nodeParameters, err := os.Open("/installer/metropolis-installer/nodeparams.pb")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100201 if err != nil {
202 log.Fatalf("failed to open node parameters from ESP: %v", err)
203 }
204
205 // TODO(lorenz): Replace with proper bundles
Lorenz Brun6c35e972021-12-14 03:08:23 +0100206 bundle, err := zip.OpenReader("/installer/metropolis-installer/bundle.bin")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100207 if err != nil {
208 log.Fatalf("failed to open node bundle from ESP: %v", err)
209 }
210 defer bundle.Close()
211 efiPayload, err := bundle.Open("kernel_efi.efi")
212 if err != nil {
213 log.Fatalf("Cannot open EFI payload in bundle: %v", err)
214 }
215 defer efiPayload.Close()
216 systemImage, err := bundle.Open("rootfs.img")
217 if err != nil {
218 log.Fatalf("Cannot open system image in bundle: %v", err)
219 }
220 defer systemImage.Close()
221
Mateusz Zalega43e21072021-10-08 18:05:29 +0200222 // Build the osimage parameters.
223 installParams := osimage.Params{
224 PartitionSize: osimage.PartitionSizeInfo{
225 // ESP is the size of the node ESP partition, expressed in mebibytes.
226 ESP: 128,
227 // System is the size of the node system partition, expressed in
228 // mebibytes.
229 System: 4096,
230 // Data must be nonzero in order for the data partition to be created.
231 // osimage will extend the data partition to fill all the available space
232 // whenever it's writing to block devices, such as now.
233 Data: 128,
234 },
235 // Due to a bug in go-diskfs causing slow writes, SystemImage is explicitly
236 // marked unused here, as system partition contents will be written using
237 // a workaround below instead.
238 // TODO(mateusz@monogon.tech): Address that bug either by patching go-diskfs
239 // or rewriting osimage.
240 SystemImage: nil,
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100241
242 EFIPayload: efiPayload,
243 NodeParameters: nodeParameters,
Mateusz Zalega43e21072021-10-08 18:05:29 +0200244 }
245 // Calculate the minimum target size based on the installation parameters.
246 minSize := uint64((installParams.PartitionSize.ESP +
247 installParams.PartitionSize.System +
248 installParams.PartitionSize.Data + 1) * mib)
249
250 // Look for suitable block devices, given the minimum size.
251 blkDevs, err := findInstallableBlockDevices(espDev, minSize)
252 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100253 log.Fatal(err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200254 }
255 if len(blkDevs) == 0 {
256 log.Fatal("couldn't find a suitable block device.")
257 }
258 // Set the first suitable block device found as the installation target.
259 tgtBlkdevName := blkDevs[0]
260 // Update the osimage parameters with a path pointing at the target device.
261 tgtBlkdevPath := filepath.Join("/dev", tgtBlkdevName)
262 installParams.OutputPath = tgtBlkdevPath
263
264 // Use osimage to partition the target block device and set up its ESP.
265 // Create will return an EFI boot entry on success.
266 log.Printf("Installing to %s\n", tgtBlkdevPath)
267 be, err := osimage.Create(&installParams)
268 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100269 log.Fatalf("while installing: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200270 }
271 // The target device's partition table has just been updated. Re-read it to
272 // make the node system partition reachable through /dev.
273 if err := rereadPartitionTable(tgtBlkdevPath); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100274 log.Fatalf("while re-reading the partition table of %q: %v", tgtBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200275 }
276 // Look up the node's system partition path to be later used in the
277 // initialization step. It's always the second partition, right after
278 // the ESP.
279 sysBlkdevName, err := sysfs.PartitionBlockDevice(tgtBlkdevName, 2)
280 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100281 log.Fatalf("while looking up the system partition: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200282 }
283 sysBlkdevPath := filepath.Join("/dev", sysBlkdevName)
284 // Copy the system partition contents.
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100285 if err := initializeSystemPartition(systemImage, sysBlkdevPath); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100286 log.Fatalf("while initializing the system partition at %q: %v", sysBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200287 }
288
289 // Create an EFI boot entry for Metropolis.
290 en, err := efivarfs.CreateBootEntry(be)
291 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100292 log.Fatalf("while creating a boot entry: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200293 }
294 // Erase the preexisting boot order, leaving Metropolis as the only option.
295 if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(en)}); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100296 log.Fatalf("while adjusting the boot order: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200297 }
298
299 // Reboot.
300 unix.Sync()
301 log.Print("Installation completed. Rebooting.")
302 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
303}