blob: 79a78fea2561a3a3cfab9e283b35b7b108f300d3 [file] [log] [blame]
Mateusz Zalega43e21072021-10-08 18:05:29 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// Installer creates a Metropolis image at a suitable block device based on the
18// installer bundle present in the installation medium's ESP, after which it
19// reboots. It's meant to be used as an init process.
20package main
21
22import (
Lorenz Brun0b93c8d2021-11-09 03:58:40 +010023 "archive/zip"
Mateusz Zalega43e21072021-10-08 18:05:29 +020024 "fmt"
25 "io"
26 "log"
27 "os"
28 "path/filepath"
29 "strings"
30 "syscall"
31
32 "golang.org/x/sys/unix"
Serge Bazanski97783222021-12-14 16:04:26 +010033
Mateusz Zalega43e21072021-10-08 18:05:29 +020034 "source.monogon.dev/metropolis/node/build/mkimage/osimage"
35 "source.monogon.dev/metropolis/pkg/efivarfs"
36 "source.monogon.dev/metropolis/pkg/sysfs"
37)
38
// mib is the number of bytes in one mebibyte (2^20). It is used to convert
// partition sizes expressed in mebibytes into bytes and to size copy buffers.
const mib = 1 << 20
40
41// mountPseudoFS mounts efivarfs, devtmpfs and sysfs, used by the installer in
42// the block device discovery stage.
43func mountPseudoFS() error {
44 for _, m := range []struct {
45 dir string
46 fs string
47 flags uintptr
48 }{
49 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
50 {efivarfs.Path, "efivarfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
51 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
52 } {
53 if err := unix.Mkdir(m.dir, 0700); err != nil && !os.IsExist(err) {
54 return fmt.Errorf("couldn't create the mountpoint at %q: %w", m.dir, err)
55 }
56 if err := unix.Mount(m.fs, m.dir, m.fs, m.flags, ""); err != nil {
57 return fmt.Errorf("couldn't mount %q at %q: %w", m.fs, m.dir, err)
58 }
59 }
60 return nil
61}
62
63// mountInstallerESP mounts the filesystem the installer was loaded from based
64// on espPath, which must point to the appropriate partition block device. The
65// filesystem is mounted at /installer.
66func mountInstallerESP(espPath string) error {
67 // Create the mountpoint.
68 if err := unix.Mkdir("/installer", 0700); err != nil {
69 return fmt.Errorf("couldn't create the installer mountpoint: %w", err)
70 }
71 // Mount the filesystem.
72 if err := unix.Mount(espPath, "/installer", "vfat", unix.MS_NOEXEC|unix.MS_RDONLY, ""); err != nil {
73 return fmt.Errorf("couldn't mount the installer ESP (%q -> %q): %w", espPath, "/installer", err)
74 }
75 return nil
76}
77
78// findInstallableBlockDevices returns names of all the block devices suitable
79// for hosting a Metropolis installation, limited by the size expressed in
80// bytes minSize. The install medium espDev will be excluded from the result.
81func findInstallableBlockDevices(espDev string, minSize uint64) ([]string, error) {
82 // Use the partition's name to find and return the name of its parent
83 // device. It will be excluded from the list of suitable target devices.
84 srcDev, err := sysfs.ParentBlockDevice(espDev)
85 // Build the exclusion list containing forbidden handle prefixes.
86 exclude := []string{"dm-", "zram", "ram", "loop", srcDev}
87
88 // Get the block device handles by looking up directory contents.
89 const blkDirPath = "/sys/class/block"
90 blkDevs, err := os.ReadDir(blkDirPath)
91 if err != nil {
92 return nil, fmt.Errorf("couldn't read %q: %w", blkDirPath, err)
93 }
94 // Iterate over the handles, skipping any block device that either points to
95 // a partition, matches the exclusion list, or is smaller than minSize.
96 var suitable []string
97probeLoop:
98 for _, devInfo := range blkDevs {
99 // Skip devices according to the exclusion list.
100 for _, prefix := range exclude {
101 if strings.HasPrefix(devInfo.Name(), prefix) {
102 continue probeLoop
103 }
104 }
105
106 // Skip partition symlinks.
107 if _, err := os.Stat(filepath.Join(blkDirPath, devInfo.Name(), "partition")); err == nil {
108 continue
109 } else if !os.IsNotExist(err) {
110 return nil, fmt.Errorf("while probing sysfs: %w", err)
111 }
112
113 // Skip devices of insufficient size.
114 devPath := filepath.Join("/dev", devInfo.Name())
115 dev, err := os.Open(devPath)
116 if err != nil {
117 return nil, fmt.Errorf("couldn't open a block device at %q: %w", devPath, err)
118 }
119 size, err := unix.IoctlGetInt(int(dev.Fd()), unix.BLKGETSIZE64)
120 dev.Close()
121 if err != nil {
122 return nil, fmt.Errorf("couldn't probe the size of %q: %w", devPath, err)
123 }
124 if uint64(size) < minSize {
125 continue
126 }
127
128 suitable = append(suitable, devInfo.Name())
129 }
130 return suitable, nil
131}
132
133// rereadPartitionTable causes the kernel to read the partition table present
134// in the block device at blkdevPath. It may return an error.
135func rereadPartitionTable(blkdevPath string) error {
136 dev, err := os.Open(blkdevPath)
137 if err != nil {
138 return fmt.Errorf("couldn't open the block device at %q: %w", blkdevPath, err)
139 }
140 defer dev.Close()
141 ret, err := unix.IoctlRetInt(int(dev.Fd()), unix.BLKRRPART)
142 if err != nil {
143 return fmt.Errorf("while doing an ioctl: %w", err)
144 }
145 if syscall.Errno(ret) == unix.EINVAL {
146 return fmt.Errorf("got an EINVAL from BLKRRPART ioctl")
147 }
148 return nil
149}
150
151// initializeSystemPartition writes image contents to the node's system
152// partition using the block device abstraction layer as opposed to slower
153// go-diskfs. tgtBlkdev must contain a path pointing to the block device
154// associated with the system partition. It may return an error.
155func initializeSystemPartition(image io.Reader, tgtBlkdev string) error {
156 // Check that tgtBlkdev points at an actual block device.
157 info, err := os.Stat(tgtBlkdev)
158 if err != nil {
159 return fmt.Errorf("couldn't stat the system partition at %q: %w", tgtBlkdev, err)
160 }
161 if info.Mode()&os.ModeDevice == 0 {
162 return fmt.Errorf("system partition path %q doesn't point to a block device", tgtBlkdev)
163 }
164
165 // Get the system partition's file descriptor.
166 sys, err := os.OpenFile(tgtBlkdev, os.O_WRONLY, 0600)
167 if err != nil {
168 return fmt.Errorf("couldn't open the system partition at %q: %w", tgtBlkdev, err)
169 }
170 defer sys.Close()
171 // Copy the system partition contents. Use a bigger buffer to optimize disk
172 // writes.
173 buf := make([]byte, mib)
174 if _, err := io.CopyBuffer(sys, image, buf); err != nil {
175 return fmt.Errorf("couldn't copy partition contents: %w", err)
176 }
177 return nil
178}
179
180func main() {
181 // Mount sysfs, devtmpfs and efivarfs.
182 if err := mountPseudoFS(); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100183 log.Fatalf("while mounting pseudo-filesystems: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200184 }
185 // Read the installer ESP UUID from efivarfs.
186 espUuid, err := efivarfs.ReadLoaderDevicePartUUID()
187 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100188 log.Fatalf("while reading the installer ESP UUID: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200189 }
190 // Look up the installer partition based on espUuid.
191 espDev, err := sysfs.DeviceByPartUUID(espUuid)
192 espPath := filepath.Join("/dev", espDev)
193 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100194 log.Fatalf("while resolving the installer device handle: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200195 }
196 // Mount the installer partition. The installer bundle will be read from it.
197 if err := mountInstallerESP(espPath); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100198 log.Fatalf("while mounting the installer ESP: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200199 }
200
Lorenz Brun6c35e972021-12-14 03:08:23 +0100201 nodeParameters, err := os.Open("/installer/metropolis-installer/nodeparams.pb")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100202 if err != nil {
203 log.Fatalf("failed to open node parameters from ESP: %v", err)
204 }
205
206 // TODO(lorenz): Replace with proper bundles
Lorenz Brun6c35e972021-12-14 03:08:23 +0100207 bundle, err := zip.OpenReader("/installer/metropolis-installer/bundle.bin")
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100208 if err != nil {
209 log.Fatalf("failed to open node bundle from ESP: %v", err)
210 }
211 defer bundle.Close()
212 efiPayload, err := bundle.Open("kernel_efi.efi")
213 if err != nil {
214 log.Fatalf("Cannot open EFI payload in bundle: %v", err)
215 }
216 defer efiPayload.Close()
217 systemImage, err := bundle.Open("rootfs.img")
218 if err != nil {
219 log.Fatalf("Cannot open system image in bundle: %v", err)
220 }
221 defer systemImage.Close()
222
Mateusz Zalega43e21072021-10-08 18:05:29 +0200223 // Build the osimage parameters.
224 installParams := osimage.Params{
225 PartitionSize: osimage.PartitionSizeInfo{
226 // ESP is the size of the node ESP partition, expressed in mebibytes.
227 ESP: 128,
228 // System is the size of the node system partition, expressed in
229 // mebibytes.
230 System: 4096,
231 // Data must be nonzero in order for the data partition to be created.
232 // osimage will extend the data partition to fill all the available space
233 // whenever it's writing to block devices, such as now.
234 Data: 128,
235 },
236 // Due to a bug in go-diskfs causing slow writes, SystemImage is explicitly
237 // marked unused here, as system partition contents will be written using
238 // a workaround below instead.
239 // TODO(mateusz@monogon.tech): Address that bug either by patching go-diskfs
240 // or rewriting osimage.
241 SystemImage: nil,
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100242
243 EFIPayload: efiPayload,
244 NodeParameters: nodeParameters,
Mateusz Zalega43e21072021-10-08 18:05:29 +0200245 }
246 // Calculate the minimum target size based on the installation parameters.
247 minSize := uint64((installParams.PartitionSize.ESP +
248 installParams.PartitionSize.System +
249 installParams.PartitionSize.Data + 1) * mib)
250
251 // Look for suitable block devices, given the minimum size.
252 blkDevs, err := findInstallableBlockDevices(espDev, minSize)
253 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100254 log.Fatal(err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200255 }
256 if len(blkDevs) == 0 {
257 log.Fatal("couldn't find a suitable block device.")
258 }
259 // Set the first suitable block device found as the installation target.
260 tgtBlkdevName := blkDevs[0]
261 // Update the osimage parameters with a path pointing at the target device.
262 tgtBlkdevPath := filepath.Join("/dev", tgtBlkdevName)
263 installParams.OutputPath = tgtBlkdevPath
264
265 // Use osimage to partition the target block device and set up its ESP.
266 // Create will return an EFI boot entry on success.
267 log.Printf("Installing to %s\n", tgtBlkdevPath)
268 be, err := osimage.Create(&installParams)
269 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100270 log.Fatalf("while installing: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200271 }
272 // The target device's partition table has just been updated. Re-read it to
273 // make the node system partition reachable through /dev.
274 if err := rereadPartitionTable(tgtBlkdevPath); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100275 log.Fatalf("while re-reading the partition table of %q: %v", tgtBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200276 }
277 // Look up the node's system partition path to be later used in the
278 // initialization step. It's always the second partition, right after
279 // the ESP.
280 sysBlkdevName, err := sysfs.PartitionBlockDevice(tgtBlkdevName, 2)
281 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100282 log.Fatalf("while looking up the system partition: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200283 }
284 sysBlkdevPath := filepath.Join("/dev", sysBlkdevName)
285 // Copy the system partition contents.
Lorenz Brun0b93c8d2021-11-09 03:58:40 +0100286 if err := initializeSystemPartition(systemImage, sysBlkdevPath); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100287 log.Fatalf("while initializing the system partition at %q: %v", sysBlkdevPath, err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200288 }
289
290 // Create an EFI boot entry for Metropolis.
291 en, err := efivarfs.CreateBootEntry(be)
292 if err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100293 log.Fatalf("while creating a boot entry: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200294 }
295 // Erase the preexisting boot order, leaving Metropolis as the only option.
296 if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(en)}); err != nil {
Mateusz Zalega8f72b5d2021-12-03 17:08:59 +0100297 log.Fatalf("while adjusting the boot order: %v", err)
Mateusz Zalega43e21072021-10-08 18:05:29 +0200298 }
299
300 // Reboot.
301 unix.Sync()
302 log.Print("Installation completed. Rebooting.")
303 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
304}