| // Copyright 2020 The Monogon Project Authors. | 
 | // | 
 | // SPDX-License-Identifier: Apache-2.0 | 
 | // | 
 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
 | // you may not use this file except in compliance with the License. | 
 | // You may obtain a copy of the License at | 
 | // | 
 | //     http://www.apache.org/licenses/LICENSE-2.0 | 
 | // | 
 | // Unless required by applicable law or agreed to in writing, software | 
 | // distributed under the License is distributed on an "AS IS" BASIS, | 
 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | // See the License for the specific language governing permissions and | 
 | // limitations under the License. | 
 |  | 
 | // Installer creates a Metropolis image at a suitable block device based on the | 
 | // installer bundle present in the installation medium's ESP, after which it | 
 | // reboots. It's meant to be used as an init process. | 
 | package main | 
 |  | 
 | import ( | 
 | 	"archive/zip" | 
 | 	"errors" | 
 | 	"fmt" | 
 | 	"io" | 
 | 	"os" | 
 | 	"path/filepath" | 
 | 	"strings" | 
 | 	"syscall" | 
 | 	"time" | 
 |  | 
 | 	"golang.org/x/sys/unix" | 
 |  | 
 | 	"source.monogon.dev/metropolis/node/build/mkimage/osimage" | 
 | 	"source.monogon.dev/metropolis/pkg/efivarfs" | 
 | 	"source.monogon.dev/metropolis/pkg/sysfs" | 
 | ) | 
 |  | 
 // mib is the number of bytes in one mebibyte (2^20).
const mib = 1 << 20
 |  | 
 | // mountPseudoFS mounts efivarfs, devtmpfs and sysfs, used by the installer in | 
 | // the block device discovery stage. | 
 | func mountPseudoFS() error { | 
 | 	for _, m := range []struct { | 
 | 		dir   string | 
 | 		fs    string | 
 | 		flags uintptr | 
 | 	}{ | 
 | 		{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV}, | 
 | 		{efivarfs.Path, "efivarfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV}, | 
 | 		{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID}, | 
 | 	} { | 
 | 		if err := unix.Mkdir(m.dir, 0700); err != nil && !os.IsExist(err) { | 
 | 			return fmt.Errorf("couldn't create the mountpoint at %q: %w", m.dir, err) | 
 | 		} | 
 | 		if err := unix.Mount(m.fs, m.dir, m.fs, m.flags, ""); err != nil { | 
 | 			return fmt.Errorf("couldn't mount %q at %q: %w", m.fs, m.dir, err) | 
 | 		} | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | // mountInstallerESP mounts the filesystem the installer was loaded from based | 
 | // on espPath, which must point to the appropriate partition block device. The | 
 | // filesystem is mounted at /installer. | 
 | func mountInstallerESP(espPath string) error { | 
 | 	// Create the mountpoint. | 
 | 	if err := unix.Mkdir("/installer", 0700); err != nil { | 
 | 		return fmt.Errorf("couldn't create the installer mountpoint: %w", err) | 
 | 	} | 
 | 	// Mount the filesystem. | 
 | 	if err := unix.Mount(espPath, "/installer", "vfat", unix.MS_NOEXEC|unix.MS_RDONLY, ""); err != nil { | 
 | 		return fmt.Errorf("couldn't mount the installer ESP (%q -> %q): %w", espPath, "/installer", err) | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | // findInstallableBlockDevices returns names of all the block devices suitable | 
 | // for hosting a Metropolis installation, limited by the size expressed in | 
 | // bytes minSize. The install medium espDev will be excluded from the result. | 
 | func findInstallableBlockDevices(espDev string, minSize uint64) ([]string, error) { | 
 | 	// Use the partition's name to find and return the name of its parent | 
 | 	// device. It will be excluded from the list of suitable target devices. | 
 | 	srcDev, err := sysfs.ParentBlockDevice(espDev) | 
 | 	// Build the exclusion list containing forbidden handle prefixes. | 
 | 	exclude := []string{"dm-", "zram", "ram", "loop", srcDev} | 
 |  | 
 | 	// Get the block device handles by looking up directory contents. | 
 | 	const blkDirPath = "/sys/class/block" | 
 | 	blkDevs, err := os.ReadDir(blkDirPath) | 
 | 	if err != nil { | 
 | 		return nil, fmt.Errorf("couldn't read %q: %w", blkDirPath, err) | 
 | 	} | 
 | 	// Iterate over the handles, skipping any block device that either points to | 
 | 	// a partition, matches the exclusion list, or is smaller than minSize. | 
 | 	var suitable []string | 
 | probeLoop: | 
 | 	for _, devInfo := range blkDevs { | 
 | 		// Skip devices according to the exclusion list. | 
 | 		for _, prefix := range exclude { | 
 | 			if strings.HasPrefix(devInfo.Name(), prefix) { | 
 | 				continue probeLoop | 
 | 			} | 
 | 		} | 
 |  | 
 | 		// Skip partition symlinks. | 
 | 		if _, err := os.Stat(filepath.Join(blkDirPath, devInfo.Name(), "partition")); err == nil { | 
 | 			continue | 
 | 		} else if !os.IsNotExist(err) { | 
 | 			return nil, fmt.Errorf("while probing sysfs: %w", err) | 
 | 		} | 
 |  | 
 | 		// Skip devices of insufficient size. | 
 | 		devPath := filepath.Join("/dev", devInfo.Name()) | 
 | 		dev, err := os.Open(devPath) | 
 | 		if err != nil { | 
 | 			return nil, fmt.Errorf("couldn't open a block device at %q: %w", devPath, err) | 
 | 		} | 
 | 		size, err := unix.IoctlGetInt(int(dev.Fd()), unix.BLKGETSIZE64) | 
 | 		dev.Close() | 
 | 		if err != nil { | 
 | 			return nil, fmt.Errorf("couldn't probe the size of %q: %w", devPath, err) | 
 | 		} | 
 | 		if uint64(size) < minSize { | 
 | 			continue | 
 | 		} | 
 |  | 
 | 		suitable = append(suitable, devInfo.Name()) | 
 | 	} | 
 | 	return suitable, nil | 
 | } | 
 |  | 
 | // rereadPartitionTable causes the kernel to read the partition table present | 
 | // in the block device at blkdevPath. It may return an error. | 
 | func rereadPartitionTable(blkdevPath string) error { | 
 | 	dev, err := os.Open(blkdevPath) | 
 | 	if err != nil { | 
 | 		return fmt.Errorf("couldn't open the block device at %q: %w", blkdevPath, err) | 
 | 	} | 
 | 	defer dev.Close() | 
 | 	ret, err := unix.IoctlRetInt(int(dev.Fd()), unix.BLKRRPART) | 
 | 	if err != nil { | 
 | 		return fmt.Errorf("while doing an ioctl: %w", err) | 
 | 	} | 
 | 	if syscall.Errno(ret) == unix.EINVAL { | 
 | 		return fmt.Errorf("got an EINVAL from BLKRRPART ioctl") | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | // initializeSystemPartition writes image contents to the node's system | 
 | // partition using the block device abstraction layer as opposed to slower | 
 | // go-diskfs. tgtBlkdev must contain a path pointing to the block device | 
 | // associated with the system partition. It may return an error. | 
 | func initializeSystemPartition(image io.Reader, tgtBlkdev string) error { | 
 | 	// Check that tgtBlkdev points at an actual block device. | 
 | 	info, err := os.Stat(tgtBlkdev) | 
 | 	if err != nil { | 
 | 		return fmt.Errorf("couldn't stat the system partition at %q: %w", tgtBlkdev, err) | 
 | 	} | 
 | 	if info.Mode()&os.ModeDevice == 0 { | 
 | 		return fmt.Errorf("system partition path %q doesn't point to a block device", tgtBlkdev) | 
 | 	} | 
 |  | 
 | 	// Get the system partition's file descriptor. | 
 | 	sys, err := os.OpenFile(tgtBlkdev, os.O_WRONLY, 0600) | 
 | 	if err != nil { | 
 | 		return fmt.Errorf("couldn't open the system partition at %q: %w", tgtBlkdev, err) | 
 | 	} | 
 | 	defer sys.Close() | 
 | 	// Copy the system partition contents. Use a bigger buffer to optimize disk | 
 | 	// writes. | 
 | 	buf := make([]byte, mib) | 
 | 	if _, err := io.CopyBuffer(sys, image, buf); err != nil { | 
 | 		return fmt.Errorf("couldn't copy partition contents: %w", err) | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 // panicf formats its arguments the way fmt.Sprintf does and panics with the
// resulting string. In contrast to log.Panicf, nothing is printed beforehand;
// reporting the message is left to whoever recovers the panic.
func panicf(format string, v ...interface{}) {
	panic(fmt.Sprintf(format, v...))
}
 |  | 
 | func main() { | 
 | 	// Reboot on panic after a delay. The error string will have been printed | 
 | 	// before recover is called. | 
 | 	defer func() { | 
 | 		if r := recover(); r != nil { | 
 | 			fmt.Println(r) | 
 | 			fmt.Println("The installation could not be finalized. Please reboot to continue.") | 
 | 			syscall.Pause() | 
 | 		} | 
 | 	}() | 
 |  | 
 | 	// Mount sysfs, devtmpfs and efivarfs. | 
 | 	if err := mountPseudoFS(); err != nil { | 
 | 		panicf("While mounting pseudo-filesystems: %v", err) | 
 | 	} | 
 | 	// Read the installer ESP UUID from efivarfs. | 
 | 	espUuid, err := efivarfs.ReadLoaderDevicePartUUID() | 
 | 	if err != nil { | 
 | 		panicf("While reading the installer ESP UUID: %v", err) | 
 | 	} | 
 | 	// Wait for up to 30 tries @ 1s (30s) for the ESP to show up | 
 | 	var espDev string | 
 | 	var retries = 30 | 
 | 	for { | 
 | 		// Look up the installer partition based on espUuid. | 
 | 		espDev, err = sysfs.DeviceByPartUUID(espUuid) | 
 | 		if err == nil { | 
 | 			break | 
 | 		} else if errors.Is(err, sysfs.ErrDevNotFound) && retries > 0 { | 
 | 			time.Sleep(1 * time.Second) | 
 | 			retries-- | 
 | 		} else { | 
 | 			panicf("While resolving the installer device handle: %v", err) | 
 | 		} | 
 | 	} | 
 | 	espPath := filepath.Join("/dev", espDev) | 
 | 	// Mount the installer partition. The installer bundle will be read from it. | 
 | 	if err := mountInstallerESP(espPath); err != nil { | 
 | 		panicf("While mounting the installer ESP: %v", err) | 
 | 	} | 
 |  | 
 | 	nodeParameters, err := os.Open("/installer/metropolis-installer/nodeparams.pb") | 
 | 	if err != nil { | 
 | 		panicf("Failed to open node parameters from ESP: %v", err) | 
 | 	} | 
 |  | 
 | 	// TODO(lorenz): Replace with proper bundles | 
 | 	bundle, err := zip.OpenReader("/installer/metropolis-installer/bundle.bin") | 
 | 	if err != nil { | 
 | 		panicf("Failed to open node bundle from ESP: %v", err) | 
 | 	} | 
 | 	defer bundle.Close() | 
 | 	efiPayload, err := bundle.Open("kernel_efi.efi") | 
 | 	if err != nil { | 
 | 		panicf("Cannot open EFI payload in bundle: %v", err) | 
 | 	} | 
 | 	defer efiPayload.Close() | 
 | 	systemImage, err := bundle.Open("verity_rootfs.img") | 
 | 	if err != nil { | 
 | 		panicf("Cannot open system image in bundle: %v", err) | 
 | 	} | 
 | 	defer systemImage.Close() | 
 |  | 
 | 	// Build the osimage parameters. | 
 | 	installParams := osimage.Params{ | 
 | 		PartitionSize: osimage.PartitionSizeInfo{ | 
 | 			// ESP is the size of the node ESP partition, expressed in mebibytes. | 
 | 			ESP: 128, | 
 | 			// System is the size of the node system partition, expressed in | 
 | 			// mebibytes. | 
 | 			System: 4096, | 
 | 			// Data must be nonzero in order for the data partition to be created. | 
 | 			// osimage will extend the data partition to fill all the available space | 
 | 			// whenever it's writing to block devices, such as now. | 
 | 			Data: 128, | 
 | 		}, | 
 | 		// Due to a bug in go-diskfs causing slow writes, SystemImage is explicitly | 
 | 		// marked unused here, as system partition contents will be written using | 
 | 		// a workaround below instead. | 
 | 		// TODO(mateusz@monogon.tech): Address that bug either by patching go-diskfs | 
 | 		// or rewriting osimage. | 
 | 		SystemImage: nil, | 
 |  | 
 | 		EFIPayload:     efiPayload, | 
 | 		NodeParameters: nodeParameters, | 
 | 	} | 
 | 	// Calculate the minimum target size based on the installation parameters. | 
 | 	minSize := uint64((installParams.PartitionSize.ESP + | 
 | 		installParams.PartitionSize.System + | 
 | 		installParams.PartitionSize.Data + 1) * mib) | 
 |  | 
 | 	// Look for suitable block devices, given the minimum size. | 
 | 	blkDevs, err := findInstallableBlockDevices(espDev, minSize) | 
 | 	if err != nil { | 
 | 		panicf(err.Error()) | 
 | 	} | 
 | 	if len(blkDevs) == 0 { | 
 | 		panicf("Couldn't find a suitable block device.") | 
 | 	} | 
 | 	// Set the first suitable block device found as the installation target. | 
 | 	tgtBlkdevName := blkDevs[0] | 
 | 	// Update the osimage parameters with a path pointing at the target device. | 
 | 	tgtBlkdevPath := filepath.Join("/dev", tgtBlkdevName) | 
 | 	installParams.OutputPath = tgtBlkdevPath | 
 |  | 
 | 	// Use osimage to partition the target block device and set up its ESP. | 
 | 	// Create will return an EFI boot entry on success. | 
 | 	fmt.Printf("Installing to %s\n", tgtBlkdevPath) | 
 | 	be, err := osimage.Create(&installParams) | 
 | 	if err != nil { | 
 | 		panicf("While installing: %v", err) | 
 | 	} | 
 | 	// The target device's partition table has just been updated. Re-read it to | 
 | 	// make the node system partition reachable through /dev. | 
 | 	if err := rereadPartitionTable(tgtBlkdevPath); err != nil { | 
 | 		panicf("While re-reading the partition table of %q: %v", tgtBlkdevPath, err) | 
 | 	} | 
 | 	// Look up the node's system partition path to be later used in the | 
 | 	// initialization step. It's always the second partition, right after | 
 | 	// the ESP. | 
 | 	sysBlkdevName, err := sysfs.PartitionBlockDevice(tgtBlkdevName, 2) | 
 | 	if err != nil { | 
 | 		panicf("While looking up the system partition: %v", err) | 
 | 	} | 
 | 	sysBlkdevPath := filepath.Join("/dev", sysBlkdevName) | 
 | 	// Copy the system partition contents. | 
 | 	if err := initializeSystemPartition(systemImage, sysBlkdevPath); err != nil { | 
 | 		panicf("While initializing the system partition at %q: %v", sysBlkdevPath, err) | 
 | 	} | 
 |  | 
 | 	// Create an EFI boot entry for Metropolis. | 
 | 	en, err := efivarfs.CreateBootEntry(be) | 
 | 	if err != nil { | 
 | 		panicf("While creating a boot entry: %v", err) | 
 | 	} | 
 | 	// Erase the preexisting boot order, leaving Metropolis as the only option. | 
 | 	if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(en)}); err != nil { | 
 | 		panicf("While adjusting the boot order: %v", err) | 
 | 	} | 
 |  | 
 | 	// Reboot. | 
 | 	unix.Sync() | 
 | 	fmt.Println("Installation completed. Rebooting.") | 
 | 	unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART) | 
 | } |