// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Installer creates a Metropolis image at a suitable block device based on the
// installer bundle present in the installation medium's ESP, after which it
// reboots. It's meant to be used as an init process.
package main
import (
"archive/zip"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"syscall"
"time"
"golang.org/x/sys/unix"
"source.monogon.dev/metropolis/node/build/mkimage/osimage"
"source.monogon.dev/metropolis/pkg/efivarfs"
"source.monogon.dev/metropolis/pkg/sysfs"
)
// mib is the number of bytes in one mebibyte (1024 * 1024).
const mib = 1024 * 1024
// mountPseudoFS prepares the kernel pseudo-filesystems the installer relies
// on during block device discovery: sysfs at /sys, efivarfs at efivarfs.Path
// and devtmpfs at /dev, in that order. Each mountpoint is created first; an
// already existing mountpoint is not treated as an error. The first failure
// aborts the sequence and is returned.
func mountPseudoFS() error {
	type pseudoMount struct {
		target string
		fstype string
		opts   uintptr
	}
	mounts := []pseudoMount{
		{
			target: "/sys",
			fstype: "sysfs",
			opts:   unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV,
		},
		{
			target: efivarfs.Path,
			fstype: "efivarfs",
			opts:   unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV,
		},
		{
			// Device nodes must remain usable here, hence no MS_NODEV.
			target: "/dev",
			fstype: "devtmpfs",
			opts:   unix.MS_NOEXEC | unix.MS_NOSUID,
		},
	}

	for i := range mounts {
		pm := mounts[i]

		mkErr := unix.Mkdir(pm.target, 0700)
		if mkErr != nil {
			// A pre-existing directory is fine, anything else is fatal.
			if !os.IsExist(mkErr) {
				return fmt.Errorf("couldn't create the mountpoint at %q: %w", pm.target, mkErr)
			}
		}

		// Pseudo-filesystems have no backing device; the filesystem type
		// doubles as the mount source.
		mntErr := unix.Mount(pm.fstype, pm.target, pm.fstype, pm.opts, "")
		if mntErr != nil {
			return fmt.Errorf("couldn't mount %q at %q: %w", pm.fstype, pm.target, mntErr)
		}
	}
	return nil
}
// mountInstallerESP mounts the filesystem the installer was loaded from based
// on espPath, which must point to the appropriate partition block device. The
// filesystem is mounted read-only and noexec as vfat at /installer.
//
// The mountpoint is created if missing. An already existing mountpoint is
// accepted, matching the behavior of mountPseudoFS. Any other failure to
// create the mountpoint, or a failure to mount, is returned as an error.
func mountInstallerESP(espPath string) error {
	const mountpoint = "/installer"

	// Create the mountpoint, tolerating one that is already present.
	if err := unix.Mkdir(mountpoint, 0700); err != nil && !os.IsExist(err) {
		return fmt.Errorf("couldn't create the installer mountpoint: %w", err)
	}
	// Mount the filesystem. The installer only reads from its own ESP.
	if err := unix.Mount(espPath, mountpoint, "vfat", unix.MS_NOEXEC|unix.MS_RDONLY, ""); err != nil {
		return fmt.Errorf("couldn't mount the installer ESP (%q -> %q): %w", espPath, mountpoint, err)
	}
	return nil
}
// findInstallableBlockDevices returns names of all the block devices suitable
// for hosting a Metropolis installation, limited by the size expressed in
// bytes minSize. The install medium espDev will be excluded from the result.
//
// espDev is the name of the installer ESP partition; its parent device is
// looked up through sysfs. Devices whose names start with "dm-", "zram",
// "ram" or "loop", partitions, and devices smaller than minSize are skipped.
// An error is returned if the parent device can't be resolved, if
// /sys/class/block can't be read, or if any candidate device can't be probed.
func findInstallableBlockDevices(espDev string, minSize uint64) ([]string, error) {
	// Use the partition's name to find and return the name of its parent
	// device. It will be excluded from the list of suitable target devices.
	srcDev, err := sysfs.ParentBlockDevice(espDev)
	if err != nil {
		// Without this check a failed lookup would go unnoticed, and an empty
		// srcDev would act as a prefix matching every device name below.
		return nil, fmt.Errorf("couldn't resolve the parent device of %q: %w", espDev, err)
	}
	// Build the exclusion list containing forbidden handle prefixes.
	exclude := []string{"dm-", "zram", "ram", "loop", srcDev}

	// Get the block device handles by looking up directory contents.
	const blkDirPath = "/sys/class/block"
	blkDevs, err := os.ReadDir(blkDirPath)
	if err != nil {
		return nil, fmt.Errorf("couldn't read %q: %w", blkDirPath, err)
	}

	// Iterate over the handles, skipping any block device that either points to
	// a partition, matches the exclusion list, or is smaller than minSize.
	var suitable []string
probeLoop:
	for _, devInfo := range blkDevs {
		// Skip devices according to the exclusion list.
		for _, prefix := range exclude {
			if strings.HasPrefix(devInfo.Name(), prefix) {
				continue probeLoop
			}
		}

		// Skip partition symlinks. Only partitions carry a "partition"
		// attribute in their sysfs directory.
		if _, err := os.Stat(filepath.Join(blkDirPath, devInfo.Name(), "partition")); err == nil {
			continue
		} else if !os.IsNotExist(err) {
			return nil, fmt.Errorf("while probing sysfs: %w", err)
		}

		// Skip devices of insufficient size.
		devPath := filepath.Join("/dev", devInfo.Name())
		dev, err := os.Open(devPath)
		if err != nil {
			return nil, fmt.Errorf("couldn't open a block device at %q: %w", devPath, err)
		}
		size, err := unix.IoctlGetInt(int(dev.Fd()), unix.BLKGETSIZE64)
		// Close right away rather than deferring, as this runs in a loop.
		dev.Close()
		if err != nil {
			return nil, fmt.Errorf("couldn't probe the size of %q: %w", devPath, err)
		}
		if uint64(size) < minSize {
			continue
		}

		suitable = append(suitable, devInfo.Name())
	}
	return suitable, nil
}
// rereadPartitionTable asks the kernel, through the BLKRRPART ioctl, to read
// the partition table of the block device found at blkdevPath. An error is
// returned if the device can't be opened, if the ioctl fails, or if the
// ioctl's return value equals EINVAL.
func rereadPartitionTable(blkdevPath string) error {
	f, openErr := os.Open(blkdevPath)
	if openErr != nil {
		return fmt.Errorf("couldn't open the block device at %q: %w", blkdevPath, openErr)
	}
	defer f.Close()

	rc, ioctlErr := unix.IoctlRetInt(int(f.Fd()), unix.BLKRRPART)
	switch {
	case ioctlErr != nil:
		return fmt.Errorf("while doing an ioctl: %w", ioctlErr)
	case syscall.Errno(rc) == unix.EINVAL:
		// The returned integer itself is also checked against EINVAL.
		return fmt.Errorf("got an EINVAL from BLKRRPART ioctl")
	default:
		return nil
	}
}
// initializeSystemPartition writes image contents to the node's system
// partition using the block device abstraction layer as opposed to slower
// go-diskfs. tgtBlkdev must contain a path pointing to the block device
// associated with the system partition.
//
// The written data is flushed with Sync and the device is closed before
// returning, so that errors reported at those stages are not lost. An error
// is returned if tgtBlkdev can't be stat'ed or opened, if it isn't a block
// device, or if copying, flushing or closing fails.
func initializeSystemPartition(image io.Reader, tgtBlkdev string) error {
	// Check that tgtBlkdev points at an actual block device.
	info, err := os.Stat(tgtBlkdev)
	if err != nil {
		return fmt.Errorf("couldn't stat the system partition at %q: %w", tgtBlkdev, err)
	}
	// os.ModeDevice is set for character devices as well; those additionally
	// carry os.ModeCharDevice. Only accept ModeDevice without ModeCharDevice.
	if info.Mode()&(os.ModeDevice|os.ModeCharDevice) != os.ModeDevice {
		return fmt.Errorf("system partition path %q doesn't point to a block device", tgtBlkdev)
	}

	// Get the system partition's file descriptor.
	sys, err := os.OpenFile(tgtBlkdev, os.O_WRONLY, 0600)
	if err != nil {
		return fmt.Errorf("couldn't open the system partition at %q: %w", tgtBlkdev, err)
	}

	// Copy the system partition contents. Use a bigger buffer to optimize disk
	// writes.
	buf := make([]byte, mib)
	if _, err := io.CopyBuffer(sys, image, buf); err != nil {
		// The copy error is the one worth reporting; the close is best-effort.
		sys.Close()
		return fmt.Errorf("couldn't copy partition contents: %w", err)
	}

	// Flush explicitly so that write-back failures are reported to the caller.
	if err := sys.Sync(); err != nil {
		sys.Close()
		return fmt.Errorf("couldn't flush the system partition at %q: %w", tgtBlkdev, err)
	}
	if err := sys.Close(); err != nil {
		return fmt.Errorf("couldn't close the system partition at %q: %w", tgtBlkdev, err)
	}
	return nil
}
// main runs the installation: it mounts the pseudo-filesystems, locates and
// mounts the ESP the installer was booted from, reads the node parameters and
// the bundle from it, picks the first suitable target block device, writes
// the Metropolis image to it, registers an EFI boot entry and reboots. Any
// failure is reported through panicf and handled by the deferred recover.
func main() {
	// Halt on panic instead of returning from main, so that the message stays
	// visible until the machine is rebooted manually. The error string will
	// have been printed before recover is called.
	defer func() {
		if r := recover(); r != nil {
			logf("Fatal error: %v", r)
			logf("The installation could not be finalized. Please reboot to continue.")
			syscall.Pause()
		}
	}()

	// Mount sysfs, devtmpfs and efivarfs.
	if err := mountPseudoFS(); err != nil {
		panicf("While mounting pseudo-filesystems: %v", err)
	}

	go logPiper()
	logf("Metropolis Installer")
	logf("Copyright (c) 2023 The Monogon Project Authors")
	logf("")

	// Read the installer ESP UUID from efivarfs.
	espUUID, err := efivarfs.ReadLoaderDevicePartUUID()
	if err != nil {
		panicf("While reading the installer ESP UUID: %v", err)
	}

	// Wait for up to 30 tries @ 1s (30s) for the ESP to show up
	var espDev string
	retries := 30
	for {
		// Look up the installer partition based on espUUID.
		espDev, err = sysfs.DeviceByPartUUID(espUUID)
		if err == nil {
			break
		} else if errors.Is(err, sysfs.ErrDevNotFound) && retries > 0 {
			time.Sleep(1 * time.Second)
			retries--
		} else {
			panicf("While resolving the installer device handle: %v", err)
		}
	}
	espPath := filepath.Join("/dev", espDev)

	// Mount the installer partition. The installer bundle will be read from it.
	if err := mountInstallerESP(espPath); err != nil {
		panicf("While mounting the installer ESP: %v", err)
	}

	nodeParameters, err := os.Open("/installer/metropolis-installer/nodeparams.pb")
	if err != nil {
		panicf("Failed to open node parameters from ESP: %v", err)
	}
	defer nodeParameters.Close()

	// TODO(lorenz): Replace with proper bundles
	bundle, err := zip.OpenReader("/installer/metropolis-installer/bundle.bin")
	if err != nil {
		panicf("Failed to open node bundle from ESP: %v", err)
	}
	defer bundle.Close()
	efiPayload, err := bundle.Open("kernel_efi.efi")
	if err != nil {
		panicf("Cannot open EFI payload in bundle: %v", err)
	}
	defer efiPayload.Close()
	systemImage, err := bundle.Open("verity_rootfs.img")
	if err != nil {
		panicf("Cannot open system image in bundle: %v", err)
	}
	defer systemImage.Close()

	// Build the osimage parameters.
	installParams := osimage.Params{
		PartitionSize: osimage.PartitionSizeInfo{
			// ESP is the size of the node ESP partition, expressed in mebibytes.
			ESP: 128,
			// System is the size of the node system partition, expressed in
			// mebibytes.
			System: 4096,
			// Data must be nonzero in order for the data partition to be created.
			// osimage will extend the data partition to fill all the available space
			// whenever it's writing to block devices, such as now.
			Data: 128,
		},
		// Due to a bug in go-diskfs causing slow writes, SystemImage is explicitly
		// marked unused here, as system partition contents will be written using
		// a workaround below instead.
		// TODO(mateusz@monogon.tech): Address that bug either by patching go-diskfs
		// or rewriting osimage.
		SystemImage:    nil,
		EFIPayload:     efiPayload,
		NodeParameters: nodeParameters,
	}

	// Calculate the minimum target size based on the installation parameters.
	// One extra mebibyte is added on top of the partition sizes.
	minSize := uint64((installParams.PartitionSize.ESP +
		installParams.PartitionSize.System +
		installParams.PartitionSize.Data + 1) * mib)

	// Look for suitable block devices, given the minimum size.
	blkDevs, err := findInstallableBlockDevices(espDev, minSize)
	if err != nil {
		// Pass the error as an argument rather than as the format string, so
		// that any '%' in its text is printed verbatim.
		panicf("%v", err)
	}
	if len(blkDevs) == 0 {
		panicf("Couldn't find a suitable block device.")
	}
	// Set the first suitable block device found as the installation target.
	tgtBlkdevName := blkDevs[0]
	// Update the osimage parameters with a path pointing at the target device.
	tgtBlkdevPath := filepath.Join("/dev", tgtBlkdevName)
	installParams.OutputPath = tgtBlkdevPath

	// Use osimage to partition the target block device and set up its ESP.
	// Create will return an EFI boot entry on success.
	logf("Installing to %s...", tgtBlkdevPath)
	be, err := osimage.Create(&installParams)
	if err != nil {
		panicf("While installing: %v", err)
	}

	// The target device's partition table has just been updated. Re-read it to
	// make the node system partition reachable through /dev.
	if err := rereadPartitionTable(tgtBlkdevPath); err != nil {
		panicf("While re-reading the partition table of %q: %v", tgtBlkdevPath, err)
	}

	// Look up the node's system partition path to be later used in the
	// initialization step. It's always the second partition, right after
	// the ESP.
	sysBlkdevName, err := sysfs.PartitionBlockDevice(tgtBlkdevName, 2)
	if err != nil {
		panicf("While looking up the system partition: %v", err)
	}
	sysBlkdevPath := filepath.Join("/dev", sysBlkdevName)
	// Copy the system partition contents.
	if err := initializeSystemPartition(systemImage, sysBlkdevPath); err != nil {
		panicf("While initializing the system partition at %q: %v", sysBlkdevPath, err)
	}

	// Create an EFI boot entry for Metropolis.
	en, err := efivarfs.AddBootEntry(be)
	if err != nil {
		panicf("While creating a boot entry: %v", err)
	}
	// Erase the preexisting boot order, leaving Metropolis as the only option.
	if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(en)}); err != nil {
		panicf("While adjusting the boot order: %v", err)
	}

	// Reboot. Flush filesystem buffers first so that nothing written above is
	// lost.
	unix.Sync()
	logf("Installation completed. Rebooting.")
	if err := unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART); err != nil {
		// Don't fall off the end of main when the reboot request is refused.
		panicf("While rebooting: %v", err)
	}
}