init: remount to tmpfs
runsc needs to be able to pivot_root. According to @lorenz this does not
work from initramfs. This introduces a temporary fix to re-mount and
re-exec into a new root based on tmpfs.
A proper fix would be to use a real filesystem instead of initramfs
(like squashfs), but this will do for now.
We also use this opportunity to use devtmpfs instead of manually
managing /dev. This collides with the storage manager, which tries to
create all storage device nodes itself - we just remove that mknod call.
Test Plan: shouldn't change behaviour
X-Origin-Diff: phab/D433
GitOrigin-RevId: aa59fec6551bab1b1b9c2fe037dce410e550981b
diff --git a/core/cmd/init/BUILD.bazel b/core/cmd/init/BUILD.bazel
index e8e55dc..0765538 100644
--- a/core/cmd/init/BUILD.bazel
+++ b/core/cmd/init/BUILD.bazel
@@ -2,7 +2,10 @@
go_library(
name = "go_default_library",
- srcs = ["main.go"],
+ srcs = [
+ "main.go",
+ "switchroot.go",
+ ],
importpath = "git.monogon.dev/source/nexantic.git/core/cmd/init",
visibility = ["//visibility:private"],
deps = [
diff --git a/core/cmd/init/main.go b/core/cmd/init/main.go
index 82ba033..f4ff871 100644
--- a/core/cmd/init/main.go
+++ b/core/cmd/init/main.go
@@ -55,23 +55,15 @@
if err != nil {
panic(err)
}
+
+ // Remount onto a tmpfs and re-exec if needed. Otherwise, keep running.
+ err = switchRoot(logger)
+ if err != nil {
+ panic(fmt.Errorf("could not remount root: %w", err))
+ }
+
logger.Info("Starting Smalltown Init")
- // Set up bare minimum mounts
- if err := os.Mkdir("/sys", 0755); err != nil {
- panic(err)
- }
- if err := unix.Mount("sysfs", "/sys", "sysfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, ""); err != nil {
- panic(err)
- }
-
- if err := os.Mkdir("/proc", 0755); err != nil {
- panic(err)
- }
- if err := unix.Mount("procfs", "/proc", "proc", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, ""); err != nil {
- panic(err)
- }
-
signalChannel := make(chan os.Signal, 2)
signal.Notify(signalChannel)
@@ -81,7 +73,7 @@
storageManager, err := storage.Initialize(logger.With(zap.String("component", "storage")))
if err != nil {
- panic(err)
+ panic(fmt.Errorf("could not initialize storage: %w", err))
}
networkSvc, err := network.NewNetworkService(network.Config{}, logger.With(zap.String("component", "network")))
diff --git a/core/cmd/init/switchroot.go b/core/cmd/init/switchroot.go
new file mode 100644
index 0000000..0e68b06
--- /dev/null
+++ b/core/cmd/init/switchroot.go
@@ -0,0 +1,174 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "syscall"
+
+ "go.uber.org/zap"
+ "golang.org/x/sys/unix"
+)
+
+// switchRoot copies the contents of the initramfs root onto a fresh tmpfs, moves that tmpfs over / and re-execs /init.
+// This is necessary because you cannot pivot_root from an initramfs (and runsc wants to do that). It returns nil when the witness env var shows we already run from the tmpfs root; on the first run it only returns if one of the steps fails.
+// In the future, we should use something like squashfs instead of an initramfs and just nuke this.
+func switchRoot(log *zap.Logger) error {
+ // We detect the need to remount to tmpfs via an environment variable.
+ // The first run of /init (from initramfs) will not have this var, and will be re-exec'd from a new tmpfs root with
+ // that variable set.
+ witness := "SIGNOS_REMOUNTED"
+
+ // If the witness env var is present (with any value), we already run from the tmpfs root and have nothing to do.
+ environ := os.Environ()
+ for _, env := range environ {
+ if strings.HasPrefix(env, witness+"=") {
+ log.Info("Smalltown running in tmpfs root")
+ return nil
+ }
+ }
+
+ // Otherwise, we need to remount to a tmpfs. The witness is added to the environment passed to the re-exec'd init.
+ environ = append(environ, witness+"=yes")
+ log.Info("Smalltown running in initramfs, remounting to tmpfs...")
+
+ // Make note of all directories we have to make (dirs) and all other entries that we have to copy (paths).
+ paths := []string{}
+ dirs := []string{}
+ err := filepath.Walk("/", func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+ if path == "/" {
+ return nil
+ }
+ // /dev is prepopulated by the initramfs, skip it and everything below it. The target root uses devtmpfs.
+ if path == "/dev" || strings.HasPrefix(path, "/dev/") {
+ return nil
+ }
+
+ if info.IsDir() {
+ dirs = append(dirs, path)
+ } else {
+ paths = append(paths, path)
+ }
+
+ return nil
+ })
+ if err != nil {
+ return fmt.Errorf("could not list root files: %w", err)
+ }
+
+ log.Info("Copying to tmpfs", zap.Strings("paths", paths), zap.Strings("dirs", dirs))
+
+ // Make new root at /mnt. This happens after the walk above, so /mnt itself is not part of dirs.
+ if err := os.Mkdir("/mnt", 0755); err != nil {
+ return fmt.Errorf("could not make /mnt: %w", err)
+ }
+ // And mount a tmpfs on it
+ if err := unix.Mount("tmpfs", "/mnt", "tmpfs", 0, ""); err != nil {
+ return fmt.Errorf("could not mount tmpfs on /mnt: %w", err)
+ }
+
+ // Recreate all directories under /mnt with the mode reported by Stat. filepath.Walk visits a directory before its
+ // contents (in lexical order), so every parent has been created before its children.
+ for _, src := range dirs {
+ stat, err := os.Stat(src)
+ if err != nil {
+ return fmt.Errorf("Stat(%q): %w", src, err)
+ }
+ dst := "/mnt" + src
+ err = os.Mkdir(dst, stat.Mode())
+ if err != nil {
+ return fmt.Errorf("Mkdir(%q): %w", dst, err)
+ }
+ }
+
+ // Move all files over. Parent directories will exist by now. NOTE(review): Walk does not follow symlinks, so they end up in paths and are copied by content here (os.Stat/os.Open follow them) - confirm the initramfs contains none.
+ for _, src := range paths {
+ stat, err := os.Stat(src)
+ if err != nil {
+ return fmt.Errorf("Stat(%q): %w", src, err)
+ }
+ dst := "/mnt" + src
+
+ // Copy file contents into a new file created with the source's mode.
+ sfd, err := os.Open(src)
+ if err != nil {
+ return fmt.Errorf("Open(%q): %w", src, err)
+ }
+ dfd, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, stat.Mode())
+ if err != nil {
+ sfd.Close()
+ return fmt.Errorf("OpenFile(%q): %w", dst, err)
+ }
+ _, err = io.Copy(dfd, sfd)
+ // Close both files before checking the copy error, so that neither is leaked when the copy failed.
+ sfd.Close()
+ dfd.Close()
+ if err != nil {
+ return fmt.Errorf("Copying %q failed: %w", src, err)
+ }
+
+ // Remove the old file from the initramfs root (presumably to free the memory it occupies).
+ err = unix.Unlink(src)
+ if err != nil {
+ return fmt.Errorf("Unlink(%q): %w", src, err)
+ }
+ }
+
+ // Set up kernel filesystems in the new root. Order matters: /dev must be mounted before /dev/pts can be created.
+ for _, el := range []struct {
+ dir string
+ fs string
+ flags uintptr
+ }{
+ {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+ {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+ {"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+ } {
+ if err := os.Mkdir("/mnt"+el.dir, 0755); err != nil {
+ return fmt.Errorf("could not make /mnt%s: %w", el.dir, err)
+ }
+ if err := unix.Mount(el.fs, "/mnt"+el.dir, el.fs, el.flags, ""); err != nil {
+ return fmt.Errorf("could not mount %s on /mnt%s: %w", el.fs, el.dir, err)
+ }
+ }
+
+ // Chroot to new root: chdir into /mnt, move that mount over /, then chroot into the current directory.
+ // This is adapted from util-linux's switch_root.
+ err = os.Chdir("/mnt")
+ if err != nil {
+ return fmt.Errorf("could not chdir to /mnt: %w", err)
+ }
+ err = syscall.Mount("/mnt", "/", "", syscall.MS_MOVE, "")
+ if err != nil {
+ return fmt.Errorf("could not remount /mnt to /: %w", err)
+ }
+ err = syscall.Chroot(".")
+ if err != nil {
+ return fmt.Errorf("could not chroot to new root: %w", err)
+ }
+
+ // Re-exec into new init with new environment (including the witness). On success, Exec does not return.
+ return unix.Exec("/init", os.Args, environ)
+}
diff --git a/core/internal/storage/find.go b/core/internal/storage/find.go
index 1abf6c0..8d83510 100644
--- a/core/internal/storage/find.go
+++ b/core/internal/storage/find.go
@@ -56,14 +56,7 @@
if err != nil {
return fmt.Errorf("failed to convert uevent: %w", err)
}
- minorDev, err := strconv.Atoi(ueventData["MINOR"])
- if err != nil {
- return fmt.Errorf("failed to convert uevent: %w", err)
- }
devNodeName := fmt.Sprintf("/dev/%v", ueventData["DEVNAME"])
- if err := unix.Mknod(devNodeName, 0600|unix.S_IFBLK, int(unix.Mkdev(uint32(majorDev), uint32(minorDev)))); err != nil {
- return fmt.Errorf("failed to create block device node: %w", err)
- }
blkdev, err := os.Open(devNodeName)
if err != nil {
return fmt.Errorf("failed to open block device %v: %w", devNodeName, err)