core -> metropolis

Smalltown is now called Metropolis!

This is the first commit in a series of cleanup commits that prepare us
for an open source release. This one just moves some Bazel packages
around to follow a stricter directory layout.

All of Metropolis now lives in `//metropolis`.

All of Metropolis Node code now lives in `//metropolis/node`.

All of the main /init now lives in `//m/n/core`.

All of the Kubernetes functionality/glue now lives in `//m/n/kubernetes`.

Next steps:
     - hunt down all references to Smalltown and replace them appropriately
     - narrow down visibility rules
     - document new code organization
     - move `//build/toolchain` to `//monogon/build/toolchain`
     - do another cleanup pass between `//golibs` and
       `//monogon/node/{core,common}`.
     - remove `//delta` and `//anubis`

Fixes T799.

Test Plan: Just a very large refactor. CI should help us out here.

Bug: T799

X-Origin-Diff: phab/D667
GitOrigin-RevId: 6029b8d4edc42325d50042596b639e8b122d0ded
diff --git a/metropolis/node/core/switchroot.go b/metropolis/node/core/switchroot.go
new file mode 100644
index 0000000..5865225
--- /dev/null
+++ b/metropolis/node/core/switchroot.go
@@ -0,0 +1,213 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+
+	"git.monogon.dev/source/nexantic.git/metropolis/node/core/logtree"
+)
+
+// switchRoot moves the root from initramfs into a tmpfs
+// This is necessary because you cannot pivot_root from a initramfs (and runsc wants to do that).
+// In the future, we should instead use something like squashfs instead of an initramfs and just nuke this.
+func switchRoot(log logtree.LeveledLogger) error {
+	// We detect the need to remount to tmpfs over env vars.
+	// The first run of /init (from initramfs) will not have this var, and will be re-exec'd from a new tmpfs root with
+	// that variable set.
+	witness := "SIGNOS_REMOUNTED"
+
+	// If the witness env var is found in the environment, it means we are ready to go.
+	environ := os.Environ()
+	for _, env := range environ {
+		if strings.HasPrefix(env, witness+"=") {
+			log.Info("Smalltown running in tmpfs root")
+			return nil
+		}
+	}
+
+	// Otherwise, we need to remount to a tmpfs.
+	environ = append(environ, witness+"=yes")
+	log.Info("Smalltown running in initramfs, remounting to tmpfs...")
+
+	// Make note of all directories we have to make and files that we have to copy.
+	paths := []string{}
+	dirs := []string{}
+	err := filepath.Walk("/", func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if path == "/" {
+			return nil
+		}
+		// /dev is prepopulated by the initramfs, skip that. The target root uses devtmpfs.
+		if path == "/dev" || strings.HasPrefix(path, "/dev/") {
+			return nil
+		}
+
+		if info.IsDir() {
+			dirs = append(dirs, path)
+		} else {
+			paths = append(paths, path)
+		}
+
+		return nil
+	})
+	if err != nil {
+		return fmt.Errorf("could not list root files: %w", err)
+	}
+
+	log.Info("Copying paths to tmpfs:")
+	for _, p := range paths {
+		log.Infof(" - %s", p)
+	}
+
+	// Make new root at /mnt
+	if err := os.Mkdir("/mnt", 0755); err != nil {
+		return fmt.Errorf("could not make /mnt: %w", err)
+	}
+	// And mount a tmpfs on it
+	if err := unix.Mount("tmpfs", "/mnt", "tmpfs", 0, ""); err != nil {
+		return fmt.Errorf("could not mount tmpfs on /mnt: %w", err)
+	}
+
+	// Make all directories. Since filepath.Walk is lexicographically ordered, we don't need to ensure that the parent
+	// exists.
+	for _, src := range dirs {
+		stat, err := os.Stat(src)
+		if err != nil {
+			return fmt.Errorf("Stat(%q): %w", src, err)
+		}
+		dst := "/mnt" + src
+		err = os.Mkdir(dst, stat.Mode())
+		if err != nil {
+			return fmt.Errorf("Mkdir(%q): %w", dst, err)
+		}
+	}
+
+	// Move all files over. Parent directories will exist by now.
+	for _, src := range paths {
+		stat, err := os.Stat(src)
+		if err != nil {
+			return fmt.Errorf("Stat(%q): %w", src, err)
+		}
+		dst := "/mnt" + src
+
+		// Copy file.
+		sfd, err := os.Open(src)
+		if err != nil {
+			return fmt.Errorf("Open(%q): %w", src, err)
+		}
+		dfd, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, stat.Mode())
+		if err != nil {
+			sfd.Close()
+			return fmt.Errorf("OpenFile(%q): %w", dst, err)
+		}
+		_, err = io.Copy(dfd, sfd)
+
+		sfd.Close()
+		dfd.Close()
+		if err != nil {
+			return fmt.Errorf("Copying %q failed: %w", src, err)
+		}
+
+		// Remove the old file.
+		err = unix.Unlink(src)
+		if err != nil {
+			return fmt.Errorf("Unlink(%q): %w", src, err)
+		}
+	}
+
+	// Set up target filesystems.
+	for _, el := range []struct {
+		dir   string
+		fs    string
+		flags uintptr
+	}{
+		{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+		{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+	} {
+		if err := os.Mkdir("/mnt"+el.dir, 0755); err != nil {
+			return fmt.Errorf("could not make /mnt%s: %w", el.dir, err)
+		}
+		if err := unix.Mount(el.fs, "/mnt"+el.dir, el.fs, el.flags, ""); err != nil {
+			return fmt.Errorf("could not mount %s on /mnt%s: %w", el.fs, el.dir, err)
+		}
+	}
+
+	// Mount all available CGroups for v1 (v2 uses a single unified hierarchy and is not supported by our runtimes yet)
+	if unix.Mount("tmpfs", "/mnt/sys/fs/cgroup", "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, ""); err != nil {
+		panic(err)
+	}
+	cgroupsRaw, err := ioutil.ReadFile("/mnt/proc/cgroups")
+	if err != nil {
+		panic(err)
+	}
+
+	cgroupLines := strings.Split(string(cgroupsRaw), "\n")
+	for _, cgroupLine := range cgroupLines {
+		if cgroupLine == "" || strings.HasPrefix(cgroupLine, "#") {
+			continue
+		}
+		cgroupParts := strings.Split(cgroupLine, "\t")
+		cgroupName := cgroupParts[0]
+		if err := os.Mkdir("/mnt/sys/fs/cgroup/"+cgroupName, 0755); err != nil {
+			panic(err)
+		}
+		if err := unix.Mount("cgroup", "/mnt/sys/fs/cgroup/"+cgroupName, "cgroup", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, cgroupName); err != nil {
+			panic(err)
+		}
+	}
+
+	// Enable hierarchical memory accounting
+	useMemoryHierarchy, err := os.OpenFile("/mnt/sys/fs/cgroup/memory/memory.use_hierarchy", os.O_RDWR, 0)
+	if err != nil {
+		panic(err)
+	}
+	if _, err := useMemoryHierarchy.WriteString("1"); err != nil {
+		panic(err)
+	}
+	useMemoryHierarchy.Close()
+
+	// Chroot to new root.
+	// This is adapted from util-linux's switch_root.
+	err = os.Chdir("/mnt")
+	if err != nil {
+		return fmt.Errorf("could not chdir to /mnt: %w", err)
+	}
+	err = syscall.Mount("/mnt", "/", "", syscall.MS_MOVE, "")
+	if err != nil {
+		return fmt.Errorf("could not remount /mnt to /: %w", err)
+	}
+	err = syscall.Chroot(".")
+	if err != nil {
+		return fmt.Errorf("could not chroot to new root: %w", err)
+	}
+
+	// Re-exec into new init with new environment
+	return unix.Exec("/init", os.Args, environ)
+}