core -> metropolis

Smalltown is now called Metropolis!

This is the first commit in a series of cleanup commits that prepare us
for an open source release. This one just moves some Bazel packages around to
follow a stricter directory layout.

All of Metropolis now lives in `//metropolis`.

All of Metropolis Node code now lives in `//metropolis/node`.

All of the main /init now lives in `//metropolis/node/core`.

All of the Kubernetes functionality/glue now lives in `//metropolis/node/kubernetes`.

Next steps:
     - hunt down all references to Smalltown and replace them appropriately
     - narrow down visibility rules
     - document new code organization
     - move `//build/toolchain` to `//monogon/build/toolchain`
     - do another cleanup pass between `//golibs` and
       `//monogon/node/{core,common}`.
     - remove `//delta` and `//anubis`

Fixes T799.

Test Plan: Just a very large refactor. CI should help us out here.

Bug: T799

X-Origin-Diff: phab/D667
GitOrigin-RevId: 6029b8d4edc42325d50042596b639e8b122d0ded
diff --git a/metropolis/node/common/devicemapper/BUILD.bazel b/metropolis/node/common/devicemapper/BUILD.bazel
new file mode 100644
index 0000000..12ca0b3
--- /dev/null
+++ b/metropolis/node/common/devicemapper/BUILD.bazel
@@ -0,0 +1,13 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["devicemapper.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/devicemapper",
+    visibility = ["//visibility:public"],
+    deps = [
+        "@com_github_pkg_errors//:go_default_library",
+        "@com_github_yalue_native_endian//:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/metropolis/node/common/devicemapper/devicemapper.go b/metropolis/node/common/devicemapper/devicemapper.go
new file mode 100644
index 0000000..2687e3a
--- /dev/null
+++ b/metropolis/node/common/devicemapper/devicemapper.go
@@ -0,0 +1,298 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package devicemapper is a thin wrapper for the devicemapper ioctl API.
+// See: https://github.com/torvalds/linux/blob/master/include/uapi/linux/dm-ioctl.h
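+//
+// A minimal usage sketch (the device name, target type and dm-linear parameters
+// below are illustrative only, not taken from real Metropolis code):
+//
+//   dev, err := devicemapper.CreateActiveDevice("example", []devicemapper.Target{{
+//       StartSector: 0,
+//       Length:      2048, // in 512-byte sectors
+//       Type:        "linear",
+//       Parameters:  "/dev/ram0 0",
+//   }})
+//   if err != nil {
+//       // handle error
+//   }
+//   _ = dev // dev is the kernel device number of the new mapping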
+package devicemapper
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"os"
+	"runtime"
+	"unsafe"
+
+	"github.com/pkg/errors"
+	"github.com/yalue/native_endian"
+	"golang.org/x/sys/unix"
+)
+
+type DMIoctl struct {
+	Version     Version
+	DataSize    uint32
+	DataStart   uint32
+	TargetCount uint32
+	OpenCount   int32
+	Flags       uint32
+	EventNumber uint32
+	_padding1   uint32
+	Dev         uint64
+	Name        [128]byte
+	UUID        [129]byte
+	_padding2   [7]byte
+	Data        [16384]byte
+}
+
+type DMTargetSpec struct {
+	SectorStart uint64
+	Length      uint64
+	Status      int32
+	Next        uint32
+	TargetType  [16]byte
+}
+
+type DMTargetDeps struct {
+	Count   uint32
+	Padding uint32
+	Dev     []uint64
+}
+
+type DMNameList struct {
+	Dev  uint64
+	Next uint32
+	Name []byte
+}
+
+type DMTargetVersions struct {
+	Next    uint32
+	Version [3]uint32
+}
+
+type DMTargetMessage struct {
+	Sector  uint64
+	Message []byte
+}
+
+type Version [3]uint32
+
+const (
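+	// The values below follow the kernel's _IOWR(DM_IOCTL, nr, struct dm_ioctl) encoding:
+	// 0xc138fd packs the direction bits (read+write), the size 0x138 (312 bytes, the size
+	// of the kernel's struct dm_ioctl) and the type 0xfd, while iota supplies the
+	// per-command number in the low byte.
+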
+	/* Top level cmds */
+	DM_VERSION_CMD uintptr = (0xc138fd << 8) + iota
+	DM_REMOVE_ALL_CMD
+	DM_LIST_DEVICES_CMD
+
+	/* device level cmds */
+	DM_DEV_CREATE_CMD
+	DM_DEV_REMOVE_CMD
+	DM_DEV_RENAME_CMD
+	DM_DEV_SUSPEND_CMD
+	DM_DEV_STATUS_CMD
+	DM_DEV_WAIT_CMD
+
+	/* Table level cmds */
+	DM_TABLE_LOAD_CMD
+	DM_TABLE_CLEAR_CMD
+	DM_TABLE_DEPS_CMD
+	DM_TABLE_STATUS_CMD
+
+	/* Added later */
+	DM_LIST_VERSIONS_CMD
+	DM_TARGET_MSG_CMD
+	DM_DEV_SET_GEOMETRY_CMD
+	DM_DEV_ARM_POLL_CMD
+)
+
+const (
+	DM_READONLY_FLAG       = 1 << 0 /* In/Out */
+	DM_SUSPEND_FLAG        = 1 << 1 /* In/Out */
+	DM_PERSISTENT_DEV_FLAG = 1 << 3 /* In */
+)
+
+const baseDataSize = uint32(unsafe.Sizeof(DMIoctl{})) - 16384
+
+func newReq() DMIoctl {
+	return DMIoctl{
+		Version:   Version{4, 0, 0},
+		DataSize:  baseDataSize,
+		DataStart: baseDataSize,
+	}
+}
+
+// stringToDelimitedBuf copies src into dst as a null-terminated string. It returns an error if
+// src (plus its null terminator) does not fit into dst, or if src itself contains a null byte.
+func stringToDelimitedBuf(dst []byte, src string) error {
+	if len(src) > len(dst)-1 {
+		return fmt.Errorf("string longer than target buffer (%v > %v)", len(src), len(dst)-1)
+	}
+	for i := 0; i < len(src); i++ {
+		if src[i] == 0x00 {
+			return errors.New("string contains null byte, this is unsupported by DM")
+		}
+		dst[i] = src[i]
+	}
+	return nil
+}
+
+var fd uintptr
+
+func getFd() (uintptr, error) {
+	if fd == 0 {
+		f, err := os.Open("/dev/mapper/control")
+		if os.IsNotExist(err) {
+			_ = os.MkdirAll("/dev/mapper", 0755)
+			if err := unix.Mknod("/dev/mapper/control", unix.S_IFCHR|0600, int(unix.Mkdev(10, 236))); err != nil {
+				return 0, err
+			}
+			f, err = os.Open("/dev/mapper/control")
+			if err != nil {
+				return 0, err
+			}
+		} else if err != nil {
+			return 0, err
+		}
+		fd = f.Fd()
+		return f.Fd(), nil
+	}
+	return fd, nil
+}
+
+func GetVersion() (Version, error) {
+	req := newReq()
+	fd, err := getFd()
+	if err != nil {
+		return Version{}, err
+	}
+	if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, DM_VERSION_CMD, uintptr(unsafe.Pointer(&req))); err != 0 {
+		return Version{}, err
+	}
+	return req.Version, nil
+}
+
+func CreateDevice(name string) (uint64, error) {
+	req := newReq()
+	if err := stringToDelimitedBuf(req.Name[:], name); err != nil {
+		return 0, err
+	}
+	fd, err := getFd()
+	if err != nil {
+		return 0, err
+	}
+	if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, DM_DEV_CREATE_CMD, uintptr(unsafe.Pointer(&req))); err != 0 {
+		return 0, err
+	}
+	return req.Dev, nil
+}
+
+func RemoveDevice(name string) error {
+	req := newReq()
+	if err := stringToDelimitedBuf(req.Name[:], name); err != nil {
+		return err
+	}
+	fd, err := getFd()
+	if err != nil {
+		return err
+	}
+	if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, DM_DEV_REMOVE_CMD, uintptr(unsafe.Pointer(&req))); err != 0 {
+		return err
+	}
+	runtime.KeepAlive(req)
+	return nil
+}
+
+type Target struct {
+	StartSector uint64
+	Length      uint64
+	Type        string
+	Parameters  string
+}
+
+func LoadTable(name string, targets []Target) error {
+	req := newReq()
+	if err := stringToDelimitedBuf(req.Name[:], name); err != nil {
+		return err
+	}
+	var data bytes.Buffer
+	for _, target := range targets {
+		// Size of the spec plus the null-terminated params, padded to an 8-byte boundary
+		padding := 7 - (len(target.Parameters) % 8)
+		targetSize := uint32(int(unsafe.Sizeof(DMTargetSpec{})) + (len(target.Parameters) + 1) + padding)
+
+		targetSpec := DMTargetSpec{
+			SectorStart: target.StartSector,
+			Length:      target.Length,
+			Next:        targetSize,
+		}
+		if err := stringToDelimitedBuf(targetSpec.TargetType[:], target.Type); err != nil {
+			return err
+		}
+		if err := binary.Write(&data, native_endian.NativeEndian(), &targetSpec); err != nil {
+			panic(err)
+		}
+		data.WriteString(target.Parameters)
+		data.WriteByte(0x00)
+		for i := 0; i < padding; i++ {
+			data.WriteByte(0x00)
+		}
+	}
+	req.TargetCount = uint32(len(targets))
+	if data.Len() >= 16384 {
+		return errors.New("table too large for allocated memory")
+	}
+	req.DataSize = baseDataSize + uint32(data.Len())
+	copy(req.Data[:], data.Bytes())
+	fd, err := getFd()
+	if err != nil {
+		return err
+	}
+	if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, DM_TABLE_LOAD_CMD, uintptr(unsafe.Pointer(&req))); err != 0 {
+		return err
+	}
+	runtime.KeepAlive(req)
+	return nil
+}
+
+func suspendResume(name string, suspend bool) error {
+	req := newReq()
+	if err := stringToDelimitedBuf(req.Name[:], name); err != nil {
+		return err
+	}
+	if suspend {
+		req.Flags = DM_SUSPEND_FLAG
+	}
+	fd, err := getFd()
+	if err != nil {
+		return err
+	}
+	if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, DM_DEV_SUSPEND_CMD, uintptr(unsafe.Pointer(&req))); err != 0 {
+		return err
+	}
+	runtime.KeepAlive(req)
+	return nil
+}
+
+func Suspend(name string) error {
+	return suspendResume(name, true)
+}
+func Resume(name string) error {
+	return suspendResume(name, false)
+}
+
+func CreateActiveDevice(name string, targets []Target) (uint64, error) {
+	dev, err := CreateDevice(name)
+	if err != nil {
+		return 0, fmt.Errorf("DM_DEV_CREATE failed: %w", err)
+	}
+	if err := LoadTable(name, targets); err != nil {
+		_ = RemoveDevice(name)
+		return 0, fmt.Errorf("DM_TABLE_LOAD failed: %w", err)
+	}
+	if err := Resume(name); err != nil {
+		_ = RemoveDevice(name)
+		return 0, fmt.Errorf("DM_DEV_SUSPEND failed: %w", err)
+	}
+	return dev, nil
+}
diff --git a/metropolis/node/common/fileargs/BUILD.bazel b/metropolis/node/common/fileargs/BUILD.bazel
new file mode 100644
index 0000000..c4fffc2
--- /dev/null
+++ b/metropolis/node/common/fileargs/BUILD.bazel
@@ -0,0 +1,9 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["fileargs.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/fileargs",
+    visibility = ["//visibility:public"],
+    deps = ["@org_golang_x_sys//unix:go_default_library"],
+)
diff --git a/metropolis/node/common/fileargs/fileargs.go b/metropolis/node/common/fileargs/fileargs.go
new file mode 100644
index 0000000..26c054b
--- /dev/null
+++ b/metropolis/node/common/fileargs/fileargs.go
@@ -0,0 +1,101 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fileargs
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"golang.org/x/sys/unix"
+)
+
+// DefaultSize is the default size limit for FileArgs
+const DefaultSize = 4 * 1024 * 1024
+
+// TempDirectory is the directory where FileArgs will mount the actual files to. Defaults to
+// os.TempDir() but can be globally overridden by the application before any FileArgs are used.
+var TempDirectory = os.TempDir()
+
+type FileArgs struct {
+	path      string
+	lastError error
+}
+
+// New initializes a new set of file-based arguments. Remember to call Close() when you're done
+// using it, otherwise it leaks memory and mounts.
+func New() (*FileArgs, error) {
+	return NewWithSize(DefaultSize)
+}
+
+// NewWithSize is the same as New, but with a custom size limit. Please be aware that this data
+// cannot be swapped out and using a size limit that's too high can deadlock your kernel.
+func NewWithSize(size uint64) (*FileArgs, error) {
+	randomNameRaw := make([]byte, 128/8)
+	if _, err := io.ReadFull(rand.Reader, randomNameRaw); err != nil {
+		return nil, err
+	}
+	tmpPath := filepath.Join(TempDirectory, hex.EncodeToString(randomNameRaw))
+	if err := os.MkdirAll(tmpPath, 0700); err != nil {
+		return nil, err
+	}
+	// This uses ramfs instead of tmpfs because we never want to swap this for security reasons
+	if err := unix.Mount("none", tmpPath, "ramfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, fmt.Sprintf("size=%v", size)); err != nil {
+		return nil, err
+	}
+	return &FileArgs{
+		path: tmpPath,
+	}, nil
+}
+
+// ArgPath writes content to a temporary file named name and returns that file's path.
+// If writing fails, it records the error (retrievable via Error()) and returns an empty string.
+func (f *FileArgs) ArgPath(name string, content []byte) string {
+	if f.lastError != nil {
+		return ""
+	}
+
+	path := filepath.Join(f.path, name)
+
+	if err := ioutil.WriteFile(path, content, 0600); err != nil {
+		f.lastError = err
+		return ""
+	}
+
+	return path
+}
+
+// FileOpt returns a full option with the temporary file name already filled in.
+// Example: `FileOpt("--testopt", "test.txt", []byte("hello")) == "--testopt=/tmp/daf8ed.../test.txt"`
+func (f *FileArgs) FileOpt(optName, fileName string, content []byte) string {
+	return fmt.Sprintf("%v=%v", optName, f.ArgPath(fileName, content))
+}
+
+func (f *FileArgs) Error() error {
+	return f.lastError
+}
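+
+// A typical flow looks like the following sketch (the program name, flag and
+// configBytes variable are illustrative only):
+//
+//   fa, err := fileargs.New()
+//   if err != nil {
+//       // handle error
+//   }
+//   defer fa.Close()
+//   cmd := exec.Command("someprogram", fa.FileOpt("--config", "config.json", configBytes))
+//   if err := fa.Error(); err != nil {
+//       // one of the FileOpt/ArgPath calls failed to write its file
+//   }
+//   // run cmd...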
+
+func (f *FileArgs) Close() error {
+	if err := unix.Unmount(f.path, 0); err != nil {
+		return err
+	}
+	return os.Remove(f.path)
+}
diff --git a/metropolis/node/common/fsquota/BUILD.bazel b/metropolis/node/common/fsquota/BUILD.bazel
new file mode 100644
index 0000000..b16d39e
--- /dev/null
+++ b/metropolis/node/common/fsquota/BUILD.bazel
@@ -0,0 +1,39 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//metropolis/test/ktest:ktest.bzl", "ktest")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "fsinfo.go",
+        "fsquota.go",
+    ],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/fsquota",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//metropolis/node/common/fsquota/fsxattrs:go_default_library",
+        "//metropolis/node/common/fsquota/quotactl:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = ["fsquota_test.go"],
+    embed = [":go_default_library"],
+    pure = "on",
+    deps = [
+        "@com_github_stretchr_testify//require:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+ktest(
+    tester = ":go_default_test",
+    deps = [
+        "//third_party/xfsprogs:mkfs.xfs",
+    ],
+    initramfs_extra = """
+file /mkfs.xfs $(location //third_party/xfsprogs:mkfs.xfs) 0755 0 0
+    """,
+    cmdline = "ramdisk_size=51200",
+)
diff --git a/metropolis/node/common/fsquota/fsinfo.go b/metropolis/node/common/fsquota/fsinfo.go
new file mode 100644
index 0000000..e40a533
--- /dev/null
+++ b/metropolis/node/common/fsquota/fsinfo.go
@@ -0,0 +1,59 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsquota
+
+import (
+	"fmt"
+	"os"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+// This requires fsinfo() support, which is not yet in any stable kernel.
+// Our kernel has that syscall backported. Getting the mount source without it would be an
+// extremely expensive operation and would also involve lots of logic on our side.
+
+// From syscall_64.tbl
+const sys_fsinfo = 441
+
+// From uapi/linux/fsinfo.h
+const fsinfo_attr_source = 0x09
+const fsinfo_flags_query_path = 0x0000
+const fsinfo_flags_query_fd = 0x0001
+
+type fsinfoParams struct {
+	resolveFlags uint64
+	atFlags      uint32
+	flags        uint32
+	request      uint32
+	nth          uint32
+	mth          uint32
+}
+
+func fsinfoGetSource(dir *os.File) (string, error) {
+	buf := make([]byte, 256)
+	params := fsinfoParams{
+		flags:   fsinfo_flags_query_fd,
+		request: fsinfo_attr_source,
+	}
+	n, _, err := unix.Syscall6(sys_fsinfo, dir.Fd(), 0, uintptr(unsafe.Pointer(&params)), unsafe.Sizeof(params), uintptr(unsafe.Pointer(&buf[0])), 128)
+	if err != unix.Errno(0) {
+		return "", fmt.Errorf("failed to call fsinfo: %w", err)
+	}
+	return string(buf[:n]), nil
+}
diff --git a/metropolis/node/common/fsquota/fsquota.go b/metropolis/node/common/fsquota/fsquota.go
new file mode 100644
index 0000000..f702d23
--- /dev/null
+++ b/metropolis/node/common/fsquota/fsquota.go
@@ -0,0 +1,146 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fsquota provides a simplified interface to interact with Linux's filesystem quota API.
+// It only supports setting quotas on directories, not groups or users.
+// Quotas need to already be enabled on the filesystem before they can be used through this package.
+// See the quotactl package if you intend to use this on a filesystem where quotas need to be
+// enabled manually.
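+//
+// Typical usage (sketch; the path and limits below are illustrative only):
+//
+//   // Limit a volume directory to 1 GiB and 10000 inodes.
+//   if err := fsquota.SetQuota("/data/volumes/foo", 1<<30, 10000); err != nil {
+//       // handle error
+//   }
+//   q, err := fsquota.GetQuota("/data/volumes/foo")
+//   // q.BytesUsed and q.InodesUsed report the current utilization.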
+package fsquota
+
+import (
+	"fmt"
+	"math"
+	"os"
+
+	"golang.org/x/sys/unix"
+
+	"git.monogon.dev/source/nexantic.git/metropolis/node/common/fsquota/fsxattrs"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/common/fsquota/quotactl"
+)
+
+// SetQuota sets the quota of bytes and/or inodes in a given path. To not set a limit, set the
+// corresponding argument to zero. Setting both arguments to zero removes the quota entirely.
+// This function can only be called on an empty directory. It can't be used to create a quota
+// below a directory which already has a quota since Linux doesn't offer hierarchical quotas.
+func SetQuota(path string, maxBytes uint64, maxInodes uint64) error {
+	dir, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer dir.Close()
+	source, err := fsinfoGetSource(dir)
+	if err != nil {
+		return err
+	}
+	var valid uint32
+	if maxBytes > 0 {
+		valid |= quotactl.FlagBLimitsValid
+	}
+	if maxInodes > 0 {
+		valid |= quotactl.FlagILimitsValid
+	}
+
+	attrs, err := fsxattrs.Get(dir)
+	if err != nil {
+		return err
+	}
+
+	var lastID uint32 = attrs.ProjectID
+	if lastID == 0 {
+		// No project/quota exists for this directory, assign a new project quota
+		// TODO(lorenz): This is racy, but the kernel does not support atomically assigning
+		// quotas. So this needs to be added to the kernel's setquota interface. Due to the short
+		// time window and infrequent calls this should not be an immediate issue.
+		for {
+			quota, err := quotactl.GetNextQuota(source, quotactl.QuotaTypeProject, lastID)
+			if err == unix.ENOENT || err == unix.ESRCH {
+				// We have enumerated all quotas, nothing exists here
+				break
+			} else if err != nil {
+				return fmt.Errorf("failed to call GetNextQuota: %w", err)
+			}
+			if quota.ID > lastID+1 {
+				// Take the first ID in the quota ID gap
+				lastID++
+				break
+			}
+			lastID++
+		}
+	}
+
+	// If both limits are zero, this is a delete operation, process it as such
+	if maxBytes == 0 && maxInodes == 0 {
+		valid = quotactl.FlagBLimitsValid | quotactl.FlagILimitsValid
+		attrs.ProjectID = 0
+		attrs.Flags &= ^fsxattrs.FlagProjectInherit
+	} else {
+		attrs.ProjectID = lastID
+		attrs.Flags |= fsxattrs.FlagProjectInherit
+	}
+
+	if err := fsxattrs.Set(dir, attrs); err != nil {
+		return err
+	}
+
+	// Quota block limits are in KiB, so round the byte limit up to the next KiB
+	bytesLimitBlocks := uint64(math.Ceil(float64(maxBytes) / float64(1024)))
+
+	return quotactl.SetQuota(source, quotactl.QuotaTypeProject, lastID, &quotactl.Quota{
+		BHardLimit: bytesLimitBlocks,
+		BSoftLimit: bytesLimitBlocks,
+		IHardLimit: maxInodes,
+		ISoftLimit: maxInodes,
+		Valid:      valid,
+	})
+}
+
+type Quota struct {
+	Bytes      uint64
+	BytesUsed  uint64
+	Inodes     uint64
+	InodesUsed uint64
+}
+
+// GetQuota returns the current active quota and its utilization at the given path
+func GetQuota(path string) (*Quota, error) {
+	dir, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer dir.Close()
+	source, err := fsinfoGetSource(dir)
+	if err != nil {
+		return nil, err
+	}
+	attrs, err := fsxattrs.Get(dir)
+	if err != nil {
+		return nil, err
+	}
+	if attrs.ProjectID == 0 {
+		return nil, os.ErrNotExist
+	}
+	quota, err := quotactl.GetQuota(source, quotactl.QuotaTypeProject, attrs.ProjectID)
+	if err != nil {
+		return nil, err
+	}
+	return &Quota{
+		Bytes:      quota.BHardLimit * 1024,
+		BytesUsed:  quota.CurSpace,
+		Inodes:     quota.IHardLimit,
+		InodesUsed: quota.CurInodes,
+	}, nil
+}
diff --git a/metropolis/node/common/fsquota/fsquota_test.go b/metropolis/node/common/fsquota/fsquota_test.go
new file mode 100644
index 0000000..4729dac
--- /dev/null
+++ b/metropolis/node/common/fsquota/fsquota_test.go
@@ -0,0 +1,152 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsquota
+
+import (
+	"fmt"
+	"io/ioutil"
+	"math"
+	"os"
+	"os/exec"
+	"syscall"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"golang.org/x/sys/unix"
+)
+
+// withinTolerance is a helper for asserting that a value is within a certain percentage of the
+// expected value. The tolerance is specified as a float between 0 (exact match)
+// and 1 (between 0 and twice the expected value).
+func withinTolerance(t *testing.T, expected uint64, actual uint64, tolerance float64, name string) {
+	t.Helper()
+	delta := uint64(math.Round(float64(expected) * tolerance))
+	lowerBound := expected - delta
+	upperBound := expected + delta
+	if actual < lowerBound {
+		t.Errorf("Value %v (%v) is too low, expected between %v and %v", name, actual, lowerBound, upperBound)
+	}
+	if actual > upperBound {
+		t.Errorf("Value %v (%v) is too high, expected between %v and %v", name, actual, lowerBound, upperBound)
+	}
+}
+
+func TestBasic(t *testing.T) {
+	if os.Getenv("IN_KTEST") != "true" {
+		t.Skip("Not in ktest")
+	}
+	mkfsCmd := exec.Command("/mkfs.xfs", "-qf", "/dev/ram0")
+	if _, err := mkfsCmd.Output(); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Mkdir("/test", 0755); err != nil {
+		t.Error(err)
+	}
+
+	if err := unix.Mount("/dev/ram0", "/test", "xfs", unix.MS_NOEXEC|unix.MS_NODEV, "prjquota"); err != nil {
+		t.Fatal(err)
+	}
+	defer unix.Unmount("/test", 0)
+	defer os.RemoveAll("/test")
+	t.Run("SetQuota", func(t *testing.T) {
+		defer func() {
+			os.RemoveAll("/test/set")
+		}()
+		if err := os.Mkdir("/test/set", 0755); err != nil {
+			t.Fatal(err)
+		}
+		if err := SetQuota("/test/set", 1024*1024, 100); err != nil {
+			t.Fatal(err)
+		}
+	})
+	t.Run("SetQuotaAndExhaust", func(t *testing.T) {
+		defer func() {
+			os.RemoveAll("/test/sizequota")
+		}()
+		if err := os.Mkdir("/test/sizequota", 0755); err != nil {
+			t.Fatal(err)
+		}
+		const bytesQuota = 1024 * 1024 // 1MiB
+		if err := SetQuota("/test/sizequota", bytesQuota, 0); err != nil {
+			t.Fatal(err)
+		}
+		testfile, err := os.Create("/test/sizequota/testfile")
+		if err != nil {
+			t.Fatal(err)
+		}
+		testdata := make([]byte, 1024)
+		var bytesWritten int
+		for {
+			n, err := testfile.Write(testdata)
+			if err != nil {
+				if pathErr, ok := err.(*os.PathError); ok {
+					if pathErr.Err == syscall.ENOSPC {
+						// Running out of space is the only acceptable error to continue execution
+						break
+					}
+				}
+				t.Fatal(err)
+			}
+			bytesWritten += n
+		}
+		if bytesWritten > bytesQuota {
+			t.Errorf("Wrote %v bytes, quota is only %v bytes", bytesWritten, bytesQuota)
+		}
+	})
+	t.Run("GetQuotaReadbackAndUtilization", func(t *testing.T) {
+		defer func() {
+			os.RemoveAll("/test/readback")
+		}()
+		if err := os.Mkdir("/test/readback", 0755); err != nil {
+			t.Fatal(err)
+		}
+		const bytesQuota = 1024 * 1024 // 1MiB
+		const inodesQuota = 100
+		if err := SetQuota("/test/readback", bytesQuota, inodesQuota); err != nil {
+			t.Fatal(err)
+		}
+		sizeFileData := make([]byte, 512*1024)
+		if err := ioutil.WriteFile("/test/readback/512kfile", sizeFileData, 0644); err != nil {
+			t.Fatal(err)
+		}
+
+		quotaUtil, err := GetQuota("/test/readback")
+		if err != nil {
+			t.Fatal(err)
+		}
+		require.Equal(t, uint64(bytesQuota), quotaUtil.Bytes, "bytes quota readback incorrect")
+		require.Equal(t, uint64(inodesQuota), quotaUtil.Inodes, "inodes quota readback incorrect")
+
+		// Give 10% tolerance for quota used values to account for metadata overhead and internal
+		// structures that are also in there. If it's out by more than that it's an issue anyways.
+		withinTolerance(t, uint64(len(sizeFileData)), quotaUtil.BytesUsed, 0.1, "BytesUsed")
+
+		// Write 50 inodes for a total of 51 (with the 512K file)
+		for i := 0; i < 50; i++ {
+			if err := ioutil.WriteFile(fmt.Sprintf("/test/readback/ifile%v", i), []byte("test"), 0644); err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		quotaUtil, err = GetQuota("/test/readback")
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		withinTolerance(t, 51, quotaUtil.InodesUsed, 0.1, "InodesUsed")
+	})
+}
diff --git a/metropolis/node/common/fsquota/fsxattrs/BUILD.bazel b/metropolis/node/common/fsquota/fsxattrs/BUILD.bazel
new file mode 100644
index 0000000..066200b
--- /dev/null
+++ b/metropolis/node/common/fsquota/fsxattrs/BUILD.bazel
@@ -0,0 +1,9 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["fsxattrs.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/fsquota/fsxattrs",
+    visibility = ["//visibility:public"],
+    deps = ["@org_golang_x_sys//unix:go_default_library"],
+)
diff --git a/metropolis/node/common/fsquota/fsxattrs/fsxattrs.go b/metropolis/node/common/fsquota/fsxattrs/fsxattrs.go
new file mode 100644
index 0000000..1d455eb
--- /dev/null
+++ b/metropolis/node/common/fsquota/fsxattrs/fsxattrs.go
@@ -0,0 +1,77 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsxattrs
+
+import (
+	"fmt"
+	"os"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+type FSXAttrFlag uint32
+
+// Defined in uapi/linux/fs.h
+const (
+	FlagRealtime        FSXAttrFlag = 0x00000001
+	FlagPreallocated    FSXAttrFlag = 0x00000002
+	FlagImmutable       FSXAttrFlag = 0x00000008
+	FlagAppend          FSXAttrFlag = 0x00000010
+	FlagSync            FSXAttrFlag = 0x00000020
+	FlagNoATime         FSXAttrFlag = 0x00000040
+	FlagNoDump          FSXAttrFlag = 0x00000080
+	FlagRealtimeInherit FSXAttrFlag = 0x00000100
+	FlagProjectInherit  FSXAttrFlag = 0x00000200
+	FlagNoSymlinks      FSXAttrFlag = 0x00000400
+	FlagExtentSize      FSXAttrFlag = 0x00000800
+	FlagNoDefragment    FSXAttrFlag = 0x00002000
+	FlagFilestream      FSXAttrFlag = 0x00004000
+	FlagDAX             FSXAttrFlag = 0x00008000
+	FlagCOWExtentSize   FSXAttrFlag = 0x00010000
+	FlagHasAttribute    FSXAttrFlag = 0x80000000
+)
+
+// FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR are defined in uapi/linux/fs.h
+const FS_IOC_FSGETXATTR = 0x801c581f
+const FS_IOC_FSSETXATTR = 0x401c5820
+
+type FSXAttrs struct {
+	Flags         FSXAttrFlag
+	ExtentSize    uint32
+	ExtentCount   uint32
+	ProjectID     uint32
+	CoWExtentSize uint32
+	_pad          [8]byte
+}
+
+func Get(file *os.File) (*FSXAttrs, error) {
+	var attrs FSXAttrs
+	_, _, errno := unix.Syscall(unix.SYS_IOCTL, file.Fd(), FS_IOC_FSGETXATTR, uintptr(unsafe.Pointer(&attrs)))
+	if errno != 0 {
+		return nil, fmt.Errorf("failed to execute getFSXAttrs: %v", errno)
+	}
+	return &attrs, nil
+}
+
+func Set(file *os.File, attrs *FSXAttrs) error {
+	_, _, errno := unix.Syscall(unix.SYS_IOCTL, file.Fd(), FS_IOC_FSSETXATTR, uintptr(unsafe.Pointer(attrs)))
+	if errno != 0 {
+		return fmt.Errorf("failed to execute setFSXAttrs: %v", errno)
+	}
+	return nil
+}
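+
+// Usage sketch, mirroring how the fsquota package assigns a project quota
+// (dir is an *os.File of the target directory; the project ID is illustrative):
+//
+//   attrs, err := fsxattrs.Get(dir)
+//   if err != nil {
+//       // handle error
+//   }
+//   attrs.ProjectID = 42
+//   attrs.Flags |= fsxattrs.FlagProjectInherit
+//   if err := fsxattrs.Set(dir, attrs); err != nil {
+//       // handle error
+//   }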
diff --git a/metropolis/node/common/fsquota/quotactl/BUILD.bazel b/metropolis/node/common/fsquota/quotactl/BUILD.bazel
new file mode 100644
index 0000000..c1582ad
--- /dev/null
+++ b/metropolis/node/common/fsquota/quotactl/BUILD.bazel
@@ -0,0 +1,9 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["quotactl.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/fsquota/quotactl",
+    visibility = ["//visibility:public"],
+    deps = ["@org_golang_x_sys//unix:go_default_library"],
+)
diff --git a/metropolis/node/common/fsquota/quotactl/quotactl.go b/metropolis/node/common/fsquota/quotactl/quotactl.go
new file mode 100644
index 0000000..5ed77d7
--- /dev/null
+++ b/metropolis/node/common/fsquota/quotactl/quotactl.go
@@ -0,0 +1,233 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package quotactl implements a low-level wrapper around the modern portion of Linux's
+// quotactl() syscall. See the fsquota package for a nicer interface to the most common part
+// of this API.
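+//
+// Usage sketch (the device path and quota ID are illustrative only; limits are
+// in KiB blocks, see the Quota struct below):
+//
+//   q, err := quotactl.GetQuota("/dev/sda1", quotactl.QuotaTypeProject, 1)
+//   if err != nil {
+//       // handle error
+//   }
+//   q.BHardLimit = 1024 // 1 MiB
+//   q.BSoftLimit = 1024
+//   q.Valid = quotactl.FlagBLimitsValid
+//   err = quotactl.SetQuota("/dev/sda1", quotactl.QuotaTypeProject, 1, q)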
+package quotactl
+
+import (
+	"fmt"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+type QuotaType uint
+
+const (
+	QuotaTypeUser QuotaType = iota
+	QuotaTypeGroup
+	QuotaTypeProject
+)
+
+const (
+	Q_SYNC uint = ((0x800001 + iota) << 8)
+	Q_QUOTAON
+	Q_QUOTAOFF
+	Q_GETFMT
+	Q_GETINFO
+	Q_SETINFO
+	Q_GETQUOTA
+	Q_SETQUOTA
+	Q_GETNEXTQUOTA
+)
+
+const (
+	FlagBLimitsValid = 1 << iota
+	FlagSpaceValid
+	FlagILimitsValid
+	FlagInodesValid
+	FlagBTimeValid
+	FlagITimeValid
+)
+
+type DQInfo struct {
+	Bgrace uint64
+	Igrace uint64
+	Flags  uint32
+	Valid  uint32
+}
+
+type Quota struct {
+	BHardLimit uint64 // Both Byte limits are prescaled by 1024 (so are in KiB), but CurSpace is in B
+	BSoftLimit uint64
+	CurSpace   uint64
+	IHardLimit uint64
+	ISoftLimit uint64
+	CurInodes  uint64
+	BTime      uint64
+	ITime      uint64
+	Valid      uint32
+}
+
+type NextDQBlk struct {
+	HardLimitBytes  uint64
+	SoftLimitBytes  uint64
+	CurrentBytes    uint64
+	HardLimitInodes uint64
+	SoftLimitInodes uint64
+	CurrentInodes   uint64
+	BTime           uint64
+	ITime           uint64
+	Valid           uint32
+	ID              uint32
+}
+
+type QuotaFormat uint32
+
+// Collected from quota_format_type structs
+const (
+	// QuotaFormatNone is a special case where all quota information is
+	// stored inside filesystem metadata and thus requires no quotaFilePath.
+	QuotaFormatNone   QuotaFormat = 0
+	QuotaFormatVFSOld QuotaFormat = 1
+	QuotaFormatVFSV0  QuotaFormat = 2
+	QuotaFormatOCFS2  QuotaFormat = 3
+	QuotaFormatVFSV1  QuotaFormat = 4
+)
+
+// QuotaOn turns quota accounting and enforcement on
+func QuotaOn(device string, qtype QuotaType, quotaFormat QuotaFormat, quotaFilePath string) error {
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return err
+	}
+	pathArg, err := unix.BytePtrFromString(quotaFilePath)
+	if err != nil {
+		return err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_QUOTAON|uint(qtype)), uintptr(unsafe.Pointer(devArg)), uintptr(quotaFormat), uintptr(unsafe.Pointer(pathArg)), 0, 0)
+	if err != unix.Errno(0) {
+		return err
+	}
+	return nil
+}
+
+// QuotaOff turns quotas off
+func QuotaOff(device string, qtype QuotaType) error {
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_QUOTAOFF|uint(qtype)), uintptr(unsafe.Pointer(devArg)), 0, 0, 0, 0)
+	if err != unix.Errno(0) {
+		return err
+	}
+	return nil
+}
+
+// GetFmt gets the quota format used on given filesystem
+func GetFmt(device string, qtype QuotaType) (uint32, error) {
+	var fmt uint32
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return 0, err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_GETFMT|uint(qtype)), uintptr(unsafe.Pointer(devArg)), 0, uintptr(unsafe.Pointer(&fmt)), 0, 0)
+	if err != unix.Errno(0) {
+		return 0, err
+	}
+	return fmt, nil
+}
+
+// GetInfo gets information about quota files
+func GetInfo(device string, qtype QuotaType) (*DQInfo, error) {
+	var info DQInfo
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return nil, err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_GETINFO|uint(qtype)), uintptr(unsafe.Pointer(devArg)), 0, uintptr(unsafe.Pointer(&info)), 0, 0)
+	if err != unix.Errno(0) {
+		return nil, err
+	}
+	return &info, nil
+}
+
+// SetInfo sets information about quota files
+func SetInfo(device string, qtype QuotaType, info *DQInfo) error {
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_SETINFO|uint(qtype)), uintptr(unsafe.Pointer(devArg)), 0, uintptr(unsafe.Pointer(info)), 0, 0)
+	if err != unix.Errno(0) {
+		return err
+	}
+	return nil
+}
+
+// GetQuota gets user quota structure
+func GetQuota(device string, qtype QuotaType, id uint32) (*Quota, error) {
+	var info Quota
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return nil, err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_GETQUOTA|uint(qtype)), uintptr(unsafe.Pointer(devArg)), uintptr(id), uintptr(unsafe.Pointer(&info)), 0, 0)
+	if err != unix.Errno(0) {
+		return nil, err
+	}
+	return &info, nil
+}
+
+// GetNextQuota gets disk limits and usage > ID
+func GetNextQuota(device string, qtype QuotaType, id uint32) (*NextDQBlk, error) {
+	var info NextDQBlk
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return nil, err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_GETNEXTQUOTA|uint(qtype)), uintptr(unsafe.Pointer(devArg)), uintptr(id), uintptr(unsafe.Pointer(&info)), 0, 0)
+	if err != unix.Errno(0) {
+		return nil, err
+	}
+	return &info, nil
+}
+
+// SetQuota sets the given quota
+func SetQuota(device string, qtype QuotaType, id uint32, quota *Quota) error {
+	devArg, err := unix.BytePtrFromString(device)
+	if err != nil {
+		return err
+	}
+	_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_SETQUOTA|uint(qtype)), uintptr(unsafe.Pointer(devArg)), uintptr(id), uintptr(unsafe.Pointer(quota)), 0, 0)
+	if err != unix.Errno(0) {
+		return fmt.Errorf("failed to set quota: %w", err)
+	}
+	return nil
+}
+
+// Sync syncs the on-disk copy of a filesystem's quotas. If device is empty it syncs all filesystems.
+func Sync(device string) error {
+	if device != "" {
+		devArg, err := unix.BytePtrFromString(device)
+		if err != nil {
+			return err
+		}
+		_, _, err = unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_SYNC), uintptr(unsafe.Pointer(devArg)), 0, 0, 0, 0)
+		if err != unix.Errno(0) {
+			return err
+		}
+	} else {
+		_, _, err := unix.Syscall6(unix.SYS_QUOTACTL, uintptr(Q_SYNC), 0, 0, 0, 0, 0)
+		if err != unix.Errno(0) {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/metropolis/node/common/jsonpatch/BUILD.bazel b/metropolis/node/common/jsonpatch/BUILD.bazel
new file mode 100644
index 0000000..bd77e0a
--- /dev/null
+++ b/metropolis/node/common/jsonpatch/BUILD.bazel
@@ -0,0 +1,14 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["jsonpatch.go.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/jsonpatch",
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = ["jsonpatch_test.go"],
+    embed = [":go_default_library"],
+)
diff --git a/metropolis/node/common/jsonpatch/jsonpatch.go.go b/metropolis/node/common/jsonpatch/jsonpatch.go.go
new file mode 100644
index 0000000..9682980
--- /dev/null
+++ b/metropolis/node/common/jsonpatch/jsonpatch.go.go
@@ -0,0 +1,44 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package jsonpatch contains data structures and encoders for JSON Patch (RFC 6902) and JSON Pointers (RFC 6901)
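+//
+// Example (sketch): building a patch operation whose target key contains a slash,
+// which must be escaped as "~1" in the JSON Pointer:
+//
+//   op := jsonpatch.JsonPatchOp{
+//       Operation: "replace",
+//       Path:      jsonpatch.PointerFromParts([]string{"metadata", "a/b"}), // "/metadata/a~1b"
+//       Value:     "new-value",
+//   }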
+package jsonpatch
+
+import "strings"
+
+// JsonPatchOp describes a single JSON Patch operation (RFC 6902 Section 4)
+type JsonPatchOp struct {
+	Operation string      `json:"op"`
+	Path      string      `json:"path"` // Technically a JSON Pointer, but called Path in the RFC
+	From      string      `json:"from,omitempty"`
+	Value     interface{} `json:"value,omitempty"`
+}
+
+// EncodeJSONRefToken encodes a JSON reference token as part of a JSON Pointer (RFC 6901 Section 2)
+func EncodeJSONRefToken(token string) string {
+	x := strings.ReplaceAll(token, "~", "~0")
+	return strings.ReplaceAll(x, "/", "~1")
+}
+
+// PointerFromParts returns an encoded JSON Pointer from parts
+func PointerFromParts(pathParts []string) string {
+	var encodedParts []string
+	encodedParts = append(encodedParts, "")
+	for _, part := range pathParts {
+		encodedParts = append(encodedParts, EncodeJSONRefToken(part))
+	}
+	return strings.Join(encodedParts, "/")
+}
diff --git a/metropolis/node/common/jsonpatch/jsonpatch_test.go b/metropolis/node/common/jsonpatch/jsonpatch_test.go
new file mode 100644
index 0000000..33a56ba
--- /dev/null
+++ b/metropolis/node/common/jsonpatch/jsonpatch_test.go
@@ -0,0 +1,66 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpatch
+
+import (
+	"testing"
+)
+
+func TestEncodeJSONRefToken(t *testing.T) {
+	tests := []struct {
+		name  string
+		token string
+		want  string
+	}{
+		{"Passes through normal characters", "asdf123", "asdf123"},
+		{"Encodes simple slashes", "a/b", "a~1b"},
+		{"Encodes tildes", "m~n", "m~0n"},
+		{"Encodes bot tildes and slashes", "a/m~n", "a~1m~0n"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := EncodeJSONRefToken(tt.token); got != tt.want {
+				t.Errorf("EncodeJSONRefToken() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestPointerFromParts(t *testing.T) {
+	type args struct {
+		pathParts []string
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{"Empty path", args{[]string{}}, ""},
+		{"Single level path", args{[]string{"foo"}}, "/foo"},
+		{"Multi-level path", args{[]string{"foo", "0"}}, "/foo/0"},
+		{"Path starting with empty key", args{[]string{""}}, "/"},
+		{"Path with part containing /", args{[]string{"a/b"}}, "/a~1b"},
+		{"Path with part containing spaces", args{[]string{" "}}, "/ "},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := PointerFromParts(tt.args.pathParts); got != tt.want {
+				t.Errorf("PointerFromParts() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/metropolis/node/common/logbuffer/BUILD.bazel b/metropolis/node/common/logbuffer/BUILD.bazel
new file mode 100644
index 0000000..2d4650d
--- /dev/null
+++ b/metropolis/node/common/logbuffer/BUILD.bazel
@@ -0,0 +1,22 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "linebuffer.go",
+        "logbuffer.go",
+    ],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/logbuffer",
+    visibility = ["//visibility:public"],
+    deps = ["//metropolis/proto/api:go_default_library"],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = [
+        "linebuffer_test.go",
+        "logbuffer_test.go",
+    ],
+    embed = [":go_default_library"],
+    deps = ["@com_github_stretchr_testify//require:go_default_library"],
+)
diff --git a/metropolis/node/common/logbuffer/linebuffer.go b/metropolis/node/common/logbuffer/linebuffer.go
new file mode 100644
index 0000000..246a91b
--- /dev/null
+++ b/metropolis/node/common/logbuffer/linebuffer.go
@@ -0,0 +1,160 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package logbuffer
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+	"sync"
+
+	apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+)
+
+// Line is a line stored in the log buffer - a string that has perhaps been truncated (due to exceeded length limits).
+type Line struct {
+	Data           string
+	OriginalLength int
+}
+
+// Truncated returns whether this line has been truncated to fit limits.
+func (l *Line) Truncated() bool {
+	return l.OriginalLength > len(l.Data)
+}
+
+// String returns the line with an ellipsis at the end (...) if the line has been truncated, or the original line
+// otherwise.
+func (l *Line) String() string {
+	if l.Truncated() {
+		return l.Data + "..."
+	}
+	return l.Data
+}
+
+// ProtoLog returns a Logging-specific protobuf structure.
+func (l *Line) ProtoLog() *apb.LogEntry_Raw {
+	return &apb.LogEntry_Raw{
+		Data:           l.Data,
+		OriginalLength: int64(l.OriginalLength),
+	}
+}
+
+// LineFromLogProto converts a Logging-specific protobuf message back into a Line.
+func LineFromLogProto(raw *apb.LogEntry_Raw) (*Line, error) {
+	if raw.OriginalLength < int64(len(raw.Data)) {
+		return nil, fmt.Errorf("original_length smaller than length of data")
+	}
+	originalLength := int(raw.OriginalLength)
+	if int64(originalLength) < raw.OriginalLength {
+		return nil, fmt.Errorf("original_length larger than native int size")
+	}
+	return &Line{
+		Data:           raw.Data,
+		OriginalLength: originalLength,
+	}, nil
+}
+
+// LineBuffer is a io.WriteCloser that will call a given callback every time a line is completed.
+type LineBuffer struct {
+	maxLineLength int
+	cb            LineBufferCallback
+
+	mu  sync.Mutex
+	cur strings.Builder
+	// length is the length of the line currently being written - this will continue to increase, even if the string
+	// exceeds maxLineLength.
+	length int
+	closed bool
+}
+
+// LineBufferCallback is a callback that will get called any time a line is completed. The function must not cause another
+// write to the LineBuffer, or the program will deadlock.
+type LineBufferCallback func(*Line)
+
+// NewLineBuffer creates a new LineBuffer with a given line length limit and callback.
+func NewLineBuffer(maxLineLength int, cb LineBufferCallback) *LineBuffer {
+	return &LineBuffer{
+		maxLineLength: maxLineLength,
+		cb:            cb,
+	}
+}
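+
+// A small usage sketch (the callback body is illustrative; see also the logbuffer
+// package, which builds a ring buffer on top of LineBuffer):
+//
+//   var lines []*Line
+//   lb := NewLineBuffer(1024, func(l *Line) {
+//       lines = append(lines, l)
+//   })
+//   io.WriteString(lb, "partial")
+//   io.WriteString(lb, " line\n") // completes a line and invokes the callback
+//   lb.Close()                    // flushes any unterminated trailing data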
+
+// writeLimited writes to the internal buffer, making sure that its size does not exceed the maxLineLength.
+func (l *LineBuffer) writeLimited(data []byte) {
+	l.length += len(data)
+	if l.cur.Len()+len(data) > l.maxLineLength {
+		data = data[:l.maxLineLength-l.cur.Len()]
+	}
+	l.cur.Write(data)
+}
+
+// commitLine calls the callback and resets the builder.
+func (l *LineBuffer) commitLine() {
+	l.cb(&Line{
+		Data:           l.cur.String(),
+		OriginalLength: l.length,
+	})
+	l.cur.Reset()
+	l.length = 0
+}
+
+func (l *LineBuffer) Write(data []byte) (int, error) {
+	var pos = 0
+
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	if l.closed {
+		return 0, fmt.Errorf("closed")
+	}
+
+	for {
+		nextNewline := bytes.IndexRune(data[pos:], '\n')
+
+		// No newline in the data, write everything to the current line
+		if nextNewline == -1 {
+			l.writeLimited(data[pos:])
+			break
+		}
+
+		// Write this line and update position
+		l.writeLimited(data[pos : pos+nextNewline])
+		l.commitLine()
+		pos += nextNewline + 1
+
+		// Data ends with a newline, stop now without writing an empty line
+		if nextNewline == len(data)-1 {
+			break
+		}
+	}
+	return len(data), nil
+}
+
+// Close will emit any leftover data in the buffer to the callback. Subsequent calls to Write will fail. Subsequent calls to Close
+// will also fail.
+func (l *LineBuffer) Close() error {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	if l.closed {
+		return fmt.Errorf("already closed")
+	}
+	l.closed = true
+	if l.length > 0 {
+		l.commitLine()
+	}
+	return nil
+}
diff --git a/metropolis/node/common/logbuffer/linebuffer_test.go b/metropolis/node/common/logbuffer/linebuffer_test.go
new file mode 100644
index 0000000..c821a4b
--- /dev/null
+++ b/metropolis/node/common/logbuffer/linebuffer_test.go
@@ -0,0 +1,75 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package logbuffer
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestLineBuffer(t *testing.T) {
+	var lines []*Line
+	lb := NewLineBuffer(1024, func(l *Line) {
+		lines = append(lines, l)
+	})
+
+	compare := func(a []*Line, b ...string) string {
+		msg := fmt.Sprintf("got %v, want %v", a, b)
+		if len(a) != len(b) {
+			return msg
+		}
+		for i := range a {
+			if a[i].String() != b[i] {
+				return msg
+			}
+		}
+		return ""
+	}
+
+	// Write some data.
+	fmt.Fprintf(lb, "foo ")
+	if diff := compare(lines); diff != "" {
+		t.Fatal(diff)
+	}
+	fmt.Fprintf(lb, "bar\n")
+	if diff := compare(lines, "foo bar"); diff != "" {
+		t.Fatal(diff)
+	}
+	fmt.Fprintf(lb, "baz")
+	if diff := compare(lines, "foo bar"); diff != "" {
+		t.Fatal(diff)
+	}
+	fmt.Fprintf(lb, " baz")
+	if diff := compare(lines, "foo bar"); diff != "" {
+		t.Fatal(diff)
+	}
+	// Close and expect flush.
+	if err := lb.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+	if diff := compare(lines, "foo bar", "baz baz"); diff != "" {
+		t.Fatal(diff)
+	}
+
+	// Check behaviour after close
+	if _, err := fmt.Fprintf(lb, "nope"); err == nil {
+		t.Fatalf("Write after Close: wanted  error, got nil")
+	}
+	if err := lb.Close(); err == nil {
+		t.Fatalf("second Close: wanted error, got nil")
+	}
+}
diff --git a/metropolis/node/common/logbuffer/logbuffer.go b/metropolis/node/common/logbuffer/logbuffer.go
new file mode 100644
index 0000000..ce47816
--- /dev/null
+++ b/metropolis/node/common/logbuffer/logbuffer.go
@@ -0,0 +1,97 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package logbuffer implements a fixed-size in-memory ring buffer for line-separated logs.
+// It implements io.Writer and splits the data into lines. The lines are kept in a ring where the
+// oldest are overwritten once it's full. It allows retrieval of the last n lines. There is a built-in
+// line length limit to bound the memory usage at maxLineLength * size.
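+//
+// Usage sketch (the sizes and the source reader are illustrative only):
+//
+//   buf := logbuffer.New(1000, 1024) // keep the last 1000 lines of up to 1024 bytes each
+//   go io.Copy(buf, processOutput)   // processOutput is a placeholder io.Reader
+//   last := buf.ReadLines(50)        // the last 50 lines, oldest first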
+package logbuffer
+
+import (
+	"sync"
+)
+
+// LogBuffer implements a fixed-size in-memory ring buffer for line-separated logs
+type LogBuffer struct {
+	mu      sync.RWMutex
+	content []Line
+	length  int
+	*LineBuffer
+}
+
+// New creates a new LogBuffer with a given ringbuffer size and maximum line length.
+func New(size, maxLineLength int) *LogBuffer {
+	lb := &LogBuffer{
+		content: make([]Line, size),
+	}
+	lb.LineBuffer = NewLineBuffer(maxLineLength, lb.lineCallback)
+	return lb
+}
+
+func (b *LogBuffer) lineCallback(line *Line) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	b.content[b.length%len(b.content)] = *line
+	b.length++
+}
+
+// capToContentLength caps the number of requested lines to what is actually available
+func (b *LogBuffer) capToContentLength(n int) int {
+	// If there aren't enough lines to read, reduce the request size
+	if n > b.length {
+		n = b.length
+	}
+	// If there isn't enough ringbuffer space, reduce the request size
+	if n > len(b.content) {
+		n = len(b.content)
+	}
+	return n
+}
+
+// ReadLines reads the last n lines from the buffer in chronological order. If n is bigger than the
+// ring buffer or the number of available lines, only the stored lines are returned.
+func (b *LogBuffer) ReadLines(n int) []Line {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
+
+	n = b.capToContentLength(n)
+
+	// Copy references out to keep them around
+	outArray := make([]Line, n)
+	for i := 1; i <= n; i++ {
+		outArray[n-i] = b.content[(b.length-i)%len(b.content)]
+	}
+	return outArray
+}
+
+// ReadLinesTruncated works exactly like ReadLines, but returns plain strings and appends the given
+// ellipsis to every line that was truncated because it exceeded maxLineLength.
+func (b *LogBuffer) ReadLinesTruncated(n int, ellipsis string) []string {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
+	// This does not use ReadLines() to prevent excessive reference copying and associated GC pressure
+	// since it could process a lot of lines.
+
+	n = b.capToContentLength(n)
+
+	outArray := make([]string, n)
+	for i := 1; i <= n; i++ {
+		line := b.content[(b.length-i)%len(b.content)]
+		outArray[n-i] = line.Data
+		if line.Truncated() {
+			outArray[n-i] += ellipsis
+		}
+	}
+	return outArray
+}
diff --git a/metropolis/node/common/logbuffer/logbuffer_test.go b/metropolis/node/common/logbuffer/logbuffer_test.go
new file mode 100644
index 0000000..c38d7a6
--- /dev/null
+++ b/metropolis/node/common/logbuffer/logbuffer_test.go
@@ -0,0 +1,94 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package logbuffer
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestSingleLine(t *testing.T) {
+	buf := New(1, 16000)
+	buf.Write([]byte("Hello World\n"))
+	out := buf.ReadLines(1)
+	require.Len(t, out, 1, "Invalid number of lines read")
+	require.Equal(t, "Hello World", out[0].Data, "Read bad log line")
+	require.Equal(t, 11, out[0].OriginalLength, "Invalid line length")
+}
+
+func TestPartialWritesAndReads(t *testing.T) {
+	buf := New(2, 16000)
+	buf.Write([]byte("Hello "))
+	buf.Write([]byte("World\nTest "))
+	buf.Write([]byte("2\n"))
+
+	out := buf.ReadLines(1)
+	require.Len(t, out, 1, "Invalid number of lines for partial read")
+	require.Equal(t, "Test 2", out[0].Data, "Read bad log line")
+
+	out2 := buf.ReadLines(2)
+	require.Len(t, out2, 2, "Invalid number of lines read")
+	require.Equal(t, "Hello World", out2[0].Data, "Read bad log line")
+	require.Equal(t, "Test 2", out2[1].Data, "Read bad log line")
+}
+
+func TestBufferOverwrite(t *testing.T) {
+	buf := New(3, 16000)
+	buf.Write([]byte("Test1\nTest2\nTest3\nTest4\n"))
+
+	out := buf.ReadLines(3)
+	require.Equal(t, out[0].Data, "Test2", "Read bad log line")
+	require.Equal(t, out[1].Data, "Test3", "Read bad log line")
+	require.Equal(t, out[2].Data, "Test4", "Overwritten data is invalid")
+}
+
+func TestTooLargeRequests(t *testing.T) {
+	buf := New(1, 16000)
+	outEmpty := buf.ReadLines(1)
+	require.Len(t, outEmpty, 0, "Returned more data than there is")
+
+	buf.Write([]byte("1\n2\n"))
+	out := buf.ReadLines(2)
+	require.Len(t, out, 1, "Returned more data than the ring buffer can hold")
+}
+
+func TestSpecialCases(t *testing.T) {
+	buf := New(2, 16000)
+	buf.Write([]byte("Test1"))
+	buf.Write([]byte("\nTest2\n"))
+	out := buf.ReadLines(2)
+	require.Len(t, out, 2, "Too many lines written")
+	require.Equal(t, out[0].Data, "Test1", "Read bad log line")
+	require.Equal(t, out[1].Data, "Test2", "Read bad log line")
+}
+
+func TestLineLengthLimit(t *testing.T) {
+	buf := New(2, 6)
+
+	testStr := "Just Testing"
+
+	buf.Write([]byte(testStr + "\nShort\n"))
+
+	out := buf.ReadLines(2)
+	require.Equal(t, len(testStr), out[0].OriginalLength, "Line is over length limit")
+	require.Equal(t, "Just T", out[0].Data, "Log line not properly truncated")
+
+	out2 := buf.ReadLinesTruncated(2, "...")
+	require.Equal(t, out2[0], "Just T...", "Line is over length limit")
+	require.Equal(t, out2[1], "Short", "Truncated small enough line")
+}
diff --git a/metropolis/node/common/supervisor/BUILD.bazel b/metropolis/node/common/supervisor/BUILD.bazel
new file mode 100644
index 0000000..ae95892
--- /dev/null
+++ b/metropolis/node/common/supervisor/BUILD.bazel
@@ -0,0 +1,28 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "supervisor.go",
+        "supervisor_node.go",
+        "supervisor_processor.go",
+        "supervisor_support.go",
+        "supervisor_testhelpers.go",
+    ],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/supervisor",
+    visibility = [
+        "//metropolis/node:__subpackages__",
+        "//metropolis/test:__subpackages__",
+    ],
+    deps = [
+        "//metropolis/node/core/logtree:go_default_library",
+        "@com_github_cenkalti_backoff_v4//:go_default_library",
+        "@org_golang_google_grpc//:go_default_library",
+    ],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = ["supervisor_test.go"],
+    embed = [":go_default_library"],
+)
diff --git a/metropolis/node/common/supervisor/supervisor.go b/metropolis/node/common/supervisor/supervisor.go
new file mode 100644
index 0000000..df7492c
--- /dev/null
+++ b/metropolis/node/common/supervisor/supervisor.go
@@ -0,0 +1,145 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package supervisor
+
+// The service supervision library allows for writing of reliable, service-style software within Smalltown.
+// It builds upon the Erlang/OTP supervision tree system, adapted to be more Go-ish.
+// For detailed design see go/supervision.
+
+import (
+	"context"
+	"io"
+	"sync"
+
+	"git.monogon.dev/source/nexantic.git/metropolis/node/core/logtree"
+)
+
+// A Runnable is a function that will be run in a goroutine, and supervised throughout its lifetime. It can in turn
+// start more runnables as its children, and those will form part of a supervision tree.
+// The context passed to a runnable is very important and needs to be handled properly. It will be live (non-errored) as
+// long as the runnable should be running, and canceled (ctx.Err() will be non-nil) when the supervisor wants it to
+// exit. This means this context is also perfectly usable for performing any blocking operations.
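+//
+// A minimal Runnable honoring this contract might look as follows (an illustrative sketch only; the name is made up):
+//
+//   func worker(ctx context.Context) error {
+//       // Perform setup here, then declare readiness.
+//       Signal(ctx, SignalHealthy)
+//       // Serve until the supervisor cancels our context.
+//       <-ctx.Done()
+//       return ctx.Err()
+//   }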
+type Runnable func(ctx context.Context) error
+
+// RunGroup starts a set of runnables as a group. These runnables will run together, and if any one of them exits
+// unexpectedly, the rest of the group will be canceled and restarted.
+// The context here must be an existing Runnable context, and the spawned runnables will run under the node that this
+// context represents.
+func RunGroup(ctx context.Context, runnables map[string]Runnable) error {
+	node, unlock := fromContext(ctx)
+	defer unlock()
+	return node.runGroup(runnables)
+}
+
+// Run starts a single runnable in its own group.
+func Run(ctx context.Context, name string, runnable Runnable) error {
+	return RunGroup(ctx, map[string]Runnable{
+		name: runnable,
+	})
+}
+
+// Signal tells the supervisor that the calling runnable has reached a certain state of its lifecycle. All runnables
+// should signal SignalHealthy once they are done with setup, have started any child runnables, and are now 'serving'.
+func Signal(ctx context.Context, signal SignalType) {
+	node, unlock := fromContext(ctx)
+	defer unlock()
+	node.signal(signal)
+}
+
+type SignalType int
+
+const (
+	// The runnable is healthy, done with setup, done with spawning more Runnables, and ready to serve in a loop.
+	// The runnable needs to check the parent context and ensure that if that context is done, the runnable exits.
+	SignalHealthy SignalType = iota
+	// The runnable is done - it does not need to run any loop. This is useful for Runnables that only set up other
+	// child runnables. This runnable will be restarted if a related failure happens somewhere in the supervision tree.
+	SignalDone
+)
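+
+// As a sketch (names are illustrative), a runnable that only starts child runnables and has no serving loop of its
+// own signals SignalHealthy followed by SignalDone:
+//
+//   func parent(ctx context.Context) error {
+//       if err := Run(ctx, "worker", worker); err != nil {
+//           return err
+//       }
+//       Signal(ctx, SignalHealthy)
+//       Signal(ctx, SignalDone)
+//       return nil
+//   }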
+
+// supervisor represents an instance of the supervision system. It keeps track of a supervision tree and a request
+// channel to its internal processor goroutine.
+type supervisor struct {
+	// mu guards the entire state of the supervisor.
+	mu sync.RWMutex
+	// root is the root node of the supervision tree, named 'root'. It represents the Runnable started with the
+	// supervisor.New call.
+	root *node
+	// logtree is the main logtree exposed to runnables and used internally.
+	logtree *logtree.LogTree
+	// ilogger is the internal logger logging to "supervisor" in the logtree.
+	ilogger logtree.LeveledLogger
+
+	// pReq is an interface channel to the lifecycle processor of the supervisor.
+	pReq chan *processorRequest
+
+	// propagate panics, ie. don't catch them.
+	propagatePanic bool
+}
+
+// SupervisorOpt are runtime configurable options for the supervisor.
+type SupervisorOpt func(s *supervisor)
+
+var (
+	// WithPropagatePanic prevents the Supervisor from catching panics in runnables and treating them as failures.
+	// This is useful to enable for testing and local debugging.
+	WithPropagatePanic = func(s *supervisor) {
+		s.propagatePanic = true
+	}
+)
+
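+// WithExistingLogtree makes the supervisor use an existing logtree.LogTree instead of creating its own. This is
+// useful when the caller already maintains a logtree that the runnables' logging should be routed into, for
+// instance (a sketch):
+//
+//   lt := logtree.New()
+//   sup := New(ctx, rootRunnable, WithExistingLogtree(lt))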
+func WithExistingLogtree(lt *logtree.LogTree) SupervisorOpt {
+	return func(s *supervisor) {
+		s.logtree = lt
+	}
+}
+
+// New creates a new supervisor with its root running the given root runnable.
+// The given context can be used to cancel the entire supervision tree.
+func New(ctx context.Context, rootRunnable Runnable, opts ...SupervisorOpt) *supervisor {
+	sup := &supervisor{
+		logtree: logtree.New(),
+		pReq:    make(chan *processorRequest),
+	}
+
+	for _, o := range opts {
+		o(sup)
+	}
+
+	sup.ilogger = sup.logtree.MustLeveledFor("supervisor")
+	sup.root = newNode("root", rootRunnable, sup, nil)
+
+	go sup.processor(ctx)
+
+	sup.pReq <- &processorRequest{
+		schedule: &processorRequestSchedule{dn: "root"},
+	}
+
+	return sup
+}
+
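+// Logger returns a LeveledLogger for the calling runnable, scoped to that runnable's DN within the supervisor's
+// logtree. It must be called with a context passed to a Runnable by the supervisor.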
+func Logger(ctx context.Context) logtree.LeveledLogger {
+	node, unlock := fromContext(ctx)
+	defer unlock()
+	return node.sup.logtree.MustLeveledFor(logtree.DN(node.dn()))
+}
+
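+// RawLogger returns a raw io.Writer logging to the calling runnable's DN within the supervisor's logtree, for
+// unstructured output such as that of external processes (see RunCommand for an example use). It must be called
+// with a context passed to a Runnable by the supervisor.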
+func RawLogger(ctx context.Context) io.Writer {
+	node, unlock := fromContext(ctx)
+	defer unlock()
+	return node.sup.logtree.MustRawFor(logtree.DN(node.dn()))
+}
diff --git a/metropolis/node/common/supervisor/supervisor_node.go b/metropolis/node/common/supervisor/supervisor_node.go
new file mode 100644
index 0000000..a7caf82
--- /dev/null
+++ b/metropolis/node/common/supervisor/supervisor_node.go
@@ -0,0 +1,282 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package supervisor
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+	"strings"
+
+	"github.com/cenkalti/backoff/v4"
+)
+
+// node is a supervision tree node. It represents the state of a Runnable within this tree, its relation to other tree
+// elements, and contains supporting data needed to actually supervise it.
+type node struct {
+	// The name of this node. Opaque string. It's used to make up the 'dn' (distinguished name) of a node within
+	// the tree. When starting a runnable inside a tree, this is where that name gets used.
+	name     string
+	runnable Runnable
+
+	// The supervisor managing this tree.
+	sup *supervisor
+	// The parent, within the tree, of this node. If this is the root node of the tree, this is nil.
+	parent *node
+	// Children of this tree. This is represented by a map keyed from child node names, for easy access.
+	children map[string]*node
+	// Supervision groups. Each group is a set of names of children. These sets (and thus the groups) do not overlap
+	// with each other. A supervision group indicates that if any child within that group fails, all others should be
+	// canceled and restarted together.
+	groups []map[string]bool
+
+	// The current state of the runnable in this node.
+	state nodeState
+
+	// Backoff used to keep runnables from being restarted too fast.
+	bo *backoff.ExponentialBackOff
+
+	// Context passed to the runnable, and its cancel function.
+	ctx  context.Context
+	ctxC context.CancelFunc
+}
+
+// nodeState is the state of a runnable within a node, and in a way the node itself.
+// This follows the state diagram from go/supervision.
+type nodeState int
+
+const (
+	// A node that has just been created, and whose runnable has been started already but hasn't signaled anything yet.
+	nodeStateNew nodeState = iota
+	// A node whose runnable has signaled being healthy - this means it's ready to serve/act.
+	nodeStateHealthy
+	// A node that has unexpectedly returned or panicked.
+	nodeStateDead
+	// A node that has declared that it's done with its work and should not be restarted, unless a supervision tree
+	// failure requires that.
+	nodeStateDone
+	// A node that has returned after being requested to cancel.
+	nodeStateCanceled
+)
+
+func (s nodeState) String() string {
+	switch s {
+	case nodeStateNew:
+		return "NODE_STATE_NEW"
+	case nodeStateHealthy:
+		return "NODE_STATE_HEALTHY"
+	case nodeStateDead:
+		return "NODE_STATE_DEAD"
+	case nodeStateDone:
+		return "NODE_STATE_DONE"
+	case nodeStateCanceled:
+		return "NODE_STATE_CANCELED"
+	}
+	return "UNKNOWN"
+}
+
+func (n *node) String() string {
+	return fmt.Sprintf("%s (%s)", n.dn(), n.state.String())
+}
+
+// contextKey is a type used to keep data within context values.
+type contextKey string
+
+var (
+	supervisorKey = contextKey("supervisor")
+	dnKey         = contextKey("dn")
+)
+
+// fromContext retrieves a tree node from a runnable context. It takes a lock on the tree and returns an unlock
+// function. This unlock function needs to be called once mutations on the tree/supervisor/node are done.
+func fromContext(ctx context.Context) (*node, func()) {
+	sup, ok := ctx.Value(supervisorKey).(*supervisor)
+	if !ok {
+		panic("supervisor function called from non-runnable context")
+	}
+
+	sup.mu.Lock()
+
+	dnParent, ok := ctx.Value(dnKey).(string)
+	if !ok {
+		sup.mu.Unlock()
+		panic("supervisor function called from non-runnable context")
+	}
+
+	return sup.nodeByDN(dnParent), sup.mu.Unlock
+}
+
+// All the following 'internal' supervisor functions must only be called with the supervisor lock taken. Getting a lock
+// via fromContext is enough.
+
+// dn returns the distinguished name of a node. This distinguished name is a period-separated, inverse-DNS-like name.
+// For instance, the runnable 'foo' within the runnable 'bar' will be called 'root.bar.foo'. The root of the tree is
+// always named 'root', and thus has the DN 'root'.
+func (n *node) dn() string {
+	if n.parent != nil {
+		return fmt.Sprintf("%s.%s", n.parent.dn(), n.name)
+	}
+	return n.name
+}
+
+// groupSiblings is a helper function to get all runnable group siblings of a given runnable name within this node.
+// All children are always in a group, even if that group is unary.
+func (n *node) groupSiblings(name string) map[string]bool {
+	for _, m := range n.groups {
+		if _, ok := m[name]; ok {
+			return m
+		}
+	}
+	return nil
+}
+
+// newNode creates a new node with a given parent. It does not register it with the parent (as that depends on group
+// placement).
+func newNode(name string, runnable Runnable, sup *supervisor, parent *node) *node {
+	// We use exponential backoff for failed runnables, but at some point we cap at a given backoff time.
+	// To achieve this, we set MaxElapsedTime to 0, which will cap the backoff at MaxInterval.
+	bo := backoff.NewExponentialBackOff()
+	bo.MaxElapsedTime = 0
+
+	n := &node{
+		name:     name,
+		runnable: runnable,
+
+		bo: bo,
+
+		sup:    sup,
+		parent: parent,
+	}
+	n.reset()
+	return n
+}
+
+// reset sets up all the dynamic fields of the node, in preparation of starting a runnable. It clears the node's
+// children, groups and resets its context.
+func (n *node) reset() {
+	// Make new context. First, acquire parent context. For the root node that's Background, otherwise it's the
+	// parent's context.
+	var pCtx context.Context
+	if n.parent == nil {
+		pCtx = context.Background()
+	} else {
+		pCtx = n.parent.ctx
+	}
+	// Mark DN and supervisor in context.
+	ctx := context.WithValue(pCtx, dnKey, n.dn())
+	ctx = context.WithValue(ctx, supervisorKey, n.sup)
+	ctx, ctxC := context.WithCancel(ctx)
+	// Set context
+	n.ctx = ctx
+	n.ctxC = ctxC
+
+	// Clear children and state
+	n.state = nodeStateNew
+	n.children = make(map[string]*node)
+	n.groups = nil
+
+	// The node is now ready to be scheduled.
+}
+
+// nodeByDN returns a node by given DN from the supervisor.
+func (s *supervisor) nodeByDN(dn string) *node {
+	parts := strings.Split(dn, ".")
+	if parts[0] != "root" {
+		panic("DN does not start with root.")
+	}
+	parts = parts[1:]
+	cur := s.root
+	for {
+		if len(parts) == 0 {
+			return cur
+		}
+
+		next, ok := cur.children[parts[0]]
+		if !ok {
+			panic(fmt.Errorf("could not find %v (%s) in %s", parts, dn, cur))
+		}
+		cur = next
+		parts = parts[1:]
+	}
+}
+
+// reNodeName validates a node name against constraints.
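+// Valid names consist of lowercase letters, digits and underscores, eg. "api_server" or "dhcp4c".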
+var reNodeName = regexp.MustCompile(`^[a-z0-9_]{1,64}$`)
+
+// runGroup schedules a new group of runnables to run on a node.
+func (n *node) runGroup(runnables map[string]Runnable) error {
+	// Check that the parent node is in the right state.
+	if n.state != nodeStateNew {
+		return fmt.Errorf("cannot run new runnable on non-NEW node")
+	}
+
+	// Check the requested runnable names.
+	for name := range runnables {
+		if !reNodeName.MatchString(name) {
+			return fmt.Errorf("runnable name %q is invalid", name)
+		}
+		if _, ok := n.children[name]; ok {
+			return fmt.Errorf("runnable %q already exists", name)
+		}
+	}
+
+	// Create child nodes.
+	dns := make(map[string]string)
+	group := make(map[string]bool)
+	for name, runnable := range runnables {
+		if g := n.groupSiblings(name); g != nil {
+			return fmt.Errorf("duplicate child name %q", name)
+		}
+		node := newNode(name, runnable, n.sup, n)
+		n.children[name] = node
+
+		dns[name] = node.dn()
+		group[name] = true
+	}
+	// Add group.
+	n.groups = append(n.groups, group)
+
+	// Schedule execution of group members.
+	go func() {
+		for name := range runnables {
+			n.sup.pReq <- &processorRequest{
+				schedule: &processorRequestSchedule{
+					dn: dns[name],
+				},
+			}
+		}
+	}()
+	return nil
+}
+
+// signal handles a lifecycle signal received from a runnable and updates the node's state accordingly, ensuring that
+// the state transition is valid.
+func (n *node) signal(signal SignalType) {
+	switch signal {
+	case SignalHealthy:
+		if n.state != nodeStateNew {
+			panic(fmt.Errorf("node %s signaled healthy", n))
+		}
+		n.state = nodeStateHealthy
+		n.bo.Reset()
+	case SignalDone:
+		if n.state != nodeStateHealthy {
+			panic(fmt.Errorf("node %s signaled done", n))
+		}
+		n.state = nodeStateDone
+		n.bo.Reset()
+	}
+}
diff --git a/metropolis/node/common/supervisor/supervisor_processor.go b/metropolis/node/common/supervisor/supervisor_processor.go
new file mode 100644
index 0000000..965a667
--- /dev/null
+++ b/metropolis/node/common/supervisor/supervisor_processor.go
@@ -0,0 +1,404 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package supervisor
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"runtime/debug"
+	"time"
+)
+
+// The processor maintains runnable goroutines - ie., when requested it will start one, and then once it exits it will
+// record the result and act accordingly. It is also responsible for detecting and acting upon supervision subtrees that
+// need to be restarted after death (via a 'GC' process).
+
+// processorRequest is a request for the processor. Only one of the fields can be set.
+type processorRequest struct {
+	schedule    *processorRequestSchedule
+	died        *processorRequestDied
+	waitSettled *processorRequestWaitSettled
+}
+
+// processorRequestSchedule requests that a given node's runnable be started.
+type processorRequestSchedule struct {
+	dn string
+}
+
+// processorRequestDied is a signal from a runnable goroutine that the runnable has died.
+type processorRequestDied struct {
+	dn  string
+	err error
+}
+
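+// processorRequestWaitSettled requests that the given waiter channel be closed once the supervision tree has
+// settled, ie. once no GC activity has taken place for a number of GC cycles.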
+type processorRequestWaitSettled struct {
+	waiter chan struct{}
+}
+
+// processor is the main processing loop.
+func (s *supervisor) processor(ctx context.Context) {
+	s.ilogger.Info("supervisor processor started")
+
+	// Waiters waiting for the GC to be settled.
+	var waiters []chan struct{}
+
+	// The GC will run every millisecond if needed. Any time the processor requests a change in the supervision tree
+	// (ie a death or a new runnable) it will mark the state as dirty and run the GC on the next millisecond cycle.
+	gc := time.NewTicker(1 * time.Millisecond)
+	defer gc.Stop()
+	clean := true
+
+	// The number of consecutive GC cycles for which the tree has been clean. This is used to notify 'settled' waiters.
+	cleanCycles := 0
+
+	markDirty := func() {
+		clean = false
+		cleanCycles = 0
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			s.ilogger.Infof("supervisor processor exiting: %v", ctx.Err())
+			s.processKill()
+			s.ilogger.Info("supervisor exited")
+			return
+		case <-gc.C:
+			if !clean {
+				s.processGC()
+			}
+			clean = true
+			cleanCycles += 1
+
+			// This threshold is somewhat arbitrary. It's a balance between test speed and test reliability.
+			if cleanCycles > 50 {
+				for _, w := range waiters {
+					close(w)
+				}
+				waiters = nil
+			}
+		case r := <-s.pReq:
+			switch {
+			case r.schedule != nil:
+				s.processSchedule(r.schedule)
+				markDirty()
+			case r.died != nil:
+				s.processDied(r.died)
+				markDirty()
+			case r.waitSettled != nil:
+				waiters = append(waiters, r.waitSettled.waiter)
+			default:
+				panic(fmt.Errorf("unhandled request %+v", r))
+			}
+		}
+	}
+}
+
+// processKill cancels all nodes in the supervision tree. This is only called right before exiting the processor, so
+// they do not get automatically restarted.
+func (s *supervisor) processKill() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Gather all context cancel functions.
+	var cancels []func()
+	queue := []*node{s.root}
+	for {
+		if len(queue) == 0 {
+			break
+		}
+
+		cur := queue[0]
+		queue = queue[1:]
+
+		cancels = append(cancels, cur.ctxC)
+		for _, c := range cur.children {
+			queue = append(queue, c)
+		}
+	}
+
+	// Call all context cancels.
+	for _, c := range cancels {
+		c()
+	}
+}
+
+// processSchedule starts a node's runnable in a goroutine and records its output once it's done.
+func (s *supervisor) processSchedule(r *processorRequestSchedule) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	n := s.nodeByDN(r.dn)
+	go func() {
+		if !s.propagatePanic {
+			defer func() {
+				if rec := recover(); rec != nil {
+					s.pReq <- &processorRequest{
+						died: &processorRequestDied{
+							dn:  r.dn,
+							err: fmt.Errorf("panic: %v, stacktrace: %s", rec, string(debug.Stack())),
+						},
+					}
+				}
+			}()
+		}
+
+		res := n.runnable(n.ctx)
+
+		s.pReq <- &processorRequest{
+			died: &processorRequestDied{
+				dn:  r.dn,
+				err: res,
+			},
+		}
+	}()
+}
+
+// processDied records the result from a runnable goroutine, and updates its node state accordingly. If the result
+// is a death and not an expected exit, related nodes (ie. children and group siblings) are canceled accordingly.
+func (s *supervisor) processDied(r *processorRequestDied) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Okay, so a Runnable has quit. What now?
+	n := s.nodeByDN(r.dn)
+	ctx := n.ctx
+
+	// Simple case: it was marked as Done and quit with no error.
+	if n.state == nodeStateDone && r.err == nil {
+		// Do nothing. This was supposed to happen. Keep the process as DONE.
+		return
+	}
+
+	// Find innermost error to check if it's a context canceled error.
+	perr := r.err
+	for {
+		if inner := errors.Unwrap(perr); inner != nil {
+			perr = inner
+			continue
+		}
+		break
+	}
+
+	// Simple case: the context was canceled and the returned error is the context error.
+	if err := ctx.Err(); err != nil && perr == err {
+		// Mark the node as canceled successfully.
+		n.state = nodeStateCanceled
+		return
+	}
+
+	// Otherwise, the Runnable should not have died or quit. Handle accordingly.
+	err := r.err
+	// A lack of returned error is also an error.
+	if err == nil {
+		err = fmt.Errorf("returned when %s", n.state)
+	} else {
+		err = fmt.Errorf("returned error when %s: %w", n.state, err)
+	}
+
+	s.ilogger.Errorf("Runnable %s died: %v", n.dn(), err)
+	// Mark as dead.
+	n.state = nodeStateDead
+
+	// Cancel that node's context, just in case something still depends on it.
+	n.ctxC()
+
+	// Cancel all siblings.
+	if n.parent != nil {
+		for name := range n.parent.groupSiblings(n.name) {
+			if name == n.name {
+				continue
+			}
+			sibling := n.parent.children[name]
+			// TODO(q3k): does this need to run in a goroutine, ie. can a context cancel block?
+			sibling.ctxC()
+		}
+	}
+}
+
+// processGC runs the GC process. It's not really Garbage Collection, as in, it doesn't remove unnecessary tree nodes -
+// but it does find nodes that need to be restarted, finds the subset of those that can be, and then schedules them for running.
+// As such, it's less of a Garbage Collector and more of a Necromancer. However, GC is a friendlier name.
+func (s *supervisor) processGC() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// The 'GC' is the main business logic of the supervision tree. It traverses a locked tree and tries to
+	// find subtrees that must be restarted (because of a DEAD/CANCELED runnable). It then finds which of these
+	// subtrees that should be restarted can be restarted, ie. which ones are fully recursively DEAD/CANCELED. It
+	// also finds the smallest set of largest subtrees that can be restarted, ie. if there are multiple DEAD runnables
+	// that can be restarted at once, it will do so.
+
+	// Phase one: Find all leaves.
+	// This is a simple DFS that finds all the leaves of the tree, ie. all nodes that do not have any child nodes.
+	leaves := make(map[string]bool)
+	queue := []*node{s.root}
+	for {
+		if len(queue) == 0 {
+			break
+		}
+		cur := queue[0]
+		queue = queue[1:]
+
+		for _, c := range cur.children {
+			queue = append([]*node{c}, queue...)
+		}
+
+		if len(cur.children) == 0 {
+			leaves[cur.dn()] = true
+		}
+	}
+
+	// Phase two: traverse the tree from the leaves to the root and make note of all subtrees that can be restarted.
+	// A subtree is restartable/ready iff every node in that subtree is either CANCELED, DEAD or DONE.
+	// Such a 'ready' subtree can be restarted by the supervisor if needed.
+
+	// DNs that we already visited.
+	visited := make(map[string]bool)
+	// DNs whose subtrees are ready to be restarted.
+	// These are all subtrees recursively - ie., root.a.a and root.a will both be marked here.
+	ready := make(map[string]bool)
+
+	// We build a queue of nodes to visit, starting from the leaves.
+	queue = []*node{}
+	for l := range leaves {
+		queue = append(queue, s.nodeByDN(l))
+	}
+
+	for {
+		if len(queue) == 0 {
+			break
+		}
+
+		cur := queue[0]
+		curDn := cur.dn()
+
+		queue = queue[1:]
+
+		// Do we have a decision about our children?
+		allVisited := true
+		for _, c := range cur.children {
+			if !visited[c.dn()] {
+				allVisited = false
+				break
+			}
+		}
+
+		// If we don't yet have a decision for every child, it means we reached this node via a shorter path from one of
+		// its leaves, while a deeper leaf under it has not been processed yet. Easy solution: just push back the current
+		// element and retry later.
+		if !allVisited {
+			// Push back to queue and wait for a decision later.
+			queue = append(queue, cur)
+			continue
+		}
+
+		// All children have been visited and we have an idea about whether they're ready/restartable. All of the node's
+		// children must be restartable in order for this node to be restartable.
+		childrenReady := true
+		for _, c := range cur.children {
+			if !ready[c.dn()] {
+				childrenReady = false
+				break
+			}
+		}
+
+		// In addition to children, the node itself must be restartable (ie. DONE, DEAD or CANCELED).
+		curReady := false
+		switch cur.state {
+		case nodeStateDone:
+			curReady = true
+		case nodeStateCanceled:
+			curReady = true
+		case nodeStateDead:
+			curReady = true
+		}
+
+		// Record that we have now visited this node, and note down our opinion on it.
+		visited[curDn] = true
+		ready[curDn] = childrenReady && curReady
+
+		// Now we can also enqueue the parent of this node for processing.
+		if cur.parent != nil && !visited[cur.parent.dn()] {
+			queue = append(queue, cur.parent)
+		}
+	}
+
+	// Phase 3: traverse tree from root to find largest subtrees that need to be restarted and are ready to be
+	// restarted.
+
+	// All DNs that need to be restarted by the GC process.
+	want := make(map[string]bool)
+	// All DNs that need to be restarted and can be restarted by the GC process - a subset of 'want' DNs.
+	can := make(map[string]bool)
+	// The set difference between 'want' and 'can' is the set of nodes that should be restarted but can't be yet (ie.
+	// because a child is still in the process of being canceled).
+
+	// DFS from root.
+	queue = []*node{s.root}
+	for {
+		if len(queue) == 0 {
+			break
+		}
+
+		cur := queue[0]
+		queue = queue[1:]
+
+		// If this node is DEAD or CANCELED it should be restarted.
+		if cur.state == nodeStateDead || cur.state == nodeStateCanceled {
+			want[cur.dn()] = true
+		}
+
+		// If it should be restarted and is ready to be restarted...
+		if want[cur.dn()] && ready[cur.dn()] {
+			// And its parent context is valid (ie hasn't been canceled), mark it as restartable.
+			if cur.parent == nil || cur.parent.ctx.Err() == nil {
+				can[cur.dn()] = true
+				continue
+			}
+		}
+
+		// Otherwise, traverse further down the tree to see if something else needs to be done.
+		for _, c := range cur.children {
+			queue = append(queue, c)
+		}
+	}
+
+	// Reinitialize and reschedule all subtrees that can be restarted.
+	for dn := range can {
+		n := s.nodeByDN(dn)
+
+		// Only back off when the node unexpectedly died - not when it got canceled.
+		bo := time.Duration(0)
+		if n.state == nodeStateDead {
+			bo = n.bo.NextBackOff()
+		}
+
+		// Prepare node for rescheduling - remove its children, reset its state to new.
+		n.reset()
+		s.ilogger.Infof("rescheduling supervised node %s with backoff %s", dn, bo.String())
+
+		// Reschedule node runnable to run after backoff.
+		go func(n *node, bo time.Duration) {
+			time.Sleep(bo)
+			s.pReq <- &processorRequest{
+				schedule: &processorRequestSchedule{dn: n.dn()},
+			}
+		}(n, bo)
+	}
+}
diff --git a/metropolis/node/common/supervisor/supervisor_support.go b/metropolis/node/common/supervisor/supervisor_support.go
new file mode 100644
index 0000000..d54b35c
--- /dev/null
+++ b/metropolis/node/common/supervisor/supervisor_support.go
@@ -0,0 +1,62 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package supervisor
+
+// Supporting infrastructure to allow running some non-Go payloads under supervision.
+
+import (
+	"context"
+	"net"
+	"os/exec"
+
+	"google.golang.org/grpc"
+)
+
+// GRPCServer creates a Runnable that serves gRPC requests as long as it's not canceled.
+// If graceful is set to true, the server will be gracefully stopped instead of plain stopped. This means all pending
+// RPCs will finish, but also requires streaming gRPC handlers to check their context liveness and exit accordingly.
+// If the server code does not support this, `graceful` should be false and the server will be killed violently instead.
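+//
+// A typical use, as a sketch (service registration and listener setup elided):
+//
+//   srv := grpc.NewServer()
+//   // ... register services on srv ...
+//   if err := Run(ctx, "api", GRPCServer(srv, lis, true)); err != nil {
+//       return err
+//   }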
+func GRPCServer(srv *grpc.Server, lis net.Listener, graceful bool) Runnable {
+	return func(ctx context.Context) error {
+		Signal(ctx, SignalHealthy)
+		errC := make(chan error)
+		go func() {
+			errC <- srv.Serve(lis)
+		}()
+		select {
+		case <-ctx.Done():
+			if graceful {
+				srv.GracefulStop()
+			} else {
+				srv.Stop()
+			}
+			return ctx.Err()
+		case err := <-errC:
+			return err
+		}
+	}
+}
+
+// RunCommand runs a long-running command within the calling runnable's context, forwarding its output to the
+// runnable's raw logger. The command exiting, for whatever reason, is treated as a failure of the runnable.
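+// It is meant to be called from within a Runnable, for instance (a sketch; the command path is made up):
+//
+//   err := Run(ctx, "agent", func(ctx context.Context) error {
+//       cmd := exec.CommandContext(ctx, "/agent", "--foreground")
+//       return RunCommand(ctx, cmd)
+//   })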
+func RunCommand(ctx context.Context, cmd *exec.Cmd) error {
+	Signal(ctx, SignalHealthy)
+	cmd.Stdout = RawLogger(ctx)
+	cmd.Stderr = RawLogger(ctx)
+	err := cmd.Run()
+	Logger(ctx).Infof("Command returned: %v", err)
+	return err
+}
diff --git a/metropolis/node/common/supervisor/supervisor_test.go b/metropolis/node/common/supervisor/supervisor_test.go
new file mode 100644
index 0000000..9c7bdb7
--- /dev/null
+++ b/metropolis/node/common/supervisor/supervisor_test.go
@@ -0,0 +1,557 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package supervisor
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+)
+
+func runnableBecomesHealthy(healthy, done chan struct{}) Runnable {
+	return func(ctx context.Context) error {
+		Signal(ctx, SignalHealthy)
+
+		go func() {
+			if healthy != nil {
+				healthy <- struct{}{}
+			}
+		}()
+
+		<-ctx.Done()
+
+		go func() {
+			if done != nil {
+				done <- struct{}{}
+			}
+		}()
+
+		return ctx.Err()
+	}
+}
+
+func runnableSpawnsMore(healthy, done chan struct{}, levels int) Runnable {
+	return func(ctx context.Context) error {
+		if levels > 0 {
+			err := RunGroup(ctx, map[string]Runnable{
+				"a": runnableSpawnsMore(nil, nil, levels-1),
+				"b": runnableSpawnsMore(nil, nil, levels-1),
+			})
+			if err != nil {
+				return err
+			}
+		}
+
+		Signal(ctx, SignalHealthy)
+
+		go func() {
+			if healthy != nil {
+				healthy <- struct{}{}
+			}
+		}()
+
+		<-ctx.Done()
+
+		go func() {
+			if done != nil {
+				done <- struct{}{}
+			}
+		}()
+		return ctx.Err()
+	}
+}
+
+// rc is a Remote Controlled runnable. It is a generic runnable used for testing the supervisor.
+type rc struct {
+	req chan rcRunnableRequest
+}
+
+type rcRunnableRequest struct {
+	cmd    rcRunnableCommand
+	stateC chan rcRunnableState
+}
+
+type rcRunnableCommand int
+
+const (
+	rcRunnableCommandBecomeHealthy rcRunnableCommand = iota
+	rcRunnableCommandBecomeDone
+	rcRunnableCommandDie
+	rcRunnableCommandPanic
+	rcRunnableCommandState
+)
+
+type rcRunnableState int
+
+const (
+	rcRunnableStateNew rcRunnableState = iota
+	rcRunnableStateHealthy
+	rcRunnableStateDone
+)
+
+func (r *rc) becomeHealthy() {
+	r.req <- rcRunnableRequest{cmd: rcRunnableCommandBecomeHealthy}
+}
+
+func (r *rc) becomeDone() {
+	r.req <- rcRunnableRequest{cmd: rcRunnableCommandBecomeDone}
+}
+func (r *rc) die() {
+	r.req <- rcRunnableRequest{cmd: rcRunnableCommandDie}
+}
+
+func (r *rc) panic() {
+	r.req <- rcRunnableRequest{cmd: rcRunnableCommandPanic}
+}
+
+func (r *rc) state() rcRunnableState {
+	c := make(chan rcRunnableState)
+	r.req <- rcRunnableRequest{
+		cmd:    rcRunnableCommandState,
+		stateC: c,
+	}
+	return <-c
+}
+
+func (r *rc) waitState(s rcRunnableState) {
+	// This is poll based. Making it non-poll based would make the RC runnable logic a bit more complex for little gain.
+	for {
+		got := r.state()
+		if got == s {
+			return
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+}
+
+func newRC() *rc {
+	return &rc{
+		req: make(chan rcRunnableRequest),
+	}
+}
+
+// Remote Controlled Runnable
+func (r *rc) runnable() Runnable {
+	return func(ctx context.Context) error {
+		state := rcRunnableStateNew
+
+		for {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case r := <-r.req:
+				switch r.cmd {
+				case rcRunnableCommandBecomeHealthy:
+					Signal(ctx, SignalHealthy)
+					state = rcRunnableStateHealthy
+				case rcRunnableCommandBecomeDone:
+					Signal(ctx, SignalDone)
+					state = rcRunnableStateDone
+				case rcRunnableCommandDie:
+					return fmt.Errorf("died on request")
+				case rcRunnableCommandPanic:
+					panic("at the disco")
+				case rcRunnableCommandState:
+					r.stateC <- state
+				}
+			}
+		}
+	}
+}
+
+func TestSimple(t *testing.T) {
+	h1 := make(chan struct{})
+	d1 := make(chan struct{})
+	h2 := make(chan struct{})
+	d2 := make(chan struct{})
+
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+	s := New(ctx, func(ctx context.Context) error {
+		err := RunGroup(ctx, map[string]Runnable{
+			"one": runnableBecomesHealthy(h1, d1),
+			"two": runnableBecomesHealthy(h2, d2),
+		})
+		if err != nil {
+			return err
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	}, WithPropagatePanic)
+
+	// Expect both to start running.
+	s.waitSettleError(ctx, t)
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't start")
+	}
+	select {
+	case <-h2:
+	default:
+		t.Fatalf("runnable 'one' didn't start")
+	}
+}
+
+func TestSimpleFailure(t *testing.T) {
+	h1 := make(chan struct{})
+	d1 := make(chan struct{})
+	two := newRC()
+
+	ctx, ctxC := context.WithTimeout(context.Background(), 10*time.Second)
+	defer ctxC()
+	s := New(ctx, func(ctx context.Context) error {
+		err := RunGroup(ctx, map[string]Runnable{
+			"one": runnableBecomesHealthy(h1, d1),
+			"two": two.runnable(),
+		})
+		if err != nil {
+			return err
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	}, WithPropagatePanic)
+	s.waitSettleError(ctx, t)
+
+	two.becomeHealthy()
+	s.waitSettleError(ctx, t)
+	// Expect one to start running.
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't start")
+	}
+
+	// Kill off two, one should restart.
+	two.die()
+	s.waitSettleError(ctx, t)
+	select {
+	case <-d1:
+	default:
+		t.Fatalf("runnable 'one' didn't acknowledge cancel")
+	}
+
+	// And one should start running again.
+	s.waitSettleError(ctx, t)
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't restart")
+	}
+}
+
+func TestDeepFailure(t *testing.T) {
+	h1 := make(chan struct{})
+	d1 := make(chan struct{})
+	two := newRC()
+
+	ctx, ctxC := context.WithTimeout(context.Background(), 10*time.Second)
+	defer ctxC()
+	s := New(ctx, func(ctx context.Context) error {
+		err := RunGroup(ctx, map[string]Runnable{
+			"one": runnableSpawnsMore(h1, d1, 5),
+			"two": two.runnable(),
+		})
+		if err != nil {
+			return err
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	}, WithPropagatePanic)
+
+	two.becomeHealthy()
+	s.waitSettleError(ctx, t)
+	// Expect one to start running.
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't start")
+	}
+
+	// Kill off two, one should restart.
+	two.die()
+	s.waitSettleError(ctx, t)
+	select {
+	case <-d1:
+	default:
+		t.Fatalf("runnable 'one' didn't acknowledge cancel")
+	}
+
+	// And one should start running again.
+	s.waitSettleError(ctx, t)
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't restart")
+	}
+}
+
+func TestPanic(t *testing.T) {
+	h1 := make(chan struct{})
+	d1 := make(chan struct{})
+	two := newRC()
+
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+	s := New(ctx, func(ctx context.Context) error {
+		err := RunGroup(ctx, map[string]Runnable{
+			"one": runnableBecomesHealthy(h1, d1),
+			"two": two.runnable(),
+		})
+		if err != nil {
+			return err
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	})
+
+	two.becomeHealthy()
+	s.waitSettleError(ctx, t)
+	// Expect one to start running.
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't start")
+	}
+
+	// Kill off two, one should restart.
+	two.panic()
+	s.waitSettleError(ctx, t)
+	select {
+	case <-d1:
+	default:
+		t.Fatalf("runnable 'one' didn't acknowledge cancel")
+	}
+
+	// And one should start running again.
+	s.waitSettleError(ctx, t)
+	select {
+	case <-h1:
+	default:
+		t.Fatalf("runnable 'one' didn't restart")
+	}
+}
+
+func TestMultipleLevelFailure(t *testing.T) {
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+	New(ctx, func(ctx context.Context) error {
+		err := RunGroup(ctx, map[string]Runnable{
+			"one": runnableSpawnsMore(nil, nil, 4),
+			"two": runnableSpawnsMore(nil, nil, 4),
+		})
+		if err != nil {
+			return err
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	}, WithPropagatePanic)
+}
+
+func TestBackoff(t *testing.T) {
+	one := newRC()
+
+	ctx, ctxC := context.WithTimeout(context.Background(), 20*time.Second)
+	defer ctxC()
+
+	s := New(ctx, func(ctx context.Context) error {
+		if err := Run(ctx, "one", one.runnable()); err != nil {
+			return err
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	}, WithPropagatePanic)
+
+	one.becomeHealthy()
+	// Die a bunch of times in a row, this brings up the next exponential backoff to over a second.
+	for i := 0; i < 4; i += 1 {
+		one.die()
+		one.waitState(rcRunnableStateNew)
+	}
+	// Measure how long it takes for the runnable to respawn after a number of failures
+	start := time.Now()
+	one.die()
+	one.becomeHealthy()
+	one.waitState(rcRunnableStateHealthy)
+	taken := time.Since(start)
+	if taken < 1*time.Second {
+		t.Errorf("Runnable took %v to restart, wanted at least a second from backoff", taken)
+	}
+
+	s.waitSettleError(ctx, t)
+	// Now that we've become healthy, die again. Becoming healthy resets the backoff.
+	start = time.Now()
+	one.die()
+	one.becomeHealthy()
+	one.waitState(rcRunnableStateHealthy)
+	taken = time.Since(start)
+	if taken > 1*time.Second || taken < 100*time.Millisecond {
+		t.Errorf("Runnable took %v to restart, wanted at least 100ms from backoff and at most 1s from backoff reset", taken)
+	}
+}
+
+// TestResilience throws some curveballs at the supervisor - either programming errors or high load. It then ensures
+// that a well-behaved runnable keeps running, and that it is restarted when its sibling fails.
+func TestResilience(t *testing.T) {
+	// request/response channel for testing liveness of the 'one' runnable
+	req := make(chan chan struct{})
+
+	// A runnable that responds on the 'req' channel.
+	one := func(ctx context.Context) error {
+		Signal(ctx, SignalHealthy)
+		for {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case r := <-req:
+				r <- struct{}{}
+			}
+		}
+	}
+	oneSibling := newRC()
+
+	oneTest := func() {
+		timeout := time.NewTicker(1000 * time.Millisecond)
+		ping := make(chan struct{})
+		req <- ping
+		select {
+		case <-ping:
+		case <-timeout.C:
+			t.Fatalf("one ping response timeout")
+		}
+		timeout.Stop()
+	}
+
+	// A nasty runnable that calls Signal with the wrong context (this is a programming error)
+	two := func(ctx context.Context) error {
+		Signal(context.TODO(), SignalHealthy)
+		return nil
+	}
+
+	// A nasty runnable that calls Signal wrong (this is a programming error).
+	three := func(ctx context.Context) error {
+		Signal(ctx, SignalDone)
+		return nil
+	}
+
+	// A nasty runnable that runs in a busy loop (this is a programming error).
+	four := func(ctx context.Context) error {
+		for {
+			time.Sleep(0)
+		}
+	}
+
+	// A nasty runnable that keeps creating more runnables.
+	five := func(ctx context.Context) error {
+		i := 1
+		for {
+			err := Run(ctx, fmt.Sprintf("r%d", i), runnableSpawnsMore(nil, nil, 2))
+			if err != nil {
+				return err
+			}
+
+			time.Sleep(100 * time.Millisecond)
+			i += 1
+		}
+	}
+
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+	New(ctx, func(ctx context.Context) error {
+		RunGroup(ctx, map[string]Runnable{
+			"one":        one,
+			"oneSibling": oneSibling.runnable(),
+		})
+		rs := map[string]Runnable{
+			"two": two, "three": three, "four": four, "five": five,
+		}
+		for k, v := range rs {
+			if err := Run(ctx, k, v); err != nil {
+				return err
+			}
+		}
+		Signal(ctx, SignalHealthy)
+		Signal(ctx, SignalDone)
+		return nil
+	})
+
+	// Five rounds of letting one run, then restarting it.
+	for i := 0; i < 5; i += 1 {
+		oneSibling.becomeHealthy()
+		oneSibling.waitState(rcRunnableStateHealthy)
+
+		// 'one' should work for at least a second.
+		deadline := time.Now().Add(1 * time.Second)
+		for {
+			if time.Now().After(deadline) {
+				break
+			}
+
+			oneTest()
+		}
+
+		// Killing 'oneSibling' should restart one.
+		oneSibling.panic()
+	}
+	// Make sure 'one' is still okay.
+	oneTest()
+}
+
+func ExampleNew() {
+	// Minimal runnable that is immediately done.
+	childC := make(chan struct{})
+	child := func(ctx context.Context) error {
+		Signal(ctx, SignalHealthy)
+		close(childC)
+		Signal(ctx, SignalDone)
+		return nil
+	}
+
+	// Start a supervision tree with a root runnable.
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+	New(ctx, func(ctx context.Context) error {
+		err := Run(ctx, "child", child)
+		if err != nil {
+			return fmt.Errorf("could not run 'child': %w", err)
+		}
+		Signal(ctx, SignalHealthy)
+
+		t := time.NewTicker(time.Second)
+		defer t.Stop()
+
+		// Do something in the background, and exit on context cancel.
+		for {
+			select {
+			case <-t.C:
+				fmt.Printf("tick!")
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	})
+
+	// root.child will close this channel.
+	<-childC
+}
diff --git a/metropolis/node/common/supervisor/supervisor_testhelpers.go b/metropolis/node/common/supervisor/supervisor_testhelpers.go
new file mode 100644
index 0000000..771e02f
--- /dev/null
+++ b/metropolis/node/common/supervisor/supervisor_testhelpers.go
@@ -0,0 +1,50 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package supervisor
+
+import (
+	"context"
+	"testing"
+)
+
+// waitSettle waits until the supervisor reaches a 'settled' state - ie., one
+// where no actions have been performed for a number of GC cycles.
+// This is used in tests only.
+func (s *supervisor) waitSettle(ctx context.Context) error {
+	waiter := make(chan struct{})
+	s.pReq <- &processorRequest{
+		waitSettled: &processorRequestWaitSettled{
+			waiter: waiter,
+		},
+	}
+
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-waiter:
+		return nil
+	}
+}
+
+// waitSettleError wraps waitSettle to fail a test if an error occurs, eg. the
+// context is canceled.
+func (s *supervisor) waitSettleError(ctx context.Context, t *testing.T) {
+	err := s.waitSettle(ctx)
+	if err != nil {
+		t.Fatalf("waitSettle: %v", err)
+	}
+}
diff --git a/metropolis/node/common/sysfs/BUILD.bazel b/metropolis/node/common/sysfs/BUILD.bazel
new file mode 100644
index 0000000..a4c7f18
--- /dev/null
+++ b/metropolis/node/common/sysfs/BUILD.bazel
@@ -0,0 +1,8 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["uevents.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/common/sysfs",
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/common/sysfs/uevents.go b/metropolis/node/common/sysfs/uevents.go
new file mode 100644
index 0000000..fed4319
--- /dev/null
+++ b/metropolis/node/common/sysfs/uevents.go
@@ -0,0 +1,50 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sysfs
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strings"
+)
+
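+// ReadUevents parses a sysfs uevent file (eg. /sys/class/block/sda/uevent), which consists of KEY=value lines, and
+// returns its contents as a map from key to value.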
+func ReadUevents(filename string) (map[string]string, error) {
+	f, err := os.Open(filename)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	ueventMap := make(map[string]string)
+	reader := bufio.NewReader(f)
+	for {
+		name, err := reader.ReadString(byte('='))
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			return nil, err
+		}
+		value, err := reader.ReadString(byte('\n'))
+		if err == io.EOF {
+			continue
+		} else if err != nil {
+			return nil, err
+		}
+		ueventMap[strings.Trim(name, "=")] = strings.TrimSpace(value)
+	}
+	return ueventMap, nil
+}