core -> metropolis

Smalltown is now called Metropolis!

This is the first commit in a series of cleanup commits that prepare us
for an open source release. This one just some Bazel packages around to
follow a stricter directory layout.

All of Metropolis now lives in `//metropolis`.

All of Metropolis Node code now lives in `//metropolis/node`.

All of the main /init now lives in `//m/n/core`.

All of the Kubernetes functionality/glue now lives in `//m/n/kubernetes`.

Next steps:
     - hunt down all references to Smalltown and replace them appropriately
     - narrow down visibility rules
     - document new code organization
     - move `//build/toolchain` to `//monogon/build/toolchain`
     - do another cleanup pass between `//golibs` and
       `//monogon/node/{core,common}`.
     - remove `//delta` and `//anubis`

Fixes T799.

Test Plan: Just a very large refactor. CI should help us out here.

Bug: T799

X-Origin-Diff: phab/D667
GitOrigin-RevId: 6029b8d4edc42325d50042596b639e8b122d0ded
diff --git a/metropolis/test/e2e/BUILD.bazel b/metropolis/test/e2e/BUILD.bazel
new file mode 100644
index 0000000..b9bb6f8
--- /dev/null
+++ b/metropolis/test/e2e/BUILD.bazel
@@ -0,0 +1,43 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "kubernetes_helpers.go",
+        "utils.go",
+    ],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/e2e",
+    visibility = ["//metropolis/test:__subpackages__"],
+    deps = [
+        "//metropolis/proto/api:go_default_library",
+        "@io_k8s_api//apps/v1:go_default_library",
+        "@io_k8s_api//core/v1:go_default_library",
+        "@io_k8s_apimachinery//pkg/api/resource:go_default_library",
+        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+        "@io_k8s_apimachinery//pkg/util/intstr:go_default_library",
+        "@io_k8s_client_go//kubernetes:go_default_library",
+        "@io_k8s_client_go//tools/clientcmd:go_default_library",
+    ],
+)
+
+go_test(
+    name = "go_default_test",
+    size = "large",
+    srcs = ["main_test.go"],
+    data = [
+        "//metropolis/node:image",
+        "//metropolis/node:swtpm_data",
+        "//third_party/edk2:firmware",
+    ],
+    embed = [":go_default_library"],
+    rundir = ".",
+    deps = [
+        "//metropolis/node:go_default_library",
+        "//metropolis/proto/api:go_default_library",
+        "//metropolis/test/launch:go_default_library",
+        "@io_k8s_api//core/v1:go_default_library",
+        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+        "@io_k8s_kubernetes//pkg/api/v1/pod:go_default_library",
+        "@org_golang_google_grpc//:go_default_library",
+    ],
+)
diff --git a/metropolis/test/e2e/k8s_cts/BUILD.bazel b/metropolis/test/e2e/k8s_cts/BUILD.bazel
new file mode 100644
index 0000000..0e43c24
--- /dev/null
+++ b/metropolis/test/e2e/k8s_cts/BUILD.bazel
@@ -0,0 +1,55 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("@io_bazel_rules_docker//go:image.bzl", "go_image")
+load("@io_bazel_rules_docker//container:container.bzl", "container_image")
+
+go_image(
+    name = "kubectl",
+    binary = "@io_k8s_kubernetes//cmd/kubectl",
+    pure = "on",
+)
+
+container_image(
+    name = "kubectl_in_path",
+    base = ":kubectl",
+    env = {
+        # Don't include FHS paths since they aren't available anyways
+        "PATH": "/app/cmd/kubectl",
+    },
+)
+
+go_image(
+    name = "k8s_cts_image",
+    base = ":kubectl_in_path",
+    binary = "@io_k8s_kubernetes//test/e2e:_go_default_test-pure",
+    pure = "on",
+    visibility = ["//visibility:public"],
+)
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/e2e/k8s_cts",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//metropolis/node:go_default_library",
+        "//metropolis/test/e2e:go_default_library",
+        "//metropolis/test/launch:go_default_library",
+        "@io_k8s_api//core/v1:go_default_library",
+        "@io_k8s_api//rbac/v1:go_default_library",
+        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "k8s_cts",
+    data = [
+        "//metropolis/node:image",
+        "//metropolis/node:swtpm_data",
+        "//metropolis/test/nanoswitch:initramfs",
+        "//metropolis/test/ktest:linux-testing",
+        "//third_party/edk2:firmware",
+        "@com_github_bonzini_qboot//:qboot-bin",
+    ],
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/e2e/k8s_cts/main.go b/metropolis/test/e2e/k8s_cts/main.go
new file mode 100644
index 0000000..728a890
--- /dev/null
+++ b/metropolis/test/e2e/k8s_cts/main.go
@@ -0,0 +1,175 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This package launches a Smalltown Cluster with two nodes and spawns in the CTS container. Then it streams its output
+// to the console. When the CTS has finished it exits with the appropriate error code.
+package main
+
+import (
+	"context"
+	"io"
+	"log"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	common "git.monogon.dev/source/nexantic.git/metropolis/node"
+	"git.monogon.dev/source/nexantic.git/metropolis/test/e2e"
+	"git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+// makeCTSPodSpec generates a spec for a standalone pod running the Kubernetes CTS. It also sets the test configuration
+// for the Kubernetes E2E test suite to only run CTS tests and excludes known-broken ones.
+func makeCTSPodSpec(name string, saName string) *corev1.Pod {
+	skipRegexes := []string{
+		// hostNetworking cannot be supported since we run different network stacks for the host and containers
+		"should function for node-pod communication",
+		// gVisor misreports statfs() syscalls: https://github.com/google/gvisor/issues/3339
+		`should support \((non-)?root,`,
+		"volume on tmpfs should have the correct mode",
+		"volume on default medium should have the correct mode",
+		// gVisor doesn't support the full Linux privilege machinery including SUID and NewPrivs
+		// https://github.com/google/gvisor/issues/189#issuecomment-481064000
+		"should run the container as unprivileged when false",
+	}
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+			Labels: map[string]string{
+				"name": name,
+			},
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:  "cts",
+					Image: "bazel/metropolis/test/e2e/k8s_cts:k8s_cts_image",
+					Args: []string{
+						"-cluster-ip-range=10.0.0.0/17",
+						"-dump-systemd-journal=false",
+						"-ginkgo.focus=\\[Conformance\\]",
+						"-ginkgo.skip=" + strings.Join(skipRegexes, "|"),
+						"-test.parallel=8",
+					},
+					ImagePullPolicy: corev1.PullNever,
+				},
+			},
+			Tolerations: []corev1.Toleration{{ // Tolerate all taints, otherwise the CTS likes to self-evict
+				Operator: "Exists",
+			}},
+			PriorityClassName:  "system-cluster-critical", // Don't evict the CTS pod
+			RestartPolicy:      corev1.RestartPolicyNever,
+			ServiceAccountName: saName,
+		},
+	}
+}
+
+func main() {
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
+	ctx, cancel := context.WithCancel(context.Background())
+	go func() {
+		<-sigs
+		cancel()
+	}()
+
+	debugClient, portMap, err := launch.LaunchCluster(ctx, launch.ClusterOptions{NumNodes: 2})
+	if err != nil {
+		log.Fatalf("Failed to launch cluster: %v", err)
+	}
+	log.Println("Cluster initialized")
+
+	clientSet, err := e2e.GetKubeClientSet(ctx, debugClient, portMap[common.KubernetesAPIPort])
+	if err != nil {
+		log.Fatalf("Failed to get clientSet: %v", err)
+	}
+	log.Println("Credentials available")
+
+	saName := "cts"
+	ctsSA := &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: saName}}
+	for {
+		if _, err := clientSet.CoreV1().ServiceAccounts("default").Create(ctx, ctsSA, metav1.CreateOptions{}); err != nil {
+			log.Printf("Failed to create ServiceAccount: %v", err)
+			time.Sleep(1 * time.Second)
+			continue
+		}
+		break
+	}
+	ctsRoleBinding := &rbacv1.ClusterRoleBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: saName,
+		},
+		Subjects: []rbacv1.Subject{
+			{
+				Namespace: "default",
+				Name:      saName,
+				Kind:      rbacv1.ServiceAccountKind,
+			},
+		},
+		RoleRef: rbacv1.RoleRef{
+			Kind: "ClusterRole",
+			Name: "cluster-admin",
+		},
+	}
+	podName := "cts"
+	if _, err := clientSet.RbacV1().ClusterRoleBindings().Create(ctx, ctsRoleBinding, metav1.CreateOptions{}); err != nil {
+		log.Fatalf("Failed to create ClusterRoleBinding: %v", err)
+	}
+	for {
+		if _, err := clientSet.CoreV1().Pods("default").Create(ctx, makeCTSPodSpec(podName, saName), metav1.CreateOptions{}); err != nil {
+			log.Printf("Failed to create Pod: %v", err)
+			time.Sleep(1 * time.Second)
+			continue
+		}
+		break
+	}
+	var logs io.ReadCloser
+	go func() {
+		// This loops the whole .Stream()/io.Copy process because the API sometimes returns streams that immediately return EOF
+		for {
+			logs, err = clientSet.CoreV1().Pods("default").GetLogs(podName, &corev1.PodLogOptions{Follow: true}).Stream(ctx)
+			if err == nil {
+				if _, err := io.Copy(os.Stdout, logs); err != nil {
+					log.Printf("Log pump error: %v", err)
+				}
+				logs.Close()
+			} else {
+				log.Printf("Pod logs not ready yet: %v", err)
+			}
+			time.Sleep(1 * time.Second)
+		}
+	}()
+	for {
+		time.Sleep(1 * time.Second)
+		pod, err := clientSet.CoreV1().Pods("default").Get(ctx, podName, metav1.GetOptions{})
+		if err != nil {
+			log.Printf("Failed to get CTS pod: %v", err)
+			continue
+		}
+		if pod.Status.Phase == corev1.PodSucceeded {
+			return
+		}
+		if pod.Status.Phase == corev1.PodFailed {
+			log.Fatalf("CTS failed")
+		}
+	}
+}
diff --git a/metropolis/test/e2e/kubernetes_helpers.go b/metropolis/test/e2e/kubernetes_helpers.go
new file mode 100644
index 0000000..fffbd1b
--- /dev/null
+++ b/metropolis/test/e2e/kubernetes_helpers.go
@@ -0,0 +1,146 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+
+	apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+)
+
+// GetKubeClientSet gets a Kubeconfig from the debug API and creates a K8s ClientSet using it. The identity used has
+// the system:masters group and thus has RBAC access to everything.
+func GetKubeClientSet(ctx context.Context, client apb.NodeDebugServiceClient, port uint16) (kubernetes.Interface, error) {
+	var lastErr = errors.New("context canceled before any operation completed")
+	for {
+		reqT, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+		res, err := client.GetDebugKubeconfig(reqT, &apb.GetDebugKubeconfigRequest{Id: "debug-user", Groups: []string{"system:masters"}})
+		if err == nil {
+			rawClientConfig, err := clientcmd.NewClientConfigFromBytes([]byte(res.DebugKubeconfig))
+			if err != nil {
+				return nil, err // Invalid Kubeconfigs are immediately fatal
+			}
+
+			clientConfig, err := rawClientConfig.ClientConfig()
+			clientConfig.Host = fmt.Sprintf("localhost:%v", port)
+			clientSet, err := kubernetes.NewForConfig(clientConfig)
+			if err != nil {
+				return nil, err
+			}
+			return clientSet, nil
+		}
+		if err != nil && err == ctx.Err() {
+			return nil, lastErr
+		}
+		lastErr = err
+		select {
+		case <-ctx.Done():
+			return nil, lastErr
+		case <-time.After(1 * time.Second):
+		}
+	}
+}
+
+// makeTestDeploymentSpec generates a Deployment spec for a single pod running NGINX with a readiness probe. This allows
+// verifying that the control plane is capable of scheduling simple pods and that kubelet works, its runtime is set up
+// well enough to run a simple container and the network to the pod can pass readiness probe traffic.
+func makeTestDeploymentSpec(name string) *appsv1.Deployment {
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{Name: name},
+		Spec: appsv1.DeploymentSpec{
+			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{
+				"name": name,
+			}},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"name": name,
+					},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name: "test",
+							// TODO(phab/T793): Build and preseed our own container images
+							Image: "nginx:alpine",
+							ReadinessProbe: &corev1.Probe{
+								Handler: corev1.Handler{
+									HTTPGet: &corev1.HTTPGetAction{Port: intstr.FromInt(80)},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+// makeTestStatefulSet generates a StatefulSet spec
+func makeTestStatefulSet(name string) *appsv1.StatefulSet {
+	return &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{Name: name},
+		Spec: appsv1.StatefulSetSpec{
+			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{
+				"name": name,
+			}},
+			VolumeClaimTemplates: []corev1.PersistentVolumeClaim{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "www"},
+					Spec: corev1.PersistentVolumeClaimSpec{
+						AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce},
+						Resources: corev1.ResourceRequirements{
+							Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("50Mi")},
+						},
+					},
+				},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"name": name,
+					},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:  "test",
+							Image: "nginx:alpine",
+							ReadinessProbe: &corev1.Probe{
+								Handler: corev1.Handler{
+									HTTPGet: &corev1.HTTPGetAction{Port: intstr.FromInt(80)},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
diff --git a/metropolis/test/e2e/main_test.go b/metropolis/test/e2e/main_test.go
new file mode 100644
index 0000000..46e862c
--- /dev/null
+++ b/metropolis/test/e2e/main_test.go
@@ -0,0 +1,244 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"net"
+	"net/http"
+	_ "net/http"
+	_ "net/http/pprof"
+	"os"
+	"strings"
+	"testing"
+	"time"
+
+	"google.golang.org/grpc"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
+
+	common "git.monogon.dev/source/nexantic.git/metropolis/node"
+	apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+	"git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+const (
+	// Timeout for the global test context.
+	//
+	// Bazel would eventually time out the test after 900s ("large") if, for some reason,
+	// the context cancellation fails to abort it.
+	globalTestTimeout = 600 * time.Second
+
+	// Timeouts for individual end-to-end tests of different sizes.
+	smallTestTimeout = 60 * time.Second
+	largeTestTimeout = 120 * time.Second
+)
+
+// TestE2E is the main E2E test entrypoint for single-node freshly-bootstrapped E2E tests. It starts a full Smalltown node
+// in bootstrap mode and then runs tests against it. The actual tests it performs are located in the RunGroup subtest.
+func TestE2E(t *testing.T) {
+	// Run pprof server for debugging
+	go func() {
+		addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
+		if err != nil {
+			panic(err)
+		}
+
+		l, err := net.ListenTCP("tcp", addr)
+		if err != nil {
+			log.Fatalf("Failed to listen on pprof port: %s", l.Addr())
+		}
+		defer l.Close()
+
+		log.Printf("pprof server listening on %s", l.Addr())
+		log.Printf("pprof server returned an error: %v", http.Serve(l, nil))
+	}()
+
+	// Set a global timeout to make sure this terminates
+	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
+	portMap, err := launch.ConflictFreePortMap(launch.NodePorts)
+	if err != nil {
+		t.Fatalf("Failed to acquire ports for e2e test: %v", err)
+	}
+
+	procExit := make(chan struct{})
+
+	go func() {
+		if err := launch.Launch(ctx, launch.Options{Ports: portMap, SerialPort: os.Stdout}); err != nil {
+			panic(err)
+		}
+		close(procExit)
+	}()
+	grpcClient, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure())
+	if err != nil {
+		fmt.Printf("Failed to dial debug service (is it running): %v\n", err)
+	}
+	debugClient := apb.NewNodeDebugServiceClient(grpcClient)
+
+	// This exists to keep the parent around while all the children race
+	// It currently tests both a set of OS-level conditions and Kubernetes Deployments and StatefulSets
+	t.Run("RunGroup", func(t *testing.T) {
+		t.Run("Get Kubernetes Debug Kubeconfig", func(t *testing.T) {
+			t.Parallel()
+			selfCtx, cancel := context.WithTimeout(ctx, largeTestTimeout)
+			defer cancel()
+			clientSet, err := GetKubeClientSet(selfCtx, debugClient, portMap[common.KubernetesAPIPort])
+			if err != nil {
+				t.Fatal(err)
+			}
+			testEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
+				nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+				if err != nil {
+					return err
+				}
+				if len(nodes.Items) < 1 {
+					return errors.New("node not registered")
+				}
+				if len(nodes.Items) > 1 {
+					return errors.New("more than one node registered (but there is only one)")
+				}
+				node := nodes.Items[0]
+				for _, cond := range node.Status.Conditions {
+					if cond.Type != corev1.NodeReady {
+						continue
+					}
+					if cond.Status != corev1.ConditionTrue {
+						return fmt.Errorf("node not ready: %v", cond.Message)
+					}
+				}
+				return nil
+			})
+			testEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
+				_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+				res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
+				if err != nil {
+					return err
+				}
+				if len(res.Items) == 0 {
+					return errors.New("pod didn't get created")
+				}
+				pod := res.Items[0]
+				if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				} else {
+					return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+				}
+			})
+			testEventual(t, "Simple deployment with runc", ctx, largeTestTimeout, func(ctx context.Context) error {
+				deployment := makeTestDeploymentSpec("test-deploy-2")
+				var runcStr = "runc"
+				deployment.Spec.Template.Spec.RuntimeClassName = &runcStr
+				_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Simple deployment is running on runc", ctx, largeTestTimeout, func(ctx context.Context) error {
+				res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
+				if err != nil {
+					return err
+				}
+				if len(res.Items) == 0 {
+					return errors.New("pod didn't get created")
+				}
+				pod := res.Items[0]
+				if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				} else {
+					var errorMsg strings.Builder
+					for _, msg := range events.Items {
+						errorMsg.WriteString(" | ")
+						errorMsg.WriteString(msg.Message)
+					}
+					return fmt.Errorf("pod is not ready: %v", errorMsg.String())
+				}
+			})
+			testEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
+				_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1"), metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+				res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
+				if err != nil {
+					return err
+				}
+				if len(res.Items) == 0 {
+					return errors.New("pod didn't get created")
+				}
+				pod := res.Items[0]
+				if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				} else {
+					return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+				}
+			})
+			testEventual(t, "Pod with preseeded image", ctx, smallTestTimeout, func(ctx context.Context) error {
+				_, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "preseed-test-1",
+					},
+					Spec: corev1.PodSpec{
+						Containers: []corev1.Container{{
+							Name:            "preseed-test-1",
+							ImagePullPolicy: corev1.PullNever,
+							Image:           "bazel/metropolis/test/e2e/preseedtest:preseedtest",
+						}},
+						RestartPolicy: corev1.RestartPolicyNever,
+					},
+				}, metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Pod with preseeded image is completed", ctx, largeTestTimeout, func(ctx context.Context) error {
+				pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "preseed-test-1", metav1.GetOptions{})
+				if err != nil {
+					return fmt.Errorf("failed to get pod: %w", err)
+				}
+				if pod.Status.Phase == corev1.PodSucceeded {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				} else {
+					return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
+				}
+			})
+		})
+	})
+
+	// Cancel the main context and wait for our subprocesses to exit
+	// to avoid leaking them and blocking the parent.
+	cancel()
+	<-procExit
+}
diff --git a/metropolis/test/e2e/preseedtest/BUILD.bazel b/metropolis/test/e2e/preseedtest/BUILD.bazel
new file mode 100644
index 0000000..41b32e3
--- /dev/null
+++ b/metropolis/test/e2e/preseedtest/BUILD.bazel
@@ -0,0 +1,16 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_docker//go:image.bzl", "go_image")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/e2e/preseedtest",
+    visibility = ["//visibility:private"],
+)
+
+go_image(
+    name = "preseedtest",
+    embed = [":go_default_library"],
+    pure = "on",
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/e2e/preseedtest/main.go b/metropolis/test/e2e/preseedtest/main.go
new file mode 100644
index 0000000..ceb3898
--- /dev/null
+++ b/metropolis/test/e2e/preseedtest/main.go
@@ -0,0 +1,23 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import "fmt"
+
+func main() {
+	fmt.Println("Hello world from preseeded image")
+}
diff --git a/metropolis/test/e2e/utils.go b/metropolis/test/e2e/utils.go
new file mode 100644
index 0000000..f888189
--- /dev/null
+++ b/metropolis/test/e2e/utils.go
@@ -0,0 +1,51 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+)
+
+// testEventual creates a new subtest looping the given function until it either doesn't return an error anymore or
+// the timeout is exceeded. The last returned non-context-related error is being used as the test error.
+func testEventual(t *testing.T, name string, ctx context.Context, timeout time.Duration, f func(context.Context) error) {
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	t.Helper()
+	t.Run(name, func(t *testing.T) {
+		defer cancel()
+		var lastErr = errors.New("test didn't run to completion at least once")
+		t.Parallel()
+		for {
+			err := f(ctx)
+			if err == nil {
+				return
+			}
+			if err == ctx.Err() {
+				t.Fatal(lastErr)
+			}
+			lastErr = err
+			select {
+			case <-ctx.Done():
+				t.Fatal(lastErr)
+			case <-time.After(1 * time.Second):
+			}
+		}
+	})
+}
diff --git a/metropolis/test/ktest/BUILD b/metropolis/test/ktest/BUILD
new file mode 100644
index 0000000..d94831c
--- /dev/null
+++ b/metropolis/test/ktest/BUILD
@@ -0,0 +1,63 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("//metropolis/node/build/kconfig-patcher:kconfig-patcher.bzl", "kconfig_patch")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/ktest",
+    visibility = ["//visibility:private"],
+    deps = ["//metropolis/test/launch:go_default_library"],
+)
+
+go_binary(
+    name = "ktest",
+    embed = [":go_default_library"],
+    pure = "on",
+    visibility = ["//visibility:public"],
+)
+
+kconfig_patch(
+    name = "testing-config",
+    src = "//third_party/linux:kernel-config",
+    out = "testing.config",
+    override_configs = {
+        # Unlock command line
+        "CONFIG_CMDLINE_OVERRIDE": "n",
+        "CONFIG_CMDLINE_BOOL": "n",
+        # Shave off 1 second from boot time
+        "CONFIG_SERIO_I8042": "",
+        "CONFIG_KEYBOARD_ATKBD": "",
+        "CONFIG_RTC_DRV_CMOS": "",
+        # Shave off an additional 18ms (half of the boot time)
+        "CONFIG_DEBUG_WX": "",
+    },
+)
+
+genrule(
+    name = "linux-testing",
+    srcs = [
+        "@linux//:all",
+        ":testing-config",
+    ],
+    outs = [
+        "linux-testing.elf",
+    ],
+    cmd = """
+    DIR=external/linux
+
+    mkdir $$DIR/.bin
+
+    cp $(location :testing-config) $$DIR/.config
+
+    (cd $$DIR && make -j $$(nproc) vmlinux >/dev/null)
+
+    cp $$DIR/vmlinux $@
+    """,
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "test-script",
+    srcs = ["run_ktest.sh"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/ktest/init/BUILD.bazel b/metropolis/test/ktest/init/BUILD.bazel
new file mode 100644
index 0000000..4161146
--- /dev/null
+++ b/metropolis/test/ktest/init/BUILD.bazel
@@ -0,0 +1,16 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/ktest/init",
+    visibility = ["//visibility:private"],
+    deps = ["@org_golang_x_sys//unix:go_default_library"],
+)
+
+go_binary(
+    name = "init",
+    embed = [":go_default_library"],
+    pure = "on",
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/ktest/init/main.go b/metropolis/test/ktest/init/main.go
new file mode 100644
index 0000000..f6049db
--- /dev/null
+++ b/metropolis/test/ktest/init/main.go
@@ -0,0 +1,83 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ktestinit is an init designed to run inside a lightweight VM for running tests in there.
+// It performs basic platform initialization like mounting kernel filesystems and launches the
+// test executable at /tester, passes the exit code back out over the control socket to ktest and
+// then terminates the VM kernel.
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+
+	"golang.org/x/sys/unix"
+)
+
+func mountInit() error {
+	for _, el := range []struct {
+		dir   string
+		fs    string
+		flags uintptr
+	}{
+		{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+		{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+	} {
+		if err := os.Mkdir(el.dir, 0755); err != nil && !os.IsExist(err) {
+			return fmt.Errorf("could not make %s: %w", el.dir, err)
+		}
+		if err := unix.Mount(el.fs, el.dir, el.fs, el.flags, ""); err != nil {
+			return fmt.Errorf("could not mount %s on %s: %w", el.fs, el.dir, err)
+		}
+	}
+	return nil
+}
+
+func main() {
+	if err := mountInit(); err != nil {
+		panic(err)
+	}
+
+	// First virtual serial is always stdout, second is control
+	ioConn, err := os.OpenFile("/dev/vport1p1", os.O_RDWR, 0)
+	if err != nil {
+		fmt.Printf("Failed to open communication device: %v\n", err)
+		return
+	}
+	cmd := exec.Command("/tester", "-test.v")
+	cmd.Stderr = os.Stderr
+	cmd.Stdout = os.Stdout
+	cmd.Env = append(cmd.Env, "IN_KTEST=true")
+	if err := cmd.Run(); err != nil {
+		var exerr *exec.ExitError
+		if errors.As(err, &exerr) {
+			if _, err := ioConn.Write([]byte{uint8(exerr.ExitCode())}); err != nil {
+				panic(err)
+			}
+		} else if err != nil {
+			fmt.Printf("Failed to execute tests (tests didn't run): %v", err)
+		}
+	} else {
+		ioConn.Write([]byte{0})
+	}
+	ioConn.Close()
+
+	unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
+}
diff --git a/metropolis/test/ktest/ktest.bzl b/metropolis/test/ktest/ktest.bzl
new file mode 100644
index 0000000..fdbff20
--- /dev/null
+++ b/metropolis/test/ktest/ktest.bzl
@@ -0,0 +1,62 @@
+#  Copyright 2020 The Monogon Project Authors.
+#
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+Ktest provides a simple macro to run tests inside the normal Smalltown kernel
+"""
+
+def ktest(deps, tester, initramfs_extra, cmdline):
+    native.genrule(
+        name = "test_initramfs",
+        srcs = [
+            "//metropolis/test/ktest/init",
+        ] + deps + [tester],
+        outs = [
+            "initramfs.cpio.lz4",
+        ],
+        testonly = True,
+        cmd = """
+        $(location @linux//:gen_init_cpio) - <<- 'EOF' | lz4 -l > \"$@\" 
+dir /dev 0755 0 0
+nod /dev/console 0600 0 0 c 5 1
+nod /dev/null 0644 0 0 c 1 3
+file /init $(location //metropolis/test/ktest/init) 0755 0 0
+file /tester $(location """ + tester + """) 0755 0 0
+""" + initramfs_extra + """
+EOF
+        """,
+        tools = [
+            "@linux//:gen_init_cpio",
+        ],
+    )
+
+    native.sh_test(
+        name = "ktest",
+        args = [
+            "$(location //metropolis/test/ktest)",
+            "$(location :test_initramfs)",
+            "$(location //metropolis/test/ktest:linux-testing)",
+            cmdline,
+        ],
+        size = "small",
+        srcs = ["//metropolis/test/ktest:test-script"],
+        data = [
+            "//metropolis/test/ktest",
+            ":test_initramfs",
+            "//metropolis/test/ktest:linux-testing",
+            "@com_github_bonzini_qboot//:qboot-bin",
+        ],
+    )
diff --git a/metropolis/test/ktest/main.go b/metropolis/test/ktest/main.go
new file mode 100644
index 0000000..7f750b8
--- /dev/null
+++ b/metropolis/test/ktest/main.go
@@ -0,0 +1,75 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ktest is a test launcher for running tests inside a custom kernel and passes the results
+// back out.
+package main
+
+import (
+	"context"
+	"flag"
+	"io"
+	"log"
+	"os"
+	"time"
+
+	"git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+var (
+	kernelPath = flag.String("kernel-path", "", "Path of the Kernel ELF file")
+	initrdPath = flag.String("initrd-path", "", "Path of the initrd image")
+	cmdline    = flag.String("cmdline", "", "Additional kernel command line options")
+)
+
+func main() {
+	flag.Parse()
+
+	hostFeedbackConn, vmFeedbackConn, err := launch.NewSocketPair()
+	if err != nil {
+		log.Fatalf("Failed to create socket pair: %v", err)
+	}
+
+	exitCodeChan := make(chan uint8, 1)
+
+	go func() {
+		defer hostFeedbackConn.Close()
+
+		returnCode := make([]byte, 1)
+		if _, err := io.ReadFull(hostFeedbackConn, returnCode); err != nil {
+			log.Fatalf("Failed to read socket: %v", err)
+		}
+		exitCodeChan <- returnCode[0]
+	}()
+
+	if err := launch.RunMicroVM(context.Background(), &launch.MicroVMOptions{
+		KernelPath:                  *kernelPath,
+		InitramfsPath:               *initrdPath,
+		Cmdline:                     *cmdline,
+		SerialPort:                  os.Stdout,
+		ExtraChardevs:               []*os.File{vmFeedbackConn},
+		DisableHostNetworkInterface: true,
+	}); err != nil {
+		log.Fatalf("Failed to run ktest VM: %v", err)
+	}
+
+	select {
+	case exitCode := <-exitCodeChan:
+		os.Exit(int(exitCode))
+	case <-time.After(1 * time.Second):
+		log.Fatal("Failed to get an error code back (test runtime probably crashed)")
+	}
+}
diff --git a/metropolis/test/ktest/run_ktest.sh b/metropolis/test/ktest/run_ktest.sh
new file mode 100755
index 0000000..02920a1
--- /dev/null
+++ b/metropolis/test/ktest/run_ktest.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+exec "$1" -initrd-path "$2" -kernel-path "$3" -cmdline "$4"
\ No newline at end of file
diff --git a/metropolis/test/launch/BUILD.bazel b/metropolis/test/launch/BUILD.bazel
new file mode 100644
index 0000000..b6245e1
--- /dev/null
+++ b/metropolis/test/launch/BUILD.bazel
@@ -0,0 +1,17 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["launch.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/launch",
+    visibility = ["//metropolis:__subpackages__"],
+    deps = [
+        "//golibs/common:go_default_library",
+        "//metropolis/node:go_default_library",
+        "//metropolis/proto/api:go_default_library",
+        "@com_github_golang_protobuf//proto:go_default_library",
+        "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
+        "@org_golang_google_grpc//:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/metropolis/test/launch/cli/launch-multi2/BUILD.bazel b/metropolis/test/launch/cli/launch-multi2/BUILD.bazel
new file mode 100644
index 0000000..9f27860
--- /dev/null
+++ b/metropolis/test/launch/cli/launch-multi2/BUILD.bazel
@@ -0,0 +1,29 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/launch/cli/launch-multi2",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//metropolis/node:go_default_library",
+        "//metropolis/proto/api:go_default_library",
+        "//metropolis/test/launch:go_default_library",
+        "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
+        "@org_golang_google_grpc//:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "launch-multi2",
+    data = [
+        "//metropolis/node:image",
+        "//metropolis/node:swtpm_data",
+        "//metropolis/test/nanoswitch:initramfs",
+        "//metropolis/test/ktest:linux-testing",
+        "//third_party/edk2:firmware",
+        "@com_github_bonzini_qboot//:qboot-bin",
+    ],
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/launch/cli/launch-multi2/main.go b/metropolis/test/launch/cli/launch-multi2/main.go
new file mode 100644
index 0000000..265d6a0
--- /dev/null
+++ b/metropolis/test/launch/cli/launch-multi2/main.go
@@ -0,0 +1,102 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"log"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
+	"google.golang.org/grpc"
+
+	common "git.monogon.dev/source/nexantic.git/metropolis/node"
+	apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+	"git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+func main() {
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
+	ctx, cancel := context.WithCancel(context.Background())
+	go func() {
+		<-sigs
+		cancel()
+	}()
+	sw0, vm0, err := launch.NewSocketPair()
+	if err != nil {
+		log.Fatalf("Failed to create network pipe: %v\n", err)
+	}
+	sw1, vm1, err := launch.NewSocketPair()
+	if err != nil {
+		log.Fatalf("Failed to create network pipe: %v\n", err)
+	}
+
+	go func() {
+		if err := launch.Launch(ctx, launch.Options{ConnectToSocket: vm0, SerialPort: os.Stdout}); err != nil {
+			log.Fatalf("Failed to launch vm0: %v", err)
+		}
+	}()
+	nanoswitchPortMap := make(launch.PortMap)
+	identityPorts := []uint16{
+		common.ExternalServicePort,
+		common.DebugServicePort,
+		common.KubernetesAPIPort,
+	}
+	for _, port := range identityPorts {
+		nanoswitchPortMap[port] = port
+	}
+	go func() {
+		opts := []grpcretry.CallOption{
+			grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
+		}
+		conn, err := nanoswitchPortMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure(),
+			grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
+		if err != nil {
+			panic(err)
+		}
+		defer conn.Close()
+		debug := apb.NewNodeDebugServiceClient(conn)
+		res, err := debug.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{
+			// HACK: this is assigned by DHCP, and we assume that everything goes well.
+			ExternalIp: "10.1.0.3",
+		}, grpcretry.WithMax(10))
+		if err != nil {
+			log.Fatalf("Failed to get golden ticket: %v", err)
+		}
+
+		ec := &apb.EnrolmentConfig{
+			GoldenTicket: res.Ticket,
+		}
+
+		if err := launch.Launch(ctx, launch.Options{ConnectToSocket: vm1, EnrolmentConfig: ec, SerialPort: os.Stdout}); err != nil {
+			log.Fatalf("Failed to launch vm1: %v", err)
+		}
+	}()
+	if err := launch.RunMicroVM(ctx, &launch.MicroVMOptions{
+		SerialPort:             os.Stdout,
+		KernelPath:             "metropolis/test/ktest/linux-testing.elf",
+		InitramfsPath:          "metropolis/test/nanoswitch/initramfs.lz4",
+		ExtraNetworkInterfaces: []*os.File{sw0, sw1},
+		PortMap:                nanoswitchPortMap,
+	}); err != nil {
+		log.Fatalf("Failed to launch nanoswitch: %v", err)
+	}
+}
diff --git a/metropolis/test/launch/cli/launch/BUILD.bazel b/metropolis/test/launch/cli/launch/BUILD.bazel
new file mode 100644
index 0000000..6b1461d
--- /dev/null
+++ b/metropolis/test/launch/cli/launch/BUILD.bazel
@@ -0,0 +1,20 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/launch/cli/launch",
+    visibility = ["//visibility:private"],
+    deps = ["//metropolis/test/launch:go_default_library"],
+)
+
+go_binary(
+    name = "launch",
+    data = [
+        "//metropolis/node:image",
+        "//metropolis/node:swtpm_data",
+        "//third_party/edk2:firmware",
+    ],
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/launch/cli/launch/main.go b/metropolis/test/launch/cli/launch/main.go
new file mode 100644
index 0000000..852c8e1
--- /dev/null
+++ b/metropolis/test/launch/cli/launch/main.go
@@ -0,0 +1,43 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"log"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+func main() {
+	sigs := make(chan os.Signal, 1)
+	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
+	ctx, cancel := context.WithCancel(context.Background())
+	go func() {
+		<-sigs
+		cancel()
+	}()
+	if err := launch.Launch(ctx, launch.Options{Ports: launch.IdentityPortMap(launch.NodePorts), SerialPort: os.Stdout}); err != nil {
+		if err == ctx.Err() {
+			return
+		}
+		log.Fatalf("Failed to execute: %v\n", err)
+	}
+}
diff --git a/metropolis/test/launch/launch.go b/metropolis/test/launch/launch.go
new file mode 100644
index 0000000..2d495e0
--- /dev/null
+++ b/metropolis/test/launch/launch.go
@@ -0,0 +1,557 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package launch
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/golang/protobuf/proto"
+	grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
+	"golang.org/x/sys/unix"
+	"google.golang.org/grpc"
+
+	freeport "git.monogon.dev/source/nexantic.git/golibs/common"
+	common "git.monogon.dev/source/nexantic.git/metropolis/node"
+	apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+)
+
+type qemuValue map[string][]string
+
+// toOption encodes structured data into a QEMU option.
+// Example: "test", {"key1": {"val1"}, "key2": {"val2", "val3"}} returns "test,key1=val1,key2=val2,key2=val3"
+func (value qemuValue) toOption(name string) string {
+	var optionValues []string
+	if name != "" {
+		optionValues = append(optionValues, name)
+	}
+	for name, values := range value {
+		if len(values) == 0 {
+			optionValues = append(optionValues, name)
+		}
+		for _, val := range values {
+			optionValues = append(optionValues, fmt.Sprintf("%v=%v", name, val))
+		}
+	}
+	return strings.Join(optionValues, ",")
+}
+
+func copyFile(src, dst string) error {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+
+	out, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer out.Close()
+
+	_, err = io.Copy(out, in)
+	if err != nil {
+		return err
+	}
+	return out.Close()
+}
+
+// PortMap represents where VM ports are mapped to on the host. It maps from the VM port number to the host port number.
+type PortMap map[uint16]uint16
+
+// toQemuForwards generates QEMU hostfwd values (https://qemu.weilnetz.de/doc/qemu-doc.html#:~:text=hostfwd=) for all
+// mapped ports.
+func (p PortMap) toQemuForwards() []string {
+	var hostfwdOptions []string
+	for vmPort, hostPort := range p {
+		hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%v-:%v", hostPort, vmPort))
+	}
+	return hostfwdOptions
+}
+
+// DialGRPC creates a gRPC client for a VM port that's forwarded/mapped to the host. The given port is automatically
+// resolved to the host-mapped port.
+func (p PortMap) DialGRPC(port uint16, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
+	mappedPort, ok := p[port]
+	if !ok {
+		return nil, fmt.Errorf("cannot dial port: port %v is not mapped/forwarded", port)
+	}
+	grpcClient, err := grpc.Dial(fmt.Sprintf("localhost:%v", mappedPort), opts...)
+	if err != nil {
+		return nil, fmt.Errorf("failed to dial port %v: %w", port, err)
+	}
+	return grpcClient, nil
+}
+
+// Options contains all options that can be passed to Launch()
+type Options struct {
+	// Ports contains the port mapping where to expose the internal ports of the VM to the host. See IdentityPortMap()
+	// and ConflictFreePortMap(). Ignored when ConnectToSocket is set.
+	Ports PortMap
+
+	// If set to true, reboots are honored. Otherwise all reboots exit the Launch() command. Smalltown generally restarts
+	// on almost all errors, so unless you want to test reboot behavior this should be false.
+	AllowReboot bool
+
+	// By default the Smalltown VM is connected to the Host via SLIRP. If ConnectToSocket is set, it is instead connected
+	// to the given file descriptor/socket. If this is set, all port maps from the Ports option are ignored.
+	// Intended for networking this instance together with others for running  more complex network configurations.
+	ConnectToSocket *os.File
+
+	// SerialPort is a File(descriptor) over which you can communicate with the serial port of the machine
+	// It can be set to an existing file descriptor (like os.Stdout/os.Stderr) or you can use NewSocketPair() to get one
+	// end to talk to from Go.
+	SerialPort *os.File
+
+	// EnrolmentConfig is passed into the VM and subsequently used for bootstrapping if no enrolment config is built-in
+	EnrolmentConfig *apb.EnrolmentConfig
+}
+
+// NodePorts is the list of ports a fully operational Smalltown node listens on
+var NodePorts = []uint16{common.ConsensusPort, common.NodeServicePort, common.MasterServicePort,
+	common.ExternalServicePort, common.DebugServicePort, common.KubernetesAPIPort, common.DebuggerPort}
+
+// IdentityPortMap returns a port map where each given port is mapped onto itself on the host. This is mainly useful
+// for development against Smalltown. The dbg command requires this mapping.
+func IdentityPortMap(ports []uint16) PortMap {
+	portMap := make(PortMap)
+	for _, port := range ports {
+		portMap[port] = port
+	}
+	return portMap
+}
+
+// ConflictFreePortMap returns a port map where each given port is mapped onto a random free port on the host. This is
+// intended for automated testing where multiple instances of Smalltown might be running. Please call this function for
+// each Launch command separately and as close to it as possible since it cannot guarantee that the ports will remain
+// free.
+func ConflictFreePortMap(ports []uint16) (PortMap, error) {
+	portMap := make(PortMap)
+	for _, port := range ports {
+		mappedPort, listenCloser, err := freeport.AllocateTCPPort()
+		if err != nil {
+			return portMap, fmt.Errorf("failed to get free host port: %w", err)
+		}
+		// Defer closing of the listening port until the function is done and all ports are allocated
+		defer listenCloser.Close()
+		portMap[port] = mappedPort
+	}
+	return portMap, nil
+}
+
+// Gets a random EUI-48 Ethernet MAC address
+func generateRandomEthernetMAC() (*net.HardwareAddr, error) {
+	macBuf := make([]byte, 6)
+	_, err := rand.Read(macBuf)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read randomness for MAC: %v", err)
+	}
+
+	// Set U/L bit and clear I/G bit (locally administered individual MAC)
+	// Ref IEEE 802-2014 Section 8.2.2
+	macBuf[0] = (macBuf[0] | 2) & 0xfe
+	mac := net.HardwareAddr(macBuf)
+	return &mac, nil
+}
+
+// Launch launches a Smalltown instance with the given options. The instance runs mostly paravirtualized but with some
+// emulated hardware similar to how a cloud provider might set up its VMs. The disk is fully writable but is run
+// in snapshot mode meaning that changes are not kept beyond a single invocation.
+func Launch(ctx context.Context, options Options) error {
+	// Pin temp directory to /tmp until we can use abstract socket namespace in QEMU (next release after 5.0,
+	// https://github.com/qemu/qemu/commit/776b97d3605ed0fc94443048fdf988c7725e38a9). swtpm accepts already-open FDs
+	// so we can pass in an abstract socket namespace FD that we open and pass the name of it to QEMU. Not pinning this
+	// crashes both swtpm and qemu because we run into UNIX socket length limitations (for legacy reasons 108 chars).
+	tempDir, err := ioutil.TempDir("/tmp", "launch*")
+	if err != nil {
+		return fmt.Errorf("failed to create temporary directory: %w", err)
+	}
+	defer os.RemoveAll(tempDir)
+
+	// Copy TPM state into a temporary directory since it's being modified by the emulator
+	tpmTargetDir := filepath.Join(tempDir, "tpm")
+	tpmSrcDir := "metropolis/node/tpm"
+	if err := os.Mkdir(tpmTargetDir, 0644); err != nil {
+		return fmt.Errorf("failed to create TPM state directory: %w", err)
+	}
+	tpmFiles, err := ioutil.ReadDir(tpmSrcDir)
+	if err != nil {
+		return fmt.Errorf("failed to read TPM directory: %w", err)
+	}
+	for _, file := range tpmFiles {
+		name := file.Name()
+		if err := copyFile(filepath.Join(tpmSrcDir, name), filepath.Join(tpmTargetDir, name)); err != nil {
+			return fmt.Errorf("failed to copy TPM directory: %w", err)
+		}
+	}
+
+	var qemuNetType string
+	var qemuNetConfig qemuValue
+	if options.ConnectToSocket != nil {
+		qemuNetType = "socket"
+		qemuNetConfig = qemuValue{
+			"id": {"net0"},
+			"fd": {"3"},
+		}
+	} else {
+		qemuNetType = "user"
+		qemuNetConfig = qemuValue{
+			"id":        {"net0"},
+			"net":       {"10.42.0.0/24"},
+			"dhcpstart": {"10.42.0.10"},
+			"hostfwd":   options.Ports.toQemuForwards(),
+		}
+	}
+
+	tpmSocketPath := filepath.Join(tempDir, "tpm-socket")
+
+	mac, err := generateRandomEthernetMAC()
+	if err != nil {
+		return err
+	}
+
+	qemuArgs := []string{"-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "4096",
+		"-cpu", "host", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4",
+		"-drive", "if=pflash,format=raw,readonly,file=external/edk2/OVMF_CODE.fd",
+		"-drive", "if=pflash,format=raw,snapshot=on,file=external/edk2/OVMF_VARS.fd",
+		"-drive", "if=virtio,format=raw,snapshot=on,cache=unsafe,file=metropolis/node/smalltown.img",
+		"-netdev", qemuNetConfig.toOption(qemuNetType),
+		"-device", "virtio-net-pci,netdev=net0,mac=" + mac.String(),
+		"-chardev", "socket,id=chrtpm,path=" + tpmSocketPath,
+		"-tpmdev", "emulator,id=tpm0,chardev=chrtpm",
+		"-device", "tpm-tis,tpmdev=tpm0",
+		"-device", "virtio-rng-pci",
+		"-serial", "stdio"}
+
+	if !options.AllowReboot {
+		qemuArgs = append(qemuArgs, "-no-reboot")
+	}
+
+	if options.EnrolmentConfig != nil {
+		enrolmentConfigPath := filepath.Join(tempDir, "enrolment.pb")
+		enrolmentConfigRaw, err := proto.Marshal(options.EnrolmentConfig)
+		if err != nil {
+			return fmt.Errorf("failed to encode enrolment config: %w", err)
+		}
+		if err := ioutil.WriteFile(enrolmentConfigPath, enrolmentConfigRaw, 0644); err != nil {
+			return fmt.Errorf("failed to write enrolment config: %w", err)
+		}
+		qemuArgs = append(qemuArgs, "-fw_cfg", "name=com.nexantic.smalltown/enrolment.pb,file="+enrolmentConfigPath)
+	}
+
+	// Start TPM emulator as a subprocess
+	tpmCtx, tpmCancel := context.WithCancel(ctx)
+	defer tpmCancel()
+
+	tpmEmuCmd := exec.CommandContext(tpmCtx, "swtpm", "socket", "--tpm2", "--tpmstate", "dir="+tpmTargetDir, "--ctrl", "type=unixio,path="+tpmSocketPath)
+	tpmEmuCmd.Stderr = os.Stderr
+	tpmEmuCmd.Stdout = os.Stdout
+
+	err = tpmEmuCmd.Start()
+	if err != nil {
+		return fmt.Errorf("failed to start TPM emulator: %w", err)
+	}
+
+	// Start the main qemu binary
+	systemCmd := exec.CommandContext(ctx, "qemu-system-x86_64", qemuArgs...)
+	if options.ConnectToSocket != nil {
+		systemCmd.ExtraFiles = []*os.File{options.ConnectToSocket}
+	}
+
+	var stdErrBuf bytes.Buffer
+	systemCmd.Stderr = &stdErrBuf
+	systemCmd.Stdout = options.SerialPort
+
+	err = systemCmd.Run()
+
+	// Stop TPM emulator and wait for it to exit to properly reap the child process
+	tpmCancel()
+	log.Print("Waiting for TPM emulator to exit")
+	// Wait returns a SIGKILL error because we just cancelled its context.
+	// We still need to call it to avoid creating zombies.
+	_ = tpmEmuCmd.Wait()
+
+	var exerr *exec.ExitError
+	if err != nil && errors.As(err, &exerr) {
+		status := exerr.ProcessState.Sys().(syscall.WaitStatus)
+		if status.Signaled() && status.Signal() == syscall.SIGKILL {
+			// Process was killed externally (most likely by our context being canceled).
+			// This is a normal exit for us, so return nil
+			return nil
+		}
+		exerr.Stderr = stdErrBuf.Bytes()
+		newErr := QEMUError(*exerr)
+		return &newErr
+	}
+	return err
+}
+
+// NewSocketPair creates a new socket pair. By connecting both ends to different instances you can connect them
+// with a virtual "network cable". The ends can be passed into the ConnectToSocket option.
+func NewSocketPair() (*os.File, *os.File, error) {
+	fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to call socketpair: %w", err)
+	}
+
+	fd1 := os.NewFile(uintptr(fds[0]), "network0")
+	fd2 := os.NewFile(uintptr(fds[1]), "network1")
+	return fd1, fd2, nil
+}
+
+// HostInterfaceMAC is the MAC address the host SLIRP network interface has if it is not disabled (see
+// DisableHostNetworkInterface in MicroVMOptions)
+var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}
+
+// MicroVMOptions contains all options to start a MicroVM
+type MicroVMOptions struct {
+	// Path to the ELF kernel binary
+	KernelPath string
+
+	// Path to the Initramfs
+	InitramfsPath string
+
+	// Cmdline contains additional kernel commandline options
+	Cmdline string
+
+	// SerialPort is a File(descriptor) over which you can communicate with the serial port of the machine
+	// It can be set to an existing file descriptor (like os.Stdout/os.Stderr) or you can use NewSocketPair() to get one
+	// end to talk to from Go.
+	SerialPort *os.File
+
+	// ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary number of additional serial ports
+	ExtraChardevs []*os.File
+
+	// ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which are mapped into the VM as virtio
+	// network interfaces. The first interface is always a SLIRP-backed interface for communicating with the host.
+	ExtraNetworkInterfaces []*os.File
+
+	// PortMap contains ports that are mapped to the host through the built-in SLIRP network interface.
+	PortMap PortMap
+
+	// DisableHostNetworkInterface disables the SLIRP-backed host network interface that is normally the first network
+	// interface. If this is set PortMap is ignored. Mostly useful for speeding up QEMU's startup time for tests.
+	DisableHostNetworkInterface bool
+}
+
+// RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot (<40ms).
+func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error {
+	// Generate options for all the file descriptors we'll be passing as virtio "serial ports"
+	var extraArgs []string
+	for idx, _ := range opts.ExtraChardevs {
+		idxStr := strconv.Itoa(idx)
+		id := "extra" + idxStr
+		// That this works is pretty much a hack, but upstream QEMU doesn't have a bidirectional chardev backend not
+		// based around files/sockets on the disk which are a giant pain to work with.
+		// We're using QEMU's fdset functionality to make FDs available as pseudo-files and then "ab"using the pipe
+		// backend's fallback functionality to get a single bidirectional chardev backend backed by a passed-down
+		// RDWR fd.
+		// Ref https://lists.gnu.org/archive/html/qemu-devel/2015-12/msg01256.html
+		addFdConf := qemuValue{
+			"set": {idxStr},
+			"fd":  {strconv.Itoa(idx + 3)},
+		}
+		chardevConf := qemuValue{
+			"id":   {id},
+			"path": {"/dev/fdset/" + idxStr},
+		}
+		deviceConf := qemuValue{
+			"chardev": {id},
+		}
+		extraArgs = append(extraArgs, "-add-fd", addFdConf.toOption(""),
+			"-chardev", chardevConf.toOption("pipe"), "-device", deviceConf.toOption("virtserialport"))
+	}
+
+	for idx, _ := range opts.ExtraNetworkInterfaces {
+		id := fmt.Sprintf("net%v", idx)
+		netdevConf := qemuValue{
+			"id": {id},
+			"fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))},
+		}
+		extraArgs = append(extraArgs, "-netdev", netdevConf.toOption("socket"), "-device", "virtio-net-device,netdev="+id)
+	}
+
+	// This sets up a minimum viable environment for our Linux kernel.
+	// It clears all standard QEMU configuration and sets up a MicroVM machine
+	// (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy emulation turned off. This means
+	// the only "hardware" the Linux kernel inside can communicate with is a single virtio-mmio region. Over that MMIO
+	// interface we run a paravirtualized RNG (since the kernel in there has nothing to gather that from and it
+	// delays booting), a single paravirtualized console and an arbitrary number of extra serial ports for talking to
+	// various things that might run inside. The kernel, initramfs and command line are mapped into VM memory at boot
+	// time and not loaded from any sort of disk. Booting and shutting off one of these VMs takes <100ms.
+	baseArgs := []string{"-nodefaults", "-no-user-config", "-nographic", "-no-reboot",
+		"-accel", "kvm", "-cpu", "host",
+		// Needed until QEMU updates their bundled qboot version (needs https://github.com/bonzini/qboot/pull/28)
+		"-bios", "external/com_github_bonzini_qboot/bios.bin",
+		"-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off",
+		"-kernel", opts.KernelPath,
+		// We force using a triple-fault reboot strategy since otherwise the kernel first tries others (like ACPI) which
+		// are not available in this very restricted environment. Similarly we need to override the boot console since
+		// there's nothing on the ISA bus that the kernel could talk to. We also force quiet for performance reasons.
+		"-append", "reboot=t console=hvc0 quiet " + opts.Cmdline,
+		"-initrd", opts.InitramfsPath,
+		"-device", "virtio-rng-device,max-bytes=1024,period=1000",
+		"-device", "virtio-serial-device,max_ports=16",
+		"-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0",
+	}
+
+	if !opts.DisableHostNetworkInterface {
+		qemuNetType := "user"
+		qemuNetConfig := qemuValue{
+			"id":        {"usernet0"},
+			"net":       {"10.42.0.0/24"},
+			"dhcpstart": {"10.42.0.10"},
+		}
+		if opts.PortMap != nil {
+			qemuNetConfig["hostfwd"] = opts.PortMap.toQemuForwards()
+		}
+
+		baseArgs = append(baseArgs, "-netdev", qemuNetConfig.toOption(qemuNetType),
+			"-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String())
+	}
+
+	var stdErrBuf bytes.Buffer
+	cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...)
+	cmd.Stdout = opts.SerialPort
+	cmd.Stderr = &stdErrBuf
+
+	cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...)
+	cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...)
+
+	err := cmd.Run()
+	var exerr *exec.ExitError
+	if err != nil && errors.As(err, &exerr) {
+		exerr.Stderr = stdErrBuf.Bytes()
+		newErr := QEMUError(*exerr)
+		return &newErr
+	}
+	return err
+}
+
+// QEMUError is a special type of ExitError used when QEMU fails. In addition to normal ExitError features it
+// prints stderr for debugging.
+type QEMUError exec.ExitError
+
+func (e *QEMUError) Error() string {
+	return fmt.Sprintf("%v: %v", e.String(), string(e.Stderr))
+}
+
+// NanoswitchPorts contains all ports forwarded by Nanoswitch to the first VM
+var NanoswitchPorts = []uint16{
+	common.ExternalServicePort,
+	common.DebugServicePort,
+	common.KubernetesAPIPort,
+}
+
+// ClusterOptions contains all options for launching a Smalltown cluster
+type ClusterOptions struct {
+	// The number of nodes this cluster should be started with initially
+	NumNodes int
+}
+
+// LaunchCluster launches a cluster of Smalltown VMs together with a Nanoswitch instance to network them all together.
+func LaunchCluster(ctx context.Context, opts ClusterOptions) (apb.NodeDebugServiceClient, PortMap, error) {
+	var switchPorts []*os.File
+	var vmPorts []*os.File
+	for i := 0; i < opts.NumNodes; i++ {
+		switchPort, vmPort, err := NewSocketPair()
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to get socketpair: %w", err)
+		}
+		switchPorts = append(switchPorts, switchPort)
+		vmPorts = append(vmPorts, vmPort)
+	}
+
+	if opts.NumNodes == 0 {
+		return nil, nil, errors.New("refusing to start cluster with zero nodes")
+	}
+
+	if opts.NumNodes > 2 {
+		return nil, nil, errors.New("launching more than 2 nodes is unsupported pending replacement of golden tickets")
+	}
+
+	go func() {
+		if err := Launch(ctx, Options{ConnectToSocket: vmPorts[0]}); err != nil {
+			// Launch() only terminates when QEMU has terminated. At that point our function probably doesn't run anymore
+			// so we have no way of communicating the error back up, so let's just log it. Also a failure in launching
+			// VMs should be very visible by the unavailability of the clients we return.
+			log.Printf("Failed to launch vm0: %v", err)
+		}
+	}()
+
+	portMap, err := ConflictFreePortMap(NanoswitchPorts)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to allocate ephemeral ports: %w", err)
+	}
+
+	go func() {
+		if err := RunMicroVM(ctx, &MicroVMOptions{
+			KernelPath:             "metropolis/test/ktest/linux-testing.elf",
+			InitramfsPath:          "metropolis/test/nanoswitch/initramfs.lz4",
+			ExtraNetworkInterfaces: switchPorts,
+			PortMap:                portMap,
+		}); err != nil {
+			log.Printf("Failed to launch nanoswitch: %v", err)
+		}
+	}()
+	copts := []grpcretry.CallOption{
+		grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
+	}
+	conn, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure(),
+		grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(copts...)))
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to dial debug service: %w", err)
+	}
+	defer conn.Close()
+	debug := apb.NewNodeDebugServiceClient(conn)
+
+	if opts.NumNodes == 2 {
+		res, err := debug.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{
+			// HACK: this is assigned by DHCP, and we assume that everything goes well.
+			ExternalIp: "10.1.0.3",
+		}, grpcretry.WithMax(10))
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to get golden ticket: %w", err)
+		}
+
+		ec := &apb.EnrolmentConfig{
+			GoldenTicket: res.Ticket,
+		}
+
+		go func() {
+			if err := Launch(ctx, Options{ConnectToSocket: vmPorts[1], EnrolmentConfig: ec}); err != nil {
+				log.Printf("Failed to launch vm1: %v", err)
+			}
+		}()
+	}
+
+	return debug, portMap, nil
+}
diff --git a/metropolis/test/nanoswitch/BUILD b/metropolis/test/nanoswitch/BUILD
new file mode 100644
index 0000000..fc4f932
--- /dev/null
+++ b/metropolis/test/nanoswitch/BUILD
@@ -0,0 +1,40 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("//metropolis/node/build:def.bzl", "smalltown_initramfs")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["nanoswitch.go"],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/nanoswitch",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//metropolis/node:go_default_library",
+        "//metropolis/node/common/supervisor:go_default_library",
+        "//metropolis/node/core/network/dhcp4c:go_default_library",
+        "//metropolis/node/core/network/dhcp4c/callback:go_default_library",
+        "//metropolis/test/launch:go_default_library",
+        "@com_github_google_nftables//:go_default_library",
+        "@com_github_google_nftables//expr:go_default_library",
+        "@com_github_insomniacslk_dhcp//dhcpv4:go_default_library",
+        "@com_github_insomniacslk_dhcp//dhcpv4/server4:go_default_library",
+        "@com_github_vishvananda_netlink//:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "nanoswitch",
+    embed = [":go_default_library"],
+    pure = "on",
+    visibility = ["//visibility:public"],
+)
+
+smalltown_initramfs(
+    name = "initramfs",
+    files = {
+        ":nanoswitch": "/init",
+
+        # CA Certificate bundle
+        "@cacerts//file": "/etc/ssl/cert.pem",
+    },
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/nanoswitch/nanoswitch.go b/metropolis/test/nanoswitch/nanoswitch.go
new file mode 100644
index 0000000..1fe6740
--- /dev/null
+++ b/metropolis/test/nanoswitch/nanoswitch.go
@@ -0,0 +1,301 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nanoswitch is a virtualized switch/router combo intended for testing.
+// It uses the first interface as an external interface to connect to the host and pass traffic in and out. All other
+// interfaces are switched together and served by a built-in DHCP server. Traffic from that network to the
+// SLIRP/external network is SNATed as the host-side SLIRP ignores routed packets.
+// It also has built-in userspace proxying support for debugging.
+package main
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
+	"os"
+	"time"
+
+	"github.com/google/nftables"
+	"github.com/google/nftables/expr"
+	"github.com/insomniacslk/dhcp/dhcpv4"
+	"github.com/insomniacslk/dhcp/dhcpv4/server4"
+	"github.com/vishvananda/netlink"
+	"golang.org/x/sys/unix"
+
+	common "git.monogon.dev/source/nexantic.git/metropolis/node"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/common/supervisor"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/core/network/dhcp4c"
+	dhcpcb "git.monogon.dev/source/nexantic.git/metropolis/node/core/network/dhcp4c/callback"
+	"git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+var switchIP = net.IP{10, 1, 0, 1}
+var switchSubnetMask = net.CIDRMask(24, 32)
+
+// defaultLeaseOptions sets the lease options needed to properly configure connectivity to nanoswitch
+func defaultLeaseOptions(reply *dhcpv4.DHCPv4) {
+	reply.GatewayIPAddr = switchIP
+	reply.UpdateOption(dhcpv4.OptDNS(net.IPv4(10, 42, 0, 3))) // SLIRP fake DNS server
+	reply.UpdateOption(dhcpv4.OptRouter(switchIP))
+	reply.UpdateOption(dhcpv4.OptIPAddressLeaseTime(30 * time.Second)) // Make sure we exercise our DHCP client in E2E tests
+	reply.UpdateOption(dhcpv4.OptSubnetMask(switchSubnetMask))
+}
+
+// runDHCPServer runs an extremely minimal DHCP server with most options hardcoded, a wrapping bump allocator for the
+// IPs, 12h Lease timeout and no support for DHCP collision detection.
+func runDHCPServer(link netlink.Link) supervisor.Runnable {
+	currentIP := net.IP{10, 1, 0, 1}
+
+	return func(ctx context.Context) error {
+		laddr := net.UDPAddr{
+			IP:   net.IPv4(0, 0, 0, 0),
+			Port: 67,
+		}
+		server, err := server4.NewServer(link.Attrs().Name, &laddr, func(conn net.PacketConn, peer net.Addr, m *dhcpv4.DHCPv4) {
+			if m == nil {
+				return
+			}
+			reply, err := dhcpv4.NewReplyFromRequest(m)
+			if err != nil {
+				supervisor.Logger(ctx).Warningf("Failed to generate DHCP reply: %v", err)
+				return
+			}
+			reply.UpdateOption(dhcpv4.OptServerIdentifier(switchIP))
+			reply.ServerIPAddr = switchIP
+
+			switch m.MessageType() {
+			case dhcpv4.MessageTypeDiscover:
+				reply.UpdateOption(dhcpv4.OptMessageType(dhcpv4.MessageTypeOffer))
+				defaultLeaseOptions(reply)
+				currentIP[3]++ // Works only because it's a /24
+				reply.YourIPAddr = currentIP
+				supervisor.Logger(ctx).Infof("Replying with DHCP IP %s", reply.YourIPAddr.String())
+			case dhcpv4.MessageTypeRequest:
+				reply.UpdateOption(dhcpv4.OptMessageType(dhcpv4.MessageTypeAck))
+				defaultLeaseOptions(reply)
+				if m.RequestedIPAddress() != nil {
+					reply.YourIPAddr = m.RequestedIPAddress()
+				} else {
+					reply.YourIPAddr = m.ClientIPAddr
+				}
+			case dhcpv4.MessageTypeRelease, dhcpv4.MessageTypeDecline:
+				supervisor.Logger(ctx).Info("Ignoring Release/Decline")
+			}
+			if _, err := conn.WriteTo(reply.ToBytes(), peer); err != nil {
+				supervisor.Logger(ctx).Warningf("Cannot reply to client: %v", err)
+			}
+		})
+		if err != nil {
+			return err
+		}
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		go func() {
+			<-ctx.Done()
+			server.Close()
+		}()
+		return server.Serve()
+	}
+}
+
+// userspaceProxy listens on port and proxies all TCP connections to the same port on targetIP
+func userspaceProxy(targetIP net.IP, port uint16) supervisor.Runnable {
+	return func(ctx context.Context) error {
+		logger := supervisor.Logger(ctx)
+		tcpListener, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.IPv4(0, 0, 0, 0), Port: int(port)})
+		if err != nil {
+			return err
+		}
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		go func() {
+			<-ctx.Done()
+			tcpListener.Close()
+		}()
+		for {
+			conn, err := tcpListener.AcceptTCP()
+			if err != nil {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				return err
+			}
+			go func(conn *net.TCPConn) {
+				defer conn.Close()
+				upstreamConn, err := net.DialTCP("tcp", nil, &net.TCPAddr{IP: targetIP, Port: int(port)})
+				if err != nil {
+					logger.Infof("Userspace proxy failed to connect to upstream: %v", err)
+					return
+				}
+				defer upstreamConn.Close()
+				go io.Copy(upstreamConn, conn)
+				io.Copy(conn, upstreamConn)
+			}(conn)
+		}
+
+	}
+}
+
+// addNetworkRoutes sets up routing from DHCP
+func addNetworkRoutes(link netlink.Link, addr net.IPNet, gw net.IP) error {
+	if err := netlink.AddrReplace(link, &netlink.Addr{IPNet: &addr}); err != nil {
+		return fmt.Errorf("failed to add DHCP address to network interface \"%v\": %w", link.Attrs().Name, err)
+	}
+
+	if gw.IsUnspecified() {
+		return nil
+	}
+
+	route := &netlink.Route{
+		Dst:   &net.IPNet{IP: net.IPv4(0, 0, 0, 0), Mask: net.IPv4Mask(0, 0, 0, 0)},
+		Gw:    gw,
+		Scope: netlink.SCOPE_UNIVERSE,
+	}
+	if err := netlink.RouteAdd(route); err != nil {
+		return fmt.Errorf("could not add default route: netlink.RouteAdd(%+v): %v", route, err)
+	}
+	return nil
+}
+
+// nfifname converts an interface name into 16 bytes padded with zeroes (for nftables)
+func nfifname(n string) []byte {
+	b := make([]byte, 16)
+	copy(b, []byte(n+"\x00"))
+	return b
+}
+
+func main() {
+	supervisor.New(context.Background(), func(ctx context.Context) error {
+		logger := supervisor.Logger(ctx)
+		logger.Info("Starting NanoSwitch, a tiny TOR switch emulator")
+
+		// Set up target filesystems.
+		for _, el := range []struct {
+			dir   string
+			fs    string
+			flags uintptr
+		}{
+			{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+			{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+			{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+			{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+		} {
+			if err := os.Mkdir(el.dir, 0755); err != nil && !os.IsExist(err) {
+				return fmt.Errorf("could not make %s: %w", el.dir, err)
+			}
+			if err := unix.Mount(el.fs, el.dir, el.fs, el.flags, ""); err != nil {
+				return fmt.Errorf("could not mount %s on %s: %w", el.fs, el.dir, err)
+			}
+		}
+
+		c := &nftables.Conn{}
+
+		links, err := netlink.LinkList()
+		if err != nil {
+			logger.Fatalf("Failed to list links: %v", err)
+		}
+		var externalLink netlink.Link
+		var vmLinks []netlink.Link
+		for _, link := range links {
+			attrs := link.Attrs()
+			if link.Type() == "device" && len(attrs.HardwareAddr) > 0 {
+				if attrs.Flags&net.FlagUp != net.FlagUp {
+					netlink.LinkSetUp(link) // Attempt to take up all ethernet links
+				}
+				if bytes.Equal(attrs.HardwareAddr, launch.HostInterfaceMAC) {
+					externalLink = link
+				} else {
+					vmLinks = append(vmLinks, link)
+				}
+			}
+		}
+		vmBridgeLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "vmbridge", Flags: net.FlagUp}}
+		if err := netlink.LinkAdd(vmBridgeLink); err != nil {
+			logger.Fatalf("Failed to create vmbridge: %v", err)
+		}
+		for _, link := range vmLinks {
+			if err := netlink.LinkSetMaster(link, vmBridgeLink); err != nil {
+				logger.Fatalf("Failed to add VM interface to bridge: %v", err)
+			}
+			logger.Infof("Assigned interface %s to bridge", link.Attrs().Name)
+		}
+		if err := netlink.AddrReplace(vmBridgeLink, &netlink.Addr{IPNet: &net.IPNet{IP: switchIP, Mask: switchSubnetMask}}); err != nil {
+			logger.Fatalf("Failed to assign static IP to vmbridge: %v", err)
+		}
+		if externalLink != nil {
+			nat := c.AddTable(&nftables.Table{
+				Family: nftables.TableFamilyIPv4,
+				Name:   "nat",
+			})
+
+			postrouting := c.AddChain(&nftables.Chain{
+				Name:     "postrouting",
+				Hooknum:  nftables.ChainHookPostrouting,
+				Priority: nftables.ChainPriorityNATSource,
+				Table:    nat,
+				Type:     nftables.ChainTypeNAT,
+			})
+
+			// Masquerade/SNAT all traffic going out of the external interface
+			c.AddRule(&nftables.Rule{
+				Table: nat,
+				Chain: postrouting,
+				Exprs: []expr.Any{
+					&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
+					&expr.Cmp{
+						Op:       expr.CmpOpEq,
+						Register: 1,
+						Data:     nfifname(externalLink.Attrs().Name),
+					},
+					&expr.Masq{},
+				},
+			})
+
+			if err := c.Flush(); err != nil {
+				panic(err)
+			}
+
+			netIface := &net.Interface{
+				Name:         externalLink.Attrs().Name,
+				MTU:          externalLink.Attrs().MTU,
+				Index:        externalLink.Attrs().Index,
+				Flags:        externalLink.Attrs().Flags,
+				HardwareAddr: externalLink.Attrs().HardwareAddr,
+			}
+			dhcpClient, err := dhcp4c.NewClient(netIface)
+			if err != nil {
+				logger.Fatalf("Failed to create DHCP client: %v", err)
+			}
+			dhcpClient.RequestedOptions = []dhcpv4.OptionCode{dhcpv4.OptionRouter}
+			dhcpClient.LeaseCallback = dhcpcb.Compose(dhcpcb.ManageIP(externalLink), dhcpcb.ManageDefaultRoute(externalLink))
+			supervisor.Run(ctx, "dhcp-client", dhcpClient.Run)
+			if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte("1\n"), 0644); err != nil {
+				logger.Fatalf("Failed to write ip forwards: %v", err)
+			}
+		} else {
+			logger.Info("No upstream interface detected")
+		}
+		supervisor.Run(ctx, "dhcp-server", runDHCPServer(vmBridgeLink))
+		supervisor.Run(ctx, "proxy-ext1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.ExternalServicePort))
+		supervisor.Run(ctx, "proxy-dbg1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.DebugServicePort))
+		supervisor.Run(ctx, "proxy-k8s-api1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.KubernetesAPIPort))
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		supervisor.Signal(ctx, supervisor.SignalDone)
+		return nil
+	})
+	select {}
+}