core -> metropolis
Smalltown is now called Metropolis!
This is the first commit in a series of cleanup commits that prepare us
for an open source release. This one just moves some Bazel packages around to
follow a stricter directory layout.
All of Metropolis now lives in `//metropolis`.
All of Metropolis Node code now lives in `//metropolis/node`.
All of the main /init now lives in `//m/n/core`.
All of the Kubernetes functionality/glue now lives in `//m/n/kubernetes`.
Next steps:
- hunt down all references to Smalltown and replace them appropriately
- narrow down visibility rules
- document new code organization
- move `//build/toolchain` to `//monogon/build/toolchain`
- do another cleanup pass between `//golibs` and
`//monogon/node/{core,common}`.
- remove `//delta` and `//anubis`
Fixes T799.
Test Plan: Just a very large refactor. CI should help us out here.
Bug: T799
X-Origin-Diff: phab/D667
GitOrigin-RevId: 6029b8d4edc42325d50042596b639e8b122d0ded
diff --git a/metropolis/test/e2e/BUILD.bazel b/metropolis/test/e2e/BUILD.bazel
new file mode 100644
index 0000000..b9bb6f8
--- /dev/null
+++ b/metropolis/test/e2e/BUILD.bazel
@@ -0,0 +1,43 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "go_default_library",
+ srcs = [
+ "kubernetes_helpers.go",
+ "utils.go",
+ ],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/e2e",
+ visibility = ["//metropolis/test:__subpackages__"],
+ deps = [
+ "//metropolis/proto/api:go_default_library",
+ "@io_k8s_api//apps/v1:go_default_library",
+ "@io_k8s_api//core/v1:go_default_library",
+ "@io_k8s_apimachinery//pkg/api/resource:go_default_library",
+ "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+ "@io_k8s_apimachinery//pkg/util/intstr:go_default_library",
+ "@io_k8s_client_go//kubernetes:go_default_library",
+ "@io_k8s_client_go//tools/clientcmd:go_default_library",
+ ],
+)
+
+go_test(
+ name = "go_default_test",
+ size = "large",
+ srcs = ["main_test.go"],
+ data = [
+ "//metropolis/node:image",
+ "//metropolis/node:swtpm_data",
+ "//third_party/edk2:firmware",
+ ],
+ embed = [":go_default_library"],
+ rundir = ".",
+ deps = [
+ "//metropolis/node:go_default_library",
+ "//metropolis/proto/api:go_default_library",
+ "//metropolis/test/launch:go_default_library",
+ "@io_k8s_api//core/v1:go_default_library",
+ "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+ "@io_k8s_kubernetes//pkg/api/v1/pod:go_default_library",
+ "@org_golang_google_grpc//:go_default_library",
+ ],
+)
diff --git a/metropolis/test/e2e/k8s_cts/BUILD.bazel b/metropolis/test/e2e/k8s_cts/BUILD.bazel
new file mode 100644
index 0000000..0e43c24
--- /dev/null
+++ b/metropolis/test/e2e/k8s_cts/BUILD.bazel
@@ -0,0 +1,55 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("@io_bazel_rules_docker//go:image.bzl", "go_image")
+load("@io_bazel_rules_docker//container:container.bzl", "container_image")
+
+go_image(
+ name = "kubectl",
+ binary = "@io_k8s_kubernetes//cmd/kubectl",
+ pure = "on",
+)
+
+container_image(
+ name = "kubectl_in_path",
+ base = ":kubectl",
+ env = {
+ # Don't include FHS paths since they aren't available anyways
+ "PATH": "/app/cmd/kubectl",
+ },
+)
+
+go_image(
+ name = "k8s_cts_image",
+ base = ":kubectl_in_path",
+ binary = "@io_k8s_kubernetes//test/e2e:_go_default_test-pure",
+ pure = "on",
+ visibility = ["//visibility:public"],
+)
+
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/e2e/k8s_cts",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//metropolis/node:go_default_library",
+ "//metropolis/test/e2e:go_default_library",
+ "//metropolis/test/launch:go_default_library",
+ "@io_k8s_api//core/v1:go_default_library",
+ "@io_k8s_api//rbac/v1:go_default_library",
+ "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+ ],
+)
+
+go_binary(
+ name = "k8s_cts",
+ data = [
+ "//metropolis/node:image",
+ "//metropolis/node:swtpm_data",
+ "//metropolis/test/nanoswitch:initramfs",
+ "//metropolis/test/ktest:linux-testing",
+ "//third_party/edk2:firmware",
+ "@com_github_bonzini_qboot//:qboot-bin",
+ ],
+ embed = [":go_default_library"],
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/e2e/k8s_cts/main.go b/metropolis/test/e2e/k8s_cts/main.go
new file mode 100644
index 0000000..728a890
--- /dev/null
+++ b/metropolis/test/e2e/k8s_cts/main.go
@@ -0,0 +1,175 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This package launches a Smalltown Cluster with two nodes and spawns in the CTS container. Then it streams its output
+// to the console. When the CTS has finished it exits with the appropriate error code.
+package main
+
+import (
+ "context"
+ "io"
+ "log"
+ "os"
+ "os/signal"
+ "strings"
+ "syscall"
+ "time"
+
+ corev1 "k8s.io/api/core/v1"
+ rbacv1 "k8s.io/api/rbac/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+ common "git.monogon.dev/source/nexantic.git/metropolis/node"
+ "git.monogon.dev/source/nexantic.git/metropolis/test/e2e"
+ "git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+// makeCTSPodSpec builds the standalone pod that runs the Kubernetes CTS under the given ServiceAccount. The E2E suite
+// inside it is told to run only [Conformance] tests, minus a list of tests known to be broken on this platform.
+func makeCTSPodSpec(name string, saName string) *corev1.Pod {
+	// Regexes matching the known-broken tests, handed to ginkgo as one alternation.
+	skip := strings.Join([]string{
+		// hostNetworking cannot be supported since we run different network stacks for the host and containers
+		"should function for node-pod communication",
+		// gVisor misreports statfs() syscalls: https://github.com/google/gvisor/issues/3339
+		`should support \((non-)?root,`,
+		"volume on tmpfs should have the correct mode",
+		"volume on default medium should have the correct mode",
+		// gVisor doesn't support the full Linux privilege machinery including SUID and NewPrivs
+		// https://github.com/google/gvisor/issues/189#issuecomment-481064000
+		"should run the container as unprivileged when false",
+	}, "|")
+	// PullNever: the image has to be present on the node already, it is never fetched from a registry.
+	cts := corev1.Container{
+		Name:  "cts",
+		Image: "bazel/metropolis/test/e2e/k8s_cts:k8s_cts_image",
+		Args: []string{
+			"-cluster-ip-range=10.0.0.0/17",
+			"-dump-systemd-journal=false",
+			"-ginkgo.focus=\\[Conformance\\]",
+			"-ginkgo.skip=" + skip,
+			"-test.parallel=8",
+		},
+		ImagePullPolicy: corev1.PullNever,
+	}
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+			Labels: map[string]string{
+				"name": name,
+			},
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{cts},
+			// Tolerate all taints, otherwise the CTS likes to self-evict
+			Tolerations:        []corev1.Toleration{{Operator: "Exists"}},
+			PriorityClassName:  "system-cluster-critical", // Don't evict the CTS pod
+			RestartPolicy:      corev1.RestartPolicyNever,
+			ServiceAccountName: saName,
+		},
+	}
+}
+
+func main() {
+ sigs := make(chan os.Signal, 1)
+ signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
+ ctx, cancel := context.WithCancel(context.Background())
+ go func() {
+ <-sigs
+ cancel()
+ }()
+
+ debugClient, portMap, err := launch.LaunchCluster(ctx, launch.ClusterOptions{NumNodes: 2})
+ if err != nil {
+ log.Fatalf("Failed to launch cluster: %v", err)
+ }
+ log.Println("Cluster initialized")
+
+ clientSet, err := e2e.GetKubeClientSet(ctx, debugClient, portMap[common.KubernetesAPIPort])
+ if err != nil {
+ log.Fatalf("Failed to get clientSet: %v", err)
+ }
+ log.Println("Credentials available")
+
+ saName := "cts"
+ ctsSA := &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: saName}}
+ for {
+ if _, err := clientSet.CoreV1().ServiceAccounts("default").Create(ctx, ctsSA, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ServiceAccount: %v", err)
+ time.Sleep(1 * time.Second)
+ continue
+ }
+ break
+ }
+ ctsRoleBinding := &rbacv1.ClusterRoleBinding{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: saName,
+ },
+ Subjects: []rbacv1.Subject{
+ {
+ Namespace: "default",
+ Name: saName,
+ Kind: rbacv1.ServiceAccountKind,
+ },
+ },
+ RoleRef: rbacv1.RoleRef{
+ Kind: "ClusterRole",
+ Name: "cluster-admin",
+ },
+ }
+ podName := "cts"
+ if _, err := clientSet.RbacV1().ClusterRoleBindings().Create(ctx, ctsRoleBinding, metav1.CreateOptions{}); err != nil {
+ log.Fatalf("Failed to create ClusterRoleBinding: %v", err)
+ }
+ for {
+ if _, err := clientSet.CoreV1().Pods("default").Create(ctx, makeCTSPodSpec(podName, saName), metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Pod: %v", err)
+ time.Sleep(1 * time.Second)
+ continue
+ }
+ break
+ }
+ var logs io.ReadCloser
+ go func() {
+ // This loops the whole .Stream()/io.Copy process because the API sometimes returns streams that immediately return EOF
+ for {
+ logs, err = clientSet.CoreV1().Pods("default").GetLogs(podName, &corev1.PodLogOptions{Follow: true}).Stream(ctx)
+ if err == nil {
+ if _, err := io.Copy(os.Stdout, logs); err != nil {
+ log.Printf("Log pump error: %v", err)
+ }
+ logs.Close()
+ } else {
+ log.Printf("Pod logs not ready yet: %v", err)
+ }
+ time.Sleep(1 * time.Second)
+ }
+ }()
+ for {
+ time.Sleep(1 * time.Second)
+ pod, err := clientSet.CoreV1().Pods("default").Get(ctx, podName, metav1.GetOptions{})
+ if err != nil {
+ log.Printf("Failed to get CTS pod: %v", err)
+ continue
+ }
+ if pod.Status.Phase == corev1.PodSucceeded {
+ return
+ }
+ if pod.Status.Phase == corev1.PodFailed {
+ log.Fatalf("CTS failed")
+ }
+ }
+}
diff --git a/metropolis/test/e2e/kubernetes_helpers.go b/metropolis/test/e2e/kubernetes_helpers.go
new file mode 100644
index 0000000..fffbd1b
--- /dev/null
+++ b/metropolis/test/e2e/kubernetes_helpers.go
@@ -0,0 +1,146 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "time"
+
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/intstr"
+ "k8s.io/client-go/kubernetes"
+ "k8s.io/client-go/tools/clientcmd"
+
+ apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+)
+
+// GetKubeClientSet gets a Kubeconfig from the debug API and creates a K8s ClientSet using it. The identity used has
+// the system:masters group and thus has RBAC access to everything. The debug API is retried once per second until it
+// answers or ctx is done; in the latter case the last error seen before ctx ended is returned.
+func GetKubeClientSet(ctx context.Context, client apb.NodeDebugServiceClient, port uint16) (kubernetes.Interface, error) {
+	var lastErr = errors.New("context canceled before any operation completed")
+	for {
+		reqT, cancel := context.WithTimeout(ctx, 5*time.Second)
+		res, err := client.GetDebugKubeconfig(reqT, &apb.GetDebugKubeconfigRequest{Id: "debug-user", Groups: []string{"system:masters"}})
+		cancel() // Release this attempt's timer now; a defer inside the loop would pile up until the function returns
+		if err == nil {
+			clientConfig, err := clientcmd.RESTConfigFromKubeConfig([]byte(res.DebugKubeconfig))
+			if err != nil {
+				return nil, err // Invalid Kubeconfigs are immediately fatal
+			}
+			clientConfig.Host = fmt.Sprintf("localhost:%v", port)
+			clientSet, err := kubernetes.NewForConfig(clientConfig)
+			if err != nil {
+				return nil, err
+			}
+			return clientSet, nil
+		}
+		// Once ctx itself is over, the error just received is only a symptom of that, so report the last real one.
+		if ctx.Err() != nil {
+			return nil, lastErr
+		}
+		lastErr = err
+		select {
+		case <-ctx.Done():
+			return nil, lastErr
+		case <-time.After(1 * time.Second):
+		}
+	}
+}
+
+// makeTestDeploymentSpec returns a Deployment named name whose pod template runs one NGINX container guarded by an
+// HTTP readiness probe. A pod from it only becomes ready if scheduling, kubelet, the container runtime and the pod
+// network (for the probe traffic) all work, which makes it a cheap smoke test for those pieces.
+func makeTestDeploymentSpec(name string) *appsv1.Deployment {
+	// Readiness is decided by an HTTP GET against port 80 of the container.
+	probe := &corev1.Probe{
+		Handler: corev1.Handler{
+			HTTPGet: &corev1.HTTPGetAction{Port: intstr.FromInt(80)},
+		},
+	}
+	container := corev1.Container{
+		Name: "test",
+		// TODO(phab/T793): Build and preseed our own container images
+		Image:          "nginx:alpine",
+		ReadinessProbe: probe,
+	}
+	// Selector and template carry the same name label, as separate maps so callers may edit one safely.
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{Name: name},
+		Spec: appsv1.DeploymentSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"name": name},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{"name": name},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{container},
+				},
+			},
+		},
+	}
+}
+
+// makeTestStatefulSet returns a StatefulSet named name whose pod template runs one NGINX container with an HTTP
+// readiness probe on port 80 and which declares a single volume claim template, so that creating it also exercises
+// persistent volume claim handling.
+func makeTestStatefulSet(name string) *appsv1.StatefulSet {
+	// A 50Mi ReadWriteOnce claim template named www.
+	claim := corev1.PersistentVolumeClaim{
+		ObjectMeta: metav1.ObjectMeta{Name: "www"},
+		Spec: corev1.PersistentVolumeClaimSpec{
+			AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce},
+			Resources: corev1.ResourceRequirements{
+				Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("50Mi")},
+			},
+		},
+	}
+	// NOTE(review): the container declares no VolumeMounts, so the www claim is requested but not mounted.
+	container := corev1.Container{
+		Name:  "test",
+		Image: "nginx:alpine",
+		ReadinessProbe: &corev1.Probe{
+			Handler: corev1.Handler{
+				HTTPGet: &corev1.HTTPGetAction{Port: intstr.FromInt(80)},
+			},
+		},
+	}
+	return &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{Name: name},
+		Spec: appsv1.StatefulSetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"name": name},
+			},
+			VolumeClaimTemplates: []corev1.PersistentVolumeClaim{claim},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{"name": name},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{container},
+				},
+			},
+		},
+	}
+}
diff --git a/metropolis/test/e2e/main_test.go b/metropolis/test/e2e/main_test.go
new file mode 100644
index 0000000..46e862c
--- /dev/null
+++ b/metropolis/test/e2e/main_test.go
@@ -0,0 +1,244 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "log"
+ "net"
+ "net/http"
+ _ "net/http"
+ _ "net/http/pprof"
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "google.golang.org/grpc"
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
+
+ common "git.monogon.dev/source/nexantic.git/metropolis/node"
+ apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+ "git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+const (
+ // Timeout for the global test context.
+ //
+ // Bazel would eventually time out the test after 900s ("large") if, for some reason,
+ // the context cancellation fails to abort it.
+ globalTestTimeout = 600 * time.Second
+
+ // Timeouts for individual end-to-end tests of different sizes.
+ smallTestTimeout = 60 * time.Second
+ largeTestTimeout = 120 * time.Second
+)
+
+// TestE2E is the main E2E test entrypoint for single-node freshly-bootstrapped E2E tests. It starts a full Smalltown node
+// in bootstrap mode and then runs tests against it. The actual tests it performs are located in the RunGroup subtest.
+func TestE2E(t *testing.T) {
+	// Run pprof server for debugging
+	go func() {
+		addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
+		if err != nil {
+			panic(err)
+		}
+
+		l, err := net.ListenTCP("tcp", addr)
+		if err != nil {
+			log.Fatalf("Failed to listen on pprof port: %v", err) // l is nil here, so report the error rather than l.Addr()
+		}
+		defer l.Close()
+
+		log.Printf("pprof server listening on %s", l.Addr())
+		log.Printf("pprof server returned an error: %v", http.Serve(l, nil))
+	}()
+
+	// Set a global timeout to make sure this terminates
+	ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
+	defer cancel() // Also covers the early t.Fatalf exits below, which never reach the explicit cancel at the end
+	portMap, err := launch.ConflictFreePortMap(launch.NodePorts)
+	if err != nil {
+		t.Fatalf("Failed to acquire ports for e2e test: %v", err)
+	}
+
+	procExit := make(chan struct{})
+
+	go func() {
+		if err := launch.Launch(ctx, launch.Options{Ports: portMap, SerialPort: os.Stdout}); err != nil {
+			panic(err)
+		}
+		close(procExit)
+	}()
+	grpcClient, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure())
+	if err != nil {
+		// Every subtest below needs this connection, so continuing with a nil one would only crash later.
+		t.Fatalf("Failed to dial debug service (is it running): %v", err)
+	}
+	debugClient := apb.NewNodeDebugServiceClient(grpcClient)
+
+	// This exists to keep the parent around while all the children race
+	// It currently tests both a set of OS-level conditions and Kubernetes Deployments and StatefulSets
+	t.Run("RunGroup", func(t *testing.T) {
+		t.Run("Get Kubernetes Debug Kubeconfig", func(t *testing.T) {
+			t.Parallel()
+			selfCtx, cancel := context.WithTimeout(ctx, largeTestTimeout)
+			defer cancel()
+			clientSet, err := GetKubeClientSet(selfCtx, debugClient, portMap[common.KubernetesAPIPort])
+			if err != nil {
+				t.Fatal(err)
+			}
+			testEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
+				nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+				if err != nil {
+					return err
+				}
+				if len(nodes.Items) < 1 {
+					return errors.New("node not registered")
+				}
+				if len(nodes.Items) > 1 {
+					return errors.New("more than one node registered (but there is only one)")
+				}
+				node := nodes.Items[0]
+				for _, cond := range node.Status.Conditions {
+					if cond.Type != corev1.NodeReady {
+						continue
+					}
+					if cond.Status != corev1.ConditionTrue {
+						return fmt.Errorf("node not ready: %v", cond.Message)
+					}
+					return nil
+				}
+				// Only an explicit Ready=True counts; a node without the condition has not reported its status yet.
+				return errors.New("node has not reported a Ready condition yet")
+			})
+			testEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
+				_, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+				res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
+				if err != nil {
+					return err
+				}
+				if len(res.Items) == 0 {
+					return errors.New("pod didn't get created")
+				}
+				pod := res.Items[0]
+				if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				}
+				return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+			})
+			testEventual(t, "Simple deployment with runc", ctx, largeTestTimeout, func(ctx context.Context) error {
+				deployment := makeTestDeploymentSpec("test-deploy-2")
+				var runcStr = "runc"
+				deployment.Spec.Template.Spec.RuntimeClassName = &runcStr
+				_, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Simple deployment is running on runc", ctx, largeTestTimeout, func(ctx context.Context) error {
+				res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
+				if err != nil {
+					return err
+				}
+				if len(res.Items) == 0 {
+					return errors.New("pod didn't get created")
+				}
+				pod := res.Items[0]
+				if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				}
+				var errorMsg strings.Builder
+				for _, msg := range events.Items {
+					errorMsg.WriteString(" | ")
+					errorMsg.WriteString(msg.Message)
+				}
+				return fmt.Errorf("pod is not ready: %v", errorMsg.String())
+			})
+			testEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
+				_, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1"), metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
+				res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
+				if err != nil {
+					return err
+				}
+				if len(res.Items) == 0 {
+					return errors.New("pod didn't get created")
+				}
+				pod := res.Items[0]
+				if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				}
+				return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
+			})
+			testEventual(t, "Pod with preseeded image", ctx, smallTestTimeout, func(ctx context.Context) error {
+				_, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "preseed-test-1",
+					},
+					Spec: corev1.PodSpec{
+						Containers: []corev1.Container{{
+							Name:            "preseed-test-1",
+							ImagePullPolicy: corev1.PullNever,
+							Image:           "bazel/metropolis/test/e2e/preseedtest:preseedtest",
+						}},
+						RestartPolicy: corev1.RestartPolicyNever,
+					},
+				}, metav1.CreateOptions{})
+				return err
+			})
+			testEventual(t, "Pod with preseeded image is completed", ctx, largeTestTimeout, func(ctx context.Context) error {
+				pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "preseed-test-1", metav1.GetOptions{})
+				if err != nil {
+					return fmt.Errorf("failed to get pod: %w", err)
+				}
+				if pod.Status.Phase == corev1.PodSucceeded {
+					return nil
+				}
+				events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
+				if err != nil || len(events.Items) == 0 {
+					return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
+				}
+				return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
+			})
+		})
+	})
+
+	// Cancel the main context and wait for our subprocesses to exit
+	// to avoid leaking them and blocking the parent.
+	cancel()
+	<-procExit
+}
diff --git a/metropolis/test/e2e/preseedtest/BUILD.bazel b/metropolis/test/e2e/preseedtest/BUILD.bazel
new file mode 100644
index 0000000..41b32e3
--- /dev/null
+++ b/metropolis/test/e2e/preseedtest/BUILD.bazel
@@ -0,0 +1,16 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_docker//go:image.bzl", "go_image")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/e2e/preseedtest",
+ visibility = ["//visibility:private"],
+)
+
+go_image(
+ name = "preseedtest",
+ embed = [":go_default_library"],
+ pure = "on",
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/e2e/preseedtest/main.go b/metropolis/test/e2e/preseedtest/main.go
new file mode 100644
index 0000000..ceb3898
--- /dev/null
+++ b/metropolis/test/e2e/preseedtest/main.go
@@ -0,0 +1,23 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import "fmt"
+
+func main() {
+ fmt.Println("Hello world from preseeded image") // Print one line, then return normally so the process exits with status 0
+}
diff --git a/metropolis/test/e2e/utils.go b/metropolis/test/e2e/utils.go
new file mode 100644
index 0000000..f888189
--- /dev/null
+++ b/metropolis/test/e2e/utils.go
@@ -0,0 +1,51 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package e2e
+
+import (
+ "context"
+ "errors"
+ "testing"
+ "time"
+)
+
+// testEventual creates a new subtest looping the given function until it either doesn't return an error anymore or
+// the timeout is exceeded. The last returned non-context-related error is being used as the test error.
+func testEventual(t *testing.T, name string, ctx context.Context, timeout time.Duration, f func(context.Context) error) {
+	t.Helper()
+	t.Run(name, func(t *testing.T) {
+		var lastErr = errors.New("test didn't run to completion at least once")
+		t.Parallel()
+		ctx, cancel := context.WithTimeout(ctx, timeout) // Created after t.Parallel() so time spent paused is not counted
+		defer cancel()
+		for {
+			err := f(ctx)
+			if err == nil {
+				return
+			}
+			if ctx.Err() != nil { // The error is a symptom of the expired context (possibly wrapped); keep the last real one
+				t.Fatal(lastErr)
+			}
+			lastErr = err
+			select {
+			case <-ctx.Done():
+				t.Fatal(lastErr)
+			case <-time.After(1 * time.Second):
+			}
+		}
+	})
+}
diff --git a/metropolis/test/ktest/BUILD b/metropolis/test/ktest/BUILD
new file mode 100644
index 0000000..d94831c
--- /dev/null
+++ b/metropolis/test/ktest/BUILD
@@ -0,0 +1,63 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("//metropolis/node/build/kconfig-patcher:kconfig-patcher.bzl", "kconfig_patch")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/ktest",
+ visibility = ["//visibility:private"],
+ deps = ["//metropolis/test/launch:go_default_library"],
+)
+
+go_binary(
+ name = "ktest",
+ embed = [":go_default_library"],
+ pure = "on",
+ visibility = ["//visibility:public"],
+)
+
+kconfig_patch(
+ name = "testing-config",
+ src = "//third_party/linux:kernel-config",
+ out = "testing.config",
+ override_configs = {
+ # Unlock command line
+ "CONFIG_CMDLINE_OVERRIDE": "n",
+ "CONFIG_CMDLINE_BOOL": "n",
+ # Shave off 1 second from boot time
+ "CONFIG_SERIO_I8042": "",
+ "CONFIG_KEYBOARD_ATKBD": "",
+ "CONFIG_RTC_DRV_CMOS": "",
+ # Shave off an additional 18ms (half of the boot time)
+ "CONFIG_DEBUG_WX": "",
+ },
+)
+
+genrule(
+ name = "linux-testing",
+ srcs = [
+ "@linux//:all",
+ ":testing-config",
+ ],
+ outs = [
+ "linux-testing.elf",
+ ],
+ cmd = """
+ DIR=external/linux
+
+ mkdir $$DIR/.bin
+
+ cp $(location :testing-config) $$DIR/.config
+
+ (cd $$DIR && make -j $$(nproc) vmlinux >/dev/null)
+
+ cp $$DIR/vmlinux $@
+ """,
+ visibility = ["//visibility:public"],
+)
+
+filegroup(
+ name = "test-script",
+ srcs = ["run_ktest.sh"],
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/ktest/init/BUILD.bazel b/metropolis/test/ktest/init/BUILD.bazel
new file mode 100644
index 0000000..4161146
--- /dev/null
+++ b/metropolis/test/ktest/init/BUILD.bazel
@@ -0,0 +1,16 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/ktest/init",
+ visibility = ["//visibility:private"],
+ deps = ["@org_golang_x_sys//unix:go_default_library"],
+)
+
+go_binary(
+ name = "init",
+ embed = [":go_default_library"],
+ pure = "on",
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/ktest/init/main.go b/metropolis/test/ktest/init/main.go
new file mode 100644
index 0000000..f6049db
--- /dev/null
+++ b/metropolis/test/ktest/init/main.go
@@ -0,0 +1,83 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ktestinit is an init designed to run inside a lightweight VM for running tests in there.
+// It performs basic platform initialization like mounting kernel filesystems and launches the
+// test executable at /tester, passes the exit code back out over the control socket to ktest and
+// then terminates the VM kernel.
+package main
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "os/exec"
+
+ "golang.org/x/sys/unix"
+)
+
+// mountInit creates each kernel filesystem mountpoint (tolerating ones that already exist) and mounts it, in order.
+func mountInit() error {
+	for _, m := range []struct {
+		dir, fs string
+		flags   uintptr
+	}{
+		{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+		{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+	} {
+		if err := os.Mkdir(m.dir, 0755); err != nil && !os.IsExist(err) {
+			return fmt.Errorf("could not make %s: %w", m.dir, err)
+		}
+		if err := unix.Mount(m.fs, m.dir, m.fs, m.flags, ""); err != nil {
+			return fmt.Errorf("could not mount %s on %s: %w", m.fs, m.dir, err)
+		}
+	}
+	return nil
+}
+
+func main() {
+	if err := mountInit(); err != nil {
+		panic(err)
+	}
+
+	// First virtual serial is always stdout, second is control
+	ioConn, err := os.OpenFile("/dev/vport1p1", os.O_RDWR, 0)
+	if err != nil {
+		fmt.Printf("Failed to open communication device: %v\n", err)
+		return
+	}
+	cmd := exec.Command("/tester", "-test.v")
+	cmd.Stderr = os.Stderr
+	cmd.Stdout = os.Stdout
+	cmd.Env = append(cmd.Env, "IN_KTEST=true")
+	if err := cmd.Run(); err != nil {
+		var exerr *exec.ExitError
+		if errors.As(err, &exerr) {
+			if _, err := ioConn.Write([]byte{uint8(exerr.ExitCode())}); err != nil { // The tester ran and failed: report its exit code
+				panic(err)
+			}
+		} else { // The tester could not be run at all, so there is no exit code to report
+			fmt.Printf("Failed to execute tests (tests didn't run): %v", err)
+		}
+	} else if _, err := ioConn.Write([]byte{0}); err != nil { // Success is reported as exit code 0; a lost report is as fatal as above
+		panic(err)
+	}
+	ioConn.Close()
+
+	unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART) // Requests a restart once the result has been written
+}
diff --git a/metropolis/test/ktest/ktest.bzl b/metropolis/test/ktest/ktest.bzl
new file mode 100644
index 0000000..fdbff20
--- /dev/null
+++ b/metropolis/test/ktest/ktest.bzl
@@ -0,0 +1,62 @@
+# Copyright 2020 The Monogon Project Authors.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Ktest provides a simple macro to run tests inside the normal Smalltown kernel
+"""
+
+def ktest(deps, tester, initramfs_extra, cmdline):  # Declares a "test_initramfs" genrule and a "ktest" sh_test in the calling package.
+ native.genrule(
+ name = "test_initramfs",
+ srcs = [
+ "//metropolis/test/ktest/init",
+ ] + deps + [tester],  # deps only become genrule inputs; what is packed is decided by the cpio listing below
+ outs = [
+ "initramfs.cpio.lz4",
+ ],
+ testonly = True,
+ cmd = """
+ $(location @linux//:gen_init_cpio) - <<- 'EOF' | lz4 -l > \"$@\"
+dir /dev 0755 0 0
+nod /dev/console 0600 0 0 c 5 1
+nod /dev/null 0644 0 0 c 1 3
+file /init $(location //metropolis/test/ktest/init) 0755 0 0
+file /tester $(location """ + tester + """) 0755 0 0
+""" + initramfs_extra + """
+EOF
+ """,
+ tools = [
+ "@linux//:gen_init_cpio",
+ ],
+ )
+
+ native.sh_test(
+ name = "ktest",
+ args = [  # positional arguments for the test script: launcher binary, initramfs, kernel, extra kernel cmdline
+ "$(location //metropolis/test/ktest)",
+ "$(location :test_initramfs)",
+ "$(location //metropolis/test/ktest:linux-testing)",
+ cmdline,
+ ],
+ size = "small",
+ srcs = ["//metropolis/test/ktest:test-script"],
+ data = [
+ "//metropolis/test/ktest",
+ ":test_initramfs",
+ "//metropolis/test/ktest:linux-testing",
+ "@com_github_bonzini_qboot//:qboot-bin",  # presumably the bios.bin that RunMicroVM passes to QEMU via -bios
+ ],
+ )
diff --git a/metropolis/test/ktest/main.go b/metropolis/test/ktest/main.go
new file mode 100644
index 0000000..7f750b8
--- /dev/null
+++ b/metropolis/test/ktest/main.go
@@ -0,0 +1,75 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// ktest is a test launcher for running tests inside a custom kernel and passes the results
+// back out.
+package main
+
+import (
+ "context"
+ "flag"
+ "io"
+ "log"
+ "os"
+ "time"
+
+ "git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+var (
+ kernelPath = flag.String("kernel-path", "", "Path of the Kernel ELF file")
+ initrdPath = flag.String("initrd-path", "", "Path of the initrd image")
+ cmdline = flag.String("cmdline", "", "Additional kernel command line options")
+)
+
+func main() { // runs the test VM and exits with the exit code byte received over the feedback socket
+ flag.Parse()
+
+ hostFeedbackConn, vmFeedbackConn, err := launch.NewSocketPair()
+ if err != nil {
+ log.Fatalf("Failed to create socket pair: %v", err)
+ }
+
+ exitCodeChan := make(chan uint8, 1) // buffered, so the reader goroutine can deliver the code without blocking
+
+ go func() {
+ defer hostFeedbackConn.Close()
+
+ returnCode := make([]byte, 1) // the feedback protocol is a single exit code byte
+ if _, err := io.ReadFull(hostFeedbackConn, returnCode); err != nil {
+ log.Fatalf("Failed to read socket: %v", err)
+ }
+ exitCodeChan <- returnCode[0]
+ }()
+
+ if err := launch.RunMicroVM(context.Background(), &launch.MicroVMOptions{
+ KernelPath: *kernelPath,
+ InitramfsPath: *initrdPath,
+ Cmdline: *cmdline,
+ SerialPort: os.Stdout,
+ ExtraChardevs: []*os.File{vmFeedbackConn}, // presumably the control port ktestinit opens as /dev/vport1p1
+ DisableHostNetworkInterface: true,
+ }); err != nil {
+ log.Fatalf("Failed to run ktest VM: %v", err)
+ }
+
+ select { // RunMicroVM has returned, so the VM is gone; wait briefly for the reader goroutine
+ case exitCode := <-exitCodeChan:
+ os.Exit(int(exitCode))
+ case <-time.After(1 * time.Second):
+ log.Fatal("Failed to get an error code back (test runtime probably crashed)")
+ }
+}
diff --git a/metropolis/test/ktest/run_ktest.sh b/metropolis/test/ktest/run_ktest.sh
new file mode 100755
index 0000000..02920a1
--- /dev/null
+++ b/metropolis/test/ktest/run_ktest.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+exec "$1" -initrd-path "$2" -kernel-path "$3" -cmdline "$4" # $1: ktest launcher, $2: initramfs, $3: kernel, $4: extra kernel cmdline (order matches the args in ktest.bzl)
\ No newline at end of file
diff --git a/metropolis/test/launch/BUILD.bazel b/metropolis/test/launch/BUILD.bazel
new file mode 100644
index 0000000..b6245e1
--- /dev/null
+++ b/metropolis/test/launch/BUILD.bazel
@@ -0,0 +1,17 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["launch.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/launch",
+ visibility = ["//metropolis:__subpackages__"],
+ deps = [  # one entry per non-stdlib package imported by launch.go
+ "//golibs/common:go_default_library",
+ "//metropolis/node:go_default_library",
+ "//metropolis/proto/api:go_default_library",
+ "@com_github_golang_protobuf//proto:go_default_library",
+ "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
+ "@org_golang_google_grpc//:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/metropolis/test/launch/cli/launch-multi2/BUILD.bazel b/metropolis/test/launch/cli/launch-multi2/BUILD.bazel
new file mode 100644
index 0000000..9f27860
--- /dev/null
+++ b/metropolis/test/launch/cli/launch-multi2/BUILD.bazel
@@ -0,0 +1,29 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/launch/cli/launch-multi2",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//metropolis/node:go_default_library",
+ "//metropolis/proto/api:go_default_library",
+ "//metropolis/test/launch:go_default_library",
+ "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
+ "@org_golang_google_grpc//:go_default_library",
+ ],
+)
+
+go_binary(
+ name = "launch-multi2",
+ data = [  # runtime files; presumably the ones launch.go and main.go open by relative path (image, TPM state, initramfs, kernel, firmware, BIOS)
+ "//metropolis/node:image",
+ "//metropolis/node:swtpm_data",
+ "//metropolis/test/nanoswitch:initramfs",
+ "//metropolis/test/ktest:linux-testing",
+ "//third_party/edk2:firmware",
+ "@com_github_bonzini_qboot//:qboot-bin",
+ ],
+ embed = [":go_default_library"],
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/launch/cli/launch-multi2/main.go b/metropolis/test/launch/cli/launch-multi2/main.go
new file mode 100644
index 0000000..265d6a0
--- /dev/null
+++ b/metropolis/test/launch/cli/launch-multi2/main.go
@@ -0,0 +1,102 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "context"
+ "log"
+ "os"
+ "os/signal"
+ "syscall"
+ "time"
+
+ grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
+ "google.golang.org/grpc"
+
+ common "git.monogon.dev/source/nexantic.git/metropolis/node"
+ apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+ "git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+func main() { // starts two node VMs connected through a nanoswitch MicroVM; the second node is enrolled via a golden ticket
+ sigs := make(chan os.Signal, 1)
+ signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
+ ctx, cancel := context.WithCancel(context.Background())
+ go func() {
+ <-sigs
+ cancel()
+ }()
+ sw0, vm0, err := launch.NewSocketPair() // sw* ends are handed to nanoswitch, vm* ends to the node VMs
+ if err != nil {
+ log.Fatalf("Failed to create network pipe: %v\n", err)
+ }
+ sw1, vm1, err := launch.NewSocketPair()
+ if err != nil {
+ log.Fatalf("Failed to create network pipe: %v\n", err)
+ }
+
+ go func() {
+ if err := launch.Launch(ctx, launch.Options{ConnectToSocket: vm0, SerialPort: os.Stdout}); err != nil {
+ log.Fatalf("Failed to launch vm0: %v", err)
+ }
+ }()
+ nanoswitchPortMap := make(launch.PortMap)
+ identityPorts := []uint16{ // same ports as launch.NanoswitchPorts, mapped 1:1 onto the host
+ common.ExternalServicePort,
+ common.DebugServicePort,
+ common.KubernetesAPIPort,
+ }
+ for _, port := range identityPorts {
+ nanoswitchPortMap[port] = port
+ }
+ go func() {
+ opts := []grpcretry.CallOption{
+ grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
+ }
+ conn, err := nanoswitchPortMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure(),
+ grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
+ if err != nil {
+ panic(err)
+ }
+ defer conn.Close()
+ debug := apb.NewNodeDebugServiceClient(conn)
+ res, err := debug.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{
+ // HACK: this is assigned by DHCP, and we assume that everything goes well.
+ ExternalIp: "10.1.0.3",
+ }, grpcretry.WithMax(10)) // retried up to 10 times with the backoff configured above
+ if err != nil {
+ log.Fatalf("Failed to get golden ticket: %v", err)
+ }
+
+ ec := &apb.EnrolmentConfig{
+ GoldenTicket: res.Ticket,
+ }
+
+ if err := launch.Launch(ctx, launch.Options{ConnectToSocket: vm1, EnrolmentConfig: ec, SerialPort: os.Stdout}); err != nil {
+ log.Fatalf("Failed to launch vm1: %v", err)
+ }
+ }()
+ if err := launch.RunMicroVM(ctx, &launch.MicroVMOptions{ // runs in the foreground; main returns once the nanoswitch VM exits
+ SerialPort: os.Stdout,
+ KernelPath: "metropolis/test/ktest/linux-testing.elf",
+ InitramfsPath: "metropolis/test/nanoswitch/initramfs.lz4",
+ ExtraNetworkInterfaces: []*os.File{sw0, sw1},
+ PortMap: nanoswitchPortMap,
+ }); err != nil {
+ log.Fatalf("Failed to launch nanoswitch: %v", err)
+ }
+}
diff --git a/metropolis/test/launch/cli/launch/BUILD.bazel b/metropolis/test/launch/cli/launch/BUILD.bazel
new file mode 100644
index 0000000..6b1461d
--- /dev/null
+++ b/metropolis/test/launch/cli/launch/BUILD.bazel
@@ -0,0 +1,20 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["main.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/launch/cli/launch",
+ visibility = ["//visibility:private"],
+ deps = ["//metropolis/test/launch:go_default_library"],
+)
+
+go_binary(
+ name = "launch",
+ data = [  # runtime files; presumably the disk image, TPM state and OVMF firmware that launch.Launch opens by relative path
+ "//metropolis/node:image",
+ "//metropolis/node:swtpm_data",
+ "//third_party/edk2:firmware",
+ ],
+ embed = [":go_default_library"],
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/launch/cli/launch/main.go b/metropolis/test/launch/cli/launch/main.go
new file mode 100644
index 0000000..852c8e1
--- /dev/null
+++ b/metropolis/test/launch/cli/launch/main.go
@@ -0,0 +1,43 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "context"
+ "log"
+ "os"
+ "os/signal"
+ "syscall"
+
+ "git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+// main launches a single node VM with identity-mapped node ports and its serial output on stdout.
+// SIGINT/SIGTERM cancel the launch context; an error equal to ctx.Err() counts as a clean exit.
+func main() {
+	ctx, cancel := context.WithCancel(context.Background())
+	interrupts := make(chan os.Signal, 1)
+	signal.Notify(interrupts, syscall.SIGINT, syscall.SIGTERM)
+	go func() {
+		<-interrupts
+		cancel()
+	}()
+	opts := launch.Options{Ports: launch.IdentityPortMap(launch.NodePorts), SerialPort: os.Stdout}
+	if err := launch.Launch(ctx, opts); err != nil && err != ctx.Err() {
+		log.Fatalf("Failed to execute: %v\n", err)
+	}
+}
diff --git a/metropolis/test/launch/launch.go b/metropolis/test/launch/launch.go
new file mode 100644
index 0000000..2d495e0
--- /dev/null
+++ b/metropolis/test/launch/launch.go
@@ -0,0 +1,557 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package launch
+
+import (
+ "bytes"
+ "context"
+ "crypto/rand"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "net"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/golang/protobuf/proto"
+ grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
+ "golang.org/x/sys/unix"
+ "google.golang.org/grpc"
+
+ freeport "git.monogon.dev/source/nexantic.git/golibs/common"
+ common "git.monogon.dev/source/nexantic.git/metropolis/node"
+ apb "git.monogon.dev/source/nexantic.git/metropolis/proto/api"
+)
+
+type qemuValue map[string][]string // QEMU option keys, each with zero or more values
+
+// toOption joins name (unless empty) and all entries into one comma-separated QEMU option. Keys without values become
+// bare flags; key order follows Go's unspecified map order. Example: "test", {"k": {"a", "b"}} returns "test,k=a,k=b"
+func (value qemuValue) toOption(name string) string {
+	parts := make([]string, 0, len(value)+1)
+	if name != "" {
+		parts = append(parts, name)
+	}
+	for key, vals := range value {
+		if len(vals) == 0 {
+			parts = append(parts, key)
+		}
+		for _, v := range vals {
+			parts = append(parts, fmt.Sprintf("%v=%v", key, v))
+		}
+	}
+	return strings.Join(parts, ",")
+}
+
+// copyFile copies the contents of src into dst, creating dst or truncating it if it already exists.
+// The first error from opening either file, copying the data or closing dst is returned.
+func copyFile(src, dst string) error {
+	source, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer source.Close()
+
+	target, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer target.Close() // covers the copy-error path; the explicit Close below reports write-back errors
+	if _, err := io.Copy(target, source); err != nil {
+		return err
+	}
+	return target.Close()
+}
+
+// PortMap represents where VM ports are mapped to on the host. It maps from the VM port number to the host port number.
+type PortMap map[uint16]uint16
+
+// toQemuForwards renders every mapping as a QEMU hostfwd value of the form "tcp::<host port>-:<VM port>"
+// (https://qemu.weilnetz.de/doc/qemu-doc.html#:~:text=hostfwd=). An empty or nil map yields a nil slice.
+func (p PortMap) toQemuForwards() []string {
+	var forwards []string
+	for guest, host := range p {
+		forwards = append(forwards, fmt.Sprintf("tcp::%v-:%v", host, guest))
+	}
+	return forwards
+}
+
+// DialGRPC creates a gRPC client connection to localhost on the host port that the given VM port is mapped to.
+// It fails if port has no entry in p or if grpc.Dial returns an error.
+func (p PortMap) DialGRPC(port uint16, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
+	hostPort, mapped := p[port]
+	if !mapped {
+		return nil, fmt.Errorf("cannot dial port: port %v is not mapped/forwarded", port)
+	}
+	conn, err := grpc.Dial(fmt.Sprintf("localhost:%v", hostPort), opts...)
+	if err != nil {
+		return nil, fmt.Errorf("failed to dial port %v: %w", port, err)
+	}
+	return conn, nil
+}
+
+// Options contains all options that can be passed to Launch().
+type Options struct {
+ // Ports contains the port mapping where to expose the internal ports of the VM to the host. See IdentityPortMap()
+ // and ConflictFreePortMap(). Ignored when ConnectToSocket is set.
+ Ports PortMap
+
+ // If set to true, reboots are honored. Otherwise all reboots exit the Launch() command (QEMU gets -no-reboot).
+ // Smalltown generally restarts on almost all errors, so unless you want to test reboot behavior this should be false.
+ AllowReboot bool
+
+ // By default the Smalltown VM is connected to the Host via SLIRP. If ConnectToSocket is set, it is instead connected
+ // to the given file descriptor/socket. If this is set, all port maps from the Ports option are ignored.
+ // Intended for networking this instance together with others for running more complex network configurations.
+ ConnectToSocket *os.File
+
+ // SerialPort is the file (descriptor) that Launch attaches as QEMU's stdout, which carries the machine's serial
+ // port output. It can be set to an existing file descriptor (like os.Stdout/os.Stderr) or you can use
+ // NewSocketPair() to get one end to read from in Go.
+ SerialPort *os.File
+
+ // EnrolmentConfig is passed into the VM (via QEMU fw_cfg) and subsequently used for bootstrapping if no enrolment config is built-in
+ EnrolmentConfig *apb.EnrolmentConfig
+}
+
+// NodePorts is the list of ports a fully operational Smalltown node listens on. Pass it to IdentityPortMap() or ConflictFreePortMap() to build Options.Ports.
+var NodePorts = []uint16{common.ConsensusPort, common.NodeServicePort, common.MasterServicePort,
+ common.ExternalServicePort, common.DebugServicePort, common.KubernetesAPIPort, common.DebuggerPort}
+
+// IdentityPortMap builds a PortMap in which every given VM port is forwarded to the same port number on the host.
+// This is mainly useful for development against Smalltown. The dbg command requires this mapping.
+func IdentityPortMap(ports []uint16) PortMap {
+	identity := make(PortMap, len(ports))
+	for i := range ports {
+		identity[ports[i]] = ports[i]
+	}
+	return identity
+}
+
+// ConflictFreePortMap returns a port map where each given port is mapped onto a free host port obtained from
+// freeport.AllocateTCPPort. This is intended for automated testing where multiple instances of Smalltown might be
+// running. Please call this function for each Launch command separately and as close to it as possible since it cannot
+// guarantee that the ports will remain free. On error the partially filled map is returned alongside the error.
+func ConflictFreePortMap(ports []uint16) (PortMap, error) {
+	allocated := make(PortMap)
+	for _, vmPort := range ports {
+		hostPort, closer, err := freeport.AllocateTCPPort()
+		if err != nil {
+			return allocated, fmt.Errorf("failed to get free host port: %w", err)
+		}
+		// Every listener stays open until this function returns, i.e. until all ports have been allocated.
+		defer closer.Close()
+		allocated[vmPort] = hostPort
+	}
+	return allocated, nil
+}
+
+// generateRandomEthernetMAC returns a random EUI-48 Ethernet MAC address that is marked as a locally administered
+// individual address. A failure to read randomness is returned wrapped (%w), like the other errors in this file.
+func generateRandomEthernetMAC() (*net.HardwareAddr, error) {
+	macBuf := make([]byte, 6)
+	if _, err := rand.Read(macBuf); err != nil {
+		return nil, fmt.Errorf("failed to read randomness for MAC: %w", err)
+	}
+
+	// Set U/L bit and clear I/G bit (locally administered individual MAC)
+	// Ref IEEE 802-2014 Section 8.2.2
+	macBuf[0] = (macBuf[0] | 2) & 0xfe
+	mac := net.HardwareAddr(macBuf)
+	return &mac, nil
+}
+
+// Launch launches a Smalltown instance with the given options and blocks until QEMU exits. The instance runs mostly
+// paravirtualized but with some emulated hardware similar to how a cloud provider might set up its VMs. The disk is
+// fully writable but is run in snapshot mode meaning that changes are not kept beyond a single invocation. If QEMU is
+// killed by SIGKILL (e.g. because ctx was canceled) nil is returned; other QEMU failures are returned as *QEMUError.
+func Launch(ctx context.Context, options Options) error {
+	// Pin temp directory to /tmp until we can use abstract socket namespace in QEMU (next release after 5.0,
+	// https://github.com/qemu/qemu/commit/776b97d3605ed0fc94443048fdf988c7725e38a9). swtpm accepts already-open FDs
+	// so we can pass in an abstract socket namespace FD that we open and pass the name of it to QEMU. Not pinning this
+	// crashes both swtpm and qemu because we run into UNIX socket length limitations (for legacy reasons 108 chars).
+	tempDir, err := ioutil.TempDir("/tmp", "launch*")
+	if err != nil {
+		return fmt.Errorf("failed to create temporary directory: %w", err)
+	}
+	defer os.RemoveAll(tempDir)
+
+	// Copy TPM state into a temporary directory since it's being modified by the emulator
+	tpmTargetDir := filepath.Join(tempDir, "tpm")
+	tpmSrcDir := "metropolis/node/tpm"
+	if err := os.Mkdir(tpmTargetDir, 0755); err != nil { // needs the x bit, else non-root users cannot create files in it
+		return fmt.Errorf("failed to create TPM state directory: %w", err)
+	}
+	tpmFiles, err := ioutil.ReadDir(tpmSrcDir)
+	if err != nil {
+		return fmt.Errorf("failed to read TPM directory: %w", err)
+	}
+	for _, file := range tpmFiles {
+		name := file.Name()
+		if err := copyFile(filepath.Join(tpmSrcDir, name), filepath.Join(tpmTargetDir, name)); err != nil {
+			return fmt.Errorf("failed to copy TPM directory: %w", err)
+		}
+	}
+
+	// SLIRP user networking is the default. Port forwards are only added when ports are actually
+	// mapped: toOption renders a key with no values as a bare flag, and a bare "hostfwd" is not a
+	// forwarding rule. RunMicroVM guards its hostfwd option in the same way.
+	qemuNetType := "user"
+	qemuNetConfig := qemuValue{
+		"id":        {"net0"},
+		"net":       {"10.42.0.0/24"},
+		"dhcpstart": {"10.42.0.10"},
+	}
+	if len(options.Ports) > 0 {
+		qemuNetConfig["hostfwd"] = options.Ports.toQemuForwards()
+	}
+	if options.ConnectToSocket != nil {
+		// The socket is passed as the only ExtraFiles entry below, which makes it FD 3 in QEMU.
+		qemuNetType = "socket"
+		qemuNetConfig = qemuValue{"id": {"net0"}, "fd": {"3"}}
+	}
+
+	tpmSocketPath := filepath.Join(tempDir, "tpm-socket")
+
+	mac, err := generateRandomEthernetMAC()
+	if err != nil {
+		return err
+	}
+
+	qemuArgs := []string{"-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "4096",
+		"-cpu", "host", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4",
+		"-drive", "if=pflash,format=raw,readonly,file=external/edk2/OVMF_CODE.fd",
+		"-drive", "if=pflash,format=raw,snapshot=on,file=external/edk2/OVMF_VARS.fd",
+		"-drive", "if=virtio,format=raw,snapshot=on,cache=unsafe,file=metropolis/node/smalltown.img",
+		"-netdev", qemuNetConfig.toOption(qemuNetType),
+		"-device", "virtio-net-pci,netdev=net0,mac=" + mac.String(),
+		"-chardev", "socket,id=chrtpm,path=" + tpmSocketPath,
+		"-tpmdev", "emulator,id=tpm0,chardev=chrtpm",
+		"-device", "tpm-tis,tpmdev=tpm0",
+		"-device", "virtio-rng-pci",
+		"-serial", "stdio"}
+
+	if !options.AllowReboot {
+		qemuArgs = append(qemuArgs, "-no-reboot")
+	}
+
+	if options.EnrolmentConfig != nil {
+		enrolmentConfigPath := filepath.Join(tempDir, "enrolment.pb")
+		enrolmentConfigRaw, err := proto.Marshal(options.EnrolmentConfig)
+		if err != nil {
+			return fmt.Errorf("failed to encode enrolment config: %w", err)
+		}
+		if err := ioutil.WriteFile(enrolmentConfigPath, enrolmentConfigRaw, 0644); err != nil {
+			return fmt.Errorf("failed to write enrolment config: %w", err)
+		}
+		qemuArgs = append(qemuArgs, "-fw_cfg", "name=com.nexantic.smalltown/enrolment.pb,file="+enrolmentConfigPath)
+	}
+
+	// Start TPM emulator as a subprocess
+	tpmCtx, tpmCancel := context.WithCancel(ctx)
+	defer tpmCancel()
+
+	tpmEmuCmd := exec.CommandContext(tpmCtx, "swtpm", "socket", "--tpm2", "--tpmstate", "dir="+tpmTargetDir, "--ctrl", "type=unixio,path="+tpmSocketPath)
+	tpmEmuCmd.Stderr = os.Stderr
+	tpmEmuCmd.Stdout = os.Stdout
+
+	err = tpmEmuCmd.Start()
+	if err != nil {
+		return fmt.Errorf("failed to start TPM emulator: %w", err)
+	}
+
+	// Start the main qemu binary
+	systemCmd := exec.CommandContext(ctx, "qemu-system-x86_64", qemuArgs...)
+	if options.ConnectToSocket != nil {
+		systemCmd.ExtraFiles = []*os.File{options.ConnectToSocket}
+	}
+
+	var stdErrBuf bytes.Buffer
+	systemCmd.Stderr = &stdErrBuf
+	systemCmd.Stdout = options.SerialPort
+
+	err = systemCmd.Run()
+
+	// Stop TPM emulator and wait for it to exit to properly reap the child process
+	tpmCancel()
+	log.Print("Waiting for TPM emulator to exit")
+	// Wait returns a SIGKILL error because we just cancelled its context; we still call it to avoid creating zombies.
+	_ = tpmEmuCmd.Wait()
+
+	var exerr *exec.ExitError
+	if err != nil && errors.As(err, &exerr) {
+		status := exerr.ProcessState.Sys().(syscall.WaitStatus)
+		if status.Signaled() && status.Signal() == syscall.SIGKILL {
+			// Process was killed externally (most likely by our context being canceled).
+			// This is a normal exit for us, so return nil
+			return nil
+		}
+		exerr.Stderr = stdErrBuf.Bytes()
+		newErr := QEMUError(*exerr)
+		return &newErr
+	}
+	return err
+}
+
+// NewSocketPair creates a connected pair of AF_UNIX stream sockets and returns both ends as *os.File. By connecting
+// the ends to different instances (e.g. via the ConnectToSocket option) you link them with a virtual "network cable".
+func NewSocketPair() (*os.File, *os.File, error) {
+	pair, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to call socketpair: %w", err)
+	}
+
+	// The names passed to os.NewFile are only labels for the resulting files.
+	first, second := os.NewFile(uintptr(pair[0]), "network0"), os.NewFile(uintptr(pair[1]), "network1")
+	return first, second, nil
+}
+
+// HostInterfaceMAC is the fixed MAC address RunMicroVM assigns to the host SLIRP network interface if it is not
+// disabled (see DisableHostNetworkInterface in MicroVMOptions).
+var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}
+
+// MicroVMOptions contains all options to start a MicroVM with RunMicroVM.
+type MicroVMOptions struct {
+ // Path to the ELF kernel binary (passed to QEMU as -kernel)
+ KernelPath string
+
+ // Path to the Initramfs (passed to QEMU as -initrd)
+ InitramfsPath string
+
+ // Cmdline contains additional kernel commandline options, appended after "reboot=t console=hvc0 quiet"
+ Cmdline string
+
+ // SerialPort is the file (descriptor) that RunMicroVM attaches as QEMU's stdout, which carries the VM's console
+ // output. It can be set to an existing file descriptor (like os.Stdout/os.Stderr) or you can use NewSocketPair()
+ // to get one end to read from in Go.
+ SerialPort *os.File
+
+ // ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary number of additional serial ports
+ ExtraChardevs []*os.File
+
+ // ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which are mapped into the VM as virtio
+ // network interfaces. The first interface is always a SLIRP-backed interface for communicating with the host.
+ ExtraNetworkInterfaces []*os.File
+
+ // PortMap contains ports that are mapped to the host through the built-in SLIRP network interface.
+ PortMap PortMap
+
+ // DisableHostNetworkInterface disables the SLIRP-backed host network interface that is normally the first network
+ // interface. If this is set PortMap is ignored. Mostly useful for speeding up QEMU's startup time for tests.
+ DisableHostNetworkInterface bool
+}
+
+// RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot (<40ms). It blocks until QEMU exits
+// and returns a *QEMUError carrying QEMU's stderr output if QEMU exited unsuccessfully.
+func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error {
+	// Generate options for all the file descriptors we'll be passing as virtio "serial ports"
+	var extraArgs []string
+	for idx := range opts.ExtraChardevs {
+		idxStr := strconv.Itoa(idx)
+		id := "extra" + idxStr
+		// That this works is pretty much a hack, but upstream QEMU doesn't have a bidirectional chardev backend not
+		// based around files/sockets on the disk which are a giant pain to work with.
+		// We're using QEMU's fdset functionality to make FDs available as pseudo-files and then "ab"using the pipe
+		// backend's fallback functionality to get a single bidirectional chardev backend backed by a passed-down
+		// RDWR fd.
+		// Ref https://lists.gnu.org/archive/html/qemu-devel/2015-12/msg01256.html
+		addFdConf := qemuValue{
+			"set": {idxStr},
+			"fd":  {strconv.Itoa(idx + 3)}, // ExtraFiles entry i becomes FD 3+i in the child process
+		}
+		chardevConf := qemuValue{
+			"id":   {id},
+			"path": {"/dev/fdset/" + idxStr},
+		}
+		deviceConf := qemuValue{
+			"chardev": {id},
+		}
+		extraArgs = append(extraArgs, "-add-fd", addFdConf.toOption(""),
+			"-chardev", chardevConf.toOption("pipe"), "-device", deviceConf.toOption("virtserialport"))
+	}
+
+	for idx := range opts.ExtraNetworkInterfaces {
+		id := fmt.Sprintf("net%v", idx)
+		netdevConf := qemuValue{
+			"id": {id},
+			"fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))}, // network FDs follow the chardev FDs in ExtraFiles
+		}
+		extraArgs = append(extraArgs, "-netdev", netdevConf.toOption("socket"), "-device", "virtio-net-device,netdev="+id)
+	}
+
+	// This sets up a minimum viable environment for our Linux kernel.
+	// It clears all standard QEMU configuration and sets up a MicroVM machine
+	// (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy emulation turned off. This means
+	// the only "hardware" the Linux kernel inside can communicate with is a single virtio-mmio region. Over that MMIO
+	// interface we run a paravirtualized RNG (since the kernel in there has nothing to gather that from and it
+	// delays booting), a single paravirtualized console and an arbitrary number of extra serial ports for talking to
+	// various things that might run inside. The kernel, initramfs and command line are mapped into VM memory at boot
+	// time and not loaded from any sort of disk. Booting and shutting off one of these VMs takes <100ms.
+	baseArgs := []string{"-nodefaults", "-no-user-config", "-nographic", "-no-reboot",
+		"-accel", "kvm", "-cpu", "host",
+		// Needed until QEMU updates their bundled qboot version (needs https://github.com/bonzini/qboot/pull/28)
+		"-bios", "external/com_github_bonzini_qboot/bios.bin",
+		"-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off",
+		"-kernel", opts.KernelPath,
+		// We force using a triple-fault reboot strategy since otherwise the kernel first tries others (like ACPI) which
+		// are not available in this very restricted environment. Similarly we need to override the boot console since
+		// there's nothing on the ISA bus that the kernel could talk to. We also force quiet for performance reasons.
+		"-append", "reboot=t console=hvc0 quiet " + opts.Cmdline,
+		"-initrd", opts.InitramfsPath,
+		"-device", "virtio-rng-device,max-bytes=1024,period=1000",
+		"-device", "virtio-serial-device,max_ports=16",
+		"-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0",
+	}
+
+	if !opts.DisableHostNetworkInterface {
+		qemuNetType := "user"
+		qemuNetConfig := qemuValue{
+			"id":        {"usernet0"},
+			"net":       {"10.42.0.0/24"},
+			"dhcpstart": {"10.42.0.10"},
+		}
+		if len(opts.PortMap) > 0 { // an empty map would make toOption emit a bare "hostfwd" flag
+			qemuNetConfig["hostfwd"] = opts.PortMap.toQemuForwards()
+		}
+
+		baseArgs = append(baseArgs, "-netdev", qemuNetConfig.toOption(qemuNetType),
+			"-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String())
+	}
+
+	var stdErrBuf bytes.Buffer
+	cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...)
+	cmd.Stdout = opts.SerialPort
+	cmd.Stderr = &stdErrBuf
+	cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...)
+	cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...)
+
+	err := cmd.Run()
+	var exerr *exec.ExitError
+	if err != nil && errors.As(err, &exerr) {
+		exerr.Stderr = stdErrBuf.Bytes()
+		newErr := QEMUError(*exerr)
+		return &newErr
+	}
+	return err
+}
+
+// QEMUError is a special type of ExitError used when QEMU fails. Its Error method reports the process state
+// followed by the stderr output stored in the error, which helps with debugging.
+type QEMUError exec.ExitError
+
+func (e *QEMUError) Error() string {
+	return fmt.Sprintf("%v: %v", e.ProcessState.String(), string(e.Stderr))
+}
+
+// NanoswitchPorts contains all ports forwarded by Nanoswitch to the first VM. LaunchCluster maps each of them to a free host port.
+var NanoswitchPorts = []uint16{
+ common.ExternalServicePort,
+ common.DebugServicePort,
+ common.KubernetesAPIPort,
+}
+
+// ClusterOptions contains all options for launching a Smalltown cluster with LaunchCluster.
+type ClusterOptions struct {
+ // The number of nodes this cluster should be started with initially. LaunchCluster rejects 0 and values above 2.
+ NumNodes int
+}
+
+// LaunchCluster launches a cluster of Smalltown VMs together with a Nanoswitch instance to network them all together.
+// It returns a debug service client for the first node and the host port map of the ports forwarded by Nanoswitch.
+// The client's gRPC connection is left open on success, as closing it here would break the returned client.
+func LaunchCluster(ctx context.Context, opts ClusterOptions) (apb.NodeDebugServiceClient, PortMap, error) {
+	// Validate before allocating anything so that a rejected request does not leak socket pairs.
+	if opts.NumNodes <= 0 {
+		return nil, nil, errors.New("refusing to start cluster with zero nodes")
+	}
+	if opts.NumNodes > 2 {
+		return nil, nil, errors.New("launching more than 2 nodes is unsupported pending replacement of golden tickets")
+	}
+
+	var switchPorts, vmPorts []*os.File
+	for i := 0; i < opts.NumNodes; i++ {
+		switchPort, vmPort, err := NewSocketPair()
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to get socketpair: %w", err)
+		}
+		switchPorts = append(switchPorts, switchPort)
+		vmPorts = append(vmPorts, vmPort)
+	}
+
+	go func() {
+		if err := Launch(ctx, Options{ConnectToSocket: vmPorts[0]}); err != nil {
+			// Launch() only terminates when QEMU has terminated. At that point our function probably doesn't run anymore
+			// so we have no way of communicating the error back up, so let's just log it. Also a failure in launching
+			// VMs should be very visible by the unavailability of the clients we return.
+			log.Printf("Failed to launch vm0: %v", err)
+		}
+	}()
+
+	portMap, err := ConflictFreePortMap(NanoswitchPorts)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to allocate ephemeral ports: %w", err)
+	}
+
+	go func() {
+		if err := RunMicroVM(ctx, &MicroVMOptions{
+			KernelPath:             "metropolis/test/ktest/linux-testing.elf",
+			InitramfsPath:          "metropolis/test/nanoswitch/initramfs.lz4",
+			ExtraNetworkInterfaces: switchPorts,
+			PortMap:                portMap,
+		}); err != nil {
+			log.Printf("Failed to launch nanoswitch: %v", err)
+		}
+	}()
+	copts := []grpcretry.CallOption{
+		grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
+	}
+	conn, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure(),
+		grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(copts...)))
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to dial debug service: %w", err)
+	}
+	debug := apb.NewNodeDebugServiceClient(conn)
+	if opts.NumNodes == 2 {
+		res, err := debug.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{
+			// HACK: this is assigned by DHCP, and we assume that everything goes well.
+			ExternalIp: "10.1.0.3",
+		}, grpcretry.WithMax(10))
+		if err != nil {
+			conn.Close() // the client is not handed out on this path, so release its connection
+			return nil, nil, fmt.Errorf("failed to get golden ticket: %w", err)
+		}
+
+		ec := &apb.EnrolmentConfig{
+			GoldenTicket: res.Ticket,
+		}
+
+		go func() {
+			if err := Launch(ctx, Options{ConnectToSocket: vmPorts[1], EnrolmentConfig: ec}); err != nil {
+				log.Printf("Failed to launch vm1: %v", err)
+			}
+		}()
+	}
+
+	return debug, portMap, nil
+}
diff --git a/metropolis/test/nanoswitch/BUILD b/metropolis/test/nanoswitch/BUILD
new file mode 100644
index 0000000..fc4f932
--- /dev/null
+++ b/metropolis/test/nanoswitch/BUILD
@@ -0,0 +1,40 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("//metropolis/node/build:def.bzl", "smalltown_initramfs")
+
+# Go library holding the nanoswitch sources. Visibility is private, so it can only be used from this package
+# (it is embedded into the :nanoswitch binary below).
+go_library(
+ name = "go_default_library",
+ srcs = ["nanoswitch.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/metropolis/test/nanoswitch",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//metropolis/node:go_default_library",
+ "//metropolis/node/common/supervisor:go_default_library",
+ "//metropolis/node/core/network/dhcp4c:go_default_library",
+ "//metropolis/node/core/network/dhcp4c/callback:go_default_library",
+ "//metropolis/test/launch:go_default_library",
+ "@com_github_google_nftables//:go_default_library",
+ "@com_github_google_nftables//expr:go_default_library",
+ "@com_github_insomniacslk_dhcp//dhcpv4:go_default_library",
+ "@com_github_insomniacslk_dhcp//dhcpv4/server4:go_default_library",
+ "@com_github_vishvananda_netlink//:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+# The nanoswitch executable, built by embedding :go_default_library.
+# NOTE(review): pure = "on" presumably requests a cgo-free build so the binary can run as the only program in
+# the initramfs - confirm against the rules_go documentation.
+go_binary(
+ name = "nanoswitch",
+ embed = [":go_default_library"],
+ pure = "on",
+ visibility = ["//visibility:public"],
+)
+
+# Initramfs image for the nanoswitch micro VM. The files dict presumably maps each label to its path inside the
+# image (verify against //metropolis/node/build:def.bzl); the nanoswitch binary is listed as /init.
+smalltown_initramfs(
+ name = "initramfs",
+ files = {
+ ":nanoswitch": "/init",
+
+ # CA Certificate bundle
+ "@cacerts//file": "/etc/ssl/cert.pem",
+ },
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/test/nanoswitch/nanoswitch.go b/metropolis/test/nanoswitch/nanoswitch.go
new file mode 100644
index 0000000..1fe6740
--- /dev/null
+++ b/metropolis/test/nanoswitch/nanoswitch.go
@@ -0,0 +1,301 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// nanoswitch is a virtualized switch/router combo intended for testing.
+// It uses the first interface as an external interface to connect to the host and pass traffic in and out. All other
+// interfaces are switched together and served by a built-in DHCP server. Traffic from that network to the
+// SLIRP/external network is SNATed as the host-side SLIRP ignores routed packets.
+// It also has built-in userspace proxying support for debugging.
+package main
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "os"
+ "time"
+
+ "github.com/google/nftables"
+ "github.com/google/nftables/expr"
+ "github.com/insomniacslk/dhcp/dhcpv4"
+ "github.com/insomniacslk/dhcp/dhcpv4/server4"
+ "github.com/vishvananda/netlink"
+ "golang.org/x/sys/unix"
+
+ common "git.monogon.dev/source/nexantic.git/metropolis/node"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/common/supervisor"
+ "git.monogon.dev/source/nexantic.git/metropolis/node/core/network/dhcp4c"
+ dhcpcb "git.monogon.dev/source/nexantic.git/metropolis/node/core/network/dhcp4c/callback"
+ "git.monogon.dev/source/nexantic.git/metropolis/test/launch"
+)
+
+// Static addressing of the VM-facing network: nanoswitch itself lives at 10.1.0.1 inside a /24.
+var (
+	// switchIP is the address nanoswitch assigns to its bridge and announces as DHCP server, gateway and router.
+	switchIP = net.IP{10, 1, 0, 1}
+	// switchSubnetMask is the IPv4 netmask (/24) of the VM-facing network.
+	switchSubnetMask = net.CIDRMask(24, 32)
+)
+
+// defaultLeaseOptions sets the lease options needed to properly configure connectivity to nanoswitch
+func defaultLeaseOptions(reply *dhcpv4.DHCPv4) {
+ reply.GatewayIPAddr = switchIP
+ reply.UpdateOption(dhcpv4.OptDNS(net.IPv4(10, 42, 0, 3))) // SLIRP fake DNS server
+ reply.UpdateOption(dhcpv4.OptRouter(switchIP))
+ reply.UpdateOption(dhcpv4.OptIPAddressLeaseTime(30 * time.Second)) // Make sure we exercise our DHCP client in E2E tests
+ reply.UpdateOption(dhcpv4.OptSubnetMask(switchSubnetMask))
+}
+
+// runDHCPServer returns a runnable serving an extremely minimal DHCP server on link. Most options are hardcoded (see
+// defaultLeaseOptions, which also sets the 30 second lease time), addresses come from a wrapping bump allocator and
+// there is no support for DHCP collision detection.
+//
+// Only Discover and Request messages are answered. Release, Decline and any other message type are logged and
+// dropped without a reply.
+func runDHCPServer(link netlink.Link) supervisor.Runnable {
+	// Last address handed out. It starts at the switch's own address, so the first offer is 10.1.0.2. The last octet
+	// is a byte and wraps around after 255, at which point already used addresses (including the switch's own)
+	// are offered again.
+	// NOTE(review): server4 may invoke the handler concurrently, in which case this state is unsynchronized - confirm
+	// against the server4 implementation.
+	currentIP := net.IP{10, 1, 0, 1}
+
+	return func(ctx context.Context) error {
+		laddr := net.UDPAddr{
+			IP:   net.IPv4(0, 0, 0, 0),
+			Port: 67,
+		}
+		server, err := server4.NewServer(link.Attrs().Name, &laddr, func(conn net.PacketConn, peer net.Addr, m *dhcpv4.DHCPv4) {
+			if m == nil {
+				return
+			}
+			reply, err := dhcpv4.NewReplyFromRequest(m)
+			if err != nil {
+				supervisor.Logger(ctx).Warningf("Failed to generate DHCP reply: %v", err)
+				return
+			}
+			reply.UpdateOption(dhcpv4.OptServerIdentifier(switchIP))
+			reply.ServerIPAddr = switchIP
+
+			switch m.MessageType() {
+			case dhcpv4.MessageTypeDiscover:
+				reply.UpdateOption(dhcpv4.OptMessageType(dhcpv4.MessageTypeOffer))
+				defaultLeaseOptions(reply)
+				currentIP[3]++ // Works only because it's a /24
+				// Hand out a copy so that the reply never aliases the allocator state mutated by later Discovers.
+				reply.YourIPAddr = append(net.IP(nil), currentIP...)
+				supervisor.Logger(ctx).Infof("Replying with DHCP IP %s", reply.YourIPAddr.String())
+			case dhcpv4.MessageTypeRequest:
+				// Requests are acknowledged unconditionally: whatever address the client asks for (or already
+				// uses) is confirmed without consulting the allocator.
+				reply.UpdateOption(dhcpv4.OptMessageType(dhcpv4.MessageTypeAck))
+				defaultLeaseOptions(reply)
+				if m.RequestedIPAddress() != nil {
+					reply.YourIPAddr = m.RequestedIPAddress()
+				} else {
+					reply.YourIPAddr = m.ClientIPAddr
+				}
+			case dhcpv4.MessageTypeRelease, dhcpv4.MessageTypeDecline:
+				// Leases are not tracked, so there is nothing to undo. These messages must not be answered.
+				supervisor.Logger(ctx).Info("Ignoring Release/Decline")
+				return
+			default:
+				// Anything else would go out as a reply without a DHCP message type, which no client can use.
+				supervisor.Logger(ctx).Infof("Ignoring unsupported DHCP message type %v", m.MessageType())
+				return
+			}
+			if _, err := conn.WriteTo(reply.ToBytes(), peer); err != nil {
+				supervisor.Logger(ctx).Warningf("Cannot reply to client: %v", err)
+			}
+		})
+		if err != nil {
+			return err
+		}
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		// Serve() blocks until the server is closed, so tie its lifetime to the context.
+		go func() {
+			<-ctx.Done()
+			server.Close()
+		}()
+		return server.Serve()
+	}
+}
+
+// userspaceProxy returns a runnable that listens on port (on all IPv4 addresses) and proxies every accepted TCP
+// connection to the same port on targetIP.
+//
+// The runnable signals healthy once the listener is up and returns when accepting fails. If that happened because
+// ctx was canceled (which closes the listener), the context's error is returned. Failures to reach the upstream are
+// only logged and affect just the connection in question.
+func userspaceProxy(targetIP net.IP, port uint16) supervisor.Runnable {
+	return func(ctx context.Context) error {
+		logger := supervisor.Logger(ctx)
+		tcpListener, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.IPv4(0, 0, 0, 0), Port: int(port)})
+		if err != nil {
+			return err
+		}
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		// AcceptTCP has no context support; closing the listener is what unblocks it on cancellation.
+		go func() {
+			<-ctx.Done()
+			tcpListener.Close()
+		}()
+		for {
+			conn, err := tcpListener.AcceptTCP()
+			if err != nil {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				return err
+			}
+			go func(conn *net.TCPConn) {
+				defer conn.Close()
+				upstreamConn, err := net.DialTCP("tcp", nil, &net.TCPAddr{IP: targetIP, Port: int(port)})
+				if err != nil {
+					logger.Infof("Userspace proxy failed to connect to upstream: %v", err)
+					return
+				}
+				defer upstreamConn.Close()
+				go func() {
+					// Copy errors are deliberately ignored: either side going away simply ends the proxying.
+					io.Copy(upstreamConn, conn)
+					// The client is done sending. Propagate the half-close so that an upstream waiting for EOF
+					// before answering is not stuck forever. This fails harmlessly if the connection has already
+					// been torn down by the deferred Close calls.
+					upstreamConn.CloseWrite()
+				}()
+				// Once upstream is done, the deferred closes tear down both sides, which also ends the goroutine
+				// above.
+				io.Copy(conn, upstreamConn)
+			}(conn)
+		}
+	}
+}
+
+// addNetworkRoutes configures link with the address obtained from DHCP and, if a gateway was provided, installs
+// a default route via it.
+//
+// addr is set on link (replacing an identical existing address). gw is the default gateway; when it is empty or
+// the unspecified address, no route is added. Errors from netlink are returned wrapped.
+func addNetworkRoutes(link netlink.Link, addr net.IPNet, gw net.IP) error {
+	if err := netlink.AddrReplace(link, &netlink.Addr{IPNet: &addr}); err != nil {
+		return fmt.Errorf("failed to add DHCP address to network interface \"%v\": %w", link.Attrs().Name, err)
+	}
+
+	// IsUnspecified reports false for a nil/empty IP, so that case has to be checked separately.
+	if len(gw) == 0 || gw.IsUnspecified() {
+		return nil
+	}
+
+	route := &netlink.Route{
+		Dst:   &net.IPNet{IP: net.IPv4(0, 0, 0, 0), Mask: net.IPv4Mask(0, 0, 0, 0)},
+		Gw:    gw,
+		Scope: netlink.SCOPE_UNIVERSE,
+	}
+	if err := netlink.RouteAdd(route); err != nil {
+		return fmt.Errorf("could not add default route: netlink.RouteAdd(%+v): %w", route, err)
+	}
+	return nil
+}
+
+// nfifname returns the interface name n as a 16-byte buffer, zero-padded on the right (for nftables). Names of
+// 16 bytes or more are cut off after 16 bytes.
+func nfifname(n string) []byte {
+	// make() hands back zeroed memory, so whatever copy leaves untouched already is the NUL padding.
+	padded := make([]byte, 16)
+	copy(padded, n)
+	return padded
+}
+
+// main runs nanoswitch as the init process of its VM. Under a supervisor it:
+//   - mounts the kernel filesystems it needs,
+//   - bridges all VM-facing ethernet interfaces together and assigns switchIP to the bridge,
+//   - if an interface with the well-known host MAC exists, masquerades traffic leaving through it, runs a DHCP
+//     client on it and enables IPv4 forwarding,
+//   - starts the DHCP server for the bridge and the userspace proxies towards 10.1.0.2, which is the first address
+//     the DHCP server hands out.
+//
+// Failures during this setup are fatal.
+func main() {
+	supervisor.New(context.Background(), func(ctx context.Context) error {
+		logger := supervisor.Logger(ctx)
+		logger.Info("Starting NanoSwitch, a tiny TOR switch emulator")
+
+		// Set up target filesystems.
+		for _, el := range []struct {
+			dir   string
+			fs    string
+			flags uintptr
+		}{
+			{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+			{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+			{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+			{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+		} {
+			if err := os.Mkdir(el.dir, 0755); err != nil && !os.IsExist(err) {
+				return fmt.Errorf("could not make %s: %w", el.dir, err)
+			}
+			if err := unix.Mount(el.fs, el.dir, el.fs, el.flags, ""); err != nil {
+				return fmt.Errorf("could not mount %s on %s: %w", el.fs, el.dir, err)
+			}
+		}
+
+		links, err := netlink.LinkList()
+		if err != nil {
+			logger.Fatalf("Failed to list links: %v", err)
+		}
+		// Sort all ethernet devices into the host-facing uplink (recognized by its MAC) and the VM-facing ports.
+		var externalLink netlink.Link
+		var vmLinks []netlink.Link
+		for _, link := range links {
+			attrs := link.Attrs()
+			if link.Type() != "device" || len(attrs.HardwareAddr) == 0 {
+				continue
+			}
+			if attrs.Flags&net.FlagUp != net.FlagUp {
+				// Attempt to take up all ethernet links. This stays best-effort, but failures are made visible
+				// as they likely explain connectivity problems later on.
+				if err := netlink.LinkSetUp(link); err != nil {
+					logger.Warningf("Failed to bring up interface %s: %v", attrs.Name, err)
+				}
+			}
+			if bytes.Equal(attrs.HardwareAddr, launch.HostInterfaceMAC) {
+				externalLink = link
+			} else {
+				vmLinks = append(vmLinks, link)
+			}
+		}
+
+		// Switch all VM-facing ports together and give the switch its static address on that network.
+		vmBridgeLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "vmbridge", Flags: net.FlagUp}}
+		if err := netlink.LinkAdd(vmBridgeLink); err != nil {
+			logger.Fatalf("Failed to create vmbridge: %v", err)
+		}
+		for _, link := range vmLinks {
+			if err := netlink.LinkSetMaster(link, vmBridgeLink); err != nil {
+				logger.Fatalf("Failed to add VM interface to bridge: %v", err)
+			}
+			logger.Infof("Assigned interface %s to bridge", link.Attrs().Name)
+		}
+		if err := netlink.AddrReplace(vmBridgeLink, &netlink.Addr{IPNet: &net.IPNet{IP: switchIP, Mask: switchSubnetMask}}); err != nil {
+			logger.Fatalf("Failed to assign static IP to vmbridge: %v", err)
+		}
+
+		if externalLink != nil {
+			c := &nftables.Conn{}
+			nat := c.AddTable(&nftables.Table{
+				Family: nftables.TableFamilyIPv4,
+				Name:   "nat",
+			})
+
+			postrouting := c.AddChain(&nftables.Chain{
+				Name:     "postrouting",
+				Hooknum:  nftables.ChainHookPostrouting,
+				Priority: nftables.ChainPriorityNATSource,
+				Table:    nat,
+				Type:     nftables.ChainTypeNAT,
+			})
+
+			// Masquerade/SNAT all traffic going out of the external interface
+			c.AddRule(&nftables.Rule{
+				Table: nat,
+				Chain: postrouting,
+				Exprs: []expr.Any{
+					&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
+					&expr.Cmp{
+						Op:       expr.CmpOpEq,
+						Register: 1,
+						Data:     nfifname(externalLink.Attrs().Name),
+					},
+					&expr.Masq{},
+				},
+			})
+
+			// Nothing above has been sent to the kernel yet; Flush submits the table, chain and rule.
+			if err := c.Flush(); err != nil {
+				logger.Fatalf("Failed to apply nftables rules: %v", err)
+			}
+
+			// The DHCP client wants a net.Interface, so build one from the netlink attributes.
+			netIface := &net.Interface{
+				Name:         externalLink.Attrs().Name,
+				MTU:          externalLink.Attrs().MTU,
+				Index:        externalLink.Attrs().Index,
+				Flags:        externalLink.Attrs().Flags,
+				HardwareAddr: externalLink.Attrs().HardwareAddr,
+			}
+			dhcpClient, err := dhcp4c.NewClient(netIface)
+			if err != nil {
+				logger.Fatalf("Failed to create DHCP client: %v", err)
+			}
+			dhcpClient.RequestedOptions = []dhcpv4.OptionCode{dhcpv4.OptionRouter}
+			dhcpClient.LeaseCallback = dhcpcb.Compose(dhcpcb.ManageIP(externalLink), dhcpcb.ManageDefaultRoute(externalLink))
+			supervisor.Run(ctx, "dhcp-client", dhcpClient.Run)
+			// Without forwarding enabled, packets from the VM network would never be routed to the uplink.
+			if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte("1\n"), 0644); err != nil {
+				logger.Fatalf("Failed to write ip forwards: %v", err)
+			}
+		} else {
+			logger.Info("No upstream interface detected")
+		}
+
+		supervisor.Run(ctx, "dhcp-server", runDHCPServer(vmBridgeLink))
+		// The proxies target 10.1.0.2, the first address handed out by the DHCP server.
+		supervisor.Run(ctx, "proxy-ext1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.ExternalServicePort))
+		supervisor.Run(ctx, "proxy-dbg1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.DebugServicePort))
+		supervisor.Run(ctx, "proxy-k8s-api1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.KubernetesAPIPort))
+		supervisor.Signal(ctx, supervisor.SignalHealthy)
+		supervisor.Signal(ctx, supervisor.SignalDone)
+		return nil
+	})
+	// Everything runs under the supervisor from here on; block forever so that the process never exits.
+	select {}
+}